timholy
diff --git a/‎.github/workflows/Documenter.yml
+5 b/‎.github/workflows/Documenter.yml
+5
diff --git a/‎SnoopCompileCore/src/snoop_inference.jl
+24-20 b/‎SnoopCompileCore/src/snoop_inference.jl
+24-20
diff --git a/‎SnoopCompileCore/src/snoop_invalidations.jl
+8-6 b/‎SnoopCompileCore/src/snoop_invalidations.jl
+8-6
diff --git a/‎SnoopCompileCore/src/snoop_llvm.jl
+4-5 b/‎SnoopCompileCore/src/snoop_llvm.jl
+4-5
diff --git a/‎docs/Project.toml
+4 b/‎docs/Project.toml
+4
diff --git a/‎docs/make.jl
+10-6 b/‎docs/make.jl
+10-6
diff --git a/‎docs/src/assets/ascend_optimizeme1.png
232 KB b/‎docs/src/assets/ascend_optimizeme1.png
232 KB
diff --git a/‎docs/src/explanations/basic.md
+40 b/‎docs/src/explanations/basic.md
+40
diff --git a/‎docs/src/explanations/fixing_inference.md
+165 b/‎docs/src/explanations/fixing_inference.md
+165
@@ -26,6 +26,11 @@ jobs:
           version: '1'
       - run: julia --project -e 'using Pkg; Pkg.develop([PackageSpec(path=joinpath(pwd(), "SnoopCompileCore"))])'
       - uses: julia-actions/julia-buildpkg@latest
+      # To access the developer tools from within a package's environment, they should be in the default environment
+      - run: julia -e 'using Pkg; Pkg.develop([PackageSpec(path=joinpath(pwd(), "SnoopCompileCore")), PackageSpec(path=joinpath(pwd()))]); Pkg.instantiate()'
+      # Additional packages we'll need
+      - run: julia -e 'using Pkg; Pkg.add(["AbstractTrees", "Cthulhu"])'  # pyplot would be nice but it often errors
+      # Documenter wants them to be in the local environment
       - run: julia --project=docs/ -e 'using Pkg; Pkg.develop([PackageSpec(path=joinpath(pwd(), "SnoopCompileCore")), PackageSpec(path=joinpath(pwd()))]); Pkg.instantiate()'
       - uses: julia-actions/julia-docdeploy@releases/v1
         env:
 
@@ -98,36 +98,40 @@ function _snoop_inference(cmd::Expr)
 end
 
 """
-    tinf = @snoop_inference commands
+    tinf = @snoop_inference commands;
 
-Produce a profile of julia's type inference, recording the amount of time spent inferring
-every `MethodInstance` processed while executing `commands`. Each fresh entrance to
-type inference (whether executed directly in `commands` or because a call was made
-by runtime-dispatch) also collects a backtrace so the caller can be identified.
+Produce a profile of julia's type inference, recording the amount of time spent
+inferring every `MethodInstance` processed while executing `commands`. Each
+fresh entrance to type inference (whether executed directly in `commands` or
+because a call was made by runtime-dispatch) also collects a backtrace so the
+caller can be identified.
 
-`tinf` is a tree, each node containing data on a particular inference "frame" (the method,
-argument-type specializations, parameters, and even any constant-propagated values).
-Each reports the [`exclusive`](@ref)/[`inclusive`](@ref) times, where the exclusive
-time corresponds to the time spent inferring this frame in and of itself, whereas
-the inclusive time includes the time needed to infer all the callees of this frame.
+`tinf` is a tree, each node containing data on a particular inference "frame"
+(the method, argument-type specializations, parameters, and even any
+constant-propagated values). Each reports the
+[`exclusive`](@ref)/[`inclusive`](@ref) times, where the exclusive time
+corresponds to the time spent inferring this frame in and of itself, whereas the
+inclusive time includes the time needed to infer all the callees of this frame.
 
 The top-level node in this profile tree is `ROOT`. Uniquely, its exclusive time
-corresponds to the time spent _not_ in julia's type inference (codegen, llvm_opt, runtime, etc).
+corresponds to the time spent _not_ in julia's type inference (codegen,
+llvm_opt, runtime, etc).
 
-There are many different ways of inspecting and using the data stored in `tinf`.
-The simplest is to load the `AbstracTrees` package and display the tree with
-`AbstractTrees.print_tree(tinf)`.
-See also:  `flamegraph`, `flatten`, `inference_triggers`, `SnoopCompile.parcel`,
-`runtime_inferencetime`.
+Working with `tinf` effectively requires loading `SnoopCompile`.
+
+!!! warning
+    Note the semicolon `;` at the end of the `@snoop_inference` macro call.
+    Because `SnoopCompileCore` is not permitted to invalidate any code, it cannot define
+    the `Base.show` methods that pretty-print `tinf`. Defer inspection of `tinf`
+    until `SnoopCompile` has been loaded.
 
 # Example
-```jldoctest; setup=:(using SnoopCompile), filter=r"([0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?/[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?|\\d direct)"
+
+```jldoctest; setup=:(using SnoopCompileCore), filter=r"([0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?/[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?|\\d direct)"
 julia> tinf = @snoop_inference begin
            sort(rand(100))  # Evaluate some code and profile julia's type inference
-       end
-InferenceTimingNode: 0.110018224/0.131464476 on Core.Compiler.Timings.ROOT() with 2 direct children
+       end;
 ```
-
 """
 macro snoop_inference(cmd)
     return _snoop_inference(cmd)
 
@@ -1,18 +1,18 @@
 export @snoop_invalidations
 
 """
-    list = @snoop_invalidations expr
+    invs = @snoop_invalidations expr
 
 Capture method cache invalidations triggered by evaluating `expr`.
-`list` is a sequence of invalidated `Core.MethodInstance`s together with "explanations," consisting
+`invs` is a sequence of invalidated `Core.MethodInstance`s together with "explanations," consisting
 of integers (encoding depth) and strings (documenting the source of an invalidation).
 
-Unless you are working at a low level, you essentially always want to pass `list`
+Unless you are working at a low level, you essentially always want to pass `invs`
 directly to [`SnoopCompile.invalidation_trees`](@ref).
 
 # Extended help
 
-`list` is in a format where the "reason" comes after the items.
+`invs` is in a format where the "reason" comes after the items.
 Method deletion results in the sequence
 
     [zero or more (mi, "invalidate_mt_cache") pairs..., zero or more (depth1 tree, loctag) pairs..., method, loctag] with loctag = "jl_method_table_disable"
@@ -22,14 +22,16 @@ where `mi` means a `MethodInstance`. `depth1` means a sequence starting at `dept
 Method insertion results in the sequence
 
     [zero or more (depth0 tree, sig) pairs..., same info as with delete_method except loctag = "jl_method_table_insert"]
+
+The authoritative reference is Julia's own `src/gf.c` file.
 """
 macro snoop_invalidations(expr)
     quote
-        local list = ccall(:jl_debug_method_invalidation, Any, (Cint,), 1)
+        local invs = ccall(:jl_debug_method_invalidation, Any, (Cint,), 1)
         Expr(:tryfinally,
             $(esc(expr)),
             ccall(:jl_debug_method_invalidation, Any, (Cint,), 0)
         )
-        list
+        invs
     end
 end
@@ -3,11 +3,10 @@ export @snoop_llvm
 using Serialization
 
 """
-```
-@snoop_llvm "func_names.csv" "llvm_timings.yaml" begin
-    # Commands to execute, in a new process
-end
-```
+    @snoop_llvm "func_names.csv" "llvm_timings.yaml" begin
+        # Commands to execute, in a new process
+    end
+
 causes the julia compiler to log timing information for LLVM optimization during the
 provided commands to the files "func_names.csv" and "llvm_timings.yaml". These files can
 be used for the input to `SnoopCompile.read_snoop_llvm("func_names.csv", "llvm_timings.yaml")`.
 
@@ -1,15 +1,19 @@
 [deps]
 AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
+Cthulhu = "f68482b8-f384-11e8-15f7-abe071a5a75f"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
 MethodAnalysis = "85b6ec6f-f7df-4429-9514-a64bcd9ee824"
 PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee"
 SnoopCompile = "aa65fe97-06da-5843-b5b1-d5d13cad87d2"
+SnoopCompileCore = "e2b509da-e806-4183-be48-004708413034"
 
 [compat]
 AbstractTrees = "0.4"
+Cthulhu = "2"
 Documenter = "1"
 JET = "0.9"
 MethodAnalysis = "0.4"
 PyPlot = "2"
 SnoopCompile = "3"
+SnoopCompileCore = "3"
@@ -1,19 +1,23 @@
 using Documenter
+using SnoopCompileCore
 using SnoopCompile
 import PyPlot   # so that the visualizations.jl file is loaded
 
 makedocs(
     sitename = "SnoopCompile",
     format = Documenter.HTML(
-        prettyurls = get(ENV, "CI", nothing) == "true"
+        prettyurls = true,
     ),
-    modules = [SnoopCompile.SnoopCompileCore, SnoopCompile],
-    linkcheck = true,
+    modules = [SnoopCompileCore, SnoopCompile],
+    linkcheck = true,   # the link check is slow, set to false if you're building frequently
     # doctest = :fix,
+    warnonly=true,    # delete when https://github.com/JuliaDocs/Documenter.jl/issues/2541 is fixed
     pages = ["index.md",
-             "tutorial.md",
-             "Modern tools" => ["snoop_invalidations.md", "snoop_inference.md", "pgdsgui.md", "snoop_inference_analysis.md", "snoop_inference_parcel.md", "jet.md"],
-             "reference.md"],
+             "Basic tutorials" => ["tutorials/invalidations.md", "tutorials/snoop_inference.md", "tutorials/snoop_llvm.md", "tutorials/pgdsgui.md", "tutorials/jet.md"],
+             "Advanced tutorials" => ["tutorials/snoop_inference_analysis.md", "tutorials/snoop_inference_parcel.md"],
+             "Explanations" => ["explanations/tools.md", "explanations/gotchas.md", "explanations/fixing_inference.md"],
+             "reference.md",
+    ]
 )
 
 deploydocs(
 
@@ -0,0 +1,40 @@
+# Understanding SnoopCompile and Julia's compilation pipeline
+
+Julia uses
+[Just-in-time (JIT) compilation](https://en.wikipedia.org/wiki/Just-in-time_compilation) to
+generate the code that runs on your CPU.
+Broadly speaking, there are two major compilation steps: *inference* and *code generation*.
+Inference is the process of determining the type of each object, which in turn
+determines which specific methods get called; once type inference is complete,
+code generation performs optimizations and ultimately generates the assembly
+language (native code) used on CPUs.
+Some aspects of this process are documented [here](https://docs.julialang.org/en/v1/devdocs/eval/).
+
+Using code that has never been compiled requires that it first be JIT-compiled, and this contributes to the latency of using the package.
+In some circumstances, you can cache (store) the results of compilation to files to
+reduce the latency when your package is used. These files are the `*.ji` and
+`*.so` files that live in the `compiled` directory of your Julia depot, usually
+located at `~/.julia/compiled`. However, if these files become large, loading
+them can be another source of latency. Julia needs time both to load and
+validate the cached compiled code. Minimizing the latency of using a package
+involves focusing on caching the compilation of code that is both commonly used
+and takes time to compile.
+
+Caching code for later use is called *precompilation*. Julia has had some forms of precompilation almost since the very first packages. However, it was [Julia
+1.9](https://julialang.org/blog/2023/04/julia-1.9-highlights/#caching_of_native_code) that first supported "complete" precompilation, including the ability to store native code in shared-library cache files.
+
+SnoopCompile is designed to help you analyze the costs of JIT-compilation, identify
+key bottlenecks that contribute to latency, and set up `precompile` directives to see whether
+they produce measurable benefits.
+
+## Package precompilation
+
+When a package is precompiled, here's what happens under the hood:
+
+- Julia loads all of the package's dependencies (the ones in the `[deps]` section of the `Project.toml` file), typically from precompile cache files
+- Julia evaluates the source code (text files) that define the package module(s). Evaluating `function foo(args...) ... end` creates a new method `foo`. Note that:
+  + the source code might also contain statements that create "data" (e.g., `const`s). In some cases this can lead to some subtle precompilation ["gotchas"](@ref running-during-pc)
+  + the source code might also contain a precompile workload, which forces compilation and tracking of package methods.
+- Julia iterates over the module contents and writes the *result* to disk. Note that the module contents might include compiled code, and if so it is written along with everything else to the cache file.
+
+When Julia loads your package, it just loads the "snapshot" stored in the cache file: it does not re-evaluate the source-text files that defined your package! It is appropriate to think of the source files of your package as "build scripts" that create your module; once the "build scripts" are executed, it's the module itself that gets cached, and the job of the build scripts is done.
@@ -0,0 +1,165 @@
+# Techniques for fixing inference problems
+
+Here we assume you've dug into your code with a tool like Cthulhu, and want to know how to fix some of the problems that you discover. Below is a collection of specific cases and some tricks for handling them.
+
+Note that there is also a [tutorial on fixing inference](@ref inferrability) that delves into advanced topics.
+
+## Adding type annotations
+
+### Using concrete types
+
+Defining variables like `list = []` can be convenient, but it creates a `list` of type `Vector{Any}`. This prevents inference from knowing the type of items extracted from `list`. Using `list = String[]` for a container of strings, etc., is an excellent fix. When in doubt, check the type with `isconcretetype`: a common mistake is to think that `list_of_lists = Array{Int}[]` gives you a vector-of-vectors, but
+
+```jldoctest
+julia> isconcretetype(Array{Int})
+false
+```
+
+reminds you that `Array` requires a second parameter indicating the dimensionality of the array. (Or use `list_of_lists = Vector{Int}[]` instead, as `Vector{Int} === Array{Int, 1}`.)
+
+Many valuable tips can be found among [Julia's performance tips](https://docs.julialang.org/en/v1/manual/performance-tips/), and readers are encouraged to consult that page.
+
+### Working with non-concrete types
+
+In cases where invalidations occur, but you can't use concrete types (there are indeed many valid uses of `Vector{Any}`),
+you can often prevent the invalidation using some additional knowledge.
+One common example is extracting information from an [`IOContext`](https://docs.julialang.org/en/v1/manual/networking-and-streams/#IO-Output-Contextual-Properties-1) structure, which is roughly defined as
+
+```julia
+struct IOContext{IO_t <: IO} <: AbstractPipe
+    io::IO_t
+    dict::ImmutableDict{Symbol, Any}
+end
+```
+
+There are good reasons that `dict` uses a value-type of `Any`, but that makes it impossible for the compiler to infer the type of any object looked up in an `IOContext`.
+Fortunately, you can help!
+For example, the documentation specifies that the `:color` setting should be a `Bool`, and since it appears in documentation it's something we can safely enforce.
+Changing
+
+```
+iscolor = get(io, :color, false)
+```
+
+to
+
+```
+iscolor = get(io, :color, false)::Bool     # assert that the rhs is Bool-valued
+```
+
+will throw an error if it isn't a `Bool`, and this allows the compiler to take advantage of the type being known in subsequent operations.
+
+If the return type is one of a small number of possibilities (generally three or fewer), you can annotate the return type with `Union{...}`. This is generally advantageous only when the intersection of what inference already knows about the types of a variable and the types in the `Union` results in a concrete type.
+
+As a more detailed example, suppose you're writing code that parses Julia's `Expr` type:
+
+```julia
+julia> ex = :(Array{Float32,3})
+:(Array{Float32, 3})
+
+julia> dump(ex)
+Expr
+  head: Symbol curly
+  args: Array{Any}((3,))
+    1: Symbol Array
+    2: Symbol Float32
+    3: Int64 3
+```
+
+`ex.args` is a `Vector{Any}`.
+However, for a `:curly` expression only certain types will be found among the arguments; you could write key portions of your code as
+
+```julia
+a = ex.args[2]
+if a isa Symbol
+    # inside this block, Julia knows `a` is a Symbol, and so methods called on `a` will be resistant to invalidation
+    foo(a)
+elseif a isa Expr && length((a::Expr).args) > 2
+    a::Expr         # sometimes you have to help inference by adding a type-assert
+    x = bar(a)      # `bar` is now resistant to invalidation
+elseif a isa Integer
+    # even though you've not made this fully-inferrable, you've at least reduced the scope for invalidations
+    # by limiting the subset of `foobar` methods that might be called
+    y = foobar(a)
+end
+```
+
+Other tricks include replacing broadcasting on `v::Vector{Any}` with `Base.mapany(f, v)`--`mapany` avoids trying to narrow the type of `f(v[i])` and just assumes it will be `Any`, thereby avoiding invalidations of many `convert` methods.
+
+Adding type-assertions and fixing inference problems are the most common approaches for fixing invalidations.
+You can discover these manually, but using Cthulhu is highly recommended.
+
+## Inferrable field access for abstract types
+
+When invalidations happen for methods that manipulate fields of abstract types, often there is a simple solution: create an "interface" for the abstract type specifying that certain fields must have certain types.
+Here's an example:
+
+```
+abstract type AbstractDisplay end
+
+struct Monitor <: AbstractDisplay
+    height::Int
+    width::Int
+    maker::String
+end
+
+struct Phone <: AbstractDisplay
+    height::Int
+    width::Int
+    maker::Symbol
+end
+
+function Base.show(@nospecialize(d::AbstractDisplay), x)
+    str = string(x)
+    w = d.width
+    if length(str) > w  # do we have to truncate to fit the display width?
+        ...
+```
+
+In this `show` method, we've deliberately chosen to prevent specialization on the specific type of `AbstractDisplay` (to reduce the total number of times we have to compile this method).
+As a consequence, Julia's inference may not realize that `d.width` returns an `Int`.
+
+Fortunately, you can help by defining an interface for generic `AbstractDisplay` objects:
+
+```
+function Base.getproperty(d::AbstractDisplay, name::Symbol)
+    if name === :height
+        return getfield(d, :height)::Int
+    elseif name === :width
+        return getfield(d, :width)::Int
+    elseif name === :maker
+        return getfield(d, :maker)::Union{String,Symbol}
+    end
+    return getfield(d, name)
+end
+```
+
+Julia's [constant propagation](https://en.wikipedia.org/wiki/Constant_folding) will ensure that most accesses of those fields will be determined at compile-time, so this simple change robustly fixes many inference problems.
+
+## Fixing `Core.Box`
+
+[Julia issue 15276](https://github.com/JuliaLang/julia/issues/15276) is one of the more surprising forms of inference failure; it is the most common cause of a `Core.Box` annotation.
+If other variables depend on the `Box`ed variable, then a single `Core.Box` can lead to widespread inference problems.
+For this reason, these are also among the first inference problems you should tackle.
+
+Read [this explanation of why this happens and what you can do to fix it](https://docs.julialang.org/en/v1/manual/performance-tips/#man-performance-captured).
+If you are directed to find `Core.Box` inference triggers via [`suggest`](@ref), you may need to explore around the call site a bit--
+the inference trigger may be in the closure itself, but the fix needs to go in the method that creates the closure.
+
+Use of `ascend` is highly recommended for fixing `Core.Box` inference failures.
+
+## Handling edge cases
+
+You can sometimes get invalidations from failing to handle "formal" possibilities.
+For example, operations with regular expressions might return a `Union{Nothing, RegexMatch}`.
+You can sometimes get poor type inference by writing code that fails to take account of the possibility that `nothing` might be returned.
+For example, a comprehension
+
+```julia
+ms = [m.match for m in match.((rex,), my_strings)]
+```
+might be replaced with
+```julia
+ms = [m.match for m in match.((rex,), my_strings) if m !== nothing]
+```
+and return a better-typed result.