671 lines
36 KiB
HTML
671 lines
36 KiB
HTML
<!DOCTYPE HTML>
|
|
<html lang="en" class="light sidebar-visible" dir="ltr">
|
|
<head>
|
|
<!-- Book generated using mdBook -->
|
|
<meta charset="UTF-8">
|
|
<title>Search - Rust Compiler Development Guide</title>
|
|
|
|
|
|
<!-- Custom HTML head -->
|
|
|
|
<meta name="description" content="A guide to developing the Rust compiler (rustc)">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
<meta name="theme-color" content="#ffffff">
|
|
|
|
<link rel="icon" href="../favicon.svg">
|
|
<link rel="shortcut icon" href="../favicon.png">
|
|
<link rel="stylesheet" href="../css/variables.css">
|
|
<link rel="stylesheet" href="../css/general.css">
|
|
<link rel="stylesheet" href="../css/chrome.css">
|
|
<link rel="stylesheet" href="../css/print.css" media="print">
|
|
|
|
<!-- Fonts -->
|
|
<link rel="stylesheet" href="../FontAwesome/css/font-awesome.css">
|
|
<link rel="stylesheet" href="../fonts/fonts.css">
|
|
|
|
<!-- Highlight.js Stylesheets -->
|
|
<link rel="stylesheet" id="highlight-css" href="../highlight.css">
|
|
<link rel="stylesheet" id="tomorrow-night-css" href="../tomorrow-night.css">
|
|
<link rel="stylesheet" id="ayu-highlight-css" href="../ayu-highlight.css">
|
|
|
|
<!-- Custom theme stylesheets -->
|
|
|
|
|
|
<!-- Provide site root and default themes to javascript -->
|
|
<script>
|
|
const path_to_root = "../";
|
|
const default_light_theme = "light";
|
|
const default_dark_theme = "navy";
|
|
</script>
|
|
<!-- Start loading toc.js asap -->
|
|
<script src="../toc.js"></script>
|
|
</head>
|
|
<body>
|
|
<div id="body-container">
|
|
<!-- Work around some values being stored in localStorage wrapped in quotes -->
|
|
<script>
|
|
try {
|
|
let theme = localStorage.getItem('mdbook-theme');
|
|
let sidebar = localStorage.getItem('mdbook-sidebar');
|
|
|
|
if (theme.startsWith('"') && theme.endsWith('"')) {
|
|
localStorage.setItem('mdbook-theme', theme.slice(1, theme.length - 1));
|
|
}
|
|
|
|
if (sidebar.startsWith('"') && sidebar.endsWith('"')) {
|
|
localStorage.setItem('mdbook-sidebar', sidebar.slice(1, sidebar.length - 1));
|
|
}
|
|
} catch (e) { }
|
|
</script>
|
|
|
|
<!-- Set the theme before any content is loaded, prevents flash -->
|
|
<script>
|
|
const default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches ? default_dark_theme : default_light_theme;
|
|
let theme;
|
|
try { theme = localStorage.getItem('mdbook-theme'); } catch(e) { }
|
|
if (theme === null || theme === undefined) { theme = default_theme; }
|
|
const html = document.documentElement;
|
|
html.classList.remove('light')
|
|
html.classList.add(theme);
|
|
html.classList.add("js");
|
|
</script>
|
|
|
|
<input type="checkbox" id="sidebar-toggle-anchor" class="hidden">
|
|
|
|
<!-- Hide / unhide sidebar before it is displayed -->
|
|
<script>
|
|
let sidebar = null;
|
|
const sidebar_toggle = document.getElementById("sidebar-toggle-anchor");
|
|
if (document.body.clientWidth >= 1080) {
|
|
try { sidebar = localStorage.getItem('mdbook-sidebar'); } catch(e) { }
|
|
sidebar = sidebar || 'visible';
|
|
} else {
|
|
sidebar = 'hidden';
|
|
}
|
|
sidebar_toggle.checked = sidebar === 'visible';
|
|
html.classList.remove('sidebar-visible');
|
|
html.classList.add("sidebar-" + sidebar);
|
|
</script>
|
|
|
|
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
|
|
<!-- populated by js -->
|
|
<mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox>
|
|
<noscript>
|
|
<iframe class="sidebar-iframe-outer" src="../toc.html"></iframe>
|
|
</noscript>
|
|
<div id="sidebar-resize-handle" class="sidebar-resize-handle">
|
|
<div class="sidebar-resize-indicator"></div>
|
|
</div>
|
|
</nav>
|
|
|
|
<div id="page-wrapper" class="page-wrapper">
|
|
|
|
<div class="page">
|
|
<div id="menu-bar-hover-placeholder"></div>
|
|
<div id="menu-bar" class="menu-bar sticky">
|
|
<div class="left-buttons">
|
|
<label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
|
|
<i class="fa fa-bars"></i>
|
|
</label>
|
|
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
|
|
<i class="fa fa-paint-brush"></i>
|
|
</button>
|
|
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
|
|
<li role="none"><button role="menuitem" class="theme" id="default_theme">Auto</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="light">Light</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
|
|
</ul>
|
|
<button id="search-toggle" class="icon-button" type="button" title="Search. (Shortkey: s)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="S" aria-controls="searchbar">
|
|
<i class="fa fa-search"></i>
|
|
</button>
|
|
</div>
|
|
|
|
<h1 class="menu-title">Rust Compiler Development Guide</h1>
|
|
|
|
<div class="right-buttons">
|
|
<a href="../print.html" title="Print this book" aria-label="Print this book">
|
|
<i id="print-button" class="fa fa-print"></i>
|
|
</a>
|
|
<a href="https://github.com/rust-lang/rustc-dev-guide" title="Git repository" aria-label="Git repository">
|
|
<i id="git-repository-button" class="fa fa-github"></i>
|
|
</a>
|
|
<a href="https://github.com/rust-lang/rustc-dev-guide/edit/master/src/rustdoc-internals/search.md" title="Suggest an edit" aria-label="Suggest an edit">
|
|
<i id="git-edit-button" class="fa fa-edit"></i>
|
|
</a>
|
|
|
|
</div>
|
|
</div>
|
|
|
|
<div id="search-wrapper" class="hidden">
|
|
<form id="searchbar-outer" class="searchbar-outer">
|
|
<input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
|
|
</form>
|
|
<div id="searchresults-outer" class="searchresults-outer hidden">
|
|
<div id="searchresults-header" class="searchresults-header"></div>
|
|
<ul id="searchresults">
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
|
|
<script>
|
|
document.getElementById('sidebar-toggle').setAttribute('aria-expanded', sidebar === 'visible');
|
|
document.getElementById('sidebar').setAttribute('aria-hidden', sidebar !== 'visible');
|
|
Array.from(document.querySelectorAll('#sidebar a')).forEach(function(link) {
|
|
link.setAttribute('tabIndex', sidebar === 'visible' ? 0 : -1);
|
|
});
|
|
</script>
|
|
|
|
<div id="content" class="content">
|
|
<main>
|
|
<h1 id="rustdoc-search"><a class="header" href="#rustdoc-search">Rustdoc search</a></h1>
|
|
<p>Rustdoc Search is two programs: <code>search_index.rs</code>
|
|
and <code>search.js</code>. The first generates a nasty JSON
|
|
file with a full list of items and function signatures
|
|
in the crates in the doc bundle, and the second reads
|
|
it, turns it into some in-memory structures, and
|
|
scans them linearly to search.</p>
|
|
<ul>
|
|
<li><a href="#search-index-format">Search index format</a>
|
|
<ul>
|
|
<li><a href="#parallel-arrays-and-indexed-maps">Parallel arrays and indexed maps</a></li>
|
|
<li><a href="#representing-sparse-columns">Representing sparse columns</a>
|
|
<ul>
|
|
<li><a href="#vlq-hex">VLQ Hex</a></li>
|
|
<li><a href="#roaring-bitmaps">Roaring Bitmaps</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#how-descriptions-are-stored">How descriptions are stored</a></li>
|
|
<li><a href="#i-f-and-p"><code>i</code>, <code>f</code>, and <code>p</code></a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#searching-by-name">Searching by name</a></li>
|
|
<li><a href="#searching-by-type">Searching by type</a></li>
|
|
<li><a href="#re-exports">Re-exports</a></li>
|
|
<li><a href="#testing-the-search-engine">Testing the search engine</a></li>
|
|
</ul>
|
|
<h2 id="search-index-format"><a class="header" href="#search-index-format">Search index format</a></h2>
|
|
<p><code>search.js</code> calls this Raw, because it turns it into
|
|
a more normal object tree after loading it.
|
|
For space savings, it's also written without newlines or spaces.</p>
|
|
<pre><code class="language-json">[
|
|
[ "crate_name", {
|
|
// name
|
|
"n": ["function_name", "Data"],
|
|
// type
|
|
"t": "HF",
|
|
// parent module
|
|
"q": [[0, "crate_name"]],
|
|
// parent type
|
|
"i": [2, 0],
|
|
// type dictionary
|
|
"p": [[1, "i32"], [1, "str"], [5, "Data", 0]],
|
|
// function signature
|
|
"f": "{{gb}{d}}`", // [[3, 1], [2]]
|
|
// impl disambiguator
|
|
"b": [],
|
|
// deprecated flag
|
|
"c": "OjAAAAAAAAA=", // empty bitmap
|
|
// empty description flag
|
|
"e": "OjAAAAAAAAA=", // empty bitmap
|
|
// aliases
|
|
"a": [["get_name", 0]],
|
|
// description shards
|
|
"D": "g", // 3
|
|
// inlined re-exports
|
|
"r": [],
|
|
}]
|
|
]
|
|
</code></pre>
|
|
<p><a href="https://github.com/rust-lang/rust/blob/2f92f050e83bf3312ce4ba73c31fe843ad3cbc60/src/librustdoc/html/static/js/rustdoc.d.ts#L344-L390"><code>src/librustdoc/html/static/js/rustdoc.d.ts</code></a>
|
|
defines an actual schema in a TypeScript <code>type</code>.</p>
|
|
<div class="table-wrapper"><table><thead><tr><th>Key</th><th>Name</th><th>Description</th></tr></thead><tbody>
|
|
<tr><td><code>n</code></td><td>Names</td><td>Item names</td></tr>
|
|
<tr><td><code>t</code></td><td>Item Type</td><td>One-char item type code</td></tr>
|
|
<tr><td><code>q</code></td><td>Parent module</td><td><code>Map<index, path></code></td></tr>
|
|
<tr><td><code>i</code></td><td>Parent type</td><td>list of indexes</td></tr>
|
|
<tr><td><code>f</code></td><td>Function signature</td><td><a href="#i-f-and-p">encoded</a></td></tr>
|
|
<tr><td><code>b</code></td><td>Impl disambiguator</td><td><code>Map<index, string></code></td></tr>
|
|
<tr><td><code>c</code></td><td>Deprecation flag</td><td><a href="#roaring-bitmaps">roaring bitmap</a></td></tr>
|
|
<tr><td><code>e</code></td><td>Description is empty</td><td><a href="#roaring-bitmaps">roaring bitmap</a></td></tr>
|
|
<tr><td><code>p</code></td><td>Type dictionary</td><td><code>[[item type, path]]</code></td></tr>
|
|
<tr><td><code>a</code></td><td>Alias</td><td><code>Map<string, index></code></td></tr>
|
|
<tr><td><code>D</code></td><td>description shards</td><td><a href="#how-descriptions-are-stored">encoded</a></td></tr>
|
|
</tbody></table>
|
|
</div>
|
|
<p>The above index defines a crate called <code>crate_name</code>
|
|
with a free function called <code>function_name</code> and a struct called <code>Data</code>,
|
|
with the type signature <code>Data, i32 -> str</code>,
|
|
and an alias, <code>get_name</code>, that equivalently refers to <code>function_name</code>.</p>
|
|
<p>The search index needs to fit the needs of the <code>rustdoc</code> compiler,
|
|
the <code>search.js</code> frontend,
|
|
and also be compact and fast to decode.
|
|
It makes a lot of compromises:</p>
|
|
<ul>
|
|
<li>The <code>rustdoc</code> compiler runs on one crate at a time,
|
|
so each crate has an essentially separate search index.
|
|
It <a href="https://github.com/rust-lang/rust/blob/79b710c13968a1a48d94431d024d2b1677940866/src/librustdoc/html/render/write_shared.rs#L151-L164">merges</a> them by having each crate on one line
|
|
and looking at the first quoted string.</li>
|
|
<li>Names in the search index are given
|
|
in their original case and with underscores.
|
|
When the search index is loaded,
|
|
<code>search.js</code> stores the original names for display,
|
|
but also folds them to lowercase and strips underscores for search.
|
|
You'll see them called <code>normalized</code>.</li>
|
|
<li>The <code>f</code> array stores types as offsets into the <code>p</code> array.
|
|
These types might actually be from another crate,
|
|
so <code>search.js</code> has to turn the numbers into names and then
|
|
back into numbers to deduplicate them if multiple crates in the
|
|
same index mention the same types.</li>
|
|
<li>It's a JSON file, but not designed to be human-readable.
|
|
Browsers already include an optimized JSON decoder,
|
|
so this saves on <code>search.js</code> code and performs better for small crates,
|
|
but instead of using objects like normal JSON formats do,
|
|
it tries to put data of the same type next to each other
|
|
so that the sliding window used by <a href="https://en.wikipedia.org/wiki/Deflate">DEFLATE</a> can find redundancies.
|
|
Where <code>search.js</code> does its own compression,
|
|
it's designed to save memory when the file is finally loaded,
|
|
not just size on disk or network transfer.</li>
|
|
</ul>
|
|
<h3 id="parallel-arrays-and-indexed-maps"><a class="header" href="#parallel-arrays-and-indexed-maps">Parallel arrays and indexed maps</a></h3>
|
|
<p>Abstractly, Rustdoc Search data is a table, stored in column-major form.
|
|
Most data in the index represents a set of parallel arrays
|
|
(the "columns") which refer to the same data if they're at the same position.</p>
|
|
<p>For example,
|
|
the above search index can be turned into this table:</p>
|
|
<div class="table-wrapper"><table><thead><tr><th></th><th>n</th><th>t</th><th><a href="#how-descriptions-are-stored">d</a></th><th>q</th><th>i</th><th>f</th><th>b</th><th>c</th></tr></thead><tbody>
|
|
<tr><td>0</td><td><code>crate_name</code></td><td><code>D</code></td><td>Documentation</td><td>NULL</td><td>0</td><td>NULL</td><td>NULL</td><td>0</td></tr>
|
|
<tr><td>1</td><td><code>function_name</code></td><td><code>H</code></td><td>This function gets the name of an integer with Data</td><td><code>crate_name</code></td><td>2</td><td><code>{{gb}{d}}</code></td><td>NULL</td><td>0</td></tr>
|
|
<tr><td>2</td><td><code>Data</code></td><td><code>F</code></td><td>The data struct</td><td><code>crate_name</code></td><td>0</td><td><code>`</code></td><td>NULL</td><td>0</td></tr>
|
|
</tbody></table>
|
|
</div>
|
|
<p>The crate row is implied in most columns, since its type is known (it's a crate),
|
|
it can't have a parent (crates form the root of the module tree),
|
|
its name is specified as the map key,
|
|
and function-specific data like the impl disambiguator can't apply either.
|
|
However, it can still have a description and it can still be deprecated.
|
|
The crate, therefore, has a primary key of <code>0</code>.</p>
|
|
<p>The above code doesn't use <code>c</code>, which holds deprecated indices,
|
|
or <code>b</code>, which maps indices to strings.
|
|
If <code>crate_name::function_name</code> used both, it might look like this.</p>
|
|
<pre><code class="language-json"> "b": [[0, "impl-Foo-for-Bar"]],
|
|
"c": "OjAAAAEAAAAAAAIAEAAAABUAbgZYCQ==",
|
|
</code></pre>
|
|
<p>This attaches a disambiguator to index 1 and marks it deprecated.</p>
|
|
<p>The advantage of this layout is that these APIs often have implicit structure
|
|
that DEFLATE can take advantage of,
|
|
but that rustdoc can't assume.
|
|
Like how names are usually CamelCase or snake_case,
|
|
but descriptions aren't.
|
|
It also makes it easier to use a sparse data for things like boolean flags.</p>
|
|
<p><code>q</code> is a Map from <em>the first applicable</em> ID to a parent module path.
|
|
This is a weird trick, but it makes more sense in pseudo-code:</p>
|
|
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>let mut parent_module = "";
|
|
for (i, entry) in search_index.iter().enumerate() {
|
|
if q.contains(i) {
|
|
parent_module = q.get(i);
|
|
}
|
|
// ... do other stuff with `entry` ...
|
|
}
|
|
<span class="boring">}</span></code></pre></pre>
|
|
<p>This is valid because everything has a parent module
|
|
(even if it's just the crate itself),
|
|
and is easy to assemble because the rustdoc generator sorts by path
|
|
before serializing.
|
|
Doing this allows rustdoc to not only make the search index smaller,
|
|
but reuse the same string representing the parent path across multiple in-memory items.</p>
|
|
<h3 id="representing-sparse-columns"><a class="header" href="#representing-sparse-columns">Representing sparse columns</a></h3>
|
|
<h4 id="vlq-hex"><a class="header" href="#vlq-hex">VLQ Hex</a></h4>
|
|
<p>This format is, as far as I know, used nowhere other than rustdoc.
|
|
It follows this grammar:</p>
|
|
<pre><code class="language-ebnf">VLQHex = { VHItem | VHBackref }
|
|
VHItem = VHNumber | ( '{', {VHItem}, '}' )
|
|
VHNumber = { '@' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' }, ( '`' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k ' | 'l' | 'm' | 'n' | 'o' )
|
|
VHBackref = ( '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | ':' | ';' | '<' | '=' | '>' | '?' )
|
|
</code></pre>
|
|
<p>A VHNumber is a variable-length, self-terminating base16 number
|
|
(terminated because the last hexit is lowercase while all others are uppercase).
|
|
The sign bit is represented using <a href="https://en.wikipedia.org/wiki/Variable-length_quantity#Zigzag_encoding">zig-zag encoding</a>.</p>
|
|
<p>This alphabet is chosen because the characters can be turned into hexits by
|
|
masking off the last four bits of the ASCII encoding.</p>
|
|
<p>A major feature of this encoding, as with all of the "compression" done in rustdoc,
|
|
is that it can remain in its compressed format <em>even in memory at runtime</em>.
|
|
This is why <code>HBackref</code> is only used at the top level,
|
|
and why we don't just use <a href="https://en.wikipedia.org/wiki/Deflate">Flate</a> for everything: the decoder in search.js
|
|
will reuse the entire decoded object whenever a backref is seen,
|
|
saving decode work and memory.</p>
|
|
<h4 id="roaring-bitmaps"><a class="header" href="#roaring-bitmaps">Roaring Bitmaps</a></h4>
|
|
<p>Flag-style data, such as deprecation and empty descriptions,
|
|
are stored using the <a href="https://github.com/RoaringBitmap/RoaringFormatSpec">standard Roaring Bitmap serialization format with runs</a>.
|
|
The data is then base64 encoded when writing it.</p>
|
|
<p>As a brief overview: a roaring bitmap is a chunked array of bits,
|
|
described in <a href="https://arxiv.org/pdf/1603.06549.pdf">this paper</a>.
|
|
A chunk can either be a list of integers, a bitfield, or a list of runs.
|
|
In any case, the search engine has to base64 decode it,
|
|
and read the chunk index itself,
|
|
but the payload data stays as-is.</p>
|
|
<p>All roaring bitmaps in rustdoc currently store a flag for each item index.
|
|
The crate is item 0, all others start at 1.</p>
|
|
<h3 id="how-descriptions-are-stored"><a class="header" href="#how-descriptions-are-stored">How descriptions are stored</a></h3>
|
|
<p>The largest amount of data,
|
|
and the main thing Rustdoc Search deals with that isn't
|
|
actually used for searching, is descriptions.
|
|
In a SERP table, this is what appears on the rightmost column.</p>
|
|
<blockquote>
|
|
<div class="table-wrapper"><table><thead><tr><th>item type</th><th>item path</th><th><em><strong>description</strong></em> (this part)</th></tr></thead><tbody>
|
|
<tr><td>function</td><td>my_crate::my_function</td><td>This function gets the name of an integer with Data</td></tr>
|
|
</tbody></table>
|
|
</div></blockquote>
|
|
<p>When someone runs a search in rustdoc for the first time, their browser will
|
|
work through a "sandwich workload" of three steps:</p>
|
|
<ol>
|
|
<li>Download the search-index.js and search.js files (a network bottleneck).</li>
|
|
<li>Perform the actual search (a CPU and memory bandwidth bottleneck).</li>
|
|
<li>Download the description data (another network bottleneck).</li>
|
|
</ol>
|
|
<p>Reducing the amount of data downloaded here will almost always increase latency,
|
|
by delaying the decision of what to download behind other work and/or adding
|
|
data dependencies where something can't be downloaded without first downloading
|
|
something else. In this case, we can't start downloading descriptions until
|
|
after the search is done, because that's what allows it to decide <em>which</em>
|
|
descriptions to download (it needs to sort the results then truncate to 200).</p>
|
|
<p>To do this, two columns are stored in the search index, building on both
|
|
Roaring Bitmaps and on VLQ Hex.</p>
|
|
<ul>
|
|
<li><code>e</code> is an index of <strong>e</strong>mpty descriptions. It's a <a href="#roaring-bitmaps">roaring bitmap</a> of
|
|
each item (the crate itself is item 0, the rest start at 1).</li>
|
|
<li><code>D</code> is a shard list, stored in <a href="#vlq-hex">VLQ hex</a> as flat list of integers.
|
|
Each integer gives you the number of descriptions in the shard.
|
|
As the decoder walks the index, it checks if the description is empty.
|
|
if it's not, then it's in the "current" shard. When all items are
|
|
exhausted, it goes on to the next shard.</li>
|
|
</ul>
|
|
<p>Inside each shard is a newline-delimited list of descriptions,
|
|
wrapped in a JSONP-style function call.</p>
|
|
<h3 id="i-f-and-p"><a class="header" href="#i-f-and-p"><code>i</code>, <code>f</code>, and <code>p</code></a></h3>
|
|
<p><code>i</code> and <code>f</code> both index into <code>p</code>, the array of parent items.</p>
|
|
<p><code>i</code> is just a one-indexed number
|
|
(not zero-indexed because <code>0</code> is used for items that have no parent item).
|
|
It's different from <code>q</code> because <code>q</code> represents the parent <em>module or crate</em>,
|
|
which everything has,
|
|
while <code>i</code>/<code>q</code> are used for <em>type and trait-associated items</em> like methods.</p>
|
|
<p><code>f</code>, the function signatures, use a <a href="#vlq-hex">VLQ hex</a> tree.
|
|
A number is either a one-indexed reference into <code>p</code>,
|
|
a negative number representing a generic,
|
|
or zero for null.</p>
|
|
<p>(the internal object representation also uses negative numbers,
|
|
even after decoding,
|
|
to represent generics).</p>
|
|
<p>For example, <code>{{gb}{d}}</code> is equivalent to the json <code>[[3, 1], [2]]</code>.
|
|
Because of zigzag encoding, <code>`</code> is +0, <code>a</code> is -0 (which is not used),
|
|
<code>b</code> is +1, and <code>c</code> is -1.</p>
|
|
<h2 id="searching-by-name"><a class="header" href="#searching-by-name">Searching by name</a></h2>
|
|
<p>Searching by name works by looping through the search index
|
|
and running these functions on each:</p>
|
|
<ul>
|
|
<li><a href="https://github.com/rust-lang/rust/blob/79b710c13968a1a48d94431d024d2b1677940866/src/librustdoc/html/static/js/search.js#L137"><code>editDistance</code></a> is always used to determine a match
|
|
(unless quotes are specified, which would use simple equality instead).
|
|
It computes the number of swaps, inserts, and removes needed to turn
|
|
the query name into the entry name.
|
|
For example, <code>foo</code> has zero distance from itself,
|
|
but a distance of 1 from <code>ofo</code> (one swap) and <code>foob</code> (one insert).
|
|
It is checked against an heuristic threshold, and then,
|
|
if it is within that threshold, the distance is stored for ranking.</li>
|
|
<li><a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/indexOf"><code>String.prototype.indexOf</code></a> is always used to determine a match.
|
|
If it returns anything other than -1, the result is added,
|
|
even if <code>editDistance</code> exceeds its threshold,
|
|
and the index is stored for ranking.</li>
|
|
<li><a href="https://github.com/rust-lang/rust/blob/79b710c13968a1a48d94431d024d2b1677940866/src/librustdoc/html/static/js/search.js#L1814"><code>checkPath</code></a> is used if, and only if, a parent path is specified
|
|
in the query. For example, <code>vec</code> has no parent path, but <code>vec::vec</code> does.
|
|
Within checkPath, editDistance and indexOf are used,
|
|
and the path query has its own heuristic threshold, too.
|
|
If it's not within the threshold, the entry is rejected,
|
|
even if the first two pass.
|
|
If it's within the threshold, the path distance is stored
|
|
for ranking.</li>
|
|
<li><a href="https://github.com/rust-lang/rust/blob/79b710c13968a1a48d94431d024d2b1677940866/src/librustdoc/html/static/js/search.js#L1787"><code>checkType</code></a> is used only if there's a type filter,
|
|
like the struct in <code>struct:vec</code>. If it fails,
|
|
the entry is rejected.</li>
|
|
</ul>
|
|
<p>If all four criteria pass
|
|
(plus the crate filter, which isn't technically part of the query),
|
|
the results are sorted by <a href="https://github.com/rust-lang/rust/blob/79b710c13968a1a48d94431d024d2b1677940866/src/librustdoc/html/static/js/search.js#L1229"><code>sortResults</code></a>.</p>
|
|
<h2 id="searching-by-type"><a class="header" href="#searching-by-type">Searching by type</a></h2>
|
|
<p>Searching by type can be divided into two phases,
|
|
and the second phase has two sub-phases.</p>
|
|
<ul>
|
|
<li>Turn names in the query into numbers.</li>
|
|
<li>Loop over each entry in the search index:
|
|
<ul>
|
|
<li>Quick rejection using a bloom filter.</li>
|
|
<li>Slow rejection using a recursive type unification algorithm.</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
<p>In the names->numbers phase, if the query has only one name in it,
|
|
the editDistance function is used to find a near match if the exact match fails,
|
|
but if there's multiple items in the query,
|
|
non-matching items are treated as generics instead.
|
|
This means <code>hahsmap</code> will match hashmap on its own, but <code>hahsmap, u32</code>
|
|
is going to match the same things <code>T, u32</code> matches
|
|
(though rustdoc will detect this particular problem and warn about it).</p>
|
|
<p>Then, when actually looping over each item,
|
|
the bloom filter will probably reject entries that don't have every
|
|
type mentioned in the query.
|
|
For example, the bloom query allows a query of <code>i32 -> u32</code> to match
|
|
a function with the type <code>i32, u32 -> bool</code>,
|
|
but unification will reject it later.</p>
|
|
<p>The unification filter ensures that:</p>
|
|
<ul>
|
|
<li>Bag semantics are respected. If you query says <code>i32, i32</code>,
|
|
then the function has to mention <em>two</em> i32s, not just one.</li>
|
|
<li>Nesting semantics are respected. If your query says <code>vec<option></code>,
|
|
then <code>vec<option<i32>></code> is fine, but <code>option<vec<i32>></code> <em>is not</em> a match.</li>
|
|
<li>The division between return type and parameter is respected.
|
|
<code>i32 -> u32</code> and <code>u32 -> i32</code> are completely different.</li>
|
|
</ul>
|
|
<p>The bloom filter checks none of these things,
|
|
and, on top of that, can have false positives.
|
|
But it's fast and uses very little memory, so the bloom filter helps.</p>
|
|
<h2 id="re-exports"><a class="header" href="#re-exports">Re-exports</a></h2>
|
|
<p><a href="https://doc.rust-lang.org/nightly/rustdoc/write-documentation/re-exports.html">Re-export inlining</a> allows the same item to be found by multiple names.
|
|
Search supports this by giving the same item multiple entries and tracking a canonical path
|
|
for any items where that differs from the given path.</p>
|
|
<p>For example, this sample index has a single struct exported from two paths:</p>
|
|
<pre><code class="language-json">[
|
|
[ "crate_name", {
|
|
"doc": "Documentation",
|
|
"n": ["Data", "Data"],
|
|
"t": "FF",
|
|
"d": ["The data struct", "The data struct"],
|
|
"q": [[0, "crate_name"], [1, "crate_name::submodule"]],
|
|
"i": [0, 0],
|
|
"p": [],
|
|
"f": "``",
|
|
"b": [],
|
|
"c": [],
|
|
"a": [],
|
|
"r": [[0, 1]],
|
|
}]
|
|
]
|
|
</code></pre>
|
|
<p>The important part of this example is the <code>r</code> array,
|
|
which indicates that path entry 1 in the <code>q</code> array is
|
|
the canonical path for item 0.
|
|
That is, <code>crate_name::Data</code> has a canonical path of <code>crate_name::submodule::Data</code>.</p>
|
|
<p>This might sound like a strange design, since it has the duplicate data.
|
|
It's done that way because inlining can happen across crates,
|
|
which are compiled separately and might not all be present in the docs.</p>
|
|
<pre><code class="language-json">[
|
|
[ "crate_name", ... ],
|
|
[ "crate_name_2", { "q": [[0, "crate_name::submodule"], [5, "core::option"]], ... }]
|
|
]
|
|
</code></pre>
|
|
<p>In the above example, a canonical path actually comes from a dependency,
|
|
and another one comes from an inlined standard library item:
|
|
the canonical path isn't even in the index!
|
|
The canonical path might also be private.
|
|
In either case, it's never shown to the user, and is only used for deduplication.</p>
|
|
<p>Associated types, like methods, store them differently.
|
|
These types are connected with an entry in <code>p</code> (their "parent")
|
|
and each one has an optional third tuple element:</p>
|
|
<pre><code>"p": [[5, "Data", 0, 1]]
|
|
</code></pre>
|
|
<p>That's:</p>
|
|
<ul>
|
|
<li>5: It's a struct</li>
|
|
<li>"Data": Its name</li>
|
|
<li>0: Its display path, "crate_name"</li>
|
|
<li>1: Its canonical path, "crate_name::submodule"</li>
|
|
</ul>
|
|
<p>In both cases, the canonical path might not be public at all,
|
|
or it might be from another crate that isn't in the docs,
|
|
so it's never shown to the user, but is used for deduplication.</p>
|
|
<h2 id="testing-the-search-engine"><a class="header" href="#testing-the-search-engine">Testing the search engine</a></h2>
|
|
<p>While the generated UI is tested using <code>rustdoc-gui</code> tests, the
|
|
primary way the search engine is tested is the <code>rustdoc-js</code> and
|
|
<code>rustdoc-js-std</code> tests. They run in NodeJS.</p>
|
|
<p>A <code>rustdoc-js</code> test has a <code>.rs</code> and <code>.js</code> file, with the same name.
|
|
The <code>.rs</code> file specifies the hypothetical library crate to run
|
|
the searches on (make sure you mark anything you need to find as <code>pub</code>).
|
|
The <code>.js</code> file specifies the actual searches.
|
|
The <code>rustdoc-js-std</code> tests are the same, but don't require an <code>.rs</code>
|
|
file, since they use the standard library.</p>
|
|
<p>The <code>.js</code> file is like a module (except the loader takes care of
|
|
<code>exports</code> for you). It uses these variables:</p>
|
|
<div class="table-wrapper"><table><thead><tr><th>Name</th><th>Type</th><th>Description</th></tr></thead><tbody>
|
|
<tr><td><code>FILTER_CRATE</code></td><td><code>string</code></td><td>Only include results from the given crate. In the GUI, this is the "Results in <kbd>crate</kbd>" drop-down menu.</td></tr>
|
|
<tr><td><code>EXPECTED</code></td><td><code>[ResultsTable]|ResultsTable</code></td><td>List of tests to run, specifying what the hypothetical user types into the search box and sees in the tabs</td></tr>
|
|
<tr><td><code>PARSED</code></td><td><code>[ParsedQuery]|ParsedQuery</code></td><td>List of parser tests to run, without running an actual search</td></tr>
|
|
</tbody></table>
|
|
</div>
|
|
<p><code>FILTER_CRATE</code> can be left out (equivalent to searching "all crates"), but you
|
|
have to specify <code>EXPECTED</code> or <code>PARSED</code>.</p>
|
|
<p>By default, the test fails if any of the results specified in the test case are
|
|
not found after running the search, or if the results found after running the
|
|
search don't appear in the same order that they do in the test.
|
|
The actual search results may, however, include results that aren't in the test.
|
|
To override this, specify any of the following magic comments.
|
|
Put them on their own line, without indenting.</p>
|
|
<ul>
|
|
<li><code>// exact-check</code>: If search results appear that aren't part of the test case,
|
|
then fail.</li>
|
|
<li><code>// ignore-order</code>: Allow search results to appear in any order.</li>
|
|
<li><code>// should-fail</code>: Used to write negative tests.</li>
|
|
</ul>
|
|
<p>Standard library tests usually shouldn't specify <code>// exact-check</code>, since we
|
|
want the libs team to be able to add new items without causing unrelated
|
|
tests to fail, but standalone tests will use it more often.</p>
|
|
<p>The <code>ResultsTable</code> and <code>ParsedQuery</code> types are specified in
|
|
<a href="https://github.com/rust-lang/rust/blob/master/src/librustdoc/html/static/js/rustdoc.d.ts"><code>rustdoc.d.ts</code></a>.</p>
|
|
<p>For example, imagine we needed to fix a bug where a function named
|
|
<code>constructor</code> couldn't be found. To do this, write two files:</p>
|
|
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>// tests/rustdoc-js/constructor_search.rs
|
|
// The test case needs to find this result.
|
|
pub fn constructor(_input: &str) -> i32 { 1 }
|
|
<span class="boring">}</span></code></pre></pre>
|
|
<pre><code class="language-js">// tests/rustdoc-js/constructor_search.js
|
|
// exact-check
|
|
// Since this test runs against its own crate,
|
|
// new items should not appear in the search results.
|
|
const EXPECTED = [
|
|
// This first test targets name-based search.
|
|
{
|
|
query: "constructor",
|
|
others: [
|
|
{ path: "constructor_search", name: "constructor" },
|
|
],
|
|
in_args: [],
|
|
returned: [],
|
|
},
|
|
// This test targets the second tab.
|
|
{
|
|
query: "str",
|
|
others: [],
|
|
in_args: [
|
|
{ path: "constructor_search", name: "constructor" },
|
|
],
|
|
returned: [],
|
|
},
|
|
// This test targets the third tab.
|
|
{
|
|
query: "i32",
|
|
others: [],
|
|
in_args: [],
|
|
returned: [
|
|
{ path: "constructor_search", name: "constructor" },
|
|
],
|
|
},
|
|
// This test targets advanced type-driven search.
|
|
{
|
|
query: "str -> i32",
|
|
others: [
|
|
{ path: "constructor_search", name: "constructor" },
|
|
],
|
|
in_args: [],
|
|
returned: [],
|
|
},
|
|
]
|
|
</code></pre>
|
|
|
|
</main>
|
|
|
|
<nav class="nav-wrapper" aria-label="Page navigation">
|
|
<!-- Mobile navigation buttons -->
|
|
<a rel="prev" href="../rustdoc-internals.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
|
|
<i class="fa fa-angle-left"></i>
|
|
</a>
|
|
|
|
<a rel="next prefetch" href="../rustdoc-internals/rustdoc-test-suite.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
|
|
<i class="fa fa-angle-right"></i>
|
|
</a>
|
|
|
|
<div style="clear: both"></div>
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
|
|
<nav class="nav-wide-wrapper" aria-label="Page navigation">
|
|
<a rel="prev" href="../rustdoc-internals.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
|
|
<i class="fa fa-angle-left"></i>
|
|
</a>
|
|
|
|
<a rel="next prefetch" href="../rustdoc-internals/rustdoc-test-suite.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
|
|
<i class="fa fa-angle-right"></i>
|
|
</a>
|
|
</nav>
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<script>
|
|
window.playground_copyable = true;
|
|
</script>
|
|
|
|
|
|
<script src="../elasticlunr.min.js"></script>
|
|
<script src="../mark.min.js"></script>
|
|
<script src="../searcher.js"></script>
|
|
|
|
<script src="../clipboard.min.js"></script>
|
|
<script src="../highlight.js"></script>
|
|
<script src="../book.js"></script>
|
|
|
|
<!-- Custom JS scripts -->
|
|
<script src="../mermaid.min.js"></script>
|
|
<script src="../mermaid-init.js"></script>
|
|
|
|
|
|
</div>
|
|
</body>
|
|
</html>
|