616 lines
41 KiB
HTML
616 lines
41 KiB
HTML
<!DOCTYPE HTML>
|
|
<html lang="en" class="light sidebar-visible" dir="ltr">
|
|
<head>
|
|
<!-- Book generated using mdBook -->
|
|
<meta charset="UTF-8">
|
|
<title>Overview of the compiler - Rust Compiler Development Guide</title>
|
|
|
|
|
|
<!-- Custom HTML head -->
|
|
|
|
<meta name="description" content="A guide to developing the Rust compiler (rustc)">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
<meta name="theme-color" content="#ffffff">
|
|
|
|
<link rel="icon" href="favicon.svg">
|
|
<link rel="shortcut icon" href="favicon.png">
|
|
<link rel="stylesheet" href="css/variables.css">
|
|
<link rel="stylesheet" href="css/general.css">
|
|
<link rel="stylesheet" href="css/chrome.css">
|
|
<link rel="stylesheet" href="css/print.css" media="print">
|
|
|
|
<!-- Fonts -->
|
|
<link rel="stylesheet" href="FontAwesome/css/font-awesome.css">
|
|
<link rel="stylesheet" href="fonts/fonts.css">
|
|
|
|
<!-- Highlight.js Stylesheets -->
|
|
<link rel="stylesheet" id="highlight-css" href="highlight.css">
|
|
<link rel="stylesheet" id="tomorrow-night-css" href="tomorrow-night.css">
|
|
<link rel="stylesheet" id="ayu-highlight-css" href="ayu-highlight.css">
|
|
|
|
<!-- Custom theme stylesheets -->
|
|
|
|
|
|
<!-- Provide site root and default themes to javascript -->
|
|
<script>
    // Globals shared with the other scripts on this page, declared in one statement.
    //   path_to_root        - site root exposed to JavaScript; empty for this page.
    //                         Not referenced by the inline scripts visible here;
    //                         presumably read by toc.js or other book scripts (TODO confirm).
    //   default_light_theme - theme used when the browser does not prefer a dark scheme.
    //   default_dark_theme  - theme used when the browser prefers a dark scheme.
    const path_to_root = "",
        default_light_theme = "light",
        default_dark_theme = "navy";
</script>
|
|
<!-- Start loading toc.js asap -->
|
|
<script src="toc.js"></script>
|
|
</head>
|
|
<body>
|
|
<div id="body-container">
|
|
<!-- Work around some values being stored in localStorage wrapped in quotes -->
|
|
<script>
    // Some values may have been stored in localStorage wrapped in double quotes.
    // Strip one pair of surrounding quotes from each affected key so the scripts
    // below read the bare value.
    //
    // Each key is handled independently. Previously a missing 'mdbook-theme'
    // (getItem returns null) made `theme.startsWith` throw; the catch swallowed
    // the error and the 'mdbook-sidebar' fix-up was silently skipped.
    try {
        for (const key of ['mdbook-theme', 'mdbook-sidebar']) {
            const stored = localStorage.getItem(key);
            // Require at least two characters so a lone '"' is not rewritten
            // to an empty string, which classList.add() would later reject.
            if (stored !== null
                && stored.length >= 2
                && stored.startsWith('"')
                && stored.endsWith('"')) {
                localStorage.setItem(key, stored.slice(1, -1));
            }
        }
    } catch (e) {
        // Best effort only: accessing localStorage can throw (e.g. when storage
        // is blocked), and the page must still load in that case.
    }
</script>
|
|
|
|
<!-- Set the theme before any content is loaded, prevents flash -->
|
|
<script>
    // Choose the theme class for <html> before any content renders.
    // A value saved under 'mdbook-theme' wins; otherwise the fallback follows
    // the browser's prefers-color-scheme setting.
    const default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches
        ? default_dark_theme
        : default_light_theme;

    let theme;
    try {
        theme = localStorage.getItem('mdbook-theme');
    } catch(e) {
        // Storage is unavailable; `theme` stays undefined and the default applies.
    }
    if (theme == null) {
        // Loose equality matches exactly null and undefined.
        theme = default_theme;
    }

    // Replace the 'light' class from the static markup with the resolved theme,
    // and add 'js' to the root element as well.
    const html = document.documentElement;
    html.classList.remove('light');
    html.classList.add(theme);
    html.classList.add("js");
</script>
|
|
|
|
<input type="checkbox" id="sidebar-toggle-anchor" class="hidden">
|
|
|
|
<!-- Hide / unhide sidebar before it is displayed -->
|
|
<script>
    // Decide whether the sidebar starts open, before it is displayed.
    // `sidebar` ends up as 'hidden', 'visible', or whatever non-empty string
    // was saved under 'mdbook-sidebar'.
    let sidebar = null;
    const sidebar_toggle = document.getElementById("sidebar-toggle-anchor");

    if (document.body.clientWidth < 1080) {
        // Narrow layout: always start collapsed; the saved preference is not read.
        sidebar = 'hidden';
    } else {
        try {
            sidebar = localStorage.getItem('mdbook-sidebar');
        } catch(e) {
            // Storage is unavailable; fall through to the default below.
        }
        if (!sidebar) {
            sidebar = 'visible';
        }
    }

    // Mirror the state onto the checkbox and onto the root element's class,
    // replacing the 'sidebar-visible' class from the static markup.
    sidebar_toggle.checked = sidebar === 'visible';
    html.classList.remove('sidebar-visible');
    html.classList.add(`sidebar-${sidebar}`);
</script>
|
|
|
|
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
|
|
<!-- populated by js -->
|
|
<mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox>
|
|
<noscript>
    <!-- Script-free fallback for the sidebar: embeds toc.html.
         The title gives the frame an accessible name for assistive technology. -->
    <iframe class="sidebar-iframe-outer" src="toc.html" title="Table of contents"></iframe>
</noscript>
|
|
<div id="sidebar-resize-handle" class="sidebar-resize-handle">
|
|
<div class="sidebar-resize-indicator"></div>
|
|
</div>
|
|
</nav>
|
|
|
|
<div id="page-wrapper" class="page-wrapper">
|
|
|
|
<div class="page">
|
|
<div id="menu-bar-hover-placeholder"></div>
|
|
<div id="menu-bar" class="menu-bar sticky">
|
|
<div class="left-buttons">
|
|
<label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
|
|
<i class="fa fa-bars"></i>
|
|
</label>
|
|
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
|
|
<i class="fa fa-paint-brush"></i>
|
|
</button>
|
|
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
|
|
<li role="none"><button role="menuitem" class="theme" id="default_theme">Auto</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="light">Light</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
|
|
</ul>
|
|
<button id="search-toggle" class="icon-button" type="button" title="Search. (Shortkey: s)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="S" aria-controls="searchbar">
|
|
<i class="fa fa-search"></i>
|
|
</button>
|
|
</div>
|
|
|
|
<h1 class="menu-title">Rust Compiler Development Guide</h1>
|
|
|
|
<div class="right-buttons">
|
|
<a href="print.html" title="Print this book" aria-label="Print this book">
|
|
<i id="print-button" class="fa fa-print"></i>
|
|
</a>
|
|
<a href="https://github.com/rust-lang/rustc-dev-guide" title="Git repository" aria-label="Git repository">
|
|
<i id="git-repository-button" class="fa fa-github"></i>
|
|
</a>
|
|
<a href="https://github.com/rust-lang/rustc-dev-guide/edit/master/src/overview.md" title="Suggest an edit" aria-label="Suggest an edit">
|
|
<i id="git-edit-button" class="fa fa-edit"></i>
|
|
</a>
|
|
|
|
</div>
|
|
</div>
|
|
|
|
<div id="search-wrapper" class="hidden">
|
|
<form id="searchbar-outer" class="searchbar-outer">
    <!-- The placeholder is only a hint; aria-label supplies the accessible name
         because there is no visible <label> for this field. -->
    <input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-label="Search this book" aria-controls="searchresults-outer" aria-describedby="searchresults-header">
</form>
|
|
<div id="searchresults-outer" class="searchresults-outer hidden">
|
|
<div id="searchresults-header" class="searchresults-header"></div>
|
|
<ul id="searchresults">
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
|
|
<script>
    // Expose the initial sidebar state to assistive technology. Reads the
    // global `sidebar` value set by the earlier inline script.
    // The braces keep the helper binding out of the global scope.
    {
        const isVisible = sidebar === 'visible';

        document.getElementById('sidebar-toggle').setAttribute('aria-expanded', isVisible);
        document.getElementById('sidebar').setAttribute('aria-hidden', !isVisible);

        // Links in a hidden sidebar are taken out of the tab order.
        const linkTabIndex = isVisible ? 0 : -1;
        for (const link of document.querySelectorAll('#sidebar a')) {
            link.setAttribute('tabIndex', linkTabIndex);
        }
    }
</script>
|
|
|
|
<div id="content" class="content">
|
|
<main>
|
|
<h1 id="overview-of-the-compiler"><a class="header" href="#overview-of-the-compiler">Overview of the compiler</a></h1>
|
|
<ul>
|
|
<li><a href="#what-the-compiler-does-to-your-code">What the compiler does to your code</a>
|
|
<ul>
|
|
<li><a href="#invocation">Invocation</a></li>
|
|
<li><a href="#lexing-and-parsing">Lexing and parsing</a></li>
|
|
<li><a href="#ast-lowering"><code>AST</code> lowering</a></li>
|
|
<li><a href="#mir-lowering"><code>MIR</code> lowering</a></li>
|
|
<li><a href="#code-generation">Code generation</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#how-it-does-it">How it does it</a>
|
|
<ul>
|
|
<li><a href="#intermediate-representations">Intermediate representations</a></li>
|
|
<li><a href="#queries">Queries</a></li>
|
|
<li><a href="#tyty"><code>ty::Ty</code></a></li>
|
|
<li><a href="#parallelism">Parallelism</a></li>
|
|
<li><a href="#bootstrapping">Bootstrapping</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#references">References</a></li>
|
|
</ul>
|
|
<p>This chapter is about the overall process of compiling a program -- how
|
|
everything fits together.</p>
|
|
<p>The Rust compiler is special in two ways: it does things to your code that
|
|
other compilers don't do (e.g. borrow-checking) and it has a lot of
|
|
unconventional implementation choices (e.g. queries). We will talk about these
|
|
in turn in this chapter, and in the rest of the guide, we will look at the
|
|
individual pieces in more detail.</p>
|
|
<h2 id="what-the-compiler-does-to-your-code"><a class="header" href="#what-the-compiler-does-to-your-code">What the compiler does to your code</a></h2>
|
|
<p>So first, let's look at what the compiler does to your code. For now, we will
|
|
avoid mentioning how the compiler implements these steps except as needed.</p>
|
|
<h3 id="invocation"><a class="header" href="#invocation">Invocation</a></h3>
|
|
<p>Compilation begins when a user writes a Rust source program in text and invokes
|
|
the <code>rustc</code> compiler on it. The work that the compiler needs to perform is
|
|
defined by command-line options. For example, it is possible to enable nightly
|
|
features (<code>-Z</code> flags), perform <code>check</code>-only builds, or emit the LLVM
|
|
Intermediate Representation (<code>LLVM-IR</code>) rather than executable machine code.
|
|
The <code>rustc</code> executable call may be indirect through the use of <code>cargo</code>.</p>
|
|
<p>Command line argument parsing occurs in the <a href="rustc-driver/intro.html"><code>rustc_driver</code></a>. This crate
|
|
defines the compile configuration that is requested by the user and passes it
|
|
to the rest of the compilation process as a <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_interface/interface/struct.Config.html"><code>rustc_interface::Config</code></a>.</p>
|
|
<h3 id="lexing-and-parsing"><a class="header" href="#lexing-and-parsing">Lexing and parsing</a></h3>
|
|
<p>The raw Rust source text is analyzed by a low-level <em>lexer</em> located in
|
|
<a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_lexer/index.html"><code>rustc_lexer</code></a>. At this stage, the source text is turned into a stream of
|
|
atomic source code units known as <em>tokens</em>. The <code>lexer</code> supports the
|
|
Unicode character encoding.</p>
|
|
<p>The token stream passes through a higher-level lexer located in
|
|
<a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/index.html"><code>rustc_parse</code></a> to prepare for the next stage of the compile process. The
|
|
<a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/lexer/struct.Lexer.html"><code>Lexer</code></a> <code>struct</code> is used at this stage to perform a set of validations
|
|
and turn strings into interned symbols (<em>interning</em> is discussed later).
|
|
<a href="https://en.wikipedia.org/wiki/String_interning">String interning</a> is a way of storing only one immutable
|
|
copy of each distinct string value.</p>
|
|
<p>The lexer has a small interface and doesn't depend directly on the diagnostic
|
|
infrastructure in <code>rustc</code>. Instead it provides diagnostics as plain data which
|
|
are emitted in <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/lexer/index.html"><code>rustc_parse::lexer</code></a> as real diagnostics. The <code>lexer</code>
|
|
preserves full fidelity information for both IDEs and procedural macros
|
|
(sometimes referred to as "proc-macros").</p>
|
|
<p>The <em>parser</em> <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/index.html">translates the token stream from the <code>lexer</code> into an Abstract Syntax
|
|
Tree (AST)</a>. It uses a recursive descent (top-down) approach to syntax
|
|
analysis. The crate entry points for the <code>parser</code> are the
|
|
<a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/parser/struct.Parser.html#method.parse_crate_mod"><code>Parser::parse_crate_mod()</code></a> and <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/parser/struct.Parser.html#method.parse_mod"><code>Parser::parse_mod()</code></a>
|
|
methods found in <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/parser/struct.Parser.html"><code>rustc_parse::parser::Parser</code></a>. The external module parsing
|
|
entry point is <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/module/fn.parse_external_mod.html"><code>rustc_expand::module::parse_external_mod</code></a>.
|
|
And the macro-<code>parser</code> entry point is <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/parser/struct.Parser.html#method.parse_nonterminal"><code>Parser::parse_nonterminal()</code></a>.</p>
|
|
<p>Parsing is performed with a set of <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/parser/struct.Parser.html"><code>parser</code></a> utility methods including <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/parser/struct.Parser.html#method.bump"><code>bump</code></a>,
|
|
<a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/parser/struct.Parser.html#method.check"><code>check</code></a>, <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/parser/struct.Parser.html#method.eat"><code>eat</code></a>, <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/parser/struct.Parser.html#method.expect"><code>expect</code></a>, <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/parser/struct.Parser.html#method.look_ahead"><code>look_ahead</code></a>.</p>
|
|
<p>Parsing is organized by semantic construct. Separate
|
|
<code>parse_*</code> methods can be found in the <a href="https://github.com/rust-lang/rust/tree/master/compiler/rustc_parse/src/parser"><code>rustc_parse</code></a>
|
|
directory. The source file name follows the construct name. For example, the
|
|
following files are found in the <code>parser</code>:</p>
|
|
<ul>
|
|
<li><a href="https://github.com/rust-lang/rust/blob/master/compiler/rustc_parse/src/parser/expr.rs"><code>expr.rs</code></a></li>
|
|
<li><a href="https://github.com/rust-lang/rust/blob/master/compiler/rustc_parse/src/parser/pat.rs"><code>pat.rs</code></a></li>
|
|
<li><a href="https://github.com/rust-lang/rust/blob/master/compiler/rustc_parse/src/parser/ty.rs"><code>ty.rs</code></a></li>
|
|
<li><a href="https://github.com/rust-lang/rust/blob/master/compiler/rustc_parse/src/parser/stmt.rs"><code>stmt.rs</code></a></li>
|
|
</ul>
|
|
<p>This naming scheme is used across many compiler stages. You will find either a
|
|
file or directory with the same name across the parsing, lowering, type
|
|
checking, <a href="./thir.html">Typed High-level Intermediate Representation (<code>THIR</code>)</a> lowering, and
|
|
<a href="mir/index.html">Mid-level Intermediate Representation (<code>MIR</code>)</a> building sources.</p>
|
|
<p>Macro-expansion, <code>AST</code>-validation, name-resolution, and early linting also take
|
|
place during the lexing and parsing stage.</p>
|
|
<p>The <a href="https://doc.rust-lang.org/beta/nightly-rustc/rustc_ast/index.html"><code>rustc_ast::ast</code></a>::{<a href="https://doc.rust-lang.org/beta/nightly-rustc/rustc_ast/ast/struct.Crate.html"><code>Crate</code></a>, <a href="https://doc.rust-lang.org/beta/nightly-rustc/rustc_ast/ast/struct.Expr.html"><code>Expr</code></a>, <a href="https://doc.rust-lang.org/beta/nightly-rustc/rustc_ast/ast/struct.Pat.html"><code>Pat</code></a>, ...} <code>AST</code> nodes are
|
|
returned from the parser while the standard <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_errors/struct.Diag.html"><code>Diag</code></a> API is used
|
|
for error handling. Generally Rust's compiler will try to recover from errors
|
|
by parsing a superset of Rust's grammar, while also emitting an error type.</p>
|
|
<h3 id="ast-lowering"><a class="header" href="#ast-lowering"><code>AST</code> lowering</a></h3>
|
|
<p>Next the <code>AST</code> is converted into <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/index.html">High-Level Intermediate Representation
|
|
(<code>HIR</code>)</a>, a more compiler-friendly representation of the <code>AST</code>. This process
|
|
is called "lowering" and involves a lot of desugaring (the expansion and
|
|
formalizing of shortened or abbreviated syntax constructs) of things like loops
|
|
and <code>async fn</code>.</p>
|
|
<p>We then use the <code>HIR</code> to do <a href="type-inference.html"><em>type inference</em></a> (the process of automatic
|
|
detection of the type of an expression), <a href="traits/resolution.html"><em>trait solving</em></a> (the process of
|
|
pairing up an impl with each reference to a <code>trait</code>), and <a href="type-checking.html"><em>type checking</em></a>. Type
|
|
checking is the process of converting the types found in the <code>HIR</code> (<a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/hir/struct.Ty.html"><code>hir::Ty</code></a>),
|
|
which represent what the user wrote, into the internal representation used by
|
|
the compiler (<a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/struct.Ty.html"><code>Ty&lt;'tcx&gt;</code></a>). It's called type checking because the information
|
|
is used to verify the type safety, correctness and coherence of the types used
|
|
in the program.</p>
|
|
<h3 id="mir-lowering"><a class="header" href="#mir-lowering"><code>MIR</code> lowering</a></h3>
|
|
<p>The <code>HIR</code> is further lowered to <code>MIR</code>
|
|
(used for <a href="borrow_check.html">borrow checking</a>) by constructing the <code>THIR</code> (an even more desugared <code>HIR</code> used for
|
|
pattern and exhaustiveness checking) to convert into <code>MIR</code>.</p>
|
|
<p>We do <a href="mir/optimizations.html">many optimizations on the MIR</a> because it is still generic, and doing so
|
|
improves later code generation and compilation speed. It is easier to do some
|
|
optimizations at <code>MIR</code> level than at <code>LLVM-IR</code> level. For example LLVM doesn't seem
|
|
to be able to optimize the pattern the <a href="https://github.com/rust-lang/rust/pull/66282"><code>simplify_try</code></a> <code>MIR</code>-opt looks for.</p>
|
|
<p>Rust code is also <a href="https://en.wikipedia.org/wiki/Monomorphization"><em>monomorphized</em></a> during code generation, which means making
|
|
copies of all the generic code with the type parameters replaced by concrete
|
|
types. To do this, we need to collect a list of what concrete types to generate
|
|
code for. This is called <em>monomorphization collection</em> and it happens at the
|
|
<code>MIR</code> level.</p>
|
|
<h3 id="code-generation"><a class="header" href="#code-generation">Code generation</a></h3>
|
|
<p>We then begin what is simply called <em>code generation</em> or <em>codegen</em>. The <a href="backend/codegen.html">code
|
|
generation stage</a> is when higher-level representations of source are
|
|
turned into an executable binary. Since <code>rustc</code> uses LLVM for code generation,
|
|
the first step is to convert the <code>MIR</code> to <code>LLVM-IR</code>. This is where the <code>MIR</code> is
|
|
actually monomorphized. The <code>LLVM-IR</code> is passed to LLVM, which does a lot more
|
|
optimizations on it, emitting machine code which is basically assembly code
|
|
with additional low-level types and annotations added (e.g. an ELF object or
|
|
<code>WASM</code>). The different libraries/binaries are then linked together to produce
|
|
the final binary.</p>
|
|
<h2 id="how-it-does-it"><a class="header" href="#how-it-does-it">How it does it</a></h2>
|
|
<p>Now that we have a high-level view of what the compiler does to your code,
|
|
let's take a high-level view of <em>how</em> it does all that stuff. There are a lot
|
|
of constraints and conflicting goals that the compiler needs to
|
|
satisfy/optimize for. For example,</p>
|
|
<ul>
|
|
<li>Compilation speed: how fast is it to compile a program? More/better
|
|
compile-time analyses often means compilation is slower.
|
|
<ul>
|
|
<li>Also, we want to support incremental compilation, so we need to take that
|
|
into account. How can we keep track of what work needs to be redone and
|
|
what can be reused if the user modifies their program?
|
|
<ul>
|
|
<li>Also we can't store too much stuff in the incremental cache because
|
|
it would take a long time to load from disk and it could take a lot
|
|
of space on the user's system...</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>Compiler memory usage: while compiling a program, we don't want to use more
|
|
memory than we need.</li>
|
|
<li>Program speed: how fast is your compiled program? More/better compile-time
|
|
analyses often means the compiler can do better optimizations.</li>
|
|
<li>Program size: how large is the compiled binary? Similar to the previous
|
|
point.</li>
|
|
<li>Compiler compilation speed: how long does it take to compile the compiler?
|
|
This impacts contributors and compiler maintenance.</li>
|
|
<li>Implementation complexity: building a compiler is one of the hardest
|
|
things a person/group can do, and Rust is not a very simple language, so how
|
|
do we make the compiler's code base manageable?</li>
|
|
<li>Compiler correctness: the binaries produced by the compiler should do what
|
|
the input programs say they do, and should continue to do so despite the
|
|
tremendous amount of change constantly going on.</li>
|
|
<li>Integration: a number of other tools need to use the compiler in
|
|
various ways (e.g. <code>cargo</code>, <code>clippy</code>, <code>MIRI</code>) that must be supported.</li>
|
|
<li>Compiler stability: the compiler should not crash or fail ungracefully on the
|
|
stable channel.</li>
|
|
<li>Rust stability: the compiler must respect Rust's stability guarantees by not
|
|
breaking programs that previously compiled despite the many changes that are
|
|
always going on to its implementation.</li>
|
|
<li>Limitations of other tools: <code>rustc</code> uses LLVM in its backend, and LLVM has some
|
|
strengths we leverage and some aspects we need to work around.</li>
|
|
</ul>
|
|
<p>So, as you continue your journey through the rest of the guide, keep these
|
|
things in mind. They will often inform decisions that we make.</p>
|
|
<h3 id="intermediate-representations"><a class="header" href="#intermediate-representations">Intermediate representations</a></h3>
|
|
<p>As with most compilers, <code>rustc</code> uses some intermediate representations (IRs) to
|
|
facilitate computations. In general, working directly with the source code is
|
|
extremely inconvenient and error-prone. Source code is designed to be human-friendly while at
|
|
the same time being unambiguous, but it's less convenient for doing something
|
|
like, say, type checking.</p>
|
|
<p>Instead most compilers, including <code>rustc</code>, build some sort of IR out of the
|
|
source code which is easier to analyze. <code>rustc</code> has a few IRs, each optimized
|
|
for different purposes:</p>
|
|
<ul>
|
|
<li>Token stream: the lexer produces a stream of tokens directly from the source
|
|
code. This stream of tokens is easier for the parser to deal with than raw
|
|
text.</li>
|
|
<li>Abstract Syntax Tree (<code>AST</code>): the abstract syntax tree is built from the stream
|
|
of tokens produced by the lexer. It represents
|
|
pretty much exactly what the user wrote. It helps to do some syntactic sanity
|
|
checking (e.g. checking that a type is expected where the user wrote one).</li>
|
|
<li>High-level IR (HIR): This is a sort of desugared <code>AST</code>. It's still close
|
|
to what the user wrote syntactically, but it includes some implicit things
|
|
such as some elided lifetimes, etc. This IR is amenable to type checking.</li>
|
|
<li>Typed <code>HIR</code> (THIR) <em>formerly High-level Abstract IR (HAIR)</em>: This is an
|
|
intermediate between <code>HIR</code> and MIR. It is like the <code>HIR</code> but it is fully typed
|
|
and a bit more desugared (e.g. method calls and implicit dereferences are
|
|
made fully explicit). As a result, it is easier to lower to <code>MIR</code> from <code>THIR</code> than
|
|
from HIR.</li>
|
|
<li>Middle-level IR (<code>MIR</code>): This IR is basically a Control-Flow Graph (CFG). A CFG
|
|
is a type of diagram that shows the basic blocks of a program and how control
|
|
flow can go between them. Likewise, <code>MIR</code> also has a bunch of basic blocks with
|
|
simple typed statements inside them (e.g. assignment, simple computations,
|
|
etc.) and control flow edges to other basic blocks (e.g., calls, dropping
|
|
values). <code>MIR</code> is used for borrow checking and other
|
|
important dataflow-based checks, such as checking for uninitialized values.
|
|
It is also used for a series of optimizations and for constant evaluation (via
|
|
<code>MIRI</code>). Because <code>MIR</code> is still generic, we can do a lot of analyses here more
|
|
efficiently than after monomorphization.</li>
|
|
<li><code>LLVM-IR</code>: This is the standard form of all input to the LLVM compiler. <code>LLVM-IR</code>
|
|
is a sort of typed assembly language with lots of annotations. It's
|
|
a standard format that is used by all compilers that use LLVM (e.g. the clang
|
|
C compiler also outputs <code>LLVM-IR</code>). <code>LLVM-IR</code> is designed to be easy for other
|
|
compilers to emit and also rich enough for LLVM to run a bunch of
|
|
optimizations on it.</li>
|
|
</ul>
|
|
<p>One other thing to note is that many values in the compiler are <em>interned</em>.
|
|
This is a performance and memory optimization in which we allocate the values in
|
|
a special allocator called an
|
|
<em><a href="https://en.wikipedia.org/wiki/Region-based_memory_management">arena</a></em>. Then, we pass
|
|
around references to the values allocated in the arena. This allows us to make
|
|
sure that identical values (e.g. types in your program) are only allocated once
|
|
and can be compared cheaply by comparing pointers. Many of the intermediate
|
|
representations are interned.</p>
|
|
<h3 id="queries"><a class="header" href="#queries">Queries</a></h3>
|
|
<p>The first big implementation choice is Rust's use of the <em>query</em> system in its
|
|
compiler. The Rust compiler <em>is not</em> organized as a series of passes over the
|
|
code which execute sequentially. The Rust compiler does this to make
|
|
incremental compilation possible -- that is, if the user makes a change to
|
|
their program and recompiles, we want to do as little redundant work as
|
|
possible to output the new binary.</p>
|
|
<p>In <code>rustc</code>, all the major steps above are organized as a bunch of queries that
|
|
call each other. For example, there is a query to ask for the type of something
|
|
and another to ask for the optimized <code>MIR</code> of a function. These queries can call
|
|
each other and are all tracked through the query system. The results of the
|
|
queries are cached on disk so that the compiler can tell which queries' results
|
|
changed from the last compilation and only redo those. This is how incremental
|
|
compilation works.</p>
|
|
<p>In principle, for the query-fied steps, we do each of the above for each item
|
|
individually. For example, we will take the <code>HIR</code> for a function and use queries
|
|
to ask for the <code>LLVM-IR</code> for that HIR. This drives the generation of optimized
|
|
<code>MIR</code>, which drives the borrow checker, which drives the generation of <code>MIR</code>, and
|
|
so on.</p>
|
|
<p>... except that this is very over-simplified. In fact, some queries are not
|
|
cached on disk, and some parts of the compiler have to run for all code anyway
|
|
for correctness even if the code is dead code (e.g. the borrow checker). For
|
|
example, <a href="https://github.com/rust-lang/rust/blob/e69c7306e2be08939d95f14229e3f96566fb206c/compiler/rustc_interface/src/passes.rs#L791">currently the <code>mir_borrowck</code> query is first executed on all functions
|
|
of a crate.</a> Then the codegen backend invokes the
|
|
<code>collect_and_partition_mono_items</code> query, which first recursively requests the
|
|
<code>optimized_mir</code> for all reachable functions, which in turn runs <code>mir_borrowck</code>
|
|
for that function and then creates codegen units. This kind of split will need
|
|
to remain to ensure that unreachable functions still have their errors emitted.</p>
|
|
<p>Moreover, the compiler wasn't originally built to use a query system; the query
|
|
system has been retrofitted into the compiler, so parts of it are not query-fied
|
|
yet. Also, LLVM isn't our code, so that isn't query-fied either. The plan is to
|
|
eventually query-fy all of the steps listed in the previous section,
|
|
but as of <!-- date-check --> November 2022, only the steps between <code>HIR</code> and
|
|
<code>LLVM-IR</code> are query-fied. That is, lexing, parsing, name resolution, and macro
|
|
expansion are done all at once for the whole program.</p>
|
|
<p>One other thing to mention here is the all-important "typing context",
|
|
<a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/struct.TyCtxt.html"><code>TyCtxt</code></a>, which is a giant struct that is at the center of all things.
|
|
(Note that the name is mostly historic. This is <em>not</em> a "typing context" in the
|
|
sense of <code>Γ</code> or <code>Δ</code> from type theory. The name is retained because that's what
|
|
the name of the struct is in the source code.) All
|
|
queries are defined as methods on the <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/struct.TyCtxt.html"><code>TyCtxt</code></a> type, and the in-memory query
|
|
cache is stored there too. In the code, there is usually a variable called
|
|
<code>tcx</code> which is a handle on the typing context. You will also see lifetimes with
|
|
the name <code>'tcx</code>, which means that something is tied to the lifetime of the
|
|
<a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/struct.TyCtxt.html"><code>TyCtxt</code></a> (usually it is stored or interned there).</p>
|
|
<h3 id="tyty"><a class="header" href="#tyty"><code>ty::Ty</code></a></h3>
|
|
<p>Types are really important in Rust, and they form the core of a lot of compiler
|
|
analyses. The main type (in the compiler) that represents types (in the user's
|
|
program) is <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/struct.Ty.html"><code>rustc_middle::ty::Ty</code></a>. This is so important that we have a whole chapter
|
|
on <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/struct.Ty.html"><code>ty::Ty</code></a>, but for now, we just want to mention that it exists and is the way
|
|
<code>rustc</code> represents types!</p>
|
|
<p>Also note that the <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/index.html"><code>rustc_middle::ty</code></a> module defines the <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/struct.TyCtxt.html"><code>TyCtxt</code></a> struct we mentioned before.</p>
|
|
<h3 id="parallelism"><a class="header" href="#parallelism">Parallelism</a></h3>
|
|
<p>Compiler performance is a problem that we would like to improve on
|
|
(and are always working on). One aspect of that is parallelizing
|
|
<code>rustc</code> itself.</p>
|
|
<p>Currently, there is only one part of rustc that is parallel by default:
|
|
<a href="./parallel-rustc.html#codegen">code generation</a>.</p>
|
|
<p>However, the rest of the compiler is not yet parallel. There have been
|
|
lots of efforts spent on this, but it is generally a hard problem. The current
|
|
approach is to turn <a href="https://doc.rust-lang.org/std/cell/struct.RefCell.html"><code>RefCell</code></a>s into <a href="https://doc.rust-lang.org/std/sync/struct.Mutex.html"><code>Mutex</code></a>s -- that is, we
|
|
switch to thread-safe internal mutability. However, there are ongoing
|
|
challenges with lock contention, maintaining query-system invariants under
|
|
concurrency, and the complexity of the code base. One can try out the current
|
|
work by enabling parallel compilation in <code>bootstrap.toml</code>. It's still early days,
|
|
but there are already some promising performance improvements.</p>
|
|
<h3 id="bootstrapping"><a class="header" href="#bootstrapping">Bootstrapping</a></h3>
|
|
<p><code>rustc</code> itself is written in Rust. So how do we compile the compiler? We use an
|
|
older compiler to compile the newer compiler. This is called <a href="https://en.wikipedia.org/wiki/Bootstrapping_(compilers)"><em>bootstrapping</em></a>.</p>
|
|
<p>Bootstrapping has a lot of interesting implications. For example, it means
|
|
that one of the major users of Rust is the Rust compiler, so we are
|
|
constantly testing our own software ("eating our own dogfood").</p>
|
|
<p>For more details on bootstrapping, see
|
|
<a href="building/bootstrapping/intro.html">the bootstrapping section of the guide</a>.</p>
|
|
<!--
|
|
# Unresolved Questions
|
|
|
|
- Does LLVM ever do optimizations in debug builds?
|
|
- How do I explore phases of the compile process in my own sources (lexer,
|
|
parser, HIR, etc)? - e.g., `cargo rustc -- -Z unpretty=hir-tree` allows you to
|
|
view `HIR` representation
|
|
- What is the main source entry point for `X`?
|
|
- Where do phases diverge for cross-compilation to machine code across
|
|
different platforms?
|
|
-->
|
|
<h1 id="references"><a class="header" href="#references">References</a></h1>
|
|
<ul>
|
|
<li>Command line parsing
|
|
<ul>
|
|
<li>Guide: <a href="rustc-driver/intro.html">The Rustc Driver and Interface</a></li>
|
|
<li>Driver definition: <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_driver/"><code>rustc_driver</code></a></li>
|
|
<li>Main entry point: <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_session/config/fn.build_session_options.html"><code>rustc_session::config::build_session_options</code></a></li>
|
|
</ul>
|
|
</li>
|
|
<li>Lexical Analysis: Lex the user program to a stream of tokens
|
|
<ul>
|
|
<li>Guide: <a href="the-parser.html">Lexing and Parsing</a></li>
|
|
<li>Lexer definition: <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_lexer/index.html"><code>rustc_lexer</code></a></li>
|
|
<li>Main entry point: <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_lexer/cursor/struct.Cursor.html#method.advance_token"><code>rustc_lexer::cursor::Cursor::advance_token</code></a></li>
|
|
</ul>
|
|
</li>
|
|
<li>Parsing: Parse the stream of tokens to an Abstract Syntax Tree (AST)
|
|
<ul>
|
|
<li>Guide: <a href="the-parser.html">Lexing and Parsing</a></li>
|
|
<li>Guide: <a href="macro-expansion.html">Macro Expansion</a></li>
|
|
<li>Guide: <a href="name-resolution.html">Name Resolution</a></li>
|
|
<li>Parser definition: <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/index.html"><code>rustc_parse</code></a></li>
|
|
<li>Main entry points:
|
|
<ul>
|
|
<li><a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_interface/passes/fn.parse.html">Entry point for first file in crate</a></li>
|
|
<li><a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/module/fn.parse_external_mod.html">Entry point for outline module parsing</a></li>
|
|
<li><a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/parser/struct.Parser.html#method.parse_nonterminal">Entry point for macro fragments</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><code>AST</code> definition: <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/ast/index.html"><code>rustc_ast</code></a></li>
|
|
<li>Feature gating: <strong>TODO</strong></li>
|
|
<li>Early linting: <strong>TODO</strong></li>
|
|
</ul>
|
|
</li>
|
|
<li>The High Level Intermediate Representation (HIR)
|
|
<ul>
|
|
<li>Guide: <a href="hir.html">The HIR</a></li>
|
|
<li>Guide: <a href="hir.html#identifiers-in-the-hir">Identifiers in the HIR</a></li>
|
|
<li>Guide: <a href="hir.html#the-hir-map">The <code>HIR</code> Map</a></li>
|
|
<li>Guide: <a href="./hir/lowering.html">Lowering <code>AST</code> to <code>HIR</code></a></li>
|
|
<li>How to view the <code>HIR</code> representation of your code: <code>cargo rustc -- -Z unpretty=hir-tree</code></li>
|
|
<li>Rustc <code>HIR</code> definition: <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/index.html"><code>rustc_hir</code></a></li>
|
|
<li>Main entry point: <strong>TODO</strong></li>
|
|
<li>Late linting: <strong>TODO</strong></li>
|
|
</ul>
|
|
</li>
|
|
<li>Type Inference
|
|
<ul>
|
|
<li>Guide: <a href="type-inference.html">Type Inference</a></li>
|
|
<li>Guide: <a href="ty.html">The ty Module: Representing Types</a> (semantics)</li>
|
|
<li>Main entry point (type inference): <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_infer/infer/struct.InferCtxtBuilder.html#method.enter"><code>InferCtxtBuilder::enter</code></a></li>
|
|
<li>Main entry point (type checking bodies): <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/struct.TyCtxt.html#method.typeck">the <code>typeck</code> query</a>
|
|
<ul>
|
|
<li>These two functions can't be decoupled.</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>The Mid Level Intermediate Representation (MIR)
|
|
<ul>
|
|
<li>Guide: <a href="mir/index.html">The <code>MIR</code> (Mid level IR)</a></li>
|
|
<li>Definition: <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/index.html"><code>rustc_middle/src/mir</code></a></li>
|
|
<li>Definition of sources that manipulate the MIR: <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_build/index.html"><code>rustc_mir_build</code></a>, <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_dataflow/index.html"><code>rustc_mir_dataflow</code></a>, <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_transform/index.html"><code>rustc_mir_transform</code></a></li>
|
|
</ul>
|
|
</li>
|
|
<li>The Borrow Checker
|
|
<ul>
|
|
<li>Guide: <a href="borrow_check.html">MIR Borrow Check</a></li>
|
|
<li>Definition: <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/index.html"><code>rustc_borrowck</code></a></li>
|
|
<li>Main entry point: <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/fn.mir_borrowck.html"><code>mir_borrowck</code> query</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><code>MIR</code> Optimizations
|
|
<ul>
|
|
<li>Guide: <a href="mir/optimizations.html">MIR Optimizations</a></li>
|
|
<li>Definition: <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_transform/index.html"><code>rustc_mir_transform</code></a></li>
|
|
<li>Main entry point: <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_transform/fn.optimized_mir.html"><code>optimized_mir</code> query</a></li>
|
|
</ul>
|
|
</li>
|
|
<li>Code Generation
|
|
<ul>
|
|
<li>Guide: <a href="backend/codegen.html">Code Generation</a></li>
|
|
<li>Generating Machine Code from <code>LLVM-IR</code> with LLVM - <strong>TODO: reference?</strong></li>
|
|
<li>Main entry point: <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/base/fn.codegen_crate.html"><code>rustc_codegen_ssa::base::codegen_crate</code></a>
|
|
<ul>
|
|
<li>This monomorphizes and produces <code>LLVM-IR</code> for one codegen unit. It then
|
|
starts a background thread to run LLVM, which must be joined later.</li>
|
|
<li>Monomorphization happens lazily via <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/mir/struct.FunctionCx.html#method.monomorphize"><code>FunctionCx::monomorphize</code></a> and <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/base/fn.codegen_instance.html"><code>rustc_codegen_ssa::base::codegen_instance</code></a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
|
|
</main>
|
|
|
|
<nav class="nav-wrapper" aria-label="Page navigation">
|
|
<!-- Mobile navigation buttons -->
|
|
<a rel="prev" href="part-2-intro.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
|
|
<i class="fa fa-angle-left"></i>
|
|
</a>
|
|
|
|
<a rel="next prefetch" href="compiler-src.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
|
|
<i class="fa fa-angle-right"></i>
|
|
</a>
|
|
|
|
<div style="clear: both"></div>
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
|
|
<nav class="nav-wide-wrapper" aria-label="Page navigation">
|
|
<a rel="prev" href="part-2-intro.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
|
|
<i class="fa fa-angle-left"></i>
|
|
</a>
|
|
|
|
<a rel="next prefetch" href="compiler-src.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
|
|
<i class="fa fa-angle-right"></i>
|
|
</a>
|
|
</nav>
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<script>
|
|
window.playground_copyable = true;
|
|
</script>
|
|
|
|
|
|
<script src="elasticlunr.min.js"></script>
|
|
<script src="mark.min.js"></script>
|
|
<script src="searcher.js"></script>
|
|
|
|
<script src="clipboard.min.js"></script>
|
|
<script src="highlight.js"></script>
|
|
<script src="book.js"></script>
|
|
|
|
<!-- Custom JS scripts -->
|
|
<script src="mermaid.min.js"></script>
|
|
<script src="mermaid-init.js"></script>
|
|
|
|
|
|
</div>
|
|
</body>
|
|
</html>
|