278 lines
16 KiB
HTML
278 lines
16 KiB
HTML
<!DOCTYPE HTML>
|
|
<html lang="en" class="light sidebar-visible" dir="ltr">
|
|
<head>
|
|
<!-- Book generated using mdBook -->
|
|
<meta charset="UTF-8">
|
|
<title>Lexing and parsing - Rust Compiler Development Guide</title>
|
|
|
|
|
|
<!-- Custom HTML head -->
|
|
|
|
<meta name="description" content="A guide to developing the Rust compiler (rustc)">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
<meta name="theme-color" content="#ffffff">
|
|
|
|
<link rel="icon" href="favicon.svg">
|
|
<link rel="shortcut icon" href="favicon.png">
|
|
<link rel="stylesheet" href="css/variables.css">
|
|
<link rel="stylesheet" href="css/general.css">
|
|
<link rel="stylesheet" href="css/chrome.css">
|
|
<link rel="stylesheet" href="css/print.css" media="print">
|
|
|
|
<!-- Fonts -->
|
|
<link rel="stylesheet" href="FontAwesome/css/font-awesome.css">
|
|
<link rel="stylesheet" href="fonts/fonts.css">
|
|
|
|
<!-- Highlight.js Stylesheets -->
|
|
<link rel="stylesheet" id="highlight-css" href="highlight.css">
|
|
<link rel="stylesheet" id="tomorrow-night-css" href="tomorrow-night.css">
|
|
<link rel="stylesheet" id="ayu-highlight-css" href="ayu-highlight.css">
|
|
|
|
<!-- Custom theme stylesheets -->
|
|
|
|
|
|
<!-- Provide site root and default themes to javascript -->
|
|
<script>
|
|
const path_to_root = "";
|
|
const default_light_theme = "light";
|
|
const default_dark_theme = "navy";
|
|
</script>
|
|
<!-- Start loading toc.js asap -->
|
|
<script src="toc.js"></script>
|
|
</head>
|
|
<body>
|
|
<div id="body-container">
|
|
<!-- Work around some values being stored in localStorage wrapped in quotes -->
|
|
<script>
|
|
try {
|
|
let theme = localStorage.getItem('mdbook-theme');
|
|
let sidebar = localStorage.getItem('mdbook-sidebar');
|
|
|
|
if (theme.startsWith('"') && theme.endsWith('"')) {
|
|
localStorage.setItem('mdbook-theme', theme.slice(1, theme.length - 1));
|
|
}
|
|
|
|
if (sidebar.startsWith('"') && sidebar.endsWith('"')) {
|
|
localStorage.setItem('mdbook-sidebar', sidebar.slice(1, sidebar.length - 1));
|
|
}
|
|
} catch (e) { }
|
|
</script>
|
|
|
|
<!-- Set the theme before any content is loaded, prevents flash -->
|
|
<script>
|
|
const default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches ? default_dark_theme : default_light_theme;
|
|
let theme;
|
|
try { theme = localStorage.getItem('mdbook-theme'); } catch(e) { }
|
|
if (theme === null || theme === undefined) { theme = default_theme; }
|
|
const html = document.documentElement;
|
|
html.classList.remove('light')
|
|
html.classList.add(theme);
|
|
html.classList.add("js");
|
|
</script>
|
|
|
|
<input type="checkbox" id="sidebar-toggle-anchor" class="hidden">
|
|
|
|
<!-- Hide / unhide sidebar before it is displayed -->
|
|
<script>
|
|
let sidebar = null;
|
|
const sidebar_toggle = document.getElementById("sidebar-toggle-anchor");
|
|
if (document.body.clientWidth >= 1080) {
|
|
try { sidebar = localStorage.getItem('mdbook-sidebar'); } catch(e) { }
|
|
sidebar = sidebar || 'visible';
|
|
} else {
|
|
sidebar = 'hidden';
|
|
}
|
|
sidebar_toggle.checked = sidebar === 'visible';
|
|
html.classList.remove('sidebar-visible');
|
|
html.classList.add("sidebar-" + sidebar);
|
|
</script>
|
|
|
|
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
|
|
<!-- populated by js -->
|
|
<mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox>
|
|
<noscript>
|
|
<iframe class="sidebar-iframe-outer" src="toc.html"></iframe>
|
|
</noscript>
|
|
<div id="sidebar-resize-handle" class="sidebar-resize-handle">
|
|
<div class="sidebar-resize-indicator"></div>
|
|
</div>
|
|
</nav>
|
|
|
|
<div id="page-wrapper" class="page-wrapper">
|
|
|
|
<div class="page">
|
|
<div id="menu-bar-hover-placeholder"></div>
|
|
<div id="menu-bar" class="menu-bar sticky">
|
|
<div class="left-buttons">
|
|
<label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
|
|
<i class="fa fa-bars"></i>
|
|
</label>
|
|
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
|
|
<i class="fa fa-paint-brush"></i>
|
|
</button>
|
|
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
|
|
<li role="none"><button role="menuitem" class="theme" id="default_theme">Auto</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="light">Light</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
|
|
</ul>
|
|
<button id="search-toggle" class="icon-button" type="button" title="Search. (Shortkey: s)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="S" aria-controls="searchbar">
|
|
<i class="fa fa-search"></i>
|
|
</button>
|
|
</div>
|
|
|
|
<h1 class="menu-title">Rust Compiler Development Guide</h1>
|
|
|
|
<div class="right-buttons">
|
|
<a href="print.html" title="Print this book" aria-label="Print this book">
|
|
<i id="print-button" class="fa fa-print"></i>
|
|
</a>
|
|
<a href="https://github.com/rust-lang/rustc-dev-guide" title="Git repository" aria-label="Git repository">
|
|
<i id="git-repository-button" class="fa fa-github"></i>
|
|
</a>
|
|
<a href="https://github.com/rust-lang/rustc-dev-guide/edit/master/src/the-parser.md" title="Suggest an edit" aria-label="Suggest an edit">
|
|
<i id="git-edit-button" class="fa fa-edit"></i>
|
|
</a>
|
|
|
|
</div>
|
|
</div>
|
|
|
|
<div id="search-wrapper" class="hidden">
|
|
<form id="searchbar-outer" class="searchbar-outer">
|
|
<input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
|
|
</form>
|
|
<div id="searchresults-outer" class="searchresults-outer hidden">
|
|
<div id="searchresults-header" class="searchresults-header"></div>
|
|
<ul id="searchresults">
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
|
|
<script>
|
|
document.getElementById('sidebar-toggle').setAttribute('aria-expanded', sidebar === 'visible');
|
|
document.getElementById('sidebar').setAttribute('aria-hidden', sidebar !== 'visible');
|
|
Array.from(document.querySelectorAll('#sidebar a')).forEach(function(link) {
|
|
link.setAttribute('tabIndex', sidebar === 'visible' ? 0 : -1);
|
|
});
|
|
</script>
|
|
|
|
<div id="content" class="content">
|
|
<main>
|
|
<h1 id="lexing-and-parsing"><a class="header" href="#lexing-and-parsing">Lexing and parsing</a></h1>
|
|
<p>The very first thing the compiler does is take the program (in UTF-8 Unicode text)
|
|
and turn it into a data format the compiler can work with more conveniently than strings.
|
|
This happens in two stages: Lexing and Parsing.</p>
|
|
<ol>
|
|
<li><em>Lexing</em> takes strings and turns them into streams of <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/token/index.html">tokens</a>. For
|
|
example, <code>foo.bar + buz</code> would be turned into the tokens <code>foo</code>, <code>.</code>, <code>bar</code>,
|
|
<code>+</code>, and <code>buz</code>. This is implemented in <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_lexer/index.html"><code>rustc_lexer</code></a>.</li>
|
|
</ol>
|
|
<ol start="2">
|
|
<li><em>Parsing</em> takes streams of tokens and turns them into a structured form
|
|
which is easier for the compiler to work with, usually called an <a href="./ast-validation.html"><em>Abstract
|
|
Syntax Tree</em> (AST)</a> .</li>
|
|
</ol>
|
|
<h2 id="the-ast"><a class="header" href="#the-ast">The AST</a></h2>
|
|
<p>The AST mirrors the structure of a Rust program in memory, using a <code>Span</code> to
|
|
link a particular AST node back to its source text. The AST is defined in
|
|
<a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/index.html"><code>rustc_ast</code></a>, along with some definitions for tokens and token
|
|
streams, data structures/traits for mutating ASTs, and shared definitions for
|
|
other AST-related parts of the compiler (like the lexer and
|
|
macro-expansion).</p>
|
|
<p>Every node in the AST has its own <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/node_id/struct.NodeId.html"><code>NodeId</code></a>, including top-level items
|
|
such as structs, but also individual statements and expressions. A <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/node_id/struct.NodeId.html"><code>NodeId</code></a>
|
|
is an identifier number that uniquely identifies an AST node within a crate.</p>
|
|
<p>However, because they are absolute within a crate, adding or removing a single
|
|
node in the AST causes all the subsequent <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/node_id/struct.NodeId.html"><code>NodeId</code></a>s to change. This renders
|
|
<a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/node_id/struct.NodeId.html"><code>NodeId</code></a>s pretty much useless for incremental compilation, where you want as
|
|
few things as possible to change.</p>
|
|
<p><a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/node_id/struct.NodeId.html"><code>NodeId</code></a>s are used in all the <code>rustc</code> bits that operate directly on the AST,
|
|
like macro expansion and name resolution (more on these over the next couple chapters).</p>
|
|
<h2 id="parsing"><a class="header" href="#parsing">Parsing</a></h2>
|
|
<p>The parser is defined in <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/index.html"><code>rustc_parse</code></a>, along with a
|
|
high-level interface to the lexer and some validation routines that run after
|
|
macro expansion. In particular, the <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/parser/index.html"><code>rustc_parse::parser</code></a> contains
|
|
the parser implementation.</p>
|
|
<p>The main entrypoint to the parser is via the various <code>parse_*</code> functions and others in
|
|
<a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/index.html">rustc_parse</a>. They let you do things like turn a <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/struct.SourceFile.html"><code>SourceFile</code></a>
|
|
(e.g. the source in a single file) into a token stream, create a parser from
|
|
the token stream, and then execute the parser to get a <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/ast/struct.Crate.html"><code>Crate</code></a> (the root AST
|
|
node).</p>
|
|
<p>To minimize the amount of copying that is done,
|
|
both <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/lexer/struct.Lexer.html"><code>Lexer</code></a> and <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/parser/struct.Parser.html"><code>Parser</code></a> have lifetimes which bind them to the parent <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_session/parse/struct.ParseSess.html"><code>ParseSess</code></a>.
|
|
This contains all the information needed while parsing, as well as the <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/source_map/struct.SourceMap.html"><code>SourceMap</code></a> itself.</p>
|
|
<p>Note that while parsing, we may encounter macro definitions or invocations.
|
|
We set these aside to be expanded (see <a href="./macro-expansion.html">Macro Expansion</a>).
|
|
Expansion itself may require parsing the output of a macro, which may reveal more macros to be expanded, and so on.</p>
|
|
<h2 id="more-on-lexical-analysis"><a class="header" href="#more-on-lexical-analysis">More on lexical analysis</a></h2>
|
|
<p>Code for lexical analysis is split between two crates:</p>
|
|
<ul>
|
|
<li>
|
|
<p><a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_lexer/index.html"><code>rustc_lexer</code></a> crate is responsible for breaking a <code>&str</code> into chunks
|
|
constituting tokens. Although it is popular to implement lexers as generated
|
|
finite state machines, the lexer in <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_lexer/index.html"><code>rustc_lexer</code></a> is hand-written.</p>
|
|
</li>
|
|
<li>
|
|
<p><a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/lexer/struct.Lexer.html"><code>Lexer</code></a> integrates <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_lexer/index.html"><code>rustc_lexer</code></a> with data structures specific to
|
|
<code>rustc</code>. Specifically, it adds <code>Span</code> information to tokens returned by
|
|
<a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_lexer/index.html"><code>rustc_lexer</code></a> and interns identifiers.</p>
|
|
</li>
|
|
</ul>
|
|
|
|
</main>
|
|
|
|
<nav class="nav-wrapper" aria-label="Page navigation">
|
|
<!-- Mobile navigation buttons -->
|
|
<a rel="prev" href="syntax-intro.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
|
|
<i class="fa fa-angle-left"></i>
|
|
</a>
|
|
|
|
<a rel="next prefetch" href="macro-expansion.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
|
|
<i class="fa fa-angle-right"></i>
|
|
</a>
|
|
|
|
<div style="clear: both"></div>
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
|
|
<nav class="nav-wide-wrapper" aria-label="Page navigation">
|
|
<a rel="prev" href="syntax-intro.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
|
|
<i class="fa fa-angle-left"></i>
|
|
</a>
|
|
|
|
<a rel="next prefetch" href="macro-expansion.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
|
|
<i class="fa fa-angle-right"></i>
|
|
</a>
|
|
</nav>
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<script>
|
|
window.playground_copyable = true;
|
|
</script>
|
|
|
|
|
|
<script src="elasticlunr.min.js"></script>
|
|
<script src="mark.min.js"></script>
|
|
<script src="searcher.js"></script>
|
|
|
|
<script src="clipboard.min.js"></script>
|
|
<script src="highlight.js"></script>
|
|
<script src="book.js"></script>
|
|
|
|
<!-- Custom JS scripts -->
|
|
<script src="mermaid.min.js"></script>
|
|
<script src="mermaid-init.js"></script>
|
|
|
|
|
|
</div>
|
|
</body>
|
|
</html>
|