diff --git a/src/library/text/mod.rs b/src/library/text/mod.rs index 1d750689..a25b2827 100644 --- a/src/library/text/mod.rs +++ b/src/library/text/mod.rs @@ -3,12 +3,14 @@ mod deco; mod link; mod par; +mod quotes; mod raw; mod shaping; pub use deco::*; pub use link::*; pub use par::*; +pub use quotes::*; pub use raw::*; pub use shaping::*; @@ -72,6 +74,8 @@ impl TextNode { /// will will be hyphenated if and only if justification is enabled. #[property(resolve)] pub const HYPHENATE: Smart<Hyphenate> = Smart::Auto; + /// Whether to apply smart quotes. + pub const SMART_QUOTES: bool = true; /// Whether to apply kerning ("kern"). pub const KERNING: bool = true; diff --git a/src/library/text/par.rs b/src/library/text/par.rs index cf7dc4a9..8dcbfeb3 100644 --- a/src/library/text/par.rs +++ b/src/library/text/par.rs @@ -4,7 +4,7 @@ use unicode_bidi::{BidiInfo, Level}; use unicode_script::{Script, UnicodeScript}; use xi_unicode::LineBreakIterator; -use super::{shape, Lang, ShapedText, TextNode}; +use super::{shape, Lang, Quoter, Quotes, ShapedText, TextNode}; use crate::font::FontStore; use crate::library::layout::Spacing; use crate::library::prelude::*; @@ -386,9 +386,11 @@ fn collect<'a>( styles: &'a StyleChain<'a>, ) -> (String, Vec<(Segment<'a>, StyleChain<'a>)>) { let mut full = String::new(); + let mut quoter = Quoter::new(); let mut segments = vec![]; + let mut iter = par.0.iter().peekable(); - for (child, map) in par.0.iter() { + while let Some((child, map)) = iter.next() { let styles = map.chain(&styles); let segment = match child { ParChild::Text(text) => { @@ -402,7 +404,25 @@ fn collect<'a>( } ParChild::Quote(double) => { let prev = full.len(); - full.push(if *double { '"' } else { '\'' }); + if styles.get(TextNode::SMART_QUOTES) { + // TODO: Also get region. 
+ let lang = styles.get(TextNode::LANG); + let quotes = lang + .as_ref() + .map(|lang| Quotes::from_lang(lang.as_str(), "")) + .unwrap_or_default(); + + let peeked = iter.peek().and_then(|(child, _)| match child { + ParChild::Text(text) => text.chars().next(), + ParChild::Quote(_) => Some('"'), + ParChild::Spacing(_) => Some(SPACING_REPLACE), + ParChild::Node(_) => Some(NODE_REPLACE), + }); + + full.push_str(quoter.quote(&quotes, *double, peeked)); + } else { + full.push(if *double { '"' } else { '\'' }); + } Segment::Text(full.len() - prev) } ParChild::Spacing(spacing) => { @@ -415,6 +435,10 @@ fn collect<'a>( } }; + if let Some(last) = full.chars().last() { + quoter.last(last); + } + if let (Some((Segment::Text(last_len), last_styles)), Segment::Text(len)) = (segments.last_mut(), segment) { diff --git a/src/library/text/quotes.rs b/src/library/text/quotes.rs new file mode 100644 index 00000000..5f67bdb5 --- /dev/null +++ b/src/library/text/quotes.rs @@ -0,0 +1,146 @@ +use crate::parse::is_newline; + +/// State machine for smart quote substitution. +#[derive(Debug, Clone)] +pub struct Quoter { + /// How many quotes have been opened. + quote_depth: usize, + /// Whether an opening quote might follow. + expect_opening: bool, + /// Whether the last character was numeric. + last_num: bool, +} + +impl Quoter { + /// Start quoting. + pub fn new() -> Self { + Self { + quote_depth: 0, + expect_opening: true, + last_num: false, + } + } + + /// Process the last seen character. + pub fn last(&mut self, c: char) { + self.expect_opening = is_ignorable(c) || is_opening_bracket(c); + self.last_num = c.is_numeric(); + } + + /// Process and substitute a quote. 
+ pub fn quote<'a>( + &mut self, + quotes: &Quotes<'a>, + double: bool, + peeked: Option<char>, + ) -> &'a str { + let peeked = peeked.unwrap_or(' '); + if self.expect_opening { + self.quote_depth += 1; + quotes.open(double) + } else if self.quote_depth > 0 + && (peeked.is_ascii_punctuation() || is_ignorable(peeked)) + { + self.quote_depth -= 1; + quotes.close(double) + } else if self.last_num { + quotes.prime(double) + } else { + quotes.fallback(double) + } + } +} + +impl Default for Quoter { + fn default() -> Self { + Self::new() + } +} + +fn is_ignorable(c: char) -> bool { + c.is_whitespace() || is_newline(c) +} + +fn is_opening_bracket(c: char) -> bool { + matches!(c, '(' | '{' | '[') +} + +/// Decides which quotes to substitute smart quotes with. +pub struct Quotes<'s> { + /// The opening single quote. + pub single_open: &'s str, + /// The closing single quote. + pub single_close: &'s str, + /// The opening double quote. + pub double_open: &'s str, + /// The closing double quote. + pub double_close: &'s str, +} + +impl<'s> Quotes<'s> { + /// Create a new `Quotes` struct with the defaults for a language and + /// region. + /// + /// The language should be specified as an all-lowercase ISO 639-1 code, the + /// region as an all-uppercase ISO 3166-1 alpha-2 code. + /// + /// Currently, the supported languages are: English, Czech, Danish, German, + /// Swiss / Liechtensteinian German, Estonian, Icelandic, Lithuanian, + /// Latvian, Slovak, Slovenian, Bosnian, Finnish, Swedish, French, + /// Hungarian, Polish, Romanian, Japanese, Traditional Chinese, Russian, and + /// Norwegian. + /// + /// For unknown languages, the English quotes are used. 
+ pub fn from_lang(language: &str, region: &str) -> Self { + let (single_open, single_close, double_open, double_close) = match language { + "de" if matches!(region, "CH" | "LI") => ("‹", "›", "«", "»"), + "cs" | "da" | "de" | "et" | "is" | "lt" | "lv" | "sk" | "sl" => { + ("‚", "‘", "„", "“") + } + "fr" => ("‹\u{00A0}", "\u{00A0}›", "«\u{00A0}", "\u{00A0}»"), + "bs" | "fi" | "sv" => ("’", "’", "”", "”"), + "hu" | "pl" | "ro" => ("’", "’", "„", "”"), + "ru" | "no" | "nn" => ("’", "’", "«", "»"), + _ => return Self::default(), + }; + + Self { + single_open, + single_close, + double_open, + double_close, + } + } + + /// The opening quote. + fn open(&self, double: bool) -> &'s str { + if double { self.double_open } else { self.single_open } + } + + /// The closing quote. + fn close(&self, double: bool) -> &'s str { + if double { self.double_close } else { self.single_close } + } + + /// Which character should be used as a prime. + fn prime(&self, double: bool) -> &'static str { + if double { "″" } else { "′" } + } + + /// Which character should be used as a fallback quote. + fn fallback(&self, double: bool) -> &'static str { + if double { "\"" } else { "’" } + } +} + +impl Default for Quotes<'_> { + /// Returns the english quotes as default. 
+ fn default() -> Self { + Self { + single_open: "‘", + single_close: "’", + double_open: "“", + double_close: "”", + } + } +} diff --git a/src/library/text/raw.rs b/src/library/text/raw.rs index d96100af..80b6ef2a 100644 --- a/src/library/text/raw.rs +++ b/src/library/text/raw.rs @@ -100,6 +100,7 @@ impl Show for RawNode { let mut map = StyleMap::new(); map.set(TextNode::OVERHANG, false); map.set(TextNode::HYPHENATE, Smart::Custom(Hyphenate(false))); + map.set(TextNode::SMART_QUOTES, false); if let Smart::Custom(family) = styles.get(Self::FAMILY) { map.set_family(family.clone(), styles); diff --git a/tests/ref/code/closure.png b/tests/ref/code/closure.png index 7d933033..b4c83256 100644 Binary files a/tests/ref/code/closure.png and b/tests/ref/code/closure.png differ diff --git a/tests/ref/code/include.png b/tests/ref/code/include.png index 2d5d9ca7..001d7d1e 100644 Binary files a/tests/ref/code/include.png and b/tests/ref/code/include.png differ diff --git a/tests/ref/layout/columns.png b/tests/ref/layout/columns.png index 8a65443d..3f471415 100644 Binary files a/tests/ref/layout/columns.png and b/tests/ref/layout/columns.png differ diff --git a/tests/ref/text/basic.png b/tests/ref/text/basic.png index e7887f07..bfdf47a2 100644 Binary files a/tests/ref/text/basic.png and b/tests/ref/text/basic.png differ diff --git a/tests/ref/text/escape.png b/tests/ref/text/escape.png index 3434d6e0..77cc21f2 100644 Binary files a/tests/ref/text/escape.png and b/tests/ref/text/escape.png differ diff --git a/tests/ref/text/hyphenate.png b/tests/ref/text/hyphenate.png index 0560d5b7..48338f58 100644 Binary files a/tests/ref/text/hyphenate.png and b/tests/ref/text/hyphenate.png differ diff --git a/tests/ref/text/justify.png b/tests/ref/text/justify.png index d0b6c7bf..396adc77 100644 Binary files a/tests/ref/text/justify.png and b/tests/ref/text/justify.png differ diff --git a/tests/ref/text/quotes.png b/tests/ref/text/quotes.png new file mode 100644 index 00000000..d31ae937 
Binary files /dev/null and b/tests/ref/text/quotes.png differ diff --git a/tests/ref/text/tracking-spacing.png b/tests/ref/text/tracking-spacing.png index 8e6db3cc..69fc1eef 100644 Binary files a/tests/ref/text/tracking-spacing.png and b/tests/ref/text/tracking-spacing.png differ diff --git a/tests/typ/code/closure.typ b/tests/typ/code/closure.typ index 5524ba99..29fca404 100644 --- a/tests/typ/code/closure.typ +++ b/tests/typ/code/closure.typ @@ -5,11 +5,10 @@ // Don't parse closure directly in content. // Ref: true -#let x = "\"hi\"" +#let x = "x" -// Should output `"hi" => "bye"`. -#set text(overhang: false) -#x => "bye" +// Should output `x => y`. +#x => y --- // Basic closure without captures. diff --git a/tests/typ/text/escape.typ b/tests/typ/text/escape.typ index 6ec469c1..e03d73e5 100644 --- a/tests/typ/text/escape.typ +++ b/tests/typ/text/escape.typ @@ -2,7 +2,8 @@ --- // Escapable symbols. -\\ \/ \[ \] \{ \} \# \* \_ \= \~ \` \$ +\\ \/ \[ \] \{ \} \# \* \_ \ +\= \~ \` \$ \" \' // No need to escape. ( ) ; < > diff --git a/tests/typ/text/quotes.typ b/tests/typ/text/quotes.typ new file mode 100644 index 00000000..3f0649e8 --- /dev/null +++ b/tests/typ/text/quotes.typ @@ -0,0 +1,54 @@ +// Test smart quotes. + +--- +#set page(width: 200pt) + +// Test simple quotations in various languages. +#set text(lang: "en") +"The horse eats no cucumber salad" was the first sentence ever uttered on the 'telephone.' + +#set text(lang: "de") +"Das Pferd frisst keinen Gurkensalat" war der erste jemals am 'Fernsprecher' gesagte Satz. + +#set text(lang: "fr") +"Le cheval ne mange pas de salade de concombres" est la première phrase jamais prononcée au 'téléphone'. + +#set text(lang: "fi") +"Hevonen ei syö kurkkusalaattia" oli ensimmäinen koskaan 'puhelimessa' lausuttu lause. + +#set text(lang: "ro") +"Calul nu mănâncă salată de castraveți" a fost prima propoziție rostită vreodată la 'telefon'. 
+ +#set text(lang: "ru") +"Лошадь не ест салат из огурцов" - это была первая фраза, сказанная по 'телефону'. + +--- +// Test single pair of quotes. +#set text(lang: "en") +"" + +--- +// Test sentences with numbers and apostrophes. +#set text(lang: "en") +The 5'11" 'quick' brown fox jumps over the "lazy" dog's ear. + +He said "I'm a big fella." + +--- +// Test escape sequences. +The 5\'11\" 'quick\' brown fox jumps over the \"lazy" dog\'s ear. + +--- +// Test turning smart quotes off. +#set text(lang: "en") +He's told some books contain questionable "example text". + +#set text(smart-quotes: false) +He's told some books contain questionable "example text". + +--- +// Test changing properties within text. +#set text(lang: "en") +"She suddenly started speaking french: #text(lang: "fr")['Je suis une banane.']" Roman told me. + +Some people's thought on this would be #text(smart-quotes: false)["strange."]