diff --git a/crates/typst-library/src/layout/par.rs b/crates/typst-library/src/layout/par.rs index 5d21c8e9..bb280069 100644 --- a/crates/typst-library/src/layout/par.rs +++ b/crates/typst-library/src/layout/par.rs @@ -17,7 +17,7 @@ use crate::math::EquationElem; use crate::prelude::*; use crate::text::{ char_is_cjk_script, is_gb_style, shape, LinebreakElem, Quoter, Quotes, ShapedGlyph, - ShapedText, SmartquoteElem, SpaceElem, TextElem, + ShapedText, SmartquoteElem, SpaceElem, TextElem, BEGIN_PUNCT_PAT, END_PUNCT_PAT, }; /// Arranges text, spacing and inline-level elements into a paragraph. @@ -1287,11 +1287,6 @@ fn line<'a>( let end = range.end; let mut justify = p.justify && end < p.bidi.text.len() && !mandatory; - // The CJK punctuation that can appear at the beginning or end of a line. - const BEGIN_PUNCT_PAT: &[char] = &['“', '‘', '《', '(', '『', '「']; - const END_PUNCT_PAT: &[char] = - &['”', '’', ',', '。', '、', ':', ';', '》', ')', '』', '」']; - if range.is_empty() { return Line { bidi: &p.bidi, diff --git a/crates/typst-library/src/text/shaping.rs b/crates/typst-library/src/text/shaping.rs index 39122837..0cfffce0 100644 --- a/crates/typst-library/src/text/shaping.rs +++ b/crates/typst-library/src/text/shaping.rs @@ -1001,6 +1001,14 @@ fn assert_glyph_ranges_in_order(glyphs: &[ShapedGlyph], dir: Dir) { } } +// The CJK punctuation that can appear at the beginning or end of a line. +pub(crate) const BEGIN_PUNCT_PAT: &[char] = + &['“', '‘', '《', '〈', '(', '『', '「', '【', '〖', '〔', '[', '{']; +pub(crate) const END_PUNCT_PAT: &[char] = &[ + '”', '’', ',', '.', '。', '、', ':', ';', '》', '〉', ')', '』', '」', '】', + '〗', '〕', ']', '}', '?', '!', +]; + /// Whether the glyph is a space. #[inline] fn is_space(c: char) -> bool { @@ -1035,11 +1043,15 @@ fn is_cjk_left_aligned_punctuation( return true; } - if gb_style && matches!(c, ',' | '。' | '、' | ':' | ';') { + if gb_style && matches!(c, ',' | '。' | '.' | '、' | ':' | ';' | '!' | '?') + { + // In GB style, exclamations and question marks are also left aligned and can be adjusted. + // Note that they are not adjustable in other styles. return true; } - matches!(c, '》' | ')' | '』' | '」') + // See appendix A.3 https://www.w3.org/TR/clreq/#tables_of_chinese_punctuation_marks + matches!(c, '》' | ')' | '』' | '」' | '】' | '〗' | '〕' | '〉' | ']' | '}') } /// See @@ -1054,14 +1066,14 @@ fn is_cjk_right_aligned_punctuation( if matches!(c, '“' | '‘') && x_advance + stretchability.0 == Em::one() { return true; } - - matches!(c, '《' | '(' | '『' | '「') + // See appendix A.3 https://www.w3.org/TR/clreq/#tables_of_chinese_punctuation_marks + matches!(c, '《' | '(' | '『' | '「' | '【' | '〖' | '〔' | '〈' | '[' | '{') } /// See #[inline] fn is_cjk_center_aligned_punctuation(c: char, gb_style: bool) -> bool { - if !gb_style && matches!(c, ',' | '。' | '、' | ':' | ';') { + if !gb_style && matches!(c, ',' | '。' | '.' | '、' | ':' | ';') { return true; } diff --git a/tests/ref/layout/cjk-punctuation-adjustment.png b/tests/ref/layout/cjk-punctuation-adjustment.png new file mode 100644 index 00000000..71179751 Binary files /dev/null and b/tests/ref/layout/cjk-punctuation-adjustment.png differ diff --git a/tests/typ/layout/cjk-punctuation-adjustment.typ b/tests/typ/layout/cjk-punctuation-adjustment.typ new file mode 100644 index 00000000..0f1f2894 --- /dev/null +++ b/tests/typ/layout/cjk-punctuation-adjustment.typ @@ -0,0 +1,38 @@ +#set page(width: 15em) + +// In the following example, the space between 》! and ? should be squeezed. +// because zh-CN follows GB style +#set text(lang: "zh", region: "CN", font: "Noto Serif CJK SC") +原来,你也玩《原神》!? + +// However, in the following example, the space between 》! and ? should not be squeezed. +// because zh-TW does not follow GB style +#set text(lang: "zh", region: "TW", font: "Noto Serif CJK TC") +原來,你也玩《原神》! ? +--- + +#set text(lang: "zh", region: "CN", font: "Noto Serif CJK SC") +《书名〈章节〉》 // the space between 〉 and 》 should be squeezed + +〔茸毛〕:很细的毛 // the space between 〕 and : should be squeezed + +--- +#set page(width: 21em) +#set text(lang: "zh", region: "CN", font: "Noto Serif CJK SC") + +// These examples contain extensive use of Chinese punctuation marks, +// from 《Which parentheses should be used when applying parentheses?》. +// link: https://archive.md/2bb1N + + +(〔中〕医、〔中〕药、技)系列评审 + +(长三角[长江三角洲])(GB/T 16159—2012《汉语拼音正词法基本规则》) + +【爱因斯坦(Albert Einstein)】物理学家 + +〔(2009)民申字第1622号〕 + +“江南海北长相忆,浅水深山独掩扉。”([唐]刘长卿《会赦后酬主簿所问》) + +参看1378页〖象形文字〗。(《现代汉语词典》修订本)