From fe8a704c7e7279a56320c7fb7282df303aefbee9 Mon Sep 17 00:00:00 2001 From: Camelid Date: Sat, 23 Jan 2021 13:38:22 -0800 Subject: [PATCH] Implement date-checker This tool looks for HTML comments like `<!-- date: 2021-01 -->` in each Markdown source file and compiles a list of dates that are older than six months. It then opens an issue with that list, with checkboxes for each file and date. Note that it will only open an issue if there was at least one date older than six months; it does nothing if the list is empty. This tool is automatically run monthly in a GitHub Actions workflow. I have tested the tool on a private repo and confirmed that it works. --- .github/workflows/date-check.yml | 44 ++++++ .gitignore | 4 +- ci/date-check/Cargo.lock | 147 +++++++++++++++++++ ci/date-check/Cargo.toml | 12 ++ ci/date-check/src/main.rs | 236 +++++++++++++++++++++++++++++++ 5 files changed, 442 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/date-check.yml create mode 100644 ci/date-check/Cargo.lock create mode 100644 ci/date-check/Cargo.toml create mode 100644 ci/date-check/src/main.rs diff --git a/.github/workflows/date-check.yml b/.github/workflows/date-check.yml new file mode 100644 index 00000000..41111723 --- /dev/null +++ b/.github/workflows/date-check.yml @@ -0,0 +1,44 @@ +name: Date-Check + +on: + schedule: + # Run at noon UTC every 1st of the month + - cron: '00 12 01 * *' + + # Allow manually starting the workflow + workflow_dispatch: + +jobs: + date-check: + runs-on: ubuntu-latest + + steps: + - name: Checkout repo + uses: actions/checkout@v2 + + - name: Run `date-check` + working-directory: ci/date-check + run: | + cargo run -- ../../src/ > ../../date-check-output.txt + + - name: Open issue + uses: actions/github-script@v3 + with: + github-token: ${{secrets.GITHUB_TOKEN}} + script: | + const fs = require('fs'); + + const rawText = fs.readFileSync('date-check-output.txt', { encoding: 'utf8' }); + const title = rawText.split('\n')[0]; + if (title != 'empty') { + 
const body = rawText.split('\n').slice(1).join('\n'); + github.issues.create({ + owner: context.repo.owner, + repo: context.repo.repo, + title, + body, + }); + console.log('Opened issue.'); + } else { + console.log('No dates to triage.'); + } diff --git a/.gitignore b/.gitignore index b8cb3488..2901de5e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ book -# prevent accidentally changes +# prevent accidental changes book.toml + +ci/date-check/target/ diff --git a/ci/date-check/Cargo.lock b/ci/date-check/Cargo.lock new file mode 100644 index 00000000..296a132d --- /dev/null +++ b/ci/date-check/Cargo.lock @@ -0,0 +1,147 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "aho-corasick" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +dependencies = [ + "memchr", +] + +[[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" + +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "time", + "winapi", +] + +[[package]] +name = "date-check" +version = "0.1.0" +dependencies = [ + "chrono", + "glob", + "regex", +] + +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.82" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89203f3fba0a3795506acaad8ebce3c80c0af93f994d5a1d7a0b1eeb23271929" + +[[package]] +name = "memchr" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" + +[[package]] +name = "num-integer" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + +[[package]] +name = "regex" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", +] + +[[package]] +name = "regex-syntax" +version = "0.6.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581" + +[[package]] +name = "thread_local" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb9bc092d0d51e76b2b19d9d85534ffc9ec2db959a2523cdae0697e2972cd447" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "time" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" +dependencies = [ + "libc", + "wasi", + "winapi", +] + +[[package]] +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/ci/date-check/Cargo.toml b/ci/date-check/Cargo.toml new file mode 100644 index 00000000..95251fb1 --- /dev/null +++ b/ci/date-check/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "date-check" +version = "0.1.0" +authors = ["Camelid "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +glob = "0.3" +regex = "1" +chrono = "0.4" diff --git a/ci/date-check/src/main.rs b/ci/date-check/src/main.rs new file mode 100644 index 00000000..d78b430b --- /dev/null +++ b/ci/date-check/src/main.rs @@ -0,0 +1,236 @@ +use std::{ + collections::BTreeMap, + convert::TryInto as _, + env, fmt, fs, + path::{Path, PathBuf}, +}; + +use chrono::{Datelike as _, TimeZone as _, Utc}; +use glob::glob; +use regex::Regex; + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +struct Date { + year: u32, + month: u32, +} + +impl Date { + fn months_since(self, other: Date) -> Option { + let self_chrono = Utc.ymd(self.year.try_into().unwrap(), self.month, 1); + let other_chrono = Utc.ymd(other.year.try_into().unwrap(), other.month, 1); + let duration_since = self_chrono.signed_duration_since(other_chrono); + let months_since = 
duration_since.num_days() / 30; + if months_since < 0 { + None + } else { + Some(months_since.try_into().unwrap()) + } + } +} + +impl fmt::Display for Date { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:04}-{:02}", self.year, self.month) + } +} + +fn make_date_regex() -> Regex { + Regex::new( + r"(?x) # insignificant whitespace mode + ", + ) + .unwrap() +} + +fn collect_dates_from_file(date_regex: &Regex, text: &str) -> Vec<(usize, Date)> { + let mut line = 1; + let mut end_of_last_cap = 0; + date_regex + .captures_iter(&text) + .map(|cap| { + ( + cap.get(0).unwrap().range(), + Date { + year: cap["y"].parse().unwrap(), + month: cap["m"].parse().unwrap(), + }, + ) + }) + .map(|(byte_range, date)| { + line += text[end_of_last_cap..byte_range.end] + .chars() + .filter(|c| *c == '\n') + .count(); + end_of_last_cap = byte_range.end; + (line, date) + }) + .collect() +} + +fn collect_dates(paths: impl Iterator) -> BTreeMap> { + let date_regex = make_date_regex(); + let mut data = BTreeMap::new(); + for path in paths { + let text = fs::read_to_string(&path).unwrap(); + let dates = collect_dates_from_file(&date_regex, &text); + if !dates.is_empty() { + data.insert(path, dates); + } + } + data +} + +fn filter_dates( + current_month: Date, + min_months_since: u32, + dates_by_file: impl Iterator)>, +) -> impl Iterator)> { + dates_by_file + .map(move |(path, dates)| { + ( + path, + dates + .into_iter() + .filter(|(_, date)| { + current_month + .months_since(*date) + .expect("found date that is after current month") + >= min_months_since + }) + .collect::>(), + ) + }) + .filter(|(_, dates)| !dates.is_empty()) +} + +fn main() { + let root_dir = env::args() + .nth(1) + .expect("expect root Markdown directory as CLI argument"); + let root_dir_path = Path::new(&root_dir); + let glob_pat = format!("{}/**/*.md", root_dir); + let today_chrono = Utc::today(); + let current_month = Date { + year: today_chrono.year_ce().1, + month: today_chrono.month(), + }; + + 
let dates_by_file = collect_dates(glob(&glob_pat).unwrap().map(Result::unwrap)); + let dates_by_file: BTreeMap<_, _> = + filter_dates(current_month, 6, dates_by_file.into_iter()).collect(); + + if dates_by_file.is_empty() { + println!("empty"); + } else { + println!("Date Reference Triage for {}", current_month); + println!("## Procedure"); + println!(); + println!( + "Each of these dates should be checked to see if the docs they annotate are \ + up-to-date. Each date should be updated (in the Markdown file where it appears) to \ + use the current month ({current_month}), or removed if the docs it annotates are not \ + expected to fall out of date quickly.", + current_month = current_month + ); + println!(); + println!( + "Please check off each date once a PR to update it (and, if applicable, its \ + surrounding docs) has been merged. Please also mention that you are working on a \ + particular set of dates so duplicate work is avoided." + ); + println!(); + println!("Finally, once all the dates have been updated, please close this issue."); + println!(); + println!("## Dates"); + println!(); + + for (path, dates) in dates_by_file { + println!( + "- [ ] {}", + path.strip_prefix(&root_dir_path).unwrap().display() + ); + for (line, date) in dates { + println!(" - [ ] line {}: {}", line, date); + } + } + println!(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_months_since() { + let date1 = Date { + year: 2020, + month: 3, + }; + let date2 = Date { + year: 2021, + month: 1, + }; + assert_eq!(date2.months_since(date1), Some(10)); + } + + #[test] + fn test_date_regex() { + let regex = make_date_regex(); + assert!(regex.is_match("foo bar")); + } + + #[test] + fn test_collect_dates_from_file() { + let text = "Test1\n\nTest2\nFoo\nTest3\nTest4\nFooBar\n\nTest5\nTest6\nTest7\n\nTest8 + "; + assert_eq!( + collect_dates_from_file(&make_date_regex(), text), + vec![ + ( + 2, + Date { + year: 2021, + month: 1, + } + ), + ( + 4, + Date { + year: 2021, 
+ month: 2, + } + ), + ( + 7, + Date { + year: 2021, + month: 3, + } + ), + ( + 8, + Date { + year: 2021, + month: 4, + } + ), + ( + 14, + Date { + year: 2021, + month: 5, + } + ), + ] + ); + } +}