Skip to content

Commit

Permalink
Add markdown crate which hand rolls markdown formatting in rustfmt
Browse files Browse the repository at this point in the history
  • Loading branch information
ytmimi committed Sep 12, 2023
1 parent 1adcbf1 commit beef9c5
Show file tree
Hide file tree
Showing 17 changed files with 23,868 additions and 0 deletions.
1 change: 1 addition & 0 deletions markdown/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
target/
158 changes: 158 additions & 0 deletions markdown/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions markdown/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
[package]
name = "markdown"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
itertools = "0.10"
pulldown-cmark = { version = "0.9.3", default-features = false }
unicode-width = "0.1"
unicode-segmentation = "1.9"

[features]
gen-tests = []

[build-dependencies]
serde = { version = "1.0.160", features = ["derive"] }
serde_json = "1.0"
152 changes: 152 additions & 0 deletions markdown/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/// Build-script entry point.
///
/// Runs the (feature-dependent) test generation step and panics if it
/// reports an error, which makes the build fail.
fn main() {
    let outcome = generate_tests_markdown_tests();
    outcome.unwrap()
}

/// No-op variant used when the `gen-tests` feature is disabled.
///
/// Always succeeds without touching the file system.
#[cfg(not(feature = "gen-tests"))]
fn generate_tests_markdown_tests() -> std::io::Result<()> {
    let nothing_to_do: std::io::Result<()> = Ok(());
    nothing_to_do
}

/// Generates one Rust test file per spec JSON file (only with the `gen-tests` feature).
///
/// For every entry in the spec table, `./tests/spec/<spec>.json` is parsed into a list of
/// [`TestCase`]s and the generated tests are written to `./tests/<spec>.rs`.
///
/// # Errors
///
/// Returns an error if a spec file cannot be opened or parsed, or if the output file cannot
/// be created, written, or flushed.
#[cfg(feature = "gen-tests")]
fn generate_tests_markdown_tests() -> std::io::Result<()> {
    use std::fs::File;
    use std::io::{BufReader, BufWriter, Write};
    use std::path::PathBuf;

    let spec_folder = "./tests/spec/";
    let test_folder = "./tests/";

    // (test name prefix, spec file name, base URL of the spec the examples come from)
    let spec_files = [
        (
            "",
            "commonmark_v0_30_spec.json",
            "https://spec.commonmark.org/0.30/",
        ),
        ("gfm_", "gfm_spec.json", "https://github.github.com/gfm/"),
    ];

    for (prefix, spec, url) in spec_files {
        let input_file = PathBuf::from(spec_folder).join(spec);
        let output_file = PathBuf::from(test_folder).join(spec).with_extension("rs");

        // `serde_json::from_reader` reads in small steps, so give it a buffered reader.
        let reader = BufReader::new(File::open(&input_file)?);
        let test_cases: Vec<TestCase<'_>> = serde_json::from_reader(reader)?;
        let mut output = BufWriter::new(File::create(&output_file)?);

        write_test_cases(&mut output, prefix, test_cases, url)?;
        // Flush explicitly: errors raised while flushing on drop would be silently discarded.
        output.flush()?;
    }

    Ok(())
}

/// One example entry deserialized from a spec JSON file.
///
/// Only compiled when the `gen-tests` feature is enabled.
#[cfg(feature = "gen-tests")]
#[derive(Debug, serde::Deserialize)]
struct TestCase<'a> {
/// Markdown source of the example; read from the JSON key `markdown`.
#[serde(rename(deserialize = "markdown"))]
input: std::borrow::Cow<'a, str>,
/// Optional second argument for the generated macro call; read from the JSON key
/// `formattedMarkdown`.
#[serde(rename(deserialize = "formattedMarkdown"))]
output: Option<std::borrow::Cow<'a, str>>,
/// Example number; read from the JSON key `example`. Used in the generated test name and
/// in the `#example-N` link.
#[serde(rename(deserialize = "example"))]
id: usize,
/// Section name of the example; sanitized and used as part of the generated test name.
section: std::borrow::Cow<'a, str>,
/// When `true`, the generated test is marked `#[ignore]`. Defaults to `false` when absent.
#[serde(default)]
skip: bool,
/// Name of the macro invoked in the generated test; read from the JSON key `testMacro`,
/// falling back to `default_test()` when absent.
#[serde(default = "default_test", rename(deserialize = "testMacro"))]
test_macro: std::borrow::Cow<'a, str>,
/// Optional text emitted as a `//` comment above the generated test.
comment: Option<std::borrow::Cow<'a, str>>,
}

/// Serde default for `TestCase::test_macro`: the name of the test macro to
/// invoke when a spec entry does not specify `testMacro`.
#[cfg(feature = "gen-tests")]
fn default_test() -> std::borrow::Cow<'static, str> {
    std::borrow::Cow::Borrowed("test_identical_markdown_events")
}

/// Writes a complete generated test file to `writer`.
///
/// Emits the fixed file header (the `@generated` marker, a note on how the file was produced,
/// and `mod common;`) followed by one generated test per entry of `test_cases`, in order.
/// `prefix` and `url` are forwarded unchanged to `write_test_case`.
///
/// # Errors
///
/// Returns the first I/O error reported by `writer`.
#[cfg(feature = "gen-tests")]
fn write_test_cases<W>(
    writer: &mut W,
    prefix: &str,
    test_cases: Vec<TestCase<'_>>,
    url: &str,
) -> std::io::Result<()>
where
    W: std::io::Write,
{
    let header_lines = [
        "// @generated",
        "// generated running `cargo build -F gen-tests`",
        "// test macros are defined in tests/common/mod.rs",
        "mod common;",
    ];
    for line in header_lines {
        writeln!(writer, "{line}")?;
    }

    test_cases
        .into_iter()
        .try_for_each(|case| write_test_case(&mut *writer, prefix, case, url))
}

/// Writes one generated `#[test]` function for `test_case` to `writer`.
///
/// The generated function is named `{prefix}markdown_{section}_{id}`, carries a comment
/// linking to the example in the spec at `url`, and consists of a single invocation of
/// `test_case.test_macro` with the example input and, when present, the expected output.
///
/// * `prefix` - prepended to the generated function name.
/// * `url` - base URL of the spec; `#example-{id}` is appended, with a `/` inserted first
///   when `url` does not already end with one.
///
/// # Errors
///
/// Returns the first I/O error reported by `writer`.
#[cfg(feature = "gen-tests")]
fn write_test_case<W: std::io::Write>(
    writer: &mut W,
    prefix: &str,
    test_case: TestCase<'_>,
    url: &str,
) -> std::io::Result<()> {
    // Both URL shapes must end up as `<base>/#example-<id>`.
    let separator = if url.ends_with('/') { "" } else { "/" };
    let url = format!("{url}{separator}#example-{}", test_case.id);

    // '→' is turned into a real tab before the input is embedded in the test.
    let replace_tab_chars = test_case.input.replace('→', "\t");
    let input = replace_tab_chars.trim_end_matches('\n');

    if let Some(comment) = &test_case.comment {
        write!(writer, "\n// {comment}")?;
    }

    if test_case.skip {
        write!(writer, "\n#[ignore]")?;
    }

    // Turn the section title into something usable inside a Rust identifier.
    let section = test_case
        .section
        .to_lowercase()
        .replace(char::is_whitespace, "_")
        .replace("(", "")
        .replace(")", "");

    write!(
        writer,
        "\n#[test]\nfn {}markdown_{}_{}() {{\n// {}\n{}!(",
        prefix, section, test_case.id, url, test_case.test_macro,
    )?;

    write_string_literal(writer, input)?;
    if let Some(expected_output) = &test_case.output {
        write!(writer, ",")?;
        write_string_literal(writer, expected_output)?;
    }
    write!(writer, ");")?;
    write!(writer, "\n}}\n")?;
    Ok(())
}

/// Writes `text` as a Rust string literal: raw (`r##"…"##`) when safe, escaped otherwise.
#[cfg(feature = "gen-tests")]
fn write_string_literal<W: std::io::Write>(writer: &mut W, text: &str) -> std::io::Result<()> {
    // Lines ending in whitespace are written escaped, presumably so the whitespace stays
    // visible and is not stripped by tooling.
    let has_trailing_whitespace = text.lines().any(|l| l.ends_with(char::is_whitespace));
    // A `"##` sequence inside the text would terminate the raw literal early.
    let breaks_raw_literal = text.contains("\"##");
    if has_trailing_whitespace || breaks_raw_literal {
        write!(writer, "{:?}", text)
    } else {
        write!(writer, "r##\"{}\"##", text)
    }
}
4 changes: 4 additions & 0 deletions markdown/rustfmt.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
error_on_line_overflow = true
error_on_unformatted = true
format_generated_files = false
version = "Two"
45 changes: 45 additions & 0 deletions markdown/src/escape.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
use super::MarkdownFormatter;

/// Prefixes (one to six `#` followed by a space) that are checked when deciding
/// whether text starting with `#` needs an escape.
const ATX_HEADER_ESCAPES: [&str; 6] = [
    "# ",
    "## ",
    "### ",
    "#### ",
    "##### ",
    "###### ",
];

impl<'i, F> MarkdownFormatter<'i, F> {
    /// Reports whether `input` starts with one of the checked block-level markers.
    ///
    /// The check only runs when `last_was_softbreak` is set, and that flag is cleared as a
    /// side effect. The recognised shapes are: an ATX heading prefix (`#` to `######` plus a
    /// space), a run of `=` or `-` (ignoring trailing whitespace), a `- `, `* ` or `+ ` list
    /// marker, a line made only of `-`, `_` or `*` and spaces, or anything starting with `>`.
    ///
    /// NOTE(review): inputs of two bytes or fewer always yield `false`, which also covers a
    /// lone `>`, `-` or `=`; confirm that this is intended.
    pub(super) fn needs_escape(&mut self, input: &str) -> bool {
        // An escape _should_ only ever be required right after a softbreak, because the
        // formatter rewrites indentation there: leading spaces get removed, or indentation
        // (spaces or '>') gets added for lists and blockquotes. Dropping indentation can change
        // the meaning of the text, see <https://spec.commonmark.org/0.30/#example-70>.
        //
        // `take` reads the flag and resets it to `false` in one step.
        if !std::mem::take(&mut self.last_was_softbreak) {
            return false;
        }

        if input.len() <= 2 {
            return false;
        }

        let setext_underline = |marker: u8| input.trim_end().bytes().all(|byte| byte == marker);
        let list_marker = |marker: &str| input.starts_with(marker);
        let thematic_break =
            |marker: u8| input.bytes().all(|byte| byte == marker || byte == b' ');

        match input.chars().next() {
            Some('#') => ATX_HEADER_ESCAPES
                .iter()
                .any(|prefix| input.starts_with(prefix)),
            Some('=') => setext_underline(b'='),
            Some('-') => list_marker("- ") || setext_underline(b'-') || thematic_break(b'-'),
            Some('_') => thematic_break(b'_'),
            Some('*') => list_marker("* ") || thematic_break(b'*'),
            Some('+') => list_marker("+ "),
            Some('>') => true,
            _ => false,
        }
    }
}
Loading

0 comments on commit beef9c5

Please sign in to comment.