diff --git a/Cargo.lock b/Cargo.lock index f93496f..159ab8d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -57,6 +57,12 @@ dependencies = [ "unscanny", ] +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" + [[package]] name = "bumpalo" version = "3.15.4" @@ -132,23 +138,6 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e769b5c8c8283982a987c6e948e540254f1058d5a74b8794914d4ef5fc2a24" -[[package]] -name = "comrak" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0436149c9f6a1935b13306206c739b1ba84fa81f551b5eb87fc2ca7a13700af" -dependencies = [ - "derive_builder", - "emojis", - "entities", - "memchr", - "once_cell", - "regex", - "slug", - "typed-arena", - "unicode_categories", -] - [[package]] name = "copy_dir" version = "0.1.3" @@ -236,12 +225,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "deunicode" -version = "1.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6e854126756c496b8c81dec88f9a706b15b875c5849d4097a3854476b9fdf94" - [[package]] name = "displaydoc" version = "0.2.4" @@ -259,21 +242,6 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" -[[package]] -name = "emojis" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3407bc749191827d456a282321770847daf4b0a1128fde02597a8ed2e987b95d" -dependencies = [ - "phf", -] - -[[package]] -name = "entities" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5320ae4c3782150d900b79807611a59a99fc9a1d61d686faafc24b93fc8d7ca" - [[package]] name = "equivalent" version = "1.0.1" @@ -295,6 +263,15 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.2.12" @@ -688,6 +665,23 @@ dependencies = [ "version_check", ] +[[package]] +name = "pulldown-cmark" +version = "0.10.0" +source = "git+https://github.com/pulldown-cmark/pulldown-cmark.git?branch=branch_0.11#2540d4095d5f94fd972c93b927e9bec83b0c5079" +dependencies = [ + "bitflags", + "getopts", + "memchr", + "pulldown-cmark-escape", + "unicase", +] + +[[package]] +name = "pulldown-cmark-escape" +version = "0.10.0" +source = "git+https://github.com/pulldown-cmark/pulldown-cmark.git?branch=branch_0.11#2540d4095d5f94fd972c93b927e9bec83b0c5079" + [[package]] name = "quick-js" version = "0.4.1" @@ -861,23 +855,12 @@ version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" -[[package]] -name = "slug" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bd94acec9c8da640005f8e135a39fc0372e74535e6b368b7a04b875f784c8c4" -dependencies = [ - "deunicode", - "wasm-bindgen", -] - [[package]] name = "ssg" version = "0.1.0" dependencies = [ "aho-corasick", "chrono", - "comrak", "glob", "grass", "gray_matter", @@ -885,6 +868,7 @@ dependencies = [ "hypertext", "katex", "once_cell", + "pulldown-cmark", "regex", "serde", "tree-sitter", @@ -1158,12 +1142,6 @@ dependencies = [ "tree-sitter", ] -[[package]] -name = "typed-arena" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" - [[package]] name = "unic-langid" version = "0.9.4" @@ -1183,6 +1161,15 @@ dependencies = [ "tinystr", ] +[[package]] +name = "unicase" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" +dependencies = [ + "version_check", +] + [[package]] name = "unicode-bidi" version = "0.3.15" @@ -1211,10 +1198,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" [[package]] -name = "unicode_categories" -version = "0.1.1" +name = "unicode-width" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" [[package]] name = "unsafe-libyaml" diff --git a/Cargo.toml b/Cargo.toml index 2228d04..56b9ffc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,6 @@ edition = "2021" [dependencies] aho-corasick = "1.1.3" chrono = "0.4.35" -comrak = { version = "0.22.0", default-features = false, features = ["shortcodes"] } glob = "0.3.1" grass = { version = "0.13.2", default-features = false, features = ["random"] } gray_matter = { version = "0.2.6", default-features = false, features = ["yaml"] } @@ -17,10 +16,14 @@ once_cell = "1.19.0" regex = "1.10.4" serde = { version = "1.0.197", features = ["derive"] } +# Markdown +pulldown-cmark = { git = "https://github.com/pulldown-cmark/pulldown-cmark.git", branch = "branch_0.11" } + # Treesitter tree-sitter = "0.20.10" tree-sitter-highlight = "0.20.1" +# Treesitter languages tree-sitter-astro = { git = "https://github.com/virchau13/tree-sitter-astro.git", rev = "e924787e12e8a03194f36a113290ac11d6dc10f3" } tree-sitter-css = "0.20.0" tree-sitter-haskell = { git = "https://github.com/tree-sitter/tree-sitter-haskell", rev = "1da347c88599faad7964e63facead5d163ac7dba" } diff --git a/content/index.md b/content/index.md index 6db6ee9..e5f0cb0 100644 --- a/content/index.md +++ b/content/index.md @@ -1,7 +1,3 @@ ---- -title: "Test" -date: 2021-09-10T19:34:01+02:00 ---- # Welcome to my website! :heart: You have found this little floating rock in the middle of the Internet! Congrats 🎉 diff --git a/content/posts/ruby-in-markdown.md b/content/posts/ruby-in-markdown.md index c6436cc..5460523 100644 --- a/content/posts/ruby-in-markdown.md +++ b/content/posts/ruby-in-markdown.md @@ -6,16 +6,15 @@ tags: [Japanese, zola, hugo, astro] Sadly, as far as I know CommonMark currently doesn't include anything about ruby in its spec. On top of that ruby is pretty uncommon, so it is pretty rare for any ruby extensions to exist. As I move through any new frameworks, I will try to document any simple solutions that I figure out. - ## Examples | Language | Example | | -------- | ------- | -| Japanese | :ruby[日本語]{help=に;ほん;ご}の:ruby[文法]{help=ぶん;ぽう}は:ruby[難]{help=むずか}しい | -| Chinese | :ruby[北京]{help=Běi;jīng}
:ruby[北京]{help=ㄅㄟˇ;ㄐㄧㄥ} | -| Korean | :ruby[韓國]{help=한;국} | -| Vietnamese | :ruby[河內]{help=Hà;Nội} | -| Other | I :ruby[love]{help=like} ruby! | +| Japanese | [日本語]{にほんご}の[文法]{ぶんぽう}は[難]{むずか}しい | +| Chinese | [北京]{Běijīng}
[北京]{ㄅㄟˇㄐㄧㄥ} | +| Korean | [韓國]{한국} | +| Vietnamese | [河內]{HàNội} | +| Other | I [love]{like} ruby! | ## Remark diff --git a/src/html/home.rs b/src/html/home.rs index ba8326f..3248de2 100644 --- a/src/html/home.rs +++ b/src/html/home.rs @@ -1,6 +1,6 @@ use hypertext::{html_elements, maud, maud_move, GlobalAttributes, Raw, Renderable}; -use crate::md::render; +use crate::text::md::parse; use super::page; @@ -21,7 +21,7 @@ const INTRO: &str = r#" fn intro() -> impl Renderable { maud!( section .p-card.intro-jp lang="ja-JP" { - (Raw(render(INTRO))) + (Raw(parse(INTRO))) } ) } diff --git a/src/main.rs b/src/main.rs index e7cfdb2..6f22ee9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,7 @@ use std::process::Command; use std::{collections::HashMap, path::Path}; use std::fs; -use chrono::{Datelike, Utc}; +use chrono::Datelike; use grass; use html::LinkableData; use hypertext::{Raw, Renderable}; @@ -12,6 +12,7 @@ mod html; mod ts; mod gen; mod utils; +mod text; #[derive(Debug)] @@ -90,7 +91,7 @@ impl Transformable for md::Post { } fn render(data: &str) -> String { - md::render(data) + text::md::parse(data) } } @@ -115,7 +116,7 @@ impl Transformable for md::Slide { fn render(data: &str) -> String { data .split("\n-----\n") - .map(|chunk| chunk.split("\n---\n").map(md::render).collect::>()) + .map(|chunk| chunk.split("\n---\n").map(text::md::parse).collect::>()) .map(|stack| match stack.len() > 1 { true => format!("
{}
", stack.into_iter().map(|slide| format!("
{slide}
")).collect::()), false => format!("
{}
", stack[0]) @@ -138,7 +139,7 @@ impl Transformable for md::Wiki { } fn render(data: &str) -> String { - md::render(data) + text::md::parse(data) } } @@ -246,7 +247,7 @@ fn main() { gen::Asset { kind: gen::AssetKind::Html(Box::new(|_| { let data = std::fs::read_to_string("content/index.md").unwrap(); - let data = md::render(&data); + let data = text::md::parse(&data); html::home(Raw(data)).render().to_owned().into() })), out: "index.html".into(), diff --git a/src/md/cite.rs b/src/md/cite.rs deleted file mode 100644 index 45bfc12..0000000 --- a/src/md/cite.rs +++ /dev/null @@ -1,29 +0,0 @@ -use std::cell::RefCell; - -use comrak::{Arena, nodes::{Ast, AstNode, LineColumn, NodeValue}}; -use hayagriva::{BibliographyDriver, Library}; -use once_cell::sync::Lazy; -use regex::Regex; - -use super::render::iter_nodes; - - -static RE_CITE: Lazy = Lazy::new(|| - Regex::new(r":cite\[(\w+)\]").unwrap() -); - -pub fn add_cite<'a>(root: &'a AstNode<'a>, arena: &'a Arena>) { - // let mut driver = BibliographyDriver::new(); - - iter_nodes(root, &|node| { - match &mut node.data.borrow_mut().value { - &mut NodeValue::Text(ref text) => { - for xd in RE_CITE.captures_iter(text) { - let text = xd.get(1).unwrap().as_str(); - println!("{:?}", text); - } - }, - _ => (), - } - }); -} diff --git a/src/md/mod.rs b/src/md/mod.rs index eeeff42..352f2c4 100644 --- a/src/md/mod.rs +++ b/src/md/mod.rs @@ -1,8 +1,4 @@ mod matter; -mod render; -mod ruby; -mod cite; pub use matter::{Post, Slide, Wiki}; pub use matter::preflight; -pub use render::render; diff --git a/src/md/render.rs b/src/md/render.rs deleted file mode 100644 index 9875340..0000000 --- a/src/md/render.rs +++ /dev/null @@ -1,77 +0,0 @@ -use std::cell::RefCell; -use comrak::{Arena, parse_document, format_html, Options}; -use comrak::nodes::{Ast, AstNode, LineColumn, NodeValue}; -use once_cell::unsync::Lazy; - -use crate::ts; - - -const OPTIONS: Lazy = Lazy::new(|| - Options { - extension: comrak::ExtensionOptionsBuilder::default() - .front_matter_delimiter(Some("---".into())) - .table(true) - .math_dollars(true) - .shortcodes(true) - .build() - .unwrap(), - parse: comrak::ParseOptionsBuilder::default() - .smart(true) - .build() - .unwrap(), - render: comrak::RenderOptionsBuilder::default() - .unsafe_(true) - .build() - .unwrap(), - } -); - - -pub fn iter_nodes<'a, F>(node: &'a AstNode<'a>, f: &F) - where F : Fn(&'a AstNode<'a>) { - f(node); - for c in node.children() { - iter_nodes(c, f); - } -} - - -pub fn render(raw: &str) -> String { - let arena = Arena::new(); - let root = parse_document(&arena, raw, &OPTIONS); - - iter_nodes(root, &|node| { - match &mut node.data.borrow_mut().value { - &mut NodeValue::CodeBlock(ref mut inner) => { - let html = ts::highlight(&inner.info, &inner.literal); - let html = hypertext::Renderable::render(html); - let elem = AstNode::new(RefCell::new(Ast::new(NodeValue::HtmlInline(html.into()), LineColumn { line: 0, column: 0 }))); - let elem = arena.alloc(elem); - node.insert_before(elem); - node.detach(); - }, - &mut NodeValue::Math(ref text) => { - let opts = katex::opts::Opts::builder() - .output_type(katex::OutputType::Mathml) - .display_mode(text.display_math) - .build() - .unwrap(); - let math = katex::render_with_opts(&text.literal, opts).unwrap(); - let elem = AstNode::new(RefCell::new(Ast::new(NodeValue::HtmlInline(math.into()), LineColumn { line: 0, column: 0 }))); - let elem = arena.alloc(elem); - node.insert_before(elem); - node.detach(); - }, - _ => (), - } - }); - - super::ruby::add_ruby(root, &arena); - super::cite::add_cite(root, &arena); - - let mut html = vec![]; - format_html(root, &OPTIONS, &mut html).unwrap(); - - String::from_utf8(html).unwrap() -} - diff --git a/src/md/ruby.rs b/src/md/ruby.rs deleted file mode 100644 index 268573b..0000000 --- a/src/md/ruby.rs +++ /dev/null @@ -1,63 +0,0 @@ -use std::cell::RefCell; - -use comrak::{Arena, nodes::{Ast, AstNode, LineColumn, NodeValue}}; -use once_cell::unsync::Lazy; -use regex::Regex; - -use super::render::iter_nodes; - - -const RE_RUBY: Lazy = Lazy::new(|| - Regex::new(r"\[([^\]]+)\]\{([^}]+)\}").unwrap() -); - -#[derive(Debug)] -enum Annotated<'a> { - Text(&'a str), - Ruby(&'a str, &'a str), -} - - -pub fn add_ruby<'a>(root: &'a AstNode<'a>, arena: &'a Arena>) { - iter_nodes(root, &|node| { - match &mut node.data.borrow_mut().value { - &mut NodeValue::Text(ref text) => { - for item in annotate(text) { - let new = match item { - Annotated::Text(text) => NodeValue::Text(text.into()), - Annotated::Ruby(t, f) => NodeValue::HtmlInline(format!("{t}({f})")), - }; - let elem = AstNode::new(RefCell::new(Ast::new(new, LineColumn { line: 0, column: 0 }))); - let elem = arena.alloc(elem); - node.insert_before(elem) - } - node.detach(); - }, - _ => (), - } - }); -} - -fn annotate(input: &str) -> Vec { - let mut parts: Vec = Vec::new(); - let mut last_index = 0; - - for cap in RE_RUBY.captures_iter(input) { - let text = cap.get(1).unwrap().as_str(); - let ruby = cap.get(2).unwrap().as_str(); - let index = cap.get(0).unwrap().start(); - - if index > last_index { - parts.push(Annotated::Text(&input[last_index..index])); - } - - parts.push(Annotated::Ruby(text, ruby)); - last_index = cap.get(0).unwrap().end(); - } - - if last_index < input.len() { - parts.push(Annotated::Text(&input[last_index..])); - } - - parts -} diff --git a/src/text/md.rs b/src/text/md.rs new file mode 100644 index 0000000..0779144 --- /dev/null +++ b/src/text/md.rs @@ -0,0 +1,101 @@ +use hypertext::Renderable; +use once_cell::sync::Lazy; +use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag, TagEnd}; + +use crate::ts; + +use super::ruby; + + +static OPTS: Lazy = Lazy::new(|| + Options::empty() + .union(Options::ENABLE_MATH) + .union(Options::ENABLE_TABLES) + .union(Options::ENABLE_TASKLISTS) + .union(Options::ENABLE_STRIKETHROUGH) + .union(Options::ENABLE_SMART_PUNCTUATION) +); + +static KATEX_I: Lazy = Lazy::new(|| + katex::opts::Opts::builder() + .output_type(katex::OutputType::Mathml) + .build() + .unwrap() +); + +static KATEX_B: Lazy = Lazy::new(|| + katex::opts::Opts::builder() + .output_type(katex::OutputType::Mathml) + .display_mode(true) + .build() + .unwrap() +); + + +pub fn parse(text: &str) -> String { + let stream = Parser::new_ext(text, *OPTS) + .map(make_math) + .collect::>(); + + let stream = make_code(stream) + .into_iter() + .flat_map(make_ruby); + + let mut html = String::new(); + pulldown_cmark::html::push_html(&mut html, stream.into_iter()); + html +} + +fn make_math(event: Event) -> Event { + match event { + Event::InlineMath(math) => Event::InlineHtml(katex::render_with_opts(&math, &*KATEX_I).unwrap().into()), + Event::DisplayMath(math) => Event::Html(katex::render_with_opts(&math, &*KATEX_B).unwrap().into()), + _ => event + } +} + +fn make_code(es: Vec) -> Vec { + let mut buff = Vec::new(); + let mut lang = None; + let mut code = String::new(); + + for event in es { + match event { + Event::Start(Tag::CodeBlock(kind)) => match kind { + CodeBlockKind::Indented => (), + CodeBlockKind::Fenced(name) => lang = Some(name), + }, + Event::End(TagEnd::CodeBlock) => { + let lang = lang.take().unwrap_or("".into()); + let html = ts::highlight(&lang, &code).render().as_str().to_owned(); + buff.push(Event::Html(html.into())); + code.clear(); + }, + Event::Text(text) => match lang { + None => buff.push(Event::Text(text)), + Some(_) => code.push_str(&text), + }, + _ => buff.push(event) + } + } + + buff +} + +fn make_ruby(event: Event) -> Vec { + match event { + Event::Text(text) => { + let mut buff = Vec::new(); + + for item in ruby::annotate(&text) { + match item { + ruby::Annotated::Text(text) => buff.push(Event::Text(text.to_owned().into())), + ruby::Annotated::Ruby(t, f) => buff.push(Event::InlineHtml(format!("{t}({f})").into())), + }; + } + + buff + }, + _ => vec![event], + } +} diff --git a/src/text/mod.rs b/src/text/mod.rs new file mode 100644 index 0000000..65919e6 --- /dev/null +++ b/src/text/mod.rs @@ -0,0 +1,2 @@ +pub mod md; +pub mod ruby; diff --git a/src/text/ruby.rs b/src/text/ruby.rs new file mode 100644 index 0000000..ec4df02 --- /dev/null +++ b/src/text/ruby.rs @@ -0,0 +1,38 @@ +use once_cell::sync::Lazy; +use regex::Regex; + + +static RE_RUBY: Lazy = Lazy::new(|| + Regex::new(r"\[([^\]]+)\]\{([^}]+)\}").unwrap() +); + +#[derive(Debug)] +pub(crate) enum Annotated<'a> { + Text(&'a str), + Ruby(&'a str, &'a str), +} + + +pub fn annotate(input: &str) -> Vec { + let mut parts: Vec = Vec::new(); + let mut last_index = 0; + + for cap in RE_RUBY.captures_iter(input) { + let text = cap.get(1).unwrap().as_str(); + let ruby = cap.get(2).unwrap().as_str(); + let index = cap.get(0).unwrap().start(); + + if index > last_index { + parts.push(Annotated::Text(&input[last_index..index])); + } + + parts.push(Annotated::Ruby(text, ruby)); + last_index = cap.get(0).unwrap().end(); + } + + if last_index < input.len() { + parts.push(Annotated::Text(&input[last_index..])); + } + + parts +}