feat: improve treesitter highlighting

This commit is contained in:
Maciej Jur 2024-02-13 23:44:35 +01:00
parent a8cb3c0c78
commit 21dd3e6045
Signed by: kamov
GPG key ID: 191CBFF5F72ECAFD
19 changed files with 498 additions and 271 deletions

View file

@ -25,7 +25,7 @@ When looking for a way to extend remark I first looked for an existing plugin wh
I was able to write a really simple and short solution using a pair of regexes working in conjunction to split strings and replace custom ruby shorthands with HTML, which is then passed through to Rehype.
```ts
```typescript
import { visit } from "unist-util-visit";
import type { Node } from "unist-util-visit/lib";

View file

@ -226,7 +226,6 @@ An example of this is the `Functor` instance of `Either`:
instance Functor (Either a) where
fmap _ (Left x) = Left x
fmap f (Right x) = Right (f x)
```
In the instance declaration above, we specify that, for any type `a`, `Either a` is an instance of the `Functor` type class.

View file

@ -40,50 +40,57 @@
.kanagawa {
background-color: var(--kngw-sumiInk1);
border: 0.25rem solid var(--kngw-sumiInk3);
color: var(--kngw-fujiWhite);
.string {
color: var(--kngw-springGreen);
&::before {
background: var(--kngw-sumiInk3);
}
.type, .type-builtin {
color: var(--kngw-waveAqua2);
}
// Identifiers
.variable-builtin { color: var(--kngw-waveRed); }
.variable-parameter { color: var(--kngw-springViolet2); }
.keyword {
color: var(--kngw-oniViolet);
}
.constant { color: var(--kngw-surimiOrange); }
.constructor {
color: var(--kngw-springBlue);
}
.label { color: var(--kngw-oniViolet); }
.tag {
color: var(--kngw-springBlue);
}
// Literals
.string { color: var(--kngw-springGreen); }
.string-special { color: var(--kngw-boatYellow2); }
.variable {
color: var(--kngw-fujiWhite);
}
.number { color: var(--kngw-sakuraPink); }
.number-float { color: var(--kngw-sakuraPink); };
.operator, .string-special {
color: var(--kngw-boatYellow2);
}
// Types
.type { color: var(--kngw-waveAqua2); }
.type-builtin { color: var(--kngw-waveAqua2); }
.comment {
color: var(--kngw-fujiGray);
}
.attribute { color: var(--kngw-carpYellow); }
.property { color: var(--kngw-carpYellow); }
.include {
color: var(--kngw-oniViolet);
}
// Functions
.function { color: var(--kngw-crystalBlue); }
.function-macro { color: var(--kngw-waveRed); }
.function-method { color: var(--kngw-crystalBlue) }
.function {
color: var(--kngw-crystalBlue);
}
.constructor { color: var(--kngw-springBlue); }
.operator { color: var(--kngw-boatYellow2); }
.property, .attribute {
color: var(--kngw-carpYellow);
}
// Keywords
.keyword { color: var(--kngw-oniViolet); }
.keyword-return { color: var(--kngw-peachRed); }
.keyword-debug { color: var(--kngw-peachRed); }
.keyword-exception { color: var(--kngw-peachRed); }
// Punctuation
.punctuation-delimiter { color: var(--kngw-springViolet2); }
.punctuation-bracket { color: var(--kngw-springViolet2); }
// Comments
.comment { color: var(--kngw-fujiGray); }
// Markup
.tag { color: var(--kngw-springBlue); }
}

View file

@ -104,18 +104,30 @@
border: 1px dashed lightgray;
}
> :is(pre:has(> code)),
> pre.astro-code {
.listing {
position: relative;
border-radius: 0.5rem;
margin: 1em 0.5em;
padding: 0.5em;
max-width: 100%;
border-radius: 0.5em;
overflow-x: auto;
border: 1px dashed lightgray;
> code {
&::before {
content: attr(data-lang);
display: block;
line-height: 1.5em;
top: 0;
right: 0;
position: absolute;
padding: 0.1em 0.2em 0 0.3em;
border-radius: 0 0 0 0.5rem;
}
pre {
padding: 0.5em;
max-width: 100%;
overflow-x: auto;
> code {
display: block;
line-height: 1.5em;
}
}
}

View file

@ -9,50 +9,54 @@ function text(value: string) {
}
}
function span(classes: string[], value: string) {
function span(name: string) {
return {
type: 'element',
tagName: 'span',
properties: {
className: classes.map(c => c.replace('.', '-')).join(' '),
className: name.replace('.', '-'),
},
children: [
text(value),
]
children: []
}
}
export default function rehypeTreesitter() {
return function (tree: any) {
visit(tree, null, (node, _, parent) => {
if (node.tagName !== 'code' || parent.tagName !== 'pre') return;
parent.properties.className = ['kanagawa'];
visit(tree, null, (node, _, above) => {
if (node.tagName !== 'code' || above.tagName !== 'pre') return;
const code = node.children?.[0].value || '';
const lang = node.properties.className?.[0].replace('language-', '') || '';
const parent = { ...above };
const code = node.children?.[0].value;
const lang = node.properties.className?.[0].replace('language-', '');
if (!lang || !code) return;
above.tagName = 'figure';
above.children = [parent];
above.properties = {
className: 'listing kanagawa',
...!!lang && { "data-lang": lang },
};
const stack: string[] = [];
const children = (node.children = [] as any[] );
const events = treesitter.hl(lang, code);
const root = { children: [] };
const ptrs: any[] = [root];
for (const event of events) {
for (const event of treesitter.hl(lang, code)) {
switch (event.kind) {
case 'text': {
const child = (stack.length)
? span(stack, event.text)
: text(event.text);
children.push(child);
const inserted = text(event.text);
ptrs.at(-1).children.push(inserted);
} break;
case 'open': {
stack.push(event.name);
const inserted = span(event.name);
ptrs.at(-1).children.push(inserted);
ptrs.push(inserted);
} break;
case 'close': {
stack.pop();
ptrs.pop();
} break;
}
}
node.children = root.children;
});
};
}

View file

@ -17,11 +17,13 @@ tree-sitter = "0.20.10"
tree-sitter-highlight = "0.20.1"
# Languages
tree-sitter-astro = { git = "https://github.com/virchau13/tree-sitter-astro.git", rev = "e924787e12e8a03194f36a113290ac11d6dc10f3" }
tree-sitter-css = "0.20.0"
tree-sitter-haskell = { git = "https://github.com/tree-sitter/tree-sitter-haskell", rev = "cf98de23e4285b8e6bcb57b050ef2326e2cc284b" }
tree-sitter-haskell = { git = "https://github.com/tree-sitter/tree-sitter-haskell", rev = "1da347c88599faad7964e63facead5d163ac7dba" }
tree-sitter-html = "0.20.0"
tree-sitter-javascript = "0.20.3"
tree-sitter-md = "0.1.7"
tree-sitter-regex = "0.20.0"
tree-sitter-rust = "0.20.4"
tree-sitter-typescript = "0.20.5"

View file

@ -1,3 +0,0 @@
# `xd-darwin-x64`
This is the **x86_64-apple-darwin** binary for `xd`

View file

@ -1,18 +0,0 @@
{
"name": "xd-darwin-x64",
"version": "0.0.0",
"os": [
"darwin"
],
"cpu": [
"x64"
],
"main": "xd.darwin-x64.node",
"files": [
"xd.darwin-x64.node"
],
"license": "MIT",
"engines": {
"node": ">= 10"
}
}

View file

@ -1,3 +0,0 @@
# `xd-linux-x64-gnu`
This is the **x86_64-unknown-linux-gnu** binary for `xd`

View file

@ -1,21 +0,0 @@
{
"name": "xd-linux-x64-gnu",
"version": "0.0.0",
"os": [
"linux"
],
"cpu": [
"x64"
],
"main": "xd.linux-x64-gnu.node",
"files": [
"xd.linux-x64-gnu.node"
],
"license": "MIT",
"engines": {
"node": ">= 10"
},
"libc": [
"glibc"
]
}

View file

@ -1,3 +0,0 @@
# `xd-win32-x64-msvc`
This is the **x86_64-pc-windows-msvc** binary for `xd`

View file

@ -1,18 +0,0 @@
{
"name": "xd-win32-x64-msvc",
"version": "0.0.0",
"os": [
"win32"
],
"cpu": [
"x64"
],
"main": "xd.win32-x64-msvc.node",
"files": [
"xd.win32-x64-msvc.node"
],
"license": "MIT",
"engines": {
"node": ">= 10"
}
}

View file

@ -0,0 +1,25 @@
; Highlight captures for markup tags, attributes and delimiters.

; Tag names, malformed end tags and the doctype
(tag_name) @tag
(erroneous_end_tag_name) @keyword
(doctype) @constant

; Attribute names are captured as properties
(attribute_name) @property

(comment) @comment

; Attribute values, bare or quoted, are captured as strings.
; A single alternation covers both node kinds; a separate
; `(attribute_value) @string` pattern would only repeat the same capture.
[
  (attribute_value)
  (quoted_attribute_value)
] @string

"=" @operator

[
  "{"
  "}"
] @punctuation.bracket

; Tokens that open and close tags
[
  "<"
  ">"
  "</"
  "/>"
] @tag.delimiter

View file

@ -0,0 +1,15 @@
; Language injections: each pattern marks a `raw_text` child as injected
; content and names the language that should highlight it.
; Frontmatter content is highlighted as TypeScript.
(frontmatter
(raw_text) @injection.content
(#set! "injection.language" "typescript"))
; Interpolation content is handed to the "tsx" language.
; NOTE(review): no "tsx" entry is visible in the configuration or alias tables
; in configs.rs — confirm this injection resolves to a configuration.
(interpolation
(raw_text) @injection.content
(#set! "injection.language" "tsx"))
; Script element content is highlighted as TypeScript.
(script_element
(raw_text) @injection.content
(#set! "injection.language" "typescript"))
; Style element content is highlighted as CSS.
(style_element
(raw_text) @injection.content
(#set! "injection.language" "css"))

View file

@ -0,0 +1,78 @@
; Highlight captures for stylesheets.
(comment) @comment
; Selector nodes captured as tags
[
(tag_name)
(nesting_selector)
(universal_selector)
] @tag
; Symbolic and word operators
[
"~"
">"
"+"
"-"
"*"
"/"
"="
"^="
"|="
"~="
"$="
"*="
"and"
"or"
"not"
"only"
] @operator
; Attribute selectors, plus the names used inside pseudo-element and
; pseudo-class selectors
(attribute_selector (plain_value) @string)
(attribute_name) @attribute
(pseudo_element_selector (tag_name) @attribute)
(pseudo_class_selector (class_name) @attribute)
; Remaining identifier-like names are captured as properties
[
(class_name)
(id_name)
(namespace_name)
(property_name)
(feature_name)
] @property
(function_name) @function
; Property names and plain values whose text starts with `--`.
; NOTE(review): `variable.special` is not among the capture names listed in
; captures.rs — confirm how the highlighter resolves it.
(
[
(property_name)
(plain_value)
] @variable.special
(#match? @variable.special "^--")
)
; At-rule keywords and other keyword-like nodes
[
"@media"
"@import"
"@charset"
"@namespace"
"@supports"
"@keyframes"
(at_keyword)
(to)
(from)
(important)
] @keyword
; Literal values
(string_value) @string
(color_value) @string.special
[
(integer_value)
(float_value)
] @number
(unit) @type
[
","
":"
] @punctuation.delimiter

View file

@ -0,0 +1,47 @@
; Forked from tree-sitter-regex
; The MIT License (MIT) Copyright (c) 2014 Max Brunsfeld
; Highlight captures for regular-expression patterns.
; Bracket-like tokens, including the group-opening forms
[
"("
")"
"(?"
"(?:"
"(?<"
">"
"["
"]"
"{"
"}"
] @punctuation.bracket
(group_name) @property
; These are escaped special characters that have lost their special meaning,
; so they receive the plain regexp capture rather than the escape capture.
(identity_escape) @string.regexp
(class_character) @constant
; Escapes and assertions captured as escape sequences
[
(control_letter_escape)
(character_class_escape)
(control_escape)
(boundary_assertion)
(non_boundary_assertion)
] @string.escape
; Single-character operator tokens
[
"*"
"+"
"?"
"|"
"="
"!"
"-"
] @operator
; Start and end assertions
[
(start_assertion)
(end_assertion)
] @punctuation.delimiter
(any_character) @variable.builtin

View file

@ -0,0 +1,118 @@
/// Capture names the highlighter is configured to recognize.
///
/// The slice is handed to `HighlightConfiguration::configure`, and the index
/// carried by a `HighlightEvent::HighlightStart` event is resolved back to a
/// name by indexing into this same slice, so both uses rely on one ordering.
///
/// Source: https://github.com/nvim-treesitter/nvim-treesitter/blob/master/CONTRIBUTING.md
pub const NAMES: &[&str] = &[
// Identifiers
"variable", // various variable names
"variable.builtin", // built-in variable names (e.g. `this`)
"variable.parameter", // parameters of a function
"variable.member", // object and struct fields
"constant", // constant identifiers
"constant.builtin", // built-in constant values
"constant.macro", // constants defined by the preprocessor
"module", // modules or namespaces
"module.builtin", // built-in modules or namespaces
"label", // GOTO and other labels (e.g. `label:` in C), including heredoc labels
// Literals
"string", // string literals
"string.documentation", // string documenting code (e.g. Python docstrings)
"string.regexp", // regular expressions
"string.escape", // escape sequences
"string.special", // other special strings (e.g. dates)
"string.special.symbol",// symbols or atoms
"string.special.url", // URIs (e.g. hyperlinks)
"string.special.path", // filenames
"character", // character literals
"character.special", // special characters (e.g. wildcards)
"boolean", // boolean literals
"number", // numeric literals
"number.float", // floating-point number literals
// Types
"type", // type or class definitions and annotations
"type.builtin", // built-in types
"type.definition", // identifiers in type definitions (e.g. `typedef <type> <identifier>` in C)
"type.qualifier", // type qualifiers (e.g. `const`)
"attribute", // attribute annotations (e.g. Python decorators)
"property", // the key in key/value pairs
// Functions
"function", // function definitions
"function.builtin", // built-in functions
"function.call", // function calls
"function.macro", // preprocessor macros
"function.method", // method definitions
"function.method.call", // method calls
"constructor", // constructor calls and definitions
"operator", // symbolic operators (e.g. `+` / `*`)
// Keywords
"keyword", // keywords not fitting into specific categories
"keyword.coroutine", // keywords related to coroutines (e.g. `go` in Go, `async/await` in Python)
"keyword.function", // keywords that define a function (e.g. `func` in Go, `def` in Python)
"keyword.operator", // operators that are English words (e.g. `and` / `or`)
"keyword.import", // keywords for including modules (e.g. `import` / `from` in Python)
"keyword.storage", // modifiers that affect storage in memory or life-time
"keyword.repeat", // keywords related to loops (e.g. `for` / `while`)
"keyword.return", // keywords like `return` and `yield`
"keyword.debug", // keywords related to debugging
"keyword.exception", // keywords related to exceptions (e.g. `throw` / `catch`)
"keyword.conditional", // keywords related to conditionals (e.g. `if` / `else`)
"keyword.conditional.ternary", // ternary operator (e.g. `?` / `:`)
"keyword.directive", // various preprocessor directives & shebangs
"keyword.directive.define", // preprocessor definition directives
// Punctuation
"punctuation.delimiter", // delimiters (e.g. `;` / `.` / `,`)
"punctuation.bracket", // brackets (e.g. `()` / `{}` / `[]`)
"punctuation.special", // special symbols (e.g. `{}` in string interpolation)
// Comments
"comment", // line and block comments
"comment.documentation", // comments documenting code
"comment.error", // error-type comments (e.g. `ERROR`, `FIXME`, `DEPRECATED:`)
"comment.warning", // warning-type comments (e.g. `WARNING:`, `FIX:`, `HACK:`)
"comment.todo", // todo-type comments (e.g. `TODO:`, `WIP:`, `FIXME:`)
"comment.note", // note-type comments (e.g. `NOTE:`, `INFO:`, `XXX`)
// Markup
"markup.strong", // bold text
"markup.italic", // italic text
"markup.strikethrough", // struck-through text
"markup.underline", // underlined text (only for literal underline markup!)
"markup.heading", // headings, titles (including markers)
"markup.quote", // block quotes
"markup.math", // math environments (e.g. `$ ... $` in LaTeX)
"markup.environment", // environments (e.g. in LaTeX)
"markup.link", // text references, footnotes, citations, etc.
"markup.link.label", // link, reference descriptions
"markup.link.url", // URL-style links
"markup.raw", // literal or verbatim text (e.g. inline code)
"markup.raw.block", // literal or verbatim text as a stand-alone block
// (use priority 90 for blocks with injections)
"markup.list", // list markers
"markup.list.checked", // checked todo-style list markers
"markup.list.unchecked", // unchecked todo-style list markers
"diff.plus", // added text (for diff files)
"diff.minus", // deleted text (for diff files)
"diff.delta", // changed text (for diff files)
"tag", // XML-style tag names (and similar)
"tag.attribute", // XML-style tag attributes
"tag.delimiter", // XML-style tag delimiters
];

View file

@ -1,156 +1,137 @@
use std::collections::HashMap;
use std::sync::Arc;
use once_cell::sync::Lazy;
use tree_sitter::Language;
use tree_sitter_highlight::HighlightConfiguration;
pub const NAMES: &[&str] = &[
"comment",
"attribute",
"carriage-return",
"comment",
"comment.documentation",
"constant",
"constant.builtin",
"constructor",
"constructor.builtin",
"embedded",
"error",
"escape",
"function",
"function.builtin",
"include",
"keyword",
"markup",
"markup.bold",
"markup.heading",
"markup.italic",
"markup.link",
"markup.link.url",
"markup.list",
"markup.list.checked",
"markup.list.numbered",
"markup.list.unchecked",
"markup.list.unnumbered",
"markup.quote",
"markup.raw",
"markup.raw.block",
"markup.raw.inline",
"markup.strikethrough",
"module",
"number",
"operator",
"property",
"property.builtin",
"punctuation",
"punctuation.bracket",
"punctuation.delimiter",
"punctuation.special",
"string",
"string.escape",
"string.regexp",
"string.special",
"string.special.symbol",
"tag",
"type",
"type.builtin",
"variable",
"variable.builtin",
"variable.member",
"variable.parameter",
];
use crate::captures;
pub static CONFIGS: Lazy<HashMap<&'static str, Arc<HighlightConfiguration>>> = Lazy::new(|| {
[
/// Embeds a query file at compile time.
///
/// `query!("css/highlights")` expands to the contents of
/// `$CARGO_MANIFEST_DIR/queries/css/highlights.scm` as a `&'static str`.
macro_rules! query {
    ($name:literal) => {
        include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/queries/", $name, ".scm"))
    };
}
/// Short aliases mapped to the canonical language name used as a key in
/// `CONFIGS`.
///
/// `get_config` consults this table first, so a code fence tagged with an
/// alias resolves to the same configuration as the full language name.
pub static EXTENSIONS: Lazy<HashMap<&'static str, &'static str>> = Lazy::new(|| {
    HashMap::from([
        ("hs", "haskell"),
        ("js", "javascript"),
        ("md", "markdown"),
        ("mdx", "markdown"),
        // `rs` was accepted as a language key before aliases moved into this
        // table; keep it resolvable so existing `rs` fences still highlight.
        ("rs", "rust"),
        ("scss", "css"),
        ("ts", "typescript"),
    ])
});
/// Builds a highlight configuration for `lang` from the three query sources
/// and registers the shared capture names on it.
///
/// # Panics
///
/// Panics if `HighlightConfiguration::new` returns an error for the given
/// language and queries.
fn config_for(
    lang: Language,
    highlights: &str,
    injections: &str,
    locals: &str,
) -> HighlightConfiguration {
    let built = HighlightConfiguration::new(lang, highlights, injections, locals);
    let mut configuration = built.unwrap();
    // Register the capture names so highlight events carry indices into them.
    configuration.configure(captures::NAMES);
    configuration
}
pub static CONFIGS: Lazy<HashMap<&'static str, HighlightConfiguration>> = Lazy::new(|| {
HashMap::from([
(
vec!["css", "scss"],
HighlightConfiguration::new(
tree_sitter_css::language(),
tree_sitter_css::HIGHLIGHTS_QUERY,
"",
"",
).unwrap()
"astro",
config_for(
tree_sitter_astro::language(),
query!("astro/highlights"),
query!("astro/injections"),
""
)
),
(
vec!["hs", "haskell"],
HighlightConfiguration::new(
"css",
config_for(
tree_sitter_css::language(),
query!("css/highlights"),
"",
"",
)
),
(
"haskell",
config_for(
tree_sitter_haskell::language(),
tree_sitter_haskell::HIGHLIGHTS_QUERY,
"",
tree_sitter_haskell::LOCALS_QUERY,
).unwrap()
)
),
(
vec!["html", "html"],
HighlightConfiguration::new(
"html",
config_for(
tree_sitter_html::language(),
tree_sitter_html::HIGHLIGHTS_QUERY,
tree_sitter_html::INJECTIONS_QUERY,
"",
).unwrap()
)
),
(
vec!["md", "markdown"],
HighlightConfiguration::new(
tree_sitter_md::language(),
// &format!("{}\n\n{}",
tree_sitter_md::HIGHLIGHT_QUERY_BLOCK,
// tree_sitter_md::HIGHLIGHT_QUERY_INLINE,
// ),
// &format!("{}\n\n{}",
tree_sitter_md::INJECTION_QUERY_BLOCK,
// tree_sitter_md::INJECTION_QUERY_INLINE,
// ),
""
).unwrap()
),
(
vec!["rs", "rust"],
HighlightConfiguration::new(
tree_sitter_rust::language(),
tree_sitter_rust::HIGHLIGHT_QUERY,
tree_sitter_rust::INJECTIONS_QUERY,
"",
).unwrap()
),
(
vec!["js", "javascript"],
HighlightConfiguration::new(
"javascript",
config_for(
tree_sitter_javascript::language(),
tree_sitter_javascript::HIGHLIGHT_QUERY,
tree_sitter_javascript::INJECTION_QUERY,
tree_sitter_javascript::LOCALS_QUERY,
).unwrap()
)
),
(
vec!["jsx"],
HighlightConfiguration::new(
tree_sitter_javascript::language(),
tree_sitter_javascript::JSX_HIGHLIGHT_QUERY,
tree_sitter_javascript::INJECTION_QUERY,
tree_sitter_javascript::LOCALS_QUERY,
).unwrap()
"markdown",
config_for(
tree_sitter_md::language(),
tree_sitter_md::HIGHLIGHT_QUERY_BLOCK,
tree_sitter_md::INJECTION_QUERY_BLOCK,
""
)
),
(
vec!["ts", "typescript"],
HighlightConfiguration::new(
"regex",
config_for(
tree_sitter_regex::language(),
query!("regex/highlights"),
"",
""
)
),
(
"rust",
config_for(
tree_sitter_rust::language(),
tree_sitter_rust::HIGHLIGHT_QUERY,
tree_sitter_rust::INJECTIONS_QUERY,
"",
)
),
(
"typescript",
config_for(
tree_sitter_typescript::language_typescript(),
&format!("{}\n\n{}",
&format!("{}\n{}",
tree_sitter_javascript::HIGHLIGHT_QUERY,
tree_sitter_typescript::HIGHLIGHT_QUERY
),
tree_sitter_javascript::INJECTION_QUERY,
tree_sitter_typescript::LOCALS_QUERY,
).unwrap()
)
),
]
.into_iter()
.flat_map(|(keys, mut config)| {
config.configure(NAMES);
let config = Arc::new(config);
keys.into_iter().map(move |key| (key, config.clone()))
})
.collect()
])
});
/// Looks up the highlight configuration registered for `name`.
///
/// `name` is first checked against `EXTENSIONS`; when it is a known alias the
/// canonical language name it maps to is used as the key, otherwise `name`
/// itself is used. Returns `None` when `CONFIGS` has no entry for the key.
pub fn get_config(name: &str) -> Option<&'static HighlightConfiguration> {
    let canonical = EXTENSIONS.get(name).copied().unwrap_or(name);
    CONFIGS.get(canonical)
}

View file

@ -1,19 +1,35 @@
#![deny(clippy::all)]
mod captures;
mod configs;
use std::collections::HashMap;
use tree_sitter_highlight::Highlighter;
use tree_sitter_highlight::HighlightEvent;
use configs::{CONFIGS, NAMES};
#[macro_use]
extern crate napi_derive;
/// Converts one highlighter event into a string-keyed map.
///
/// Every map has a `"kind"` entry:
/// - `"text"` for a source range, with the covered slice of `src` under `"text"`;
/// - `"open"` for a highlight start, with the capture name under `"name"`;
/// - `"close"` for a highlight end, with no further entries.
fn map_event(event: HighlightEvent, src: &str) -> HashMap<String, String> {
    let mut obj = HashMap::new();
    match event {
        HighlightEvent::Source { start, end } => {
            obj.insert(String::from("kind"), String::from("text"));
            obj.insert(String::from("text"), String::from(&src[start..end]));
        }
        HighlightEvent::HighlightStart(highlight) => {
            obj.insert(String::from("kind"), String::from("open"));
            // The event carries an index into the configured capture names.
            obj.insert(String::from("name"), String::from(captures::NAMES[highlight.0]));
        }
        HighlightEvent::HighlightEnd => {
            obj.insert(String::from("kind"), String::from("close"));
        }
    }
    obj
}
#[napi]
pub fn hl(lang: String, src: String) -> Vec<HashMap<String, String>> {
let config = match CONFIGS.get(&*lang) {
let config = match configs::get_config(&lang) {
Some(c) => c,
None => return vec![
HashMap::from([
@ -24,31 +40,20 @@ pub fn hl(lang: String, src: String) -> Vec<HashMap<String, String>> {
};
let mut highlighter = Highlighter::new();
let highlights = highlighter.highlight(
let mut hl = Highlighter::new();
let highlights = hl.highlight(
&config,
src.as_bytes(),
None,
|key| CONFIGS.get(key).map(|arc| arc.as_ref())
|name| configs::get_config(name)
).unwrap();
let mut out = vec![];
for event in highlights {
match event.unwrap() {
HighlightEvent::Source {start, end} => out.push(HashMap::from([
("kind".into(), "text".into()),
("text".into(), src[start..end].into())
])),
HighlightEvent::HighlightStart(s) => out.push(HashMap::from([
("kind".into(), "open".into()),
("name".into(), NAMES[s.0].into())
])),
HighlightEvent::HighlightEnd => out.push(HashMap::from([
("kind".into(), "close".into())
]))
}
let event = event.unwrap();
let obj = map_event(event, &src);
out.push(obj);
}
out
}