From 7216068e5cc27560fa06a718333f89b67025b98a Mon Sep 17 00:00:00 2001 From: Maciej Jur Date: Sun, 6 Oct 2024 22:11:58 +0200 Subject: [PATCH] content: building-an-ssg-in-rust --- content/index.md | 5 + .../building-an-ssg-in-rust/bibliography.bib | 17 ++ .../posts/building-an-ssg-in-rust/index.md | 179 ++++++++++++++++++ content/posts/typeclasses/index.md | 20 +- src/main.rs | 16 +- src/model.rs | 6 + styles/styles.scss | 38 ++-- 7 files changed, 246 insertions(+), 35 deletions(-) create mode 100644 content/posts/building-an-ssg-in-rust/bibliography.bib create mode 100644 content/posts/building-an-ssg-in-rust/index.md diff --git a/content/index.md b/content/index.md index e5f0cb0..bc7d6e5 100644 --- a/content/index.md +++ b/content/index.md @@ -1,3 +1,7 @@ +--- +title: Home +--- + # Welcome to my website! :heart: You have found this little floating rock in the middle of the Internet! Congrats 🎉 @@ -15,6 +19,7 @@ Throughout the last few years I've been taking photos of random things in random [Take me to the map!](/map/) ## A short guide to the Hanafuda card game + I've been playing Hanafuda for a while now, so I decided to write a short introduction into this interesting card game. I am by no means good at the game, so take everything with a grain of salt, but I feel like it should be enough to get you started. 
[Take me to the guide!](/posts/hanafuda/) diff --git a/content/posts/building-an-ssg-in-rust/bibliography.bib b/content/posts/building-an-ssg-in-rust/bibliography.bib new file mode 100644 index 0000000..d2940c1 --- /dev/null +++ b/content/posts/building-an-ssg-in-rust/bibliography.bib @@ -0,0 +1,17 @@ +@article{10.1145/3236774, + author = {Mokhov, Andrey and Mitchell, Neil and Peyton Jones, Simon}, + title = {Build systems \`{a} la carte}, + year = {2018}, + issue_date = {September 2018}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + volume = {2}, + number = {ICFP}, + url = {https://doi.org/10.1145/3236774}, + doi = {10.1145/3236774}, + journal = {Proc. ACM Program. Lang.}, + month = jul, + articleno = {79}, + numpages = {29}, + keywords = {algorithms, build systems, functional programming} +} diff --git a/content/posts/building-an-ssg-in-rust/index.md b/content/posts/building-an-ssg-in-rust/index.md new file mode 100644 index 0000000..b628fe2 --- /dev/null +++ b/content/posts/building-an-ssg-in-rust/index.md @@ -0,0 +1,179 @@ +--- +title: Building an SSG in Rust +date: 2024-10-05T12:29:54.111Z +desc: > + Some reflections on using Rust to write a static site generator... +--- + +For the past few months in my spare time, I’ve been programming a simple +library in Rust that can be used to generate a static website from markdown +files and images. I myself use it to generate this very website, and it is +available under the GPL license on +[crates.io](https://crates.io/crates/hauchiwa), though it might be outdated - +the latest version is always on [GitHub](https://github.com/kamoshi/hauchiwa) +and the documentation is available on +[docs.rs](https://docs.rs/hauchiwa/latest/hauchiwa/). 
+ +You can add it to your own project in two ways, like so: + +```toml +hauchiwa = "*" +hauchiwa = { git = "https://github.com/kamoshi/hauchiwa" } +``` + +## Background + +Throughout the years I've tried many different tools, some of them better than +others. For example, I really liked the speed of Hugo and the flexibility of +Astro, but none of the available tools fulfilled my needs. I wanted both speed +and lots of flexibility at the same time, so I figured that I needed to create my own +generator from scratch to accomplish what I want to do. + +The first thing I had to do was choose the language and the ecosystem for the +generator, and as you already know, I ended up with Rust. I've considered +different languages and ecosystems - like Haskell - but Rust currently has a lot +of industrial momentum, that's the current zeitgeist. + +Contrary to what many people say, Rust is not a silver bullet: the fact that in +Rust you have to deal with memory, even if it's automatic most of the time, can +be a deal breaker. Sometimes you just don't need to care about memory, so having +to deal with it is a waste of mental energy. Nevertheless, I decided that this +tradeoff is worth taking in this case, given that Rust has: + +- lots of good enough libraries +- vibrant community +- ergonomic abstractions +- automatic memory management with borrow checker + +So given these facts, I came to the conclusion that going with Rust will make it +easy to find any library I need to create a generator, and the memory management +is an acceptable tradeoff for the fact that Rust programs are generally quite +fast and compile to a single binary. + +When it comes to the actual form factor of the library, I wanted it to be really +minimal and allow for maximal flexibility. I really enjoyed the way Astro works, +you use it as a general framework, you have lots of freedom to define each page +on the generated website.
I would like to preserve this spirit in my library, +while at the same time creating a robust and idiomatic API in Rust. + +Some of the requirements I had in mind are: + +- The library should be decoupled from any templating engine, the user should + be able to choose their own way to generate the HTML, they should even be able + to do it by concatenating strings manually if they so desire. +- The user shouldn't be limited to Markdown, the library should be format + agnostic and the user should be allowed to bring any parser they want and use + it to convert any kind of file to HTML. +- The user should be able to generate HTML pages that don't have any original + source files related to them, think of dynamically generated lists of pages, + tags, etc. +- There should be a way to render different pages differently, some collections + of Markdown files should output different looking pages. +- The library should make it easy to watch for changes and allow the user to add + live reload while editing their website. + +In my library I've tried to address all of these requirements, but it's still +being worked out. I've spent a lot of time thinking through lots of design +decisions until I landed on some sweet spot in the design space, but even now, +I'm not sure if there are better ways to accomplish some things... + +## Incremental build system + +To implement the incremental build process and live reload, I've ended up reading +an article called _Build systems à la carte_, which goes over different ways to +implement a build system in Haskell. I would recommend reading it; it was really +useful. Based on some prior experience as well as this article, I've decided to +go with a _suspending_ scheduler, as well as both _verifying traces_ and +_constructive traces_ for the rebuilding strategy. + +_Suspending_ means that the moment a certain page requires, for example, a PNG +image or a CSS stylesheet, I pause the page build process in order to prepare the +required asset.
In practice, this just means I call a function that is supposed +to build that image, so it's not anything difficult. + +```rust + /// Get compiled CSS style by file path. + pub fn get_styles(&self, path: &Utf8Path) -> Option { + let input = self.items.values().find(|item| item.file == path)?; + if !matches!(input.data, Input::Stylesheet(..)) { + return None; + } + + self.tracked + .borrow_mut() + .insert(input.file.clone(), input.hash.clone()); + + self.schedule(input) + } +``` + +This function calls another function `schedule`, which builds the asset if it +needs to be built. + +```rust + fn schedule(&self, input: &InputItem) -> Option { + let res = self.builder.read().unwrap().check(input); + if res.is_some() { + return res; + } + + let res = self.builder.write().unwrap().build(input); + Some(res) + } +``` + +Here `self.builder` is behind an `RwLock` which needs to be acquired in order to +build the asset. This is just an implementation detail; `RwLock` allows the +builder to be shared in a multithreaded environment and allows many reads at +the same time. This is optimal for the case when the asset is in fact already +built. + +When it comes to the traces, I've decided to use the following strategy to trace +input assets: + +```rust +#[derive(Debug)] +pub(crate) struct InputItem { + pub(crate) hash: Vec, + pub(crate) file: Utf8PathBuf, + pub(crate) slug: Utf8PathBuf, + pub(crate) data: Input, +} +``` + +Each asset has a binary `hash` and with this information alone we can easily +check if the input asset has changed in a meaningful way between two builds. + +In order to trace the individual build tasks that are defined by the user to +generate the HTML pages, I've decided to use the following struct: + +```rust +#[derive(Debug)] +struct Trace { + task: Task, + init: bool, + deps: HashMap>, +} +``` + +Here we have `task` which is in fact a closure pointer - a pointer to a function +defined by the user of the library. 
This function consumes a `Sack` which is the access point that tracks +the dependencies required by the task. + +```rust +/// Task function pointer used to dynamically generate a website page. +type TaskFnPtr = Arc) -> Vec<(Utf8PathBuf, String)> + Send + Sync>; + +/// Wraps `TaskFnPtr` and implements `Debug` trait for function pointer. +pub(crate) struct Task(TaskFnPtr); +``` + +These dependencies are then kept as the `deps` field, so we can check if any of +the input files required by a certain task have changed. If they have, we can +rebuild the task and update the dependencies. There's also the `init` field +which just forces the task to be built for the first time. + +This is just the bare minimum to make this build system work, there are still +some open questions, like "What if the build task is nondeterministic, should it +be rebuilt every time?". Please take a look at the library code to see how the +current build system works in detail. diff --git a/content/posts/typeclasses/index.md b/content/posts/typeclasses/index.md index 19dea88..7c47135 100644 --- a/content/posts/typeclasses/index.md +++ b/content/posts/typeclasses/index.md @@ -1,11 +1,11 @@ --- -title: Breaking apart the Haskell type class -date: 2023-11-02T17:28:25.466Z -icon: haskell +title: Breaking apart the Haskell type class +date: 2023-11-02T17:28:25.466Z +icon: haskell desc: > - Type classes are perhaps the most distinctive feature of Haskell. - I’ve found them pretty confusing, but in reality they are an incredibly - elegant solution to a certain problem found in functional languages. + Type classes are perhaps the most distinctive feature of Haskell. + I’ve found them pretty confusing, but in reality they are an incredibly + elegant solution to a certain problem found in functional languages. --- Type classes are perhaps the most distinctive feature of Haskell. @@ -103,7 +103,6 @@ add d a b = (+) d a b Effectively, `Num a` serves as a table of operations for the `Num` class for type `a`.
This elegant mechanism allows Haskell to implement polymorphism with type classes while maintaining strict type safety. - ## Kinds A type in Haskell is a classification that defines what kind of data a value can represent. @@ -116,11 +115,13 @@ The notation for kinds in Haskell uses an asterisk `*` to represent the most bas You can check the kind of any type by using the `:kind` command in GHCi. The kind of `Int` is `*`, indicating that `Int` is a concrete type. + ```haskell Int :: * ``` Similarly, the kind of `Float` is `*`, signifying that it is also a concrete type. + ```haskell Float :: * ``` @@ -130,14 +131,17 @@ For instance, type classes introduce a kind called `Constraint`, denoted as `=>` The kind Constraint is used to represent constraints on types and is commonly encountered when defining type classes and their instances. The kind of `Num` is `* -> Constraint`, showing that `Num` is a type class that takes a type (like `Int`) as an argument. + ```haskell Num :: * -> Constraint ``` When `Num` is applied to `Int`, it becomes a constraint on the `Int` type, indicating that `Int` is an instance of the `Num` type class. + ```haskell Num Int :: Constraint ``` + Another example of a kind in Haskell is the kind of unary type constructors `* -> *`. These unary type constructors are similar to generic types in other languages because they accept a type parameter and produce a new type. The signature `* -> *` signifies that the type constructor transforms one concrete type into another concrete type. @@ -179,7 +183,6 @@ In this definition, `Either` is a binary type constructor because it requires tw The `Either` type is often used for scenarios where a value can have one of two possible types, such as representing success or failure or providing an alternative to error handling. Conventionally, the `Left` value indicates error conditions, while the `Right` value signifies valid or correct values. 
- ### Higher Kinded Types In Haskell, understanding the concept of "Higher Kinded Types" is crucial for grasping how certain more abstract type classes work. @@ -231,7 +234,6 @@ instance Functor (Either a) where In the instance declaration above, we specify that, for any type `a`, `Either a` is an instance of the `Functor` type class. This allows you to use the `fmap` function with `Either a`, providing a way to map over the second type argument of the `Either` type while keeping the first type argument fixed. - ## Other languages ### Scala diff --git a/src/main.rs b/src/main.rs index b8461e6..f0ca917 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,7 +10,7 @@ use chrono::{DateTime, Datelike, Utc}; use clap::{Parser, ValueEnum}; use hauchiwa::{Collection, Sack, Website}; use hypertext::Renderable; -use model::{Post, Slideshow, Wiki}; +use model::{Home, Post, Slideshow, Wiki}; #[derive(Parser, Debug, Clone)] struct Args { @@ -73,6 +73,7 @@ fn main() { let website = Website::setup() .add_collections(vec![ + Collection::glob_with::("content", "index.md", ["md"].into()), Collection::glob_with::("content", "about.md", ["md"].into()), Collection::glob_with::("content", "posts/**/*", ["md", "mdx"].into()), Collection::glob_with::("content", "slides/**/*", ["md", "lhs"].into()), @@ -87,6 +88,13 @@ fn main() { ("editor", "./js/flox/main.ts"), ("lambda", "./js/flox/lambda.ts"), ]) + // Task: generate home page + .add_task(|sack| { + let query = sack.get_content::("").unwrap(); + let (parsed, _, _) = text::md::parse(query.content, &sack, query.area, None); + let out_buff = html::home(&sack, &parsed); + vec![("index.html".into(), out_buff)] + }) .add_task(|sack| { let query = sack.get_content::("about").unwrap(); let (parsed, outline, bib) = @@ -147,12 +155,6 @@ fn main() { }) // Task: generate search .add_task(|sack| vec![("search/index.html".into(), crate::html::search(&sack))]) - // Task: generate home page - .add_task(|sack| { - let data = 
std::fs::read_to_string("content/index.md").unwrap(); - let (parsed, _, _) = text::md::parse(&data, &sack, "".into(), None); - vec![("index.html".into(), crate::html::home(&sack, &parsed))] - }) .add_task(|sack| { let query = sack.get_content("projects/flox").unwrap(); diff --git a/src/model.rs b/src/model.rs index bb6acd0..a4af2a6 100644 --- a/src/model.rs +++ b/src/model.rs @@ -1,6 +1,12 @@ use chrono::{DateTime, Utc}; use serde::Deserialize; +/// Represents the home page. +#[derive(Deserialize, Debug, Clone)] +pub struct Home { + pub title: String, +} + /// Represents a simple post. #[derive(Deserialize, Debug, Clone)] pub struct Post { diff --git a/styles/styles.scss b/styles/styles.scss index b051290..bac4238 100644 --- a/styles/styles.scss +++ b/styles/styles.scss @@ -1,30 +1,30 @@ // Config -@use 'root'; -@use 'reset'; -@use 'fonts'; -@use 'base'; -@use 'markdown'; -@use 'kanagawa'; +@use "root"; +@use "reset"; +@use "fonts"; +@use "base"; +@use "markdown"; +@use "kanagawa"; // Components -@use 'components/map'; -@use 'components/search'; -@use 'components/link-tree'; -@use 'components/bibliography'; -@use 'components/kanji'; +@use "components/map"; +@use "components/search"; +@use "components/link-tree"; +@use "components/bibliography"; +@use "components/kanji"; // Partials -@use 'partials/navbar'; -@use 'partials/footer'; +@use "partials/navbar"; +@use "partials/footer"; // Shortcodes -@use 'shortcodes/timeline'; -@use 'shortcodes/marginnote'; +@use "shortcodes/timeline"; +@use "shortcodes/marginnote"; // Layouts -@use 'layouts/home'; -@use 'layouts/page'; -@use 'layouts/list'; +@use "layouts/home"; +@use "layouts/page"; +@use "layouts/list"; // Special -@use 'flox'; +@use "flox";