feat: cache kanji request, add kklc tools

This commit is contained in:
Maciej Jur 2024-02-03 13:44:41 +01:00
parent b984c9fadb
commit 588a0d1480
Signed by: kamov
GPG key ID: 191CBFF5F72ECAFD
7 changed files with 350 additions and 27 deletions

6
.gitignore vendored
View file

@ -32,5 +32,7 @@ src/**/*.webp
.obsidian/
.stfolder/
# generated kanji
public/static/kanji/*
# Kanji generator
tools/kklc/kklc.csv
tools/kklc/target/
public/static/kanji/

View file

@ -1,19 +1,18 @@
type Ruby = Array<[string, string]>;
export interface KKLCEntry {
entry: number;
id: number;
char: string;
meanings: string[];
on: string[];
kun: string[];
examples: Array<
[Array<[string, string]>, string]
>;
keys: string[];
senses: string[];
onyomi: string[];
kunyomi: string[];
examples: Array<[string, Ruby]>;
}
async function chooseKanji(): Promise<number> {
async function chooseId(): Promise<number> {
const date = new Date().toLocaleDateString('en');
console.log(date);
const data = new TextEncoder().encode(date);
const hash = await crypto.subtle.digest('SHA-256', data);
@ -23,10 +22,30 @@ async function chooseKanji(): Promise<number> {
const min = 1;
const max = 2300;
const id = min + (hashValue % (max - min + 1));
return id;
return min + (hashValue % (max - min + 1));
}
export async function fetchKanji(): Promise<KKLCEntry> {
return await fetch(`/static/kanji/${await chooseKanji()}.json`).then(res => res.json());
/**
 * Looks up the cached kanji entry for `id`.
 *
 * The cache is the single `localStorage` item stored under the key `'kanji'`,
 * holding a JSON object of the form `{ id, data }`.
 *
 * @param id identifier of the entry the caller wants
 * @returns the cached `data` when the stored `id` matches, otherwise `undefined`
 */
function tryGetCache(id: number) {
  try {
    const item = localStorage.getItem('kanji');
    if (!item) return;
    const cache = JSON.parse(item);
    // The stored value may parse to `null` or a primitive, hence the optional chaining.
    if (cache?.id === id) {
      return cache.data;
    }
  } catch {
    // Unreadable storage or malformed JSON is treated as a cache miss so the
    // caller falls back to fetching the entry.
  }
}
/**
 * Stores `data` as the cached entry for `id`, replacing any previous one.
 *
 * The entry is written to the `localStorage` item `'kanji'` as `{ id, data }`.
 * Caching is best-effort: a failing write (for example a full or disabled
 * storage) is ignored so that the already-fetched data is still usable.
 *
 * @param id identifier the entry was fetched for
 * @param data the entry to remember
 */
function insertCache(id: number, data: KKLCEntry) {
  try {
    localStorage.setItem('kanji', JSON.stringify({ id, data }));
  } catch {
    // Ignored on purpose: the cache only saves a request, it is not required.
  }
}
/**
 * Resolves the kanji entry selected by `chooseId`.
 *
 * A cached copy is returned when one exists for the chosen id; otherwise the
 * entry is fetched from `/static/kanji/<id>.json`, cached and returned.
 *
 * @throws Error when the server answers with a non-2xx status
 */
export async function getKanji(): Promise<KKLCEntry> {
  const id = await chooseId();

  const cached = tryGetCache(id);
  if (cached) return cached;

  const res = await fetch(`/static/kanji/${id}.json`);
  // Without this check an error body would be parsed (and cached) as an entry.
  if (!res.ok) {
    throw new Error(`Failed to fetch kanji ${id}: ${res.status} ${res.statusText}`);
  }

  const data: KKLCEntry = await res.json();
  insertCache(id, data);
  return data;
}

View file

@ -1,9 +1,9 @@
<script lang="ts">
import { fetchKanji, type KKLCEntry } from './data.svelte.ts';
import { getKanji, type KKLCEntry } from './data.svelte.ts';
let state = $state<Promise<KKLCEntry>>(new Promise(() => {}));
$effect(() => void (state = fetchKanji()));
$effect(() => void (state = getKanji()));
</script>
@ -19,26 +19,26 @@
</div>
<div class="info-meta">
<div class="info-key">
{state.meanings.join(', ')}
{state.keys.join(', ')}
</div>
<div class="info-on">
{state.on.join(', ')}
{state.onyomi.join(', ')}
</div>
<div class="info-kun">
{state.kun.join(', ')}
{state.kunyomi.join(', ')}
</div>
</div>
</div>
<table>
<table class="examples">
<tbody>
{#each state.examples as [example, meaning]}
{#each state.examples as [meaning, example]}
<tr>
<td>
<td class="examples-ja">
<ruby>
{#each example as [kanji, furigana]}{kanji}<rt>{furigana || ''}</rt>{/each}
{#each example as [expr, ruby]}{expr}<rt>{ruby||''}</rt>{/each}
</ruby>
</td>
<td>
<td class="examples-en">
{meaning}
</td>
</tr>

View file

@ -15,6 +15,7 @@
}
.info-char {
align-self: flex-start;
position: relative;
padding: 0.1em;
border: 1px dashed black;
@ -42,8 +43,10 @@
display: block;
}
}
.ruby-cell {
min-width: 5em;
.examples-ja {
padding-right: 0.5em;
max-width: 3em;
}
$side: 5em;

124
tools/kklc/Cargo.lock generated Normal file
View file

@ -0,0 +1,124 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "csv"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe"
dependencies = [
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70"
dependencies = [
"memchr",
]
[[package]]
name = "furigana"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f66a30b0d76741b8ddf54f5cd6bd80c635810ce9dcef147c14ee545e1b07bcd"
[[package]]
name = "itoa"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c"
[[package]]
name = "kklc"
version = "0.1.0"
dependencies = [
"csv",
"furigana",
"serde",
"serde_json",
]
[[package]]
name = "memchr"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
[[package]]
name = "proc-macro2"
version = "1.0.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
dependencies = [
"proc-macro2",
]
[[package]]
name = "ryu"
version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c"
[[package]]
name = "serde"
version = "1.0.196"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.196"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.113"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69801b70b1c3dac963ecb03a364ba0ceda9cf60c71cfe475e99864759c8b8a79"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "syn"
version = "2.0.48"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"

12
tools/kklc/Cargo.toml Normal file
View file

@ -0,0 +1,12 @@
[package]
name = "kklc"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
serde = { version = "1.0.196", features = ["derive"] }
serde_json = "1.0.113"
csv = "1.3"
furigana = "0.1.1"

163
tools/kklc/src/main.rs Normal file
View file

@ -0,0 +1,163 @@
use std::{collections::HashMap, error::Error};
use std::fs::File;
use serde::{Serialize, Deserialize};
use csv::{ReaderBuilder, StringRecord};
/// Path of the source CSV; it is relative, so it is resolved against the
/// directory the tool is started from.
const PATH_CSV: &str = "kklc.csv";
/// Maps a kanji (CSV column 1) to its readings, as built by `get_readings`
/// (kunyomi first, then onyomi).
type Readings = HashMap<String, Vec<String>>;
/// A piece of text as a list of segments, each with an optional furigana annotation.
type Ruby = Vec<(String, Option<String>)>;
/// One kanji entry, built from a single CSV record by `map_record` and written
/// out as JSON by `main`.
#[derive(Debug, Serialize, Deserialize)]
struct KKLCEntry {
/// Entry number, parsed from CSV column 0.
id: i32,
/// The kanji itself, taken from CSV column 1.
char: String,
/// CSV column 2, split on blank lines.
keys: Vec<String>,
/// CSV column 6, split on blank lines.
senses: Vec<String>,
/// CSV column 3, split on blank lines.
onyomi: Vec<String>,
/// CSV column 4, split on blank lines.
kunyomi: Vec<String>,
/// One pair per vocabulary chunk of CSV column 5: the third part of the chunk
/// and the ruby built from its first two parts (see `map_example`).
examples: Vec<(String, Ruby)>,
}
/// Reads `PATH_CSV` and collects, for every kanji, the readings listed for it.
///
/// The kanji is taken from column 1. Columns 4 (kunyomi) and 3 (onyomi) are
/// split on blank lines and every chunk is cut at its first `(`; the kunyomi
/// come first in the resulting list.
///
/// # Panics
///
/// Panics when the CSV cannot be opened or read, or when a record lacks
/// column 1, 3 or 4.
fn get_readings() -> HashMap<String, Vec<String>> {
    /// Keeps only the text in front of the first `(`.
    fn base_reading(raw: &str) -> String {
        // `split` always yields at least one item, so this cannot fail.
        raw.split('(').next().unwrap().to_owned()
    }

    let mut reader = ReaderBuilder::new()
        .from_path(PATH_CSV)
        .expect("Failed opening CSV");

    let mut table = HashMap::new();
    for row in reader.records() {
        let row = row.expect("Error reading CSV record");
        let kanji = row.get(1).unwrap().to_owned();
        let on = row.get(3).unwrap().split("\n\n").map(base_reading);
        let kun = row.get(4).unwrap().split("\n\n").map(base_reading);
        table.insert(kanji, kun.chain(on).collect());
    }
    table
}
/// Splits one vocabulary chunk into three owned parts and cleans up the first two.
///
/// The input is split on the separator literal below and the first three pieces
/// are taken in order:
/// 1. the first piece with the `¹`, `²` and `*` markers removed, then trimmed;
/// 2. the second piece cut at its first `[` and its first `(`, with spaces and
///    the `¹`/`²` markers removed, then trimmed;
/// 3. the third piece, returned untouched.
///
/// Presumably these are the written form, its reading and the meaning (that is
/// how `map_example` uses them) — confirm against the CSV.
///
/// Panics when the split yields fewer than three pieces.
///
/// NOTE(review): the separator passed to `split` and one of the `replace`
/// patterns show up as empty strings in this view; presumably they hold a
/// non-printing character — confirm against the raw file before touching them.
fn map_vocab(str: &str) -> (String, String, String) {
let mut iter = str.split("").map(String::from);
(
// First piece: strip the markers, then surrounding whitespace.
iter.next().unwrap()
.replace("¹", "")
.replace("²", "")
.replace("", "")
.replace("*", "")
.trim()
.into(),
// Second piece: drop everything from the first '[' or '(' onwards, then
// remove spaces and markers.
iter.next().unwrap()
.split('[').next().unwrap()
.split('(').next().unwrap()
.replace(" ", "")
.replace("¹", "")
.replace("²", "")
.replace("", "")
.trim()
.into(),
// Third piece is passed through as-is.
iter.next().unwrap(),
)
}
/// Builds the ruby annotation for the text `fst` with the reading `snd`.
///
/// When `fst` does not contain the marker literal checked below,
/// `furigana::map` is asked for candidate mappings and the one with the highest
/// `accuracy` is used, provided that accuracy is above zero. In every other
/// case the whole of `fst` is returned as a single segment annotated with the
/// whole of `snd`.
///
/// NOTE(review): the marker literal shows up as an empty string in this view.
/// As written, `contains("")` is always true, which would skip the `furigana`
/// branch entirely — presumably it holds a non-printing character; confirm
/// against the raw file.
fn map_ruby(fst: &str, snd: &str, readings: &Readings) -> Ruby {
if !fst.contains("") {
// Pick the candidate the library is most confident about.
let mapping = furigana::map(fst, snd, &readings)
.into_iter()
.max_by_key(|f| f.accuracy);
if let Some(mapping) = mapping {
if mapping.accuracy > 0 {
return mapping.furigana
.into_iter()
.map(|x| (x.segment.to_owned(), x.furigana.map(String::from)))
.collect()
}
}
}
// Fallback: one segment covering the whole text, annotated with the whole reading.
vec![(fst.to_owned(), Some(snd.to_owned()))]
}
/// Turns parsed vocabulary triples into the `examples` list of an entry.
///
/// For every `(first, second, third)` triple the result holds the third part
/// together with the ruby that `map_ruby` builds from the first two.
fn map_example(vocab: &[(String, String, String)], readings: &Readings) -> Vec<(String, Ruby)> {
    let mut examples = Vec::with_capacity(vocab.len());
    for (text, reading, word) in vocab {
        let ruby = map_ruby(text, reading, readings);
        examples.push((word.clone(), ruby));
    }
    examples
}
/// Splits a multi-value CSV column into its non-empty chunks.
///
/// Values inside a column are separated by a blank line. A missing column is
/// treated like an empty one, and empty chunks are dropped so that an empty
/// column yields no values at all instead of a single empty string.
fn split_column(record: &StringRecord, index: usize) -> impl Iterator<Item = &str> {
    record
        .get(index)
        .unwrap_or("")
        .split("\n\n")
        .filter(|chunk| !chunk.is_empty())
}

/// Converts one CSV record into a [`KKLCEntry`].
///
/// Column layout: 0 = id, 1 = kanji, 2 = keys, 3 = onyomi, 4 = kunyomi,
/// 5 = vocabulary, 6 = senses. `readings` is forwarded to `map_example` to
/// build the ruby of the vocabulary examples.
///
/// # Panics
///
/// Panics when the id or kanji column is missing, when the id is not an
/// integer, or when a non-empty vocabulary chunk is rejected by `map_vocab`.
fn map_record(record: StringRecord, readings: &Readings) -> KKLCEntry {
    let id = record
        .get(0)
        .expect("CSV record is missing the id column")
        .parse()
        .expect("CSV id column is not a valid integer");

    let char: String = record
        .get(1)
        .expect("CSV record is missing the kanji column")
        .into();

    let keys = split_column(&record, 2).map(String::from).collect();
    let onyomi = split_column(&record, 3).map(String::from).collect();
    let kunyomi = split_column(&record, 4).map(String::from).collect();
    let senses = split_column(&record, 6).map(String::from).collect();

    // Empty chunks are skipped before parsing: `map_vocab` expects three parts
    // per chunk and would panic on the empty chunk an empty column produces.
    let vocab = split_column(&record, 5).map(map_vocab).collect::<Vec<_>>();
    let examples = map_example(&vocab, readings);

    KKLCEntry { id, char, keys, senses, onyomi, kunyomi, examples }
}
/// Reads `PATH_CSV` and converts every record into a [`KKLCEntry`], in file order.
///
/// # Panics
///
/// Panics when the CSV cannot be opened or a record cannot be read, and
/// whenever `map_record` panics.
fn get_entries(readings: &HashMap<String, Vec<String>>) -> Vec<KKLCEntry> {
    let mut reader = ReaderBuilder::new()
        .from_path(PATH_CSV)
        .expect("Error opening file");

    let mut entries = Vec::new();
    for row in reader.records() {
        let row = row.expect("Error reading CSV record");
        entries.push(map_record(row, readings));
    }
    entries
}
/// Generates one JSON file per CSV entry under `../../public/static/kanji/`.
///
/// Files are named after the 1-based position of the entry in the CSV
/// (`1.json`, `2.json`, ...). The output directory is created when missing.
///
/// NOTE(review): the file name uses the position, not `entry.id`; presumably
/// the two agree for this data set — confirm against the CSV.
///
/// # Errors
///
/// Returns an error when the output directory or a file cannot be created, or
/// when writing the JSON fails.
fn main() -> Result<(), Box<dyn Error>> {
    const OUT_DIR: &str = "../../public/static/kanji";

    let readings = get_readings();
    let entries = get_entries(&readings);

    // The directory is git-ignored, so it does not exist in a fresh checkout.
    std::fs::create_dir_all(OUT_DIR)
        .map_err(|e| format!("Error creating directory {OUT_DIR}: {e}"))?;

    for (i, entry) in entries.iter().enumerate() {
        let path = format!("{}/{}.json", OUT_DIR, i + 1);
        let file = File::create(&path).map_err(|e| format!("Error creating file {path}: {e}"))?;

        // Buffer the output: the serializer issues many small writes.
        let mut writer = std::io::BufWriter::new(file);
        serde_json::to_writer(&mut writer, entry)
            .map_err(|e| format!("Error writing JSON to {path}: {e}"))?;
        // Flush explicitly; errors raised while dropping the writer would be lost.
        std::io::Write::flush(&mut writer)
            .map_err(|e| format!("Error writing JSON to {path}: {e}"))?;
    }

    Ok(())
}