From 5b470239f55539cc824ebf73209ba71e821e8fb7 Mon Sep 17 00:00:00 2001 From: mike Date: Thu, 8 Jan 2026 22:54:14 +0100 Subject: [PATCH] Gather data --- src/main/java/puzzle/ExportFormat.java | 2 +- src/main/java/puzzle/SwedishGenerator.java | 7 +- tools/hint/Dockerfile | 13 - tools/hint/dbjsonl.sh | 38 -- tools/hint/jsonl-to-sqlite.mjs | 59 --- tools/hint/pom.xml | 46 -- tools/hint/src/HintJob.java | 581 --------------------- 7 files changed, 6 insertions(+), 740 deletions(-) delete mode 100644 tools/hint/Dockerfile delete mode 100755 tools/hint/dbjsonl.sh delete mode 100644 tools/hint/jsonl-to-sqlite.mjs delete mode 100644 tools/hint/pom.xml delete mode 100644 tools/hint/src/HintJob.java diff --git a/src/main/java/puzzle/ExportFormat.java b/src/main/java/puzzle/ExportFormat.java index 3335437..057c123 100644 --- a/src/main/java/puzzle/ExportFormat.java +++ b/src/main/java/puzzle/ExportFormat.java @@ -54,7 +54,7 @@ public record ExportFormat() { puz.swe().forEachSlot(g, (int key, long rs, long cs, int len) -> { var word = clueMap.get(key); if (word != null) { - var p = extractPlacedFromSlot(new Slot(key, rs, cs, len), word); + var p = extractPlacedFromSlot(Slot.from(key, rs, cs, len), word); if (p != null) placed.add(p); } }); diff --git a/src/main/java/puzzle/SwedishGenerator.java b/src/main/java/puzzle/SwedishGenerator.java index fb85996..8f3c5dc 100644 --- a/src/main/java/puzzle/SwedishGenerator.java +++ b/src/main/java/puzzle/SwedishGenerator.java @@ -328,8 +328,11 @@ public record SwedishGenerator(int[] buff) { return new CandidateInfo(cur, curLen); } static record Slot(int key, long rs, long cs, int len) { - + //perhaps just put len into key and use hash-code and derrive index from key. or just put both ints into tail of the two longs. static Slot from(int key, long rs, long cs, int len) { +/* if ((Long.highestOneBit(rs | cs) >> 2) != (len - 1)) throw new RuntimeException(); + if ((Long.highestOneBit(cs) >> 2) != (len - 1)) throw new RuntimeException(); + if ((Long.highestOneBit(rs) >> 2) != (len - 1)) throw new RuntimeException();*/ return new Slot(key, rs, cs, len); } @@ -385,7 +388,7 @@ public record SwedishGenerator(int[] buff) { ArrayList extractSlots(Grid grid) { var slots = new ArrayList(64); - forEachSlot(grid, (key, rs, cs, len) -> slots.add(new Slot(key, rs, cs, len))); + forEachSlot(grid, (key, rs, cs, len) -> slots.add(Slot.from(key, rs, cs, len))); return slots; } diff --git a/tools/hint/Dockerfile b/tools/hint/Dockerfile deleted file mode 100644 index 18a9c13..0000000 --- a/tools/hint/Dockerfile +++ /dev/null @@ -1,13 +0,0 @@ -FROM eclipse-temurin:25-jdk-alpine -WORKDIR /app -RUN apk add --no-cache curl tzdata - -COPY src/HintJob.java /app/HintJob.java - -RUN mkdir -p /app/target /app/lib \ - && curl -fsSL -o /app/lib/postgresql.jar \ - https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.3/postgresql-42.7.3.jar \ - && javac -d /app/target /app/HintJob.java - -ENTRYPOINT ["/opt/java/openjdk/bin/java","-cp","/app/target:/app/lib/postgresql.jar","HintJob"] -CMD ["3000"] diff --git a/tools/hint/dbjsonl.sh b/tools/hint/dbjsonl.sh deleted file mode 100755 index 936f4ec..0000000 --- a/tools/hint/dbjsonl.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Usage: -# ./import_jsonl.sh "postgresql://user:pass@host:5432/dbname" gloss doc /path/to/file.jsonl -# -# Notes: -# - Creates table if it doesn't exist. -# - Inserts each JSON line into a jsonb column. -# - Skips blank lines. - -DB_URL="${1:?db url}" -TABLE="${2:?table name}" -COL="${3:?json column name}" -FILE="${4:?jsonl file path}" - -psql "$DB_URL" -v ON_ERROR_STOP=1 < ''; - --- optional: show count inserted this run -SELECT count(*) AS inserted_now FROM _jsonl_stage WHERE btrim(line) <> ''; -SQL diff --git a/tools/hint/jsonl-to-sqlite.mjs b/tools/hint/jsonl-to-sqlite.mjs deleted file mode 100644 index 2a28806..0000000 --- a/tools/hint/jsonl-to-sqlite.mjs +++ /dev/null @@ -1,59 +0,0 @@ -// jsonl-to-sqlite.mjs -import fs from 'node:fs' -import readline from 'node:readline' -import Database from 'better-sqlite3' - -const jsonlPath = process.argv[2] -const dbPath = process.argv[3] ?? 'out.sqlite' -const table = process.argv[4] ?? 'events' - -if (!jsonlPath) { - console.error('Usage: node jsonl-to-sqlite.mjs [out.sqlite] [table]') - process.exit(1) -} - -const db = new Database(dbPath) -db.pragma('journal_mode = WAL') - -db.exec(` - CREATE TABLE IF NOT EXISTS ${ table } - ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - json TEXT NOT NULL - ); -`) - -const insert = db.prepare(`INSERT INTO ${ table }(json) - VALUES (?)`) -const insertMany = db.transaction((rows) => { - for (const r of rows) insert.run(r) -}) - -const rl = readline.createInterface({ - input : fs.createReadStream(jsonlPath, { encoding: 'utf8' }), - crlfDelay: Infinity -}) - -let batch = [] -let lineNo = 0 -for await (const line of rl) { - lineNo++ - const trimmed = line.trim() - if (!trimmed) continue - - try { - JSON.parse(trimmed) // validate - batch.push(trimmed) - } catch (e) { - console.warn(`Skipping invalid JSON on line ${ lineNo }: ${ e.message }`) - continue - } - - if (batch.length >= 1000) { - insertMany(batch) - batch = [] - } -} -if (batch.length) insertMany(batch) - -console.log(`Done. Imported into ${ dbPath }, table=${ table }`) diff --git a/tools/hint/pom.xml b/tools/hint/pom.xml deleted file mode 100644 index 1ffa580..0000000 --- a/tools/hint/pom.xml +++ /dev/null @@ -1,46 +0,0 @@ - - 4.0.0 - - puzzle - hintjob - 1.0.0 - - - 25 - 25 - - - - - org.postgresql - postgresql - 42.7.3 - - - - - - - org.apache.maven.plugins - maven-shade-plugin - 3.6.0 - - - package - shade - - false - - - HintJob - - - hintjob-all - - - - - - - diff --git a/tools/hint/src/HintJob.java b/tools/hint/src/HintJob.java deleted file mode 100644 index f7731dd..0000000 --- a/tools/hint/src/HintJob.java +++ /dev/null @@ -1,581 +0,0 @@ -import java.sql.DriverManager; - -static final String JDBC_URL = env("JDBC_URL", "jdbc:postgresql://192.168.1.159:5432/postgres"); -static final String JDBC_USER = env("JDBC_USER", "puzzle"); -static final String JDBC_PASS = env("JDBC_PASS", "heel-goed-wachtwoord"); -static final String OLLAMA_URL = env("OLLAMA_URL", "http://192.168.1.159:8081/v1/chat/completions"); -static final String MODEL = env("MODEL", "/models/TheBloke/Llama-2-13B-Chat-Dutch-GGUF/llama-2-13b-chat-dutch.Q8_0.gguf"); - -static String env(String k, String def) { - var v = System.getenv(k); - return (v == null || v.isBlank()) ? def : v; -} - -void main(String[] args) throws Exception { - Class.forName("org.postgresql.Driver"); - var limit = args.length > 0 ? Integer.parseInt(args[0]) : 3000; - - try (var c = DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASS)) { - c.setAutoCommit(false); - - try (var sel = c.prepareStatement( - "select ctid::text, woord, hint, hint_score " + - "from export_real_words_with_hints " + - "order by updated_at nulls first " + - "limit ? for update skip locked"); - var upd = c.prepareStatement( - "update export_real_words_with_hints set hint = ?, hint_score = ?, guessed_word = ?, suggested_hint = ?, updated_at = now() where ctid::text = ?")) { - - sel.setInt(1, limit); - - var done = 0; - try (var rs = sel.executeQuery()) { - while (rs.next()) { - if (done % 10 == 0) IO.println("Committed " + done); - var ctid = rs.getString(1); - var woord = rs.getString(2); - var oldHint = rs.getString(3); - var oldScore = rs.getInt(4); - if (rs.wasNull()) oldScore = -1; - - var newHint = generateHint(woord); - if (newHint == null || newHint.isBlank()) continue; - newHint = sanitizeHint(newHint); - - var scoreRes = scoreHint(newHint, woord); - var newScore = scoreRes.score; - - // De gebruiker wil voornamelijk hints toevoegen aan records die er geen hebben. - // En de originele hint behouden omdat de LLM resultaten soms tegenvallen. - if (oldHint != null && !oldHint.isBlank()) { - // Er is al een hint. We genereren nog steeds een suggestie, - // maar we overschrijven de originele 'hint' kolom NIET. - var updSug = c.prepareStatement( - "update export_real_words_with_hints set suggested_hint = ?, hint_score = ?, guessed_word = ?, updated_at = now() where ctid::text = ?"); - updSug.setString(1, newHint); - updSug.setInt(2, newScore); - updSug.setString(3, scoreRes.guessedWord); - updSug.setString(4, ctid); - updSug.executeUpdate(); - c.commit(); - done++; - continue; - } - - // Geen bestaande hint, dus we vullen hem nu in - upd.setString(1, newHint); - upd.setInt(2, newScore); - upd.setString(3, scoreRes.guessedWord); - upd.setString(4, newHint); - upd.setString(5, ctid); - upd.executeUpdate(); - c.commit(); - - done++; - } - } - - IO.println("Done. Updated " + done + " rows."); - } - } -} - -static String sanitizeHint(String hint) { - if (hint == null) return null; - hint = hint.trim(); - if (hint.contains("\n")) { - var lines = hint.split("\n"); - hint = lines[lines.length - 1].trim(); - } - return hint; -} - -record ScoreResult(String guessedWord, int score) { -} - -record Candidate(String w, double p) { } - -static ScoreResult scoreHint(String hint, String woord) throws Exception { - int L = woord.replaceAll("[^A-Za-z]", "").length(); - - String schema = """ - { - "type": "json_object", - "additionalProperties": false, - "properties":{ - "candidates":{ - "type":"array", - "minItems":5, - "maxItems":5, - "items":{ - "type":"object", - "additionalProperties": false, - "properties":{"w":{"type":"string","minLength":%d,"maxLength":%d},"p":{"type":"number","minimum":0,"maximum":1}}, - "required":["w","p"] - } - } - }, - "required":["candidates"] - } - """.formatted(L, L); - - String prompt = """ - Je bent een Nederlandse kruiswoord-hulp. - Geef EXACT 5 kandidaatwoorden (allemaal %d letters) die passen bij de hint. - Geef ook p (0..1) per kandidaat, som p ≈ 1.0. - Alleen JSON volgens dit schema, geen extra tekst. - Hint: %s - Schema: %s - """.formatted(L, hint, schema); - - String payload = "{" - + "\"model\":\"" + jsonEscape(MODEL) + "\"," - + "\"messages\":[" - + "{\"role\":\"system\",\"content\":\"Je antwoordt uitsluitend met geldige JSON volgens het schema.\"}," - + "{\"role\":\"user\",\"content\":\"" + jsonEscape(prompt) + "\"}" - + "]," - + "\"stream\":false," - + "\"max_tokens\":100," - + "\"response_format\":" + schema + "," - + "\"options\":{" - + "\"temperature\":0.4," - + "\"seed\":1," - + "\"num_predict\":400," - + "\"stop\":[\"\\n\"]" - + "}," - + "\"keep_alive\":\"5m\"" - + "}"; - - var p = new ProcessBuilder( - "curl", "-sS", - "-H", "Content-Type: application/json", - "-X", "POST", OLLAMA_URL, - "-d", payload - ).redirectErrorStream(true).start(); - - String out; - try (var in = p.getInputStream()) { - out = new String(in.readAllBytes(), StandardCharsets.UTF_8); - } - p.waitFor(); - - // Ollama /api/chat => message.content bevat de JSON-string :contentReference[oaicite:2]{index=2} - String content = extractJsonString(out, "content"); - if (content == null) content = ""; - - // parse content JSON -> candidates[ {w,p}, ... ] - var candidates = parseCandidates(content); // implement met Jackson/Gson/whatever - - String original = woord.toUpperCase().replaceAll("[^A-Z]", ""); - String guessed = candidates.isEmpty() ? "" : normalizeWord(candidates.get(0).w); - - int rank = findRank(candidates, original); - int score; - - if (rank > 0) { - var candidate = candidates.get(rank - 1); - guessed = normalizeWord(candidate.w); - double pCorrect = candidate.p; - score = (int) Math.round(100 * pCorrect * (1.0 - 0.15 * (rank - 1))); - } else { - int bestDist = Integer.MAX_VALUE; - for (var c : candidates) { - String cw = normalizeWord(c.w); - if (cw.isEmpty()) continue; - var levenshtein = levenshtein(cw, original); - if (levenshtein < bestDist) { - bestDist = levenshtein; - guessed = cw; - } - } - // “bijna”-score: pas de factor aan naar smaak - score = Math.max(0, 60 - bestDist * 10); - } - return new ScoreResult(guessed, score); -} -static int findRank(List candidates, String targetWord) { - if (candidates == null || candidates.isEmpty()) return -1; - - String target = normalizeWord(targetWord); - if (target.isEmpty()) return -1; - - for (int i = 0; i < candidates.size(); i++) { - String cw = normalizeWord(candidates.get(i).w); - if (cw.equals(target)) return i + 1; // 1-based rank - } - return -1; -} -private static String normalizeWord(String s) { - if (s == null) return ""; - return s.trim().toUpperCase().replaceAll("[^A-Z]", ""); -} -private static int skipWs(String s, int i) { - while (i < s.length()) { - char c = s.charAt(i); - if (c != ' ' && c != '\n' && c != '\r' && c != '\t') break; - i++; - } - return i; -} -private static int findMatching(String s, int start, char open, char close) { - boolean inString = false; - boolean esc = false; - int depth = 0; - - for (int i = start; i < s.length(); i++) { - char c = s.charAt(i); - - if (inString) { - if (esc) { - esc = false; - } else if (c == '\\') { - esc = true; - } else if (c == '"') { - inString = false; - } - continue; - } - - if (c == '"') { - inString = true; - continue; - } - - if (c == open) depth++; - else if (c == close) { - depth--; - if (depth == 0) return i; - } - } - return -1; -} -static List parseCandidates(String contentJson) throws Exception { - if (contentJson == null) return List.of(); - - int a = contentJson.indexOf('{'); - int b = contentJson.lastIndexOf('}'); - String json = (a >= 0) ? contentJson.substring(a) : contentJson; // ook ok bij truncatie - - int keyPos = json.indexOf("\"candidates\""); - if (keyPos < 0) return List.of(); - - int colon = json.indexOf(':', keyPos); - if (colon < 0) return List.of(); - - int arrStart = json.indexOf('[', colon); - if (arrStart < 0) return List.of(); - - int arrEnd = findMatching(json, arrStart, '[', ']'); - if (arrEnd < 0) arrEnd = json.length(); // <-- TRUNCATIE-TOLERANT - - String arr = json.substring(arrStart + 1, arrEnd); - - List out = new ArrayList<>(5); - int i = 0; - - while (i < arr.length() && out.size() < 5) { - i = skipWs(arr, i); - if (i >= arr.length()) break; - if (arr.charAt(i) == ',') { - i++; - continue; - } - - int objStart = arr.indexOf('{', i); - if (objStart < 0) break; - - int objEnd = findMatching(arr, objStart, '{', '}'); - if (objEnd < 0) { - // object is afgebroken -> stop (want alles hierna is ook verdacht) - break; - } - - String obj = arr.substring(objStart, objEnd + 1); - - String w = extractJsonString(obj, "w"); - Double p = extractJsonNumber(obj, "p"); - - if (w != null && p != null) { - String nw = normalizeWord(w); - out.add(new Candidate(nw, p)); - } - - i = objEnd + 1; - } - - return out; -} - -static int levenshtein(String s1, String s2) { - var dp = new int[s1.length() + 1][s2.length() + 1]; - for (var i = 0; i <= s1.length(); i++) dp[i][0] = i; - for (var j = 0; j <= s2.length(); j++) dp[0][j] = j; - for (var i = 1; i <= s1.length(); i++) - for (var j = 1; j <= s2.length(); j++) { - var cost = (s1.charAt(i - 1) == s2.charAt(j - 1)) ? 0 : 1; - dp[i][j] = Math.min(Math.min(dp[i - 1][j] + 1, dp[i][j - 1] + 1), dp[i - 1][j - 1] + cost); - } - return dp[s1.length()][s2.length()]; -} - -static String generateHint(String woord) throws Exception { - - String jsonSchema = """ - { - "type":"object", - "additionalProperties": false, - "properties":{ - "hint":{"type":"string","minLength":3,"maxLength":120} - }, - "required":["hint"] - } - """; - - String system = - "Je antwoordt uitsluitend met geldige JSON volgens response_format. " - + "Geen extra tekst."; - - String user = """ - Je bent een kruiswoordpuzzelmaker. - Maak precies 1 hint voor "". - - Regels: - - 4 t/m 8 woorden. - - Vermijd "" en elk deel ervan. - - Geen inleiding, geen aanhalingstekens, geen punt. - - Neutrale omschrijving, niet cryptisch. - - """.formatted(woord,woord); - - String payload = "{" - + "\"model\":\"" + jsonEscape(MODEL) + "\"," - + "\"messages\":[" - + "{\"role\":\"system\",\"content\":\"" + jsonEscape(system) + "\"}," - + "{\"role\":\"user\",\"content\":\"" + jsonEscape(user) + "\"}" - + "]," - + "\"stream\":false," - + "\"max_tokens\":100," - + "\"response_format\":{" - + "\"type\":\"json_object\"," - + "\"schema\":" + jsonSchema - + "}," - + "\"options\":{" - + "\"temperature\":0.35," - + "\"seed\":1," - + "\"num_predict\":120" - + "}" - + "}"; - - String out = postCurl(OLLAMA_URL, payload); - - String content = jsonGetString(out, "content"); // -> {"hint":"..."} - if (content == null) return null; - - String hint = jsonGetString(content, "hint"); // -> de echte hint - return hint == null ? null : hint.trim(); -} - -static boolean leaksHint(String hint, String[] forbidden) { - String h = normalize(hint); - for (String f : forbidden) { - if (f == null || f.isBlank()) continue; - if (h.contains(normalize(f))) return true; - } - return false; -} - -static String[] forbiddenForms(String woord) { - String w = normalize(woord).toLowerCase(); - // simpele NL-varianten (goed genoeg voor jouw voorbeeld “radijsje”) - List forms = new ArrayList<>(); - forms.add(w); - - if (w.endsWith("je") && w.length() > 4) forms.add(w.substring(0, w.length() - 2)); // radijsje -> radijs - if (w.endsWith("en") && w.length() > 4) forms.add(w.substring(0, w.length() - 2)); // meervoud - if (w.endsWith("s") && w.length() > 4) forms.add(w.substring(0, w.length() - 1)); // bezits/meervoud - - // unieke lijst terug - return forms.stream().distinct().toArray(String[]::new); -} - -static String normalize(String s) { - if (s == null) return ""; - return s.toLowerCase().replaceAll("[^a-z]", ""); -} - -static String jsonEscape(String s) { - var b = new StringBuilder(s.length() + 16); - for (var i = 0; i < s.length(); i++) { - var ch = s.charAt(i); - switch (ch) { - case '\\' -> b.append("\\\\"); - case '"' -> b.append("\\\""); - case '\n' -> b.append("\\n"); - case '\r' -> b.append("\\r"); - case '\t' -> b.append("\\t"); - default -> b.append(ch); - } - } - return b.toString(); -} - -private static String extractJsonString(String obj, String key) throws Exception { - int k = obj.indexOf("\"" + key + "\""); - if (k < 0) return null; - - int colon = obj.indexOf(':', k); - if (colon < 0) return null; - - int i = skipWs(obj, colon + 1); - if (i >= obj.length() || obj.charAt(i) != '"') return null; - - StringBuilder sb = new StringBuilder(); - i++; // na eerste quote - - boolean esc = false; - while (i < obj.length()) { - char c = obj.charAt(i++); - if (esc) { - esc = false; - switch (c) { - case '"': - sb.append('"'); - break; - case '\\': - sb.append('\\'); - break; - case '/': - sb.append('/'); - break; - case 'b': - sb.append('\b'); - break; - case 'f': - sb.append('\f'); - break; - case 'n': - sb.append('\n'); - break; - case 'r': - sb.append('\r'); - break; - case 't': - sb.append('\t'); - break; - case 'u': - if (i + 4 > obj.length()) throw new Exception("Bad unicode escape"); - String hex = obj.substring(i, i + 4); - sb.append((char) Integer.parseInt(hex, 16)); - i += 4; - break; - default: - // onbekende escape: neem letterlijk - sb.append(c); - } - continue; - } - - if (c == '\\') { - esc = true; - continue; - } - if (c == '"') break; // einde string - sb.append(c); - } - return sb.toString(); -} -// minimal JSON string extractor for {"key":"value"} with escapes -static String jsonGetString(String json, String key) { - var needle = "\"" + key + "\""; - var i = json.indexOf(needle); - if (i < 0) return null; - i = json.indexOf(':', i + needle.length()); - if (i < 0) return null; - i++; - while (i < json.length() && Character.isWhitespace(json.charAt(i))) i++; - if (i >= json.length() || json.charAt(i) != '"') return null; - i++; // after opening quote - - var b = new StringBuilder(); - var esc = false; - for (; i < json.length(); i++) { - var ch = json.charAt(i); - if (esc) { - switch (ch) { - case 'n' -> b.append('\n'); - case 'r' -> b.append('\r'); - case 't' -> b.append('\t'); - case '"' -> b.append('"'); - case '\\' -> b.append('\\'); - default -> b.append(ch); - } - esc = false; - } else if (ch == '\\') esc = true; - else if (ch == '"') return b.toString(); - else b.append(ch); - } - return null; -} -private static Double extractJsonNumber(String obj, String key) { - int k = obj.indexOf("\"" + key + "\""); - if (k < 0) return null; - - int colon = obj.indexOf(':', k); - if (colon < 0) return null; - - int i = skipWs(obj, colon + 1); - if (i >= obj.length()) return null; - - int j = i; - while (j < obj.length()) { - char c = obj.charAt(j); - if ((c >= '0' && c <= '9') || c == '-' || c == '+' || c == '.' || c == 'e' || c == 'E') { - j++; - } else { - break; - } - } - if (j == i) return null; - - try { - return Double.parseDouble(obj.substring(i, j)); - } catch (NumberFormatException e) { - return null; - } -} -static String postCurl(String url, String payload) throws Exception { - var p = new ProcessBuilder( - "curl", "-sS", "-f", - "-H", "Content-Type: application/json", - "-X", "POST", url, - "--data-binary", "@-", - "-w", "\n__HTTP_STATUS__:%{http_code}\n" - ).redirectErrorStream(true).start(); - - try (var os = p.getOutputStream()) { - os.write(payload.getBytes(StandardCharsets.UTF_8)); - } - - String out; - try (var in = p.getInputStream()) { - out = new String(in.readAllBytes(), StandardCharsets.UTF_8); - } - - int code = p.waitFor(); - if (code != 0) { - System.err.println("curl failed (" + code + "): " + out); - return null; - } - - // strip status marker from output - int k = out.lastIndexOf("__HTTP_STATUS__:"); - if (k >= 0) { - String status = out.substring(k + "__HTTP_STATUS__:".length()).trim(); - out = out.substring(0, k).trim(); - - // optioneel: log status - // System.err.println("HTTP status: " + status); - } - - return out; -}