diff --git a/Dockerfile b/Dockerfile
index 0cace24..239eb59 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,7 +11,7 @@ WORKDIR /app
# Copy source files
COPY src/ /app/src/
-COPY word-list.txt /app/word-list.txt
+COPY export_real_words_with_hints.csv /app/export_real_words_with_hints.csv
COPY compile.sh /app/compile.sh
COPY docker-entrypoint.sh /app/docker-entrypoint.sh
COPY crontab /app/crontab
@@ -19,7 +19,8 @@ COPY crontab /app/crontab
# Compile Java code
RUN chmod +x /app/compile.sh && \
mkdir -p /app/target && \
- javac -d /app/target src/puzzle/*.java
+ cp src/puzzle/postgresql-42.7.8.jar /app/target/ && \
+ javac -cp /app/target/postgresql-42.7.8.jar -d /app/target src/puzzle/*.java
# Create output directory
RUN mkdir -p /data/puzzles
diff --git a/README.md b/README.md
index ac34674..acf6022 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ A high-performance Java-based puzzle generator with theme-based word filtering a
- Edit distance similarity matching
- Automatic theme detection
-3. **DailyGenerator.java** - Daily puzzle automation
+3. **Main.java** - Core generator and daily automation
- Generates themed puzzles
- JSON output with metadata
- Index file generation
@@ -47,7 +47,6 @@ A high-performance Java-based puzzle generator with theme-based word filtering a
java -cp ~/dev/.target puzzle.Main --seed 42 --pop 18 --gens 100
# Generate daily puzzles
-java -cp ~/dev/.target puzzle.DailyGenerator
```
### Docker Deployment
@@ -153,13 +152,13 @@ Puzzles are generated daily at **3:15 AM** (configurable in `crontab`).
Edit `crontab` to change schedule:
```cron
# Daily at 3:15 AM
-15 3 * * * java -cp /app/target puzzle.DailyGenerator
+15 3 * * * java -cp /app/target/postgresql-42.7.8.jar:/app/target puzzle.Main
# Every 6 hours
-0 */6 * * * java -cp /app/target puzzle.DailyGenerator
+0 */6 * * * java -cp /app/target/postgresql-42.7.8.jar:/app/target puzzle.Main
# Weekly on Monday at 1 AM
-0 1 * * 1 java -cp /app/target puzzle.DailyGenerator
+0 1 * * 1 java -cp /app/target/postgresql-42.7.8.jar:/app/target puzzle.Main
```
## Word List Format
@@ -208,7 +207,7 @@ The Java version maintains module-wise compatibility with the Node.js generator:
|------------------------|-------------------------------------|
| `swedish_generator.js` | `SwedishGenerator.java` |
| `export_format.js` | `ExportFormat.java` |
-| `main.js` | `Main.java` + `DailyGenerator.java` |
+| `main.js` | `Main.java` |
| N/A | `ThemeGraph.java` (new) |
## Volume Management
diff --git a/compile.sh b/compile.sh
index 91f6fb0..fa24d40 100755
--- a/compile.sh
+++ b/compile.sh
@@ -1,4 +1,4 @@
#!/bin/bash
TARGET=${1:-~/dev/.target}
mkdir -p "$TARGET"
-javac -d "$TARGET" src/puzzle/*.java
+javac -cp src/puzzle/postgresql-42.7.8.jar -d "$TARGET" src/puzzle/*.java
diff --git a/crontab b/crontab
index 06fb0d0..933e8f2 100644
--- a/crontab
+++ b/crontab
@@ -1,2 +1,3 @@
# Generate puzzles daily at 3:15 AM
-15 3 * * * java -cp /app/target puzzle.DailyGenerator >> /var/log/cron.log 2>&1
+# A directory classpath entry does not load JARs inside it, so the JDBC driver JAR is listed explicitly (same classpath as docker-entrypoint.sh).
+15 3 * * * java -cp /app/target/postgresql-42.7.8.jar:/app/target puzzle.Main >> /var/log/cron.log 2>&1
diff --git a/docker-compose.yml b/docker-compose.yml
index e12642c..6e1edc0 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,18 +1,22 @@
services:
- puzzle_gen:
+ puzzle_create_one:
build:
context: ${PUZZLE_ROOT_DIR:-/opt/apps/puzzle}
- dockerfile: tools/puzzle-gen/Dockerfile
- container_name: puzzle_gen
- restart: unless-stopped
+ dockerfile: Dockerfile
+ container_name: puzzle_create_one
+ restart: "no"
networks: [ traefik_net ]
environment:
TZ: Europe/Amsterdam
+ OUT_DIR: /data/puzzle
+ WORDS_PATH: "/app/export_real_words_with_hints.csv"
LM_STUDIO_BASE_URL: "http://192.168.1.159:1234/v1"
- PUZZLES_PER_DAY: "3"
+ GENERATE_ON_START: "true"
+ START_CLASS: "puzzle.Main"
+ SCORES_PATH: "/app/export_real_words_with_hints.csv"
volumes:
- - puzzles_data:/data/puzzles:rw
+ - puzzles_data:/data/puzzle:rw
update_hints:
build:
@@ -45,13 +49,15 @@ services:
networks: [ traefik_net ]
environment:
TZ: Europe/Amsterdam
- OUT_DIR: /data/puzzles
+ OUT_DIR: /data/puzzle
+ WORDS_PATH: "/app/export_real_words_with_hints.csv"
+ SCORES_PATH: "/app/export_real_words_with_hints.csv"
PUZZLES_PER_DAY: "3"
LM_STUDIO_BASE_URL: "http://192.168.1.159:1234/v1"
THEME_FILTER: "true"
THEME_MIN_SCORE: "0.6"
volumes:
- - puzzles_data:/data/puzzles:rw
+ - puzzles_data:/data/puzzle:rw
volumes:
puzzles_data:
diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh
index 9c4c674..f721f32 100644
--- a/docker-entrypoint.sh
+++ b/docker-entrypoint.sh
@@ -8,12 +8,14 @@ echo "Puzzles per day: ${PUZZLES_PER_DAY}"
echo ""
# Ensure output directory exists
-mkdir -p "${OUT_DIR}"
+mkdir -p "${OUT_DIR}/puzzles"
# Generate initial puzzle on startup (optional)
if [ "${GENERATE_ON_START}" = "true" ]; then
echo "Generating initial puzzles..."
- java -cp /app/target puzzle.DailyGenerator
+ START_CLASS=${START_CLASS:-puzzle.Main}
+ echo "Running ${START_CLASS}..."
+ java -cp /app/target/postgresql-42.7.8.jar:/app/target ${START_CLASS}
echo ""
fi
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..edf2f03
--- /dev/null
+++ b/package.json
@@ -0,0 +1,13 @@
+{
+ "name": "puzzle-generator",
+ "version": "1.0.0",
+ "description": "",
+ "main": "index.js",
+ "scripts": {
+ "test": "echo \"Error: no test specified\" && exit 1"
+ },
+ "private": true,
+ "dependencies": {
+ "better-sqlite3": "^12.5.0"
+ }
+}
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..d1934b9
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,67 @@
+
+ 4.0.0
+
+ puzzle
+ tools
+ 0.0.1
+
+
+ 25
+ 25
+
+
+
+
+ org.postgresql
+ postgresql
+ 42.7.3
+
+
+ org.xerial
+ sqlite-jdbc
+ 3.46.1.0
+
+
+
+ org.slf4j
+ slf4j-api
+ 2.0.13
+
+
+
+
+ org.slf4j
+ slf4j-simple
+ 2.0.13
+ runtime
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+ 3.6.0
+
+
+ package
+
+ shade
+
+
+ false
+
+
+ HintScores
+
+
+ tools-all
+
+
+
+
+
+
+
diff --git a/py/import.py b/py/import.py
deleted file mode 100644
index 80813a8..0000000
--- a/py/import.py
+++ /dev/null
@@ -1,142 +0,0 @@
-#!/usr/bin/env python3
-import argparse
-import json
-import re
-import sqlite3
-from pathlib import Path
-
-RE_ASCII_WORD = re.compile(r"^[A-Za-z]+$")
-RE_SPACE = re.compile(r"\s+")
-RE_PARENS = re.compile(r"\s*\([^)]*\)\s*") # verwijder (labels)
-RE_BRACKETS = re.compile(r"\s*\[[^]]*]\s*") # verwijder [labels]
-
-def clean_hint(s: str) -> str:
- s = s.strip()
- s = RE_BRACKETS.sub(" ", s)
- s = RE_PARENS.sub(" ", s)
- s = s.replace("’", "'")
- s = RE_SPACE.sub(" ", s).strip(" -;:,.\t")
- return s
-
-def pick_gloss(obj: dict) -> tuple[str | None, str | None]:
- """Return (hint, pos) from a Wiktextract JSON line."""
- pos = obj.get("pos")
- senses = obj.get("senses") or []
- best = None
-
- for s in senses:
- glosses = s.get("glosses") or []
- if not glosses:
- continue
- # Neem de eerste gloss die "normaal" oogt
- for g in glosses:
- if not isinstance(g, str):
- continue
- g2 = clean_hint(g)
- if len(g2) < 3:
- continue
- best = g2
- break
- if best:
- break
-
- return best, pos
-
-def main():
- ap = argparse.ArgumentParser()
- ap.add_argument("--db", required=True, help="pad naar jouw sqlite db")
- ap.add_argument("--jsonl", required=True, help="pad naar nl-extract.jsonl")
- ap.add_argument("--minlen", type=int, default=2)
- ap.add_argument("--maxlen", type=int, default=8)
- ap.add_argument("--maxhint", type=int, default=80)
- args = ap.parse_args()
-
- db_path = Path(args.db)
- jsonl_path = Path(args.jsonl)
-
- con = sqlite3.connect(db_path)
- cur = con.cursor()
-
- # speed pragmas (alleen tijdens import)
- cur.execute("PRAGMA journal_mode=WAL;")
- cur.execute("PRAGMA synchronous=NORMAL;")
- cur.execute("PRAGMA temp_store=MEMORY;")
-
- cur.execute("""
- CREATE TABLE IF NOT EXISTS hints (
- word TEXT NOT NULL,
- hint TEXT NOT NULL,
- source TEXT NOT NULL DEFAULT 'wiktionary',
- pos TEXT,
- quality INTEGER NOT NULL DEFAULT 80,
- PRIMARY KEY (word, hint, source)
- );
- """)
- cur.execute("CREATE INDEX IF NOT EXISTS idx_hints_word ON hints(word);")
- con.commit()
-
- batch = []
- inserted = 0
- seen = 0
-
- con.execute("BEGIN;")
- with jsonl_path.open("r", encoding="utf-8") as f:
- for line in f:
- line = line.strip()
- if not line:
- continue
- try:
- obj = json.loads(line)
- except json.JSONDecodeError:
- continue
-
- # Kaikki/Wiktextract: vaak lang_code = "nl" en lang = "Dutch"
- lang_code = obj.get("lang_code")
- if lang_code and lang_code != "nl":
- continue
-
- word = obj.get("word")
- if not word:
- continue
-
- word_up = word.upper().strip()
- if not (args.minlen <= len(word_up) <= args.maxlen):
- continue
- if not RE_ASCII_WORD.match(word_up):
- continue
-
- hint, pos = pick_gloss(obj)
- if not hint:
- continue
-
- # Hint kort houden
- hint = hint[: args.maxhint].rstrip()
-
- # Simpele kwaliteit: iets hoger als POS bekend is
- quality = 85 if pos else 80
-
- batch.append((word_up, hint, "wiktionary", pos, quality))
- seen += 1
-
- if len(batch) >= 2000:
- cur.executemany(
- "INSERT OR IGNORE INTO hints(word,hint,source,pos,quality) VALUES (?,?,?,?,?)",
- batch
- )
- inserted += cur.rowcount if cur.rowcount != -1 else 0
- batch.clear()
-
- if batch:
- cur.executemany(
- "INSERT OR IGNORE INTO hints(word,hint,source,pos,quality) VALUES (?,?,?,?,?)",
- batch
- )
- inserted += cur.rowcount if cur.rowcount != -1 else 0
-
- con.commit()
- con.close()
-
- print(f"Done. processed_lines≈{seen}, inserted≈{inserted} (OR IGNORE kan inserts verlagen).")
-
-if __name__ == "__main__":
- main()
diff --git a/requirements-marker.txt b/requirements-marker.txt
deleted file mode 100644
index 4ab456a..0000000
--- a/requirements-marker.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-# PyTorch with CUDA 12.4 support
---index-url https://download.pytorch.org/whl/cu124
-torch
-torchvision
-torchaudio
-
-# Transformers and marker
-transformers
-marker-pdf
diff --git a/run.sh b/run.sh
index 20c3ad9..4783f43 100755
--- a/run.sh
+++ b/run.sh
@@ -1,3 +1,2 @@
#!/bin/bash
java -cp ~/dev/.target puzzle.Main "$@"
-java -cp ~/dev/.target puzzle.DailyGenerator "$@"
diff --git a/src/main/java/puzzle/ClueGenerator.java b/src/main/java/puzzle/ClueGenerator.java
deleted file mode 100644
index effa935..0000000
--- a/src/main/java/puzzle/ClueGenerator.java
+++ /dev/null
@@ -1,222 +0,0 @@
-package puzzle;
-
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.*;
-import static puzzle.ExportFormat.*;
-
-public class ClueGenerator {
-
- private static final String OLLAMA_URL = "http://localhost:11434/api/chat";
- private static final String MODEL = "qwen2.5:14b";
- private static final String HINTS_FILE = "/home/mike/dev/puzzle-generator/nl_score_hints.csv";
- private static Map prebuiltClues = null;
-
- private static synchronized void ensurePrebuiltCluesLoaded() {
- if (prebuiltClues != null) return;
- prebuiltClues = new HashMap<>();
- try {
- var lines = Files.readAllLines(Path.of(HINTS_FILE), StandardCharsets.UTF_8);
- for (var line : lines) {
- var parts = line.split(",", 4);
- if (parts.length >= 4) {
- var word = parts[0].trim().toUpperCase(Locale.ROOT);
- var rawClue = parts[3].trim();
- if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
- rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
- }
- if (!word.isEmpty() && !rawClue.isEmpty()) {
- prebuiltClues.put(word, rawClue);
- }
- }
- }
- } catch (IOException e) {
- System.err.println("Warning: " + HINTS_FILE + " not found or could not be read.");
- }
- }
-
- public static ExportedPuzzle applyClues(ExportedPuzzle puzzle) {
- if (puzzle == null || puzzle.words().isEmpty()) {
- return puzzle;
- }
-
- ensurePrebuiltCluesLoaded();
-
- Map finalClueMap = new HashMap<>();
- List wordsMissingClues = new ArrayList<>();
-
- for (var w : puzzle.words()) {
- var wordUpper = w.word().toUpperCase(Locale.ROOT);
- if (prebuiltClues.containsKey(wordUpper)) {
- finalClueMap.put(w.word(), prebuiltClues.get(wordUpper));
- } else {
- wordsMissingClues.add(w.word());
- }
- }
-
- if (!wordsMissingClues.isEmpty()) {
- var generatedClues = generateClues(wordsMissingClues);
- finalClueMap.putAll(generatedClues);
- }
-
- List wordsWithClues = new ArrayList<>();
- for (var w : puzzle.words()) {
- var clue = finalClueMap.getOrDefault(w.word(), w.word());
- wordsWithClues.add(new WordOut(
- w.word(),
- clue,
- w.startRow(),
- w.startCol(),
- w.direction(),
- w.answer(),
- w.arrowRow(),
- w.arrowCol(),
- w.isReversed(),
- w.complex()
- ));
- }
-
- return new ExportedPuzzle(puzzle.gridv2(), wordsWithClues, puzzle.difficulty(), puzzle.rewards());
- }
-
- public static Map generateClues(List words) {
- if (words == null || words.isEmpty()) {
- return Collections.emptyMap();
- }
-
- var prompt = createCluePrompt(words);
- try {
- var jsonRequest = String.format(
- "{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"%s\"}],\"stream\":false,\"temperature\":0.7}",
- MODEL, escapeJson(prompt)
- );
-
- var responseBody = curlPostJson(OLLAMA_URL, jsonRequest, 120);
- var content = extractChatContent(responseBody);
-
- if (content == null || content.isEmpty()) {
- return Collections.emptyMap();
- }
-
- return parseCluesFromReply(words, content);
- } catch (Exception e) {
- System.err.println("Failed to generate clues: " + e.getMessage());
- return Collections.emptyMap();
- }
- }
-
- private static String createCluePrompt(List words) {
- return "Je bent een expert in het maken van kruiswoordpuzzels. Geef voor elk van de onderstaande woorden een korte, uitdagende maar duidelijke cryptische of beschrijvende aanwijzing in het Nederlands.\n\n" +
- "Output ALLEEN in dit formaat:\n" +
- "woord1:aanwijzing\n" +
- "woord2:aanwijzing\n\n" +
- "GEEN andere tekst of uitleg. Sla GEEN woorden over.\n\n" +
- "Lijst:\n" +
- String.join("\n", words);
- }
-
- private static Map parseCluesFromReply(List expectedWords, String reply) {
- Map wordClueMap = new HashMap<>();
- var lines = reply.split("\n");
-
- for (var line : lines) {
- line = line.trim();
- if (line.contains(":")) {
- var parts = line.split(":", 2);
- if (parts.length == 2) {
- var wordPart = parts[0].trim().replaceAll("^[\\d+.)*\\-\\s]+", "").toLowerCase();
- var clue = parts[1].trim();
- if (!clue.isEmpty()) {
- wordClueMap.put(wordPart, clue);
- }
- }
- }
- }
-
- Map results = new HashMap<>();
- for (var word : expectedWords) {
- var clue = wordClueMap.get(word.toLowerCase());
- if (clue != null) {
- results.put(word, clue);
- }
- }
- return results;
- }
-
- private static String curlPostJson(String url, String jsonBody, int timeoutSeconds) throws Exception {
- var tempFile = Files.createTempFile("clue-request-", ".json");
- try {
- Files.writeString(tempFile, jsonBody, StandardCharsets.UTF_8);
- List cmd = new ArrayList<>();
- cmd.add("curl");
- cmd.add("-fsSL");
- cmd.add("--connect-timeout");
- cmd.add("10");
- cmd.add("--max-time");
- cmd.add(String.valueOf(timeoutSeconds));
- cmd.add("-H");
- cmd.add("Content-Type: application/json");
- cmd.add("-d");
- cmd.add("@" + tempFile);
- cmd.add(url);
-
- var p = new ProcessBuilder(cmd)
- .redirectErrorStream(true)
- .start();
-
- var bytes = p.getInputStream().readAllBytes();
- var code = p.waitFor();
-
- if (code != 0) {
- throw new IOException("curl POST failed (" + code + ") url=" + url + "\nOutput:\n" +
- new String(bytes, StandardCharsets.UTF_8));
- }
-
- return new String(bytes, StandardCharsets.UTF_8);
- } finally {
- Files.deleteIfExists(tempFile);
- }
- }
-
- private static String extractChatContent(String json) {
- if (json == null) return null;
- var choices = json.indexOf("\"choices\"");
- var p = (choices >= 0) ? choices : 0;
- var i = json.indexOf("\"content\"", p);
- if (i < 0) {
- // Fallback for Ollama non-chat format if needed, but we used /api/chat
- // Ollama /api/chat returns {"model":"...","message":{"role":"assistant","content":"..."}}
- i = json.indexOf("\"content\"");
- if (i < 0) return null;
- }
- var colon = json.indexOf(':', i);
- if (colon < 0) return null;
- var q = json.indexOf('"', colon + 1);
- if (q < 0) return null;
- var sb = new StringBuilder();
- var esc = false;
- for (var k = q + 1; k < json.length(); k++) {
- var ch = json.charAt(k);
- if (esc) {
- if (ch == 'n') sb.append('\n');
- else if (ch == 't') sb.append('\t');
- else if (ch == 'r') sb.append('\r');
- else sb.append(ch);
- esc = false;
- } else {
- if (ch == '\\') esc = true;
- else if (ch == '"') break;
- else sb.append(ch);
- }
- }
- return sb.toString();
- }
-
- private static String escapeJson(String str) {
- return str.replace("\\", "\\\\")
- .replace("\"", "\\\"")
- .replace("\n", "\\n");
- }
-}
diff --git a/src/main/java/puzzle/ConcurrentWordScorer.java b/src/main/java/puzzle/ConcurrentWordScorer.java
deleted file mode 100644
index d059705..0000000
--- a/src/main/java/puzzle/ConcurrentWordScorer.java
+++ /dev/null
@@ -1,532 +0,0 @@
-package puzzle;
-
-import java.nio.charset.StandardCharsets;
-import java.nio.file.*;
-import java.util.*;
-import java.util.concurrent.*;
-import java.io.*;
-import java.time.*;
-import java.util.concurrent.atomic.*;
-
-/**
- * CONCURRENT MULTI-ENDPOINT Dutch Wordlist Scorer
- * Distributes batches across Ollama, LM-Studio, and a third endpoint simultaneously
- */
-public class ConcurrentWordScorer {
-
- // ===== CONFIGURATION =====
- private static final String INPUT_WORDLIST = "word-list.txt";
- private static final String OUTPUT_SCORES = "word_scores.csv";
- private static final int BATCH_SIZE = 10; // Even smaller for the difficult remaining words
- private static final int MAX_RETRIES = 3;
-
- // Define all three endpoints
- private static final LLMEndpoint[] ENDPOINTS = {
- new OllamaEndpoint(),
- new LMStudioEndpoint(),
- new LMStudioEndpoint("LM-Studio", "http://192.168.1.74:1234/v1/chat/completions",
- "mistralai/mistral-nemo-instruct-2407", 1)
- // new CustomEndpoint()
- };
-
- // ===== ENDPOINT CLASSES =====
- abstract static class LLMEndpoint {
-
- String name;
- String baseUrl;
- String model;
- Semaphore rateLimiter; // Per-endpoint rate limiting
-
- int maxConcurrent;
-
- LLMEndpoint(String name, String baseUrl, String model, int maxConcurrent) {
- this.name = name;
- this.baseUrl = baseUrl;
- this.model = model;
- this.maxConcurrent = maxConcurrent;
- this.rateLimiter = new Semaphore(maxConcurrent);
- }
-
- abstract String buildRequestJson(String prompt);
- abstract String extractResponseContent(String responseBody);
-
- // Rate-limited request execution
- List execute(List batch) throws Exception {
- rateLimiter.acquire(); // Wait for slot
- try {
- return executeInternal(batch);
- } finally {
- rateLimiter.release();
- }
- }
-
- private List executeInternal(List batch) throws Exception {
- var prompt = createScoringPrompt(batch);
- var jsonRequest = buildRequestJson(prompt);
-
- var responseBody = curlPostJson(baseUrl, jsonRequest, 120);
- var content = extractResponseContent(responseBody);
-
- if (content == null || content.isEmpty()) {
- throw new IOException("[" + name + "] Empty response content");
- }
-
- return parseScoresFromReply(batch, content, name);
- }
- }
-
- static class OllamaEndpoint
- extends LLMEndpoint {
-
- OllamaEndpoint() {
- super("Ollama", "http://localhost:11434/api/chat",
- "qwen2.5:14b", 1); // 2 concurrent requests
- }
-
- @Override String buildRequestJson(String prompt) {
- return String.format("{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"%s\"}],\"stream\":false,\"temperature\":0.1}",
- model, escapeJson(prompt));
- }
-
- @Override String extractResponseContent(String responseBody) {
- // Ollama uses "message" -> "content"
- var start = responseBody.indexOf("\"content\":\"") + 11;
- var end = responseBody.indexOf("\"", start);
- if (start < 11 || end < 0) return "";
- return responseBody.substring(start, end).replace("\\n", "\n");
- }
- }
-
- static class LMStudioEndpoint
- extends LLMEndpoint {
-
- LMStudioEndpoint() {
- super("LM-Studio", "http://192.168.1.159:1234/v1/chat/completions",
- "mistralai/mistral-nemo-instruct-2407", 1); // LM-Studio can handle more
- }
- public LMStudioEndpoint(String s, String url, String s1, int i) {
- super(
- s, url, s1, i
- );
- }
-
- @Override String buildRequestJson(String prompt) {
- return String.format("{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"%s\"}],\"temperature\":0.1,\"max_tokens\":2048}",
- model, escapeJson(prompt));
- }
-
- @Override String extractResponseContent(String responseBody) {
- return extractChatContent(responseBody);
- }
- }
-
- static class CustomEndpoint
- extends LLMEndpoint {
-
- CustomEndpoint() {
- super("Custom", "http://192.168.1.74:1234/v1/chat/completions",
- "qwen2.5-vl-7b-abliterated-caption-it_gguf", 2);
- }
-
- @Override String buildRequestJson(String prompt) {
- // Adapt to your third endpoint's format
- return new LMStudioEndpoint().buildRequestJson(prompt);
- }
-
- @Override String extractResponseContent(String responseBody) {
- return new LMStudioEndpoint().extractResponseContent(responseBody);
- }
- }
-
- // ===== MAIN COORDINATOR =====
- static void main(String[] args) throws Exception {
- System.out.println("=== CONCURRENT 3-Endpoint Scorer ===");
- for (var ep : ENDPOINTS) {
- System.out.printf("- %s: %s%n", ep.name, ep.baseUrl);
- }
- System.out.println();
-
- cleanupOutputFile();
-
- // Load work queue
- var allWords = Files.readAllLines(Paths.get(INPUT_WORDLIST));
- var scoredWords = loadAlreadyScoredWords();
- var workQueue = createWorkQueue(allWords, scoredWords);
-
- System.out.printf("Total words: %d | Already scored: %d | Remaining: %d%n%n",
- allWords.size(), scoredWords.size(), workQueue.size());
-
- if (workQueue.isEmpty()) {
- System.out.println("All done!");
- return;
- }
-
- // Start result writer thread
- BlockingQueue> resultQueue = new LinkedBlockingQueue<>();
- var writerThread = startResultWriter(resultQueue);
-
- // Start worker threads
- var totalThreads = 0;
- for (var ep : ENDPOINTS) totalThreads += ep.maxConcurrent;
-
- var executor = Executors.newFixedThreadPool(totalThreads);
- var totalProcessed = new AtomicInteger(scoredWords.size());
-
- for (var endpoint : ENDPOINTS) {
- for (var i = 0; i < endpoint.maxConcurrent; i++) {
- executor.submit(() -> {
- processBatches(endpoint, workQueue, resultQueue, totalProcessed, allWords.size());
- });
- }
- }
-
- // Wait for completion
- executor.shutdown();
- executor.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);
-
- // Signal writer to stop
- resultQueue.put(Collections.singletonList(new WordScore(null, 0, "STOP")));
- writerThread.join();
-
- // Update hints in the database
-
- System.out.println("\n✓ All endpoints finished!");
- }
-
- // ===== WORKER THREAD LOGIC =====
- private static void processBatches(LLMEndpoint endpoint,
- BlockingQueue workQueue,
- BlockingQueue> resultQueue,
- AtomicInteger totalProcessed,
- int totalWords) {
-
- System.out.printf("[%s] Worker started%n", endpoint.name);
-
- while (!Thread.currentThread().isInterrupted()) {
- try {
- var work = workQueue.poll(1, TimeUnit.SECONDS);
- if (work == null) {
- if (workQueue.isEmpty()) break; // No more work in queue
- continue;
- }
-
- var scores = processWithRetry(endpoint, work.batch);
-
- // Add metadata
- scores.forEach(s -> {
- s.endpoint = endpoint.name;
- s.batchId = work.batchId;
- });
-
- resultQueue.put(scores);
-
- // Progress update
- var processed = totalProcessed.addAndGet(scores.size());
- if (processed % 100 < BATCH_SIZE) { // Reduce console spam
- System.out.printf("Progress: %d/%d (%.1f%%)%n",
- processed, totalWords, (processed * 100.0 / totalWords));
- }
-
- } catch (InterruptedException e) {
- Thread.currentThread().interrupt();
- break;
- } catch (Exception e) {
- System.err.printf("[%s] Fatal error: %s%n", endpoint.name, e.getMessage());
- break;
- }
- }
-
- System.out.printf("[%s] Worker stopped%n", endpoint.name);
- }
-
- private static List processWithRetry(LLMEndpoint endpoint, List batch) {
- var retries = 0;
-
- while (retries < MAX_RETRIES) {
- try {
- return endpoint.execute(batch);
- } catch (Exception e) {
- retries++;
- System.err.printf("[%s] Attempt %d/%d failed: %s%n",
- endpoint.name, retries, MAX_RETRIES, e.getMessage());
-
- if (retries >= MAX_RETRIES) {
- return createFailedScores(batch, endpoint.name);
- }
-
- try {
- Thread.sleep(2000L * retries);
- } catch (InterruptedException ie) {
- Thread.currentThread().interrupt();
- return createFailedScores(batch, endpoint.name);
- }
- }
- }
- return createFailedScores(batch, endpoint.name);
- }
-
- // ===== RESULT WRITER THREAD =====
- private static Thread startResultWriter(BlockingQueue> resultQueue) throws Exception {
- var writer = new BufferedWriter(new FileWriter(OUTPUT_SCORES, true));
- var isNew = Files.size(Paths.get(OUTPUT_SCORES)) == 0;
-
- if (isNew) {
- writer.write("word,score,status,endpoint,batch_id,timestamp\n");
- writer.flush();
- }
-
- var thread = new Thread(() -> {
- try {
- while (true) {
- var scores = resultQueue.take();
-
- // Stop signal
- if (scores.size() == 1 && scores.get(0).status.equals("STOP")) {
- break;
- }
-
- writeBatch(writer, scores);
- }
- writer.close();
- } catch (Exception e) {
- System.err.println("Writer thread error: " + e.getMessage());
- }
- });
-
- thread.start();
- return thread;
- }
-
- private static synchronized void writeBatch(BufferedWriter writer, List scores) throws Exception {
- var timestamp = Instant.now().toString();
- for (var ws : scores) {
- writer.write(String.format("%s,%d,%s,%s,%d,%s\n",
- ws.word, ws.score, ws.status, ws.endpoint, ws.batchId, timestamp));
- }
- writer.flush();
- }
-
- // ===== QUEUE & DATA STRUCTURES =====
- record WorkItem(int batchId, List batch) {
-
- }
-
- private static BlockingQueue createWorkQueue(List allWords, Set scored) {
- BlockingQueue queue = new LinkedBlockingQueue<>();
- var batchId = 0;
-
- for (var i = 0; i < allWords.size(); i += BATCH_SIZE) {
- List batch = new ArrayList<>();
- for (var j = i; j < Math.min(i + BATCH_SIZE, allWords.size()); j++) {
- var word = allWords.get(j);
- if (!scored.contains(word.toLowerCase())) {
- batch.add(word);
- }
- }
-
- if (!batch.isEmpty()) {
- queue.add(new WorkItem(batchId++, batch));
- }
- }
-
- return queue;
- }
-
- // ===== LOADING & PARSING =====
- private static Set loadAlreadyScoredWords() throws Exception {
- Set scored = new HashSet<>();
- var file = new File(OUTPUT_SCORES);
- if (!file.exists()) return scored;
-
- var lines = Files.readAllLines(file.toPath());
- var first = true;
- for (var line : lines) {
- if (first) {
- first = false;
- continue;
- }
- var parts = line.split(",");
- if (parts.length >= 3) {
- var word = parts[0].trim().toLowerCase();
- var status = parts[2].trim();
- if ("OK".equalsIgnoreCase(status)) {
- scored.add(word);
- }
- }
- }
- return scored;
- }
-
- private static void cleanupOutputFile() throws IOException {
- var path = Paths.get(OUTPUT_SCORES);
- if (!Files.exists(path)) return;
-
- System.out.println("Cleaning up " + OUTPUT_SCORES + "...");
- var lines = Files.readAllLines(path);
- if (lines.isEmpty()) return;
-
- var header = lines.get(0);
- Map latestOkEntries = new LinkedHashMap<>();
-
- for (int i = 1; i < lines.size(); i++) {
- var line = lines.get(i);
- var parts = line.split(",");
- if (parts.length >= 3) {
- var word = parts[0].trim().toLowerCase();
- var status = parts[2].trim();
- if ("OK".equalsIgnoreCase(status)) {
- latestOkEntries.put(word, line);
- }
- }
- }
-
- var cleanedLines = new ArrayList();
- cleanedLines.add(header);
- cleanedLines.addAll(latestOkEntries.values());
-
- Files.write(path, cleanedLines, StandardCharsets.UTF_8);
- System.out.printf("Cleanup complete. Kept %d unique OK entries. Removed %d non-OK or duplicate entries.%n",
- latestOkEntries.size(), lines.size() - cleanedLines.size());
- }
-
- private static List createFailedScores(List words, String endpoint) {
- List failed = new ArrayList<>();
- for (var word : words) {
- failed.add(new WordScore(word, -1, "FAILED", endpoint, -1));
- }
- return failed;
- }
-
- // Parsing logic
- private static List parseScoresFromReply(List expectedWords, String reply, String endpointName) {
- Map wordScoreMap = new HashMap<>();
- var lines = reply.split("\n");
-
- for (var line : lines) {
- line = line.trim();
- // Handle formats like "1. word:score", "word: score", "word - score"
- String sep = null;
- if (line.contains(":")) sep = ":";
- else if (line.contains("-")) sep = "-";
-
- if (sep != null) {
- var parts = line.split(sep, 2);
- if (parts.length == 2) {
- var wordPart = parts[0].trim();
- // Remove leading numbering like "1. " or bullets like "* ", "- "
- wordPart = wordPart.replaceAll("^[\\d+.)*\\-\\s]+", "");
- var word = wordPart.toLowerCase();
-
- try {
- var scoreStr = parts[1].trim();
- // Handle potential non-numeric junk after the number
- scoreStr = scoreStr.replaceAll("[^0-9].*", "");
- if (!scoreStr.isEmpty()) {
- var score = Integer.parseInt(scoreStr);
- wordScoreMap.put(word, Math.max(1, Math.min(10, score)));
- }
- } catch (NumberFormatException e) {
- // Skip invalid lines
- }
- }
- }
- }
-
- // Match scores to original words (maintaining order)
- List results = new ArrayList<>();
- for (var word : expectedWords) {
- var score = wordScoreMap.get(word.toLowerCase());
- if (score != null) {
- results.add(new WordScore(word, score, "OK"));
- } else {
- results.add(new WordScore(word, -1, "MISSING"));
- }
- }
-
- return results;
- }
-
- // Prompt creation
- private static String createScoringPrompt(List words) {
- return "Je bent een Nederlandse taalexpert. Geef elk van de " + words.size() + " onderstaande woorden een populariteitsscore van 1 (zeer zeldzaam) tot 10 (zeer algemeen).\n\n" +
- "Output ALLEEN in dit formaat:\n" +
- "woord1:score\n" +
- "woord2:score\n\n" +
- "GEEN andere tekst of uitleg. Sla GEEN woorden over.\n\n" +
- "Lijst:\n" +
- String.join("\n", words);
- }
-
- // Utility methods
- private static String escapeJson(String str) {
- return str.replace("\\", "\\\\")
- .replace("\"", "\\\"")
- .replace("\n", "\\n");
- }
-
- private static String curlPostJson(String url, String jsonBody, int timeoutSeconds) throws Exception {
- // Write JSON body to temp file to avoid shell escaping issues
- var tempFile = Files.createTempFile("lm-request-", ".json");
- try {
- Files.writeString(tempFile, jsonBody, StandardCharsets.UTF_8);
-
- List cmd = new ArrayList<>();
- cmd.add("curl");
- cmd.add("-fsSL");
- cmd.add("--connect-timeout");
- cmd.add("10");
- cmd.add("--max-time");
- cmd.add(String.valueOf(timeoutSeconds));
- cmd.add("-H");
- cmd.add("Content-Type: application/json");
- cmd.add("-d");
- cmd.add("@" + tempFile);
- cmd.add(url);
-
- var p = new ProcessBuilder(cmd)
- .redirectErrorStream(true)
- .start();
-
- var bytes = p.getInputStream().readAllBytes();
- var code = p.waitFor();
-
- if (code != 0) {
- throw new IOException("curl POST failed (" + code + ") url=" + url + "\nOutput:\n" +
- new String(bytes, StandardCharsets.UTF_8));
- }
-
- return new String(bytes, StandardCharsets.UTF_8);
- } finally {
- Files.deleteIfExists(tempFile);
- }
- }
-
- private static String extractChatContent(String json) {
- if (json == null) return null;
- var choices = json.indexOf("\"choices\"");
- var p = (choices >= 0) ? choices : 0;
- var i = json.indexOf("\"content\"", p);
- if (i < 0) return null;
- var colon = json.indexOf(':', i);
- if (colon < 0) return null;
- var q = json.indexOf('"', colon + 1);
- if (q < 0) return null;
- var sb = new StringBuilder();
- var esc = false;
- for (var k = q + 1; k < json.length(); k++) {
- var ch = json.charAt(k);
- if (esc) {
- if (ch == 'n') sb.append('\n');
- else if (ch == 't') sb.append('\t');
- else if (ch == 'r') sb.append('\r');
- else sb.append(ch);
- esc = false;
- } else {
- if (ch == '\\') esc = true;
- else if (ch == '"') break;
- else sb.append(ch);
- }
- }
- return sb.toString();
- }
-}
diff --git a/src/main/java/puzzle/ExportFormat.java b/src/main/java/puzzle/ExportFormat.java
index 8c71f93..dbd921c 100644
--- a/src/main/java/puzzle/ExportFormat.java
+++ b/src/main/java/puzzle/ExportFormat.java
@@ -39,7 +39,7 @@ public final class ExportFormat {
var word = clueMap.get(s.key());
if (word == null) continue;
- var p = extractPlacedFromSlot(s, word);
+ var p = extractPlacedFromSlot(puz.dict(),s, word);
if (p == null) continue;
placed.add(p);
}
@@ -121,7 +121,7 @@ public final class ExportFormat {
/**
* Convert a generator Slot + assigned word into a Placed object for export.
*/
- private static Placed extractPlacedFromSlot(Slot s, String word) {
+ private static Placed extractPlacedFromSlot(Dict dict,Slot s, String word) {
int r = s.clueR();
int c = s.clueC();
char d = s.dir();
@@ -168,7 +168,7 @@ public final class ExportFormat {
return new Placed(
word,
- word, // clue placeholder
+ dict.words().get(word).clue(), // clue placeholder
startRow,
startCol,
direction,
@@ -182,14 +182,9 @@ public final class ExportFormat {
}
// pack (r,c) into one long key (handles negatives too)
- private static long pack(int r, int c) {
- return (((long) r) << 32) ^ (c & 0xFFFFFFFFL);
- }
-
- // ---------- Data models ----------
-
+ private static long pack(int r, int c) { return (((long) r) << 32) ^ (c & 0xFFFFFFFFL); }
/**
- * @param direction "horizontal" | "vertical"
+ * @param direction "h" | "v"
* @param cells word cells
* @param arrow [arrowRow, arrowCol] */
private record Placed(String word, String clue, int startRow, int startCol, String direction, String answer, int arrowRow, int arrowCol, List cells, int[] arrow,
@@ -197,8 +192,7 @@ public final class ExportFormat {
public record Rewards(int coins, int stars, int hints) { }
- /**
- * @param direction "horizontal" | "vertical" */
+ /// @param direction "h" | "v"
public record WordOut(String word, String clue, int startRow, int startCol, String direction, String answer, int arrowRow, int arrowCol, boolean isReversed, int complex) { }
public record ExportedPuzzle(List gridv2, List words, int difficulty, Rewards rewards) { }
diff --git a/src/main/java/puzzle/Main.java b/src/main/java/puzzle/Main.java
index f0993b9..693d168 100644
--- a/src/main/java/puzzle/Main.java
+++ b/src/main/java/puzzle/Main.java
@@ -83,7 +83,7 @@ public class Main {
section("Clues");
info("status : generating...");
info("generatedFor : " + exported.words().size());
- exported = ClueGenerator.applyClues(exported);
+ //exported = ClueGenerator.applyClues(exported);
info("status : done");
section("Words");
diff --git a/src/main/java/puzzle/SwedishGenerator.java b/src/main/java/puzzle/SwedishGenerator.java
index e35b9b4..4a3b5d2 100644
--- a/src/main/java/puzzle/SwedishGenerator.java
+++ b/src/main/java/puzzle/SwedishGenerator.java
@@ -132,24 +132,22 @@ public class SwedishGenerator {
int[] data() { return a; } // note: may have extra capacity
}
- static final class DictEntry {
+ static record DictEntry(ArrayList words, IntList[][] pos) {
- final ArrayList words = new ArrayList<>();
- final IntList[][] pos; // pos[i][letter] -> indices (sorted by insertion)
- DictEntry(int L) {
- pos = new IntList[L][26];
+ public DictEntry(int L) {
+ this(new ArrayList<>(), new IntList[L][26]);
for (var i = 0; i < L; i++) {
for (var j = 0; j < 26; j++) pos[i][j] = new IntList();
}
}
}
- static record WordDifficulty(String word, int difficulty, int simpel, int score, int cross) {
+ static record WordDifficulty(String word, int difficulty, int simpel, int score, int cross, String clue) {
- public WordDifficulty(String word, int simpel, int score) {
+ public WordDifficulty(String word, int simpel, int score, String clue) {
var difficulty1 = 0 + ((8 - word.length()) * 30) + ((10 - score) * 15);
var crossScore = ThemePoolBuilderLength.crossabilityScore(word);
- this(word, difficulty1, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15));
+ this(word, difficulty1, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15), clue);
// Prioritize simple words (high lScore) and long words.
// lScore (1-10) adds up to 1000 points (weight 100).
@@ -163,7 +161,6 @@ public class SwedishGenerator {
}
}
-
public static record Dict(Map words,
HashMap index,
HashMap lenCounts) { }
@@ -193,8 +190,12 @@ public class SwedishGenerator {
// CSV has level 1-10. llmScores use 10-level.
score = 10 - Integer.parseInt(parts[1].trim());
simpel = Integer.parseInt(parts[2].trim());
+ var rawClue = parts[3].trim();
+ if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
+ rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
+ }
if (score >= 1)
- map.put(s, new WordDifficulty(s, simpel, score));
+ map.put(s, new WordDifficulty(s, simpel, score, rawClue));
}
}
var words = map.values().stream().collect(Collectors.toCollection(ArrayList::new));
@@ -682,11 +683,10 @@ public class SwedishGenerator {
System.out.flush();
};
- class Pick {
+ record Pick(Slot slot,
+ CandidateInfo info,
+ boolean done) {
- Slot slot;
- CandidateInfo info;
- boolean done;
}
java.util.function.Supplier chooseMRV = () -> {
@@ -699,22 +699,14 @@ public class SwedishGenerator {
var entry = dictIndex.get(s.len);
if (entry == null) {
- var p = new Pick();
- p.slot = null;
- p.info = null;
- p.done = false;
- return p;
+ return new Pick(null, null, false);
}
var pat = patternForSlot(grid, s);
var info = candidateInfoForPattern(entry, pat);
if (info.count == 0) {
- var p = new Pick();
- p.slot = null;
- p.info = null;
- p.done = false;
- return p;
+ return new Pick(null, null, false);
}
if (best == null
@@ -726,17 +718,11 @@ public class SwedishGenerator {
}
}
- var p = new Pick();
if (best == null) {
- p.slot = null;
- p.info = null;
- p.done = true;
+ return new Pick(null, null, true);
} else {
- p.slot = best;
- p.info = bestInfo;
- p.done = false;
+ return new Pick(best, bestInfo, false);
}
- return p;
};
final var MAX_TRIES_PER_SLOT = 2000;
@@ -868,9 +854,9 @@ public class SwedishGenerator {
public record PuzzleResult(Dict dict, char[][] mask, FillResult filled) { }
public static PuzzleResult generatePuzzle(Main.Opts opts) {
- var tLoad0 = System.nanoTime();
- var dict = loadWords(opts.wordsPath);
- var tLoad1 = System.nanoTime();
+ var tLoad0 = System.nanoTime();
+ var dict = loadWords(opts.wordsPath);
+ var tLoad1 = System.nanoTime();
System.out.printf(Locale.ROOT, "LOAD_WORDS: %.3fs%n %s words%n", (tLoad1 - tLoad0) / 1e9, dict.words.size());
if (opts.threads > 1) {
diff --git a/src/main/java/puzzle/WordScore.java b/src/main/java/puzzle/WordScore.java
deleted file mode 100644
index 9eec631..0000000
--- a/src/main/java/puzzle/WordScore.java
+++ /dev/null
@@ -1,24 +0,0 @@
-package puzzle;
-
-// ===== DATA CLASS =====
-class WordScore {
-
- String word;
- int score;
- String status;
- String endpoint;
- int batchId;
-
- WordScore(String word, int score, String status, String endpoint, int batchId) {
- this.word = word;
- this.score = score;
- this.status = status;
- this.endpoint = endpoint;
- this.batchId = batchId;
- }
- WordScore(String word, int score, String status) {
- this.word = word;
- this.score = score;
- this.status = status;
- }
-}
\ No newline at end of file
diff --git a/tools/hint/dbjsonl.sh b/tools/hint/dbjsonl.sh
new file mode 100755
index 0000000..936f4ec
--- /dev/null
+++ b/tools/hint/dbjsonl.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Import a JSONL file into a PostgreSQL jsonb column, one row per non-blank line.
+#
+# Usage:
+#   ./dbjsonl.sh "postgresql://user:pass@host:5432/dbname" gloss doc /path/to/file.jsonl
+#
+# Notes:
+# - Creates table if it doesn't exist.
+# - Inserts each JSON line into a jsonb column.
+# - Skips blank lines.
+
+DB_URL="${1:?db url}"
+TABLE="${2:?table name}"
+COL="${3:?json column name}"
+FILE="${4:?jsonl file path}"
+
+# TABLE and COL are spliced into the SQL text below, so accept plain identifiers only.
+ident_re='^[A-Za-z_][A-Za-z0-9_]*$'
+if [[ ! "$TABLE" =~ $ident_re || ! "$COL" =~ $ident_re ]]; then
+  echo "error: table and column names must match $ident_re" >&2
+  exit 2
+fi
+
+# FILE ends up inside a single-quoted \copy argument; fail early on unusable paths.
+if [[ ! -r "$FILE" || "$FILE" == *"'"* ]]; then
+  echo "error: cannot read '$FILE' (or the path contains a single quote)" >&2
+  exit 2
+fi
+
+# NOTE(review): the statements between the heredoc opener and the final count were
+# rebuilt from the notes above and the surviving _jsonl_stage query; confirm the
+# table layout against the intended schema.
+psql "$DB_URL" -v ON_ERROR_STOP=1 <<SQL
+CREATE TABLE IF NOT EXISTS $TABLE (
+  id bigserial PRIMARY KEY,
+  $COL jsonb NOT NULL
+);
+
+CREATE TEMP TABLE _jsonl_stage (line text);
+
+-- csv format with control-character delimiter/quote loads every line verbatim,
+-- so backslashes and double quotes inside the JSON are not reinterpreted
+\copy _jsonl_stage (line) FROM '$FILE' WITH (FORMAT csv, DELIMITER E'\x01', QUOTE E'\x02')
+
+INSERT INTO $TABLE ($COL)
+SELECT line::jsonb
+FROM _jsonl_stage
+WHERE btrim(line) <> '';
+
+-- optional: show count inserted this run
+SELECT count(*) AS inserted_now FROM _jsonl_stage WHERE btrim(line) <> '';
+SQL
diff --git a/tools/hint/jsonl-to-sqlite.mjs b/tools/hint/jsonl-to-sqlite.mjs
new file mode 100644
index 0000000..2a28806
--- /dev/null
+++ b/tools/hint/jsonl-to-sqlite.mjs
@@ -0,0 +1,77 @@
+// jsonl-to-sqlite.mjs
+//
+// Import a JSONL file into a SQLite table, storing each valid line as one row.
+// Usage: node jsonl-to-sqlite.mjs <file.jsonl> [out.sqlite] [table]
+//   file.jsonl  input, one JSON document per line; blank lines are ignored
+//   out.sqlite  database path (default: out.sqlite)
+//   table       destination table (default: events), created if missing
+import fs from 'node:fs'
+import readline from 'node:readline'
+import Database from 'better-sqlite3'
+
+const jsonlPath = process.argv[2]
+const dbPath = process.argv[3] ?? 'out.sqlite'
+const table = process.argv[4] ?? 'events'
+
+if (!jsonlPath) {
+  console.error('Usage: node jsonl-to-sqlite.mjs <file.jsonl> [out.sqlite] [table]')
+  process.exit(1)
+}
+
+// Identifiers cannot be bound as statement parameters, so the table name is
+// spliced into the SQL text; accept plain identifiers only.
+if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(table)) {
+  console.error(`Invalid table name: ${ table }`)
+  process.exit(1)
+}
+
+const db = new Database(dbPath)
+try {
+  db.pragma('journal_mode = WAL')
+
+  db.exec(`
+    CREATE TABLE IF NOT EXISTS "${ table }"
+    (
+      id   INTEGER PRIMARY KEY AUTOINCREMENT,
+      json TEXT NOT NULL
+    );
+  `)
+
+  const insert = db.prepare(`INSERT INTO "${ table }"(json) VALUES (?)`)
+  // Each batch is inserted inside a single transaction.
+  const insertMany = db.transaction((rows) => {
+    for (const r of rows) insert.run(r)
+  })
+
+  const rl = readline.createInterface({
+    input    : fs.createReadStream(jsonlPath, { encoding: 'utf8' }),
+    crlfDelay: Infinity
+  })
+
+  let batch = []
+  let lineNo = 0
+  for await (const line of rl) {
+    lineNo++
+    const trimmed = line.trim()
+    if (!trimmed) continue
+
+    try {
+      JSON.parse(trimmed) // validate only; the raw text is what gets stored
+    } catch (e) {
+      console.warn(`Skipping invalid JSON on line ${ lineNo }: ${ e.message }`)
+      continue
+    }
+    batch.push(trimmed)
+
+    if (batch.length >= 1000) {
+      insertMany(batch)
+      batch = []
+    }
+  }
+  if (batch.length) insertMany(batch)
+
+  console.log(`Done. Imported into ${ dbPath }, table=${ table }`)
+} finally {
+  // Close the database even when the import fails part-way.
+  db.close()
+}
diff --git a/tools/puzzle-gen/Dockerfile b/tools/puzzle-gen/Dockerfile
deleted file mode 100644
index df7db3e..0000000
--- a/tools/puzzle-gen/Dockerfile
+++ /dev/null
@@ -1,16 +0,0 @@
-FROM python:3.13-slim
-
-RUN apt-get update \
- && apt-get install -y --no-install-recommends ca-certificates tzdata curl \
- && rm -rf /var/lib/apt/lists/*
-
-# supercronic
-RUN curl -fsSL -o /usr/local/bin/supercronic \
- https://github.com/aptible/supercronic/releases/download/v0.2.30/supercronic-linux-amd64 \
- && chmod +x /usr/local/bin/supercronic
-
-WORKDIR /app
-COPY tools/puzzle-gen/generate_daily_puzzles.py /app/generate_daily_puzzles.py
-COPY tools/puzzle-gen/crontab /app/crontab
-
-CMD ["/usr/local/bin/supercronic", "/app/crontab"]
diff --git a/tools/puzzle-gen/crontab b/tools/puzzle-gen/crontab
deleted file mode 100644
index 85d3197..0000000
--- a/tools/puzzle-gen/crontab
+++ /dev/null
@@ -1 +0,0 @@
-15 3 * * * python /app/generate_daily_puzzles.py
\ No newline at end of file
diff --git a/tools/puzzle-gen/generate_daily_puzzles.py b/tools/puzzle-gen/generate_daily_puzzles.py
deleted file mode 100644
index 70f9bac..0000000
--- a/tools/puzzle-gen/generate_daily_puzzles.py
+++ /dev/null
@@ -1,399 +0,0 @@
-#!/usr/bin/env python3
-import datetime as dt
-import json
-import os
-import random
-import re
-import urllib.request
-import xml.etree.ElementTree as ET
-import json, re
-
-# --- USER-FRIENDLY CONFIG ---
-# Max 7 letters for shorter, more common words
-WORD_RE = re.compile(r"^[A-Z]{3,7}$")
-EMPTY = " "
-# Slightly smaller grid for denser puzzles
-SIZE = 10
-# More words needed since they're shorter
-TARGET_WORDS = 15
-MIN_ACCEPT_WORDS = 10
-
-FEEDS = [
- "https://feeds.nos.nl/nosnieuwsalgemeen",
- "https://feeds.nos.nl/nosnieuwstech",
-]
-
-
-def env(name, default=None):
- v = os.getenv(name)
- return default if v is None or v == "" else v
-
-
-def http_get(url, timeout=15):
- req = urllib.request.Request(url, headers={"User-Agent": "puzzle-gen/1.0"})
- with urllib.request.urlopen(req, timeout=timeout) as r:
- return r.read()
-
-
-def http_post_json(url, payload, timeout=45):
- data = json.dumps(payload).encode("utf-8")
- req = urllib.request.Request(
- url,
- data=data,
- headers={
- "Content-Type": "application/json",
- "Authorization": "Bearer lm-studio",
- "User-Agent": "puzzle-gen/1.0",
- },
- method="POST",
- )
- with urllib.request.urlopen(req, timeout=timeout) as r:
- return json.loads(r.read().decode("utf-8"))
-
-
-def fetch_rss_items(url, limit=12):
- raw = http_get(url)
- root = ET.fromstring(raw)
- channel = root.find("channel") if root.tag.lower().endswith("rss") else root
- items = []
- for it in channel.findall("item"):
- title = (it.findtext("title") or "").strip()
- desc = (it.findtext("description") or "").strip()
- if title:
- items.append((title, desc))
- if len(items) >= limit:
- break
- return items
-
-
-def safe_slug(s, maxlen=50):
- s = s.lower()
- s = re.sub(r"[^a-z0-9]+", "-", s).strip("-")
- return (s[:maxlen] or "news")
-
-
-def extract_first_json(text: str):
- """Parse first JSON value (object OR array) from any text."""
- if not text:
- return None
- starts = [i for i in (text.find("{"), text.find("[")) if i != -1]
- if not starts:
- return None
- i = min(starts)
- try:
- return json.JSONDecoder().raw_decode(text[i:])[0]
- except json.JSONDecodeError:
- return None
-
-
-def normalize_word(raw: str) -> str:
- # A-Z only, remove hyphens/digits/spaces/etc.
- w = re.sub(r"[^A-Za-z]", "", (raw or "")).upper()
- return w
-
-
-def sanitize_wordcluemap(obj):
- """
- Accepts:
- - dict: {"WORD":"clue", ...}
- - list: [{"word":"...","clue":"..."}, {"WOORD":"...","clue":"..."}, ...]
- Returns dict with keys A-Z 3..7 and non-empty clue.
- """
- out = {}
-
- if isinstance(obj, dict):
- items = list(obj.items())
- elif isinstance(obj, list):
- items = []
- for it in obj:
- if not isinstance(it, dict):
- continue
- raw_word = it.get("word") or it.get("WOORD") or it.get("Word")
- clue = it.get("clue") or it.get("CLUE") or it.get("hint") or it.get("HINT")
- items.append((raw_word, clue))
- else:
- return out
-
- for raw_word, clue in items:
- if not isinstance(raw_word, str) or not isinstance(clue, str):
- continue
- w = normalize_word(raw_word)
- if not WORD_RE.fullmatch(w):
- continue
- clue = clue.strip()
- if not clue:
- continue
- out[w] = clue
-
- return out
-
-
-# ---- generator (no-touch) ----
-def make_grid():
- return [[EMPTY for _ in range(SIZE)] for _ in range(SIZE)]
-
-
-def in_bounds(g, r, c):
- return 0 <= r < len(g) and 0 <= c < len(g[0])
-
-
-def can_place_notouch(g, word, r, c, direction):
- H, W = len(g), len(g[0])
- if r < 0 or c < 0:
- return False
- if direction == "horizontal" and c + len(word) > W:
- return False
- if direction == "vertical" and r + len(word) > H:
- return False
-
- # no "glue" before/after
- br = r if direction == "horizontal" else r - 1
- bc = c - 1 if direction == "horizontal" else c
- if in_bounds(g, br, bc) and g[br][bc] != EMPTY:
- return False
-
- ar = r if direction == "horizontal" else r + len(word)
- ac = c + len(word) if direction == "horizontal" else c
- if in_bounds(g, ar, ac) and g[ar][ac] != EMPTY:
- return False
-
- for i, ch in enumerate(word):
- rr = r if direction == "horizontal" else r + i
- cc = c + i if direction == "horizontal" else c
- cell = g[rr][cc]
- crossing = cell != EMPTY
- if crossing and cell != ch:
- return False
-
- if not crossing:
- if direction == "horizontal":
- if in_bounds(g, rr - 1, cc) and g[rr - 1][cc] != EMPTY: return False
- if in_bounds(g, rr + 1, cc) and g[rr + 1][cc] != EMPTY: return False
- else:
- if in_bounds(g, rr, cc - 1) and g[rr][cc - 1] != EMPTY: return False
- if in_bounds(g, rr, cc + 1) and g[rr][cc + 1] != EMPTY: return False
- return True
-
-
-def place_word(g, word, r, c, direction):
- for i, ch in enumerate(word):
- rr = r if direction == "horizontal" else r + i
- cc = c + i if direction == "horizontal" else c
- g[rr][cc] = ch
-
-
-def find_spots(g, word, placed):
- spots = []
- for p in placed:
- pw = p["word"]
- for i, pch in enumerate(pw):
- pr = p["row"] if p["direction"] == "horizontal" else p["row"] + i
- pc = p["col"] + i if p["direction"] == "horizontal" else p["col"]
- for j, wch in enumerate(word):
- if wch != pch:
- continue
- direction = "vertical" if p["direction"] == "horizontal" else "horizontal"
- r = pr if direction == "horizontal" else pr - j
- c = pc - j if direction == "horizontal" else pc
- if can_place_notouch(g, word, r, c, direction):
- spots.append((r, c, direction))
- return spots
-
-
-def generate_puzzle(wordcluemap, rnd):
- words = sorted(wordcluemap.keys(), key=len, reverse=True)
- g = make_grid()
- placed = []
-
- first = words[0]
- sr = SIZE // 2
- sc = (SIZE - len(first)) // 2
- if not can_place_notouch(g, first, sr, sc, "horizontal"):
- return None
- place_word(g, first, sr, sc, "horizontal")
- placed.append({"word": first, "clue": wordcluemap[first], "row": sr, "col": sc, "direction": "horizontal"})
-
- for w in words[1:]:
- spots = find_spots(g, w, placed)
- rnd.shuffle(spots)
- if not spots:
- continue
- r, c, d = spots[0]
- place_word(g, w, r, c, d)
- placed.append({"word": w, "clue": wordcluemap[w], "row": r, "col": c, "direction": d})
-
- return {"grid": g, "placed": placed}
-
-
-def export_format(puz, difficulty=1, rewards=None):
- if rewards is None:
- rewards = {"coins": 50, "stars": 2, "hints": 1}
-
- g = puz["grid"]
- placed = puz["placed"]
- H, W = len(g), len(g[0])
-
- cells = []
- for p in placed:
- for i in range(len(p["word"])):
- r = p["row"] if p["direction"] == "horizontal" else p["row"] + i
- c = p["col"] + i if p["direction"] == "horizontal" else p["col"]
- cells.append((r, c))
- # arrow cell: before the start
- ar = p["row"] if p["direction"] == "horizontal" else p["row"] - 1
- ac = p["col"] - 1 if p["direction"] == "horizontal" else p["col"]
- cells.append((ar, ac))
-
- minR = min(r for r, _ in cells) - 1
- minC = min(c for _, c in cells) - 1
- maxR = max(r for r, _ in cells) + 1
- maxC = max(c for _, c in cells) + 1
-
- def ch_at(r, c):
- if r < 0 or c < 0 or r >= H or c >= W:
- return "#"
- ch = g[r][c]
- return "#" if ch == EMPTY else ch
-
- gridv2 = []
- for r in range(minR, maxR + 1):
- row = "".join(ch_at(r, c) for c in range(minC, maxC + 1))
- gridv2.append(row)
-
- words_out = []
- for p in placed:
- arrowRow = (p["row"] if p["direction"] == "horizontal" else p["row"] - 1) - minR
- arrowCol = (p["col"] - 1 if p["direction"] == "horizontal" else p["col"]) - minC
- words_out.append({
- "word": p["word"],
- "clue": p["clue"],
- "startRow": p["row"] - minR,
- "startCol": p["col"] - minC,
- "direction": p["direction"],
- "answer": p["word"],
- "arrowRow": arrowRow,
- "arrowCol": arrowCol,
- })
-
- return {"gridv2": gridv2, "words": words_out, "difficulty": difficulty, "rewards": rewards}
-
-
-def list_models(base_url):
- try:
- data = json.loads(http_get(f"{base_url}/models").decode("utf-8"))
- return [m.get("id") for m in data.get("data", []) if m.get("id")]
- except Exception:
- return []
-
-
-def llm_make_wordcluemap(base_url, model, title, desc, n_words=12):
- prompt = f"""
-Geef ALLEEN een JSON object terug (geen array, geen markdown).
-Formaat exact:
-{{
- "WOORD": "clue",
- ...
-}}
-
-REGELS:
-- WOORD: alleen letters A-Z, geen streepjes/cijfers, lengte 3..7.
-- Gebruik KORTE, GEBRUIKELIJKE Nederlandse woorden (geen jargon, geen moeilijke termen).
-- Clue: korte, duidelijke hint in het Nederlands.
-- Maak {n_words} items.
-Thema: {title}
-Context: {desc[:260]}
-""".strip()
-
- payload = {
- "model": model,
- "temperature": 0.7,
- "messages": [
- {"role": "system", "content": "Return STRICT JSON object only."},
- {"role": "user", "content": prompt},
- ],
- }
-
- data = http_post_json(f"{base_url}/chat/completions", payload)
- content = data["choices"][0]["message"]["content"]
- obj = extract_first_json(content)
- wc = sanitize_wordcluemap(obj)
-
- # Aggressive repair for short words
- if len(wc) < MIN_ACCEPT_WORDS:
- repair = f"""
-Zet dit om naar een STRICT JSON OBJECT (geen array) "WOORD":"clue".
-KRITIEK:
-- WOORD: A-Z only, lengte 3..7. GEEN lange woorden!
-- Gebruik ALLEEN korte, bekende Nederlandse woorden bij twijfel.
-- Vervang ongeldige/moeilijke woorden door veelvoorkomende synoniemen.
-Input:
-{content}
-""".strip()
-
- payload["messages"] = [
- {"role": "system", "content": "Return STRICT JSON object only."},
- {"role": "user", "content": repair},
- ]
- data = http_post_json(f"{base_url}/chat/completions", payload)
- content2 = data["choices"][0]["message"]["content"]
- obj2 = extract_first_json(content2)
- wc2 = sanitize_wordcluemap(obj2)
- if len(wc2) > len(wc):
- wc = wc2
-
- return wc
-
-
-def main():
- base_url = env("LM_STUDIO_BASE_URL", "http://192.168.1.159:1234/v1")
- out_dir = env("OUT_DIR", "/data/puzzles")
- per_day = int(env("PUZZLES_PER_DAY", "3"))
- today = dt.date.today().isoformat()
- rnd = random.Random(today)
-
- os.makedirs(out_dir, exist_ok=True)
-
- items = []
- for f in FEEDS:
- try:
- items.extend(fetch_rss_items(f))
- except Exception:
- pass
- if not items:
- raise SystemExit("No RSS items found")
-
- models = list_models(base_url)
- model = env("LM_MODEL", models[0] if models else "model-identifier")
-
- made = 0
- for idx in range(1, per_day + 1):
- title, desc = rnd.choice(items)
- slug = safe_slug(title)
-
- wc = llm_make_wordcluemap(base_url, model, title, desc, n_words=TARGET_WORDS)
- # Stricter validation: need more words since they're shorter
- if len(wc) < MIN_ACCEPT_WORDS:
- continue
-
- puz = generate_puzzle(wc, rnd)
- # Require at least 7 placed words for a decent puzzle
- if not puz or len(puz["placed"]) < 7:
- continue
-
- exported = export_format(puz, difficulty=1, rewards={"coins": 50, "stars": 2, "hints": 1})
- fn = f"crossword_{today}_{idx:02d}_{slug}.json"
- path = os.path.join(out_dir, fn)
- with open(path, "w", encoding="utf-8") as fp:
- json.dump(exported, fp, ensure_ascii=False, indent=2)
- made += 1
-
- # index.json (handig voor je frontend)
- files = sorted([f for f in os.listdir(out_dir) if f.startswith(f"crossword_{today}_") and f.endswith(".json")])
- with open(os.path.join(out_dir, "index.json"), "w", encoding="utf-8") as fp:
- json.dump({"date": today, "files": files}, fp, ensure_ascii=False, indent=2)
-
- print(f"Generated {made} puzzles for {today}")
-
-
-if __name__ == "__main__":
- main()
\ No newline at end of file