diff --git a/Dockerfile b/Dockerfile index 0cace24..239eb59 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,7 +11,7 @@ WORKDIR /app # Copy source files COPY src/ /app/src/ -COPY word-list.txt /app/word-list.txt +COPY export_real_words_with_hints.csv /app/export_real_words_with_hints.csv COPY compile.sh /app/compile.sh COPY docker-entrypoint.sh /app/docker-entrypoint.sh COPY crontab /app/crontab @@ -19,7 +19,8 @@ COPY crontab /app/crontab # Compile Java code RUN chmod +x /app/compile.sh && \ mkdir -p /app/target && \ - javac -d /app/target src/puzzle/*.java + cp src/puzzle/postgresql-42.7.8.jar /app/target/ && \ + javac -cp /app/target/postgresql-42.7.8.jar -d /app/target src/puzzle/*.java # Create output directory RUN mkdir -p /data/puzzles diff --git a/README.md b/README.md index ac34674..acf6022 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ A high-performance Java-based puzzle generator with theme-based word filtering a - Edit distance similarity matching - Automatic theme detection -3. **DailyGenerator.java** - Daily puzzle automation +3. **Main.java** - Core generator and daily automation - Generates themed puzzles - JSON output with metadata - Index file generation @@ -47,7 +47,6 @@ A high-performance Java-based puzzle generator with theme-based word filtering a java -cp ~/dev/.target puzzle.Main --seed 42 --pop 18 --gens 100 # Generate daily puzzles -java -cp ~/dev/.target puzzle.DailyGenerator ``` ### Docker Deployment @@ -153,13 +152,13 @@ Puzzles are generated daily at **3:15 AM** (configurable in `crontab`). Edit `crontab` to change schedule: ```cron # Daily at 3:15 AM -15 3 * * * java -cp /app/target puzzle.DailyGenerator +15 3 * * * java -cp /app/target puzzle.Main # Every 6 hours -0 */6 * * * java -cp /app/target puzzle.DailyGenerator +0 */6 * * * java -cp /app/target puzzle.Main # Weekly on Monday at 1 AM -0 1 * * 1 java -cp /app/target puzzle.DailyGenerator +0 1 * * 1 java -cp /app/target puzzle.Main ``` ## Word List Format @@ -208,7 +207,7 @@ The Java version maintains module-wise compatibility with the Node.js generator: |------------------------|-------------------------------------| | `swedish_generator.js` | `SwedishGenerator.java` | | `export_format.js` | `ExportFormat.java` | -| `main.js` | `Main.java` + `DailyGenerator.java` | +| `main.js` | `Main.java` | | N/A | `ThemeGraph.java` (new) | ## Volume Management diff --git a/compile.sh b/compile.sh index 91f6fb0..fa24d40 100755 --- a/compile.sh +++ b/compile.sh @@ -1,4 +1,4 @@ #!/bin/bash TARGET=${1:-~/dev/.target} mkdir -p "$TARGET" -javac -d "$TARGET" src/puzzle/*.java +javac -cp src/puzzle/postgresql-42.7.8.jar -d "$TARGET" src/puzzle/*.java diff --git a/crontab b/crontab index 06fb0d0..933e8f2 100644 --- a/crontab +++ b/crontab @@ -1,2 +1,2 @@ # Generate puzzles daily at 3:15 AM -15 3 * * * java -cp /app/target puzzle.DailyGenerator >> /var/log/cron.log 2>&1 +15 3 * * * java -cp /app/target puzzle.Main >> /var/log/cron.log 2>&1 diff --git a/docker-compose.yml b/docker-compose.yml index e12642c..6e1edc0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,18 +1,22 @@ services: - puzzle_gen: + puzzle_create_one: build: context: ${PUZZLE_ROOT_DIR:-/opt/apps/puzzle} - dockerfile: tools/puzzle-gen/Dockerfile - container_name: puzzle_gen - restart: unless-stopped + dockerfile: Dockerfile + container_name: puzzle_create_one + restart: "no" networks: [ traefik_net ] environment: TZ: Europe/Amsterdam + OUT_DIR: /data/puzzle + WORDS_PATH: "/app/export_real_words_with_hints.csv" LM_STUDIO_BASE_URL: "http://192.168.1.159:1234/v1" - PUZZLES_PER_DAY: "3" + GENERATE_ON_START: "true" + START_CLASS: "puzzle.Main" + SCORES_PATH: "/app/export_real_words_with_hints.csv" volumes: - - puzzles_data:/data/puzzles:rw + - puzzles_data:/data/puzzle:rw update_hints: build: @@ -45,13 +49,15 @@ services: networks: [ traefik_net ] environment: TZ: Europe/Amsterdam - OUT_DIR: /data/puzzles + OUT_DIR: /data/puzzle + WORDS_PATH: "/app/export_real_words_with_hints.csv" + SCORES_PATH: "/app/export_real_words_with_hints.csv" PUZZLES_PER_DAY: "3" LM_STUDIO_BASE_URL: "http://192.168.1.159:1234/v1" THEME_FILTER: "true" THEME_MIN_SCORE: "0.6" volumes: - - puzzles_data:/data/puzzles:rw + - puzzles_data:/data/puzzle:rw volumes: puzzles_data: diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 9c4c674..f721f32 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -8,12 +8,14 @@ echo "Puzzles per day: ${PUZZLES_PER_DAY}" echo "" # Ensure output directory exists -mkdir -p "${OUT_DIR}" +mkdir -p "${OUT_DIR}/puzzles" # Generate initial puzzle on startup (optional) if [ "${GENERATE_ON_START}" = "true" ]; then echo "Generating initial puzzles..." - java -cp /app/target puzzle.DailyGenerator + START_CLASS=${START_CLASS:-puzzle.Main} + echo "Running ${START_CLASS}..." + java -cp /app/target/postgresql-42.7.8.jar:/app/target ${START_CLASS} echo "" fi diff --git a/package.json b/package.json new file mode 100644 index 0000000..edf2f03 --- /dev/null +++ b/package.json @@ -0,0 +1,13 @@ +{ + "name": "puzzle-generator", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "private": true, + "dependencies": { + "better-sqlite3": "^12.5.0" + } +} diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..d1934b9 --- /dev/null +++ b/pom.xml @@ -0,0 +1,67 @@ + + 4.0.0 + + puzzle + tools + 0.0.1 + + + 25 + 25 + + + + + org.postgresql + postgresql + 42.7.3 + + + org.xerial + sqlite-jdbc + 3.46.1.0 + + + + org.slf4j + slf4j-api + 2.0.13 + + + + + org.slf4j + slf4j-simple + 2.0.13 + runtime + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.6.0 + + + package + + shade + + + false + + + HintScores + + + tools-all + + + + + + + diff --git a/py/import.py b/py/import.py deleted file mode 100644 index 80813a8..0000000 --- a/py/import.py +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import json -import re -import sqlite3 -from pathlib import Path - -RE_ASCII_WORD = re.compile(r"^[A-Za-z]+$") -RE_SPACE = re.compile(r"\s+") -RE_PARENS = re.compile(r"\s*\([^)]*\)\s*") # verwijder (labels) -RE_BRACKETS = re.compile(r"\s*\[[^]]*]\s*") # verwijder [labels] - -def clean_hint(s: str) -> str: - s = s.strip() - s = RE_BRACKETS.sub(" ", s) - s = RE_PARENS.sub(" ", s) - s = s.replace("’", "'") - s = RE_SPACE.sub(" ", s).strip(" -;:,.\t") - return s - -def pick_gloss(obj: dict) -> tuple[str | None, str | None]: - """Return (hint, pos) from a Wiktextract JSON line.""" - pos = obj.get("pos") - senses = obj.get("senses") or [] - best = None - - for s in senses: - glosses = s.get("glosses") or [] - if not glosses: - continue - # Neem de eerste gloss die "normaal" oogt - for g in glosses: - if not isinstance(g, str): - continue - g2 = clean_hint(g) - if len(g2) < 3: - continue - best = g2 - break - if best: - break - - return best, pos - -def main(): - ap = argparse.ArgumentParser() - ap.add_argument("--db", required=True, help="pad naar jouw sqlite db") - ap.add_argument("--jsonl", required=True, help="pad naar nl-extract.jsonl") - ap.add_argument("--minlen", type=int, default=2) - ap.add_argument("--maxlen", type=int, default=8) - ap.add_argument("--maxhint", type=int, default=80) - args = ap.parse_args() - - db_path = Path(args.db) - jsonl_path = Path(args.jsonl) - - con = sqlite3.connect(db_path) - cur = con.cursor() - - # speed pragmas (alleen tijdens import) - cur.execute("PRAGMA journal_mode=WAL;") - cur.execute("PRAGMA synchronous=NORMAL;") - cur.execute("PRAGMA temp_store=MEMORY;") - - cur.execute(""" - CREATE TABLE IF NOT EXISTS hints ( - word TEXT NOT NULL, - hint TEXT NOT NULL, - source TEXT NOT NULL DEFAULT 'wiktionary', - pos TEXT, - quality INTEGER NOT NULL DEFAULT 80, - PRIMARY KEY (word, hint, source) - ); - """) - cur.execute("CREATE INDEX IF NOT EXISTS idx_hints_word ON hints(word);") - con.commit() - - batch = [] - inserted = 0 - seen = 0 - - con.execute("BEGIN;") - with jsonl_path.open("r", encoding="utf-8") as f: - for line in f: - line = line.strip() - if not line: - continue - try: - obj = json.loads(line) - except json.JSONDecodeError: - continue - - # Kaikki/Wiktextract: vaak lang_code = "nl" en lang = "Dutch" - lang_code = obj.get("lang_code") - if lang_code and lang_code != "nl": - continue - - word = obj.get("word") - if not word: - continue - - word_up = word.upper().strip() - if not (args.minlen <= len(word_up) <= args.maxlen): - continue - if not RE_ASCII_WORD.match(word_up): - continue - - hint, pos = pick_gloss(obj) - if not hint: - continue - - # Hint kort houden - hint = hint[: args.maxhint].rstrip() - - # Simpele kwaliteit: iets hoger als POS bekend is - quality = 85 if pos else 80 - - batch.append((word_up, hint, "wiktionary", pos, quality)) - seen += 1 - - if len(batch) >= 2000: - cur.executemany( - "INSERT OR IGNORE INTO hints(word,hint,source,pos,quality) VALUES (?,?,?,?,?)", - batch - ) - inserted += cur.rowcount if cur.rowcount != -1 else 0 - batch.clear() - - if batch: - cur.executemany( - "INSERT OR IGNORE INTO hints(word,hint,source,pos,quality) VALUES (?,?,?,?,?)", - batch - ) - inserted += cur.rowcount if cur.rowcount != -1 else 0 - - con.commit() - con.close() - - print(f"Done. processed_lines≈{seen}, inserted≈{inserted} (OR IGNORE kan inserts verlagen).") - -if __name__ == "__main__": - main() diff --git a/requirements-marker.txt b/requirements-marker.txt deleted file mode 100644 index 4ab456a..0000000 --- a/requirements-marker.txt +++ /dev/null @@ -1,9 +0,0 @@ -# PyTorch with CUDA 12.4 support ---index-url https://download.pytorch.org/whl/cu124 -torch -torchvision -torchaudio - -# Transformers and marker -transformers -marker-pdf diff --git a/run.sh b/run.sh index 20c3ad9..4783f43 100755 --- a/run.sh +++ b/run.sh @@ -1,3 +1,2 @@ #!/bin/bash java -cp ~/dev/.target puzzle.Main "$@" -java -cp ~/dev/.target puzzle.DailyGenerator "$@" diff --git a/src/main/java/puzzle/ClueGenerator.java b/src/main/java/puzzle/ClueGenerator.java deleted file mode 100644 index effa935..0000000 --- a/src/main/java/puzzle/ClueGenerator.java +++ /dev/null @@ -1,222 +0,0 @@ -package puzzle; - -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.*; -import static puzzle.ExportFormat.*; - -public class ClueGenerator { - - private static final String OLLAMA_URL = "http://localhost:11434/api/chat"; - private static final String MODEL = "qwen2.5:14b"; - private static final String HINTS_FILE = "/home/mike/dev/puzzle-generator/nl_score_hints.csv"; - private static Map prebuiltClues = null; - - private static synchronized void ensurePrebuiltCluesLoaded() { - if (prebuiltClues != null) return; - prebuiltClues = new HashMap<>(); - try { - var lines = Files.readAllLines(Path.of(HINTS_FILE), StandardCharsets.UTF_8); - for (var line : lines) { - var parts = line.split(",", 4); - if (parts.length >= 4) { - var word = parts[0].trim().toUpperCase(Locale.ROOT); - var rawClue = parts[3].trim(); - if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) { - rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\""); - } - if (!word.isEmpty() && !rawClue.isEmpty()) { - prebuiltClues.put(word, rawClue); - } - } - } - } catch (IOException e) { - System.err.println("Warning: " + HINTS_FILE + " not found or could not be read."); - } - } - - public static ExportedPuzzle applyClues(ExportedPuzzle puzzle) { - if (puzzle == null || puzzle.words().isEmpty()) { - return puzzle; - } - - ensurePrebuiltCluesLoaded(); - - Map finalClueMap = new HashMap<>(); - List wordsMissingClues = new ArrayList<>(); - - for (var w : puzzle.words()) { - var wordUpper = w.word().toUpperCase(Locale.ROOT); - if (prebuiltClues.containsKey(wordUpper)) { - finalClueMap.put(w.word(), prebuiltClues.get(wordUpper)); - } else { - wordsMissingClues.add(w.word()); - } - } - - if (!wordsMissingClues.isEmpty()) { - var generatedClues = generateClues(wordsMissingClues); - finalClueMap.putAll(generatedClues); - } - - List wordsWithClues = new ArrayList<>(); - for (var w : puzzle.words()) { - var clue = finalClueMap.getOrDefault(w.word(), w.word()); - wordsWithClues.add(new WordOut( - w.word(), - clue, - w.startRow(), - w.startCol(), - w.direction(), - w.answer(), - w.arrowRow(), - w.arrowCol(), - w.isReversed(), - w.complex() - )); - } - - return new ExportedPuzzle(puzzle.gridv2(), wordsWithClues, puzzle.difficulty(), puzzle.rewards()); - } - - public static Map generateClues(List words) { - if (words == null || words.isEmpty()) { - return Collections.emptyMap(); - } - - var prompt = createCluePrompt(words); - try { - var jsonRequest = String.format( - "{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"%s\"}],\"stream\":false,\"temperature\":0.7}", - MODEL, escapeJson(prompt) - ); - - var responseBody = curlPostJson(OLLAMA_URL, jsonRequest, 120); - var content = extractChatContent(responseBody); - - if (content == null || content.isEmpty()) { - return Collections.emptyMap(); - } - - return parseCluesFromReply(words, content); - } catch (Exception e) { - System.err.println("Failed to generate clues: " + e.getMessage()); - return Collections.emptyMap(); - } - } - - private static String createCluePrompt(List words) { - return "Je bent een expert in het maken van kruiswoordpuzzels. Geef voor elk van de onderstaande woorden een korte, uitdagende maar duidelijke cryptische of beschrijvende aanwijzing in het Nederlands.\n\n" + - "Output ALLEEN in dit formaat:\n" + - "woord1:aanwijzing\n" + - "woord2:aanwijzing\n\n" + - "GEEN andere tekst of uitleg. Sla GEEN woorden over.\n\n" + - "Lijst:\n" + - String.join("\n", words); - } - - private static Map parseCluesFromReply(List expectedWords, String reply) { - Map wordClueMap = new HashMap<>(); - var lines = reply.split("\n"); - - for (var line : lines) { - line = line.trim(); - if (line.contains(":")) { - var parts = line.split(":", 2); - if (parts.length == 2) { - var wordPart = parts[0].trim().replaceAll("^[\\d+.)*\\-\\s]+", "").toLowerCase(); - var clue = parts[1].trim(); - if (!clue.isEmpty()) { - wordClueMap.put(wordPart, clue); - } - } - } - } - - Map results = new HashMap<>(); - for (var word : expectedWords) { - var clue = wordClueMap.get(word.toLowerCase()); - if (clue != null) { - results.put(word, clue); - } - } - return results; - } - - private static String curlPostJson(String url, String jsonBody, int timeoutSeconds) throws Exception { - var tempFile = Files.createTempFile("clue-request-", ".json"); - try { - Files.writeString(tempFile, jsonBody, StandardCharsets.UTF_8); - List cmd = new ArrayList<>(); - cmd.add("curl"); - cmd.add("-fsSL"); - cmd.add("--connect-timeout"); - cmd.add("10"); - cmd.add("--max-time"); - cmd.add(String.valueOf(timeoutSeconds)); - cmd.add("-H"); - cmd.add("Content-Type: application/json"); - cmd.add("-d"); - cmd.add("@" + tempFile); - cmd.add(url); - - var p = new ProcessBuilder(cmd) - .redirectErrorStream(true) - .start(); - - var bytes = p.getInputStream().readAllBytes(); - var code = p.waitFor(); - - if (code != 0) { - throw new IOException("curl POST failed (" + code + ") url=" + url + "\nOutput:\n" + - new String(bytes, StandardCharsets.UTF_8)); - } - - return new String(bytes, StandardCharsets.UTF_8); - } finally { - Files.deleteIfExists(tempFile); - } - } - - private static String extractChatContent(String json) { - if (json == null) return null; - var choices = json.indexOf("\"choices\""); - var p = (choices >= 0) ? choices : 0; - var i = json.indexOf("\"content\"", p); - if (i < 0) { - // Fallback for Ollama non-chat format if needed, but we used /api/chat - // Ollama /api/chat returns {"model":"...","message":{"role":"assistant","content":"..."}} - i = json.indexOf("\"content\""); - if (i < 0) return null; - } - var colon = json.indexOf(':', i); - if (colon < 0) return null; - var q = json.indexOf('"', colon + 1); - if (q < 0) return null; - var sb = new StringBuilder(); - var esc = false; - for (var k = q + 1; k < json.length(); k++) { - var ch = json.charAt(k); - if (esc) { - if (ch == 'n') sb.append('\n'); - else if (ch == 't') sb.append('\t'); - else if (ch == 'r') sb.append('\r'); - else sb.append(ch); - esc = false; - } else { - if (ch == '\\') esc = true; - else if (ch == '"') break; - else sb.append(ch); - } - } - return sb.toString(); - } - - private static String escapeJson(String str) { - return str.replace("\\", "\\\\") - .replace("\"", "\\\"") - .replace("\n", "\\n"); - } -} diff --git a/src/main/java/puzzle/ConcurrentWordScorer.java b/src/main/java/puzzle/ConcurrentWordScorer.java deleted file mode 100644 index d059705..0000000 --- a/src/main/java/puzzle/ConcurrentWordScorer.java +++ /dev/null @@ -1,532 +0,0 @@ -package puzzle; - -import java.nio.charset.StandardCharsets; -import java.nio.file.*; -import java.util.*; -import java.util.concurrent.*; -import java.io.*; -import java.time.*; -import java.util.concurrent.atomic.*; - -/** - * CONCURRENT MULTI-ENDPOINT Dutch Wordlist Scorer - * Distributes batches across Ollama, LM-Studio, and a third endpoint simultaneously - */ -public class ConcurrentWordScorer { - - // ===== CONFIGURATION ===== - private static final String INPUT_WORDLIST = "word-list.txt"; - private static final String OUTPUT_SCORES = "word_scores.csv"; - private static final int BATCH_SIZE = 10; // Even smaller for the difficult remaining words - private static final int MAX_RETRIES = 3; - - // Define all three endpoints - private static final LLMEndpoint[] ENDPOINTS = { - new OllamaEndpoint(), - new LMStudioEndpoint(), - new LMStudioEndpoint("LM-Studio", "http://192.168.1.74:1234/v1/chat/completions", - "mistralai/mistral-nemo-instruct-2407", 1) - // new CustomEndpoint() - }; - - // ===== ENDPOINT CLASSES ===== - abstract static class LLMEndpoint { - - String name; - String baseUrl; - String model; - Semaphore rateLimiter; // Per-endpoint rate limiting - - int maxConcurrent; - - LLMEndpoint(String name, String baseUrl, String model, int maxConcurrent) { - this.name = name; - this.baseUrl = baseUrl; - this.model = model; - this.maxConcurrent = maxConcurrent; - this.rateLimiter = new Semaphore(maxConcurrent); - } - - abstract String buildRequestJson(String prompt); - abstract String extractResponseContent(String responseBody); - - // Rate-limited request execution - List execute(List batch) throws Exception { - rateLimiter.acquire(); // Wait for slot - try { - return executeInternal(batch); - } finally { - rateLimiter.release(); - } - } - - private List executeInternal(List batch) throws Exception { - var prompt = createScoringPrompt(batch); - var jsonRequest = buildRequestJson(prompt); - - var responseBody = curlPostJson(baseUrl, jsonRequest, 120); - var content = extractResponseContent(responseBody); - - if (content == null || content.isEmpty()) { - throw new IOException("[" + name + "] Empty response content"); - } - - return parseScoresFromReply(batch, content, name); - } - } - - static class OllamaEndpoint - extends LLMEndpoint { - - OllamaEndpoint() { - super("Ollama", "http://localhost:11434/api/chat", - "qwen2.5:14b", 1); // 2 concurrent requests - } - - @Override String buildRequestJson(String prompt) { - return String.format("{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"%s\"}],\"stream\":false,\"temperature\":0.1}", - model, escapeJson(prompt)); - } - - @Override String extractResponseContent(String responseBody) { - // Ollama uses "message" -> "content" - var start = responseBody.indexOf("\"content\":\"") + 11; - var end = responseBody.indexOf("\"", start); - if (start < 11 || end < 0) return ""; - return responseBody.substring(start, end).replace("\\n", "\n"); - } - } - - static class LMStudioEndpoint - extends LLMEndpoint { - - LMStudioEndpoint() { - super("LM-Studio", "http://192.168.1.159:1234/v1/chat/completions", - "mistralai/mistral-nemo-instruct-2407", 1); // LM-Studio can handle more - } - public LMStudioEndpoint(String s, String url, String s1, int i) { - super( - s, url, s1, i - ); - } - - @Override String buildRequestJson(String prompt) { - return String.format("{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"%s\"}],\"temperature\":0.1,\"max_tokens\":2048}", - model, escapeJson(prompt)); - } - - @Override String extractResponseContent(String responseBody) { - return extractChatContent(responseBody); - } - } - - static class CustomEndpoint - extends LLMEndpoint { - - CustomEndpoint() { - super("Custom", "http://192.168.1.74:1234/v1/chat/completions", - "qwen2.5-vl-7b-abliterated-caption-it_gguf", 2); - } - - @Override String buildRequestJson(String prompt) { - // Adapt to your third endpoint's format - return new LMStudioEndpoint().buildRequestJson(prompt); - } - - @Override String extractResponseContent(String responseBody) { - return new LMStudioEndpoint().extractResponseContent(responseBody); - } - } - - // ===== MAIN COORDINATOR ===== - static void main(String[] args) throws Exception { - System.out.println("=== CONCURRENT 3-Endpoint Scorer ==="); - for (var ep : ENDPOINTS) { - System.out.printf("- %s: %s%n", ep.name, ep.baseUrl); - } - System.out.println(); - - cleanupOutputFile(); - - // Load work queue - var allWords = Files.readAllLines(Paths.get(INPUT_WORDLIST)); - var scoredWords = loadAlreadyScoredWords(); - var workQueue = createWorkQueue(allWords, scoredWords); - - System.out.printf("Total words: %d | Already scored: %d | Remaining: %d%n%n", - allWords.size(), scoredWords.size(), workQueue.size()); - - if (workQueue.isEmpty()) { - System.out.println("All done!"); - return; - } - - // Start result writer thread - BlockingQueue> resultQueue = new LinkedBlockingQueue<>(); - var writerThread = startResultWriter(resultQueue); - - // Start worker threads - var totalThreads = 0; - for (var ep : ENDPOINTS) totalThreads += ep.maxConcurrent; - - var executor = Executors.newFixedThreadPool(totalThreads); - var totalProcessed = new AtomicInteger(scoredWords.size()); - - for (var endpoint : ENDPOINTS) { - for (var i = 0; i < endpoint.maxConcurrent; i++) { - executor.submit(() -> { - processBatches(endpoint, workQueue, resultQueue, totalProcessed, allWords.size()); - }); - } - } - - // Wait for completion - executor.shutdown(); - executor.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS); - - // Signal writer to stop - resultQueue.put(Collections.singletonList(new WordScore(null, 0, "STOP"))); - writerThread.join(); - - // Update hints in the database - - System.out.println("\n✓ All endpoints finished!"); - } - - // ===== WORKER THREAD LOGIC ===== - private static void processBatches(LLMEndpoint endpoint, - BlockingQueue workQueue, - BlockingQueue> resultQueue, - AtomicInteger totalProcessed, - int totalWords) { - - System.out.printf("[%s] Worker started%n", endpoint.name); - - while (!Thread.currentThread().isInterrupted()) { - try { - var work = workQueue.poll(1, TimeUnit.SECONDS); - if (work == null) { - if (workQueue.isEmpty()) break; // No more work in queue - continue; - } - - var scores = processWithRetry(endpoint, work.batch); - - // Add metadata - scores.forEach(s -> { - s.endpoint = endpoint.name; - s.batchId = work.batchId; - }); - - resultQueue.put(scores); - - // Progress update - var processed = totalProcessed.addAndGet(scores.size()); - if (processed % 100 < BATCH_SIZE) { // Reduce console spam - System.out.printf("Progress: %d/%d (%.1f%%)%n", - processed, totalWords, (processed * 100.0 / totalWords)); - } - - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - break; - } catch (Exception e) { - System.err.printf("[%s] Fatal error: %s%n", endpoint.name, e.getMessage()); - break; - } - } - - System.out.printf("[%s] Worker stopped%n", endpoint.name); - } - - private static List processWithRetry(LLMEndpoint endpoint, List batch) { - var retries = 0; - - while (retries < MAX_RETRIES) { - try { - return endpoint.execute(batch); - } catch (Exception e) { - retries++; - System.err.printf("[%s] Attempt %d/%d failed: %s%n", - endpoint.name, retries, MAX_RETRIES, e.getMessage()); - - if (retries >= MAX_RETRIES) { - return createFailedScores(batch, endpoint.name); - } - - try { - Thread.sleep(2000L * retries); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - return createFailedScores(batch, endpoint.name); - } - } - } - return createFailedScores(batch, endpoint.name); - } - - // ===== RESULT WRITER THREAD ===== - private static Thread startResultWriter(BlockingQueue> resultQueue) throws Exception { - var writer = new BufferedWriter(new FileWriter(OUTPUT_SCORES, true)); - var isNew = Files.size(Paths.get(OUTPUT_SCORES)) == 0; - - if (isNew) { - writer.write("word,score,status,endpoint,batch_id,timestamp\n"); - writer.flush(); - } - - var thread = new Thread(() -> { - try { - while (true) { - var scores = resultQueue.take(); - - // Stop signal - if (scores.size() == 1 && scores.get(0).status.equals("STOP")) { - break; - } - - writeBatch(writer, scores); - } - writer.close(); - } catch (Exception e) { - System.err.println("Writer thread error: " + e.getMessage()); - } - }); - - thread.start(); - return thread; - } - - private static synchronized void writeBatch(BufferedWriter writer, List scores) throws Exception { - var timestamp = Instant.now().toString(); - for (var ws : scores) { - writer.write(String.format("%s,%d,%s,%s,%d,%s\n", - ws.word, ws.score, ws.status, ws.endpoint, ws.batchId, timestamp)); - } - writer.flush(); - } - - // ===== QUEUE & DATA STRUCTURES ===== - record WorkItem(int batchId, List batch) { - - } - - private static BlockingQueue createWorkQueue(List allWords, Set scored) { - BlockingQueue queue = new LinkedBlockingQueue<>(); - var batchId = 0; - - for (var i = 0; i < allWords.size(); i += BATCH_SIZE) { - List batch = new ArrayList<>(); - for (var j = i; j < Math.min(i + BATCH_SIZE, allWords.size()); j++) { - var word = allWords.get(j); - if (!scored.contains(word.toLowerCase())) { - batch.add(word); - } - } - - if (!batch.isEmpty()) { - queue.add(new WorkItem(batchId++, batch)); - } - } - - return queue; - } - - // ===== LOADING & PARSING ===== - private static Set loadAlreadyScoredWords() throws Exception { - Set scored = new HashSet<>(); - var file = new File(OUTPUT_SCORES); - if (!file.exists()) return scored; - - var lines = Files.readAllLines(file.toPath()); - var first = true; - for (var line : lines) { - if (first) { - first = false; - continue; - } - var parts = line.split(","); - if (parts.length >= 3) { - var word = parts[0].trim().toLowerCase(); - var status = parts[2].trim(); - if ("OK".equalsIgnoreCase(status)) { - scored.add(word); - } - } - } - return scored; - } - - private static void cleanupOutputFile() throws IOException { - var path = Paths.get(OUTPUT_SCORES); - if (!Files.exists(path)) return; - - System.out.println("Cleaning up " + OUTPUT_SCORES + "..."); - var lines = Files.readAllLines(path); - if (lines.isEmpty()) return; - - var header = lines.get(0); - Map latestOkEntries = new LinkedHashMap<>(); - - for (int i = 1; i < lines.size(); i++) { - var line = lines.get(i); - var parts = line.split(","); - if (parts.length >= 3) { - var word = parts[0].trim().toLowerCase(); - var status = parts[2].trim(); - if ("OK".equalsIgnoreCase(status)) { - latestOkEntries.put(word, line); - } - } - } - - var cleanedLines = new ArrayList(); - cleanedLines.add(header); - cleanedLines.addAll(latestOkEntries.values()); - - Files.write(path, cleanedLines, StandardCharsets.UTF_8); - System.out.printf("Cleanup complete. Kept %d unique OK entries. Removed %d non-OK or duplicate entries.%n", - latestOkEntries.size(), lines.size() - cleanedLines.size()); - } - - private static List createFailedScores(List words, String endpoint) { - List failed = new ArrayList<>(); - for (var word : words) { - failed.add(new WordScore(word, -1, "FAILED", endpoint, -1)); - } - return failed; - } - - // Parsing logic - private static List parseScoresFromReply(List expectedWords, String reply, String endpointName) { - Map wordScoreMap = new HashMap<>(); - var lines = reply.split("\n"); - - for (var line : lines) { - line = line.trim(); - // Handle formats like "1. word:score", "word: score", "word - score" - String sep = null; - if (line.contains(":")) sep = ":"; - else if (line.contains("-")) sep = "-"; - - if (sep != null) { - var parts = line.split(sep, 2); - if (parts.length == 2) { - var wordPart = parts[0].trim(); - // Remove leading numbering like "1. " or bullets like "* ", "- " - wordPart = wordPart.replaceAll("^[\\d+.)*\\-\\s]+", ""); - var word = wordPart.toLowerCase(); - - try { - var scoreStr = parts[1].trim(); - // Handle potential non-numeric junk after the number - scoreStr = scoreStr.replaceAll("[^0-9].*", ""); - if (!scoreStr.isEmpty()) { - var score = Integer.parseInt(scoreStr); - wordScoreMap.put(word, Math.max(1, Math.min(10, score))); - } - } catch (NumberFormatException e) { - // Skip invalid lines - } - } - } - } - - // Match scores to original words (maintaining order) - List results = new ArrayList<>(); - for (var word : expectedWords) { - var score = wordScoreMap.get(word.toLowerCase()); - if (score != null) { - results.add(new WordScore(word, score, "OK")); - } else { - results.add(new WordScore(word, -1, "MISSING")); - } - } - - return results; - } - - // Prompt creation - private static String createScoringPrompt(List words) { - return "Je bent een Nederlandse taalexpert. Geef elk van de " + words.size() + " onderstaande woorden een populariteitsscore van 1 (zeer zeldzaam) tot 10 (zeer algemeen).\n\n" + - "Output ALLEEN in dit formaat:\n" + - "woord1:score\n" + - "woord2:score\n\n" + - "GEEN andere tekst of uitleg. Sla GEEN woorden over.\n\n" + - "Lijst:\n" + - String.join("\n", words); - } - - // Utility methods - private static String escapeJson(String str) { - return str.replace("\\", "\\\\") - .replace("\"", "\\\"") - .replace("\n", "\\n"); - } - - private static String curlPostJson(String url, String jsonBody, int timeoutSeconds) throws Exception { - // Write JSON body to temp file to avoid shell escaping issues - var tempFile = Files.createTempFile("lm-request-", ".json"); - try { - Files.writeString(tempFile, jsonBody, StandardCharsets.UTF_8); - - List cmd = new ArrayList<>(); - cmd.add("curl"); - cmd.add("-fsSL"); - cmd.add("--connect-timeout"); - cmd.add("10"); - cmd.add("--max-time"); - cmd.add(String.valueOf(timeoutSeconds)); - cmd.add("-H"); - cmd.add("Content-Type: application/json"); - cmd.add("-d"); - cmd.add("@" + tempFile); - cmd.add(url); - - var p = new ProcessBuilder(cmd) - .redirectErrorStream(true) - .start(); - - var bytes = p.getInputStream().readAllBytes(); - var code = p.waitFor(); - - if (code != 0) { - throw new IOException("curl POST failed (" + code + ") url=" + url + "\nOutput:\n" + - new String(bytes, StandardCharsets.UTF_8)); - } - - return new String(bytes, StandardCharsets.UTF_8); - } finally { - Files.deleteIfExists(tempFile); - } - } - - private static String extractChatContent(String json) { - if (json == null) return null; - var choices = json.indexOf("\"choices\""); - var p = (choices >= 0) ? choices : 0; - var i = json.indexOf("\"content\"", p); - if (i < 0) return null; - var colon = json.indexOf(':', i); - if (colon < 0) return null; - var q = json.indexOf('"', colon + 1); - if (q < 0) return null; - var sb = new StringBuilder(); - var esc = false; - for (var k = q + 1; k < json.length(); k++) { - var ch = json.charAt(k); - if (esc) { - if (ch == 'n') sb.append('\n'); - else if (ch == 't') sb.append('\t'); - else if (ch == 'r') sb.append('\r'); - else sb.append(ch); - esc = false; - } else { - if (ch == '\\') esc = true; - else if (ch == '"') break; - else sb.append(ch); - } - } - return sb.toString(); - } -} diff --git a/src/main/java/puzzle/ExportFormat.java b/src/main/java/puzzle/ExportFormat.java index 8c71f93..dbd921c 100644 --- a/src/main/java/puzzle/ExportFormat.java +++ b/src/main/java/puzzle/ExportFormat.java @@ -39,7 +39,7 @@ public final class ExportFormat { var word = clueMap.get(s.key()); if (word == null) continue; - var p = extractPlacedFromSlot(s, word); + var p = extractPlacedFromSlot(puz.dict(),s, word); if (p == null) continue; placed.add(p); } @@ -121,7 +121,7 @@ public final class ExportFormat { /** * Convert a generator Slot + assigned word into a Placed object for export. */ - private static Placed extractPlacedFromSlot(Slot s, String word) { + private static Placed extractPlacedFromSlot(Dict dict,Slot s, String word) { int r = s.clueR(); int c = s.clueC(); char d = s.dir(); @@ -168,7 +168,7 @@ public final class ExportFormat { return new Placed( word, - word, // clue placeholder + dict.words().get(word).clue(), // clue placeholder startRow, startCol, direction, @@ -182,14 +182,9 @@ public final class ExportFormat { } // pack (r,c) into one long key (handles negatives too) - private static long pack(int r, int c) { - return (((long) r) << 32) ^ (c & 0xFFFFFFFFL); - } - - // ---------- Data models ---------- - + private static long pack(int r, int c) { return (((long) r) << 32) ^ (c & 0xFFFFFFFFL); } /** - * @param direction "horizontal" | "vertical" + * @param direction "h" | "v" * @param cells word cells * @param arrow [arrowRow, arrowCol] */ private record Placed(String word, String clue, int startRow, int startCol, String direction, String answer, int arrowRow, int arrowCol, List cells, int[] arrow, @@ -197,8 +192,7 @@ public final class ExportFormat { public record Rewards(int coins, int stars, int hints) { } - /** - * @param direction "horizontal" | "vertical" */ + /// @param direction "h" | "v" public record WordOut(String word, String clue, int startRow, int startCol, String direction, String answer, int arrowRow, int arrowCol, boolean isReversed, int complex) { } public record ExportedPuzzle(List gridv2, List words, int difficulty, Rewards rewards) { } diff --git a/src/main/java/puzzle/Main.java b/src/main/java/puzzle/Main.java index f0993b9..693d168 100644 --- a/src/main/java/puzzle/Main.java +++ b/src/main/java/puzzle/Main.java @@ -83,7 +83,7 @@ public class Main { section("Clues"); info("status : generating..."); info("generatedFor : " + exported.words().size()); - exported = ClueGenerator.applyClues(exported); + //exported = ClueGenerator.applyClues(exported); info("status : done"); section("Words"); diff --git a/src/main/java/puzzle/SwedishGenerator.java b/src/main/java/puzzle/SwedishGenerator.java index e35b9b4..4a3b5d2 100644 --- a/src/main/java/puzzle/SwedishGenerator.java +++ b/src/main/java/puzzle/SwedishGenerator.java @@ -132,24 +132,22 @@ public class SwedishGenerator { int[] data() { return a; } // note: may have extra capacity } - static final class DictEntry { + static record DictEntry(ArrayList words, IntList[][] pos) { - final ArrayList words = new ArrayList<>(); - final IntList[][] pos; // pos[i][letter] -> indices (sorted by insertion) - DictEntry(int L) { - pos = new IntList[L][26]; + public DictEntry(int L) { + this(new ArrayList<>(), new IntList[L][26]); for (var i = 0; i < L; i++) { for (var j = 0; j < 26; j++) pos[i][j] = new IntList(); } } } - static record WordDifficulty(String word, int difficulty, int simpel, int score, int cross) { + static record WordDifficulty(String word, int difficulty, int simpel, int score, int cross, String clue) { - public WordDifficulty(String word, int simpel, int score) { + public WordDifficulty(String word, int simpel, int score, String clue) { var difficulty1 = 0 + ((8 - word.length()) * 30) + ((10 - score) * 15); var crossScore = ThemePoolBuilderLength.crossabilityScore(word); - this(word, difficulty1, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15)); + this(word, difficulty1, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15), clue); // Prioritize simple words (high lScore) and long words. // lScore (1-10) adds up to 1000 points (weight 100). @@ -163,7 +161,6 @@ public class SwedishGenerator { } } - public static record Dict(Map words, HashMap index, HashMap lenCounts) { } @@ -193,8 +190,12 @@ public class SwedishGenerator { // CSV has level 1-10. llmScores use 10-level. score = 10 - Integer.parseInt(parts[1].trim()); simpel = Integer.parseInt(parts[2].trim()); + var rawClue = parts[3].trim(); + if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) { + rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\""); + } if (score >= 1) - map.put(s, new WordDifficulty(s, simpel, score)); + map.put(s, new WordDifficulty(s, simpel, score, rawClue)); } } var words = map.values().stream().collect(Collectors.toCollection(ArrayList::new)); @@ -682,11 +683,10 @@ public class SwedishGenerator { System.out.flush(); }; - class Pick { + record Pick(Slot slot, + CandidateInfo info, + boolean done) { - Slot slot; - CandidateInfo info; - boolean done; } java.util.function.Supplier chooseMRV = () -> { @@ -699,22 +699,14 @@ public class SwedishGenerator { var entry = dictIndex.get(s.len); if (entry == null) { - var p = new Pick(); - p.slot = null; - p.info = null; - p.done = false; - return p; + return new Pick(null, null, false); } var pat = patternForSlot(grid, s); var info = candidateInfoForPattern(entry, pat); if (info.count == 0) { - var p = new Pick(); - p.slot = null; - p.info = null; - p.done = false; - return p; + return new Pick(null, null, false); } if (best == null @@ -726,17 +718,11 @@ public class SwedishGenerator { } } - var p = new Pick(); if (best == null) { - p.slot = null; - p.info = null; - p.done = true; + return new Pick(null, null, true); } else { - p.slot = best; - p.info = bestInfo; - p.done = false; + return new Pick(best, bestInfo, false); } - return p; }; final var MAX_TRIES_PER_SLOT = 2000; @@ -868,9 +854,9 @@ public class SwedishGenerator { public record PuzzleResult(Dict dict, char[][] mask, FillResult filled) { } public static PuzzleResult generatePuzzle(Main.Opts opts) { - var tLoad0 = System.nanoTime(); - var dict = loadWords(opts.wordsPath); - var tLoad1 = System.nanoTime(); + var tLoad0 = System.nanoTime(); + var dict = loadWords(opts.wordsPath); + var tLoad1 = System.nanoTime(); System.out.printf(Locale.ROOT, "LOAD_WORDS: %.3fs%n %s words%n", (tLoad1 - tLoad0) / 1e9, dict.words.size()); if (opts.threads > 1) { diff --git a/src/main/java/puzzle/WordScore.java b/src/main/java/puzzle/WordScore.java deleted file mode 100644 index 9eec631..0000000 --- a/src/main/java/puzzle/WordScore.java +++ /dev/null @@ -1,24 +0,0 @@ -package puzzle; - -// ===== DATA CLASS ===== -class WordScore { - - String word; - int score; - String status; - String endpoint; - int batchId; - - WordScore(String word, int score, String status, String endpoint, int batchId) { - this.word = word; - this.score = score; - this.status = status; - this.endpoint = endpoint; - this.batchId = batchId; - } - WordScore(String word, int score, String status) { - this.word = word; - this.score = score; - this.status = status; - } -} \ No newline at end of file diff --git a/tools/hint/dbjsonl.sh b/tools/hint/dbjsonl.sh new file mode 100755 index 0000000..936f4ec --- /dev/null +++ b/tools/hint/dbjsonl.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Usage: +# ./import_jsonl.sh "postgresql://user:pass@host:5432/dbname" gloss doc /path/to/file.jsonl +# +# Notes: +# - Creates table if it doesn't exist. +# - Inserts each JSON line into a jsonb column. +# - Skips blank lines. + +DB_URL="${1:?db url}" +TABLE="${2:?table name}" +COL="${3:?json column name}" +FILE="${4:?jsonl file path}" + +psql "$DB_URL" -v ON_ERROR_STOP=1 < ''; + +-- optional: show count inserted this run +SELECT count(*) AS inserted_now FROM _jsonl_stage WHERE btrim(line) <> ''; +SQL diff --git a/tools/hint/jsonl-to-sqlite.mjs b/tools/hint/jsonl-to-sqlite.mjs new file mode 100644 index 0000000..2a28806 --- /dev/null +++ b/tools/hint/jsonl-to-sqlite.mjs @@ -0,0 +1,59 @@ +// jsonl-to-sqlite.mjs +import fs from 'node:fs' +import readline from 'node:readline' +import Database from 'better-sqlite3' + +const jsonlPath = process.argv[2] +const dbPath = process.argv[3] ?? 'out.sqlite' +const table = process.argv[4] ?? 'events' + +if (!jsonlPath) { + console.error('Usage: node jsonl-to-sqlite.mjs [out.sqlite] [table]') + process.exit(1) +} + +const db = new Database(dbPath) +db.pragma('journal_mode = WAL') + +db.exec(` + CREATE TABLE IF NOT EXISTS ${ table } + ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + json TEXT NOT NULL + ); +`) + +const insert = db.prepare(`INSERT INTO ${ table }(json) + VALUES (?)`) +const insertMany = db.transaction((rows) => { + for (const r of rows) insert.run(r) +}) + +const rl = readline.createInterface({ + input : fs.createReadStream(jsonlPath, { encoding: 'utf8' }), + crlfDelay: Infinity +}) + +let batch = [] +let lineNo = 0 +for await (const line of rl) { + lineNo++ + const trimmed = line.trim() + if (!trimmed) continue + + try { + JSON.parse(trimmed) // validate + batch.push(trimmed) + } catch (e) { + console.warn(`Skipping invalid JSON on line ${ lineNo }: ${ e.message }`) + continue + } + + if (batch.length >= 1000) { + insertMany(batch) + batch = [] + } +} +if (batch.length) insertMany(batch) + +console.log(`Done. Imported into ${ dbPath }, table=${ table }`) diff --git a/tools/puzzle-gen/Dockerfile b/tools/puzzle-gen/Dockerfile deleted file mode 100644 index df7db3e..0000000 --- a/tools/puzzle-gen/Dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -FROM python:3.13-slim - -RUN apt-get update \ - && apt-get install -y --no-install-recommends ca-certificates tzdata curl \ - && rm -rf /var/lib/apt/lists/* - -# supercronic -RUN curl -fsSL -o /usr/local/bin/supercronic \ - https://github.com/aptible/supercronic/releases/download/v0.2.30/supercronic-linux-amd64 \ - && chmod +x /usr/local/bin/supercronic - -WORKDIR /app -COPY tools/puzzle-gen/generate_daily_puzzles.py /app/generate_daily_puzzles.py -COPY tools/puzzle-gen/crontab /app/crontab - -CMD ["/usr/local/bin/supercronic", "/app/crontab"] diff --git a/tools/puzzle-gen/crontab b/tools/puzzle-gen/crontab deleted file mode 100644 index 85d3197..0000000 --- a/tools/puzzle-gen/crontab +++ /dev/null @@ -1 +0,0 @@ -15 3 * * * python /app/generate_daily_puzzles.py \ No newline at end of file diff --git a/tools/puzzle-gen/generate_daily_puzzles.py b/tools/puzzle-gen/generate_daily_puzzles.py deleted file mode 100644 index 70f9bac..0000000 --- a/tools/puzzle-gen/generate_daily_puzzles.py +++ /dev/null @@ -1,399 +0,0 @@ -#!/usr/bin/env python3 -import datetime as dt -import json -import os -import random -import re -import urllib.request -import xml.etree.ElementTree as ET -import json, re - -# --- USER-FRIENDLY CONFIG --- -# Max 7 letters for shorter, more common words -WORD_RE = re.compile(r"^[A-Z]{3,7}$") -EMPTY = " " -# Slightly smaller grid for denser puzzles -SIZE = 10 -# More words needed since they're shorter -TARGET_WORDS = 15 -MIN_ACCEPT_WORDS = 10 - -FEEDS = [ - "https://feeds.nos.nl/nosnieuwsalgemeen", - "https://feeds.nos.nl/nosnieuwstech", -] - - -def env(name, default=None): - v = os.getenv(name) - return default if v is None or v == "" else v - - -def http_get(url, timeout=15): - req = urllib.request.Request(url, headers={"User-Agent": "puzzle-gen/1.0"}) - with urllib.request.urlopen(req, timeout=timeout) as r: - return r.read() - - -def http_post_json(url, payload, timeout=45): - data = json.dumps(payload).encode("utf-8") - req = urllib.request.Request( - url, - data=data, - headers={ - "Content-Type": "application/json", - "Authorization": "Bearer lm-studio", - "User-Agent": "puzzle-gen/1.0", - }, - method="POST", - ) - with urllib.request.urlopen(req, timeout=timeout) as r: - return json.loads(r.read().decode("utf-8")) - - -def fetch_rss_items(url, limit=12): - raw = http_get(url) - root = ET.fromstring(raw) - channel = root.find("channel") if root.tag.lower().endswith("rss") else root - items = [] - for it in channel.findall("item"): - title = (it.findtext("title") or "").strip() - desc = (it.findtext("description") or "").strip() - if title: - items.append((title, desc)) - if len(items) >= limit: - break - return items - - -def safe_slug(s, maxlen=50): - s = s.lower() - s = re.sub(r"[^a-z0-9]+", "-", s).strip("-") - return (s[:maxlen] or "news") - - -def extract_first_json(text: str): - """Parse first JSON value (object OR array) from any text.""" - if not text: - return None - starts = [i for i in (text.find("{"), text.find("[")) if i != -1] - if not starts: - return None - i = min(starts) - try: - return json.JSONDecoder().raw_decode(text[i:])[0] - except json.JSONDecodeError: - return None - - -def normalize_word(raw: str) -> str: - # A-Z only, remove hyphens/digits/spaces/etc. - w = re.sub(r"[^A-Za-z]", "", (raw or "")).upper() - return w - - -def sanitize_wordcluemap(obj): - """ - Accepts: - - dict: {"WORD":"clue", ...} - - list: [{"word":"...","clue":"..."}, {"WOORD":"...","clue":"..."}, ...] - Returns dict with keys A-Z 3..7 and non-empty clue. - """ - out = {} - - if isinstance(obj, dict): - items = list(obj.items()) - elif isinstance(obj, list): - items = [] - for it in obj: - if not isinstance(it, dict): - continue - raw_word = it.get("word") or it.get("WOORD") or it.get("Word") - clue = it.get("clue") or it.get("CLUE") or it.get("hint") or it.get("HINT") - items.append((raw_word, clue)) - else: - return out - - for raw_word, clue in items: - if not isinstance(raw_word, str) or not isinstance(clue, str): - continue - w = normalize_word(raw_word) - if not WORD_RE.fullmatch(w): - continue - clue = clue.strip() - if not clue: - continue - out[w] = clue - - return out - - -# ---- generator (no-touch) ---- -def make_grid(): - return [[EMPTY for _ in range(SIZE)] for _ in range(SIZE)] - - -def in_bounds(g, r, c): - return 0 <= r < len(g) and 0 <= c < len(g[0]) - - -def can_place_notouch(g, word, r, c, direction): - H, W = len(g), len(g[0]) - if r < 0 or c < 0: - return False - if direction == "horizontal" and c + len(word) > W: - return False - if direction == "vertical" and r + len(word) > H: - return False - - # no "glue" before/after - br = r if direction == "horizontal" else r - 1 - bc = c - 1 if direction == "horizontal" else c - if in_bounds(g, br, bc) and g[br][bc] != EMPTY: - return False - - ar = r if direction == "horizontal" else r + len(word) - ac = c + len(word) if direction == "horizontal" else c - if in_bounds(g, ar, ac) and g[ar][ac] != EMPTY: - return False - - for i, ch in enumerate(word): - rr = r if direction == "horizontal" else r + i - cc = c + i if direction == "horizontal" else c - cell = g[rr][cc] - crossing = cell != EMPTY - if crossing and cell != ch: - return False - - if not crossing: - if direction == "horizontal": - if in_bounds(g, rr - 1, cc) and g[rr - 1][cc] != EMPTY: return False - if in_bounds(g, rr + 1, cc) and g[rr + 1][cc] != EMPTY: return False - else: - if in_bounds(g, rr, cc - 1) and g[rr][cc - 1] != EMPTY: return False - if in_bounds(g, rr, cc + 1) and g[rr][cc + 1] != EMPTY: return False - return True - - -def place_word(g, word, r, c, direction): - for i, ch in enumerate(word): - rr = r if direction == "horizontal" else r + i - cc = c + i if direction == "horizontal" else c - g[rr][cc] = ch - - -def find_spots(g, word, placed): - spots = [] - for p in placed: - pw = p["word"] - for i, pch in enumerate(pw): - pr = p["row"] if p["direction"] == "horizontal" else p["row"] + i - pc = p["col"] + i if p["direction"] == "horizontal" else p["col"] - for j, wch in enumerate(word): - if wch != pch: - continue - direction = "vertical" if p["direction"] == "horizontal" else "horizontal" - r = pr if direction == "horizontal" else pr - j - c = pc - j if direction == "horizontal" else pc - if can_place_notouch(g, word, r, c, direction): - spots.append((r, c, direction)) - return spots - - -def generate_puzzle(wordcluemap, rnd): - words = sorted(wordcluemap.keys(), key=len, reverse=True) - g = make_grid() - placed = [] - - first = words[0] - sr = SIZE // 2 - sc = (SIZE - len(first)) // 2 - if not can_place_notouch(g, first, sr, sc, "horizontal"): - return None - place_word(g, first, sr, sc, "horizontal") - placed.append({"word": first, "clue": wordcluemap[first], "row": sr, "col": sc, "direction": "horizontal"}) - - for w in words[1:]: - spots = find_spots(g, w, placed) - rnd.shuffle(spots) - if not spots: - continue - r, c, d = spots[0] - place_word(g, w, r, c, d) - placed.append({"word": w, "clue": wordcluemap[w], "row": r, "col": c, "direction": d}) - - return {"grid": g, "placed": placed} - - -def export_format(puz, difficulty=1, rewards=None): - if rewards is None: - rewards = {"coins": 50, "stars": 2, "hints": 1} - - g = puz["grid"] - placed = puz["placed"] - H, W = len(g), len(g[0]) - - cells = [] - for p in placed: - for i in range(len(p["word"])): - r = p["row"] if p["direction"] == "horizontal" else p["row"] + i - c = p["col"] + i if p["direction"] == "horizontal" else p["col"] - cells.append((r, c)) - # arrow cell: before the start - ar = p["row"] if p["direction"] == "horizontal" else p["row"] - 1 - ac = p["col"] - 1 if p["direction"] == "horizontal" else p["col"] - cells.append((ar, ac)) - - minR = min(r for r, _ in cells) - 1 - minC = min(c for _, c in cells) - 1 - maxR = max(r for r, _ in cells) + 1 - maxC = max(c for _, c in cells) + 1 - - def ch_at(r, c): - if r < 0 or c < 0 or r >= H or c >= W: - return "#" - ch = g[r][c] - return "#" if ch == EMPTY else ch - - gridv2 = [] - for r in range(minR, maxR + 1): - row = "".join(ch_at(r, c) for c in range(minC, maxC + 1)) - gridv2.append(row) - - words_out = [] - for p in placed: - arrowRow = (p["row"] if p["direction"] == "horizontal" else p["row"] - 1) - minR - arrowCol = (p["col"] - 1 if p["direction"] == "horizontal" else p["col"]) - minC - words_out.append({ - "word": p["word"], - "clue": p["clue"], - "startRow": p["row"] - minR, - "startCol": p["col"] - minC, - "direction": p["direction"], - "answer": p["word"], - "arrowRow": arrowRow, - "arrowCol": arrowCol, - }) - - return {"gridv2": gridv2, "words": words_out, "difficulty": difficulty, "rewards": rewards} - - -def list_models(base_url): - try: - data = json.loads(http_get(f"{base_url}/models").decode("utf-8")) - return [m.get("id") for m in data.get("data", []) if m.get("id")] - except Exception: - return [] - - -def llm_make_wordcluemap(base_url, model, title, desc, n_words=12): - prompt = f""" -Geef ALLEEN een JSON object terug (geen array, geen markdown). -Formaat exact: -{{ - "WOORD": "clue", - ... -}} - -REGELS: -- WOORD: alleen letters A-Z, geen streepjes/cijfers, lengte 3..7. -- Gebruik KORTE, GEBRUIKELIJKE Nederlandse woorden (geen jargon, geen moeilijke termen). -- Clue: korte, duidelijke hint in het Nederlands. -- Maak {n_words} items. -Thema: {title} -Context: {desc[:260]} -""".strip() - - payload = { - "model": model, - "temperature": 0.7, - "messages": [ - {"role": "system", "content": "Return STRICT JSON object only."}, - {"role": "user", "content": prompt}, - ], - } - - data = http_post_json(f"{base_url}/chat/completions", payload) - content = data["choices"][0]["message"]["content"] - obj = extract_first_json(content) - wc = sanitize_wordcluemap(obj) - - # Aggressive repair for short words - if len(wc) < MIN_ACCEPT_WORDS: - repair = f""" -Zet dit om naar een STRICT JSON OBJECT (geen array) "WOORD":"clue". -KRITIEK: -- WOORD: A-Z only, lengte 3..7. GEEN lange woorden! -- Gebruik ALLEEN korte, bekende Nederlandse woorden bij twijfel. -- Vervang ongeldige/moeilijke woorden door veelvoorkomende synoniemen. -Input: -{content} -""".strip() - - payload["messages"] = [ - {"role": "system", "content": "Return STRICT JSON object only."}, - {"role": "user", "content": repair}, - ] - data = http_post_json(f"{base_url}/chat/completions", payload) - content2 = data["choices"][0]["message"]["content"] - obj2 = extract_first_json(content2) - wc2 = sanitize_wordcluemap(obj2) - if len(wc2) > len(wc): - wc = wc2 - - return wc - - -def main(): - base_url = env("LM_STUDIO_BASE_URL", "http://192.168.1.159:1234/v1") - out_dir = env("OUT_DIR", "/data/puzzles") - per_day = int(env("PUZZLES_PER_DAY", "3")) - today = dt.date.today().isoformat() - rnd = random.Random(today) - - os.makedirs(out_dir, exist_ok=True) - - items = [] - for f in FEEDS: - try: - items.extend(fetch_rss_items(f)) - except Exception: - pass - if not items: - raise SystemExit("No RSS items found") - - models = list_models(base_url) - model = env("LM_MODEL", models[0] if models else "model-identifier") - - made = 0 - for idx in range(1, per_day + 1): - title, desc = rnd.choice(items) - slug = safe_slug(title) - - wc = llm_make_wordcluemap(base_url, model, title, desc, n_words=TARGET_WORDS) - # Stricter validation: need more words since they're shorter - if len(wc) < MIN_ACCEPT_WORDS: - continue - - puz = generate_puzzle(wc, rnd) - # Require at least 7 placed words for a decent puzzle - if not puz or len(puz["placed"]) < 7: - continue - - exported = export_format(puz, difficulty=1, rewards={"coins": 50, "stars": 2, "hints": 1}) - fn = f"crossword_{today}_{idx:02d}_{slug}.json" - path = os.path.join(out_dir, fn) - with open(path, "w", encoding="utf-8") as fp: - json.dump(exported, fp, ensure_ascii=False, indent=2) - made += 1 - - # index.json (handig voor je frontend) - files = sorted([f for f in os.listdir(out_dir) if f.startswith(f"crossword_{today}_") and f.endswith(".json")]) - with open(os.path.join(out_dir, "index.json"), "w", encoding="utf-8") as fp: - json.dump({"date": today, "files": files}, fp, ensure_ascii=False, indent=2) - - print(f"Generated {made} puzzles for {today}") - - -if __name__ == "__main__": - main() \ No newline at end of file