Gather data
This commit is contained in:
@@ -11,7 +11,7 @@ WORKDIR /app
|
||||
|
||||
# Copy source files
|
||||
COPY src/ /app/src/
|
||||
COPY word-list.txt /app/word-list.txt
|
||||
COPY export_real_words_with_hints.csv /app/export_real_words_with_hints.csv
|
||||
COPY compile.sh /app/compile.sh
|
||||
COPY docker-entrypoint.sh /app/docker-entrypoint.sh
|
||||
COPY crontab /app/crontab
|
||||
@@ -19,7 +19,8 @@ COPY crontab /app/crontab
|
||||
# Compile Java code
|
||||
RUN chmod +x /app/compile.sh && \
|
||||
mkdir -p /app/target && \
|
||||
javac -d /app/target src/puzzle/*.java
|
||||
cp src/puzzle/postgresql-42.7.8.jar /app/target/ && \
|
||||
javac -cp /app/target/postgresql-42.7.8.jar -d /app/target src/puzzle/*.java
|
||||
|
||||
# Create output directory
|
||||
RUN mkdir -p /data/puzzles
|
||||
|
||||
11
README.md
11
README.md
@@ -25,7 +25,7 @@ A high-performance Java-based puzzle generator with theme-based word filtering a
|
||||
- Edit distance similarity matching
|
||||
- Automatic theme detection
|
||||
|
||||
3. **DailyGenerator.java** - Daily puzzle automation
|
||||
3. **Main.java** - Core generator and daily automation
|
||||
- Generates themed puzzles
|
||||
- JSON output with metadata
|
||||
- Index file generation
|
||||
@@ -47,7 +47,6 @@ A high-performance Java-based puzzle generator with theme-based word filtering a
|
||||
java -cp ~/dev/.target puzzle.Main --seed 42 --pop 18 --gens 100
|
||||
|
||||
# Generate daily puzzles
|
||||
java -cp ~/dev/.target puzzle.DailyGenerator
|
||||
```
|
||||
|
||||
### Docker Deployment
|
||||
@@ -153,13 +152,13 @@ Puzzles are generated daily at **3:15 AM** (configurable in `crontab`).
|
||||
Edit `crontab` to change schedule:
|
||||
```cron
|
||||
# Daily at 3:15 AM
|
||||
15 3 * * * java -cp /app/target puzzle.DailyGenerator
|
||||
15 3 * * * java -cp /app/target puzzle.Main
|
||||
|
||||
# Every 6 hours
|
||||
0 */6 * * * java -cp /app/target puzzle.DailyGenerator
|
||||
0 */6 * * * java -cp /app/target puzzle.Main
|
||||
|
||||
# Weekly on Monday at 1 AM
|
||||
0 1 * * 1 java -cp /app/target puzzle.DailyGenerator
|
||||
0 1 * * 1 java -cp /app/target puzzle.Main
|
||||
```
|
||||
|
||||
## Word List Format
|
||||
@@ -208,7 +207,7 @@ The Java version maintains module-wise compatibility with the Node.js generator:
|
||||
|------------------------|-------------------------------------|
|
||||
| `swedish_generator.js` | `SwedishGenerator.java` |
|
||||
| `export_format.js` | `ExportFormat.java` |
|
||||
| `main.js` | `Main.java` + `DailyGenerator.java` |
|
||||
| `main.js` | `Main.java` |
|
||||
| N/A | `ThemeGraph.java` (new) |
|
||||
|
||||
## Volume Management
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
TARGET=${1:-~/dev/.target}
|
||||
mkdir -p "$TARGET"
|
||||
javac -d "$TARGET" src/puzzle/*.java
|
||||
javac -cp src/puzzle/postgresql-42.7.8.jar -d "$TARGET" src/puzzle/*.java
|
||||
|
||||
2
crontab
2
crontab
@@ -1,2 +1,2 @@
|
||||
# Generate puzzles daily at 3:15 AM
|
||||
15 3 * * * java -cp /app/target puzzle.DailyGenerator >> /var/log/cron.log 2>&1
|
||||
15 3 * * * java -cp /app/target puzzle.Main >> /var/log/cron.log 2>&1
|
||||
|
||||
@@ -1,18 +1,22 @@
|
||||
services:
|
||||
|
||||
puzzle_gen:
|
||||
puzzle_create_one:
|
||||
build:
|
||||
context: ${PUZZLE_ROOT_DIR:-/opt/apps/puzzle}
|
||||
dockerfile: tools/puzzle-gen/Dockerfile
|
||||
container_name: puzzle_gen
|
||||
restart: unless-stopped
|
||||
dockerfile: Dockerfile
|
||||
container_name: puzzle_create_one
|
||||
restart: "no"
|
||||
networks: [ traefik_net ]
|
||||
environment:
|
||||
TZ: Europe/Amsterdam
|
||||
OUT_DIR: /data/puzzle
|
||||
WORDS_PATH: "/app/export_real_words_with_hints.csv"
|
||||
LM_STUDIO_BASE_URL: "http://192.168.1.159:1234/v1"
|
||||
PUZZLES_PER_DAY: "3"
|
||||
GENERATE_ON_START: "true"
|
||||
START_CLASS: "puzzle.Main"
|
||||
SCORES_PATH: "/app/export_real_words_with_hints.csv"
|
||||
volumes:
|
||||
- puzzles_data:/data/puzzles:rw
|
||||
- puzzles_data:/data/puzzle:rw
|
||||
|
||||
update_hints:
|
||||
build:
|
||||
@@ -45,13 +49,15 @@ services:
|
||||
networks: [ traefik_net ]
|
||||
environment:
|
||||
TZ: Europe/Amsterdam
|
||||
OUT_DIR: /data/puzzles
|
||||
OUT_DIR: /data/puzzle
|
||||
WORDS_PATH: "/app/export_real_words_with_hints.csv"
|
||||
SCORES_PATH: "/app/export_real_words_with_hints.csv"
|
||||
PUZZLES_PER_DAY: "3"
|
||||
LM_STUDIO_BASE_URL: "http://192.168.1.159:1234/v1"
|
||||
THEME_FILTER: "true"
|
||||
THEME_MIN_SCORE: "0.6"
|
||||
volumes:
|
||||
- puzzles_data:/data/puzzles:rw
|
||||
- puzzles_data:/data/puzzle:rw
|
||||
|
||||
volumes:
|
||||
puzzles_data:
|
||||
|
||||
@@ -8,12 +8,14 @@ echo "Puzzles per day: ${PUZZLES_PER_DAY}"
|
||||
echo ""
|
||||
|
||||
# Ensure output directory exists
|
||||
mkdir -p "${OUT_DIR}"
|
||||
mkdir -p "${OUT_DIR}/puzzles"
|
||||
|
||||
# Generate initial puzzle on startup (optional)
|
||||
if [ "${GENERATE_ON_START}" = "true" ]; then
|
||||
echo "Generating initial puzzles..."
|
||||
java -cp /app/target puzzle.DailyGenerator
|
||||
START_CLASS=${START_CLASS:-puzzle.Main}
|
||||
echo "Running ${START_CLASS}..."
|
||||
java -cp /app/target/postgresql-42.7.8.jar:/app/target ${START_CLASS}
|
||||
echo ""
|
||||
fi
|
||||
|
||||
|
||||
13
package.json
Normal file
13
package.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"name": "puzzle-generator",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"private": true,
|
||||
"dependencies": {
|
||||
"better-sqlite3": "^12.5.0"
|
||||
}
|
||||
}
|
||||
67
pom.xml
Normal file
67
pom.xml
Normal file
@@ -0,0 +1,67 @@
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>puzzle</groupId>
|
||||
<artifactId>tools</artifactId>
|
||||
<version>0.0.1</version>
|
||||
|
||||
<properties>
|
||||
<maven.compiler.source>25</maven.compiler.source>
|
||||
<maven.compiler.target>25</maven.compiler.target>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.postgresql</groupId>
|
||||
<artifactId>postgresql</artifactId>
|
||||
<version>42.7.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.xerial</groupId>
|
||||
<artifactId>sqlite-jdbc</artifactId>
|
||||
<version>3.46.1.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
<version>2.0.13</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Choose exactly one binding: -->
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-simple</artifactId>
|
||||
<version>2.0.13</version>
|
||||
<scope>runtime</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<version>3.6.0</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>shade</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<createDependencyReducedPom>false</createDependencyReducedPom>
|
||||
<transformers>
|
||||
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
|
||||
<mainClass>HintScores</mainClass>
|
||||
</transformer>
|
||||
</transformers>
|
||||
<finalName>tools-all</finalName>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
142
py/import.py
142
py/import.py
@@ -1,142 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
RE_ASCII_WORD = re.compile(r"^[A-Za-z]+$")
|
||||
RE_SPACE = re.compile(r"\s+")
|
||||
RE_PARENS = re.compile(r"\s*\([^)]*\)\s*") # verwijder (labels)
|
||||
RE_BRACKETS = re.compile(r"\s*\[[^]]*]\s*") # verwijder [labels]
|
||||
|
||||
def clean_hint(s: str) -> str:
    """Normalize a raw gloss into a compact hint string.

    Bracketed ``[...]`` and parenthesized ``(...)`` labels are each replaced
    by a single space, the typographic apostrophe becomes ``'``, whitespace
    runs are collapsed to one space, and leading/trailing separator
    characters are trimmed.
    """
    without_labels = RE_PARENS.sub(" ", RE_BRACKETS.sub(" ", s.strip()))
    collapsed = RE_SPACE.sub(" ", without_labels.replace("’", "'"))
    return collapsed.strip(" -;:,.\t")
|
||||
|
||||
def pick_gloss(obj: dict) -> tuple[str | None, str | None]:
|
||||
"""Return (hint, pos) from a Wiktextract JSON line."""
|
||||
pos = obj.get("pos")
|
||||
senses = obj.get("senses") or []
|
||||
best = None
|
||||
|
||||
for s in senses:
|
||||
glosses = s.get("glosses") or []
|
||||
if not glosses:
|
||||
continue
|
||||
# Neem de eerste gloss die "normaal" oogt
|
||||
for g in glosses:
|
||||
if not isinstance(g, str):
|
||||
continue
|
||||
g2 = clean_hint(g)
|
||||
if len(g2) < 3:
|
||||
continue
|
||||
best = g2
|
||||
break
|
||||
if best:
|
||||
break
|
||||
|
||||
return best, pos
|
||||
|
||||
def main():
    """Import Dutch Wiktextract glosses from a JSONL dump into a SQLite ``hints`` table.

    Command-line arguments: ``--db`` and ``--jsonl`` (required paths),
    ``--minlen`` / ``--maxlen`` (inclusive bounds on the word length) and
    ``--maxhint`` (maximum hint length in characters).

    Only ASCII-letter words whose ``lang_code`` is missing or ``"nl"`` are
    imported. Rows are inserted with ``INSERT OR IGNORE`` in batches inside
    one transaction. The connection is always closed; if the import fails,
    the unfinished import transaction is never committed.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--db", required=True, help="pad naar jouw sqlite db")
    ap.add_argument("--jsonl", required=True, help="pad naar nl-extract.jsonl")
    ap.add_argument("--minlen", type=int, default=2)
    ap.add_argument("--maxlen", type=int, default=8)
    ap.add_argument("--maxhint", type=int, default=80)
    args = ap.parse_args()

    db_path = Path(args.db)
    jsonl_path = Path(args.jsonl)

    insert_sql = "INSERT OR IGNORE INTO hints(word,hint,source,pos,quality) VALUES (?,?,?,?,?)"
    batch_size = 2000

    con = sqlite3.connect(db_path)
    try:
        cur = con.cursor()

        # Speed pragmas (only wanted while importing).
        cur.execute("PRAGMA journal_mode=WAL;")
        cur.execute("PRAGMA synchronous=NORMAL;")
        cur.execute("PRAGMA temp_store=MEMORY;")

        cur.execute("""
            CREATE TABLE IF NOT EXISTS hints (
              word TEXT NOT NULL,
              hint TEXT NOT NULL,
              source TEXT NOT NULL DEFAULT 'wiktionary',
              pos TEXT,
              quality INTEGER NOT NULL DEFAULT 80,
              PRIMARY KEY (word, hint, source)
            );
        """)
        cur.execute("CREATE INDEX IF NOT EXISTS idx_hints_word ON hints(word);")
        con.commit()

        def flush(rows: list) -> int:
            """Insert *rows* and return how many the cursor reports as added."""
            cur.executemany(insert_sql, rows)
            # rowcount is -1 when the number of affected rows is unknown.
            return cur.rowcount if cur.rowcount != -1 else 0

        batch = []
        inserted = 0
        seen = 0

        con.execute("BEGIN;")
        with jsonl_path.open("r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    obj = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # A valid JSON line that is not an object carries no entry.
                if not isinstance(obj, dict):
                    continue

                # Kaikki/Wiktextract: usually lang_code = "nl" and lang = "Dutch".
                lang_code = obj.get("lang_code")
                if lang_code and lang_code != "nl":
                    continue

                word = obj.get("word")
                if not word:
                    continue

                word_up = word.upper().strip()
                if not (args.minlen <= len(word_up) <= args.maxlen):
                    continue
                if not RE_ASCII_WORD.match(word_up):
                    continue

                hint, pos = pick_gloss(obj)
                if not hint:
                    continue

                # Keep the hint short.
                hint = hint[: args.maxhint].rstrip()

                # Simple quality heuristic: slightly higher when the POS is known.
                quality = 85 if pos else 80

                batch.append((word_up, hint, "wiktionary", pos, quality))
                seen += 1

                if len(batch) >= batch_size:
                    inserted += flush(batch)
                    batch.clear()

        if batch:
            inserted += flush(batch)

        con.commit()
    finally:
        # Close even when the import fails so the database handle is not leaked.
        con.close()

    print(f"Done. processed_lines≈{seen}, inserted≈{inserted} (OR IGNORE kan inserts verlagen).")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,9 +0,0 @@
|
||||
# PyTorch with CUDA 12.4 support
|
||||
--index-url https://download.pytorch.org/whl/cu124
|
||||
torch
|
||||
torchvision
|
||||
torchaudio
|
||||
|
||||
# Transformers and marker
|
||||
transformers
|
||||
marker-pdf
|
||||
1
run.sh
1
run.sh
@@ -1,3 +1,2 @@
|
||||
#!/bin/bash
|
||||
java -cp ~/dev/.target puzzle.Main "$@"
|
||||
java -cp ~/dev/.target puzzle.DailyGenerator "$@"
|
||||
|
||||
@@ -1,222 +0,0 @@
|
||||
package puzzle;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.*;
|
||||
import static puzzle.ExportFormat.*;
|
||||
|
||||
public class ClueGenerator {
|
||||
|
||||
private static final String OLLAMA_URL = "http://localhost:11434/api/chat";
|
||||
private static final String MODEL = "qwen2.5:14b";
|
||||
private static final String HINTS_FILE = "/home/mike/dev/puzzle-generator/nl_score_hints.csv";
|
||||
private static Map<String, String> prebuiltClues = null;
|
||||
|
||||
private static synchronized void ensurePrebuiltCluesLoaded() {
|
||||
if (prebuiltClues != null) return;
|
||||
prebuiltClues = new HashMap<>();
|
||||
try {
|
||||
var lines = Files.readAllLines(Path.of(HINTS_FILE), StandardCharsets.UTF_8);
|
||||
for (var line : lines) {
|
||||
var parts = line.split(",", 4);
|
||||
if (parts.length >= 4) {
|
||||
var word = parts[0].trim().toUpperCase(Locale.ROOT);
|
||||
var rawClue = parts[3].trim();
|
||||
if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
|
||||
rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
|
||||
}
|
||||
if (!word.isEmpty() && !rawClue.isEmpty()) {
|
||||
prebuiltClues.put(word, rawClue);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.err.println("Warning: " + HINTS_FILE + " not found or could not be read.");
|
||||
}
|
||||
}
|
||||
|
||||
public static ExportedPuzzle applyClues(ExportedPuzzle puzzle) {
|
||||
if (puzzle == null || puzzle.words().isEmpty()) {
|
||||
return puzzle;
|
||||
}
|
||||
|
||||
ensurePrebuiltCluesLoaded();
|
||||
|
||||
Map<String, String> finalClueMap = new HashMap<>();
|
||||
List<String> wordsMissingClues = new ArrayList<>();
|
||||
|
||||
for (var w : puzzle.words()) {
|
||||
var wordUpper = w.word().toUpperCase(Locale.ROOT);
|
||||
if (prebuiltClues.containsKey(wordUpper)) {
|
||||
finalClueMap.put(w.word(), prebuiltClues.get(wordUpper));
|
||||
} else {
|
||||
wordsMissingClues.add(w.word());
|
||||
}
|
||||
}
|
||||
|
||||
if (!wordsMissingClues.isEmpty()) {
|
||||
var generatedClues = generateClues(wordsMissingClues);
|
||||
finalClueMap.putAll(generatedClues);
|
||||
}
|
||||
|
||||
List<WordOut> wordsWithClues = new ArrayList<>();
|
||||
for (var w : puzzle.words()) {
|
||||
var clue = finalClueMap.getOrDefault(w.word(), w.word());
|
||||
wordsWithClues.add(new WordOut(
|
||||
w.word(),
|
||||
clue,
|
||||
w.startRow(),
|
||||
w.startCol(),
|
||||
w.direction(),
|
||||
w.answer(),
|
||||
w.arrowRow(),
|
||||
w.arrowCol(),
|
||||
w.isReversed(),
|
||||
w.complex()
|
||||
));
|
||||
}
|
||||
|
||||
return new ExportedPuzzle(puzzle.gridv2(), wordsWithClues, puzzle.difficulty(), puzzle.rewards());
|
||||
}
|
||||
|
||||
public static Map<String, String> generateClues(List<String> words) {
|
||||
if (words == null || words.isEmpty()) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
|
||||
var prompt = createCluePrompt(words);
|
||||
try {
|
||||
var jsonRequest = String.format(
|
||||
"{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"%s\"}],\"stream\":false,\"temperature\":0.7}",
|
||||
MODEL, escapeJson(prompt)
|
||||
);
|
||||
|
||||
var responseBody = curlPostJson(OLLAMA_URL, jsonRequest, 120);
|
||||
var content = extractChatContent(responseBody);
|
||||
|
||||
if (content == null || content.isEmpty()) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
|
||||
return parseCluesFromReply(words, content);
|
||||
} catch (Exception e) {
|
||||
System.err.println("Failed to generate clues: " + e.getMessage());
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
}
|
||||
|
||||
private static String createCluePrompt(List<String> words) {
|
||||
return "Je bent een expert in het maken van kruiswoordpuzzels. Geef voor elk van de onderstaande woorden een korte, uitdagende maar duidelijke cryptische of beschrijvende aanwijzing in het Nederlands.\n\n" +
|
||||
"Output ALLEEN in dit formaat:\n" +
|
||||
"woord1:aanwijzing\n" +
|
||||
"woord2:aanwijzing\n\n" +
|
||||
"GEEN andere tekst of uitleg. Sla GEEN woorden over.\n\n" +
|
||||
"Lijst:\n" +
|
||||
String.join("\n", words);
|
||||
}
|
||||
|
||||
private static Map<String, String> parseCluesFromReply(List<String> expectedWords, String reply) {
|
||||
Map<String, String> wordClueMap = new HashMap<>();
|
||||
var lines = reply.split("\n");
|
||||
|
||||
for (var line : lines) {
|
||||
line = line.trim();
|
||||
if (line.contains(":")) {
|
||||
var parts = line.split(":", 2);
|
||||
if (parts.length == 2) {
|
||||
var wordPart = parts[0].trim().replaceAll("^[\\d+.)*\\-\\s]+", "").toLowerCase();
|
||||
var clue = parts[1].trim();
|
||||
if (!clue.isEmpty()) {
|
||||
wordClueMap.put(wordPart, clue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Map<String, String> results = new HashMap<>();
|
||||
for (var word : expectedWords) {
|
||||
var clue = wordClueMap.get(word.toLowerCase());
|
||||
if (clue != null) {
|
||||
results.put(word, clue);
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
private static String curlPostJson(String url, String jsonBody, int timeoutSeconds) throws Exception {
|
||||
var tempFile = Files.createTempFile("clue-request-", ".json");
|
||||
try {
|
||||
Files.writeString(tempFile, jsonBody, StandardCharsets.UTF_8);
|
||||
List<String> cmd = new ArrayList<>();
|
||||
cmd.add("curl");
|
||||
cmd.add("-fsSL");
|
||||
cmd.add("--connect-timeout");
|
||||
cmd.add("10");
|
||||
cmd.add("--max-time");
|
||||
cmd.add(String.valueOf(timeoutSeconds));
|
||||
cmd.add("-H");
|
||||
cmd.add("Content-Type: application/json");
|
||||
cmd.add("-d");
|
||||
cmd.add("@" + tempFile);
|
||||
cmd.add(url);
|
||||
|
||||
var p = new ProcessBuilder(cmd)
|
||||
.redirectErrorStream(true)
|
||||
.start();
|
||||
|
||||
var bytes = p.getInputStream().readAllBytes();
|
||||
var code = p.waitFor();
|
||||
|
||||
if (code != 0) {
|
||||
throw new IOException("curl POST failed (" + code + ") url=" + url + "\nOutput:\n" +
|
||||
new String(bytes, StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
return new String(bytes, StandardCharsets.UTF_8);
|
||||
} finally {
|
||||
Files.deleteIfExists(tempFile);
|
||||
}
|
||||
}
|
||||
|
||||
private static String extractChatContent(String json) {
|
||||
if (json == null) return null;
|
||||
var choices = json.indexOf("\"choices\"");
|
||||
var p = (choices >= 0) ? choices : 0;
|
||||
var i = json.indexOf("\"content\"", p);
|
||||
if (i < 0) {
|
||||
// Fallback for Ollama non-chat format if needed, but we used /api/chat
|
||||
// Ollama /api/chat returns {"model":"...","message":{"role":"assistant","content":"..."}}
|
||||
i = json.indexOf("\"content\"");
|
||||
if (i < 0) return null;
|
||||
}
|
||||
var colon = json.indexOf(':', i);
|
||||
if (colon < 0) return null;
|
||||
var q = json.indexOf('"', colon + 1);
|
||||
if (q < 0) return null;
|
||||
var sb = new StringBuilder();
|
||||
var esc = false;
|
||||
for (var k = q + 1; k < json.length(); k++) {
|
||||
var ch = json.charAt(k);
|
||||
if (esc) {
|
||||
if (ch == 'n') sb.append('\n');
|
||||
else if (ch == 't') sb.append('\t');
|
||||
else if (ch == 'r') sb.append('\r');
|
||||
else sb.append(ch);
|
||||
esc = false;
|
||||
} else {
|
||||
if (ch == '\\') esc = true;
|
||||
else if (ch == '"') break;
|
||||
else sb.append(ch);
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private static String escapeJson(String str) {
|
||||
return str.replace("\\", "\\\\")
|
||||
.replace("\"", "\\\"")
|
||||
.replace("\n", "\\n");
|
||||
}
|
||||
}
|
||||
@@ -1,532 +0,0 @@
|
||||
package puzzle;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.*;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.*;
|
||||
import java.io.*;
|
||||
import java.time.*;
|
||||
import java.util.concurrent.atomic.*;
|
||||
|
||||
/**
|
||||
* CONCURRENT MULTI-ENDPOINT Dutch Wordlist Scorer
|
||||
* Distributes batches across Ollama, LM-Studio, and a third endpoint simultaneously
|
||||
*/
|
||||
public class ConcurrentWordScorer {
|
||||
|
||||
// ===== CONFIGURATION =====
|
||||
private static final String INPUT_WORDLIST = "word-list.txt";
|
||||
private static final String OUTPUT_SCORES = "word_scores.csv";
|
||||
private static final int BATCH_SIZE = 10; // Even smaller for the difficult remaining words
|
||||
private static final int MAX_RETRIES = 3;
|
||||
|
||||
// Define all three endpoints
|
||||
private static final LLMEndpoint[] ENDPOINTS = {
|
||||
new OllamaEndpoint(),
|
||||
new LMStudioEndpoint(),
|
||||
new LMStudioEndpoint("LM-Studio", "http://192.168.1.74:1234/v1/chat/completions",
|
||||
"mistralai/mistral-nemo-instruct-2407", 1)
|
||||
// new CustomEndpoint()
|
||||
};
|
||||
|
||||
// ===== ENDPOINT CLASSES =====
|
||||
abstract static class LLMEndpoint {
|
||||
|
||||
String name;
|
||||
String baseUrl;
|
||||
String model;
|
||||
Semaphore rateLimiter; // Per-endpoint rate limiting
|
||||
|
||||
int maxConcurrent;
|
||||
|
||||
LLMEndpoint(String name, String baseUrl, String model, int maxConcurrent) {
|
||||
this.name = name;
|
||||
this.baseUrl = baseUrl;
|
||||
this.model = model;
|
||||
this.maxConcurrent = maxConcurrent;
|
||||
this.rateLimiter = new Semaphore(maxConcurrent);
|
||||
}
|
||||
|
||||
abstract String buildRequestJson(String prompt);
|
||||
abstract String extractResponseContent(String responseBody);
|
||||
|
||||
// Rate-limited request execution
|
||||
List<WordScore> execute(List<String> batch) throws Exception {
|
||||
rateLimiter.acquire(); // Wait for slot
|
||||
try {
|
||||
return executeInternal(batch);
|
||||
} finally {
|
||||
rateLimiter.release();
|
||||
}
|
||||
}
|
||||
|
||||
private List<WordScore> executeInternal(List<String> batch) throws Exception {
|
||||
var prompt = createScoringPrompt(batch);
|
||||
var jsonRequest = buildRequestJson(prompt);
|
||||
|
||||
var responseBody = curlPostJson(baseUrl, jsonRequest, 120);
|
||||
var content = extractResponseContent(responseBody);
|
||||
|
||||
if (content == null || content.isEmpty()) {
|
||||
throw new IOException("[" + name + "] Empty response content");
|
||||
}
|
||||
|
||||
return parseScoresFromReply(batch, content, name);
|
||||
}
|
||||
}
|
||||
|
||||
static class OllamaEndpoint
|
||||
extends LLMEndpoint {
|
||||
|
||||
OllamaEndpoint() {
|
||||
super("Ollama", "http://localhost:11434/api/chat",
|
||||
"qwen2.5:14b", 1); // 2 concurrent requests
|
||||
}
|
||||
|
||||
@Override String buildRequestJson(String prompt) {
|
||||
return String.format("{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"%s\"}],\"stream\":false,\"temperature\":0.1}",
|
||||
model, escapeJson(prompt));
|
||||
}
|
||||
|
||||
@Override String extractResponseContent(String responseBody) {
|
||||
// Ollama uses "message" -> "content"
|
||||
var start = responseBody.indexOf("\"content\":\"") + 11;
|
||||
var end = responseBody.indexOf("\"", start);
|
||||
if (start < 11 || end < 0) return "";
|
||||
return responseBody.substring(start, end).replace("\\n", "\n");
|
||||
}
|
||||
}
|
||||
|
||||
static class LMStudioEndpoint
|
||||
extends LLMEndpoint {
|
||||
|
||||
LMStudioEndpoint() {
|
||||
super("LM-Studio", "http://192.168.1.159:1234/v1/chat/completions",
|
||||
"mistralai/mistral-nemo-instruct-2407", 1); // LM-Studio can handle more
|
||||
}
|
||||
public LMStudioEndpoint(String s, String url, String s1, int i) {
|
||||
super(
|
||||
s, url, s1, i
|
||||
);
|
||||
}
|
||||
|
||||
@Override String buildRequestJson(String prompt) {
|
||||
return String.format("{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"%s\"}],\"temperature\":0.1,\"max_tokens\":2048}",
|
||||
model, escapeJson(prompt));
|
||||
}
|
||||
|
||||
@Override String extractResponseContent(String responseBody) {
|
||||
return extractChatContent(responseBody);
|
||||
}
|
||||
}
|
||||
|
||||
static class CustomEndpoint
|
||||
extends LLMEndpoint {
|
||||
|
||||
CustomEndpoint() {
|
||||
super("Custom", "http://192.168.1.74:1234/v1/chat/completions",
|
||||
"qwen2.5-vl-7b-abliterated-caption-it_gguf", 2);
|
||||
}
|
||||
|
||||
@Override String buildRequestJson(String prompt) {
|
||||
// Adapt to your third endpoint's format
|
||||
return new LMStudioEndpoint().buildRequestJson(prompt);
|
||||
}
|
||||
|
||||
@Override String extractResponseContent(String responseBody) {
|
||||
return new LMStudioEndpoint().extractResponseContent(responseBody);
|
||||
}
|
||||
}
|
||||
|
||||
// ===== MAIN COORDINATOR =====
|
||||
static void main(String[] args) throws Exception {
|
||||
System.out.println("=== CONCURRENT 3-Endpoint Scorer ===");
|
||||
for (var ep : ENDPOINTS) {
|
||||
System.out.printf("- %s: %s%n", ep.name, ep.baseUrl);
|
||||
}
|
||||
System.out.println();
|
||||
|
||||
cleanupOutputFile();
|
||||
|
||||
// Load work queue
|
||||
var allWords = Files.readAllLines(Paths.get(INPUT_WORDLIST));
|
||||
var scoredWords = loadAlreadyScoredWords();
|
||||
var workQueue = createWorkQueue(allWords, scoredWords);
|
||||
|
||||
System.out.printf("Total words: %d | Already scored: %d | Remaining: %d%n%n",
|
||||
allWords.size(), scoredWords.size(), workQueue.size());
|
||||
|
||||
if (workQueue.isEmpty()) {
|
||||
System.out.println("All done!");
|
||||
return;
|
||||
}
|
||||
|
||||
// Start result writer thread
|
||||
BlockingQueue<List<WordScore>> resultQueue = new LinkedBlockingQueue<>();
|
||||
var writerThread = startResultWriter(resultQueue);
|
||||
|
||||
// Start worker threads
|
||||
var totalThreads = 0;
|
||||
for (var ep : ENDPOINTS) totalThreads += ep.maxConcurrent;
|
||||
|
||||
var executor = Executors.newFixedThreadPool(totalThreads);
|
||||
var totalProcessed = new AtomicInteger(scoredWords.size());
|
||||
|
||||
for (var endpoint : ENDPOINTS) {
|
||||
for (var i = 0; i < endpoint.maxConcurrent; i++) {
|
||||
executor.submit(() -> {
|
||||
processBatches(endpoint, workQueue, resultQueue, totalProcessed, allWords.size());
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for completion
|
||||
executor.shutdown();
|
||||
executor.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);
|
||||
|
||||
// Signal writer to stop
|
||||
resultQueue.put(Collections.singletonList(new WordScore(null, 0, "STOP")));
|
||||
writerThread.join();
|
||||
|
||||
// Update hints in the database
|
||||
|
||||
System.out.println("\n✓ All endpoints finished!");
|
||||
}
|
||||
|
||||
// ===== WORKER THREAD LOGIC =====
|
||||
private static void processBatches(LLMEndpoint endpoint,
|
||||
BlockingQueue<WorkItem> workQueue,
|
||||
BlockingQueue<List<WordScore>> resultQueue,
|
||||
AtomicInteger totalProcessed,
|
||||
int totalWords) {
|
||||
|
||||
System.out.printf("[%s] Worker started%n", endpoint.name);
|
||||
|
||||
while (!Thread.currentThread().isInterrupted()) {
|
||||
try {
|
||||
var work = workQueue.poll(1, TimeUnit.SECONDS);
|
||||
if (work == null) {
|
||||
if (workQueue.isEmpty()) break; // No more work in queue
|
||||
continue;
|
||||
}
|
||||
|
||||
var scores = processWithRetry(endpoint, work.batch);
|
||||
|
||||
// Add metadata
|
||||
scores.forEach(s -> {
|
||||
s.endpoint = endpoint.name;
|
||||
s.batchId = work.batchId;
|
||||
});
|
||||
|
||||
resultQueue.put(scores);
|
||||
|
||||
// Progress update
|
||||
var processed = totalProcessed.addAndGet(scores.size());
|
||||
if (processed % 100 < BATCH_SIZE) { // Reduce console spam
|
||||
System.out.printf("Progress: %d/%d (%.1f%%)%n",
|
||||
processed, totalWords, (processed * 100.0 / totalWords));
|
||||
}
|
||||
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
break;
|
||||
} catch (Exception e) {
|
||||
System.err.printf("[%s] Fatal error: %s%n", endpoint.name, e.getMessage());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
System.out.printf("[%s] Worker stopped%n", endpoint.name);
|
||||
}
|
||||
|
||||
private static List<WordScore> processWithRetry(LLMEndpoint endpoint, List<String> batch) {
|
||||
var retries = 0;
|
||||
|
||||
while (retries < MAX_RETRIES) {
|
||||
try {
|
||||
return endpoint.execute(batch);
|
||||
} catch (Exception e) {
|
||||
retries++;
|
||||
System.err.printf("[%s] Attempt %d/%d failed: %s%n",
|
||||
endpoint.name, retries, MAX_RETRIES, e.getMessage());
|
||||
|
||||
if (retries >= MAX_RETRIES) {
|
||||
return createFailedScores(batch, endpoint.name);
|
||||
}
|
||||
|
||||
try {
|
||||
Thread.sleep(2000L * retries);
|
||||
} catch (InterruptedException ie) {
|
||||
Thread.currentThread().interrupt();
|
||||
return createFailedScores(batch, endpoint.name);
|
||||
}
|
||||
}
|
||||
}
|
||||
return createFailedScores(batch, endpoint.name);
|
||||
}
|
||||
|
||||
// ===== RESULT WRITER THREAD =====
|
||||
private static Thread startResultWriter(BlockingQueue<List<WordScore>> resultQueue) throws Exception {
|
||||
var writer = new BufferedWriter(new FileWriter(OUTPUT_SCORES, true));
|
||||
var isNew = Files.size(Paths.get(OUTPUT_SCORES)) == 0;
|
||||
|
||||
if (isNew) {
|
||||
writer.write("word,score,status,endpoint,batch_id,timestamp\n");
|
||||
writer.flush();
|
||||
}
|
||||
|
||||
var thread = new Thread(() -> {
|
||||
try {
|
||||
while (true) {
|
||||
var scores = resultQueue.take();
|
||||
|
||||
// Stop signal
|
||||
if (scores.size() == 1 && scores.get(0).status.equals("STOP")) {
|
||||
break;
|
||||
}
|
||||
|
||||
writeBatch(writer, scores);
|
||||
}
|
||||
writer.close();
|
||||
} catch (Exception e) {
|
||||
System.err.println("Writer thread error: " + e.getMessage());
|
||||
}
|
||||
});
|
||||
|
||||
thread.start();
|
||||
return thread;
|
||||
}
|
||||
|
||||
private static synchronized void writeBatch(BufferedWriter writer, List<WordScore> scores) throws Exception {
|
||||
var timestamp = Instant.now().toString();
|
||||
for (var ws : scores) {
|
||||
writer.write(String.format("%s,%d,%s,%s,%d,%s\n",
|
||||
ws.word, ws.score, ws.status, ws.endpoint, ws.batchId, timestamp));
|
||||
}
|
||||
writer.flush();
|
||||
}
|
||||
|
||||
// ===== QUEUE & DATA STRUCTURES =====
|
||||
/** One unit of work: a batch of words together with its sequential batch id. */
record WorkItem(int batchId, List<String> batch) {}
|
||||
|
||||
private static BlockingQueue<WorkItem> createWorkQueue(List<String> allWords, Set<String> scored) {
|
||||
BlockingQueue<WorkItem> queue = new LinkedBlockingQueue<>();
|
||||
var batchId = 0;
|
||||
|
||||
for (var i = 0; i < allWords.size(); i += BATCH_SIZE) {
|
||||
List<String> batch = new ArrayList<>();
|
||||
for (var j = i; j < Math.min(i + BATCH_SIZE, allWords.size()); j++) {
|
||||
var word = allWords.get(j);
|
||||
if (!scored.contains(word.toLowerCase())) {
|
||||
batch.add(word);
|
||||
}
|
||||
}
|
||||
|
||||
if (!batch.isEmpty()) {
|
||||
queue.add(new WorkItem(batchId++, batch));
|
||||
}
|
||||
}
|
||||
|
||||
return queue;
|
||||
}
|
||||
|
||||
// ===== LOADING & PARSING =====
|
||||
private static Set<String> loadAlreadyScoredWords() throws Exception {
|
||||
Set<String> scored = new HashSet<>();
|
||||
var file = new File(OUTPUT_SCORES);
|
||||
if (!file.exists()) return scored;
|
||||
|
||||
var lines = Files.readAllLines(file.toPath());
|
||||
var first = true;
|
||||
for (var line : lines) {
|
||||
if (first) {
|
||||
first = false;
|
||||
continue;
|
||||
}
|
||||
var parts = line.split(",");
|
||||
if (parts.length >= 3) {
|
||||
var word = parts[0].trim().toLowerCase();
|
||||
var status = parts[2].trim();
|
||||
if ("OK".equalsIgnoreCase(status)) {
|
||||
scored.add(word);
|
||||
}
|
||||
}
|
||||
}
|
||||
return scored;
|
||||
}
|
||||
|
||||
private static void cleanupOutputFile() throws IOException {
|
||||
var path = Paths.get(OUTPUT_SCORES);
|
||||
if (!Files.exists(path)) return;
|
||||
|
||||
System.out.println("Cleaning up " + OUTPUT_SCORES + "...");
|
||||
var lines = Files.readAllLines(path);
|
||||
if (lines.isEmpty()) return;
|
||||
|
||||
var header = lines.get(0);
|
||||
Map<String, String> latestOkEntries = new LinkedHashMap<>();
|
||||
|
||||
for (int i = 1; i < lines.size(); i++) {
|
||||
var line = lines.get(i);
|
||||
var parts = line.split(",");
|
||||
if (parts.length >= 3) {
|
||||
var word = parts[0].trim().toLowerCase();
|
||||
var status = parts[2].trim();
|
||||
if ("OK".equalsIgnoreCase(status)) {
|
||||
latestOkEntries.put(word, line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var cleanedLines = new ArrayList<String>();
|
||||
cleanedLines.add(header);
|
||||
cleanedLines.addAll(latestOkEntries.values());
|
||||
|
||||
Files.write(path, cleanedLines, StandardCharsets.UTF_8);
|
||||
System.out.printf("Cleanup complete. Kept %d unique OK entries. Removed %d non-OK or duplicate entries.%n",
|
||||
latestOkEntries.size(), lines.size() - cleanedLines.size());
|
||||
}
|
||||
|
||||
private static List<WordScore> createFailedScores(List<String> words, String endpoint) {
|
||||
List<WordScore> failed = new ArrayList<>();
|
||||
for (var word : words) {
|
||||
failed.add(new WordScore(word, -1, "FAILED", endpoint, -1));
|
||||
}
|
||||
return failed;
|
||||
}
|
||||
|
||||
// Parsing logic
|
||||
private static List<WordScore> parseScoresFromReply(List<String> expectedWords, String reply, String endpointName) {
|
||||
Map<String, Integer> wordScoreMap = new HashMap<>();
|
||||
var lines = reply.split("\n");
|
||||
|
||||
for (var line : lines) {
|
||||
line = line.trim();
|
||||
// Handle formats like "1. word:score", "word: score", "word - score"
|
||||
String sep = null;
|
||||
if (line.contains(":")) sep = ":";
|
||||
else if (line.contains("-")) sep = "-";
|
||||
|
||||
if (sep != null) {
|
||||
var parts = line.split(sep, 2);
|
||||
if (parts.length == 2) {
|
||||
var wordPart = parts[0].trim();
|
||||
// Remove leading numbering like "1. " or bullets like "* ", "- "
|
||||
wordPart = wordPart.replaceAll("^[\\d+.)*\\-\\s]+", "");
|
||||
var word = wordPart.toLowerCase();
|
||||
|
||||
try {
|
||||
var scoreStr = parts[1].trim();
|
||||
// Handle potential non-numeric junk after the number
|
||||
scoreStr = scoreStr.replaceAll("[^0-9].*", "");
|
||||
if (!scoreStr.isEmpty()) {
|
||||
var score = Integer.parseInt(scoreStr);
|
||||
wordScoreMap.put(word, Math.max(1, Math.min(10, score)));
|
||||
}
|
||||
} catch (NumberFormatException e) {
|
||||
// Skip invalid lines
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Match scores to original words (maintaining order)
|
||||
List<WordScore> results = new ArrayList<>();
|
||||
for (var word : expectedWords) {
|
||||
var score = wordScoreMap.get(word.toLowerCase());
|
||||
if (score != null) {
|
||||
results.add(new WordScore(word, score, "OK"));
|
||||
} else {
|
||||
results.add(new WordScore(word, -1, "MISSING"));
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
// Prompt creation
|
||||
private static String createScoringPrompt(List<String> words) {
|
||||
return "Je bent een Nederlandse taalexpert. Geef elk van de " + words.size() + " onderstaande woorden een populariteitsscore van 1 (zeer zeldzaam) tot 10 (zeer algemeen).\n\n" +
|
||||
"Output ALLEEN in dit formaat:\n" +
|
||||
"woord1:score\n" +
|
||||
"woord2:score\n\n" +
|
||||
"GEEN andere tekst of uitleg. Sla GEEN woorden over.\n\n" +
|
||||
"Lijst:\n" +
|
||||
String.join("\n", words);
|
||||
}
|
||||
|
||||
// Utility methods
|
||||
private static String escapeJson(String str) {
|
||||
return str.replace("\\", "\\\\")
|
||||
.replace("\"", "\\\"")
|
||||
.replace("\n", "\\n");
|
||||
}
|
||||
|
||||
private static String curlPostJson(String url, String jsonBody, int timeoutSeconds) throws Exception {
|
||||
// Write JSON body to temp file to avoid shell escaping issues
|
||||
var tempFile = Files.createTempFile("lm-request-", ".json");
|
||||
try {
|
||||
Files.writeString(tempFile, jsonBody, StandardCharsets.UTF_8);
|
||||
|
||||
List<String> cmd = new ArrayList<>();
|
||||
cmd.add("curl");
|
||||
cmd.add("-fsSL");
|
||||
cmd.add("--connect-timeout");
|
||||
cmd.add("10");
|
||||
cmd.add("--max-time");
|
||||
cmd.add(String.valueOf(timeoutSeconds));
|
||||
cmd.add("-H");
|
||||
cmd.add("Content-Type: application/json");
|
||||
cmd.add("-d");
|
||||
cmd.add("@" + tempFile);
|
||||
cmd.add(url);
|
||||
|
||||
var p = new ProcessBuilder(cmd)
|
||||
.redirectErrorStream(true)
|
||||
.start();
|
||||
|
||||
var bytes = p.getInputStream().readAllBytes();
|
||||
var code = p.waitFor();
|
||||
|
||||
if (code != 0) {
|
||||
throw new IOException("curl POST failed (" + code + ") url=" + url + "\nOutput:\n" +
|
||||
new String(bytes, StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
return new String(bytes, StandardCharsets.UTF_8);
|
||||
} finally {
|
||||
Files.deleteIfExists(tempFile);
|
||||
}
|
||||
}
|
||||
|
||||
private static String extractChatContent(String json) {
|
||||
if (json == null) return null;
|
||||
var choices = json.indexOf("\"choices\"");
|
||||
var p = (choices >= 0) ? choices : 0;
|
||||
var i = json.indexOf("\"content\"", p);
|
||||
if (i < 0) return null;
|
||||
var colon = json.indexOf(':', i);
|
||||
if (colon < 0) return null;
|
||||
var q = json.indexOf('"', colon + 1);
|
||||
if (q < 0) return null;
|
||||
var sb = new StringBuilder();
|
||||
var esc = false;
|
||||
for (var k = q + 1; k < json.length(); k++) {
|
||||
var ch = json.charAt(k);
|
||||
if (esc) {
|
||||
if (ch == 'n') sb.append('\n');
|
||||
else if (ch == 't') sb.append('\t');
|
||||
else if (ch == 'r') sb.append('\r');
|
||||
else sb.append(ch);
|
||||
esc = false;
|
||||
} else {
|
||||
if (ch == '\\') esc = true;
|
||||
else if (ch == '"') break;
|
||||
else sb.append(ch);
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
@@ -39,7 +39,7 @@ public final class ExportFormat {
|
||||
var word = clueMap.get(s.key());
|
||||
if (word == null) continue;
|
||||
|
||||
var p = extractPlacedFromSlot(s, word);
|
||||
var p = extractPlacedFromSlot(puz.dict(),s, word);
|
||||
if (p == null) continue;
|
||||
placed.add(p);
|
||||
}
|
||||
@@ -121,7 +121,7 @@ public final class ExportFormat {
|
||||
/**
|
||||
* Convert a generator Slot + assigned word into a Placed object for export.
|
||||
*/
|
||||
private static Placed extractPlacedFromSlot(Slot s, String word) {
|
||||
private static Placed extractPlacedFromSlot(Dict dict,Slot s, String word) {
|
||||
int r = s.clueR();
|
||||
int c = s.clueC();
|
||||
char d = s.dir();
|
||||
@@ -168,7 +168,7 @@ public final class ExportFormat {
|
||||
|
||||
return new Placed(
|
||||
word,
|
||||
word, // clue placeholder
|
||||
dict.words().get(word).clue(), // clue placeholder
|
||||
startRow,
|
||||
startCol,
|
||||
direction,
|
||||
@@ -182,14 +182,9 @@ public final class ExportFormat {
|
||||
}
|
||||
|
||||
// pack (r,c) into one long key (handles negatives too)
|
||||
private static long pack(int r, int c) {
|
||||
return (((long) r) << 32) ^ (c & 0xFFFFFFFFL);
|
||||
}
|
||||
|
||||
// ---------- Data models ----------
|
||||
|
||||
private static long pack(int r, int c) { return (((long) r) << 32) ^ (c & 0xFFFFFFFFL); }
|
||||
/**
|
||||
* @param direction "horizontal" | "vertical"
|
||||
* @param direction "h" | "v"
|
||||
* @param cells word cells
|
||||
* @param arrow [arrowRow, arrowCol] */
|
||||
private record Placed(String word, String clue, int startRow, int startCol, String direction, String answer, int arrowRow, int arrowCol, List<int[]> cells, int[] arrow,
|
||||
@@ -197,8 +192,7 @@ public final class ExportFormat {
|
||||
|
||||
public record Rewards(int coins, int stars, int hints) { }
|
||||
|
||||
/**
|
||||
* @param direction "horizontal" | "vertical" */
|
||||
/// @param direction "h" | "v"
|
||||
public record WordOut(String word, String clue, int startRow, int startCol, String direction, String answer, int arrowRow, int arrowCol, boolean isReversed, int complex) { }
|
||||
|
||||
public record ExportedPuzzle(List<String> gridv2, List<WordOut> words, int difficulty, Rewards rewards) { }
|
||||
|
||||
@@ -83,7 +83,7 @@ public class Main {
|
||||
section("Clues");
|
||||
info("status : generating...");
|
||||
info("generatedFor : " + exported.words().size());
|
||||
exported = ClueGenerator.applyClues(exported);
|
||||
//exported = ClueGenerator.applyClues(exported);
|
||||
info("status : done");
|
||||
|
||||
section("Words");
|
||||
|
||||
@@ -132,24 +132,22 @@ public class SwedishGenerator {
|
||||
int[] data() { return a; } // note: may have extra capacity
|
||||
}
|
||||
|
||||
static final class DictEntry {
|
||||
static record DictEntry(ArrayList<String> words, IntList[][] pos) {
|
||||
|
||||
final ArrayList<String> words = new ArrayList<>();
|
||||
final IntList[][] pos; // pos[i][letter] -> indices (sorted by insertion)
|
||||
DictEntry(int L) {
|
||||
pos = new IntList[L][26];
|
||||
public DictEntry(int L) {
|
||||
this(new ArrayList<>(), new IntList[L][26]);
|
||||
for (var i = 0; i < L; i++) {
|
||||
for (var j = 0; j < 26; j++) pos[i][j] = new IntList();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static record WordDifficulty(String word, int difficulty, int simpel, int score, int cross) {
|
||||
static record WordDifficulty(String word, int difficulty, int simpel, int score, int cross, String clue) {
|
||||
|
||||
public WordDifficulty(String word, int simpel, int score) {
|
||||
public WordDifficulty(String word, int simpel, int score, String clue) {
|
||||
var difficulty1 = 0 + ((8 - word.length()) * 30) + ((10 - score) * 15);
|
||||
var crossScore = ThemePoolBuilderLength.crossabilityScore(word);
|
||||
this(word, difficulty1, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15));
|
||||
this(word, difficulty1, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15), clue);
|
||||
|
||||
// Prioritize simple words (high lScore) and long words.
|
||||
// lScore (1-10) adds up to 1000 points (weight 100).
|
||||
@@ -163,7 +161,6 @@ public class SwedishGenerator {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static record Dict(Map<String, WordDifficulty> words,
|
||||
HashMap<Integer, DictEntry> index,
|
||||
HashMap<Integer, Integer> lenCounts) { }
|
||||
@@ -193,8 +190,12 @@ public class SwedishGenerator {
|
||||
// CSV has level 1-10. llmScores use 10-level.
|
||||
score = 10 - Integer.parseInt(parts[1].trim());
|
||||
simpel = Integer.parseInt(parts[2].trim());
|
||||
var rawClue = parts[3].trim();
|
||||
if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
|
||||
rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
|
||||
}
|
||||
if (score >= 1)
|
||||
map.put(s, new WordDifficulty(s, simpel, score));
|
||||
map.put(s, new WordDifficulty(s, simpel, score, rawClue));
|
||||
}
|
||||
}
|
||||
var words = map.values().stream().collect(Collectors.toCollection(ArrayList::new));
|
||||
@@ -682,11 +683,10 @@ public class SwedishGenerator {
|
||||
System.out.flush();
|
||||
};
|
||||
|
||||
class Pick {
|
||||
record Pick(Slot slot,
|
||||
CandidateInfo info,
|
||||
boolean done) {
|
||||
|
||||
Slot slot;
|
||||
CandidateInfo info;
|
||||
boolean done;
|
||||
}
|
||||
|
||||
java.util.function.Supplier<Pick> chooseMRV = () -> {
|
||||
@@ -699,22 +699,14 @@ public class SwedishGenerator {
|
||||
|
||||
var entry = dictIndex.get(s.len);
|
||||
if (entry == null) {
|
||||
var p = new Pick();
|
||||
p.slot = null;
|
||||
p.info = null;
|
||||
p.done = false;
|
||||
return p;
|
||||
return new Pick(null, null, false);
|
||||
}
|
||||
|
||||
var pat = patternForSlot(grid, s);
|
||||
var info = candidateInfoForPattern(entry, pat);
|
||||
|
||||
if (info.count == 0) {
|
||||
var p = new Pick();
|
||||
p.slot = null;
|
||||
p.info = null;
|
||||
p.done = false;
|
||||
return p;
|
||||
return new Pick(null, null, false);
|
||||
}
|
||||
|
||||
if (best == null
|
||||
@@ -726,17 +718,11 @@ public class SwedishGenerator {
|
||||
}
|
||||
}
|
||||
|
||||
var p = new Pick();
|
||||
if (best == null) {
|
||||
p.slot = null;
|
||||
p.info = null;
|
||||
p.done = true;
|
||||
return new Pick(null, null, true);
|
||||
} else {
|
||||
p.slot = best;
|
||||
p.info = bestInfo;
|
||||
p.done = false;
|
||||
return new Pick(best, bestInfo, false);
|
||||
}
|
||||
return p;
|
||||
};
|
||||
|
||||
final var MAX_TRIES_PER_SLOT = 2000;
|
||||
@@ -868,9 +854,9 @@ public class SwedishGenerator {
|
||||
public record PuzzleResult(Dict dict, char[][] mask, FillResult filled) { }
|
||||
|
||||
public static PuzzleResult generatePuzzle(Main.Opts opts) {
|
||||
var tLoad0 = System.nanoTime();
|
||||
var dict = loadWords(opts.wordsPath);
|
||||
var tLoad1 = System.nanoTime();
|
||||
var tLoad0 = System.nanoTime();
|
||||
var dict = loadWords(opts.wordsPath);
|
||||
var tLoad1 = System.nanoTime();
|
||||
System.out.printf(Locale.ROOT, "LOAD_WORDS: %.3fs%n %s words%n", (tLoad1 - tLoad0) / 1e9, dict.words.size());
|
||||
|
||||
if (opts.threads > 1) {
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
package puzzle;
|
||||
|
||||
// ===== DATA CLASS =====
|
||||
/**
 * Mutable, package-private holder for the result of scoring a single word.
 */
class WordScore {

    // The word the result belongs to.
    String word;
    // The score stored for the word.
    int score;
    // Status label of the result.
    String status;
    // Endpoint value associated with the result; null when not supplied.
    String endpoint;
    // Batch id associated with the result; 0 when not supplied.
    int batchId;

    /** Creates a result with every field set explicitly. */
    WordScore(String word, int score, String status, String endpoint, int batchId) {
        this.word = word;
        this.score = score;
        this.status = status;
        this.endpoint = endpoint;
        this.batchId = batchId;
    }

    /** Creates a result without endpoint information: endpoint is null and batchId is 0. */
    WordScore(String word, int score, String status) {
        this(word, score, status, null, 0);
    }
}
|
||||
38
tools/hint/dbjsonl.sh
Executable file
38
tools/hint/dbjsonl.sh
Executable file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env bash
set -euo pipefail

# Import a JSONL file (one JSON document per line) into a jsonb column of a
# PostgreSQL table.
#
# Usage:
#   ./dbjsonl.sh "postgresql://user:pass@host:5432/dbname" gloss doc /path/to/file.jsonl
#
# Notes:
#   - Creates the table if it doesn't exist.
#   - Inserts each JSON line into a jsonb column.
#   - Skips blank lines.

DB_URL="${1:?db url}"
TABLE="${2:?table name}"
COL="${3:?json column name}"
FILE="${4:?jsonl file path}"

# TABLE and COL are spliced into SQL as identifiers, so accept only plain
# identifiers (TABLE may be schema-qualified).
table_re='^[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?$'
column_re='^[A-Za-z_][A-Za-z0-9_]*$'
if [[ ! "$TABLE" =~ $table_re ]]; then
  echo "invalid table name: $TABLE" >&2
  exit 2
fi
if [[ ! "$COL" =~ $column_re ]]; then
  echo "invalid column name: $COL" >&2
  exit 2
fi

# FILE ends up inside a single-quoted \copy argument; refuse characters that
# would end or alter that quoting, and fail early on an unreadable file.
if [[ "$FILE" == *"'"* || "$FILE" == *\\* ]]; then
  echo "file path must not contain single quotes or backslashes: $FILE" >&2
  exit 2
fi
if [[ ! -r "$FILE" ]]; then
  echo "cannot read file: $FILE" >&2
  exit 2
fi

psql "$DB_URL" -v ON_ERROR_STOP=1 <<SQL
CREATE TABLE IF NOT EXISTS ${TABLE} (
  id bigserial PRIMARY KEY,
  ${COL} jsonb NOT NULL
);
SQL

# \copy runs on the client, so we can feed it from a local file.
# We copy into a 1-column staging table, then cast to jsonb and insert.
#
# FORMAT text would interpret backslashes (turning the JSON escape \n into a
# real newline, for example) and corrupt the documents. CSV with quote and
# delimiter bytes that cannot occur unescaped in JSON loads each line verbatim.
# Empty lines load as NULL and are dropped by the btrim filter below.
psql "$DB_URL" -v ON_ERROR_STOP=1 <<SQL
CREATE TEMP TABLE _jsonl_stage(line text);

\\copy _jsonl_stage(line) FROM '${FILE}' WITH (FORMAT csv, QUOTE E'\\x01', DELIMITER E'\\x02');

INSERT INTO ${TABLE}(${COL})
SELECT line::jsonb
FROM _jsonl_stage
WHERE btrim(line) <> '';

-- optional: show count inserted this run
SELECT count(*) AS inserted_now FROM _jsonl_stage WHERE btrim(line) <> '';
SQL
|
||||
59
tools/hint/jsonl-to-sqlite.mjs
Normal file
59
tools/hint/jsonl-to-sqlite.mjs
Normal file
@@ -0,0 +1,59 @@
|
||||
// jsonl-to-sqlite.mjs
|
||||
import fs from 'node:fs'
|
||||
import readline from 'node:readline'
|
||||
import Database from 'better-sqlite3'
|
||||
|
||||
const jsonlPath = process.argv[2]
|
||||
const dbPath = process.argv[3] ?? 'out.sqlite'
|
||||
const table = process.argv[4] ?? 'events'
|
||||
|
||||
if (!jsonlPath) {
|
||||
console.error('Usage: node jsonl-to-sqlite.mjs <file.jsonl> [out.sqlite] [table]')
|
||||
process.exit(1)
|
||||
}
|
||||
|
||||
const db = new Database(dbPath)
|
||||
db.pragma('journal_mode = WAL')
|
||||
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS ${ table }
|
||||
(
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
json TEXT NOT NULL
|
||||
);
|
||||
`)
|
||||
|
||||
const insert = db.prepare(`INSERT INTO ${ table }(json)
|
||||
VALUES (?)`)
|
||||
const insertMany = db.transaction((rows) => {
|
||||
for (const r of rows) insert.run(r)
|
||||
})
|
||||
|
||||
const rl = readline.createInterface({
|
||||
input : fs.createReadStream(jsonlPath, { encoding: 'utf8' }),
|
||||
crlfDelay: Infinity
|
||||
})
|
||||
|
||||
let batch = []
|
||||
let lineNo = 0
|
||||
for await (const line of rl) {
|
||||
lineNo++
|
||||
const trimmed = line.trim()
|
||||
if (!trimmed) continue
|
||||
|
||||
try {
|
||||
JSON.parse(trimmed) // validate
|
||||
batch.push(trimmed)
|
||||
} catch (e) {
|
||||
console.warn(`Skipping invalid JSON on line ${ lineNo }: ${ e.message }`)
|
||||
continue
|
||||
}
|
||||
|
||||
if (batch.length >= 1000) {
|
||||
insertMany(batch)
|
||||
batch = []
|
||||
}
|
||||
}
|
||||
if (batch.length) insertMany(batch)
|
||||
|
||||
console.log(`Done. Imported into ${ dbPath }, table=${ table }`)
|
||||
@@ -1,16 +0,0 @@
|
||||
FROM python:3.13-slim
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends ca-certificates tzdata curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# supercronic
|
||||
RUN curl -fsSL -o /usr/local/bin/supercronic \
|
||||
https://github.com/aptible/supercronic/releases/download/v0.2.30/supercronic-linux-amd64 \
|
||||
&& chmod +x /usr/local/bin/supercronic
|
||||
|
||||
WORKDIR /app
|
||||
COPY tools/puzzle-gen/generate_daily_puzzles.py /app/generate_daily_puzzles.py
|
||||
COPY tools/puzzle-gen/crontab /app/crontab
|
||||
|
||||
CMD ["/usr/local/bin/supercronic", "/app/crontab"]
|
||||
@@ -1 +0,0 @@
|
||||
15 3 * * * python /app/generate_daily_puzzles.py
|
||||
@@ -1,399 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import datetime as dt
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import urllib.request
|
||||
import xml.etree.ElementTree as ET
|
||||
import json, re
|
||||
|
||||
# --- USER-FRIENDLY CONFIG ---
|
||||
# Max 7 letters for shorter, more common words
|
||||
WORD_RE = re.compile(r"^[A-Z]{3,7}$")
|
||||
EMPTY = " "
|
||||
# Slightly smaller grid for denser puzzles
|
||||
SIZE = 10
|
||||
# More words needed since they're shorter
|
||||
TARGET_WORDS = 15
|
||||
MIN_ACCEPT_WORDS = 10
|
||||
|
||||
FEEDS = [
|
||||
"https://feeds.nos.nl/nosnieuwsalgemeen",
|
||||
"https://feeds.nos.nl/nosnieuwstech",
|
||||
]
|
||||
|
||||
|
||||
def env(name, default=None):
    """Return environment variable *name*, or *default* when it is unset or empty."""
    value = os.getenv(name)
    if value:
        return value
    return default
|
||||
|
||||
|
||||
def http_get(url, timeout=15):
    """Fetch *url* with a GET request and return the raw response body as bytes.

    The request carries the ``puzzle-gen/1.0`` user agent; *timeout* is passed
    to ``urlopen`` unchanged.
    """
    headers = {"User-Agent": "puzzle-gen/1.0"}
    request = urllib.request.Request(url, headers=headers)
    with urllib.request.urlopen(request, timeout=timeout) as response:
        body = response.read()
    return body
|
||||
|
||||
|
||||
def http_post_json(url, payload, timeout=45):
    """POST *payload* as JSON to *url* and return the decoded JSON response.

    The request sends a fixed ``Bearer lm-studio`` authorization header and the
    ``puzzle-gen/1.0`` user agent. The response body is decoded as UTF-8 before
    it is parsed.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer lm-studio",
        "User-Agent": "puzzle-gen/1.0",
    }
    body = json.dumps(payload).encode("utf-8")
    request = urllib.request.Request(url, data=body, headers=headers, method="POST")
    with urllib.request.urlopen(request, timeout=timeout) as response:
        raw = response.read()
    return json.loads(raw.decode("utf-8"))
|
||||
|
||||
|
||||
def fetch_rss_items(url, limit=12):
    """Download the feed at *url* and return up to *limit* ``(title, description)`` pairs.

    When the root tag ends in "rss" the items are looked up under its
    ``channel`` child, otherwise directly under the root. Items without a title
    are ignored; both texts are stripped of surrounding whitespace.
    """
    root = ET.fromstring(http_get(url))
    is_rss = root.tag.lower().endswith("rss")
    channel = root.find("channel") if is_rss else root

    collected = []
    for node in channel.findall("item"):
        headline = (node.findtext("title") or "").strip()
        summary = (node.findtext("description") or "").strip()
        if headline:
            collected.append((headline, summary))
        if len(collected) >= limit:
            break
    return collected
|
||||
|
||||
|
||||
def safe_slug(s, maxlen=50):
    """Turn *s* into a lowercase, dash-separated slug of at most *maxlen* characters.

    Every run of characters outside ``a-z0-9`` becomes a single dash, and
    leading and trailing dashes are removed before truncation. An empty result
    is replaced by ``"news"``.
    """
    dashed = re.sub(r"[^a-z0-9]+", "-", s.lower())
    slug = dashed.strip("-")[:maxlen]
    return slug if slug else "news"
|
||||
|
||||
|
||||
def extract_first_json(text: str):
    """Return the first JSON object or array embedded in *text*, or ``None``.

    Every ``{`` or ``[`` is tried in order as the start of a JSON value, and
    the first one that decodes wins. Text after the decoded value is ignored.
    The previous version only tried the very first bracket, so a reply such as
    ``'see [note]: {"A": "b"}'`` yielded ``None`` although it contains valid JSON.

    Returns ``None`` for empty input or when no candidate decodes.
    """
    if not text:
        return None

    decoder = json.JSONDecoder()
    for start, ch in enumerate(text):
        if ch not in "{[":
            continue
        try:
            value, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            # Not a JSON value here (e.g. a bracket in prose); try the next one.
            continue
        return value
    return None
|
||||
|
||||
|
||||
def normalize_word(raw: str) -> str:
    """Return *raw* reduced to its ASCII letters, upper-cased.

    Hyphens, digits, spaces and every other non ``A-Z``/``a-z`` character are
    removed. A falsy *raw* (``None`` or ``""``) yields ``""``.
    """
    text = raw if raw else ""
    letters_only = re.sub(r"[^A-Za-z]", "", text)
    return letters_only.upper()
|
||||
|
||||
|
||||
def sanitize_wordcluemap(obj):
    """Normalize a parsed model answer into a ``{WORD: clue}`` dict.

    Accepts:
      - dict: {"WORD": "clue", ...}
      - list: [{"word": "...", "clue": "..."}, {"WOORD": "...", "clue": "..."}, ...]

    Any other input gives an empty dict. A pair is kept only when word and clue
    are both strings, the normalized word matches ``WORD_RE`` and the stripped
    clue is not empty. When several inputs normalize to the same word, the last
    one wins.
    """
    if isinstance(obj, dict):
        pairs = list(obj.items())
    elif isinstance(obj, list):
        # Non-dict list entries are ignored; the first truthy alias key is used.
        pairs = [
            (
                entry.get("word") or entry.get("WOORD") or entry.get("Word"),
                entry.get("clue") or entry.get("CLUE") or entry.get("hint") or entry.get("HINT"),
            )
            for entry in obj
            if isinstance(entry, dict)
        ]
    else:
        return {}

    cleaned = {}
    for raw_word, clue in pairs:
        if not (isinstance(raw_word, str) and isinstance(clue, str)):
            continue
        word = normalize_word(raw_word)
        hint = clue.strip()
        if WORD_RE.fullmatch(word) and hint:
            cleaned[word] = hint
    return cleaned
|
||||
|
||||
|
||||
# ---- generator (no-touch) ----
|
||||
def make_grid():
    """Return a new ``SIZE`` x ``SIZE`` grid (list of row lists) filled with ``EMPTY``."""
    return [[EMPTY] * SIZE for _ in range(SIZE)]
|
||||
|
||||
|
||||
def in_bounds(g, r, c):
    """Return True when row *r* and column *c* lie inside grid *g*.

    The width is taken from the first row and is only looked at once the row
    index has been found valid.
    """
    if r < 0 or r >= len(g):
        return False
    return 0 <= c < len(g[0])
|
||||
|
||||
|
||||
def can_place_notouch(g, word, r, c, direction):
    """Return True when *word* may be placed at (*r*, *c*) without touching other words.

    Rules checked, in this order:
      - the start is not negative and the word fits inside the grid along its
        direction;
      - the cell just before the start and the cell just after the end are
        empty or outside the grid;
      - a cell that already holds a letter must hold the same letter (a crossing);
      - a cell that is still empty must not have a filled neighbour on either
        side perpendicular to the word.

    Any *direction* other than "horizontal" is laid out vertically.
    """
    height, width = len(g), len(g[0])
    horizontal = direction == "horizontal"
    length = len(word)

    if r < 0 or c < 0:
        return False
    if horizontal and c + length > width:
        return False
    if direction == "vertical" and r + length > height:
        return False

    def occupied(rr, cc):
        # Cells outside the grid count as free.
        return in_bounds(g, rr, cc) and g[rr][cc] != EMPTY

    # No "glue": the word may not extend an existing run of letters.
    before = (r, c - 1) if horizontal else (r - 1, c)
    after = (r, c + length) if horizontal else (r + length, c)
    if occupied(*before) or occupied(*after):
        return False

    for i, ch in enumerate(word):
        rr, cc = (r, c + i) if horizontal else (r + i, c)
        cell = g[rr][cc]

        if cell != EMPTY:
            # Crossing an existing word: letters must agree; neighbours are not checked.
            if cell != ch:
                return False
            continue

        if horizontal:
            sides = ((rr - 1, cc), (rr + 1, cc))
        else:
            sides = ((rr, cc - 1), (rr, cc + 1))
        if any(occupied(sr, sc) for sr, sc in sides):
            return False

    return True
|
||||
|
||||
|
||||
def place_word(g, word, r, c, direction):
    """Write the letters of *word* into grid *g*, starting at (*r*, *c*).

    "horizontal" advances along the row; any other *direction* advances down
    the column. The grid is modified in place and no checks are made.
    """
    row_step, col_step = (0, 1) if direction == "horizontal" else (1, 0)
    for offset, letter in enumerate(word):
        g[r + row_step * offset][c + col_step * offset] = letter
|
||||
|
||||
|
||||
def find_spots(g, word, placed):
    """List every position where *word* can cross an already placed word.

    For each letter of each placed word, every equal letter of *word* is lined
    up with it in the perpendicular direction. Positions accepted by
    ``can_place_notouch`` are returned as ``(row, col, direction)`` tuples; the
    same position may appear more than once.
    """
    spots = []
    for entry in placed:
        entry_horizontal = entry["direction"] == "horizontal"
        # A crossing word always runs perpendicular to the word it crosses.
        direction = "vertical" if entry_horizontal else "horizontal"

        for i, placed_letter in enumerate(entry["word"]):
            if entry_horizontal:
                pr, pc = entry["row"], entry["col"] + i
            else:
                pr, pc = entry["row"] + i, entry["col"]

            for j, letter in enumerate(word):
                if letter != placed_letter:
                    continue
                # Shift the start back so that letter j lands on (pr, pc).
                if direction == "horizontal":
                    r, c = pr, pc - j
                else:
                    r, c = pr - j, pc
                if can_place_notouch(g, word, r, c, direction):
                    spots.append((r, c, direction))
    return spots
|
||||
|
||||
|
||||
def generate_puzzle(wordcluemap, rnd):
    """Lay the words of *wordcluemap* out on a fresh grid.

    Words are tried longest first. The first word goes horizontally on the
    middle row, centred; every later word is put on one randomly chosen
    crossing spot and is skipped when it has none. *rnd* only needs a
    ``shuffle`` method.

    Returns ``{"grid": ..., "placed": [...]}``, or ``None`` when the first word
    cannot be placed. Each placed entry holds word, clue, row, col and direction.
    """
    ordered = sorted(wordcluemap.keys(), key=len, reverse=True)
    grid = make_grid()

    def entry_for(word, row, col, direction):
        return {"word": word, "clue": wordcluemap[word], "row": row, "col": col, "direction": direction}

    seed_word = ordered[0]
    seed_row = SIZE // 2
    seed_col = (SIZE - len(seed_word)) // 2
    if not can_place_notouch(grid, seed_word, seed_row, seed_col, "horizontal"):
        return None
    place_word(grid, seed_word, seed_row, seed_col, "horizontal")
    placed = [entry_for(seed_word, seed_row, seed_col, "horizontal")]

    for word in ordered[1:]:
        candidates = find_spots(grid, word, placed)
        rnd.shuffle(candidates)
        if not candidates:
            continue
        row, col, direction = candidates[0]
        place_word(grid, word, row, col, direction)
        placed.append(entry_for(word, row, col, direction))

    return {"grid": grid, "placed": placed}
|
||||
|
||||
|
||||
def export_format(puz, difficulty=1, rewards=None):
    """Convert a generated puzzle into the exported dict layout.

    The grid is cropped to the bounding box of all word cells and arrow cells
    (the cell just before each word's start), widened by one cell on every
    side. Empty cells and cells outside the original grid are written as "#".
    All word coordinates in the result are relative to the cropped grid.

    *rewards* defaults to ``{"coins": 50, "stars": 2, "hints": 1}``.

    Returns a dict with the keys ``gridv2`` (list of row strings), ``words``,
    ``difficulty`` and ``rewards``.
    """
    if rewards is None:
        rewards = {"coins": 50, "stars": 2, "hints": 1}

    grid = puz["grid"]
    placed = puz["placed"]
    height, width = len(grid), len(grid[0])

    def is_horizontal(p):
        return p["direction"] == "horizontal"

    def arrow_cell(p):
        # The arrow sits in the cell just before the first letter.
        if is_horizontal(p):
            return p["row"], p["col"] - 1
        return p["row"] - 1, p["col"]

    def letter_cells(p):
        span = range(len(p["word"]))
        if is_horizontal(p):
            return [(p["row"], p["col"] + i) for i in span]
        return [(p["row"] + i, p["col"]) for i in span]

    occupied = []
    for p in placed:
        occupied.extend(letter_cells(p))
        occupied.append(arrow_cell(p))

    all_rows = [r for r, _ in occupied]
    all_cols = [c for _, c in occupied]
    min_r, max_r = min(all_rows) - 1, max(all_rows) + 1
    min_c, max_c = min(all_cols) - 1, max(all_cols) + 1

    def symbol(r, c):
        inside = 0 <= r < height and 0 <= c < width
        if not inside or grid[r][c] == EMPTY:
            return "#"
        return grid[r][c]

    gridv2 = [
        "".join(symbol(r, c) for c in range(min_c, max_c + 1))
        for r in range(min_r, max_r + 1)
    ]

    words_out = []
    for p in placed:
        arrow_r, arrow_c = arrow_cell(p)
        words_out.append({
            "word": p["word"],
            "clue": p["clue"],
            "startRow": p["row"] - min_r,
            "startCol": p["col"] - min_c,
            "direction": p["direction"],
            "answer": p["word"],
            "arrowRow": arrow_r - min_r,
            "arrowCol": arrow_c - min_c,
        })

    return {"gridv2": gridv2, "words": words_out, "difficulty": difficulty, "rewards": rewards}
|
||||
|
||||
|
||||
def list_models(base_url):
    """Return the model ids listed at ``{base_url}/models``.

    Entries without a truthy ``id`` are left out. Any failure (network, decoding,
    unexpected response shape) results in an empty list instead of an exception.
    """
    try:
        raw = http_get(f"{base_url}/models")
        listing = json.loads(raw.decode("utf-8"))
        model_ids = []
        for entry in listing.get("data", []):
            model_id = entry.get("id")
            if model_id:
                model_ids.append(model_id)
        return model_ids
    except Exception:
        return []
|
||||
|
||||
|
||||
def llm_make_wordcluemap(base_url, model, title, desc, n_words=12):
|
||||
prompt = f"""
|
||||
Geef ALLEEN een JSON object terug (geen array, geen markdown).
|
||||
Formaat exact:
|
||||
{{
|
||||
"WOORD": "clue",
|
||||
...
|
||||
}}
|
||||
|
||||
REGELS:
|
||||
- WOORD: alleen letters A-Z, geen streepjes/cijfers, lengte 3..7.
|
||||
- Gebruik KORTE, GEBRUIKELIJKE Nederlandse woorden (geen jargon, geen moeilijke termen).
|
||||
- Clue: korte, duidelijke hint in het Nederlands.
|
||||
- Maak {n_words} items.
|
||||
Thema: {title}
|
||||
Context: {desc[:260]}
|
||||
""".strip()
|
||||
|
||||
payload = {
|
||||
"model": model,
|
||||
"temperature": 0.7,
|
||||
"messages": [
|
||||
{"role": "system", "content": "Return STRICT JSON object only."},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
}
|
||||
|
||||
data = http_post_json(f"{base_url}/chat/completions", payload)
|
||||
content = data["choices"][0]["message"]["content"]
|
||||
obj = extract_first_json(content)
|
||||
wc = sanitize_wordcluemap(obj)
|
||||
|
||||
# Aggressive repair for short words
|
||||
if len(wc) < MIN_ACCEPT_WORDS:
|
||||
repair = f"""
|
||||
Zet dit om naar een STRICT JSON OBJECT (geen array) "WOORD":"clue".
|
||||
KRITIEK:
|
||||
- WOORD: A-Z only, lengte 3..7. GEEN lange woorden!
|
||||
- Gebruik ALLEEN korte, bekende Nederlandse woorden bij twijfel.
|
||||
- Vervang ongeldige/moeilijke woorden door veelvoorkomende synoniemen.
|
||||
Input:
|
||||
{content}
|
||||
""".strip()
|
||||
|
||||
payload["messages"] = [
|
||||
{"role": "system", "content": "Return STRICT JSON object only."},
|
||||
{"role": "user", "content": repair},
|
||||
]
|
||||
data = http_post_json(f"{base_url}/chat/completions", payload)
|
||||
content2 = data["choices"][0]["message"]["content"]
|
||||
obj2 = extract_first_json(content2)
|
||||
wc2 = sanitize_wordcluemap(obj2)
|
||||
if len(wc2) > len(wc):
|
||||
wc = wc2
|
||||
|
||||
return wc
|
||||
|
||||
|
||||
def main():
    """Generate today's puzzles from news feeds and write them to the output directory.

    Configuration comes from the environment: ``LM_STUDIO_BASE_URL``,
    ``OUT_DIR``, ``PUZZLES_PER_DAY`` and ``LM_MODEL`` (default: first model the
    server lists). The random generator is seeded with today's ISO date. For
    each requested puzzle a random feed item is turned into a word/clue map by
    the model; attempts with too few words, or with fewer than 7 placed words,
    are skipped. Finally ``index.json`` is rewritten with all of today's
    puzzle files.

    Raises ``SystemExit`` when no feed yields any item.
    """
    import sys  # local import: only needed for the feed warnings below

    base_url = env("LM_STUDIO_BASE_URL", "http://192.168.1.159:1234/v1")
    out_dir = env("OUT_DIR", "/data/puzzles")
    per_day = int(env("PUZZLES_PER_DAY", "3"))
    today = dt.date.today().isoformat()
    rnd = random.Random(today)

    os.makedirs(out_dir, exist_ok=True)

    items = []
    for f in FEEDS:
        try:
            items.extend(fetch_rss_items(f))
        except Exception as exc:
            # Best effort: one broken feed must not stop the run, but the
            # reason is reported instead of being dropped silently.
            print(f"Warning: could not read feed {f}: {exc}", file=sys.stderr)
    if not items:
        raise SystemExit("No RSS items found")

    models = list_models(base_url)
    model = env("LM_MODEL", models[0] if models else "model-identifier")

    made = 0
    for idx in range(1, per_day + 1):
        title, desc = rnd.choice(items)
        slug = safe_slug(title)

        wc = llm_make_wordcluemap(base_url, model, title, desc, n_words=TARGET_WORDS)
        # Stricter validation: need more words since they're shorter
        if len(wc) < MIN_ACCEPT_WORDS:
            continue

        puz = generate_puzzle(wc, rnd)
        # Require at least 7 placed words for a decent puzzle
        if not puz or len(puz["placed"]) < 7:
            continue

        exported = export_format(puz, difficulty=1, rewards={"coins": 50, "stars": 2, "hints": 1})
        fn = f"crossword_{today}_{idx:02d}_{slug}.json"
        path = os.path.join(out_dir, fn)
        with open(path, "w", encoding="utf-8") as fp:
            json.dump(exported, fp, ensure_ascii=False, indent=2)
        made += 1

    # index.json (handy for the frontend): lists every puzzle file of today
    # found in the output directory, including files from earlier runs.
    files = sorted([f for f in os.listdir(out_dir) if f.startswith(f"crossword_{today}_") and f.endswith(".json")])
    with open(os.path.join(out_dir, "index.json"), "w", encoding="utf-8") as fp:
        json.dump({"date": today, "files": files}, fp, ensure_ascii=False, indent=2)

    print(f"Generated {made} puzzles for {today}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user