Gather data
This commit is contained in:
100
.db/init.sql
Normal file
100
.db/init.sql
Normal file
@@ -0,0 +1,100 @@
|
||||
-- 0) Optional: index on frequency.Word (handy)
CREATE INDEX IF NOT EXISTS idx_frequency_word ON frequency(Word);

-- 1) View: min-max normalisation of the Zipf score and the word length (0..1)
DROP VIEW IF EXISTS word_norm;
CREATE VIEW word_norm AS
WITH parsed AS (
    -- Replace a decimal comma with a dot, map '' to NULL, then cast to REAL.
    SELECT
        Word,
        CAST(NULLIF(REPLACE(Zipf, ',', '.'), '') AS REAL) AS zipf_num,
        LENGTH(Word) AS len
    FROM frequency
    WHERE Word IS NOT NULL
),
usable AS (
    -- Only rows with a Zipf value take part in the bounds and in the output.
    SELECT
        Word,
        zipf_num,
        len
    FROM parsed
    WHERE zipf_num IS NOT NULL
),
bounds AS (
    -- Single row holding the extremes used for rescaling.
    SELECT
        MIN(zipf_num) AS min_zipf,
        MAX(zipf_num) AS max_zipf,
        MIN(len) AS min_len,
        MAX(len) AS max_len
    FROM usable
)
SELECT
    u.Word,
    u.zipf_num,
    u.len,

    -- freq_norm: 0 = very frequent, 1 = very rare (NULL when all Zipf values are equal)
    (1.0 - (u.zipf_num - x.min_zipf) / NULLIF(x.max_zipf - x.min_zipf, 0.0)) AS freq_norm,

    -- len_norm: 0..1 (NULL when all words have the same length)
    ((u.len - x.min_len) / NULLIF(CAST(x.max_len - x.min_len AS REAL), 0.0)) AS len_norm
FROM usable AS u
CROSS JOIN bounds AS x;
|
||||
|
||||
|
||||
-- 2) Build the letter frequencies (letter_freq)
CREATE TABLE IF NOT EXISTS letter_freq (
    ch  TEXT PRIMARY KEY,
    cnt INTEGER NOT NULL
);

-- Intentionally unfiltered: the table is rebuilt from scratch on every run.
DELETE FROM letter_freq;

WITH RECURSIVE positions(word, pos) AS (
    -- Anchor: position 1 of every non-empty word.
    SELECT Word, 1
    FROM frequency
    WHERE Word IS NOT NULL AND LENGTH(Word) > 0

    UNION ALL

    -- Step: advance one character until the end of the word.
    SELECT word, pos + 1
    FROM positions
    WHERE pos < LENGTH(word)
),
letters AS (
    -- One lower-cased character per (word, position).
    SELECT LOWER(SUBSTR(word, pos, 1)) AS ch
    FROM positions
)
INSERT INTO letter_freq (ch, cnt)
SELECT
    ch,
    COUNT(*) AS cnt
FROM letters
WHERE ch <> ''  -- basic guard
GROUP BY ch;
|
||||
|
||||
|
||||
-- 3) View: letter rarity per word (0..1)
--    For every character of a word, take 1 - (count of that character / total
--    character count) from letter_freq, then average those values per word.
--    Characters that have no row in letter_freq are dropped by the inner join.
DROP VIEW IF EXISTS letter_rarity;
CREATE VIEW letter_rarity AS
-- RECURSIVE is spelled out because word_chars references itself; standard SQL
-- requires the keyword, and it matches the CTE used to build letter_freq.
WITH RECURSIVE total AS (
    -- Cast to REAL so the division below is not integer division.
    SELECT CAST(SUM(cnt) AS REAL) AS total_cnt
    FROM letter_freq
),
word_chars(word, pos, ch) AS (
    -- Anchor: first character of every non-empty word.
    SELECT Word, 1, LOWER(SUBSTR(Word, 1, 1))
    FROM frequency
    WHERE Word IS NOT NULL AND LENGTH(Word) > 0

    UNION ALL

    -- Step: next character, until the end of the word.
    SELECT word, pos + 1, LOWER(SUBSTR(word, pos + 1, 1))
    FROM word_chars
    WHERE pos < LENGTH(word)
)
SELECT
    wc.word AS Word,
    AVG(1.0 - (lf.cnt / t.total_cnt)) AS letter_rarity_norm
FROM word_chars AS wc
INNER JOIN letter_freq AS lf
    ON lf.ch = wc.ch
CROSS JOIN total AS t
GROUP BY wc.word;
|
||||
|
||||
|
||||
-- 4) View: complexity v1 (ngramProb + morphology = placeholder 0.5)
--    Weighted sum of five components; the weights add up to 1.0.
--    Any component that is NULL falls back to the neutral value 0.5 so that a
--    single missing component cannot turn the whole complexity into NULL
--    (freq_norm / len_norm are NULL when their min-max range is zero).
DROP VIEW IF EXISTS word_complexity_v1;
CREATE VIEW word_complexity_v1 AS
SELECT
    n.Word,
    (0.35 * COALESCE(n.freq_norm, 0.5)) +
    (0.20 * COALESCE(n.len_norm, 0.5)) +
    (0.20 * 0.5) +                                    -- ngramProb placeholder
    (0.15 * COALESCE(r.letter_rarity_norm, 0.5)) +
    (0.10 * 0.5)                                      -- morphology placeholder
    AS complexity
FROM word_norm AS n
LEFT JOIN letter_rarity AS r USING (Word);
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -6,3 +6,4 @@ target/
|
||||
out/puzzle/
|
||||
*.log
|
||||
.output.txt
|
||||
out/
|
||||
|
||||
142
import.py
Normal file
142
import.py
Normal file
@@ -0,0 +1,142 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
RE_ASCII_WORD = re.compile(r"^[A-Za-z]+$")
RE_SPACE = re.compile(r"\s+")
RE_PARENS = re.compile(r"\s*\([^)]*\)\s*")  # remove (labels)
RE_BRACKETS = re.compile(r"\s*\[[^]]*]\s*")  # remove [labels]


def clean_hint(s: str) -> str:
    """Normalise a raw gloss into a compact hint.

    Steps, in order: trim, replace [bracketed] and then (parenthesised) labels
    with a single space, turn the typographic apostrophe into a plain one,
    collapse whitespace runs, and strip leading/trailing spaces, tabs and the
    punctuation characters ``-;:,.``.
    """
    without_labels = RE_PARENS.sub(" ", RE_BRACKETS.sub(" ", s.strip()))
    plain_quotes = without_labels.replace("’", "'")
    return RE_SPACE.sub(" ", plain_quotes).strip(" -;:,.\t")
|
||||
|
||||
def pick_gloss(obj: dict) -> tuple[str | None, str | None]:
    """Return (hint, pos) from a Wiktextract JSON line.

    The hint is the first string gloss, scanning senses in order, whose cleaned
    form (see ``clean_hint``) is at least 3 characters long; ``None`` when no
    gloss qualifies. ``pos`` is ``obj["pos"]`` if present, else ``None``.
    """
    part_of_speech = obj.get("pos")

    for sense in obj.get("senses") or []:
        for raw_gloss in sense.get("glosses") or []:
            if not isinstance(raw_gloss, str):
                continue
            candidate = clean_hint(raw_gloss)
            # Take the first gloss that looks "normal" (not a stub of 1-2 chars).
            if len(candidate) >= 3:
                return candidate, part_of_speech

    return None, part_of_speech
|
||||
|
||||
def main():
    """Import hints from a Wiktextract JSONL dump into the ``hints`` table.

    Command-line arguments: ``--db`` (SQLite file), ``--jsonl`` (dump file),
    ``--minlen`` / ``--maxlen`` (inclusive word-length bounds) and ``--maxhint``
    (maximum hint length in characters).

    Lines are skipped when they are blank, not valid JSON, not a JSON object,
    carry a ``lang_code`` other than ``"nl"``, have no usable ``word``, fall
    outside the length bounds, contain non-ASCII-letter characters, or yield no
    hint. Accepted rows are inserted with ``INSERT OR IGNORE`` in batches of
    2000 inside one transaction. The connection is always closed, also when an
    error interrupts the import (uncommitted rows are then discarded).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--db", required=True, help="pad naar jouw sqlite db")
    ap.add_argument("--jsonl", required=True, help="pad naar nl-extract.jsonl")
    ap.add_argument("--minlen", type=int, default=2)
    ap.add_argument("--maxlen", type=int, default=8)
    ap.add_argument("--maxhint", type=int, default=80)
    args = ap.parse_args()

    db_path = Path(args.db)
    jsonl_path = Path(args.jsonl)

    insert_sql = "INSERT OR IGNORE INTO hints(word,hint,source,pos,quality) VALUES (?,?,?,?,?)"

    con = sqlite3.connect(db_path)
    try:
        cur = con.cursor()

        # Speed pragmas (intended for the import only).
        # NOTE(review): journal_mode=WAL is believed to persist in the database
        # file after this script ends - confirm that this is acceptable.
        cur.execute("PRAGMA journal_mode=WAL;")
        cur.execute("PRAGMA synchronous=NORMAL;")
        cur.execute("PRAGMA temp_store=MEMORY;")

        cur.execute("""
            CREATE TABLE IF NOT EXISTS hints (
                word TEXT NOT NULL,
                hint TEXT NOT NULL,
                source TEXT NOT NULL DEFAULT 'wiktionary',
                pos TEXT,
                quality INTEGER NOT NULL DEFAULT 80,
                PRIMARY KEY (word, hint, source)
            );
        """)
        cur.execute("CREATE INDEX IF NOT EXISTS idx_hints_word ON hints(word);")
        con.commit()

        batch = []
        inserted = 0
        seen = 0  # number of rows that passed all filters (not raw lines)

        def flush() -> int:
            """Write the pending batch and return the reported row count."""
            if not batch:
                return 0
            cur.executemany(insert_sql, batch)
            written = cur.rowcount if cur.rowcount != -1 else 0
            batch.clear()
            return written

        con.execute("BEGIN;")
        with jsonl_path.open("r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    obj = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # A valid JSON line may still be a list/string/number.
                if not isinstance(obj, dict):
                    continue

                # Kaikki/Wiktextract: usually lang_code = "nl" and lang = "Dutch".
                # Entries without a lang_code are let through.
                lang_code = obj.get("lang_code")
                if lang_code and lang_code != "nl":
                    continue

                word = obj.get("word")
                if not word or not isinstance(word, str):
                    continue

                word_up = word.upper().strip()
                if not (args.minlen <= len(word_up) <= args.maxlen):
                    continue
                if not RE_ASCII_WORD.match(word_up):
                    continue

                hint, pos = pick_gloss(obj)
                if not hint:
                    continue

                # Keep the hint short.
                hint = hint[: args.maxhint].rstrip()

                # Simple quality score: slightly higher when the POS is known.
                quality = 85 if pos else 80

                batch.append((word_up, hint, "wiktionary", pos, quality))
                seen += 1

                if len(batch) >= 2000:
                    inserted += flush()

        inserted += flush()
        con.commit()
    finally:
        con.close()

    print(f"Done. processed_lines≈{seen}, inserted≈{inserted} (OR IGNORE kan inserts verlagen).")


if __name__ == "__main__":
    main()
|
||||
27831
out/pool.txt
27831
out/pool.txt
File diff suppressed because it is too large
Load Diff
@@ -1,38 +0,0 @@
|
||||
Date: 2025-12-22
|
||||
Feeds: https://feeds.nos.nl/nosnieuwsalgemeen, https://feeds.nos.nl/nosnieuwstech
|
||||
Model: mistralai/mistral-nemo-instruct-2407
|
||||
|
||||
Master size: 91871
|
||||
Theme kept (in master): 0
|
||||
Bridge size: 21125
|
||||
Shorts kept: 130
|
||||
Pool total: 27831
|
||||
|
||||
Enforced minima:
|
||||
2: 1000
|
||||
3: 1000
|
||||
4: 1000
|
||||
5: 1000
|
||||
6: 2000
|
||||
7: 2000
|
||||
8: 2000
|
||||
|
||||
Counts per length (theme):
|
||||
2: 0
|
||||
3: 0
|
||||
4: 0
|
||||
5: 0
|
||||
6: 0
|
||||
7: 0
|
||||
8: 0
|
||||
|
||||
|
||||
Counts per length (pool):
|
||||
2: 242
|
||||
3: 1000
|
||||
4: 1000
|
||||
5: 1000
|
||||
6: 2000
|
||||
7: 2000
|
||||
8: 20589
|
||||
|
||||
40
out/rss.txt
40
out/rss.txt
File diff suppressed because one or more lines are too long
220
src/puzzle/ClueGenerator.java
Normal file
220
src/puzzle/ClueGenerator.java
Normal file
@@ -0,0 +1,220 @@
|
||||
package puzzle;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.*;
|
||||
|
||||
public class ClueGenerator {
|
||||
|
||||
private static final String OLLAMA_URL = "http://localhost:11434/api/chat";
|
||||
private static final String MODEL = "qwen2.5:14b";
|
||||
private static final String HINTS_FILE = "export_with_hints.csv";
|
||||
private static Map<String, String> prebuiltClues = null;
|
||||
|
||||
private static synchronized void ensurePrebuiltCluesLoaded() {
|
||||
if (prebuiltClues != null) return;
|
||||
prebuiltClues = new HashMap<>();
|
||||
try {
|
||||
List<String> lines = Files.readAllLines(Path.of(HINTS_FILE), StandardCharsets.UTF_8);
|
||||
for (String line : lines) {
|
||||
String[] parts = line.split(",", 3);
|
||||
if (parts.length >= 3) {
|
||||
String word = parts[0].trim().toUpperCase(Locale.ROOT);
|
||||
String rawClue = parts[2].trim();
|
||||
if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
|
||||
rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
|
||||
}
|
||||
if (!word.isEmpty() && !rawClue.isEmpty()) {
|
||||
prebuiltClues.put(word, rawClue);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.err.println("Warning: " + HINTS_FILE + " not found or could not be read.");
|
||||
}
|
||||
}
|
||||
|
||||
public static ExportFormat.ExportedPuzzle applyClues(ExportFormat.ExportedPuzzle puzzle) {
|
||||
if (puzzle == null || puzzle.words().isEmpty()) {
|
||||
return puzzle;
|
||||
}
|
||||
|
||||
ensurePrebuiltCluesLoaded();
|
||||
|
||||
Map<String, String> finalClueMap = new HashMap<>();
|
||||
List<String> wordsMissingClues = new ArrayList<>();
|
||||
|
||||
for (var w : puzzle.words()) {
|
||||
String wordUpper = w.word().toUpperCase(Locale.ROOT);
|
||||
if (prebuiltClues.containsKey(wordUpper)) {
|
||||
finalClueMap.put(w.word(), prebuiltClues.get(wordUpper));
|
||||
} else {
|
||||
wordsMissingClues.add(w.word());
|
||||
}
|
||||
}
|
||||
|
||||
if (!wordsMissingClues.isEmpty()) {
|
||||
Map<String, String> generatedClues = generateClues(wordsMissingClues);
|
||||
finalClueMap.putAll(generatedClues);
|
||||
}
|
||||
|
||||
List<ExportFormat.WordOut> wordsWithClues = new ArrayList<>();
|
||||
for (var w : puzzle.words()) {
|
||||
String clue = finalClueMap.getOrDefault(w.word(), w.word());
|
||||
wordsWithClues.add(new ExportFormat.WordOut(
|
||||
w.word(),
|
||||
clue,
|
||||
w.startRow(),
|
||||
w.startCol(),
|
||||
w.direction(),
|
||||
w.answer(),
|
||||
w.arrowRow(),
|
||||
w.arrowCol(),
|
||||
w.isReversed()
|
||||
));
|
||||
}
|
||||
|
||||
return new ExportFormat.ExportedPuzzle(puzzle.gridv2(), wordsWithClues, puzzle.difficulty(), puzzle.rewards());
|
||||
}
|
||||
|
||||
public static Map<String, String> generateClues(List<String> words) {
|
||||
if (words == null || words.isEmpty()) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
|
||||
String prompt = createCluePrompt(words);
|
||||
try {
|
||||
String jsonRequest = String.format(
|
||||
"{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"%s\"}],\"stream\":false,\"temperature\":0.7}",
|
||||
MODEL, escapeJson(prompt)
|
||||
);
|
||||
|
||||
String responseBody = curlPostJson(OLLAMA_URL, jsonRequest, 120);
|
||||
String content = extractChatContent(responseBody);
|
||||
|
||||
if (content == null || content.isEmpty()) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
|
||||
return parseCluesFromReply(words, content);
|
||||
} catch (Exception e) {
|
||||
System.err.println("Failed to generate clues: " + e.getMessage());
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
}
|
||||
|
||||
private static String createCluePrompt(List<String> words) {
|
||||
return "Je bent een expert in het maken van kruiswoordpuzzels. Geef voor elk van de onderstaande woorden een korte, uitdagende maar duidelijke cryptische of beschrijvende aanwijzing in het Nederlands.\n\n" +
|
||||
"Output ALLEEN in dit formaat:\n" +
|
||||
"woord1:aanwijzing\n" +
|
||||
"woord2:aanwijzing\n\n" +
|
||||
"GEEN andere tekst of uitleg. Sla GEEN woorden over.\n\n" +
|
||||
"Lijst:\n" +
|
||||
String.join("\n", words);
|
||||
}
|
||||
|
||||
private static Map<String, String> parseCluesFromReply(List<String> expectedWords, String reply) {
|
||||
Map<String, String> wordClueMap = new HashMap<>();
|
||||
String[] lines = reply.split("\n");
|
||||
|
||||
for (String line : lines) {
|
||||
line = line.trim();
|
||||
if (line.contains(":")) {
|
||||
String[] parts = line.split(":", 2);
|
||||
if (parts.length == 2) {
|
||||
String wordPart = parts[0].trim().replaceAll("^[\\d+.)*\\-\\s]+", "").toLowerCase();
|
||||
String clue = parts[1].trim();
|
||||
if (!clue.isEmpty()) {
|
||||
wordClueMap.put(wordPart, clue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Map<String, String> results = new HashMap<>();
|
||||
for (String word : expectedWords) {
|
||||
String clue = wordClueMap.get(word.toLowerCase());
|
||||
if (clue != null) {
|
||||
results.put(word, clue);
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
private static String curlPostJson(String url, String jsonBody, int timeoutSeconds) throws Exception {
|
||||
var tempFile = Files.createTempFile("clue-request-", ".json");
|
||||
try {
|
||||
Files.writeString(tempFile, jsonBody, StandardCharsets.UTF_8);
|
||||
List<String> cmd = new ArrayList<>();
|
||||
cmd.add("curl");
|
||||
cmd.add("-fsSL");
|
||||
cmd.add("--connect-timeout");
|
||||
cmd.add("10");
|
||||
cmd.add("--max-time");
|
||||
cmd.add(String.valueOf(timeoutSeconds));
|
||||
cmd.add("-H");
|
||||
cmd.add("Content-Type: application/json");
|
||||
cmd.add("-d");
|
||||
cmd.add("@" + tempFile);
|
||||
cmd.add(url);
|
||||
|
||||
var p = new ProcessBuilder(cmd)
|
||||
.redirectErrorStream(true)
|
||||
.start();
|
||||
|
||||
var bytes = p.getInputStream().readAllBytes();
|
||||
var code = p.waitFor();
|
||||
|
||||
if (code != 0) {
|
||||
throw new IOException("curl POST failed (" + code + ") url=" + url + "\nOutput:\n" +
|
||||
new String(bytes, StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
return new String(bytes, StandardCharsets.UTF_8);
|
||||
} finally {
|
||||
Files.deleteIfExists(tempFile);
|
||||
}
|
||||
}
|
||||
|
||||
private static String extractChatContent(String json) {
|
||||
if (json == null) return null;
|
||||
var choices = json.indexOf("\"choices\"");
|
||||
var p = (choices >= 0) ? choices : 0;
|
||||
var i = json.indexOf("\"content\"", p);
|
||||
if (i < 0) {
|
||||
// Fallback for Ollama non-chat format if needed, but we used /api/chat
|
||||
// Ollama /api/chat returns {"model":"...","message":{"role":"assistant","content":"..."}}
|
||||
i = json.indexOf("\"content\"");
|
||||
if (i < 0) return null;
|
||||
}
|
||||
var colon = json.indexOf(':', i);
|
||||
if (colon < 0) return null;
|
||||
var q = json.indexOf('"', colon + 1);
|
||||
if (q < 0) return null;
|
||||
var sb = new StringBuilder();
|
||||
var esc = false;
|
||||
for (var k = q + 1; k < json.length(); k++) {
|
||||
var ch = json.charAt(k);
|
||||
if (esc) {
|
||||
if (ch == 'n') sb.append('\n');
|
||||
else if (ch == 't') sb.append('\t');
|
||||
else if (ch == 'r') sb.append('\r');
|
||||
else sb.append(ch);
|
||||
esc = false;
|
||||
} else {
|
||||
if (ch == '\\') esc = true;
|
||||
else if (ch == '"') break;
|
||||
else sb.append(ch);
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private static String escapeJson(String str) {
|
||||
return str.replace("\\", "\\\\")
|
||||
.replace("\"", "\\\"")
|
||||
.replace("\n", "\\n");
|
||||
}
|
||||
}
|
||||
@@ -31,7 +31,7 @@ public class DailyGenerator {
|
||||
|
||||
public static void main(String[] args) {
|
||||
var outDir = env("OUT_DIR", "/home/mike/dev/puzzle-generator/data/");
|
||||
var wordsPath = env("WORDS_PATH", "./word-list.txt");
|
||||
var wordsPath = env("WORDS_PATH", "./export_words_only.txt");
|
||||
var puzzlesPerDay = envInt("PUZZLES_PER_DAY", 3);
|
||||
var seed = envInt("SEED", (int) System.currentTimeMillis());
|
||||
var themeFilter = envBool("THEME_FILTER", true);
|
||||
@@ -119,6 +119,10 @@ public class DailyGenerator {
|
||||
result, 1, new ExportFormat.Rewards(50, 2, 1)
|
||||
);
|
||||
|
||||
// Generate clues via LLM
|
||||
System.out.println("Generating clues for " + exported.words().size() + " words...");
|
||||
exported = ClueGenerator.applyClues(exported);
|
||||
|
||||
// Write to JSON file
|
||||
var filename = String.format("crossword_%s_%02d_%s.json", dateStr, i, safeSlug(theme));
|
||||
var outputPath = Paths.get(outDir, filename);
|
||||
|
||||
@@ -7,7 +7,10 @@ import java.nio.file.Paths;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
public class Main {
|
||||
// ---------------- CLI ----------------
|
||||
@@ -15,7 +18,7 @@ public class Main {
|
||||
public static class Opts {
|
||||
public int seed = 1;
|
||||
public int pop = 18;
|
||||
public int gens = 200;
|
||||
public int gens = 1000;
|
||||
public int tries = 5;
|
||||
public String wordsPath = "./out/pool.txt";
|
||||
public double minSimplicity = 0; // 0 means no limit
|
||||
@@ -74,6 +77,11 @@ public class Main {
|
||||
System.out.println(SwedishGenerator.renderHuman(res.filled().grid));
|
||||
System.out.printf(Locale.ROOT, "Puzzle Simplicity: %.2f%n", res.filled().simplicity);
|
||||
var out = ExportFormat.exportFormatFromFilled(res, 1, new ExportFormat.Rewards(50, 2, 1));
|
||||
|
||||
// Generate clues via LLM
|
||||
System.out.println("Generating clues for " + out.words().size() + " words...");
|
||||
out = ClueGenerator.applyClues(out);
|
||||
|
||||
System.out.println("gridv2:");
|
||||
for (String row : out.gridv2()) System.out.println(row);
|
||||
System.out.println("words: " + out.words().size());
|
||||
|
||||
@@ -153,7 +153,7 @@ public class SwedishGenerator {
|
||||
static Map<String, Integer> loadScores() {
|
||||
var scores = new HashMap<String, Integer>();
|
||||
try {
|
||||
var lines = Files.readAllLines(Path.of("word_scores.csv"), StandardCharsets.UTF_8);
|
||||
var lines = Files.readAllLines(Path.of("export_words.csv"), StandardCharsets.UTF_8);
|
||||
var first = true;
|
||||
for (var line : lines) {
|
||||
if (first) {
|
||||
@@ -161,16 +161,11 @@ public class SwedishGenerator {
|
||||
continue;
|
||||
}
|
||||
var parts = line.split(",");
|
||||
if (parts.length >= 3) {
|
||||
if (parts.length >= 2) {
|
||||
try {
|
||||
var word = parts[0].trim().toUpperCase(Locale.ROOT);
|
||||
var score = Integer.parseInt(parts[1].trim());
|
||||
var status = parts[2].trim();
|
||||
if ("OK".equalsIgnoreCase(status)) {
|
||||
scores.put(word, score);
|
||||
} else {
|
||||
System.err.println("Skipping:" +Arrays.toString( parts));
|
||||
}
|
||||
} catch (NumberFormatException ignored) {
|
||||
System.err.println("Illegal number format: " + line);
|
||||
}
|
||||
|
||||
@@ -56,7 +56,7 @@ public class ThemePoolBuilderLength {
|
||||
|
||||
static final class Opts {
|
||||
|
||||
String wordsPath = "/home/mike/dev/puzzle-generator/word-list.txt";
|
||||
String wordsPath = "/home/mike/dev/puzzle-generator/export_words_only.txt";
|
||||
String endpoint = "https://jarvis-lan.appmodel.nl/api/stoic/";
|
||||
List<String> feeds = new ArrayList<>(DEFAULT_FEEDS);
|
||||
String outDir = "./out";
|
||||
@@ -303,10 +303,10 @@ public class ThemePoolBuilderLength {
|
||||
try {
|
||||
var word = parts[0].trim().toUpperCase(Locale.ROOT);
|
||||
var score = Integer.parseInt(parts[1].trim());
|
||||
var status = parts[2].trim();
|
||||
if ("OK".equalsIgnoreCase(status)) {
|
||||
// var status = parts[2].trim();
|
||||
// if ("OK".equalsIgnoreCase(status)) {
|
||||
llmScores.put(word, score);
|
||||
}
|
||||
//}
|
||||
} catch (NumberFormatException ignored) { }
|
||||
}
|
||||
}
|
||||
@@ -731,7 +731,7 @@ public class ThemePoolBuilderLength {
|
||||
// Optionally filter out VERY complex words from the bridge (e.g. lScore < 3)
|
||||
// But since we sort by score (which is now dominated by lScore),
|
||||
// they will be at the very bottom anyway.
|
||||
if (lex.score[i] < 800) continue;
|
||||
// if (lex.score[i] < 800) continue;
|
||||
ids.add(i);
|
||||
}
|
||||
|
||||
@@ -774,8 +774,8 @@ public class ThemePoolBuilderLength {
|
||||
|
||||
var out = new ArrayList<String>(ids.size());
|
||||
for (var id : ids) {
|
||||
/* if (lex.score[id] < 680)
|
||||
continue;*/
|
||||
if (lex.score[id] < 680)
|
||||
continue;
|
||||
out.add(lex.words.get(id));
|
||||
}
|
||||
Files.write(path, out, StandardCharsets.UTF_8, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
|
||||
|
||||
Reference in New Issue
Block a user