update them
This commit is contained in:
@@ -49,8 +49,9 @@ public class DailyGenerator {
|
||||
|
||||
// Load word list
|
||||
SwedishGenerator.Dict dict;
|
||||
var llmScores = SwedishGenerator.loadScores();
|
||||
try {
|
||||
dict = SwedishGenerator.loadWords(wordsPath);
|
||||
dict = SwedishGenerator.loadWords(wordsPath, llmScores);
|
||||
System.out.println("Loaded " + dict.words.size() + " words");
|
||||
} catch (Exception e) {
|
||||
System.err.println("Failed to load words: " + e.getMessage());
|
||||
@@ -102,8 +103,9 @@ public class DailyGenerator {
|
||||
opts.gens = 100;
|
||||
opts.tries = 50;
|
||||
opts.wordsPath = wordsPath;
|
||||
opts.minSimplicity = 0; // default
|
||||
|
||||
var result = generateWithFilteredDict(opts, themedDict);
|
||||
var result = generateWithFilteredDict(opts, themedDict, llmScores);
|
||||
|
||||
if (result == null) {
|
||||
System.out.println("Failed to generate puzzle " + i);
|
||||
@@ -172,12 +174,12 @@ public class DailyGenerator {
|
||||
return new SwedishGenerator.Dict(new ArrayList<>(allowed), newIndex, newLenCounts);
|
||||
}
|
||||
|
||||
private static SwedishGenerator.PuzzleResult generateWithFilteredDict(Main.Opts opts, SwedishGenerator.Dict dict) {
|
||||
private static SwedishGenerator.PuzzleResult generateWithFilteredDict(Main.Opts opts, SwedishGenerator.Dict dict, Map<String, Integer> llmScores) {
|
||||
var rng = new SwedishGenerator.Rng(opts.seed);
|
||||
|
||||
for (var attempt = 1; attempt <= opts.tries; attempt++) {
|
||||
var mask = SwedishGenerator.generateMask(rng, dict.lenCounts, opts.pop, opts.gens);
|
||||
var filled = SwedishGenerator.fillMask(rng, mask, dict.index, 200, 30000);
|
||||
var filled = SwedishGenerator.fillMask(rng, mask, dict.index, llmScores, 200, 30000);
|
||||
|
||||
if (filled.ok) {
|
||||
return new SwedishGenerator.PuzzleResult(mask, filled);
|
||||
|
||||
@@ -5,6 +5,7 @@ import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.LocalDate;
|
||||
import java.util.Date;
|
||||
import java.util.Locale;
|
||||
|
||||
public class Main {
|
||||
// ---------------- CLI ----------------
|
||||
@@ -13,21 +14,23 @@ public class Main {
|
||||
public int seed = 1;
|
||||
public int pop = 18;
|
||||
public int gens = 100;
|
||||
public int tries = 50;
|
||||
public int tries = 5;
|
||||
public String wordsPath = "./out/pool.txt";
|
||||
public double minSimplicity = 0; // 0 means no limit
|
||||
}
|
||||
|
||||
static void usage() {
|
||||
System.out.println("""
|
||||
Usage:
|
||||
java SwedishGenerator [--seed N] [--pop N] [--gens N] [--tries N] [--words word-list.txt]
|
||||
java SwedishGenerator [--seed N] [--pop N] [--gens N] [--tries N] [--words word-list.txt] [--min-simplicity N.N]
|
||||
|
||||
Defaults:
|
||||
--seed 1
|
||||
--pop 18
|
||||
--gens 100
|
||||
--tries 50
|
||||
--words ./word-list.txt
|
||||
--words ./out/pool.txt
|
||||
--min-simplicity 0 (no limit)
|
||||
""");
|
||||
}
|
||||
|
||||
@@ -45,6 +48,7 @@ public class Main {
|
||||
else if (a.equals("--gens")) { out.gens = Integer.parseInt(v); i++; }
|
||||
else if (a.equals("--tries")) { out.tries = Integer.parseInt(v); i++; }
|
||||
else if (a.equals("--words")) { out.wordsPath = v; i++; }
|
||||
else if (a.equals("--min-simplicity")) { out.minSimplicity = Double.parseDouble(v); i++; }
|
||||
else throw new IllegalArgumentException("Unknown arg: " + a);
|
||||
}
|
||||
return out;
|
||||
@@ -66,11 +70,13 @@ public class Main {
|
||||
|
||||
System.out.println("\n=== FILLED PUZZLE (HUMAN) ===");
|
||||
System.out.println(SwedishGenerator.renderHuman(res.filled().grid));
|
||||
System.out.printf(Locale.ROOT, "Puzzle Simplicity: %.2f%n", res.filled().simplicity);
|
||||
var out = ExportFormat.exportFormatFromFilled(res, 1, new ExportFormat.Rewards(50, 2, 1));
|
||||
System.out.println("gridv2:");
|
||||
for (String row : out.gridv2()) System.out.println(row);
|
||||
System.out.println("words: " + out.words().size());
|
||||
for (var w : out.words()) {
|
||||
var simplicityOfWord =
|
||||
System.out.printf("%s %s start=(%d,%d) arrow=(%d,%d)%n",
|
||||
w.word(), w.direction(), w.startRow(), w.startCol(), w.arrowRow(), w.arrowCol());
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ import java.util.stream.IntStream;
|
||||
* javac SwedishGenerator.java
|
||||
* java SwedishGenerator [--seed N] [--pop N] [--gens N] [--tries N] [--words word-list.txt]
|
||||
*/
|
||||
@SuppressWarnings("ALL")
|
||||
public class SwedishGenerator {
|
||||
|
||||
static final int W = 9, H = 8;
|
||||
@@ -136,7 +137,7 @@ public class SwedishGenerator {
|
||||
final int score;
|
||||
|
||||
public WordDifficulty(String word, int score) {
|
||||
this.word = word;
|
||||
this.word = word;
|
||||
this.score = score;
|
||||
// We want LONGER and SIMPLER words to be tried earlier (lower difficulty value).
|
||||
// word.length() is 2 to 8.
|
||||
@@ -144,7 +145,8 @@ public class SwedishGenerator {
|
||||
// Base difficulty starts high and decreases with length and score.
|
||||
// Length impact: up to 8 * 10 = 80
|
||||
// Score impact: up to 10 * 15 = 150
|
||||
this.difficulty = 250 - (word.length() * 10) - (score * 15);
|
||||
var difficulty1 = 0 + ((8 - word.length()) * 30) + ((10-score) * 15);
|
||||
this.difficulty = difficulty1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -154,17 +156,26 @@ public class SwedishGenerator {
|
||||
var lines = Files.readAllLines(Path.of("word_scores.csv"), StandardCharsets.UTF_8);
|
||||
var first = true;
|
||||
for (var line : lines) {
|
||||
if (first) { first = false; continue; }
|
||||
var parts = line.split(",", 3);
|
||||
if (first) {
|
||||
first = false;
|
||||
continue;
|
||||
}
|
||||
var parts = line.split("," );
|
||||
if (parts.length >= 3) {
|
||||
try {
|
||||
var word = parts[0].trim().toUpperCase(Locale.ROOT);
|
||||
var score = Integer.parseInt(parts[1].trim());
|
||||
var word = parts[0].trim().toUpperCase(Locale.ROOT);
|
||||
var score = Integer.parseInt(parts[1].trim());
|
||||
var status = parts[2].trim();
|
||||
if ("OK".equalsIgnoreCase(status)) {
|
||||
scores.put(word, score);
|
||||
} else {
|
||||
System.err.println("Skipping:" +Arrays.toString( parts));
|
||||
}
|
||||
} catch (NumberFormatException ignored) {}
|
||||
} catch (NumberFormatException ignored) {
|
||||
System.err.println("Illegal number format: " + line);
|
||||
}
|
||||
} else {
|
||||
System.err.println("Illegal word: " + line);
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
@@ -184,7 +195,7 @@ public class SwedishGenerator {
|
||||
this.lenCounts = lenCounts;
|
||||
}
|
||||
}
|
||||
static Dict loadWords(String wordsPath) {
|
||||
static Dict loadWords(String wordsPath, Map<String, Integer> llmScores) {
|
||||
String raw;
|
||||
try {
|
||||
raw = Files.readString(Path.of(wordsPath), StandardCharsets.UTF_8);
|
||||
@@ -192,8 +203,7 @@ public class SwedishGenerator {
|
||||
raw = "EU\nUUR\nAUTO\nBOOM\nHUIS\nKAT\nZEE\nRODE\nDRAAD\nKENNIS\nNETWERK\nPAKTE\n";
|
||||
}
|
||||
|
||||
var llmScores = loadScores();
|
||||
var words = new ArrayList<WordDifficulty>();
|
||||
var words = new ArrayList<WordDifficulty>();
|
||||
for (var line : raw.split("\\R")) {
|
||||
var s = line.trim().toUpperCase(Locale.ROOT);
|
||||
if (s.matches("^[A-Z]{2,8}$")) {
|
||||
@@ -289,11 +299,10 @@ public class SwedishGenerator {
|
||||
return ci;
|
||||
}
|
||||
static int indexToDifficulty(DictEntry entry, int index, Map<String, Integer> llmScores) {
|
||||
var word = entry.words.get(index);
|
||||
var word = entry.words.get(index);
|
||||
var score = llmScores.getOrDefault(word, 5);
|
||||
return new WordDifficulty(word, score).difficulty;
|
||||
}
|
||||
|
||||
|
||||
// ---------------- Slots ----------------
|
||||
|
||||
@@ -617,6 +626,7 @@ public class SwedishGenerator {
|
||||
public char[][] grid;
|
||||
public HashMap<String, String> clueMap;
|
||||
public FillStats stats;
|
||||
public double simplicity;
|
||||
}
|
||||
|
||||
record Undo(int[] rs, int[] cs, char[] prev, int n) {
|
||||
@@ -667,6 +677,7 @@ public class SwedishGenerator {
|
||||
}
|
||||
|
||||
static FillResult fillMask(Rng rng, char[][] mask, HashMap<Integer, DictEntry> dictIndex,
|
||||
Map<String, Integer> llmScores,
|
||||
int logEveryMs, int timeLimitMs) {
|
||||
|
||||
var grid = deepCopyGrid(mask);
|
||||
@@ -819,10 +830,10 @@ public class SwedishGenerator {
|
||||
// (lower difficulty) but still have some randomness.
|
||||
for (var t = 0; t < tries; t++) {
|
||||
// Bias strongly towards lower indices (simpler words) using r^3
|
||||
double r = rng.nextFloat();
|
||||
int idxInArray = (int) (r * r * r * L);
|
||||
var idx = idxs[idxInArray];
|
||||
var w = entry.words.get(idx);
|
||||
double r = rng.nextFloat();
|
||||
int idxInArray = (int) (r * r * r * L);
|
||||
var idx = idxs[idxInArray];
|
||||
var w = entry.words.get(idx);
|
||||
if (tryWord.apply(w)) return true;
|
||||
}
|
||||
stats.backtracks++;
|
||||
@@ -837,9 +848,9 @@ public class SwedishGenerator {
|
||||
|
||||
var tries = Math.min(MAX_TRIES_PER_SLOT, N);
|
||||
for (var t = 0; t < tries; t++) {
|
||||
double r = rng.nextFloat();
|
||||
int idxInArray = (int) (r * r * r * N);
|
||||
var w = entry.words.get(idxInArray);
|
||||
double r = rng.nextFloat();
|
||||
int idxInArray = (int) (r * r * r * N);
|
||||
var w = entry.words.get(idxInArray);
|
||||
if (tryWord.apply(w)) return true;
|
||||
}
|
||||
|
||||
@@ -862,6 +873,14 @@ public class SwedishGenerator {
|
||||
stats.seconds = (System.currentTimeMillis() - t0) / 1000.0;
|
||||
res.stats = stats;
|
||||
|
||||
if (ok) {
|
||||
double totalSimplicity = 0;
|
||||
for (var w : assigned.values()) {
|
||||
totalSimplicity += llmScores.getOrDefault(w, 5);
|
||||
}
|
||||
res.simplicity = assigned.isEmpty() ? 0 : totalSimplicity / assigned.size();
|
||||
}
|
||||
|
||||
// print a final progress line
|
||||
System.out.println(
|
||||
String.format(Locale.ROOT,
|
||||
@@ -882,11 +901,12 @@ public class SwedishGenerator {
|
||||
public record PuzzleResult(char[][] mask, FillResult filled) { }
|
||||
|
||||
public static PuzzleResult generatePuzzle(Main.Opts opts) {
|
||||
var rng = new Rng(opts.seed);
|
||||
var tLoad0 = System.nanoTime();
|
||||
var dict = loadWords(opts.wordsPath);
|
||||
var tLoad1 = System.nanoTime();
|
||||
System.out.printf(Locale.ROOT, "LOAD_WORDS: %.3fs%n", (tLoad1 - tLoad0) / 1e9);
|
||||
var rng = new Rng(opts.seed);
|
||||
var llmScores = loadScores();
|
||||
var tLoad0 = System.nanoTime();
|
||||
var dict = loadWords(opts.wordsPath, llmScores);
|
||||
var tLoad1 = System.nanoTime();
|
||||
System.out.printf(Locale.ROOT, "LOAD_WORDS: %.3fs%n %s words", (tLoad1 - tLoad0) / 1e9,dict.words.size());
|
||||
|
||||
for (var attempt = 1; attempt <= opts.tries; attempt++) {
|
||||
System.out.println("\nAttempt " + attempt + "/" + opts.tries);
|
||||
@@ -897,14 +917,19 @@ public class SwedishGenerator {
|
||||
System.out.printf(Locale.ROOT, "MASK: %.3fs%n", (tMask1 - tMask0) / 1e9);
|
||||
|
||||
var tFill0 = System.nanoTime();
|
||||
var filled = fillMask(rng, mask, dict.index, 200, 60000);
|
||||
var filled = fillMask(rng, mask, dict.index, llmScores, 200, 60000);
|
||||
var tFill1 = System.nanoTime();
|
||||
System.out.printf(Locale.ROOT, "FILL: %.3fms%n", (tFill1 - tFill0) / 1e6);
|
||||
System.out.printf(Locale.ROOT, "FILL: %.3fms | Simplicity: %.2f%n", (tFill1 - tFill0) / 1e6, filled.simplicity);
|
||||
|
||||
if (filled.ok) {
|
||||
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
|
||||
return new PuzzleResult(mask, filled);
|
||||
}
|
||||
if (filled.ok) {
|
||||
System.out.printf(Locale.ROOT, "Puzzle simplicity %.2f is below min %.2f, retrying...%n",
|
||||
filled.simplicity, opts.minSimplicity);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
26
src/puzzle/TestSort.java
Normal file
26
src/puzzle/TestSort.java
Normal file
@@ -0,0 +1,26 @@
|
||||
package puzzle;
|
||||
import puzzle.ThemePoolBuilderLength.Lexicon;
|
||||
import java.nio.file.*;
|
||||
import java.util.*;
|
||||
public class TestSort {
|
||||
public static void main(String[] args) throws Exception {
|
||||
Lexicon lex = new Lexicon(
|
||||
Arrays.asList("A", "B", "C"),
|
||||
new HashMap<>(),
|
||||
new int[]{10, 30, 20},
|
||||
new BitSet[9]
|
||||
);
|
||||
BitSet bs = new BitSet();
|
||||
bs.set(0); bs.set(1); bs.set(2);
|
||||
Path p = Paths.get("test_pool.txt");
|
||||
ThemePoolBuilderLength.writeWordList(p, lex, bs);
|
||||
List<String> lines = Files.readAllLines(p);
|
||||
System.out.println("Sorted words: " + lines);
|
||||
if (lines.get(0).equals("B") && lines.get(1).equals("C") && lines.get(2).equals("A")) {
|
||||
System.out.println("SUCCESS");
|
||||
} else {
|
||||
System.out.println("FAILURE");
|
||||
System.exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -14,6 +14,7 @@ import java.text.Normalizer;
|
||||
import java.time.LocalDate;
|
||||
import java.util.*;
|
||||
|
||||
@SuppressWarnings("ALL")
|
||||
public class ThemePoolBuilderLength {
|
||||
|
||||
private static final List<String> DEFAULT_FEEDS = List.of(
|
||||
@@ -28,8 +29,8 @@ public class ThemePoolBuilderLength {
|
||||
"ING", "KPN", "KVK", "RIVM", "GGD", "AIVD", "MIVD", "CEO", "CFO", "HR",
|
||||
"NL", "BE", "BRU", "EUR", "EURO", "WET", "ART", "BTW", "DI", "MA",
|
||||
"PVV", "VVD", "CDA", "FNV",
|
||||
"EN","IN","OP","OM","TE","ER","DE","HET","EEN","VAN","MET","NOG","OOK","MAAR","WEL","NIET",
|
||||
"HOE","ALS",
|
||||
"EN", "IN", "OP", "OM", "TE", "ER", "DE", "HET", "EEN", "VAN", "MET", "NOG", "OOK", "MAAR", "WEL", "NIET",
|
||||
"HOE", "ALS",
|
||||
|
||||
"ZO", "DO", "WO", "VR", "MO", "WA", "WE", "TAAL",
|
||||
"LAND", "GEMEENTE", "STAAT", "BUREAU", "HUIS", "SCHOOL", "STR", "BAAN",
|
||||
@@ -41,12 +42,13 @@ public class ThemePoolBuilderLength {
|
||||
"WINKEL", "MARKT", "KIOSK", "AUTO", "MOBILE", "FIETS", "SCOOTER",
|
||||
|
||||
// afkortingen
|
||||
"DHR","MEVR","DR","ST","CA","IVM","MBT","TAV","TOV","DWZ","MAW","OA","TM",
|
||||
"ANWB","BRP","CBS",
|
||||
|
||||
"DHR", "MEVR", "DR", "ST", "CA", "IVM", "MBT", "TAV", "TOV", "DWZ", "MAW", "OA", "TM",
|
||||
"ANWB", "BRP", "CBS",
|
||||
"AL", "NU", "TO", "NA", "BIJ", "TOT", "DAN", "WAT", "DAT",
|
||||
"IK", "JE", "WE", "WIJ", "JIJ", "ZIJ", "HIJ", "HEN", "ONS", "JOU",
|
||||
// romeinse cijfers (2-8)
|
||||
"II","III","IV","VI","VII","VIII","IX",
|
||||
"XI","XII","XIII","XIV","XV","XVI","XVII","XVIII","XIX","XX"
|
||||
"II", "III", "IV", "VI", "VII", "VIII", "IX",
|
||||
"XI", "XII", "XIII", "XIV", "XV", "XVI", "XVII", "XVIII", "XIX", "XX"
|
||||
);
|
||||
|
||||
private static final String BROWSER_UA =
|
||||
@@ -59,7 +61,7 @@ public class ThemePoolBuilderLength {
|
||||
List<String> feeds = new ArrayList<>(DEFAULT_FEEDS);
|
||||
String outDir = "./out";
|
||||
|
||||
int bridgeN = 32000;
|
||||
int bridgeN = 40000;
|
||||
int themeN = 800;
|
||||
int relatedN = 2200;
|
||||
int rssItemsPerFeed = 10;
|
||||
@@ -74,9 +76,9 @@ public class ThemePoolBuilderLength {
|
||||
int minLen3 = 1000;
|
||||
int minLen4 = 1000;
|
||||
int minLen5 = 1000; // set if you also want to force 5-letter words, etc.
|
||||
int minLen6 = 1000;
|
||||
int minLen7 = 1000;
|
||||
int minLen8 = 1000;
|
||||
int minLen6 = 2000;
|
||||
int minLen7 = 2000;
|
||||
int minLen8 = 2000;
|
||||
}
|
||||
|
||||
static Opts parseArgs(String[] args) {
|
||||
@@ -85,53 +87,107 @@ public class ThemePoolBuilderLength {
|
||||
var a = args[i];
|
||||
var v = (i + 1 < args.length) ? args[i + 1] : null;
|
||||
switch (a) {
|
||||
case "--words" -> { o.wordsPath = v; i++; }
|
||||
case "--endpoint" -> { o.endpoint = v; i++; }
|
||||
case "--feeds" -> { o.feeds = Arrays.asList(v.split(",")); i++; }
|
||||
case "--out" -> { o.outDir = v; i++; }
|
||||
case "--bridge" -> { o.bridgeN = Integer.parseInt(v); i++; }
|
||||
case "--theme" -> { o.themeN = Integer.parseInt(v); i++; }
|
||||
case "--related" -> { o.relatedN = Integer.parseInt(v); i++; }
|
||||
case "--items" -> { o.rssItemsPerFeed = Integer.parseInt(v); i++; }
|
||||
case "--model" -> { o.model = v; i++; }
|
||||
case "--timeout" -> { o.timeoutSeconds = Integer.parseInt(v); i++; }
|
||||
case "--retries" -> { o.retries = Integer.parseInt(v); i++; }
|
||||
case "--words" -> {
|
||||
o.wordsPath = v;
|
||||
i++;
|
||||
}
|
||||
case "--endpoint" -> {
|
||||
o.endpoint = v;
|
||||
i++;
|
||||
}
|
||||
case "--feeds" -> {
|
||||
o.feeds = Arrays.asList(v.split(","));
|
||||
i++;
|
||||
}
|
||||
case "--out" -> {
|
||||
o.outDir = v;
|
||||
i++;
|
||||
}
|
||||
case "--bridge" -> {
|
||||
o.bridgeN = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--theme" -> {
|
||||
o.themeN = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--related" -> {
|
||||
o.relatedN = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--items" -> {
|
||||
o.rssItemsPerFeed = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--model" -> {
|
||||
o.model = v;
|
||||
i++;
|
||||
}
|
||||
case "--timeout" -> {
|
||||
o.timeoutSeconds = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--retries" -> {
|
||||
o.retries = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
|
||||
// ---- NEW: minima per length ----
|
||||
case "--min2" -> { o.minLen2 = Integer.parseInt(v); i++; }
|
||||
case "--min3" -> { o.minLen3 = Integer.parseInt(v); i++; }
|
||||
case "--min4" -> { o.minLen4 = Integer.parseInt(v); i++; }
|
||||
case "--min5" -> { o.minLen5 = Integer.parseInt(v); i++; }
|
||||
case "--min6" -> { o.minLen6 = Integer.parseInt(v); i++; }
|
||||
case "--min7" -> { o.minLen7 = Integer.parseInt(v); i++; }
|
||||
case "--min8" -> { o.minLen8 = Integer.parseInt(v); i++; }
|
||||
case "--min2" -> {
|
||||
o.minLen2 = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--min3" -> {
|
||||
o.minLen3 = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--min4" -> {
|
||||
o.minLen4 = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--min5" -> {
|
||||
o.minLen5 = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--min6" -> {
|
||||
o.minLen6 = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--min7" -> {
|
||||
o.minLen7 = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--min8" -> {
|
||||
o.minLen8 = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
|
||||
case "-h", "--help" -> {
|
||||
System.out.println("""
|
||||
Usage:
|
||||
java puzzle.ThemePoolBuilder --words WORDS.txt [options]
|
||||
|
||||
Options:
|
||||
--endpoint http://HOST:1234/v1 (LM Studio)
|
||||
--feeds url1,url2
|
||||
--out ./out
|
||||
--bridge 5000
|
||||
--theme 300
|
||||
--related 1200
|
||||
--items 20 (per feed)
|
||||
--model <id> (recommended; skips /v1/models)
|
||||
--timeout 60 (seconds)
|
||||
--retries 4
|
||||
|
||||
# enforce minima per length in final pool
|
||||
--min2 4000
|
||||
--min3 7000
|
||||
--min4 9000
|
||||
--min5 0
|
||||
--min6 0
|
||||
--min7 0
|
||||
--min8 0
|
||||
""");
|
||||
Usage:
|
||||
java puzzle.ThemePoolBuilder --words WORDS.txt [options]
|
||||
|
||||
Options:
|
||||
--endpoint http://HOST:1234/v1 (LM Studio)
|
||||
--feeds url1,url2
|
||||
--out ./out
|
||||
--bridge 5000
|
||||
--theme 300
|
||||
--related 1200
|
||||
--items 20 (per feed)
|
||||
--model <id> (recommended; skips /v1/models)
|
||||
--timeout 60 (seconds)
|
||||
--retries 4
|
||||
|
||||
# enforce minima per length in final pool
|
||||
--min2 4000
|
||||
--min3 7000
|
||||
--min4 9000
|
||||
--min5 0
|
||||
--min6 0
|
||||
--min7 0
|
||||
--min8 0
|
||||
""");
|
||||
System.exit(0);
|
||||
}
|
||||
default -> throw new IllegalArgumentException("Unknown arg: " + a);
|
||||
@@ -223,64 +279,54 @@ public class ThemePoolBuilderLength {
|
||||
out.add(w);
|
||||
}
|
||||
|
||||
// 2) ensure DEFAULT_SHORTS are present even if absent in word-list.txt
|
||||
/* // 2) ensure DEFAULT_SHORTS are present even if absent in word-list.txt
|
||||
for (var raw : DEFAULT_SHORTS) {
|
||||
var w = normalizeDutchToken(raw);
|
||||
if (w == null) continue;
|
||||
if (idOf.containsKey(w)) continue;
|
||||
idOf.put(w, out.size());
|
||||
out.add(w);
|
||||
}
|
||||
|
||||
// 3) small extra injects (optional)
|
||||
var extraShorts = List.of(
|
||||
"AL","NU","TO","NA","BIJ","TOT","DAN","WAT","DAT",
|
||||
"IK","JE","WE","WIJ","JIJ","ZIJ","HIJ","HEN","ONS","JOU"
|
||||
);
|
||||
for (var wRaw : extraShorts) {
|
||||
var w = normalizeDutchToken(wRaw);
|
||||
if (w == null) continue;
|
||||
if (idOf.containsKey(w)) continue;
|
||||
idOf.put(w, out.size());
|
||||
out.add(w);
|
||||
}
|
||||
}*/
|
||||
|
||||
// Load LLM scores
|
||||
var llmScores = new HashMap<String, Integer>();
|
||||
try {
|
||||
var scoreLines = Files.readAllLines(Path.of("word_scores.csv"), StandardCharsets.UTF_8);
|
||||
var first = true;
|
||||
var first = true;
|
||||
for (var line : scoreLines) {
|
||||
if (first) { first = false; continue; }
|
||||
if (first) {
|
||||
first = false;
|
||||
continue;
|
||||
}
|
||||
var parts = line.split(",", 3);
|
||||
if (parts.length >= 3) {
|
||||
try {
|
||||
var word = parts[0].trim().toUpperCase(Locale.ROOT);
|
||||
var score = Integer.parseInt(parts[1].trim());
|
||||
var word = parts[0].trim().toUpperCase(Locale.ROOT);
|
||||
var score = Integer.parseInt(parts[1].trim());
|
||||
var status = parts[2].trim();
|
||||
if ("OK".equalsIgnoreCase(status)) {
|
||||
llmScores.put(word, score);
|
||||
}
|
||||
} catch (NumberFormatException ignored) {}
|
||||
} catch (NumberFormatException ignored) { }
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.err.println("Warning: word_scores.csv not found, using default scores.");
|
||||
}
|
||||
|
||||
|
||||
var n = out.size();
|
||||
var score = new int[n];
|
||||
var byLen = new BitSet[9];
|
||||
for (var L = 0; L <= 8; L++) byLen[L] = new BitSet(n);
|
||||
|
||||
for (var i = 0; i < n; i++) {
|
||||
var w = out.get(i);
|
||||
var w = out.get(i);
|
||||
var crossScore = crossabilityScore(w);
|
||||
var lScore = llmScores.getOrDefault(w, 5);
|
||||
var lScore = llmScores.getOrDefault(w, 5);
|
||||
// Prioritize simple words (high lScore) and long words.
|
||||
// lScore (1-10) adds up to 300 points (weight 30).
|
||||
// lScore (1-10) adds up to 1000 points (weight 100).
|
||||
// Length (2-8) adds up to 160 points (weight 20).
|
||||
score[i] = crossScore + (lScore * 30) + (w.length() * 20);
|
||||
score[i] = crossScore + (lScore * 100) + (w.length() * 30);
|
||||
byLen[w.length()].set(i);
|
||||
}
|
||||
|
||||
@@ -290,6 +336,7 @@ public class ThemePoolBuilderLength {
|
||||
// ---------------- RSS via curl (browser-like) ----------------
|
||||
|
||||
static final class RssItem {
|
||||
|
||||
final String title;
|
||||
final String desc;
|
||||
RssItem(String title, String desc) {
|
||||
@@ -407,6 +454,7 @@ public class ThemePoolBuilderLength {
|
||||
static SSLContext insecureSslContext() throws Exception {
|
||||
var trustAll = new TrustManager[]{
|
||||
new X509TrustManager() {
|
||||
|
||||
public X509Certificate[] getAcceptedIssuers() { return new X509Certificate[0]; }
|
||||
public void checkClientTrusted(X509Certificate[] chain, String authType) { }
|
||||
public void checkServerTrusted(X509Certificate[] chain, String authType) { }
|
||||
@@ -579,6 +627,16 @@ public class ThemePoolBuilderLength {
|
||||
var body = s.substring(a + 1, b);
|
||||
var out = new ArrayList<String>();
|
||||
|
||||
// If it's a simple comma-separated list without quotes (or with mixed quotes),
|
||||
// let's try a more robust approach.
|
||||
if (!body.contains("\"")) {
|
||||
for (var part : body.split(",")) {
|
||||
var trimmed = part.trim();
|
||||
if (!trimmed.isEmpty()) out.add(trimmed);
|
||||
}
|
||||
if (!out.isEmpty()) return out;
|
||||
}
|
||||
|
||||
var cur = new StringBuilder();
|
||||
boolean in = false, esc = false;
|
||||
|
||||
@@ -626,7 +684,7 @@ public class ThemePoolBuilderLength {
|
||||
static List<String> llmThemeWords(Opts o, String modelId, String rssText) throws Exception {
|
||||
var prompt = """
|
||||
Je genereert woorden voor een Nederlandse kruiswoordpuzzel.
|
||||
|
||||
|
||||
Regels:
|
||||
- Output MOET exact één JSON array zijn: ["WOORD", ...]
|
||||
- Alleen A-Z, 2-8 letters woorden
|
||||
@@ -635,10 +693,10 @@ public class ThemePoolBuilderLength {
|
||||
- Focus op zelfstandige naamwoorden/termen uit het nieuws en relevante Zweedse kruiswoordpuzzel koppelwoorden in het thema.
|
||||
- Lever %d THEMA-woorden en daarna %d GERELATEERDE woorden (totaal %d).
|
||||
- Voeg ook wat korte woorden/afkortingen toe (2-4 letters), maar houd het totaal gelijk.
|
||||
|
||||
|
||||
Nieuws (koppen/samenvattingen):
|
||||
%s
|
||||
""".formatted(o.themeN, o.relatedN, (o.themeN + o.relatedN), rssText.substring(0, 8000));
|
||||
""".formatted(o.themeN, o.relatedN, (o.themeN + o.relatedN), rssText.substring(0, Math.min(rssText.length(), 8000)));
|
||||
|
||||
var body = """
|
||||
{
|
||||
@@ -668,14 +726,20 @@ public class ThemePoolBuilderLength {
|
||||
|
||||
static BitSet buildBridgeBitmap(Lexicon lex, int bridgeN) {
|
||||
var n = lex.words.size();
|
||||
var ids = new Integer[n];
|
||||
for (var i = 0; i < n; i++) ids[i] = i;
|
||||
var ids = new ArrayList<Integer>(n);
|
||||
for (var i = 0; i < n; i++) {
|
||||
// Optionally filter out VERY complex words from the bridge (e.g. lScore < 3)
|
||||
// But since we sort by score (which is now dominated by lScore),
|
||||
// they will be at the very bottom anyway.
|
||||
if (lex.score[i] < 800) continue;
|
||||
ids.add(i);
|
||||
}
|
||||
|
||||
Arrays.sort(ids, (a, b) -> Integer.compare(lex.score[b], lex.score[a]));
|
||||
ids.sort((a, b) -> Integer.compare(lex.score[b], lex.score[a]));
|
||||
|
||||
var bs = new BitSet(n);
|
||||
var take = Math.min(bridgeN, n);
|
||||
for (var i = 0; i < take; i++) bs.set(ids[i]);
|
||||
var take = Math.min(bridgeN, ids.size());
|
||||
for (var i = 0; i < take; i++) bs.set(ids.get(i));
|
||||
return bs;
|
||||
}
|
||||
|
||||
@@ -710,6 +774,8 @@ public class ThemePoolBuilderLength {
|
||||
|
||||
var out = new ArrayList<String>(ids.size());
|
||||
for (var id : ids) {
|
||||
/* if (lex.score[id] < 680)
|
||||
continue;*/
|
||||
out.add(lex.words.get(id));
|
||||
}
|
||||
Files.write(path, out, StandardCharsets.UTF_8, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
|
||||
@@ -811,7 +877,7 @@ public class ThemePoolBuilderLength {
|
||||
System.out.println("Using model: " + modelId);
|
||||
|
||||
System.out.println("Generating theme words via LM Studio...");
|
||||
var llmWords = llmThemeWords(o, modelId, rssText.toString());
|
||||
List<String> llmWords = Arrays.asList();//llmThemeWords(o, modelId, rssText.toString());
|
||||
|
||||
// Normalize + keep only those present in master lexicon
|
||||
var themeKept = new LinkedHashSet<String>();
|
||||
@@ -843,13 +909,13 @@ public class ThemePoolBuilderLength {
|
||||
Date: %s
|
||||
Feeds: %s
|
||||
Model: %s
|
||||
|
||||
|
||||
Master size: %d
|
||||
Theme kept (in master): %d
|
||||
Bridge size: %d
|
||||
Shorts kept: %d
|
||||
Pool total: %d
|
||||
|
||||
|
||||
Enforced minima:
|
||||
2: %d
|
||||
3: %d
|
||||
@@ -858,10 +924,10 @@ public class ThemePoolBuilderLength {
|
||||
6: %d
|
||||
7: %d
|
||||
8: %d
|
||||
|
||||
|
||||
Counts per length (theme):
|
||||
%s
|
||||
|
||||
|
||||
Counts per length (pool):
|
||||
%s
|
||||
""".formatted(
|
||||
|
||||
Reference in New Issue
Block a user