update them

This commit is contained in:
mike
2025-12-21 19:42:20 +01:00
parent e2bad52d1f
commit 22133e86f0
11 changed files with 17859 additions and 7169 deletions

View File

@@ -0,0 +1,245 @@
{
"date": "2025-12-21",
"theme": "algemeen-1766342006477",
"difficulty": 1,
"rewards": {
"coins": 50,
"stars": 2,
"hints": 1
},
"gridv2": [
"############",
"############",
"##DE####RA##",
"##IDRANIEM##",
"#####CORTES#",
"#####ELAINE#",
"##GM#T#KUNT#",
"##AM#A#ENER#",
"###MEA#ESMA#",
"###DATES####",
"############"
],
"words": [
{
"word": "DE",
"clue": "DE",
"startRow": 2,
"startCol": 2,
"direction": "horizontal",
"answer": "DE",
"arrowRow": 2,
"arrowCol": 1
},
{
"word": "ACETAAT",
"clue": "ACETAAT",
"startRow": 3,
"startCol": 5,
"direction": "vertical",
"answer": "ACETAAT",
"arrowRow": 2,
"arrowCol": 5
},
{
"word": "NOL",
"clue": "NOL",
"startRow": 3,
"startCol": 6,
"direction": "vertical",
"answer": "NOL",
"arrowRow": 2,
"arrowCol": 6
},
{
"word": "IRAKEES",
"clue": "IRAKEES",
"startRow": 3,
"startCol": 7,
"direction": "vertical",
"answer": "IRAKEES",
"arrowRow": 2,
"arrowCol": 7
},
{
"word": "RA",
"clue": "RA",
"startRow": 2,
"startCol": 8,
"direction": "horizontal",
"answer": "RA",
"arrowRow": 2,
"arrowCol": 7
},
{
"word": "IDRANIEM",
"clue": "IDRANIEM",
"startRow": 3,
"startCol": 2,
"direction": "horizontal",
"answer": "IDRANIEM",
"arrowRow": 3,
"arrowCol": 1
},
{
"word": "DI",
"clue": "DI",
"startRow": 2,
"startCol": 2,
"direction": "vertical",
"answer": "DI",
"arrowRow": 1,
"arrowCol": 2
},
{
"word": "ED",
"clue": "ED",
"startRow": 2,
"startCol": 3,
"direction": "vertical",
"answer": "ED",
"arrowRow": 1,
"arrowCol": 3
},
{
"word": "CORTES",
"clue": "CORTES",
"startRow": 4,
"startCol": 5,
"direction": "horizontal",
"answer": "CORTES",
"arrowRow": 4,
"arrowCol": 4
},
{
"word": "GA",
"clue": "GA",
"startRow": 6,
"startCol": 2,
"direction": "vertical",
"answer": "GA",
"arrowRow": 5,
"arrowCol": 2
},
{
"word": "MMMD",
"clue": "MMMD",
"startRow": 6,
"startCol": 3,
"direction": "vertical",
"answer": "MMMD",
"arrowRow": 5,
"arrowCol": 3
},
{
"word": "ELAINE",
"clue": "ELAINE",
"startRow": 5,
"startCol": 5,
"direction": "horizontal",
"answer": "ELAINE",
"arrowRow": 5,
"arrowCol": 4
},
{
"word": "GM",
"clue": "GM",
"startRow": 6,
"startCol": 2,
"direction": "horizontal",
"answer": "GM",
"arrowRow": 6,
"arrowCol": 1
},
{
"word": "KUNT",
"clue": "KUNT",
"startRow": 6,
"startCol": 7,
"direction": "horizontal",
"answer": "KUNT",
"arrowRow": 6,
"arrowCol": 6
},
{
"word": "AM",
"clue": "AM",
"startRow": 7,
"startCol": 2,
"direction": "horizontal",
"answer": "AM",
"arrowRow": 7,
"arrowCol": 1
},
{
"word": "ENER",
"clue": "ENER",
"startRow": 7,
"startCol": 7,
"direction": "horizontal",
"answer": "ENER",
"arrowRow": 7,
"arrowCol": 6
},
{
"word": "MEA",
"clue": "MEA",
"startRow": 8,
"startCol": 3,
"direction": "horizontal",
"answer": "MEA",
"arrowRow": 8,
"arrowCol": 2
},
{
"word": "ESMA",
"clue": "ESMA",
"startRow": 8,
"startCol": 7,
"direction": "horizontal",
"answer": "ESMA",
"arrowRow": 8,
"arrowCol": 6
},
{
"word": "DATES",
"clue": "DATES",
"startRow": 9,
"startCol": 3,
"direction": "horizontal",
"answer": "DATES",
"arrowRow": 9,
"arrowCol": 2
},
{
"word": "RETIUNS",
"clue": "RETIUNS",
"startRow": 2,
"startCol": 8,
"direction": "vertical",
"answer": "RETIUNS",
"arrowRow": 1,
"arrowCol": 8
},
{
"word": "AMENNEM",
"clue": "AMENNEM",
"startRow": 2,
"startCol": 9,
"direction": "vertical",
"answer": "AMENNEM",
"arrowRow": 1,
"arrowCol": 9
},
{
"word": "SETRA",
"clue": "SETRA",
"startRow": 4,
"startCol": 10,
"direction": "vertical",
"answer": "SETRA",
"arrowRow": 3,
"arrowCol": 10
}
]
}

View File

@@ -0,0 +1,245 @@
{
"date": "2025-12-21",
"theme": "algemeen-1766342178309",
"difficulty": 1,
"rewards": {
"coins": 50,
"stars": 2,
"hints": 1
},
"gridv2": [
"############",
"############",
"##XD####LA##",
"##IMUOLLAH##",
"#####NIETTE#",
"#####GEINIG#",
"##BM#E#DELI#",
"##EM#V#DIAN#",
"###CIA#ETTE#",
"###DELEN####",
"############"
],
"words": [
{
"word": "XD",
"clue": "XD",
"startRow": 2,
"startCol": 2,
"direction": "horizontal",
"answer": "XD",
"arrowRow": 2,
"arrowCol": 1
},
{
"word": "ONGEVAL",
"clue": "ONGEVAL",
"startRow": 3,
"startCol": 5,
"direction": "vertical",
"answer": "ONGEVAL",
"arrowRow": 2,
"arrowCol": 5
},
{
"word": "LIE",
"clue": "LIE",
"startRow": 3,
"startCol": 6,
"direction": "vertical",
"answer": "LIE",
"arrowRow": 2,
"arrowCol": 6
},
{
"word": "LEIDDEN",
"clue": "LEIDDEN",
"startRow": 3,
"startCol": 7,
"direction": "vertical",
"answer": "LEIDDEN",
"arrowRow": 2,
"arrowCol": 7
},
{
"word": "LA",
"clue": "LA",
"startRow": 2,
"startCol": 8,
"direction": "horizontal",
"answer": "LA",
"arrowRow": 2,
"arrowCol": 7
},
{
"word": "IMUOLLAH",
"clue": "IMUOLLAH",
"startRow": 3,
"startCol": 2,
"direction": "horizontal",
"answer": "IMUOLLAH",
"arrowRow": 3,
"arrowCol": 1
},
{
"word": "XI",
"clue": "XI",
"startRow": 2,
"startCol": 2,
"direction": "vertical",
"answer": "XI",
"arrowRow": 1,
"arrowCol": 2
},
{
"word": "DM",
"clue": "DM",
"startRow": 2,
"startCol": 3,
"direction": "vertical",
"answer": "DM",
"arrowRow": 1,
"arrowCol": 3
},
{
"word": "NIETTE",
"clue": "NIETTE",
"startRow": 4,
"startCol": 5,
"direction": "horizontal",
"answer": "NIETTE",
"arrowRow": 4,
"arrowCol": 4
},
{
"word": "BE",
"clue": "BE",
"startRow": 6,
"startCol": 2,
"direction": "vertical",
"answer": "BE",
"arrowRow": 5,
"arrowCol": 2
},
{
"word": "MMCD",
"clue": "MMCD",
"startRow": 6,
"startCol": 3,
"direction": "vertical",
"answer": "MMCD",
"arrowRow": 5,
"arrowCol": 3
},
{
"word": "GEINIG",
"clue": "GEINIG",
"startRow": 5,
"startCol": 5,
"direction": "horizontal",
"answer": "GEINIG",
"arrowRow": 5,
"arrowCol": 4
},
{
"word": "BM",
"clue": "BM",
"startRow": 6,
"startCol": 2,
"direction": "horizontal",
"answer": "BM",
"arrowRow": 6,
"arrowCol": 1
},
{
"word": "DELI",
"clue": "DELI",
"startRow": 6,
"startCol": 7,
"direction": "horizontal",
"answer": "DELI",
"arrowRow": 6,
"arrowCol": 6
},
{
"word": "EM",
"clue": "EM",
"startRow": 7,
"startCol": 2,
"direction": "horizontal",
"answer": "EM",
"arrowRow": 7,
"arrowCol": 1
},
{
"word": "DIAN",
"clue": "DIAN",
"startRow": 7,
"startCol": 7,
"direction": "horizontal",
"answer": "DIAN",
"arrowRow": 7,
"arrowCol": 6
},
{
"word": "CIA",
"clue": "CIA",
"startRow": 8,
"startCol": 3,
"direction": "horizontal",
"answer": "CIA",
"arrowRow": 8,
"arrowCol": 2
},
{
"word": "ETTE",
"clue": "ETTE",
"startRow": 8,
"startCol": 7,
"direction": "horizontal",
"answer": "ETTE",
"arrowRow": 8,
"arrowCol": 6
},
{
"word": "DELEN",
"clue": "DELEN",
"startRow": 9,
"startCol": 3,
"direction": "horizontal",
"answer": "DELEN",
"arrowRow": 9,
"arrowCol": 2
},
{
"word": "LATNEIT",
"clue": "LATNEIT",
"startRow": 2,
"startCol": 8,
"direction": "vertical",
"answer": "LATNEIT",
"arrowRow": 1,
"arrowCol": 8
},
{
"word": "AHTILAT",
"clue": "AHTILAT",
"startRow": 2,
"startCol": 9,
"direction": "vertical",
"answer": "AHTILAT",
"arrowRow": 1,
"arrowCol": 9
},
{
"word": "EGINE",
"clue": "EGINE",
"startRow": 4,
"startCol": 10,
"direction": "vertical",
"answer": "EGINE",
"arrowRow": 3,
"arrowCol": 10
}
]
}

View File

@@ -0,0 +1,245 @@
{
"date": "2025-12-21",
"theme": "algemeen-1766342418066",
"difficulty": 1,
"rewards": {
"coins": 50,
"stars": 2,
"hints": 1
},
"gridv2": [
"############",
"############",
"##NA####ST##",
"##IAREOMAS##",
"#####EDITIE#",
"#####NAASTE#",
"##KI#D#SIEM#",
"##AP#E#MEIR#",
"###CAR#ERPE#",
"###CLEAN####",
"############"
],
"words": [
{
"word": "NA",
"clue": "NA",
"startRow": 2,
"startCol": 2,
"direction": "horizontal",
"answer": "NA",
"arrowRow": 2,
"arrowCol": 1
},
{
"word": "EENDERE",
"clue": "EENDERE",
"startRow": 3,
"startCol": 5,
"direction": "vertical",
"answer": "EENDERE",
"arrowRow": 2,
"arrowCol": 5
},
{
"word": "ODA",
"clue": "ODA",
"startRow": 3,
"startCol": 6,
"direction": "vertical",
"answer": "ODA",
"arrowRow": 2,
"arrowCol": 6
},
{
"word": "MIASMEN",
"clue": "MIASMEN",
"startRow": 3,
"startCol": 7,
"direction": "vertical",
"answer": "MIASMEN",
"arrowRow": 2,
"arrowCol": 7
},
{
"word": "ST",
"clue": "ST",
"startRow": 2,
"startCol": 8,
"direction": "horizontal",
"answer": "ST",
"arrowRow": 2,
"arrowCol": 7
},
{
"word": "IAREOMAS",
"clue": "IAREOMAS",
"startRow": 3,
"startCol": 2,
"direction": "horizontal",
"answer": "IAREOMAS",
"arrowRow": 3,
"arrowCol": 1
},
{
"word": "NI",
"clue": "NI",
"startRow": 2,
"startCol": 2,
"direction": "vertical",
"answer": "NI",
"arrowRow": 1,
"arrowCol": 2
},
{
"word": "AA",
"clue": "AA",
"startRow": 2,
"startCol": 3,
"direction": "vertical",
"answer": "AA",
"arrowRow": 1,
"arrowCol": 3
},
{
"word": "EDITIE",
"clue": "EDITIE",
"startRow": 4,
"startCol": 5,
"direction": "horizontal",
"answer": "EDITIE",
"arrowRow": 4,
"arrowCol": 4
},
{
"word": "KA",
"clue": "KA",
"startRow": 6,
"startCol": 2,
"direction": "vertical",
"answer": "KA",
"arrowRow": 5,
"arrowCol": 2
},
{
"word": "IPCC",
"clue": "IPCC",
"startRow": 6,
"startCol": 3,
"direction": "vertical",
"answer": "IPCC",
"arrowRow": 5,
"arrowCol": 3
},
{
"word": "NAASTE",
"clue": "NAASTE",
"startRow": 5,
"startCol": 5,
"direction": "horizontal",
"answer": "NAASTE",
"arrowRow": 5,
"arrowCol": 4
},
{
"word": "KI",
"clue": "KI",
"startRow": 6,
"startCol": 2,
"direction": "horizontal",
"answer": "KI",
"arrowRow": 6,
"arrowCol": 1
},
{
"word": "SIEM",
"clue": "SIEM",
"startRow": 6,
"startCol": 7,
"direction": "horizontal",
"answer": "SIEM",
"arrowRow": 6,
"arrowCol": 6
},
{
"word": "AP",
"clue": "AP",
"startRow": 7,
"startCol": 2,
"direction": "horizontal",
"answer": "AP",
"arrowRow": 7,
"arrowCol": 1
},
{
"word": "MEIR",
"clue": "MEIR",
"startRow": 7,
"startCol": 7,
"direction": "horizontal",
"answer": "MEIR",
"arrowRow": 7,
"arrowCol": 6
},
{
"word": "CAR",
"clue": "CAR",
"startRow": 8,
"startCol": 3,
"direction": "horizontal",
"answer": "CAR",
"arrowRow": 8,
"arrowCol": 2
},
{
"word": "ERPE",
"clue": "ERPE",
"startRow": 8,
"startCol": 7,
"direction": "horizontal",
"answer": "ERPE",
"arrowRow": 8,
"arrowCol": 6
},
{
"word": "CLEAN",
"clue": "CLEAN",
"startRow": 9,
"startCol": 3,
"direction": "horizontal",
"answer": "CLEAN",
"arrowRow": 9,
"arrowCol": 2
},
{
"word": "SATSIER",
"clue": "SATSIER",
"startRow": 2,
"startCol": 8,
"direction": "vertical",
"answer": "SATSIER",
"arrowRow": 1,
"arrowCol": 8
},
{
"word": "TSITEIP",
"clue": "TSITEIP",
"startRow": 2,
"startCol": 9,
"direction": "vertical",
"answer": "TSITEIP",
"arrowRow": 1,
"arrowCol": 9
},
{
"word": "EEMRE",
"clue": "EEMRE",
"startRow": 4,
"startCol": 10,
"direction": "vertical",
"answer": "EEMRE",
"arrowRow": 3,
"arrowCol": 10
}
]
}

View File

@@ -0,0 +1,245 @@
{
"date": "2025-12-21",
"theme": "algemeen-1766342459539",
"difficulty": 1,
"rewards": {
"coins": 50,
"stars": 2,
"hints": 1
},
"gridv2": [
"############",
"############",
"##NA####ST##",
"##IAREOMAS##",
"#####EDITIE#",
"#####NAASTE#",
"##KI#D#SIEM#",
"##AP#E#MEIR#",
"###CAR#ERPE#",
"###CLEAN####",
"############"
],
"words": [
{
"word": "NA",
"clue": "NA",
"startRow": 2,
"startCol": 2,
"direction": "horizontal",
"answer": "NA",
"arrowRow": 2,
"arrowCol": 1
},
{
"word": "EENDERE",
"clue": "EENDERE",
"startRow": 3,
"startCol": 5,
"direction": "vertical",
"answer": "EENDERE",
"arrowRow": 2,
"arrowCol": 5
},
{
"word": "ODA",
"clue": "ODA",
"startRow": 3,
"startCol": 6,
"direction": "vertical",
"answer": "ODA",
"arrowRow": 2,
"arrowCol": 6
},
{
"word": "MIASMEN",
"clue": "MIASMEN",
"startRow": 3,
"startCol": 7,
"direction": "vertical",
"answer": "MIASMEN",
"arrowRow": 2,
"arrowCol": 7
},
{
"word": "ST",
"clue": "ST",
"startRow": 2,
"startCol": 8,
"direction": "horizontal",
"answer": "ST",
"arrowRow": 2,
"arrowCol": 7
},
{
"word": "IAREOMAS",
"clue": "IAREOMAS",
"startRow": 3,
"startCol": 2,
"direction": "horizontal",
"answer": "IAREOMAS",
"arrowRow": 3,
"arrowCol": 1
},
{
"word": "NI",
"clue": "NI",
"startRow": 2,
"startCol": 2,
"direction": "vertical",
"answer": "NI",
"arrowRow": 1,
"arrowCol": 2
},
{
"word": "AA",
"clue": "AA",
"startRow": 2,
"startCol": 3,
"direction": "vertical",
"answer": "AA",
"arrowRow": 1,
"arrowCol": 3
},
{
"word": "EDITIE",
"clue": "EDITIE",
"startRow": 4,
"startCol": 5,
"direction": "horizontal",
"answer": "EDITIE",
"arrowRow": 4,
"arrowCol": 4
},
{
"word": "KA",
"clue": "KA",
"startRow": 6,
"startCol": 2,
"direction": "vertical",
"answer": "KA",
"arrowRow": 5,
"arrowCol": 2
},
{
"word": "IPCC",
"clue": "IPCC",
"startRow": 6,
"startCol": 3,
"direction": "vertical",
"answer": "IPCC",
"arrowRow": 5,
"arrowCol": 3
},
{
"word": "NAASTE",
"clue": "NAASTE",
"startRow": 5,
"startCol": 5,
"direction": "horizontal",
"answer": "NAASTE",
"arrowRow": 5,
"arrowCol": 4
},
{
"word": "KI",
"clue": "KI",
"startRow": 6,
"startCol": 2,
"direction": "horizontal",
"answer": "KI",
"arrowRow": 6,
"arrowCol": 1
},
{
"word": "SIEM",
"clue": "SIEM",
"startRow": 6,
"startCol": 7,
"direction": "horizontal",
"answer": "SIEM",
"arrowRow": 6,
"arrowCol": 6
},
{
"word": "AP",
"clue": "AP",
"startRow": 7,
"startCol": 2,
"direction": "horizontal",
"answer": "AP",
"arrowRow": 7,
"arrowCol": 1
},
{
"word": "MEIR",
"clue": "MEIR",
"startRow": 7,
"startCol": 7,
"direction": "horizontal",
"answer": "MEIR",
"arrowRow": 7,
"arrowCol": 6
},
{
"word": "CAR",
"clue": "CAR",
"startRow": 8,
"startCol": 3,
"direction": "horizontal",
"answer": "CAR",
"arrowRow": 8,
"arrowCol": 2
},
{
"word": "ERPE",
"clue": "ERPE",
"startRow": 8,
"startCol": 7,
"direction": "horizontal",
"answer": "ERPE",
"arrowRow": 8,
"arrowCol": 6
},
{
"word": "CLEAN",
"clue": "CLEAN",
"startRow": 9,
"startCol": 3,
"direction": "horizontal",
"answer": "CLEAN",
"arrowRow": 9,
"arrowCol": 2
},
{
"word": "SATSIER",
"clue": "SATSIER",
"startRow": 2,
"startCol": 8,
"direction": "vertical",
"answer": "SATSIER",
"arrowRow": 1,
"arrowCol": 8
},
{
"word": "TSITEIP",
"clue": "TSITEIP",
"startRow": 2,
"startCol": 9,
"direction": "vertical",
"answer": "TSITEIP",
"arrowRow": 1,
"arrowCol": 9
},
{
"word": "EEMRE",
"clue": "EEMRE",
"startRow": 4,
"startCol": 10,
"direction": "vertical",
"answer": "EEMRE",
"arrowRow": 3,
"arrowCol": 10
}
]
}

13770
out/pool.txt

File diff suppressed because it is too large Load Diff

View File

@@ -3,10 +3,10 @@ Feeds: https://feeds.nos.nl/nosnieuwsalgemeen, https://feeds.nos.nl/nosnieuwstec
Model: mistralai/mistral-nemo-instruct-2407
Master size: 91892
Theme kept (in master): 36
Theme kept (in master): 0
Bridge size: 32000
Shorts kept: 133
Pool total: 38805
Pool total: 38391
Enforced minima:
2: 4000
@@ -19,20 +19,20 @@ Enforced minima:
Counts per length (theme):
2: 0
3: 1
4: 6
5: 8
6: 11
7: 4
8: 6
3: 0
4: 0
5: 0
6: 0
7: 0
8: 0
Counts per length (pool):
2: 248
3: 1666
4: 4850
5: 17
6: 432
7: 7810
8: 23782
5: 795
6: 1987
7: 8550
8: 20295

File diff suppressed because one or more lines are too long

View File

@@ -1,36 +0,0 @@
KANAAL
STEKEN
RECORD
RUIM
ZAKEN
BOTEN
ENGELAND
FRANSE
BRIEFJES
EXPLOSIE
HAAGS
INGANG
GLAZEN
DEUR
TIJD
BETALEN
KESTER
MAARTEN
OMROEP
WEST
RAADSEL
DADER
SPOREN
TEGEN
POLITIE
CAMIEL
JANSEN
MUZIKALE
DIEPGANG
MOTTO
KLANK
KLOOF
PLATFORM
WEG
BUMA
CULT

View File

@@ -133,14 +133,40 @@ public class SwedishGenerator {
final String word;
final int difficulty;
final int score;
public WordDifficulty(String word) {
public WordDifficulty(String word, int score) {
this.word = word;
this.score = score;
// Simple length heuristic: longer words get a lower (more negative) difficulty value, capped at -40
this.difficulty = -Math.min(40,word.length() * 5);
// We combine this with the score (10 = common/simple, 1 = rare/hard)
// Lower difficulty value means it is tried EARLIER.
// We want LONGER and SIMPLER words to be tried earlier.
// Increasing simplicity weight: score (1-10) now has max impact of 50.
this.difficulty = -Math.min(40, word.length() * 5) - (score * 5);
}
}
/**
 * Loads per-word scores from {@code word_scores.csv} in the current working directory.
 *
 * <p>The first line is treated as a header and skipped. Every other line is split on
 * commas into at most three fields: the first field (trimmed and upper-cased with
 * {@link Locale#ROOT}) becomes the key, the second field (trimmed) is parsed as the
 * integer score. Lines with fewer than two fields or with a non-numeric score are skipped.
 *
 * <p>Loading is best-effort: if the file is missing or cannot be read, a warning is
 * printed to {@code System.err} and an empty map is returned.
 *
 * @return a mutable map from upper-cased word to its score; empty when nothing could be loaded
 */
static Map<String, Integer> loadScores() {
    var scores = new HashMap<String, Integer>();
    try {
        var lines = Files.readAllLines(Path.of("word_scores.csv"), StandardCharsets.UTF_8);
        // Start after the header row; an empty file yields an empty sub-list.
        for (var line : lines.subList(Math.min(1, lines.size()), lines.size())) {
            var parts = line.split(",", 3);
            if (parts.length < 2) {
                continue; // no score column on this line
            }
            try {
                scores.put(parts[0].trim().toUpperCase(Locale.ROOT), Integer.parseInt(parts[1].trim()));
            } catch (NumberFormatException ignored) {
                // Deliberately tolerate malformed score cells: skip the row, keep loading the rest.
            }
        }
    } catch (java.nio.file.NoSuchFileException e) {
        System.err.println("Warning: word_scores.csv not found, using default scores.");
    } catch (IOException e) {
        // Any other I/O failure (permissions, bad encoding, ...) is not a "missing file":
        // report what actually went wrong instead of claiming the file was not found.
        System.err.println("Warning: could not read word_scores.csv (" + e + "), using default scores.");
    }
    return scores;
}
static final class Dict {
final ArrayList<String> words;
@@ -160,11 +186,13 @@ public class SwedishGenerator {
raw = "EU\nUUR\nAUTO\nBOOM\nHUIS\nKAT\nZEE\nRODE\nDRAAD\nKENNIS\nNETWERK\nPAKTE\n";
}
var words = new ArrayList<WordDifficulty>();
var llmScores = loadScores();
var words = new ArrayList<WordDifficulty>();
for (var line : raw.split("\\R")) {
var s = line.trim().toUpperCase(Locale.ROOT);
if (s.matches("^[A-Z]{2,8}$")) {
words.add(new WordDifficulty(s));
var score = llmScores.getOrDefault(s, 5); // Default to middle
words.add(new WordDifficulty(s, score));
}
}
@@ -254,9 +282,10 @@ public class SwedishGenerator {
ci.count = curLen;
return ci;
}
static int indexToDifficulty(DictEntry entry, int index) {
static int indexToDifficulty(DictEntry entry, int index, Map<String, Integer> llmScores) {
var word = entry.words.get(index);
return new WordDifficulty(word).difficulty;
var score = llmScores.getOrDefault(word, 5);
return new WordDifficulty(word, score).difficulty;
}
@@ -729,7 +758,7 @@ public class SwedishGenerator {
return p;
};
final var MAX_TRIES_PER_SLOT = 500;
final var MAX_TRIES_PER_SLOT = 2000;
class Solver {
@@ -783,10 +812,9 @@ public class SwedishGenerator {
// When picking words from sorted indices, we want to favor the beginning
// (lower difficulty) but still have some randomness.
for (var t = 0; t < tries; t++) {
// Power law or similar to favor lower indices:
// pick a random double in [0, 1), square it to bias towards 0.
// Bias strongly towards lower indices (simpler words) using r^3
double r = rng.nextFloat();
int idxInArray = (int) (r * r * L);
int idxInArray = (int) (r * r * r * L);
var idx = idxs[idxInArray];
var w = entry.words.get(idx);
if (tryWord.apply(w)) return true;
@@ -804,7 +832,7 @@ public class SwedishGenerator {
var tries = Math.min(MAX_TRIES_PER_SLOT, N);
for (var t = 0; t < tries; t++) {
double r = rng.nextFloat();
int idxInArray = (int) (r * r * N);
int idxInArray = (int) (r * r * r * N);
var w = entry.words.get(idxInArray);
if (tryWord.apply(w)) return true;
}
@@ -863,7 +891,7 @@ public class SwedishGenerator {
System.out.printf(Locale.ROOT, "MASK: %.3fs%n", (tMask1 - tMask0) / 1e9);
var tFill0 = System.nanoTime();
var filled = fillMask(rng, mask, dict.index, 200, 30000);
var filled = fillMask(rng, mask, dict.index, 200, 60000);
var tFill1 = System.nanoTime();
System.out.printf(Locale.ROOT, "FILL: %.3fms%n", (tFill1 - tFill0) / 1e6);

View File

@@ -245,6 +245,24 @@ public class ThemePoolBuilderLength {
out.add(w);
}
// Load LLM scores: build a map from upper-cased word to integer score, read from
// word_scores.csv (path is relative to the current working directory).
var llmScores = new HashMap<String, Integer>();
try {
var scoreLines = Files.readAllLines(Path.of("word_scores.csv"), StandardCharsets.UTF_8);
// The first line is skipped unconditionally (presumably a header row).
var first = true;
for (var line : scoreLines) {
if (first) { first = false; continue; }
// Split into at most 3 fields; only the first two (word, score) are used.
var parts = line.split(",", 3);
if (parts.length >= 2) {
try {
llmScores.put(parts[0].trim().toUpperCase(Locale.ROOT), Integer.parseInt(parts[1].trim()));
// Rows whose score is not a valid integer are silently skipped.
} catch (NumberFormatException ignored) {}
}
}
// NOTE(review): this branch runs for ANY IOException, not only a missing file,
// so the "not found" wording can be misleading for other read failures.
} catch (IOException e) {
System.err.println("Warning: word_scores.csv not found, using default scores.");
}
var n = out.size();
var score = new int[n];
var byLen = new BitSet[9];
@@ -252,7 +270,11 @@ public class ThemePoolBuilderLength {
for (var i = 0; i < n; i++) {
var w = out.get(i);
score[i] = crossabilityScore(w);
var crossScore = crossabilityScore(w);
var lScore = llmScores.getOrDefault(w, 5);
// Prioritize simple words (high lScore) and high crossability
// Increased simplicity weight: lScore (1-10) now adds up to 200 points.
score[i] = crossScore + (lScore * 20);
byLen[w.length()].set(i);
}

File diff suppressed because it is too large Load Diff