Gather data

This commit is contained in:
mike
2026-01-05 23:43:44 +01:00
parent f031591105
commit 5bba12caf8
3 changed files with 77 additions and 41 deletions

View File

@@ -5,7 +5,7 @@ import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.concurrent.*;
import java.util.function.Predicate;
import java.util.stream.Collectors;
/**
@@ -132,7 +132,7 @@ public class SwedishGenerator {
int[] data() { return a; } // note: may have extra capacity
}
static record DictEntry(ArrayList<String> words, IntList[][] pos) {
static record DictEntry(ArrayList<Lemma> words, IntList[][] pos) {
public DictEntry(int L) {
this(new ArrayList<>(), new IntList[L][26]);
@@ -142,14 +142,14 @@ public class SwedishGenerator {
}
}
static record WordDifficulty(String word, int difficulty, int simpel, int score, int cross, ArrayList<String> clue) {
static record Lemma(String word, int difficulty, int simpel, int score, int cross, ArrayList<String> clue) {
public WordDifficulty(String word, int simpel, int score, String clue) {
var difficulty1 = 0 + ((8 - word.length()) * 30) + ((10 - score) * 15);
var crossScore = ThemePoolBuilderLength.crossabilityScore(word);
var list = new ArrayList<String>(10);
public Lemma(String word, int simpel, int score, String clue) {
var complex = 0 + ((8 - word.length()) * 30) + ((10 - score) * 15);
var crossScore = ThemePoolBuilderLength.crossabilityScore(word);
var list = new ArrayList<String>(10);
list.add(clue);
this(word, difficulty1, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15), list);
this(word, complex, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15), list);
// Prioritize simple words (high lScore) and long words.
// lScore (1-10) adds up to 1000 points (weight 100).
@@ -161,9 +161,10 @@ public class SwedishGenerator {
// Length impact: up to 8 * 10 = 80
// Score impact: up to 10 * 15 = 150
}
char charAt(int idx) { return word.charAt(idx); }
}
public static record Dict(Map<String, WordDifficulty> words,
public static record Dict(Map<String, Lemma> words,
HashMap<Integer, DictEntry> index,
HashMap<Integer, Integer> lenCounts) { }
static Dict loadWords(String wordsPath) {
@@ -171,10 +172,11 @@ public class SwedishGenerator {
try {
raw = Files.readString(Path.of(wordsPath), StandardCharsets.UTF_8);
} catch (IOException e) {
e.printStackTrace();
raw = "WOORD,level_1_to_10,hint\nEU,2,hint\nUUR,2,hint\nAUTO,2,hint\nBOOM,2,hint\nHUIS,2,hint\nKAT,2,hint\nZEE,2,hint\nRODE,2,hint\nDRAAD,2,hint\nKENNIS,2,hint\nNETWERK,2,hint\nPAKTE,2,hint\n";
}
var map = new HashMap<String, WordDifficulty>();
var map = new HashMap<String, Lemma>();
boolean first = true;
for (var line : raw.split("\\R")) {
if (line.isBlank()) continue;
@@ -200,7 +202,7 @@ public class SwedishGenerator {
if (map.containsKey(s)) {
map.get(s).clue.add(rawClue);
} else {
map.put(s, new WordDifficulty(s, simpel, score, rawClue));
map.put(s, new Lemma(s, simpel, score, rawClue));
}
}
}
@@ -223,7 +225,7 @@ public class SwedishGenerator {
}
var idx = entry.words.size();
entry.words.add(w.word);
entry.words.add(w);
for (var i = 0; i < L; i++) {
var letter = w.word.charAt(i) - 'A';
@@ -618,7 +620,7 @@ public class SwedishGenerator {
return cross * 10 + s.len;
}
static Undo placeWord(char[][] grid, Slot s, String w) {
static Undo placeWord(char[][] grid, Slot s, Lemma w) {
var urs = new int[s.len];
var ucs = new int[s.len];
var up = new char[s.len];
@@ -648,7 +650,7 @@ public class SwedishGenerator {
}
static FillResult fillMask(Rng rng, char[][] mask, HashMap<Integer, DictEntry> dictIndex,
Map<String, WordDifficulty> llmScores,
Map<String, Lemma> llmScores,
int logEveryMs, int timeLimitMs, boolean verbose) {
var grid = deepCopyGrid(mask);
@@ -757,9 +759,9 @@ public class SwedishGenerator {
var entry = dictIndex.get(s.len);
var pat = patternForSlot(grid, s);
java.util.function.Function<String, Boolean> tryWord = (String w) -> {
Predicate<Lemma> tryWord = (Lemma w) -> {
if (w == null) return false;
if (used.contains(w)) return false;
if (used.contains(w.word())) return false;
for (var i = 0; i < pat.length; i++) {
if (pat[i] != 0 && pat[i] != w.charAt(i)) return false;
@@ -768,8 +770,8 @@ public class SwedishGenerator {
var undo = placeWord(grid, s, w);
if (undo == null) return false;
used.add(w);
assigned.put(k, w);
used.add(w.word());
assigned.put(k, w.word());
if (backtrack()) return true;
@@ -792,7 +794,7 @@ public class SwedishGenerator {
int idxInArray = (int) (r * r * r * L);
var idx = idxs[idxInArray];
var w = entry.words.get(idx);
if (tryWord.apply(w)) return true;
if (tryWord.test(w)) return true;
}
stats.backtracks++;
return false;
@@ -809,7 +811,7 @@ public class SwedishGenerator {
double r = rng.nextFloat();
int idxInArray = (int) (r * r * r * N);
var w = entry.words.get(idxInArray);
if (tryWord.apply(w)) return true;
if (tryWord.test(w)) return true;
}
stats.backtracks++;