Gather data

This commit is contained in:
mike
2026-01-06 00:41:42 +01:00
parent 7154ee757c
commit 42a69235d2
2 changed files with 27 additions and 31 deletions

View File

@@ -138,7 +138,7 @@ public class SwedishGenerator {
}
}
static record Lemma(int index, String word,int length, int difficulty, int simpel, int score, int cross, ArrayList<String> clue) {
static record Lemma(int index, String word, int length, int difficulty, int simpel, int score, int cross, ArrayList<String> clue) {
static int LEMMA_COUNTER = 0;
public Lemma(String word, int simpel, int score, String clue) {
@@ -146,7 +146,7 @@ public class SwedishGenerator {
var crossScore = ThemePoolBuilderLength.crossabilityScore(word);
var list = new ArrayList<String>(10);
list.add(clue);
this(++LEMMA_COUNTER, word,word.length(), complex, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15), list);
this(++LEMMA_COUNTER, word, word.length(), complex, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15), list);
// Prioritize simple words (high lScore) and long words.
// lScore (1-10) adds up to 1000 points (weight 100).
@@ -167,8 +167,8 @@ public class SwedishGenerator {
}
public static record Dict(Map<String, Lemma> words,
HashMap<Integer, DictEntry> index,
HashMap<Integer, Integer> lenCounts) { }
DictEntry[] index,
int[] lenCounts) { }
static Dict loadWords(String wordsPath) {
String raw;
try {
@@ -213,20 +213,17 @@ public class SwedishGenerator {
// Sort words by difficulty in ascending order
words.sort(Comparator.comparingInt(wd -> wd.simpel));
var index = new HashMap<Integer, DictEntry>();
var lenCounts = new HashMap<Integer, Integer>();
var lenCounts = new int[12];
var index = new DictEntry[12];
Arrays.setAll(index, i -> new DictEntry(i));
int maxLength = -1;
for (var w : words) {
var L = w.length();
lenCounts.put(L, lenCounts.getOrDefault(L, 0) + 1);
if (L > maxLength) maxLength = L;
lenCounts[L]++;
var entry = index.get(L);
if (entry == null) {
entry = new DictEntry(L);
index.put(L, entry);
}
var idx = entry.words.size();
var entry = index[L];
var idx = entry.words.size();
entry.words.add(w);
for (var i = 0; i < L; i++) {
@@ -349,7 +346,7 @@ public class SwedishGenerator {
// ---------------- FAST mask fitness ----------------
static long maskFitness(char[][] grid, HashMap<Integer, Integer> lenCounts) {
static long maskFitness(char[][] grid, int[] lenCounts) {
long penalty = 0;
var clueCount = 0;
@@ -371,7 +368,7 @@ public class SwedishGenerator {
if (s.len > MAX_LEN) penalty += 8000 + (long) (s.len - MAX_LEN) * 500L;
if (s.len >= MIN_LEN && s.len <= MAX_LEN) {
if (!lenCounts.containsKey(s.len)) penalty += 12000;
if (lenCounts[s.len]<=0) penalty += 12000;
}
for (var i = 0; i < s.len; i++) {
@@ -514,7 +511,7 @@ public class SwedishGenerator {
return out;
}
static char[][] hillclimb(Rng rng, char[][] start, HashMap<Integer, Integer> lenCounts, int limit) {
static char[][] hillclimb(Rng rng, char[][] start, int[] lenCounts, int limit) {
var best = deepCopyGrid(start);
var bestF = maskFitness(best, lenCounts);
var fails = 0;
@@ -539,7 +536,7 @@ public class SwedishGenerator {
return same / (double) (W * H);
}
static char[][] generateMask(Rng rng, HashMap<Integer, Integer> lenCounts, int popSize, int gens, boolean verbose) {
static char[][] generateMask(Rng rng, int[] lenCounts, int popSize, int gens, boolean verbose) {
if (verbose) System.out.println("generateMask init pop: " + popSize);
var pop = new ArrayList<char[][]>();
@@ -599,11 +596,11 @@ public class SwedishGenerator {
public static final class FillResult {
public boolean ok;
public char[][] grid;
public boolean ok;
public char[][] grid;
public HashMap<String, Lemma> clueMap;
public FillStats stats;
public double simplicity;
public FillStats stats;
public double simplicity;
}
record Undo(int[] rs, int[] cs, char[] prev, int n) {
@@ -653,8 +650,7 @@ public class SwedishGenerator {
for (var i = 0; i < u.n; i++) grid[u.rs[i]][u.cs[i]] = u.prev[i];
}
static FillResult fillMask(Rng rng, char[][] mask, HashMap<Integer, DictEntry> dictIndex,
Map<String, Lemma> llmScores,
static FillResult fillMask(Rng rng, char[][] mask, DictEntry[] dictIndex,
int logEveryMs, int timeLimitMs, boolean verbose) {
var grid = deepCopyGrid(mask);
@@ -710,7 +706,7 @@ public class SwedishGenerator {
var k = s.key();
if (assigned.containsKey(k)) continue;
var entry = dictIndex.get(s.len);
var entry = dictIndex[s.len];
if (entry == null) {
return new Pick(null, null, false);
}
@@ -760,7 +756,7 @@ public class SwedishGenerator {
var s = pick.slot;
var k = s.key();
var entry = dictIndex.get(s.len);
var entry = dictIndex[s.len];
var pat = patternForSlot(grid, s);
Predicate<Lemma> tryWord = (Lemma w) -> {
@@ -783,7 +779,7 @@ public class SwedishGenerator {
if (backtrack()) return true;
assigned.remove(k);
used.set(w.index,false);
used.set(w.index, false);
undoPlace(grid, undo);
return false;
};