Gather data

This commit is contained in:
mike
2026-01-04 01:37:42 +01:00
parent 795067472f
commit 3e25ce3e1f
22 changed files with 233 additions and 1414 deletions

View File

@@ -132,24 +132,22 @@ public class SwedishGenerator {
int[] data() { return a; } // note: may have extra capacity
}
static final class DictEntry {
static record DictEntry(ArrayList<String> words, IntList[][] pos) {
final ArrayList<String> words = new ArrayList<>();
final IntList[][] pos; // pos[i][letter] -> indices (sorted by insertion)
DictEntry(int L) {
pos = new IntList[L][26];
public DictEntry(int L) {
this(new ArrayList<>(), new IntList[L][26]);
for (var i = 0; i < L; i++) {
for (var j = 0; j < 26; j++) pos[i][j] = new IntList();
}
}
}
static record WordDifficulty(String word, int difficulty, int simpel, int score, int cross) {
static record WordDifficulty(String word, int difficulty, int simpel, int score, int cross, String clue) {
public WordDifficulty(String word, int simpel, int score) {
public WordDifficulty(String word, int simpel, int score, String clue) {
var difficulty1 = 0 + ((8 - word.length()) * 30) + ((10 - score) * 15);
var crossScore = ThemePoolBuilderLength.crossabilityScore(word);
this(word, difficulty1, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15));
this(word, difficulty1, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15), clue);
// Prioritize simple words (high lScore) and long words.
// lScore (1-10) adds up to 1000 points (weight 100).
@@ -163,7 +161,6 @@ public class SwedishGenerator {
}
}
/**
 * Immutable bundle of the three lookup structures that make up a loaded dictionary.
 *
 * <p>Declared without an explicit {@code static}: a nested record class is implicitly static.
 *
 * @param words     map from a string key to its {@link WordDifficulty}
 *                  (presumably keyed by the word itself - confirm against the loader)
 * @param index     map from an integer key to a {@link DictEntry}
 *                  (NOTE(review): the key looks like the word length - confirm)
 * @param lenCounts map from an integer key to an integer count
 *                  (presumably number of words per length - confirm)
 */
public record Dict(
        Map<String, WordDifficulty> words,
        HashMap<Integer, DictEntry> index,
        HashMap<Integer, Integer> lenCounts) {}
@@ -193,8 +190,12 @@ public class SwedishGenerator {
// CSV has level 1-10. llmScores use 10-level.
score = 10 - Integer.parseInt(parts[1].trim());
simpel = Integer.parseInt(parts[2].trim());
var rawClue = parts[3].trim();
if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
}
if (score >= 1)
map.put(s, new WordDifficulty(s, simpel, score));
map.put(s, new WordDifficulty(s, simpel, score, rawClue));
}
}
var words = map.values().stream().collect(Collectors.toCollection(ArrayList::new));
@@ -682,11 +683,10 @@ public class SwedishGenerator {
System.out.flush();
};
class Pick {
record Pick(Slot slot,
CandidateInfo info,
boolean done) {
Slot slot;
CandidateInfo info;
boolean done;
}
java.util.function.Supplier<Pick> chooseMRV = () -> {
@@ -699,22 +699,14 @@ public class SwedishGenerator {
var entry = dictIndex.get(s.len);
if (entry == null) {
var p = new Pick();
p.slot = null;
p.info = null;
p.done = false;
return p;
return new Pick(null, null, false);
}
var pat = patternForSlot(grid, s);
var info = candidateInfoForPattern(entry, pat);
if (info.count == 0) {
var p = new Pick();
p.slot = null;
p.info = null;
p.done = false;
return p;
return new Pick(null, null, false);
}
if (best == null
@@ -726,17 +718,11 @@ public class SwedishGenerator {
}
}
var p = new Pick();
if (best == null) {
p.slot = null;
p.info = null;
p.done = true;
return new Pick(null, null, true);
} else {
p.slot = best;
p.info = bestInfo;
p.done = false;
return new Pick(best, bestInfo, false);
}
return p;
};
final var MAX_TRIES_PER_SLOT = 2000;
@@ -868,9 +854,9 @@ public class SwedishGenerator {
/**
 * Result triple handed back by puzzle generation.
 *
 * @param dict   the dictionary the puzzle was generated from
 * @param mask   two-dimensional character grid
 *               (NOTE(review): presumably the cell layout/mask of the puzzle - confirm)
 * @param filled the {@link FillResult} produced for that grid
 */
public record PuzzleResult(
        Dict dict,
        char[][] mask,
        FillResult filled) {}
public static PuzzleResult generatePuzzle(Main.Opts opts) {
var tLoad0 = System.nanoTime();
var dict = loadWords(opts.wordsPath);
var tLoad1 = System.nanoTime();
var tLoad0 = System.nanoTime();
var dict = loadWords(opts.wordsPath);
var tLoad1 = System.nanoTime();
System.out.printf(Locale.ROOT, "LOAD_WORDS: %.3fs%n %s words%n", (tLoad1 - tLoad0) / 1e9, dict.words.size());
if (opts.threads > 1) {