Gather data

This commit is contained in:
mike
2026-01-06 02:22:26 +01:00
parent 42a69235d2
commit f031e97a58
5 changed files with 143 additions and 164 deletions

View File

@@ -6,7 +6,6 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.function.Predicate;
import java.util.stream.Collectors;
/**
* SwedishGenerator.java
@@ -19,6 +18,7 @@ import java.util.stream.Collectors;
public class SwedishGenerator {
static final int W = 9, H = 8,
SIZE = W * H,
CLUE_SIZE = 4,
SIMPLICITY_DEFAULT_SCORE = 2;
static final int MIN_LEN = 2, MAX_LEN = 8;
@@ -138,15 +138,14 @@ public class SwedishGenerator {
}
}
static record Lemma(int index, String word, int length, int difficulty, int simpel, int score, int cross, ArrayList<String> clue) {
static record Lemma(int index, String word, int length, int difficulty, int simpel, int score, ArrayList<String> clue) {
static int LEMMA_COUNTER = 0;
public Lemma(String word, int simpel, int score, String clue) {
var complex = 0 + ((8 - word.length()) * 30) + ((10 - score) * 15);
var crossScore = ThemePoolBuilderLength.crossabilityScore(word);
var list = new ArrayList<String>(10);
public Lemma(int index, String word, int simpel, int score, String clue) {
var complex = 0 + ((8 - word.length()) * 30) + ((10 - score) * 15);
var list = new ArrayList<String>(10);
list.add(clue);
this(++LEMMA_COUNTER, word, word.length(), complex, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15), list);
this(index, word, word.length(), complex, simpel, score, list);
// Prioritize simple words (high lScore) and long words.
// lScore (1-10) adds up to 1000 points (weight 100).
@@ -158,17 +157,46 @@ public class SwedishGenerator {
// Length impact: up to 8 * 10 = 80
// Score impact: up to 10 * 15 = 150
}
char charAt(int idx) { return word.charAt(idx); }
@Override public int hashCode() { return index; }
public Lemma(String word, int simpel, int score, String clue) { this(LEMMA_COUNTER++, word, simpel, score, clue); }
char charAt(int idx) { return word.charAt(idx); }
@Override public int hashCode() { return index; }
/**
 * Identity-by-index equality: another object is equal to this lemma when it is
 * the same reference, or when it is a {@code Lemma} whose {@code index} matches.
 * No other record component takes part in the comparison.
 */
@Override public boolean equals(Object o) {
    return this == o || (o instanceof Lemma other && other.index == index);
}
}
public static record Dict(Map<String, Lemma> words,
public static record Dict(Lemma[] wordz,
DictEntry[] index,
int[] lenCounts) { }
int[] lenCounts) {
/**
 * Builds the per-length lookup tables from a flat array of lemmas and delegates
 * to the canonical constructor.
 *
 * <p>A sorted copy of {@code wordz} (ascending {@code simpel}) is walked once; for
 * every lemma the count for its length is incremented, the lemma is appended to the
 * {@code DictEntry} for that length, and its position in that entry's word list is
 * recorded under each (character position, letter) pair.
 *
 * @param wordz lemmas to index; the array itself is stored as given (unsorted) and
 *              is not modified here
 * @throws RuntimeException if a lemma contains a character outside {@code 'A'..'Z'}
 */
public Dict(Lemma[] wordz) {
    // Sort a copy by the `simpel` value, ascending, so the caller's array keeps its order.
    Lemma[] words = wordz.clone();
    Arrays.sort(words, Comparator.comparingInt(wd -> wd.simpel));
    // Both tables are indexed directly by word length.
    var lenCounts = new int[12];
    var index = new DictEntry[12];
    Arrays.setAll(index, i -> new DictEntry(i));
    for (var w : words) {
        var L = w.length();
        lenCounts[L]++;
        var entry = index[L];
        // idx is the position this lemma will occupy in entry.words.
        var idx = entry.words.size();
        entry.words.add(w);
        for (var i = 0; i < L; i++) {
            var ch = w.charAt(i);
            var letter = ch - 'A';
            if (letter >= 0 && letter < 26) entry.pos[i][letter].add(idx);
            // Report the offending character itself rather than its numeric offset from 'A'.
            else throw new RuntimeException("Illegal letter: " + ch + " in word " + w);
        }
    }
    this(wordz, index, lenCounts);
}
}
static Dict loadWords(String wordsPath) {
String raw;
try {
@@ -191,11 +219,10 @@ public class SwedishGenerator {
first = false;
var s = word.toUpperCase(Locale.ROOT);
if (s.matches("^[A-Z]{2,8}$")) {
int score = SIMPLICITY_DEFAULT_SCORE;
int simpel = 0;
// CSV has level 1-10. llmScores use 10-level.
score = 10 - Integer.parseInt(parts[1].trim());
simpel = Integer.parseInt(parts[2].trim());
int score = 10 - Integer.parseInt(parts[1].trim());
int simpel = Integer.parseInt(parts[2].trim());
var rawClue = parts[3].trim();
if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
@@ -207,33 +234,12 @@ public class SwedishGenerator {
map.put(s, new Lemma(s, simpel, score, rawClue));
}
}
}
}
var words = map.values().stream().collect(Collectors.toCollection(ArrayList::new));
// Sort words by difficulty in ascending order
words.sort(Comparator.comparingInt(wd -> wd.simpel));
var lenCounts = new int[12];
var index = new DictEntry[12];
Arrays.setAll(index, i -> new DictEntry(i));
int maxLength = -1;
for (var w : words) {
var L = w.length();
if (L > maxLength) maxLength = L;
lenCounts[L]++;
var entry = index[L];
var idx = entry.words.size();
entry.words.add(w);
for (var i = 0; i < L; i++) {
var letter = w.charAt(i) - 'A';
if (letter >= 0 && letter < 26) entry.pos[i][letter].add(idx);
else throw new RuntimeException("Illegal letter: " + letter + " in word " + w);
} else {
System.err.println("Invalid word: " + line);
}
}
return new Dict(map, index, lenCounts);
return new Dict(map.values().toArray(Lemma[]::new));
}
static int[] intersectSorted(int[] a, int aLen, int[] b, int bLen) {
@@ -284,13 +290,9 @@ public class SwedishGenerator {
}
// ---------------- Slots ----------------
static record Slot(int clueR, int clueC, char dir, int[] rs, int[] cs, int len) {
static record Slot(String key, int clueR, int clueC, char dir, int[] rs, int[] cs, int len) {
public Slot(int clueR, int clueC, char dir, int[] rs, int[] cs) {
this(clueR, clueC, dir, rs, cs, rs.length);
}
String key() { return clueR + "," + clueC + ":" + dir; }
public Slot(int clueR, int clueC, char dir, int[] rs, int[] cs) { this(clueR + "," + clueC + ":" + dir, clueR, clueC, dir, rs, cs, rs.length); }
}
static ArrayList<Slot> extractSlots(char[][] grid) {
@@ -345,14 +347,19 @@ public class SwedishGenerator {
}
// ---------------- FAST mask fitness ----------------
/**
 * The eight offset pairs surrounding (0, 0), i.e. every combination of -1/0/1
 * except {0, 0} itself.
 * NOTE(review): presumably each pair is {row delta, column delta} - confirm at the use sites.
 */
static final int[][] nbrs8 = {
    { -1, -1 }, { -1, 0 }, { -1, 1 },
    { 0, -1 }, { 0, 1 },
    { 1, -1 }, { 1, 0 }, { 1, 1 }
};
/** The four axis-aligned offset pairs: {-1, 0}, {1, 0}, {0, -1}, {0, 1}. */
static final int[][] nbrs4 = { { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 } };
static long maskFitness(char[][] grid, int[] lenCounts) {
long penalty = 0;
var clueCount = 0;
for (var r = 0; r < H; r++) for (var c = 0; c < W; c++) if (isDigit(grid[r][c])) clueCount++;
var targetClues = (int) Math.round(W * H * 0.25); // ~18
var targetClues = (int) Math.round(SIZE * 0.25); // ~18
penalty += 8L * Math.abs(clueCount - targetClues);
var slots = extractSlots(grid);
@@ -368,7 +375,7 @@ public class SwedishGenerator {
if (s.len > MAX_LEN) penalty += 8000 + (long) (s.len - MAX_LEN) * 500L;
if (s.len >= MIN_LEN && s.len <= MAX_LEN) {
if (lenCounts[s.len]<=0) penalty += 12000;
if (lenCounts[s.len] <= 0) penalty += 12000;
}
for (var i = 0; i < s.len; i++) {
@@ -389,13 +396,8 @@ public class SwedishGenerator {
// clue clustering (8-connected)
var seen = new boolean[H][W];
var stack = new int[W * H];
var stack = new int[SIZE];
int sp;
var nbrs8 = new int[][]{
{ -1, -1 }, { -1, 0 }, { -1, 1 },
{ 0, -1 }, { 0, 1 },
{ 1, -1 }, { 1, 0 }, { 1, 1 }
};
for (var r = 0; r < H; r++)
for (var c = 0; c < W; c++) {
@@ -424,7 +426,7 @@ public class SwedishGenerator {
}
// dead-end-ish letter cell (3+ walls)
var nbrs4 = new int[][]{ { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 } };
for (var r = 0; r < H; r++)
for (var c = 0; c < W; c++) {
if (!isLetterCell(grid[r][c])) continue;
@@ -447,7 +449,7 @@ public class SwedishGenerator {
static char[][] randomMask(Rng rng) {
var g = makeEmptyGrid();
var targetClues = (int) Math.round(W * H * 0.25);
var targetClues = (int) Math.round(SIZE * 0.25);
int placed = 0, guard = 0;
while (placed < targetClues && guard++ < 4000) {
@@ -594,17 +596,23 @@ public class SwedishGenerator {
public int lastMRV;
}
public static final class FillResult {
public static final record FillResult(boolean ok,
char[][] grid,
HashMap<String, Lemma> clueMap,
FillStats stats,
double simplicity) {
public boolean ok;
public char[][] grid;
public HashMap<String, Lemma> clueMap;
public FillStats stats;
public double simplicity;
/**
 * Convenience constructor that derives the {@code simplicity} component.
 *
 * <p>When {@code ok} is true, simplicity is the arithmetic mean of {@code difficulty}
 * over {@code assigned.values()}, or 0 when the map is empty. When {@code ok} is
 * false, simplicity is 0 and {@code assigned} is not read.
 *
 * @param ok       whether the fill succeeded
 * @param grid     grid to store in the result
 * @param assigned map stored as {@code clueMap}
 * @param stats    statistics to store in the result
 */
public FillResult(boolean ok, char[][] grid, HashMap<String, Lemma> assigned, FillStats stats) {
    double mean = 0;
    if (ok && !assigned.isEmpty()) {
        double sum = 0;
        for (var lemma : assigned.values()) sum += lemma.difficulty;
        mean = sum / assigned.size();
    }
    this(ok, grid, assigned, stats, mean);
}
}
record Undo(int[] rs, int[] cs, char[] prev, int n) {
}
record Undo(int[] rs, int[] cs, char[] prev, int n) { }
static char[] patternForSlot(char[][] grid, Slot s) {
var pat = new char[s.len];
@@ -645,11 +653,13 @@ public class SwedishGenerator {
}
return new Undo(urs, ucs, up, n);
}
static final int MAX_TRIES_PER_SLOT = 2000;
/**
 * Writes the saved characters back into {@code grid}: for each of the first
 * {@code u.n} recorded entries, in recorded order, the cell at
 * ({@code u.rs[k]}, {@code u.cs[k]}) is set to {@code u.prev[k]}.
 */
static void undoPlace(char[][] grid, Undo u) {
    int k = 0;
    while (k < u.n) {
        grid[u.rs[k]][u.cs[k]] = u.prev[k];
        k++;
    }
}
/**
 * Value produced by the slot-selection step: a slot, the candidate information
 * for it, and a {@code done} flag.
 * NOTE(review): the exact meaning of {@code done} is not visible from this declaration - confirm at the producer.
 */
record Pick(Slot slot, CandidateInfo info, boolean done) { }
static FillResult fillMask(Rng rng, char[][] mask, DictEntry[] dictIndex,
int logEveryMs, int timeLimitMs, boolean verbose) {
@@ -692,12 +702,6 @@ public class SwedishGenerator {
System.out.flush();
};
record Pick(Slot slot,
CandidateInfo info,
boolean done) {
}
java.util.function.Supplier<Pick> chooseMRV = () -> {
Slot best = null;
CandidateInfo bestInfo = null;
@@ -734,7 +738,6 @@ public class SwedishGenerator {
}
};
IO.println("hit");
final var MAX_TRIES_PER_SLOT = 2000;
class Solver {
@@ -829,20 +832,8 @@ public class SwedishGenerator {
System.out.print("\r" + padRight("", 120) + "\r");
System.out.flush();
var res = new FillResult();
res.ok = ok;
res.grid = grid;
res.clueMap = assigned;
stats.seconds = (System.currentTimeMillis() - t0) / 1000.0;
res.stats = stats;
if (ok) {
double totalSimplicity = 0;
for (var w : assigned.values()) {
totalSimplicity += w.difficulty;
}
res.simplicity = assigned.isEmpty() ? 0 : totalSimplicity / assigned.size();
}
var res = new FillResult(ok, grid, assigned, stats);
// print a final progress line
if (verbose) {