Gather data

This commit is contained in:
mike
2026-01-06 05:01:17 +01:00
parent 7705873798
commit ac81bc6032
2 changed files with 122 additions and 95 deletions

View File

@@ -27,8 +27,8 @@ public final class ExportFormat {
public static ExportedPuzzle exportFormatFromFilled(PuzzleResult puz, int difficulty, Rewards rewards) {
Objects.requireNonNull(puz, "puz");
var g = puz.filled().grid();
var H = g.length;
var W = g[0].length;
var H = g.H();
var W = g.W();
// 1) extract "placed" list from all clue digits in the filled grid
var placed = new ArrayList<Placed>();
@@ -47,10 +47,10 @@ public final class ExportFormat {
// If nothing placed: return full grid mapped to letters/# only
if (placed.isEmpty()) {
List<String> gridv2 = new ArrayList<>(H);
for (var chars : g) {
for (var r = 0; r < H; r++) {
var sb = new StringBuilder(W);
for (var c = 0; c < W; c++) {
var ch = chars[c];
var ch = g.getCharAt(r, c);
sb.append(isLetter(ch) ? ch : '#');
}
gridv2.add(sb.toString());
@@ -81,8 +81,8 @@ public final class ExportFormat {
for (var p : placed) {
for (var rc : p.cells) {
int rr = rc[0], cc = rc[1];
if (inBounds(H, W, rr, cc) && isLetter(g[rr][cc])) {
letterAt.put(pack(rr, cc), g[rr][cc]);
if (inBounds(H, W, rr, cc) && isLetter(g.getCharAt(rr, cc))) {
letterAt.put(pack(rr, cc), g.getCharAt(rr, cc));
}
}
}

View File

@@ -81,46 +81,71 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
}
/**
 * Restricts {@code x} to the range bounded below by {@code a} and above by {@code b}.
 * The upper bound is applied first and the lower bound second, so {@code a} wins
 * if the bounds are passed in inverted order.
 */
static int clamp(int x, int a, int b) {
    int capped = Math.min(b, x);
    return Math.max(a, capped);
}
record Grid(char[][] g,int H,int W) {
public Grid(char[][] grid) {
this(grid, grid.length, grid[0].length);
}
static final int BITS_PER_CELL = 1;
record Grid(byte[] g, int H, int W) {
/**
 * Returns a new Grid whose backing storage is a copy of this grid's storage,
 * so writes to the returned grid do not touch this instance's array.
 */
Grid deepCopyGrid() {
var out = new char[H][W];
for (var r = 0; r < H; r++) out[r] = Arrays.copyOf(g[r], W);
return new Grid(out);
return new Grid(g.clone(), H, W);
}
/**
 * Converts a (row, column) pair into an index into the flat backing array {@code g},
 * laid out row by row and scaled by {@code BITS_PER_CELL}.
 */
int getOffset(int r, int c) {
    int cellIndex = r * W + c;
    return cellIndex * BITS_PER_CELL;
}
/** Returns the raw stored code of the cell at (r, c), widened from byte to int. */
int getCellValue(int r, int c) {
    return g[getOffset(r, c)];
}
/**
 * Decodes the cell at (r, c) into its character form.
 * Code 0 is '#', codes 1..26 are 'A'..'Z', codes 27..32 are '1'..'6';
 * any other stored code yields the NUL character.
 */
char getCharAt(int r, int c) {
    final int code = getCellValue(r, c);
    char decoded;
    if (code == 0) {
        decoded = '#';
    } else if (1 <= code && code <= 26) {
        decoded = (char) ('A' + (code - 1));
    } else if (27 <= code && code <= 32) {
        decoded = (char) ('1' + (code - 27));
    } else {
        decoded = 0;
    }
    return decoded;
}
/**
 * Encodes {@code ch} and stores it in the cell at (r, c).
 * 'A'..'Z' become 1..26 and '1'..'6' become 27..32; '#' and every other
 * character are stored as 0.
 */
void setCharAt(int r, int c, char ch) {
    final int code;
    if ('A' <= ch && ch <= 'Z') {
        code = (ch - 'A') + 1;
    } else if ('1' <= ch && ch <= '6') {
        code = (ch - '1') + 27;
    } else {
        // '#' and any unrecognised character share the empty code.
        code = 0;
    }
    g[getOffset(r, c)] = (byte) code;
}
/** Reports whether the cell at (r, c) holds a letter code (1..26). */
boolean isLetterAt(int r, int c) {
    final int code = getCellValue(r, c);
    return !(code < 1 || code > 26);
}
/** Reports whether the cell at (r, c) holds a digit code (27..32). */
boolean isDigitAt(int r, int c) {
    final int code = getCellValue(r, c);
    return !(code < 27 || code > 32);
}
}
// ---------------- Grid helpers ----------------
/**
 * Creates a blank H x W grid for this generator.
 * NOTE(review): the byte[] variant allocates H * W elements, while Grid.getOffset
 * multiplies indices by BITS_PER_CELL; the two only agree while that constant is 1.
 */
char[][] makeEmptyGrid() {
var g = new char[H][W];
for (var r = 0; r < H; r++) Arrays.fill(g[r], '#');
return g;
Grid makeEmptyGrid() {
var g = new byte[H * W];// * BITS_PER_CELL);
return new Grid(g, H, W);
}
char[][] deepCopyGrid(char[][] g) {
return new Grid(g).deepCopyGrid().g;
/*var out = new char[H][W];
for (var r = 0; r < H; r++) out[r] = Arrays.copyOf(g[r], W);
return out;*/
}
/** Renders the grid as text: one line per row, rows separated by a newline, no trailing newline. */
String gridToString(char[][] g) {
String gridToString(Grid g) {
var sb = new StringBuilder();
for (var r = 0; r < H; r++) {
// Separator goes before every row except the first.
if (r > 0) sb.append('\n');
sb.append(g[r]);
for (var c = 0; c < W; c++) sb.append(g.getCharAt(r, c));
}
return sb.toString();
}
public String renderHuman(char[][] g) {
public String renderHuman(Grid g) {
var sb = new StringBuilder();
for (var r = 0; r < H; r++) {
if (r > 0) sb.append('\n');
for (var c = 0; c < W; c++) {
var ch = g[r][c];
var ch = g.getCharAt(r, c);
sb.append(isDigit(ch) ? ' ' : ch);
}
}
@@ -185,26 +210,26 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
public Dict(Lemma[] wordz) {
// Sort words by difficulty in ascending order
Lemma[] words = wordz.clone();
Arrays.sort(words, Comparator.comparingInt(wd -> wd.simpel));
Lemma[] lemmas = wordz.clone();
Arrays.sort(lemmas, Comparator.comparingInt(wd -> wd.simpel));
var lenCounts = new int[12];
var index = new DictEntry[12];
Arrays.setAll(index, i -> new DictEntry(i));
int maxLength = -1;
for (var w : words) {
var L = w.length();
for (var lemma : lemmas) {
var L = lemma.length();
if (L > maxLength) maxLength = L;
lenCounts[L]++;
var entry = index[L];
var idx = entry.words.size();
entry.words.add(w);
entry.words.add(lemma);
for (var i = 0; i < L; i++) {
var letter = w.charAt(i) - 'A';
var letter = lemma.charAt(i) - 'A';
if (letter >= 0 && letter < 26) entry.pos[i][letter].add(idx);
else throw new RuntimeException("Illegal letter: " + letter + " in word " + w);
else throw new RuntimeException("Illegal letter: " + letter + " in word " + lemma);
}
}
this(wordz, index, lenCounts);
@@ -308,28 +333,26 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
public Slot(int clueR, int clueC, char dir, int[] rs, int[] cs) { this(clueR + "," + clueC + ":" + dir, clueR, clueC, dir, rs, cs, rs.length); }
}
ArrayList<Slot> extractSlots(char[][] grid) {
ArrayList<Slot> extractSlots(Grid grid) {
var slots = new ArrayList<Slot>();
for (var r = 0; r < H; r++) {
for (var c = 0; c < W; c++) {
var d = grid[r][c];
var d = grid.getCharAt(r, c);
if (!isDigit(d)) continue;
var dir = d - '0';
// Check all possible directions for clue placement
// for (int dir = 1; dir <= 4; dir++) {
int or = OFFSETS[dir][0], oc = OFFSETS[dir][1];
int dr = STEPS[dir][0], dc = STEPS[dir][1];
int or = OFFSETS[dir][0], oc = OFFSETS[dir][1];
int dr = STEPS[dir][0], dc = STEPS[dir][1];
int rr = r + or, cc = c + oc;
if (rr < 0 || rr >= H || cc < 0 || cc >= W) continue;
if (!isLetterCell(grid[rr][cc])) continue;
if (!isLetterCell(grid.getCharAt(rr, cc))) continue;
var rs = new int[MAX_LEN + 1];
var cs = new int[MAX_LEN + 1];
var n = 0;
while (rr >= 0 && rr < H && cc >= 0 && cc < W) {
var ch = grid[rr][cc];
var ch = grid.getCharAt(rr, cc);
if (!isLetterCell(ch)) break;
rs[n] = rr;
cs[n] = cc;
@@ -340,18 +363,18 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
}
slots.add(new Slot(r, c, d, Arrays.copyOf(rs, n), Arrays.copyOf(cs, n)));
// }
}
}
return slots;
}
boolean hasRoomForClue(char[][] grid, int r, int c, char d) {
boolean hasRoomForClue(Grid grid, int r, int c, char d) {
var di = d - '0';
int or = OFFSETS[di][0], oc = OFFSETS[di][1];
int dr = STEPS[di][0], dc = STEPS[di][1];
int rr = r + or, cc = c + oc;
var run = 0;
while (rr >= 0 && rr < H && cc >= 0 && cc < W && isLetterCell(grid[rr][cc]) && run < MAX_LEN) {
while (rr >= 0 && rr < H && cc >= 0 && cc < W && isLetterCell(grid.getCharAt(rr, cc)) && run < MAX_LEN) {
run++;
rr += dr;
cc += dc;
@@ -373,11 +396,12 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
new nbrs_8(1, 1)
};
static final int[][] nbrs4 = new int[][]{ { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 } };
long maskFitness(char[][] grid, int[] lenCounts) {
long maskFitness(Grid grid, int[] lenCounts) {
long penalty = 0;
var clueCount = 0;
for (var r = 0; r < H; r++) for (var c = 0; c < W; c++) if (isDigit(grid[r][c])) clueCount++;
for (var r = 0; r < H; r++) for (var c = 0; c < W; c++) if (grid.isDigitAt(r, c)) clueCount++;
var targetClues = (int) Math.round(SIZE * 0.25); // ~18
penalty += 8L * Math.abs(clueCount - targetClues);
@@ -407,7 +431,7 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
for (var r = 0; r < H; r++)
for (var c = 0; c < W; c++) {
if (!isLetterCell(grid[r][c])) continue;
if (!isLetterCell(grid.getCharAt(r, c))) continue;
int h = covH[r][c], v = covV[r][c];
if (h == 0 && v == 0) penalty += 1500;
else if (h > 0 && v > 0) { /* ok */ } else if (h + v == 1) penalty += 200;
@@ -421,7 +445,7 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
for (var r = 0; r < H; r++)
for (var c = 0; c < W; c++) {
if (!isDigit(grid[r][c]) || seen[r][c]) continue;
if (!grid.isDigitAt(r, c) || seen[r][c]) continue;
sp = 0;
stack[sp++] = r * W + c;
seen[r][c] = true;
@@ -436,7 +460,7 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
int nx = x + d.x, ny = y + d.y;
if (nx < 0 || nx >= H || ny < 0 || ny >= W) continue;
if (seen[nx][ny]) continue;
if (!isDigit(grid[nx][ny])) continue;
if (!grid.isDigitAt(nx, ny)) continue;
seen[nx][ny] = true;
stack[sp++] = nx * W + ny;
}
@@ -449,7 +473,7 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
for (var r = 0; r < H; r++)
for (var c = 0; c < W; c++) {
if (!isLetterCell(grid[r][c])) continue;
if (!isLetterCell(grid.getCharAt(r, c))) continue;
var walls = 0;
for (var d : nbrs4) {
int rr = r + d[0], cc = c + d[1];
@@ -457,7 +481,7 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
walls++;
continue;
}
if (!isLetterCell(grid[rr][cc])) walls++;
if (!isLetterCell(grid.getCharAt(rr, cc))) walls++;
}
if (walls >= 3) penalty += 400;
}
@@ -467,7 +491,7 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
// ---------------- Mask generation ----------------
char[][] randomMask(Rng rng) {
Grid randomMask(Rng rng) {
var g = makeEmptyGrid();
var targetClues = (int) Math.round(SIZE * 0.25);
int placed = 0, guard = 0;
@@ -475,12 +499,12 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
while (placed < targetClues && guard++ < 4000) {
var r = rng.randint(0, H - 1);
var c = rng.randint(0, W - 1);
if (isDigit(g[r][c])) continue;
if (g.isDigitAt(r, c)) continue;
var d = (char) ('0' + rng.randint(1, c == 0 ? CLUE_SIZE : 4));
g[r][c] = d;
g.setCharAt(r, c, d);
if (!hasRoomForClue(g, r, c, d)) {
g[r][c] = '#';
g.setCharAt(r, c, '#');
continue;
}
placed++;
@@ -488,8 +512,8 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
return g;
}
char[][] mutate(Rng rng, char[][] grid) {
var g = deepCopyGrid(grid);
Grid mutate(Rng rng, Grid grid) {
var g = grid.deepCopyGrid();
var cx = rng.randint(0, H - 1);
var cy = rng.randint(0, W - 1);
@@ -498,19 +522,19 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
var rr = clamp(cx + (rng.randint(-2, 2) + rng.randint(-2, 2)), 0, H - 1);
var cc = clamp(cy + (rng.randint(-2, 2) + rng.randint(-2, 2)), 0, W - 1);
var cur = g[rr][cc];
var cur = g.getCharAt(rr, cc);
if (isDigit(cur)) {
g[rr][cc] = '#';
g.setCharAt(rr, cc, '#');
} else {
var d = (char) ('0' + rng.randint(1, cc == 0 ? CLUE_SIZE : 4));
g[rr][cc] = d;
if (!hasRoomForClue(g, rr, cc, d)) g[rr][cc] = '#';
g.setCharAt(rr, cc, d);
if (!hasRoomForClue(g, rr, cc, d)) g.setCharAt(rr, cc, '#');
}
}
return g;
}
char[][] crossover(Rng rng, char[][] a, char[][] b) {
Grid crossover(Rng rng, Grid a, Grid b) {
var out = makeEmptyGrid();
var cx = (H - 1) / 2.0;
var cy = (W - 1) / 2.0;
@@ -522,19 +546,19 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
for (var c = 0; c < W; c++) {
double x = r - cx, y = c - cy;
var side = x * nx + y * ny;
out[r][c] = (side >= 0) ? a[r][c] : b[r][c];
out.setCharAt(r, c, (side >= 0) ? a.getCharAt(r, c) : b.getCharAt(r, c));
}
for (var r = 0; r < H; r++)
for (var c = 0; c < W; c++) {
var ch = out[r][c];
if (isDigit(ch) && !hasRoomForClue(out, r, c, ch)) out[r][c] = '#';
var ch = out.getCharAt(r, c);
if (isDigit(ch) && !hasRoomForClue(out, r, c, ch)) out.setCharAt(r, c, '#');
}
return out;
}
char[][] hillclimb(Rng rng, char[][] start, int[] lenCounts, int limit) {
var best = deepCopyGrid(start);
Grid hillclimb(Rng rng, Grid start, int[] lenCounts, int limit) {
var best = start.deepCopyGrid();
var bestF = maskFitness(best, lenCounts);
var fails = 0;
@@ -552,15 +576,15 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
return best;
}
/** Returns the fraction of the W * H cell positions at which grids a and b hold the same character. */
double similarity(char[][] a, char[][] b) {
double similarity(Grid a, Grid b) {
var same = 0;
for (var r = 0; r < H; r++) for (var c = 0; c < W; c++) if (a[r][c] == b[r][c]) same++;
for (var r = 0; r < H; r++) for (var c = 0; c < W; c++) if (a.getCharAt(r, c) == b.getCharAt(r, c)) same++;
return same / (double) (W * H);
}
public char[][] generateMask(Rng rng, int[] lenCounts, int popSize, int gens, boolean verbose) {
public Grid generateMask(Rng rng, int[] lenCounts, int popSize, int gens, boolean verbose) {
if (verbose) System.out.println("generateMask init pop: " + popSize);
var pop = new ArrayList<char[][]>();
var pop = new ArrayList<Grid>();
for (var i = 0; i < popSize; i++) {
var g = randomMask(rng);
@@ -569,7 +593,7 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
for (var gen = 0; gen < gens; gen++) {
if (Thread.currentThread().isInterrupted()) break;
var children = new ArrayList<char[][]>();
var children = new ArrayList<Grid>();
var pairs = Math.max(popSize, (int) Math.floor(popSize * 1.5));
for (var k = 0; k < pairs; k++) {
@@ -582,7 +606,7 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
pop.addAll(children);
pop.sort(Comparator.comparingLong(g -> maskFitness(g, lenCounts)));
var next = new ArrayList<char[][]>();
var next = new ArrayList<Grid>();
for (var cand : pop) {
if (next.size() >= popSize) break;
var ok = true;
@@ -622,12 +646,12 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
boolean done) { }
public static final record FillResult(boolean ok,
char[][] grid,
Grid grid,
HashMap<String, Lemma> clueMap,
FillStats stats,
double simplicity) {
public FillResult(boolean ok, char[][] grid, HashMap<String, Lemma> assigned, FillStats stats) {
public FillResult(boolean ok, Grid grid, HashMap<String, Lemma> assigned, FillStats stats) {
double totalSimplicity = 0;
if (ok) {
for (var w : assigned.values()) totalSimplicity += w.difficulty;
@@ -637,10 +661,10 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
}
}
static char[] patternForSlot(char[][] grid, Slot s) {
static char[] patternForSlot(Grid grid, Slot s) {
var pat = new char[s.len];
for (var i = 0; i < s.len; i++) {
var ch = grid[s.rs[i]][s.cs[i]];
var ch = grid.getCharAt(s.rs[i], s.cs[i]);
pat[i] = isLetter(ch) ? ch : 0;
}
return pat;
@@ -652,39 +676,42 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
return cross * 10 + s.len;
}
static Undo placeWord(char[][] grid, Slot s, Lemma w) {
static Undo placeWord(Grid grid, Slot s, Lemma w) {
var urs = new int[s.len];
var ucs = new int[s.len];
var up = new char[s.len];
var n = 0;
for (var i = 0; i < s.len; i++) {
int r = s.rs[i], c = s.cs[i];
var prev = grid[r][c];
var ch = w.charAt(i);
if (prev == '#') {
int r = s.rs[i], c = s.cs[i];
int val = grid.getCellValue(r, c);
var ch = w.charAt(i);
if (val == 0) { // '#'
urs[n] = r;
ucs[n] = c;
up[n] = prev;
up[n] = '#';
n++;
grid[r][c] = ch;
} else if (prev != ch) {
// rollback immediate changes
for (var j = 0; j < n; j++) grid[urs[j]][ucs[j]] = up[j];
return null;
grid.setCharAt(r, c, ch);
} else {
int targetVal = ch - 'A' + 1;
if (val != targetVal) {
// rollback immediate changes
for (var j = 0; j < n; j++) grid.setCharAt(urs[j], ucs[j], up[j]);
return null;
}
}
}
return new Undo(urs, ucs, up, n);
}
/** Reverts a placement by writing the first u.n recorded previous characters back to their cells. */
static void undoPlace(char[][] grid, Undo u) {
for (var i = 0; i < u.n; i++) grid[u.rs[i]][u.cs[i]] = u.prev[i];
static void undoPlace(Grid grid, Undo u) {
for (var i = 0; i < u.n; i++) grid.setCharAt(u.rs[i], u.cs[i], u.prev[i]);
}
public FillResult fillMask(Rng rng, char[][] mask, DictEntry[] dictIndex,
public FillResult fillMask(Rng rng, Grid mask, DictEntry[] dictIndex,
int logEveryMs, int timeLimitMs, boolean verbose) {
var grid = deepCopyGrid(mask);
var grid = mask.deepCopyGrid();
var allSlots = extractSlots(grid);
var slots = new ArrayList<Slot>();
for (var s : allSlots) if (s.len >= MIN_LEN && s.len <= MAX_LEN) slots.add(s);
@@ -875,6 +902,6 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
}
// ---------------- Top-level generatePuzzle ----------------
public record PuzzleResult(SwedishGenerator swe, Dict dict, char[][] mask, FillResult filled) { }
public record PuzzleResult(SwedishGenerator swe, Dict dict, Grid mask, FillResult filled) { }
}