Gather data

This commit is contained in:
mike
2026-01-06 18:51:59 +01:00
parent 4b7e6deb91
commit 73f06a2b13

View File

@@ -17,6 +17,8 @@ import java.util.function.Predicate;
@SuppressWarnings("ALL") @SuppressWarnings("ALL")
public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) { public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
record nbrs_8(int x, int y) { }
class Data { class Data {
static byte[] EXAMPLE = new byte[0]; static byte[] EXAMPLE = new byte[0];
@@ -33,36 +35,49 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
static final int MIN_LEN = 2; static final int MIN_LEN = 2;
static final int MAX_TRIES_PER_SLOT = 2000; static final int MAX_TRIES_PER_SLOT = 2000;
// Directions for '1'..'6' // Directions for '1'..'6'
static final int[][] OFFSETS = new int[7][2]; static final nbrs_8[] OFFSETS = new nbrs_8[7];
static final int[][] STEPS = new int[7][2]; static final nbrs_8[] STEPS = new nbrs_8[7];
static { static {
// 1: up // 1: up
OFFSETS[1] = new int[]{ -1, 0 }; OFFSETS[1] = new nbrs_8(-1, 0);
STEPS[1] = new int[]{ -1, 0 }; STEPS[1] = new nbrs_8(-1, 0);
// 2: right // 2: right
OFFSETS[2] = new int[]{ 0, 1 }; OFFSETS[2] = new nbrs_8(0, 1);
STEPS[2] = new int[]{ 0, 1 }; STEPS[2] = new nbrs_8(0, 1);
// 3: down // 3: down
OFFSETS[3] = new int[]{ 1, 0 }; OFFSETS[3] = new nbrs_8(1, 0);
STEPS[3] = new int[]{ 1, 0 }; STEPS[3] = new nbrs_8(1, 0);
// 4: left // 4: left
OFFSETS[4] = new int[]{ 0, -1 }; OFFSETS[4] = new nbrs_8(0, -1);
STEPS[4] = new int[]{ 0, -1 }; STEPS[4] = new nbrs_8(0, -1);
// 5: vertical down, clue is on the right of the first letter // 5: vertical down, clue is on the right of the first letter
OFFSETS[5] = new int[]{ 0, -1 }; OFFSETS[5] = new nbrs_8(0, -1);
STEPS[5] = new int[]{ 1, 0 }; STEPS[5] = new nbrs_8(1, 0);
// 6: vertical down, clue is on the left of the first letter // 6: vertical down, clue is on the left of the first letter
OFFSETS[6] = new int[]{ 0, 1 }; OFFSETS[6] = new nbrs_8(0, 1);
STEPS[6] = new int[]{ 1, 0 }; STEPS[6] = new nbrs_8(1, 0);
} }
// All eight surrounding cells as (row delta, column delta), scanned row by row.
static final nbrs_8[] nbrs8 = {
    new nbrs_8(-1, -1), new nbrs_8(-1, 0), new nbrs_8(-1, 1),
    new nbrs_8(0, -1),                     new nbrs_8(0, 1),
    new nbrs_8(1, -1),  new nbrs_8(1, 0),  new nbrs_8(1, 1)
};
// The four edge-adjacent cells: up, down, left, right.
static final nbrs_8[] nbrs4 = {
    new nbrs_8(-1, 0),
    new nbrs_8(1, 0),
    new nbrs_8(0, -1),
    new nbrs_8(0, 1)
};
// Inclusive character ranges for grid letters and clue-arrow digits.
static final char FIRST_ABC = 'A';
static final char LAST_ABC = 'Z';
static final char FIRST_ARROW = '1';
static final char LAST_ARROW = '6';
// The two horizontal arrow digits ('2' is right, '4' is left).
static final char HOR_ARROW_1 = '2';
static final char HOR_ARROW_2 = '4';

/** Returns true when {@code ch} lies in FIRST_ARROW..LAST_ARROW. */
static boolean isDigit(char ch) {
    return !(ch < FIRST_ARROW || ch > LAST_ARROW);
}

/** Returns true when {@code ch} lies in FIRST_ABC..LAST_ABC. */
static boolean isLetter(char ch) {
    return !(ch < FIRST_ABC || ch > LAST_ABC);
}
static boolean isLetterCell(char ch) { return ch == '#' || isLetter(ch); }
// ---------------- RNG (xorshift32) ----------------
static final class Rng { static final class Rng {
@@ -89,40 +104,27 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
} }
static int clamp(int x, int a, int b) { return Math.max(a, Math.min(b, x)); } static int clamp(int x, int a, int b) { return Math.max(a, Math.min(b, x)); }
/**
 * Pairs an array of indices with a count; returned by candidateInfoForPattern.
 * NOTE(review): presumably only the first {@code count} entries of
 * {@code indices} are meaningful — confirm against the producer.
 */
static final record CandidateInfo(int[] indices, int count) { }
/**
 * A grid stored as a flat, row-major byte array with one cell per byte.
 *
 * @param g backing cell array, indexed by {@code r * W + c}
 * @param H number of rows
 * @param W number of columns
 */
record Grid(byte[] g, int H, int W) {
    /**
     * Byte codes bounding the digit range ('1'..'9') and the letter range ('A'..'Z').
     * NOTE(review): the digit range ends at '9' here, while the file-level arrow
     * digits run '1'..'6' — confirm the wider range is intended.
     */
    static final byte _1 = '1', _9 = '9', A = 'A', Z = 'Z';

    /** Returns a grid of the same dimensions backed by a copy of the cell array. */
    Grid deepCopyGrid() {
        return new Grid(g.clone(), H, W);
    }

    /** Maps a (row, column) pair to its index in the backing array. */
    int getOffset(int r, int c) {
        return r * W + c;
    }

    /** Reads the cell as an unsigned byte widened to a char. */
    char getCharAt(int r, int c) {
        return (char) (g[getOffset(r, c)] & 0xFF);
    }

    /** Stores the low byte of {@code ch} into the cell. */
    void setCharAt(int r, int c, char ch) {
        g[getOffset(r, c)] = (byte) ch;
    }

    /** Returns true when the cell holds a byte in A..Z. */
    boolean isLetterAt(int r, int c) {
        final byte cell = g[getOffset(r, c)];
        return !(cell < A || cell > Z);
    }

    /** Returns true when the cell holds a byte in _1.._9. */
    boolean isDigitAt(int r, int c) {
        final byte cell = g[getOffset(r, c)];
        return !(cell < _1 || cell > _9);
    }

    /** A cell counts as a letter cell whenever it is not a digit cell. */
    boolean isLettercell(int r, int c) {
        return !isDigitAt(r, c);
    }
}
/** Builds a new Grid with this generator's H and W, backed by a copy of Data.EXAMPLE. */
Grid makeEmptyGrid() {
    var cells = Data.EXAMPLE.clone();
    return new Grid(cells, H, W);
}
String gridToString(Grid g) { String gridToString(Grid g) {
var sb = new StringBuilder(); var sb = new StringBuilder();
@@ -145,8 +147,6 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
return sb.toString(); return sb.toString();
} }
// ---------------- Words / index ----------------
static final class IntList { static final class IntList {
int[] a = new int[8]; int[] a = new int[8];
@@ -156,16 +156,14 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
a[n++] = v; a[n++] = v;
} }
int size() { return n; } int size() { return n; }
int[] data() { return a; } // note: may have extra capacity int[] data() { return a; }
} }
static record DictEntry(ArrayList<Lemma> words, IntList[][] pos) { static record DictEntry(ArrayList<Lemma> words, IntList[][] pos) {
public DictEntry(int L) { public DictEntry(int L) {
this(new ArrayList<>(), new IntList[L][26]); this(new ArrayList<>(), new IntList[L][26]);
for (var i = 0; i < L; i++) { for (var i = 0; i < L; i++) for (var j = 0; j < 26; j++) pos[i][j] = new IntList();
for (var j = 0; j < 26; j++) pos[i][j] = new IntList();
}
} }
} }
@@ -177,24 +175,11 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
var list = new ArrayList<String>(10); var list = new ArrayList<String>(10);
list.add(clue); list.add(clue);
this(index, word, word.length(), complex, simpel, score, list); this(index, word, word.length(), complex, simpel, score, list);
// Prioritize simple words (high lScore) and long words.
// lScore (1-10) adds up to 1000 points (weight 100).
// Length (2-8) adds up to 160 points (weight 20).
// We want LONGER and SIMPLER words to be tried earlier (lower difficulty value).
// word.length() is 2 to 8.
// score is 1 to 10.
// Base difficulty starts high and decreases with length and score.
// Length impact: up to 8 * 10 = 80
// Score impact: up to 10 * 15 = 150
} }
public Lemma(String word, int simpel, int score, String clue) { this(LEMMA_COUNTER++, word, simpel, score, clue); } public Lemma(String word, int simpel, int score, String clue) { this(LEMMA_COUNTER++, word, simpel, score, clue); }
char charAt(int idx) { return word.charAt(idx); } char charAt(int idx) { return word.charAt(idx); }
@Override public int hashCode() { return index; } @Override public int hashCode() { return index; }
@Override public boolean equals(Object o) { @Override public boolean equals(Object o) { return (o == this) || (o instanceof Lemma l && l.index == index); }
if (o == this) return true;
return o instanceof Lemma l && l.index == index;
}
} }
public static record Dict(Lemma[] wordz, public static record Dict(Lemma[] wordz,
@@ -288,9 +273,6 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
return Arrays.copyOf(out, k); return Arrays.copyOf(out, k);
} }
static final record CandidateInfo(int[] indices, int count) {
}
static CandidateInfo candidateInfoForPattern(DictEntry entry, char[] pattern /* 0 means null */) { static CandidateInfo candidateInfoForPattern(DictEntry entry, char[] pattern /* 0 means null */) {
var lists = new ArrayList<IntList>(); var lists = new ArrayList<IntList>();
for (var i = 0; i < pattern.length; i++) { for (var i = 0; i < pattern.length; i++) {
@@ -319,11 +301,11 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
return new CandidateInfo(cur, curLen); return new CandidateInfo(cur, curLen);
} }
// ---------------- Slots ---------------- static record Slot(String key, int clueR, int clueC, char dir, int[] rs, int[] cs, int len, boolean horiz) {
static record Slot(String key, int clueR, int clueC, char dir, int[] rs, int[] cs, int len) { public Slot(int clueR, int clueC, char dir, int[] rs, int[] cs) {
this(clueR + "," + clueC + ":" + dir, clueR, clueC, dir, rs, cs, rs.length, dir == HOR_ARROW_1 || dir == HOR_ARROW_2);
public Slot(int clueR, int clueC, char dir, int[] rs, int[] cs) { this(clueR + "," + clueC + ":" + dir, clueR, clueC, dir, rs, cs, rs.length); } }
} }
ArrayList<Slot> extractSlots(Grid grid) { ArrayList<Slot> extractSlots(Grid grid) {
@@ -333,20 +315,19 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
var d = grid.getCharAt(r, c); var d = grid.getCharAt(r, c);
if (!isDigit(d)) continue; if (!isDigit(d)) continue;
var dir = d - '0'; var dir = d - '0';
int or = OFFSETS[dir][0], oc = OFFSETS[dir][1]; int or = OFFSETS[dir].x, oc = OFFSETS[dir].y;
int dr = STEPS[dir][0], dc = STEPS[dir][1]; int dr = STEPS[dir].x, dc = STEPS[dir].y;
int rr = r + or, cc = c + oc; int rr = r + or, cc = c + oc;
if (rr < 0 || rr >= H || cc < 0 || cc >= W) continue; if (rr < 0 || rr >= H || cc < 0 || cc >= W) continue;
if (!isLetterCell(grid.getCharAt(rr, cc))) continue; if (grid.isDigitAt(rr, cc)) continue;
var rs = new int[MAX_LEN + 1]; var rs = new int[MAX_LEN + 1];
var cs = new int[MAX_LEN + 1]; var cs = new int[MAX_LEN + 1];
var n = 0; var n = 0;
while (rr >= 0 && rr < H && cc >= 0 && cc < W) { while (rr >= 0 && rr < H && cc >= 0 && cc < W) {
var ch = grid.getCharAt(rr, cc); if (grid.isDigitAt(rr, cc)) break;
if (!isLetterCell(ch)) break;
rs[n] = rr; rs[n] = rr;
cs[n] = cc; cs[n] = cc;
n++; n++;
@@ -363,8 +344,8 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
boolean hasRoomForClue(Grid grid, int r, int c, char d) { boolean hasRoomForClue(Grid grid, int r, int c, char d) {
var di = d - '0'; var di = d - '0';
int or = OFFSETS[di][0], oc = OFFSETS[di][1]; int or = OFFSETS[di].x, oc = OFFSETS[di].y;
int dr = STEPS[di][0], dc = STEPS[di][1]; int dr = STEPS[di].x, dc = STEPS[di].y;
int rr = r + or, cc = c + oc; int rr = r + or, cc = c + oc;
var run = 0; var run = 0;
while (rr >= 0 && rr < H && cc >= 0 && cc < W && (grid.isLettercell(rr, cc)) && run < MAX_LEN) { while (rr >= 0 && rr < H && cc >= 0 && cc < W && (grid.isLettercell(rr, cc)) && run < MAX_LEN) {
@@ -375,21 +356,6 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
return run >= MIN_LEN; return run >= MIN_LEN;
} }
record nbrs_8(int x, int y) { }
// ---------------- FAST mask fitness ----------------
final static nbrs_8[] nbrs8 = new nbrs_8[]{
new nbrs_8(-1, -1),
new nbrs_8(-1, 0),
new nbrs_8(-1, 1),
new nbrs_8(0, -1),
new nbrs_8(0, 1),
new nbrs_8(1, -1),
new nbrs_8(1, 0),
new nbrs_8(1, 1)
};
static final int[][] nbrs4 = new int[][]{ { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 } };
long maskFitness(Grid grid, int[] lenCounts) { long maskFitness(Grid grid, int[] lenCounts) {
long penalty = 0; long penalty = 0;
@@ -406,8 +372,6 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
var covV = new int[H][W]; var covV = new int[H][W];
for (var s : slots) { for (var s : slots) {
var horiz = (s.dir == HOR_ARROW_1 || s.dir == HOR_ARROW_2);
if (s.len < MIN_LEN) penalty += 8000; if (s.len < MIN_LEN) penalty += 8000;
if (s.len > MAX_LEN) penalty += 8000 + (long) (s.len - MAX_LEN) * 500L; if (s.len > MAX_LEN) penalty += 8000 + (long) (s.len - MAX_LEN) * 500L;
@@ -417,14 +381,14 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
for (var i = 0; i < s.len; i++) { for (var i = 0; i < s.len; i++) {
int r = s.rs[i], c = s.cs[i]; int r = s.rs[i], c = s.cs[i];
if (horiz) covH[r][c] += 1; if (s.horiz) covH[r][c] += 1;
else covV[r][c] += 1; else covV[r][c] += 1;
} }
} }
for (var r = 0; r < H; r++) for (var r = 0; r < H; r++)
for (var c = 0; c < W; c++) { for (var c = 0; c < W; c++) {
if (!isLetterCell(grid.getCharAt(r, c))) continue; if (grid.isDigitAt(r, c)) continue;
int h = covH[r][c], v = covV[r][c]; int h = covH[r][c], v = covV[r][c];
if (h == 0 && v == 0) penalty += 1500; if (h == 0 && v == 0) penalty += 1500;
else if (h > 0 && v > 0) { /* ok */ } else if (h + v == 1) penalty += 200; else if (h > 0 && v > 0) { /* ok */ } else if (h + v == 1) penalty += 200;
@@ -466,15 +430,15 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
for (var r = 0; r < H; r++) for (var r = 0; r < H; r++)
for (var c = 0; c < W; c++) { for (var c = 0; c < W; c++) {
if (!isLetterCell(grid.getCharAt(r, c))) continue; if (grid.isDigitAt(r, c)) continue;
var walls = 0; var walls = 0;
for (var d : nbrs4) { for (var d : nbrs4) {
int rr = r + d[0], cc = c + d[1]; int rr = r + d.x, cc = c + d.y;
if (rr < 0 || rr >= H || cc < 0 || cc >= W) { if (rr < 0 || rr >= H || cc < 0 || cc >= W) {
walls++; walls++;
continue; continue;
} }
if (!isLetterCell(grid.getCharAt(rr, cc))) walls++; if (grid.isDigitAt(rr, cc)) walls++;
} }
if (walls >= 3) penalty += 400; if (walls >= 3) penalty += 400;
} }
@@ -515,8 +479,7 @@ public record SwedishGenerator(int W, int H, int SIZE, int MAX_LEN) {
var rr = clamp(cx + (rng.randint(-2, 2) + rng.randint(-2, 2)), 0, H - 1); var rr = clamp(cx + (rng.randint(-2, 2) + rng.randint(-2, 2)), 0, H - 1);
var cc = clamp(cy + (rng.randint(-2, 2) + rng.randint(-2, 2)), 0, W - 1); var cc = clamp(cy + (rng.randint(-2, 2) + rng.randint(-2, 2)), 0, W - 1);
var cur = g.getCharAt(rr, cc); if (g.isDigitAt(rr, cc)) {
if (isDigit(cur)) {
g.setCharAt(rr, cc, '#'); g.setCharAt(rr, cc, '#');
} else { } else {
var d = (char) ('0' + rng.randint(1, cc == 0 ? CLUE_SIZE : 4)); var d = (char) ('0' + rng.randint(1, cc == 0 ? CLUE_SIZE : 4));