Gather data

This commit is contained in:
mike
2025-12-26 23:15:39 +01:00
parent dd8a6bffa3
commit 31658d2f72
5 changed files with 93 additions and 135 deletions

2
.gitignore vendored
View File

@@ -3,7 +3,7 @@
**/.custom/ **/.custom/
target/ target/
.env .env
out/puzzle/
*.log *.log
.output.txt .output.txt
out/ out/
.aider*

View File

@@ -1,22 +1,4 @@
services: services:
puzzle:
build:
context: ${PUZZLE_ROOT_DIR:-/opt/apps/puzzle}
dockerfile: Dockerfile
container_name: puzzle
restart: unless-stopped
networks: [ traefik_net ]
volumes:
- puzzles_data:/usr/share/nginx/html/puzzles:ro
labels:
- "traefik.enable=true"
- "traefik.http.routers.puzzle-main.rule=Host(`puzzle.appmodel.nl`)"
- "traefik.http.routers.puzzle-main.entrypoints=websecure"
- "traefik.http.routers.puzzle-main.tls=true"
- "traefik.http.routers.puzzle-main.tls.certresolver=letsencrypt"
- "traefik.http.routers.puzzle-main-http.rule=Host(`puzzle.appmodel.nl`)"
- "traefik.http.routers.puzzle-main-http.entrypoints=web"
- "traefik.http.routers.puzzle-main-http.middlewares=redirect-to-https@file"
puzzle_gen: puzzle_gen:
build: build:

View File

@@ -15,16 +15,6 @@ public final class ExportFormat {
private ExportFormat() { } private ExportFormat() { }
// Directions for digits '1'..'4'
private static final int[][] DIRS = new int[5][2];
static {
DIRS[1] = new int[]{ -1, 0 }; // up
DIRS[2] = new int[]{ 0, 1 }; // right
DIRS[3] = new int[]{ 1, 0 }; // down
DIRS[4] = new int[]{ 0, -1 }; // left
}
private static boolean isDigit(char ch) { return ch >= '1' && ch <= '4'; }
private static boolean isLetter(char ch) { return ch >= 'A' && ch <= 'Z'; } private static boolean isLetter(char ch) { return ch >= 'A' && ch <= 'Z'; }
private static boolean inBounds(int H, int W, int r, int c) { private static boolean inBounds(int H, int W, int r, int c) {
@@ -45,22 +35,17 @@ public final class ExportFormat {
// 1) extract "placed" list from all clue digits in the filled grid // 1) extract "placed" list from all clue digits in the filled grid
List<Placed> placed = new ArrayList<>(); List<Placed> placed = new ArrayList<>();
Set<String> seen = new HashSet<>(); var allSlots = SwedishGenerator.extractSlots(g);
var clueMap = puz.filled().clueMap;
for (var r = 0; r < H; r++) { for (var s : allSlots) {
for (var c = 0; c < W; c++) { var word = clueMap.get(s.key());
var ch = g[r][c]; if (word == null) continue;
if (!isDigit(ch)) continue;
var p = extractPlacedFromClue(g, r, c, ch, 8, 2); var p = extractPlacedFromSlot(s, word);
if (p == null) continue; if (p == null) continue;
var key = p.startRow + "," + p.startCol + ":" + p.direction + ":" + p.word;
if (seen.contains(key)) continue;
seen.add(key);
placed.add(p); placed.add(p);
} }
}
// If nothing placed: return full grid mapped to letters/# only // If nothing placed: return full grid mapped to letters/# only
if (placed.isEmpty()) { if (placed.isEmpty()) {
@@ -136,29 +121,18 @@ public final class ExportFormat {
} }
/** /**
* Extract a word run for a clue cell at (r,c) with direction digit d. * Convert a generator Slot + assigned word into a Placed object for export.
* Canonical output:
* - direction: "horizontal" (right) or "vertical" (down)
* - startRow/startCol: first letter cell in canonical direction
* - arrowRow/arrowCol: immediately before the start (left or above)
* - word read from grid in canonical order
*/ */
private static Placed extractPlacedFromClue(char[][] g, int r, int c, char d, int maxLen, int minLen) { private static Placed extractPlacedFromSlot(SwedishGenerator.Slot s, String word) {
int H = g.length, W = g[0].length; int r = s.clueR;
var di = d - '0'; int c = s.clueC;
int dr = DIRS[di][0], dc = DIRS[di][1]; char d = s.dir;
// collect letter cells in ORIGINAL direction away from the clue
List<int[]> cells = new ArrayList<>(); List<int[]> cells = new ArrayList<>();
int rr = r + dr, cc = c + dc; for (int i = 0; i < s.len; i++) {
while (inBounds(H, W, rr, cc) && isLetter(g[rr][cc]) && cells.size() < maxLen) { cells.add(new int[]{ s.rs[i], s.cs[i] });
cells.add(new int[]{ rr, cc });
rr += dr;
cc += dc;
} }
if (cells.size() < minLen) return null;
// Canonicalize: always output right/down // Canonicalize: always output right/down
int startRow, startCol, arrowRow, arrowCol; int startRow, startCol, arrowRow, arrowCol;
String direction; String direction;
@@ -170,7 +144,7 @@ public final class ExportFormat {
startCol = cells.get(0)[1]; startCol = cells.get(0)[1];
arrowRow = r; arrowRow = r;
arrowCol = c; arrowCol = c;
} else if (d == '3') { // down -> vertical } else if (d == '3' || d == '5') { // down or down-bent -> vertical
direction = "vertical"; direction = "vertical";
startRow = cells.get(0)[0]; startRow = cells.get(0)[0];
startCol = cells.get(0)[1]; startCol = cells.get(0)[1];
@@ -194,28 +168,16 @@ public final class ExportFormat {
return null; return null;
} }
// Read word from grid using the collected cells
var wordChars = new StringBuilder();
for (var rc : cells) {
wordChars.append(g[rc[0]][rc[1]]);
}
var word = wordChars.toString();
if (word.length() < minLen || word.length() > maxLen) return null;
// Build exact used cells (only for actual word length)
List<int[]> used = new ArrayList<>(cells);
return new Placed( return new Placed(
word, word,
word, // clue placeholder (same as JS) word, // clue placeholder
startRow, startRow,
startCol, startCol,
direction, direction,
word, // answer word, // answer
arrowRow, arrowRow,
arrowCol, arrowCol,
used, cells,
new int[]{ arrowRow, arrowCol }, new int[]{ arrowRow, arrowCol },
isReversed isReversed
); );

View File

@@ -14,9 +14,9 @@ public class Main {
// ---------------- CLI ---------------- // ---------------- CLI ----------------
public static class Opts { public static class Opts {
public int seed = 1; public int seed = 1234;
public int pop = 18; public int pop = 18;
public int gens = 500; public int gens = 200;
public int tries = 5; public int tries = 5;
public String wordsPath = "/data/puzzle/pool.txt"; public String wordsPath = "/data/puzzle/pool.txt";
public double minSimplicity = 0; // 0 means no limit public double minSimplicity = 0; // 0 means no limit

View File

@@ -17,20 +17,38 @@ import java.util.stream.IntStream;
@SuppressWarnings("ALL") @SuppressWarnings("ALL")
public class SwedishGenerator { public class SwedishGenerator {
static final int W = 9, H = 8; static final int W = 9, H = 8,
CLUE_SIZE = 6,
SIMPLICITY_DEFAULT_SCORE = 5;
static final int MIN_LEN = 2, MAX_LEN = 8; static final int MIN_LEN = 2, MAX_LEN = 8;
// Directions for '1'..'6'
// Directions for '1'..'4' static final int[][] OFFSETS = new int[7][2];
static final int[][] DIRS = new int[5][2]; static final int[][] STEPS = new int[7][2];
static { static {
DIRS[1] = new int[]{ -1, 0 }; // up // 1: up
DIRS[2] = new int[]{ 0, 1 }; // right OFFSETS[1] = new int[]{ -1, 0 };
DIRS[3] = new int[]{ 1, 0 }; // down STEPS[1] = new int[]{ -1, 0 };
DIRS[4] = new int[]{ 0, -1 }; // left // 2: right
OFFSETS[2] = new int[]{ 0, 1 };
STEPS[2] = new int[]{ 0, 1 };
// 3: down
OFFSETS[3] = new int[]{ 1, 0 };
STEPS[3] = new int[]{ 1, 0 };
// 4: left
OFFSETS[4] = new int[]{ 0, -1 };
STEPS[4] = new int[]{ 0, -1 };
// 5: vertical down, clue is on the right of the first letter
OFFSETS[5] = new int[]{ 0, -1 };
STEPS[5] = new int[]{ 1, 0 };
// 6: vertical down, clue is on the left of the first letter
OFFSETS[6] = new int[]{ 0, 1 };
STEPS[6] = new int[]{ 1, 0 };
} }
static final char FIRST_ABC = 'A';
static boolean isDigit(char ch) { return ch >= '1' && ch <= '4'; } static final char LAST_ABC = 'Z';
static boolean isLetter(char ch) { return ch >= 'A' && ch <= 'Z'; } static final char FIRST_ARROW = '1', LAST_ARROW = '6', HOR_ARROW_1 = '2', HOR_ARROW_2 = '4';
static boolean isDigit(char ch) { return ch >= FIRST_ARROW && ch <= LAST_ARROW; }
static boolean isLetter(char ch) { return ch >= FIRST_ABC && ch <= LAST_ABC; }
static boolean isLetterCell(char ch) { return ch == '#' || isLetter(ch); } static boolean isLetterCell(char ch) { return ch == '#' || isLetter(ch); }
// ---------------- RNG (xorshift32) ---------------- // ---------------- RNG (xorshift32) ----------------
@@ -52,21 +70,16 @@ public class SwedishGenerator {
return y; return y;
} }
int randint(int min, int max) { // inclusive int randint(int min, int max) { // inclusive
var r = nextU32(); var u = (nextU32() & 0xFFFFFFFFL);
var u = (r & 0xFFFFFFFFL);
var range = (long) max - (long) min + 1L; var range = (long) max - (long) min + 1L;
return (int) (min + (u % range)); return (int) (min + (u % range));
} }
double nextFloat() { double nextFloat() { return (nextU32() & 0xFFFFFFFFL) / 4294967295.0; }
var u = nextU32() & 0xFFFFFFFFL;
return u / 4294967295.0; // 0xFFFFFFFF
}
} }
static int clamp(int x, int a, int b) { return Math.max(a, Math.min(b, x)); } static int clamp(int x, int a, int b) { return Math.max(a, Math.min(b, x)); }
// ---------------- Grid helpers ---------------- // ---------------- Grid helpers ----------------
static char[][] makeEmptyGrid() { static char[][] makeEmptyGrid() {
var g = new char[H][W]; var g = new char[H][W];
for (var r = 0; r < H; r++) Arrays.fill(g[r], '#'); for (var r = 0; r < H; r++) Arrays.fill(g[r], '#');
@@ -160,11 +173,11 @@ public class SwedishGenerator {
first = false; first = false;
continue; continue;
} }
var parts = line.split(",",3); var parts = line.split(",", 3);
if (parts.length >= 2) { if (parts.length >= 2) {
try { try {
var word = parts[0].trim().toUpperCase(Locale.ROOT); var word = parts[0].trim().toUpperCase(Locale.ROOT);
var score = 10-Integer.parseInt(parts[1].trim()); var score = 10 - Integer.parseInt(parts[1].trim());
scores.put(word, score); scores.put(word, score);
} catch (NumberFormatException ignored) { } catch (NumberFormatException ignored) {
System.err.println("Illegal number format: " + line); System.err.println("Illegal number format: " + line);
@@ -200,10 +213,10 @@ public class SwedishGenerator {
var words = new ArrayList<WordDifficulty>(); var words = new ArrayList<WordDifficulty>();
for (var line : raw.split("\\R")) { for (var line : raw.split("\\R")) {
var word = line.split(",",3)[0].trim(); var word = line.split(",", 3)[0].trim();
var s = word.trim().toUpperCase(Locale.ROOT); var s = word.trim().toUpperCase(Locale.ROOT);
if (s.matches("^[A-Z]{2,8}$")) { if (s.matches("^[A-Z]{2,8}$")) {
var score = llmScores.getOrDefault(s, 5); // Default to middle var score = llmScores.getOrDefault(s, SIMPLICITY_DEFAULT_SCORE); // Default to middle
words.add(new WordDifficulty(s, score)); words.add(new WordDifficulty(s, score));
} }
} }
@@ -296,7 +309,7 @@ public class SwedishGenerator {
} }
static int indexToDifficulty(DictEntry entry, int index, Map<String, Integer> llmScores) { static int indexToDifficulty(DictEntry entry, int index, Map<String, Integer> llmScores) {
var word = entry.words.get(index); var word = entry.words.get(index);
var score = llmScores.getOrDefault(word, 5); var score = llmScores.getOrDefault(word, SIMPLICITY_DEFAULT_SCORE);
return new WordDifficulty(word, score).difficulty; return new WordDifficulty(word, score).difficulty;
} }
@@ -305,7 +318,7 @@ public class SwedishGenerator {
static final class Slot { static final class Slot {
final int clueR, clueC; final int clueR, clueC;
final char dir; // '1'..'4' final char dir; // '1'..'5'
final int[] rs, cs; // cells final int[] rs, cs; // cells
final int len; final int len;
Slot(int clueR, int clueC, char dir, int[] rs, int[] cs) { Slot(int clueR, int clueC, char dir, int[] rs, int[] cs) {
@@ -325,16 +338,17 @@ public class SwedishGenerator {
for (var c = 0; c < W; c++) { for (var c = 0; c < W; c++) {
var d = grid[r][c]; var d = grid[r][c];
if (!isDigit(d)) continue; if (!isDigit(d)) continue;
var dir = d - '0';
// Check all possible directions for clue placement
// for (int dir = 1; dir <= 4; dir++) {
int or = OFFSETS[dir][0], oc = OFFSETS[dir][1];
int dr = STEPS[dir][0], dc = STEPS[dir][1];
// Check all four possible directions for clue placement int rr = r + or, cc = c + oc;
for (int dir = 1; dir <= 4; dir++) {
int dr = DIRS[dir][0], dc = DIRS[dir][1];
int rr = r + dr, cc = c + dc;
if (rr < 0 || rr >= H || cc < 0 || cc >= W) continue; if (rr < 0 || rr >= H || cc < 0 || cc >= W) continue;
if (!isLetterCell(grid[rr][cc])) continue; if (!isLetterCell(grid[rr][cc])) continue;
var rs = new int[MAX_LEN + 1]; // allow MAX_LEN+1 like JS loop var rs = new int[MAX_LEN + 1];
var cs = new int[MAX_LEN + 1]; var cs = new int[MAX_LEN + 1];
var n = 0; var n = 0;
@@ -346,20 +360,20 @@ public class SwedishGenerator {
n++; n++;
rr += dr; rr += dr;
cc += dc; cc += dc;
if (n > MAX_LEN) break; // allow n==MAX_LEN+1 if (n > MAX_LEN) break;
} }
slots.add(new Slot(r, c, (char) ('0' + dir), Arrays.copyOf(rs, n), Arrays.copyOf(cs, n))); slots.add(new Slot(r, c, d, Arrays.copyOf(rs, n), Arrays.copyOf(cs, n)));
} // }
} }
} }
return slots; return slots;
} }
static boolean hasRoomForClue(char[][] grid, int r, int c, char d) { static boolean hasRoomForClue(char[][] grid, int r, int c, char d) {
var di = d - '0'; var di = d - '0';
int dr = DIRS[di][0], dc = DIRS[di][1]; int or = OFFSETS[di][0], oc = OFFSETS[di][1];
int rr = r + dr, cc = c + dc; int dr = STEPS[di][0], dc = STEPS[di][1];
int rr = r + or, cc = c + oc;
var run = 0; var run = 0;
while (rr >= 0 && rr < H && cc >= 0 && cc < W && isLetterCell(grid[rr][cc]) && run < MAX_LEN) { while (rr >= 0 && rr < H && cc >= 0 && cc < W && isLetterCell(grid[rr][cc]) && run < MAX_LEN) {
run++; run++;
@@ -387,7 +401,7 @@ public class SwedishGenerator {
var covV = new int[H][W]; var covV = new int[H][W];
for (var s : slots) { for (var s : slots) {
var horiz = (s.dir == '2' || s.dir == '4'); var horiz = (s.dir == HOR_ARROW_1 || s.dir == HOR_ARROW_2);
if (s.len < MIN_LEN) penalty += 8000; if (s.len < MIN_LEN) penalty += 8000;
if (s.len > MAX_LEN) penalty += 8000 + (long) (s.len - MAX_LEN) * 500L; if (s.len > MAX_LEN) penalty += 8000 + (long) (s.len - MAX_LEN) * 500L;
@@ -480,7 +494,7 @@ public class SwedishGenerator {
var c = rng.randint(0, W - 1); var c = rng.randint(0, W - 1);
if (isDigit(g[r][c])) continue; if (isDigit(g[r][c])) continue;
var d = (char) ('0' + rng.randint(1, 4)); var d = (char) ('0' + rng.randint(1, r == 0 ? CLUE_SIZE : 4));
g[r][c] = d; g[r][c] = d;
if (!hasRoomForClue(g, r, c, d)) { if (!hasRoomForClue(g, r, c, d)) {
g[r][c] = '#'; g[r][c] = '#';
@@ -505,7 +519,7 @@ public class SwedishGenerator {
if (isDigit(cur)) { if (isDigit(cur)) {
g[rr][cc] = '#'; g[rr][cc] = '#';
} else { } else {
var d = (char) ('0' + rng.randint(1, 4)); var d = (char) ('0' + rng.randint(1, rr == 0 ? CLUE_SIZE : 4));
g[rr][cc] = d; g[rr][cc] = d;
if (!hasRoomForClue(g, rr, cc, d)) g[rr][cc] = '#'; if (!hasRoomForClue(g, rr, cc, d)) g[rr][cc] = '#';
} }