Gather data

This commit is contained in:
mike
2025-12-26 23:15:39 +01:00
parent dd8a6bffa3
commit 31658d2f72
5 changed files with 93 additions and 135 deletions

2
.gitignore vendored
View File

@@ -3,7 +3,7 @@
**/.custom/
target/
.env
out/puzzle/
*.log
.output.txt
out/
.aider*

View File

@@ -1,22 +1,4 @@
services:
puzzle:
build:
context: ${PUZZLE_ROOT_DIR:-/opt/apps/puzzle}
dockerfile: Dockerfile
container_name: puzzle
restart: unless-stopped
networks: [ traefik_net ]
volumes:
- puzzles_data:/usr/share/nginx/html/puzzles:ro
labels:
- "traefik.enable=true"
- "traefik.http.routers.puzzle-main.rule=Host(`puzzle.appmodel.nl`)"
- "traefik.http.routers.puzzle-main.entrypoints=websecure"
- "traefik.http.routers.puzzle-main.tls=true"
- "traefik.http.routers.puzzle-main.tls.certresolver=letsencrypt"
- "traefik.http.routers.puzzle-main-http.rule=Host(`puzzle.appmodel.nl`)"
- "traefik.http.routers.puzzle-main-http.entrypoints=web"
- "traefik.http.routers.puzzle-main-http.middlewares=redirect-to-https@file"
puzzle_gen:
build:

View File

@@ -15,16 +15,6 @@ public final class ExportFormat {
private ExportFormat() { }
// Directions for digits '1'..'4'
private static final int[][] DIRS = new int[5][2];
static {
DIRS[1] = new int[]{ -1, 0 }; // up
DIRS[2] = new int[]{ 0, 1 }; // right
DIRS[3] = new int[]{ 1, 0 }; // down
DIRS[4] = new int[]{ 0, -1 }; // left
}
private static boolean isDigit(char ch) { return ch >= '1' && ch <= '4'; }
private static boolean isLetter(char ch) { return ch >= 'A' && ch <= 'Z'; }
private static boolean inBounds(int H, int W, int r, int c) {
@@ -45,21 +35,16 @@ public final class ExportFormat {
// 1) extract "placed" list from all clue digits in the filled grid
List<Placed> placed = new ArrayList<>();
Set<String> seen = new HashSet<>();
var allSlots = SwedishGenerator.extractSlots(g);
var clueMap = puz.filled().clueMap;
for (var r = 0; r < H; r++) {
for (var c = 0; c < W; c++) {
var ch = g[r][c];
if (!isDigit(ch)) continue;
var p = extractPlacedFromClue(g, r, c, ch, 8, 2);
if (p == null) continue;
var key = p.startRow + "," + p.startCol + ":" + p.direction + ":" + p.word;
if (seen.contains(key)) continue;
seen.add(key);
placed.add(p);
}
for (var s : allSlots) {
var word = clueMap.get(s.key());
if (word == null) continue;
var p = extractPlacedFromSlot(s, word);
if (p == null) continue;
placed.add(p);
}
// If nothing placed: return full grid mapped to letters/# only
@@ -136,29 +121,18 @@ public final class ExportFormat {
}
/**
* Extract a word run for a clue cell at (r,c) with direction digit d.
* Canonical output:
* - direction: "horizontal" (right) or "vertical" (down)
* - startRow/startCol: first letter cell in canonical direction
* - arrowRow/arrowCol: immediately before the start (left or above)
* - word read from grid in canonical order
* Convert a generator Slot + assigned word into a Placed object for export.
*/
private static Placed extractPlacedFromClue(char[][] g, int r, int c, char d, int maxLen, int minLen) {
int H = g.length, W = g[0].length;
var di = d - '0';
int dr = DIRS[di][0], dc = DIRS[di][1];
private static Placed extractPlacedFromSlot(SwedishGenerator.Slot s, String word) {
int r = s.clueR;
int c = s.clueC;
char d = s.dir;
// collect letter cells in ORIGINAL direction away from the clue
List<int[]> cells = new ArrayList<>();
int rr = r + dr, cc = c + dc;
while (inBounds(H, W, rr, cc) && isLetter(g[rr][cc]) && cells.size() < maxLen) {
cells.add(new int[]{ rr, cc });
rr += dr;
cc += dc;
for (int i = 0; i < s.len; i++) {
cells.add(new int[]{ s.rs[i], s.cs[i] });
}
if (cells.size() < minLen) return null;
// Canonicalize: always output right/down
int startRow, startCol, arrowRow, arrowCol;
String direction;
@@ -170,7 +144,7 @@ public final class ExportFormat {
startCol = cells.get(0)[1];
arrowRow = r;
arrowCol = c;
} else if (d == '3') { // down -> vertical
} else if (d == '3' || d == '5') { // down or down-bent -> vertical
direction = "vertical";
startRow = cells.get(0)[0];
startCol = cells.get(0)[1];
@@ -194,28 +168,16 @@ public final class ExportFormat {
return null;
}
// Read word from grid using the collected cells
var wordChars = new StringBuilder();
for (var rc : cells) {
wordChars.append(g[rc[0]][rc[1]]);
}
var word = wordChars.toString();
if (word.length() < minLen || word.length() > maxLen) return null;
// Build exact used cells (only for actual word length)
List<int[]> used = new ArrayList<>(cells);
return new Placed(
word,
word, // clue placeholder (same as JS)
word, // clue placeholder
startRow,
startCol,
direction,
word, // answer
arrowRow,
arrowCol,
used,
cells,
new int[]{ arrowRow, arrowCol },
isReversed
);

View File

@@ -14,9 +14,9 @@ public class Main {
// ---------------- CLI ----------------
public static class Opts {
public int seed = 1;
public int seed = 1234;
public int pop = 18;
public int gens = 500;
public int gens = 200;
public int tries = 5;
public String wordsPath = "/data/puzzle/pool.txt";
public double minSimplicity = 0; // 0 means no limit

View File

@@ -17,20 +17,38 @@ import java.util.stream.IntStream;
@SuppressWarnings("ALL")
public class SwedishGenerator {
static final int W = 9, H = 8;
// Grid dimensions: grids are allocated as char[H][W].
static final int W = 9;
static final int H = 8;
// Highest arrow digit the generator may draw when placing a clue (used as the upper bound of randint).
static final int CLUE_SIZE = 6;
// Score assigned to a word that has no entry in the score map.
static final int SIMPLICITY_DEFAULT_SCORE = 5;
// Accepted slot lengths; slots outside this range are penalised by the fitness function.
static final int MIN_LEN = 2;
static final int MAX_LEN = 8;
// Directions for '1'..'4'
static final int[][] DIRS = new int[5][2];
// Directions for '1'..'6'
static final int[][] OFFSETS = new int[7][2];
static final int[][] STEPS = new int[7][2];
static {
DIRS[1] = new int[]{ -1, 0 }; // up
DIRS[2] = new int[]{ 0, 1 }; // right
DIRS[3] = new int[]{ 1, 0 }; // down
DIRS[4] = new int[]{ 0, -1 }; // left
// 1: up
OFFSETS[1] = new int[]{ -1, 0 };
STEPS[1] = new int[]{ -1, 0 };
// 2: right
OFFSETS[2] = new int[]{ 0, 1 };
STEPS[2] = new int[]{ 0, 1 };
// 3: down
OFFSETS[3] = new int[]{ 1, 0 };
STEPS[3] = new int[]{ 1, 0 };
// 4: left
OFFSETS[4] = new int[]{ 0, -1 };
STEPS[4] = new int[]{ 0, -1 };
// 5: vertical down, clue is on the right of the first letter
OFFSETS[5] = new int[]{ 0, -1 };
STEPS[5] = new int[]{ 1, 0 };
// 6: vertical down, clue is on the left of the first letter
OFFSETS[6] = new int[]{ 0, 1 };
STEPS[6] = new int[]{ 1, 0 };
}
static boolean isDigit(char ch) { return ch >= '1' && ch <= '4'; }
static boolean isLetter(char ch) { return ch >= 'A' && ch <= 'Z'; }
// Inclusive bounds of the characters accepted as answer letters.
static final char FIRST_ABC = 'A';
static final char LAST_ABC = 'Z';
// Inclusive bounds of the arrow digits stored in clue cells.
static final char FIRST_ARROW = '1';
static final char LAST_ARROW = '6';
// The two arrow digits whose word runs horizontally (right and left).
static final char HOR_ARROW_1 = '2';
static final char HOR_ARROW_2 = '4';

/** Tells whether {@code ch} is one of the clue arrow digits. */
static boolean isDigit(char ch) {
    return FIRST_ARROW <= ch && ch <= LAST_ARROW;
}

/** Tells whether {@code ch} is an upper-case letter between the ABC bounds. */
static boolean isLetter(char ch) {
    return FIRST_ABC <= ch && ch <= LAST_ABC;
}

/** Tells whether {@code ch} may belong to a word run: a letter or the '#' filler. */
static boolean isLetterCell(char ch) {
    if (isLetter(ch)) {
        return true;
    }
    return ch == '#';
}
// ---------------- RNG (xorshift32) ----------------
@@ -52,21 +70,16 @@ public class SwedishGenerator {
return y;
}
int randint(int min, int max) { // inclusive
var r = nextU32();
var u = (r & 0xFFFFFFFFL);
var u = (nextU32() & 0xFFFFFFFFL);
var range = (long) max - (long) min + 1L;
return (int) (min + (u % range));
}
double nextFloat() {
var u = nextU32() & 0xFFFFFFFFL;
return u / 4294967295.0; // 0xFFFFFFFF
}
// Maps one nextU32() draw to a double: the mask keeps the low 32 bits as an unsigned
// value and the divisor is 2^32 - 1, so the result lies within [0.0, 1.0].
// NOTE(review): exactly 1.0 is returned if the draw is all ones — confirm callers that
// scale this into an index tolerate the upper bound.
double nextFloat() { return (nextU32() & 0xFFFFFFFFL) / 4294967295.0; }
}
/** Restricts {@code x} to the range {@code [a, b]}; yields {@code a} whenever {@code a > b}. */
static int clamp(int x, int a, int b) {
    var upperBounded = (x < b) ? x : b;
    return (upperBounded > a) ? upperBounded : a;
}
// ---------------- Grid helpers ----------------
static char[][] makeEmptyGrid() {
var g = new char[H][W];
for (var r = 0; r < H; r++) Arrays.fill(g[r], '#');
@@ -160,11 +173,11 @@ public class SwedishGenerator {
first = false;
continue;
}
var parts = line.split(",",3);
var parts = line.split(",", 3);
if (parts.length >= 2) {
try {
var word = parts[0].trim().toUpperCase(Locale.ROOT);
var score = 10-Integer.parseInt(parts[1].trim());
var word = parts[0].trim().toUpperCase(Locale.ROOT);
var score = 10 - Integer.parseInt(parts[1].trim());
scores.put(word, score);
} catch (NumberFormatException ignored) {
System.err.println("Illegal number format: " + line);
@@ -200,10 +213,10 @@ public class SwedishGenerator {
var words = new ArrayList<WordDifficulty>();
for (var line : raw.split("\\R")) {
var word = line.split(",",3)[0].trim();
var s = word.trim().toUpperCase(Locale.ROOT);
var word = line.split(",", 3)[0].trim();
var s = word.trim().toUpperCase(Locale.ROOT);
if (s.matches("^[A-Z]{2,8}$")) {
var score = llmScores.getOrDefault(s, 5); // Default to middle
var score = llmScores.getOrDefault(s, SIMPLICITY_DEFAULT_SCORE); // Default to middle
words.add(new WordDifficulty(s, score));
}
}
@@ -296,7 +309,7 @@ public class SwedishGenerator {
}
static int indexToDifficulty(DictEntry entry, int index, Map<String, Integer> llmScores) {
var word = entry.words.get(index);
var score = llmScores.getOrDefault(word, 5);
var score = llmScores.getOrDefault(word, SIMPLICITY_DEFAULT_SCORE);
return new WordDifficulty(word, score).difficulty;
}
@@ -305,7 +318,7 @@ public class SwedishGenerator {
static final class Slot {
final int clueR, clueC;
final char dir; // '1'..'4'
final char dir; // '1'..'6': arrow digit taken from the clue cell
final int[] rs, cs; // cells
final int len;
Slot(int clueR, int clueC, char dir, int[] rs, int[] cs) {
@@ -325,41 +338,42 @@ public class SwedishGenerator {
for (var c = 0; c < W; c++) {
var d = grid[r][c];
if (!isDigit(d)) continue;
// Check all four possible directions for clue placement
for (int dir = 1; dir <= 4; dir++) {
int dr = DIRS[dir][0], dc = DIRS[dir][1];
int rr = r + dr, cc = c + dc;
if (rr < 0 || rr >= H || cc < 0 || cc >= W) continue;
if (!isLetterCell(grid[rr][cc])) continue;
var rs = new int[MAX_LEN + 1]; // allow MAX_LEN+1 like JS loop
var cs = new int[MAX_LEN + 1];
var n = 0;
while (rr >= 0 && rr < H && cc >= 0 && cc < W) {
var ch = grid[rr][cc];
if (!isLetterCell(ch)) break;
rs[n] = rr;
cs[n] = cc;
n++;
rr += dr;
cc += dc;
if (n > MAX_LEN) break; // allow n==MAX_LEN+1
}
slots.add(new Slot(r, c, (char) ('0' + dir), Arrays.copyOf(rs, n), Arrays.copyOf(cs, n)));
var dir = d - '0';
// Check all possible directions for clue placement
// for (int dir = 1; dir <= 4; dir++) {
int or = OFFSETS[dir][0], oc = OFFSETS[dir][1];
int dr = STEPS[dir][0], dc = STEPS[dir][1];
int rr = r + or, cc = c + oc;
if (rr < 0 || rr >= H || cc < 0 || cc >= W) continue;
if (!isLetterCell(grid[rr][cc])) continue;
var rs = new int[MAX_LEN + 1];
var cs = new int[MAX_LEN + 1];
var n = 0;
while (rr >= 0 && rr < H && cc >= 0 && cc < W) {
var ch = grid[rr][cc];
if (!isLetterCell(ch)) break;
rs[n] = rr;
cs[n] = cc;
n++;
rr += dr;
cc += dc;
if (n > MAX_LEN) break;
}
slots.add(new Slot(r, c, d, Arrays.copyOf(rs, n), Arrays.copyOf(cs, n)));
// }
}
}
return slots;
}
static boolean hasRoomForClue(char[][] grid, int r, int c, char d) {
var di = d - '0';
int dr = DIRS[di][0], dc = DIRS[di][1];
int rr = r + dr, cc = c + dc;
int or = OFFSETS[di][0], oc = OFFSETS[di][1];
int dr = STEPS[di][0], dc = STEPS[di][1];
int rr = r + or, cc = c + oc;
var run = 0;
while (rr >= 0 && rr < H && cc >= 0 && cc < W && isLetterCell(grid[rr][cc]) && run < MAX_LEN) {
run++;
@@ -387,7 +401,7 @@ public class SwedishGenerator {
var covV = new int[H][W];
for (var s : slots) {
var horiz = (s.dir == '2' || s.dir == '4');
var horiz = (s.dir == HOR_ARROW_1 || s.dir == HOR_ARROW_2);
if (s.len < MIN_LEN) penalty += 8000;
if (s.len > MAX_LEN) penalty += 8000 + (long) (s.len - MAX_LEN) * 500L;
@@ -480,7 +494,7 @@ public class SwedishGenerator {
var c = rng.randint(0, W - 1);
if (isDigit(g[r][c])) continue;
var d = (char) ('0' + rng.randint(1, 4));
var d = (char) ('0' + rng.randint(1, r == 0 ? CLUE_SIZE : 4));
g[r][c] = d;
if (!hasRoomForClue(g, r, c, d)) {
g[r][c] = '#';
@@ -505,7 +519,7 @@ public class SwedishGenerator {
if (isDigit(cur)) {
g[rr][cc] = '#';
} else {
var d = (char) ('0' + rng.randint(1, 4));
var d = (char) ('0' + rng.randint(1, rr == 0 ? CLUE_SIZE : 4));
g[rr][cc] = d;
if (!hasRoomForClue(g, rr, cc, d)) g[rr][cc] = '#';
}