Gather data
This commit is contained in:
@@ -6,7 +6,6 @@ import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.*;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* SwedishGenerator.java
|
||||
@@ -19,6 +18,7 @@ import java.util.stream.Collectors;
|
||||
public class SwedishGenerator {
|
||||
|
||||
static final int W = 9, H = 8,
|
||||
SIZE = W * H,
|
||||
CLUE_SIZE = 4,
|
||||
SIMPLICITY_DEFAULT_SCORE = 2;
|
||||
static final int MIN_LEN = 2, MAX_LEN = 8;
|
||||
@@ -138,15 +138,14 @@ public class SwedishGenerator {
|
||||
}
|
||||
}
|
||||
|
||||
static record Lemma(int index, String word, int length, int difficulty, int simpel, int score, int cross, ArrayList<String> clue) {
|
||||
static record Lemma(int index, String word, int length, int difficulty, int simpel, int score, ArrayList<String> clue) {
|
||||
|
||||
static int LEMMA_COUNTER = 0;
|
||||
public Lemma(String word, int simpel, int score, String clue) {
|
||||
var complex = 0 + ((8 - word.length()) * 30) + ((10 - score) * 15);
|
||||
var crossScore = ThemePoolBuilderLength.crossabilityScore(word);
|
||||
var list = new ArrayList<String>(10);
|
||||
public Lemma(int index, String word, int simpel, int score, String clue) {
|
||||
var complex = 0 + ((8 - word.length()) * 30) + ((10 - score) * 15);
|
||||
var list = new ArrayList<String>(10);
|
||||
list.add(clue);
|
||||
this(++LEMMA_COUNTER, word, word.length(), complex, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15), list);
|
||||
this(index, word, word.length(), complex, simpel, score, list);
|
||||
|
||||
// Prioritize simple words (high lScore) and long words.
|
||||
// lScore (1-10) adds up to 1000 points (weight 100).
|
||||
@@ -158,17 +157,46 @@ public class SwedishGenerator {
|
||||
// Length impact: up to 8 * 10 = 80
|
||||
// Score impact: up to 10 * 15 = 150
|
||||
}
|
||||
char charAt(int idx) { return word.charAt(idx); }
|
||||
@Override public int hashCode() { return index; }
|
||||
public Lemma(String word, int simpel, int score, String clue) { this(LEMMA_COUNTER++, word, simpel, score, clue); }
|
||||
char charAt(int idx) { return word.charAt(idx); }
|
||||
@Override public int hashCode() { return index; }
|
||||
@Override public boolean equals(Object o) {
|
||||
if (o == this) return true;
|
||||
return o instanceof Lemma l && l.index == index;
|
||||
}
|
||||
}
|
||||
|
||||
public static record Dict(Map<String, Lemma> words,
|
||||
public static record Dict(Lemma[] wordz,
|
||||
DictEntry[] index,
|
||||
int[] lenCounts) { }
|
||||
int[] lenCounts) {
|
||||
|
||||
public Dict(Lemma[] wordz) {
|
||||
// Sort words by difficulty in ascending order
|
||||
Lemma[] words = wordz.clone();
|
||||
Arrays.sort(words, Comparator.comparingInt(wd -> wd.simpel));
|
||||
|
||||
var lenCounts = new int[12];
|
||||
var index = new DictEntry[12];
|
||||
Arrays.setAll(index, i -> new DictEntry(i));
|
||||
int maxLength = -1;
|
||||
for (var w : words) {
|
||||
var L = w.length();
|
||||
if (L > maxLength) maxLength = L;
|
||||
lenCounts[L]++;
|
||||
|
||||
var entry = index[L];
|
||||
var idx = entry.words.size();
|
||||
entry.words.add(w);
|
||||
|
||||
for (var i = 0; i < L; i++) {
|
||||
var letter = w.charAt(i) - 'A';
|
||||
if (letter >= 0 && letter < 26) entry.pos[i][letter].add(idx);
|
||||
else throw new RuntimeException("Illegal letter: " + letter + " in word " + w);
|
||||
}
|
||||
}
|
||||
this(wordz, index, lenCounts);
|
||||
}
|
||||
}
|
||||
static Dict loadWords(String wordsPath) {
|
||||
String raw;
|
||||
try {
|
||||
@@ -191,11 +219,10 @@ public class SwedishGenerator {
|
||||
first = false;
|
||||
var s = word.toUpperCase(Locale.ROOT);
|
||||
if (s.matches("^[A-Z]{2,8}$")) {
|
||||
int score = SIMPLICITY_DEFAULT_SCORE;
|
||||
int simpel = 0;
|
||||
|
||||
// CSV has level 1-10. llmScores use 10-level.
|
||||
score = 10 - Integer.parseInt(parts[1].trim());
|
||||
simpel = Integer.parseInt(parts[2].trim());
|
||||
int score = 10 - Integer.parseInt(parts[1].trim());
|
||||
int simpel = Integer.parseInt(parts[2].trim());
|
||||
var rawClue = parts[3].trim();
|
||||
if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
|
||||
rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
|
||||
@@ -207,33 +234,12 @@ public class SwedishGenerator {
|
||||
map.put(s, new Lemma(s, simpel, score, rawClue));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
var words = map.values().stream().collect(Collectors.toCollection(ArrayList::new));
|
||||
// Sort words by difficulty in ascending order
|
||||
words.sort(Comparator.comparingInt(wd -> wd.simpel));
|
||||
|
||||
var lenCounts = new int[12];
|
||||
var index = new DictEntry[12];
|
||||
Arrays.setAll(index, i -> new DictEntry(i));
|
||||
int maxLength = -1;
|
||||
for (var w : words) {
|
||||
var L = w.length();
|
||||
if (L > maxLength) maxLength = L;
|
||||
lenCounts[L]++;
|
||||
|
||||
var entry = index[L];
|
||||
var idx = entry.words.size();
|
||||
entry.words.add(w);
|
||||
|
||||
for (var i = 0; i < L; i++) {
|
||||
var letter = w.charAt(i) - 'A';
|
||||
if (letter >= 0 && letter < 26) entry.pos[i][letter].add(idx);
|
||||
else throw new RuntimeException("Illegal letter: " + letter + " in word " + w);
|
||||
} else {
|
||||
System.err.println("Invalid word: " + line);
|
||||
}
|
||||
}
|
||||
|
||||
return new Dict(map, index, lenCounts);
|
||||
return new Dict(map.values().toArray(Lemma[]::new));
|
||||
}
|
||||
|
||||
static int[] intersectSorted(int[] a, int aLen, int[] b, int bLen) {
|
||||
@@ -284,13 +290,9 @@ public class SwedishGenerator {
|
||||
}
|
||||
// ---------------- Slots ----------------
|
||||
|
||||
static record Slot(int clueR, int clueC, char dir, int[] rs, int[] cs, int len) {
|
||||
static record Slot(String key, int clueR, int clueC, char dir, int[] rs, int[] cs, int len) {
|
||||
|
||||
public Slot(int clueR, int clueC, char dir, int[] rs, int[] cs) {
|
||||
this(clueR, clueC, dir, rs, cs, rs.length);
|
||||
|
||||
}
|
||||
String key() { return clueR + "," + clueC + ":" + dir; }
|
||||
public Slot(int clueR, int clueC, char dir, int[] rs, int[] cs) { this(clueR + "," + clueC + ":" + dir, clueR, clueC, dir, rs, cs, rs.length); }
|
||||
}
|
||||
|
||||
static ArrayList<Slot> extractSlots(char[][] grid) {
|
||||
@@ -345,14 +347,19 @@ public class SwedishGenerator {
|
||||
}
|
||||
|
||||
// ---------------- FAST mask fitness ----------------
|
||||
|
||||
final static int[][] nbrs8 = new int[][]{
|
||||
{ -1, -1 }, { -1, 0 }, { -1, 1 },
|
||||
{ 0, -1 }, { 0, 1 },
|
||||
{ 1, -1 }, { 1, 0 }, { 1, 1 }
|
||||
};
|
||||
static final int[][] nbrs4 = new int[][]{ { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 } };
|
||||
static long maskFitness(char[][] grid, int[] lenCounts) {
|
||||
long penalty = 0;
|
||||
|
||||
var clueCount = 0;
|
||||
for (var r = 0; r < H; r++) for (var c = 0; c < W; c++) if (isDigit(grid[r][c])) clueCount++;
|
||||
|
||||
var targetClues = (int) Math.round(W * H * 0.25); // ~18
|
||||
var targetClues = (int) Math.round(SIZE * 0.25); // ~18
|
||||
penalty += 8L * Math.abs(clueCount - targetClues);
|
||||
|
||||
var slots = extractSlots(grid);
|
||||
@@ -368,7 +375,7 @@ public class SwedishGenerator {
|
||||
if (s.len > MAX_LEN) penalty += 8000 + (long) (s.len - MAX_LEN) * 500L;
|
||||
|
||||
if (s.len >= MIN_LEN && s.len <= MAX_LEN) {
|
||||
if (lenCounts[s.len]<=0) penalty += 12000;
|
||||
if (lenCounts[s.len] <= 0) penalty += 12000;
|
||||
}
|
||||
|
||||
for (var i = 0; i < s.len; i++) {
|
||||
@@ -389,13 +396,8 @@ public class SwedishGenerator {
|
||||
|
||||
// clue clustering (8-connected)
|
||||
var seen = new boolean[H][W];
|
||||
var stack = new int[W * H];
|
||||
var stack = new int[SIZE];
|
||||
int sp;
|
||||
var nbrs8 = new int[][]{
|
||||
{ -1, -1 }, { -1, 0 }, { -1, 1 },
|
||||
{ 0, -1 }, { 0, 1 },
|
||||
{ 1, -1 }, { 1, 0 }, { 1, 1 }
|
||||
};
|
||||
|
||||
for (var r = 0; r < H; r++)
|
||||
for (var c = 0; c < W; c++) {
|
||||
@@ -424,7 +426,7 @@ public class SwedishGenerator {
|
||||
}
|
||||
|
||||
// dead-end-ish letter cell (3+ walls)
|
||||
var nbrs4 = new int[][]{ { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 } };
|
||||
|
||||
for (var r = 0; r < H; r++)
|
||||
for (var c = 0; c < W; c++) {
|
||||
if (!isLetterCell(grid[r][c])) continue;
|
||||
@@ -447,7 +449,7 @@ public class SwedishGenerator {
|
||||
|
||||
static char[][] randomMask(Rng rng) {
|
||||
var g = makeEmptyGrid();
|
||||
var targetClues = (int) Math.round(W * H * 0.25);
|
||||
var targetClues = (int) Math.round(SIZE * 0.25);
|
||||
int placed = 0, guard = 0;
|
||||
|
||||
while (placed < targetClues && guard++ < 4000) {
|
||||
@@ -594,17 +596,23 @@ public class SwedishGenerator {
|
||||
public int lastMRV;
|
||||
}
|
||||
|
||||
public static final class FillResult {
|
||||
public static final record FillResult(boolean ok,
|
||||
char[][] grid,
|
||||
HashMap<String, Lemma> clueMap,
|
||||
FillStats stats,
|
||||
double simplicity) {
|
||||
|
||||
public boolean ok;
|
||||
public char[][] grid;
|
||||
public HashMap<String, Lemma> clueMap;
|
||||
public FillStats stats;
|
||||
public double simplicity;
|
||||
public FillResult(boolean ok, char[][] grid, HashMap<String, Lemma> assigned, FillStats stats) {
|
||||
double totalSimplicity = 0;
|
||||
if (ok) {
|
||||
for (var w : assigned.values()) totalSimplicity += w.difficulty;
|
||||
totalSimplicity = assigned.isEmpty() ? 0 : totalSimplicity / assigned.size();
|
||||
}
|
||||
this(ok, grid, assigned, stats, totalSimplicity);
|
||||
}
|
||||
}
|
||||
|
||||
record Undo(int[] rs, int[] cs, char[] prev, int n) {
|
||||
}
|
||||
record Undo(int[] rs, int[] cs, char[] prev, int n) { }
|
||||
|
||||
static char[] patternForSlot(char[][] grid, Slot s) {
|
||||
var pat = new char[s.len];
|
||||
@@ -645,11 +653,13 @@ public class SwedishGenerator {
|
||||
}
|
||||
return new Undo(urs, ucs, up, n);
|
||||
}
|
||||
|
||||
static final int MAX_TRIES_PER_SLOT = 2000;
|
||||
static void undoPlace(char[][] grid, Undo u) {
|
||||
for (var i = 0; i < u.n; i++) grid[u.rs[i]][u.cs[i]] = u.prev[i];
|
||||
}
|
||||
|
||||
record Pick(Slot slot,
|
||||
CandidateInfo info,
|
||||
boolean done) { }
|
||||
static FillResult fillMask(Rng rng, char[][] mask, DictEntry[] dictIndex,
|
||||
int logEveryMs, int timeLimitMs, boolean verbose) {
|
||||
|
||||
@@ -692,12 +702,6 @@ public class SwedishGenerator {
|
||||
System.out.flush();
|
||||
};
|
||||
|
||||
record Pick(Slot slot,
|
||||
CandidateInfo info,
|
||||
boolean done) {
|
||||
|
||||
}
|
||||
|
||||
java.util.function.Supplier<Pick> chooseMRV = () -> {
|
||||
Slot best = null;
|
||||
CandidateInfo bestInfo = null;
|
||||
@@ -734,7 +738,6 @@ public class SwedishGenerator {
|
||||
}
|
||||
};
|
||||
IO.println("hit");
|
||||
final var MAX_TRIES_PER_SLOT = 2000;
|
||||
|
||||
class Solver {
|
||||
|
||||
@@ -829,20 +832,8 @@ public class SwedishGenerator {
|
||||
System.out.print("\r" + padRight("", 120) + "\r");
|
||||
System.out.flush();
|
||||
|
||||
var res = new FillResult();
|
||||
res.ok = ok;
|
||||
res.grid = grid;
|
||||
res.clueMap = assigned;
|
||||
stats.seconds = (System.currentTimeMillis() - t0) / 1000.0;
|
||||
res.stats = stats;
|
||||
|
||||
if (ok) {
|
||||
double totalSimplicity = 0;
|
||||
for (var w : assigned.values()) {
|
||||
totalSimplicity += w.difficulty;
|
||||
}
|
||||
res.simplicity = assigned.isEmpty() ? 0 : totalSimplicity / assigned.size();
|
||||
}
|
||||
var res = new FillResult(ok, grid, assigned, stats);
|
||||
|
||||
// print a final progress line
|
||||
if (verbose) {
|
||||
|
||||
Reference in New Issue
Block a user