Gather data

This commit is contained in:
mike
2026-01-09 22:42:33 +01:00
parent 183216e753
commit e8a1ab6d19
3 changed files with 37 additions and 28 deletions

View File

@@ -281,16 +281,14 @@ public record SwedishGenerator() {
public static record Dict(
DictEntry[] index,
int[] lenCounts, int length) {
int length) {
static final Gson GSON = new Gson();
public Dict(Lemma[] wordz) {
var lenCounts = new int[MAX_WORD_LENGTH_PLUS_ONE];
var index = new DictEntry[MAX_WORD_LENGTH_PLUS_ONE];
Arrays.setAll(index, i -> new DictEntry(i));
for (var lemma : wordz) {
var L = lemma.word.length;
lenCounts[L]++;
var entry = index[L];
var idx = entry.words.size();
@@ -302,13 +300,13 @@ public record SwedishGenerator() {
entry.pos[i][letter].add(idx);
}
}
for (var len : lenCounts) {
for (int i = MIN_LEN; i < index.length; i++) {
var len = index[i].words.size();
if (len <= 0) {
System.out.println("No words for length " + len);
//throw new RuntimeException("Invalid word length: " + len);
throw new RuntimeException("No words for length " + i);
}
}
this(index, lenCounts, Arrays.stream(lenCounts).sum());
this(index, Arrays.stream(index).mapToInt(i -> i.words.size()).sum());
}
static Dict loadDict(String wordsPath) {
String raw;
@@ -355,7 +353,6 @@ public record SwedishGenerator() {
return new Dict(map.toArray(Lemma[]::new));
}
public int dictLength() { return Arrays.stream(lenCounts).sum(); }
}
static int intersectSorted(int[] a, int aLen, int[] b, int bLen, int[] out) {
@@ -424,7 +421,7 @@ public record SwedishGenerator() {
return false;
}
long maskFitness(Grid grid, int[] lenCounts) {
long maskFitness(Grid grid) {
final long[] penalty = { 0 };
var clueCount = grid.clueCount();
@@ -460,9 +457,9 @@ public record SwedishGenerator() {
hasSlots[0] = true;
if (n < MIN_LEN) {
penalty[0] += 8000;
} else if (lenCounts[n] <= 0) {
} /*else if (lenCounts[n] <= 0) {
penalty[0] += 12000;
}
}*/
var horiz = Slot.horiz(d) ? covH : covV;
for (var i = 0; i < n; i++) horiz[Slot.offset(packedPos, i)] += 1;
}
@@ -488,9 +485,9 @@ public record SwedishGenerator() {
hasSlots[0] = true;
if (n < MIN_LEN) {
penalty[0] += 8000;
} else if (lenCounts[n] <= 0) {
} /*else if (lenCounts[n] <= 0) {
penalty[0] += 12000;
}
}*/
var horiz = Slot.horiz(d) ? covH : covV;
for (var i = 0; i < n; i++) horiz[Slot.offset(packedPos, i)] += 1;
}
@@ -595,14 +592,14 @@ public record SwedishGenerator() {
return out;
}
Grid hillclimb(Rng rng, Grid start, int[] lenCounts, int limit) {
Grid hillclimb(Rng rng, Grid start, int limit) {
var best = start;
var bestF = maskFitness(best, lenCounts);
var bestF = maskFitness(best);
var fails = 0;
while (fails < limit) {
var cand = mutate(rng, best);
var f = maskFitness(cand, lenCounts);
var f = maskFitness(cand);
if (f < bestF) {
best = cand;
bestF = f;
@@ -614,21 +611,21 @@ public record SwedishGenerator() {
return best;
}
public Grid generateMask(Rng rng, int[] lenCounts, int popSize, int gens) {
public Grid generateMask(Rng rng, int popSize, int gens) {
class GridAndFit {
Grid grid;
Long fite;
GridAndFit(Grid grid) { this.grid = grid; }
long fit() {
if (fite == null) this.fite = maskFitness(grid, lenCounts);
if (fite == null) this.fite = maskFitness(grid);
return this.fite;
}
}
if (Main.VERBOSE) System.out.println("generateMask init pop: " + popSize);
var pop = new ArrayList<GridAndFit>();
for (var i = 0; i < popSize; i++) {
pop.add(new GridAndFit(hillclimb(rng, randomMask(rng), lenCounts, 180)));
pop.add(new GridAndFit(hillclimb(rng, randomMask(rng), 180)));
}
for (var gen = 0; gen < gens; gen++) {
@@ -640,7 +637,7 @@ public record SwedishGenerator() {
var p1 = pop.get(rng.randint(0, pop.size() - 1));
var p2 = pop.get(rng.randint(0, pop.size() - 1));
var child = crossover(rng, p1.grid, p2.grid);
children.add(new GridAndFit(hillclimb(rng, child, lenCounts, 70)));
children.add(new GridAndFit(hillclimb(rng, child, 70)));
}
pop.addAll(children);