update them

This commit is contained in:
mike
2025-12-21 17:30:40 +01:00
parent b0be3937db
commit 5d1547e39f
9 changed files with 45078 additions and 124 deletions

View File

@@ -5,6 +5,7 @@ import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.stream.IntStream;
/**
* SwedishGenerator.java
@@ -108,6 +109,10 @@ public class SwedishGenerator {
if (n >= a.length) a = Arrays.copyOf(a, a.length * 2);
a[n++] = v;
}
/**
 * Replaces the contents of this list with {@code newData}.
 *
 * <p>The array is adopted as the backing store without copying, so later
 * writes by the caller to {@code newData} are visible through this list.
 * After the call, {@code size()} equals {@code newData.length}.
 *
 * @param newData the elements that become the full contents of the list
 * @throws NullPointerException if {@code newData} is null; the list is left unchanged
 */
void replaceAll(int[] newData) {
    // Read the length before touching any field so a null argument cannot
    // leave the list with a null backing array.
    int len = newData.length;
    // Growth on append doubles a.length; a zero-length backing array would
    // stay at length 0 and the next append would index out of bounds.
    // Keep one spare slot in that case.
    this.a = (len == 0) ? new int[1] : newData;
    this.n = len;
}
/** Returns the number of elements currently stored (the value of {@code n}). */
int size() {
    return this.n;
}
/**
 * Returns the backing array itself, not a copy.
 *
 * <p>The array may be longer than {@code size()}; only the first
 * {@code size()} entries are meaningful.
 */
int[] data() {
    return this.a;
}
}
@@ -124,6 +129,18 @@ public class SwedishGenerator {
}
}
/**
 * Pairs a word with a length-based integer score.
 *
 * <p>The score is {@code -min(40, 5 * word.length())}: it is 0 for the empty
 * string, drops by 5 per character, and bottoms out at -40 from eight
 * characters upward.
 *
 * <p>NOTE(review): because of the negation, longer words get the numerically
 * lower score and therefore come first under an ascending sort. Confirm this
 * is the intended direction of the heuristic.
 */
static class WordDifficulty {
    final String word;
    final int difficulty;

    /**
     * @param word the word to score; must not be null
     */
    public WordDifficulty(String word) {
        int scaledLength = 5 * word.length();
        // Cap the magnitude at 40 before flipping the sign.
        int capped = (scaledLength < 40) ? scaledLength : 40;
        this.word = word;
        this.difficulty = -capped;
    }
}
static final class Dict {
final ArrayList<String> words;
@@ -135,7 +152,6 @@ public class SwedishGenerator {
this.lenCounts = lenCounts;
}
}
static Dict loadWords(String wordsPath) {
String raw;
try {
@@ -144,16 +160,26 @@ public class SwedishGenerator {
raw = "EU\nUUR\nAUTO\nBOOM\nHUIS\nKAT\nZEE\nRODE\nDRAAD\nKENNIS\nNETWERK\nPAKTE\n";
}
var words = new ArrayList<String>();
var words = new ArrayList<WordDifficulty>();
for (var line : raw.split("\\R")) {
var s = line.trim().toUpperCase(Locale.ROOT);
if (s.matches("^[A-Z]{2,8}$")) words.add(s);
if (s.matches("^[A-Z]{2,8}$")) {
words.add(new WordDifficulty(s));
}
}
// Sort words by difficulty in ascending order
words.sort(Comparator.comparingInt(wd -> wd.difficulty));
var dictWords = new ArrayList<String>();
for (var wd : words) {
dictWords.add(wd.word);
}
var index = new HashMap<Integer, DictEntry>();
var lenCounts = new HashMap<Integer, Integer>();
for (var w : words) {
for (var w : dictWords) {
var L = w.length();
lenCounts.put(L, lenCounts.getOrDefault(L, 0) + 1);
@@ -172,7 +198,7 @@ public class SwedishGenerator {
}
}
return new Dict(words, index, lenCounts);
return new Dict(dictWords, index, lenCounts);
}
static int[] intersectSorted(int[] a, int aLen, int[] b, int bLen) {
@@ -195,7 +221,6 @@ public class SwedishGenerator {
int[] indices; // null => unconstrained
int count;
}
static CandidateInfo candidateInfoForPattern(DictEntry entry, char[] pattern /* 0 means null */) {
var lists = new ArrayList<IntList>();
for (var i = 0; i < pattern.length; i++) {
@@ -204,6 +229,7 @@ public class SwedishGenerator {
lists.add(entry.pos[i][ch - 'A']);
}
}
var ci = new CandidateInfo();
if (lists.isEmpty()) {
ci.indices = null;
@@ -211,8 +237,6 @@ public class SwedishGenerator {
return ci;
}
lists.sort(Comparator.comparingInt(IntList::size));
var first = lists.get(0);
var cur = Arrays.copyOf(first.data(), first.size());
var curLen = cur.length;
@@ -230,6 +254,11 @@ public class SwedishGenerator {
ci.count = curLen;
return ci;
}
/**
 * Looks up the word at position {@code index} in {@code entry.words} and
 * returns the difficulty score that {@code WordDifficulty} assigns to it.
 *
 * @param entry dictionary entry whose word list is consulted
 * @param index position within {@code entry.words}
 * @return the {@code difficulty} field of a freshly built {@code WordDifficulty}
 */
static int indexToDifficulty(DictEntry entry, int index) {
    return new WordDifficulty(entry.words.get(index)).difficulty;
}
// ---------------- Slots ----------------
@@ -751,11 +780,14 @@ public class SwedishGenerator {
var L = idxs.length;
var tries = Math.min(MAX_TRIES_PER_SLOT, L);
var start = (L == 1) ? 0 : rng.randint(0, L - 1);
var step = (L <= 1) ? 1 : rng.randint(1, L - 1);
// When picking words from sorted indices, we want to favor the beginning
// (lower difficulty) but still have some randomness.
for (var t = 0; t < tries; t++) {
var idx = idxs[(start + t * step) % L];
// Power law or similar to favor lower indices:
// pick a random double in [0, 1), square it to bias towards 0.
double r = rng.nextFloat();
int idxInArray = (int) (r * r * L);
var idx = idxs[idxInArray];
var w = entry.words.get(idx);
if (tryWord.apply(w)) return true;
}
@@ -770,12 +802,10 @@ public class SwedishGenerator {
}
var tries = Math.min(MAX_TRIES_PER_SLOT, N);
var start = (N == 1) ? 0 : rng.randint(0, N - 1);
var step = (N <= 1) ? 1 : rng.randint(1, N - 1);
for (var t = 0; t < tries; t++) {
var idx = (start + t * step) % N;
var w = entry.words.get(idx);
double r = rng.nextFloat();
int idxInArray = (int) (r * r * N);
var w = entry.words.get(idxInArray);
if (tryWord.apply(w)) return true;
}