update them
This commit is contained in:
@@ -5,6 +5,7 @@ import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.*;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/**
|
||||
* SwedishGenerator.java
|
||||
@@ -108,6 +109,10 @@ public class SwedishGenerator {
|
||||
if (n >= a.length) a = Arrays.copyOf(a, a.length * 2);
|
||||
a[n++] = v;
|
||||
}
|
||||
void replaceAll(int[] newData) {
|
||||
this.a = newData;
|
||||
this.n = newData.length;
|
||||
}
|
||||
// Number of values currently stored (not the capacity of the backing array).
int size() { return n; }
|
||||
// Returns the backing array itself (no copy); only the first size() entries are valid.
int[] data() { return a; } // note: may have extra capacity
|
||||
}
|
||||
@@ -124,6 +129,18 @@ public class SwedishGenerator {
|
||||
}
|
||||
}
|
||||
|
||||
static class WordDifficulty {
|
||||
|
||||
final String word;
|
||||
final int difficulty;
|
||||
|
||||
public WordDifficulty(String word) {
|
||||
this.word = word;
|
||||
// Simple heuristic for difficulty: shorter words have lower difficulty
|
||||
this.difficulty = -Math.min(40,word.length() * 5);
|
||||
}
|
||||
}
|
||||
|
||||
static final class Dict {
|
||||
|
||||
final ArrayList<String> words;
|
||||
@@ -135,7 +152,6 @@ public class SwedishGenerator {
|
||||
this.lenCounts = lenCounts;
|
||||
}
|
||||
}
|
||||
|
||||
static Dict loadWords(String wordsPath) {
|
||||
String raw;
|
||||
try {
|
||||
@@ -144,16 +160,26 @@ public class SwedishGenerator {
|
||||
raw = "EU\nUUR\nAUTO\nBOOM\nHUIS\nKAT\nZEE\nRODE\nDRAAD\nKENNIS\nNETWERK\nPAKTE\n";
|
||||
}
|
||||
|
||||
var words = new ArrayList<String>();
|
||||
var words = new ArrayList<WordDifficulty>();
|
||||
for (var line : raw.split("\\R")) {
|
||||
var s = line.trim().toUpperCase(Locale.ROOT);
|
||||
if (s.matches("^[A-Z]{2,8}$")) words.add(s);
|
||||
if (s.matches("^[A-Z]{2,8}$")) {
|
||||
words.add(new WordDifficulty(s));
|
||||
}
|
||||
}
|
||||
|
||||
// Sort words by difficulty in ascending order
|
||||
words.sort(Comparator.comparingInt(wd -> wd.difficulty));
|
||||
|
||||
var dictWords = new ArrayList<String>();
|
||||
for (var wd : words) {
|
||||
dictWords.add(wd.word);
|
||||
}
|
||||
|
||||
var index = new HashMap<Integer, DictEntry>();
|
||||
var lenCounts = new HashMap<Integer, Integer>();
|
||||
|
||||
for (var w : words) {
|
||||
for (var w : dictWords) {
|
||||
var L = w.length();
|
||||
lenCounts.put(L, lenCounts.getOrDefault(L, 0) + 1);
|
||||
|
||||
@@ -172,7 +198,7 @@ public class SwedishGenerator {
|
||||
}
|
||||
}
|
||||
|
||||
return new Dict(words, index, lenCounts);
|
||||
return new Dict(dictWords, index, lenCounts);
|
||||
}
|
||||
|
||||
static int[] intersectSorted(int[] a, int aLen, int[] b, int bLen) {
|
||||
@@ -195,7 +221,6 @@ public class SwedishGenerator {
|
||||
int[] indices; // null => unconstrained
|
||||
int count;
|
||||
}
|
||||
|
||||
static CandidateInfo candidateInfoForPattern(DictEntry entry, char[] pattern /* 0 means null */) {
|
||||
var lists = new ArrayList<IntList>();
|
||||
for (var i = 0; i < pattern.length; i++) {
|
||||
@@ -204,6 +229,7 @@ public class SwedishGenerator {
|
||||
lists.add(entry.pos[i][ch - 'A']);
|
||||
}
|
||||
}
|
||||
|
||||
var ci = new CandidateInfo();
|
||||
if (lists.isEmpty()) {
|
||||
ci.indices = null;
|
||||
@@ -211,8 +237,6 @@ public class SwedishGenerator {
|
||||
return ci;
|
||||
}
|
||||
|
||||
lists.sort(Comparator.comparingInt(IntList::size));
|
||||
|
||||
var first = lists.get(0);
|
||||
var cur = Arrays.copyOf(first.data(), first.size());
|
||||
var curLen = cur.length;
|
||||
@@ -230,6 +254,11 @@ public class SwedishGenerator {
|
||||
ci.count = curLen;
|
||||
return ci;
|
||||
}
|
||||
static int indexToDifficulty(DictEntry entry, int index) {
|
||||
var word = entry.words.get(index);
|
||||
return new WordDifficulty(word).difficulty;
|
||||
}
|
||||
|
||||
|
||||
// ---------------- Slots ----------------
|
||||
|
||||
@@ -751,11 +780,14 @@ public class SwedishGenerator {
|
||||
var L = idxs.length;
|
||||
var tries = Math.min(MAX_TRIES_PER_SLOT, L);
|
||||
|
||||
var start = (L == 1) ? 0 : rng.randint(0, L - 1);
|
||||
var step = (L <= 1) ? 1 : rng.randint(1, L - 1);
|
||||
|
||||
// When picking words from sorted indices, we want to favor the beginning
|
||||
// (lower difficulty) but still have some randomness.
|
||||
for (var t = 0; t < tries; t++) {
|
||||
var idx = idxs[(start + t * step) % L];
|
||||
// Power law or similar to favor lower indices:
|
||||
// pick a random double in [0, 1), square it to bias towards 0.
|
||||
double r = rng.nextFloat();
|
||||
int idxInArray = (int) (r * r * L);
|
||||
var idx = idxs[idxInArray];
|
||||
var w = entry.words.get(idx);
|
||||
if (tryWord.apply(w)) return true;
|
||||
}
|
||||
@@ -770,12 +802,10 @@ public class SwedishGenerator {
|
||||
}
|
||||
|
||||
var tries = Math.min(MAX_TRIES_PER_SLOT, N);
|
||||
var start = (N == 1) ? 0 : rng.randint(0, N - 1);
|
||||
var step = (N <= 1) ? 1 : rng.randint(1, N - 1);
|
||||
|
||||
for (var t = 0; t < tries; t++) {
|
||||
var idx = (start + t * step) % N;
|
||||
var w = entry.words.get(idx);
|
||||
double r = rng.nextFloat();
|
||||
int idxInArray = (int) (r * r * N);
|
||||
var w = entry.words.get(idxInArray);
|
||||
if (tryWord.apply(w)) return true;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user