update them
This commit is contained in:
@@ -133,14 +133,40 @@ public class SwedishGenerator {
|
||||
|
||||
final String word;
|
||||
final int difficulty;
|
||||
final int score;
|
||||
|
||||
public WordDifficulty(String word) {
|
||||
public WordDifficulty(String word, int score) {
|
||||
this.word = word;
|
||||
this.score = score;
|
||||
// Simple heuristic for difficulty: shorter words have lower difficulty
|
||||
this.difficulty = -Math.min(40,word.length() * 5);
|
||||
// We combine this with the score (10 = common/simple, 1 = rare/hard)
|
||||
// Lower difficulty value means it is tried EARLIER.
|
||||
// We want LONGER and SIMPLER words to be tried earlier.
|
||||
// Increasing simplicity weight: score (1-10) now has max impact of 50.
|
||||
this.difficulty = -Math.min(40, word.length() * 5) - (score * 5);
|
||||
}
|
||||
}
|
||||
|
||||
static Map<String, Integer> loadScores() {
|
||||
var scores = new HashMap<String, Integer>();
|
||||
try {
|
||||
var lines = Files.readAllLines(Path.of("word_scores.csv"), StandardCharsets.UTF_8);
|
||||
var first = true;
|
||||
for (var line : lines) {
|
||||
if (first) { first = false; continue; }
|
||||
var parts = line.split(",", 3);
|
||||
if (parts.length >= 2) {
|
||||
try {
|
||||
scores.put(parts[0].trim().toUpperCase(Locale.ROOT), Integer.parseInt(parts[1].trim()));
|
||||
} catch (NumberFormatException ignored) {}
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.err.println("Warning: word_scores.csv not found, using default scores.");
|
||||
}
|
||||
return scores;
|
||||
}
|
||||
|
||||
static final class Dict {
|
||||
|
||||
final ArrayList<String> words;
|
||||
@@ -160,11 +186,13 @@ public class SwedishGenerator {
|
||||
raw = "EU\nUUR\nAUTO\nBOOM\nHUIS\nKAT\nZEE\nRODE\nDRAAD\nKENNIS\nNETWERK\nPAKTE\n";
|
||||
}
|
||||
|
||||
var words = new ArrayList<WordDifficulty>();
|
||||
var llmScores = loadScores();
|
||||
var words = new ArrayList<WordDifficulty>();
|
||||
for (var line : raw.split("\\R")) {
|
||||
var s = line.trim().toUpperCase(Locale.ROOT);
|
||||
if (s.matches("^[A-Z]{2,8}$")) {
|
||||
words.add(new WordDifficulty(s));
|
||||
var score = llmScores.getOrDefault(s, 5); // Default to middle
|
||||
words.add(new WordDifficulty(s, score));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -254,9 +282,10 @@ public class SwedishGenerator {
|
||||
ci.count = curLen;
|
||||
return ci;
|
||||
}
|
||||
static int indexToDifficulty(DictEntry entry, int index) {
|
||||
static int indexToDifficulty(DictEntry entry, int index, Map<String, Integer> llmScores) {
|
||||
var word = entry.words.get(index);
|
||||
return new WordDifficulty(word).difficulty;
|
||||
var score = llmScores.getOrDefault(word, 5);
|
||||
return new WordDifficulty(word, score).difficulty;
|
||||
}
|
||||
|
||||
|
||||
@@ -729,7 +758,7 @@ public class SwedishGenerator {
|
||||
return p;
|
||||
};
|
||||
|
||||
final var MAX_TRIES_PER_SLOT = 500;
|
||||
final var MAX_TRIES_PER_SLOT = 2000;
|
||||
|
||||
class Solver {
|
||||
|
||||
@@ -783,10 +812,9 @@ public class SwedishGenerator {
|
||||
// When picking words from sorted indices, we want to favor the beginning
|
||||
// (lower difficulty) but still have some randomness.
|
||||
for (var t = 0; t < tries; t++) {
|
||||
// Power law or similar to favor lower indices:
|
||||
// pick a random double in [0, 1), square it to bias towards 0.
|
||||
// Bias strongly towards lower indices (simpler words) using r^3
|
||||
double r = rng.nextFloat();
|
||||
int idxInArray = (int) (r * r * L);
|
||||
int idxInArray = (int) (r * r * r * L);
|
||||
var idx = idxs[idxInArray];
|
||||
var w = entry.words.get(idx);
|
||||
if (tryWord.apply(w)) return true;
|
||||
@@ -804,7 +832,7 @@ public class SwedishGenerator {
|
||||
var tries = Math.min(MAX_TRIES_PER_SLOT, N);
|
||||
for (var t = 0; t < tries; t++) {
|
||||
double r = rng.nextFloat();
|
||||
int idxInArray = (int) (r * r * N);
|
||||
int idxInArray = (int) (r * r * r * N);
|
||||
var w = entry.words.get(idxInArray);
|
||||
if (tryWord.apply(w)) return true;
|
||||
}
|
||||
@@ -863,7 +891,7 @@ public class SwedishGenerator {
|
||||
System.out.printf(Locale.ROOT, "MASK: %.3fs%n", (tMask1 - tMask0) / 1e9);
|
||||
|
||||
var tFill0 = System.nanoTime();
|
||||
var filled = fillMask(rng, mask, dict.index, 200, 30000);
|
||||
var filled = fillMask(rng, mask, dict.index, 200, 60000);
|
||||
var tFill1 = System.nanoTime();
|
||||
System.out.printf(Locale.ROOT, "FILL: %.3fms%n", (tFill1 - tFill0) / 1e6);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user