Gather data

This commit is contained in:
mike
2025-12-25 04:38:16 +01:00
parent 49a1aa4152
commit 541e101ae0
26 changed files with 1539 additions and 185988 deletions

View File

@@ -153,18 +153,18 @@ public class SwedishGenerator {
static Map<String, Integer> loadScores() {
var scores = new HashMap<String, Integer>();
try {
var lines = Files.readAllLines(Path.of("export_words.csv"), StandardCharsets.UTF_8);
var lines = Files.readAllLines(Path.of("/data/puzzle/export_with_hints.csv"), StandardCharsets.UTF_8);
var first = true;
for (var line : lines) {
if (first) {
first = false;
continue;
}
var parts = line.split(",");
var parts = line.split(",",3);
if (parts.length >= 2) {
try {
var word = parts[0].trim().toUpperCase(Locale.ROOT);
var score = Integer.parseInt(parts[1].trim());
var score = 10-Integer.parseInt(parts[1].trim());
scores.put(word, score);
} catch (NumberFormatException ignored) {
System.err.println("Illegal number format: " + line);
@@ -200,7 +200,8 @@ public class SwedishGenerator {
var words = new ArrayList<WordDifficulty>();
for (var line : raw.split("\\R")) {
var s = line.trim().toUpperCase(Locale.ROOT);
var word = line.split(",",3)[0].trim();
var s = word.trim().toUpperCase(Locale.ROOT);
if (s.matches("^[A-Z]{2,8}$")) {
var score = llmScores.getOrDefault(s, 5); // Default to middle
words.add(new WordDifficulty(s, score));