Gather data
This commit is contained in:
@@ -74,7 +74,7 @@ public record Export() {
|
|||||||
|
|
||||||
record Bit1029(long[] bits) {
|
record Bit1029(long[] bits) {
|
||||||
|
|
||||||
public Bit1029() { this(new long[1029]); }
|
public Bit1029() { this(new long[2048]); }
|
||||||
static int wordIndex(int bitIndex) { return bitIndex >> 6; }
|
static int wordIndex(int bitIndex) { return bitIndex >> 6; }
|
||||||
public boolean get(int bitIndex) { return (this.bits[wordIndex(bitIndex)] & 1L << bitIndex) != 0L; }
|
public boolean get(int bitIndex) { return (this.bits[wordIndex(bitIndex)] & 1L << bitIndex) != 0L; }
|
||||||
public void set(int bitIndex) { bits[wordIndex(bitIndex)] |= 1L << bitIndex; }
|
public void set(int bitIndex) { bits[wordIndex(bitIndex)] |= 1L << bitIndex; }
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ public class Main {
|
|||||||
public int seed = (int) (System.nanoTime() ^ System.currentTimeMillis());
|
public int seed = (int) (System.nanoTime() ^ System.currentTimeMillis());
|
||||||
public int pop = 18;
|
public int pop = 18;
|
||||||
public int gens = 2000;
|
public int gens = 2000;
|
||||||
public String wordsPath = "nl_score_hints_v2.csv";
|
public String wordsPath = "nl_score_hints_v3.csv";
|
||||||
public double minSimplicity = 0; // 0 means no limit
|
public double minSimplicity = 0; // 0 means no limit
|
||||||
public int threads = Math.max(1, Runtime.getRuntime().availableProcessors());
|
public int threads = Math.max(1, Runtime.getRuntime().availableProcessors());
|
||||||
public int tries = threads;
|
public int tries = threads;
|
||||||
|
|||||||
@@ -219,7 +219,6 @@ public record SwedishGenerator(Rng rng) {
|
|||||||
static int LEMMA_COUNTER = 0;
|
static int LEMMA_COUNTER = 0;
|
||||||
public Lemma(int index, String word, int simpel, String[] clu) { this(index, word.getBytes(StandardCharsets.US_ASCII), simpel, clu); }
|
public Lemma(int index, String word, int simpel, String[] clu) { this(index, word.getBytes(StandardCharsets.US_ASCII), simpel, clu); }
|
||||||
public Lemma(String word, int simpel, String clue) { this(LEMMA_COUNTER++, word, simpel, new String[]{ clue }); }
|
public Lemma(String word, int simpel, String clue) { this(LEMMA_COUNTER++, word, simpel, new String[]{ clue }); }
|
||||||
public Lemma(String word, int simpel, String[] clue) { this(LEMMA_COUNTER++, word, simpel, clue); }
|
|
||||||
byte byteAt(int idx) { return word[idx]; }
|
byte byteAt(int idx) { return word[idx]; }
|
||||||
@Override public int hashCode() { return index; }
|
@Override public int hashCode() { return index; }
|
||||||
@Override public boolean equals(Object o) { return (o == this) || (o instanceof Lemma l && l.index == index); }
|
@Override public boolean equals(Object o) { return (o == this) || (o instanceof Lemma l && l.index == index); }
|
||||||
@@ -260,7 +259,7 @@ public record SwedishGenerator(Rng rng) {
|
|||||||
raw = Files.readString(Path.of(wordsPath), StandardCharsets.UTF_8);
|
raw = Files.readString(Path.of(wordsPath), StandardCharsets.UTF_8);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
raw = "WOORD,level_1_to_10,hint\nEU,2,hint\nUUR,2,hint\nAUTO,2,hint\nBOOM,2,hint\nHUIS,2,hint\nKAT,2,hint\nZEE,2,hint\nRODE,2,hint\nDRAAD,2,hint\nKENNIS,2,hint\nNETWERK,2,hint\nPAKTE,2,hint\n";
|
throw new RuntimeException("Failed to load dictionary from " + wordsPath, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
var map = new ArrayList<Lemma>();
|
var map = new ArrayList<Lemma>();
|
||||||
@@ -270,8 +269,9 @@ public record SwedishGenerator(Rng rng) {
|
|||||||
System.err.println("Empty line: " + line);
|
System.err.println("Empty line: " + line);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
var parts = line.split(",", 4);
|
var parts = line.split(",", 5);
|
||||||
var word = parts[0].trim();
|
var id = Integer.parseInt(parts[0].trim());
|
||||||
|
var word = parts[1].trim();
|
||||||
if (first && word.equalsIgnoreCase("WOORD")) {
|
if (first && word.equalsIgnoreCase("WOORD")) {
|
||||||
first = false;
|
first = false;
|
||||||
continue;
|
continue;
|
||||||
@@ -284,17 +284,17 @@ public record SwedishGenerator(Rng rng) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// CSV has level 1-10. llmScores use 10-level.
|
// CSV has level 1-10. llmScores use 10-level.
|
||||||
int score = 10 - Integer.parseInt(parts[1].trim());
|
int score = Integer.parseInt(parts[2].trim());
|
||||||
if (score < 1) {
|
if (score < 1) {
|
||||||
if (Main.VERBOSE) System.err.println("Word too complex: " + line);
|
if (Main.VERBOSE) System.err.println("Word too complex: " + line);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
int simpel = Integer.parseInt(parts[2].trim());
|
int simpel = Integer.parseInt(parts[3].trim());
|
||||||
var rawClue = parts[3].trim();
|
var rawClue = parts[4].trim();
|
||||||
if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
|
if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
|
||||||
rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
|
rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
|
||||||
}
|
}
|
||||||
map.add(new Lemma(s, simpel, GSON.fromJson(rawClue, String[].class)));
|
map.add(new Lemma(id,s, simpel, GSON.fromJson(rawClue, String[].class)));
|
||||||
}
|
}
|
||||||
|
|
||||||
return new Dict(map.toArray(Lemma[]::new));
|
return new Dict(map.toArray(Lemma[]::new));
|
||||||
|
|||||||
Reference in New Issue
Block a user