Gather data

This commit is contained in:
mike
2026-01-09 21:55:34 +01:00
parent 95891d9efe
commit 2ec023b49d
2 changed files with 19 additions and 21 deletions

View File

@@ -248,7 +248,7 @@ public class Main {
var tLoad1 = System.nanoTime();
section("Load");
info(String.format(Locale.ROOT, "words : %,d", dict.wordz().length));
info(String.format(Locale.ROOT, "words : %,d", dict.dictLength() ));
info(String.format(Locale.ROOT, "loadTime : %.3f s", (tLoad1 - tLoad0) / 1e9));
section("Search");
@@ -332,7 +332,7 @@ public class Main {
if (TOTAL_SUCCESS.get() > 0) {
info(String.format(Locale.ROOT, "avgSimplic : %.2f", TOTAL_SIMPLICITY.get() / 100.0 / TOTAL_SUCCESS.get()));
}
info(String.format(Locale.ROOT, "dictWords : %,d", dict.wordz().length));
info(String.format(Locale.ROOT, "dictWords : %,d", dict.dictLength()));
return resFinal;
}

View File

@@ -40,7 +40,8 @@ public record SwedishGenerator() {
static final int SIZE = C * R;// ~18
static final double SIZED = (double) SIZE;// ~18
static final int TARGET_CLUES = SIZE >> 2;
static final int MAX_WORD_LENGTH = Math.min(C, R);
static final int MAX_WORD_LENGTH = C <= R ? C : R;
static final int MAX_WORD_LENGTH_PLUS_ONE = MAX_WORD_LENGTH + 1;
static final int MIN_LEN = Config.MIN_LEN;
static final int CLUE_SIZE = Config.CLUE_SIZE;
static final int SIMPLICITY_DEFAULT_SCORE = 2;
@@ -269,28 +270,24 @@ public record SwedishGenerator() {
/**
 * One dictionary entry: an index, the word as a byte array, a {@code simpel} value and an array of clue strings.
 * The constructors that accept the word as a {@code String} encode it with US-ASCII.
 * The constructors that take no index assign the current value of {@code LEMMA_COUNTER} and then increment it.
 * {@code equals} and {@code hashCode} are overridden to use {@code index} only; the other components are ignored.
 */
static record Lemma(int index, byte[] word, int simpel, String[] clue) {
// Next index handed out by the constructors that take no explicit index; a plain static int incremented without synchronization.
static int LEMMA_COUNTER = 0;
public Lemma(int index, String word, int simpel, String[] clu) {
this(index, word.getBytes(StandardCharsets.US_ASCII), simpel, clu);
}
public Lemma(String word, int simpel, String clue) { this(LEMMA_COUNTER++, word, simpel, new String[]{ clue }); }
public Lemma(String word, int simpel, String[] clue) { this(LEMMA_COUNTER++, word, simpel, clue); }
byte byteAt(int idx) { return word[idx]; }
@Override public int hashCode() { return index; }
@Override public boolean equals(Object o) { return (o == this) || (o instanceof Lemma l && l.index == index); }
public Lemma(int index, String word, int simpel, String[] clu) { this(index, word.getBytes(StandardCharsets.US_ASCII), simpel, clu); }
public Lemma(String word, int simpel, String clue) { this(LEMMA_COUNTER++, word, simpel, new String[]{ clue }); }
public Lemma(String word, int simpel, String[] clue) { this(LEMMA_COUNTER++, word, simpel, clue); }
byte byteAt(int idx) { return word[idx]; }
@Override public int hashCode() { return index; }
@Override public boolean equals(Object o) { return (o == this) || (o instanceof Lemma l && l.index == index); }
}
public static record Dict(Lemma[] wordz,
DictEntry[] index,
int[] lenCounts) {
public static record Dict(
DictEntry[] index,
int[] lenCounts) {
public Dict(Lemma[] wordz) {
var lenCounts = new int[MAX_WORD_LENGTH + 1];
var index = new DictEntry[MAX_WORD_LENGTH + 1];
var lenCounts = new int[MAX_WORD_LENGTH_PLUS_ONE];
var index = new DictEntry[MAX_WORD_LENGTH_PLUS_ONE];
Arrays.setAll(index, i -> new DictEntry(i));
int maxLength = -1;
for (var lemma : wordz) {
var L = lemma.word.length;
if (L > maxLength) maxLength = L;
lenCounts[L]++;
var entry = index[L];
@@ -299,12 +296,13 @@ public record SwedishGenerator() {
for (var i = 0; i < L; i++) {
var letter = lemma.byteAt(i) - 'A';
if (letter >= 0 && letter < 26) entry.pos[i][letter].add(idx);
else throw new RuntimeException("Illegal letter: " + letter + " in word " + lemma);
if (letter < 0 || letter >= 26) throw new RuntimeException("Illegal letter: " + letter + " in word " + lemma);
entry.pos[i][letter].add(idx);
}
}
this(wordz, index, lenCounts);
this(index, lenCounts);
}
/**
 * Returns the sum of all entries of {@code lenCounts}.
 * The {@code Dict(Lemma[])} constructor increments {@code lenCounts[word length]} once per lemma,
 * so for a dictionary built through that constructor this is the number of lemmas it was given.
 * The sum is recomputed on every call.
 */
public int dictLength() { return Arrays.stream(lenCounts).sum(); }
}
static final Gson GSON = new Gson();