introduce bitloops
This commit is contained in:
@@ -3,216 +3,216 @@ package puzzle;
|
||||
public final class DictDataL2W0 {
|
||||
private DictDataL2W0() {}
|
||||
public static final long[] DATA = new long[] {
|
||||
0x300000001c5L,
|
||||
0x40000000245L,
|
||||
0x500000000b4L,
|
||||
0x600000000a4L,
|
||||
0xa0000000169L,
|
||||
0xb00000000b7L,
|
||||
0xe00000000baL,
|
||||
0x12000000020fL,
|
||||
0x1600000000aaL,
|
||||
0xbb000000002eL,
|
||||
0xbc00000001c9L,
|
||||
0xc00000000181L,
|
||||
0xc20000000269L,
|
||||
0xc300000000adL,
|
||||
0xc500000000a8L,
|
||||
0xc700000002aeL,
|
||||
0xcc0000000027L,
|
||||
0xde00000001afL,
|
||||
0xdf00000000c1L,
|
||||
0xef00000000a9L,
|
||||
0x10600000000cfL,
|
||||
0x10a00000001faL,
|
||||
0x112000000002aL,
|
||||
0x11d00000002f5L,
|
||||
0xa7e0000000261L,
|
||||
0xa84000000002cL,
|
||||
0xa8d000000002dL,
|
||||
0xa960000000101L,
|
||||
0xaa00000000030L,
|
||||
0xab3000000016fL,
|
||||
0xae7000000010fL,
|
||||
0xafb0000000125L,
|
||||
0xb7e0000000115L,
|
||||
0xc690000000244L,
|
||||
0xd28000000026cL,
|
||||
0xd35000000024dL,
|
||||
0xf1600000002d4L,
|
||||
0x3f8d00000001c1L,
|
||||
0x3f9000000000b2L,
|
||||
0x3f910000000241L,
|
||||
0x3f920000000265L,
|
||||
0x3f930000000285L,
|
||||
0x3f940000000032L,
|
||||
0x3f9600000000b3L,
|
||||
0x3f970000000281L,
|
||||
0x3f980000000085L,
|
||||
0x3f990000000185L,
|
||||
0x3f9d00000001eeL,
|
||||
0x3f9e0000000033L,
|
||||
0x3f9f0000000081L,
|
||||
0x3fa400000001a5L,
|
||||
0x3fa70000000289L,
|
||||
0x3fad0000000133L,
|
||||
0x3fb200000001a1L,
|
||||
0x3fb50000000124L,
|
||||
0x3fb70000000105L,
|
||||
0x3fb900000000a7L,
|
||||
0x3fba0000000089L,
|
||||
0x3fbb000000002bL,
|
||||
0x3fbd000000012cL,
|
||||
0x3fc1000000026fL,
|
||||
0x3fd100000001e4L,
|
||||
0x3fd20000000045L,
|
||||
0x3fd60000000028L,
|
||||
0x3fdb00000001ecL,
|
||||
0x3fdc000000012dL,
|
||||
0x3ff20000000036L,
|
||||
0x3ff50000000041L,
|
||||
0x40070000000128L,
|
||||
0x400e00000001ebL,
|
||||
0x401b00000001edL,
|
||||
0x40220000000130L,
|
||||
0x40390000000122L,
|
||||
0x403a0000000023L,
|
||||
0x403f00000002a4L,
|
||||
0x40450000000026L,
|
||||
0x404900000001e7L,
|
||||
0x404c00000001e8L,
|
||||
0x406a00000001f0L,
|
||||
0x408e0000000021L,
|
||||
0x409f00000001e2L,
|
||||
0x40b30000000126L,
|
||||
0x410b00000001b5L,
|
||||
0x410f00000001f7L,
|
||||
0x41160000000121L,
|
||||
0x412600000001e3L,
|
||||
0x416700000000afL,
|
||||
0x419e0000000215L,
|
||||
0x41a300000002b6L,
|
||||
0x42ab0000000149L,
|
||||
0x434d00000002a1L,
|
||||
0x439f00000001eaL,
|
||||
0x43bd00000001cdL,
|
||||
0x44020000000293L,
|
||||
0x453e0000000135L,
|
||||
0x45bc0000000305L,
|
||||
0x46590000000250L,
|
||||
0x467e00000001b3L,
|
||||
0x48750000000276L,
|
||||
0x49bb00000001adL,
|
||||
0x4afa0000000283L,
|
||||
0x4b6f00000001a8L,
|
||||
0x4d620000000083L,
|
||||
0x4fff0000000102L,
|
||||
0x501800000001a3L,
|
||||
0x50ce000000024aL,
|
||||
0x534400000002c3L,
|
||||
0x593f0000000077L,
|
||||
0x60c30000000322L,
|
||||
0x96e80000000165L,
|
||||
0x96ea00000000e5L,
|
||||
0x96eb0000000205L,
|
||||
0x96ef000000028fL,
|
||||
0x96f000000000b0L,
|
||||
0x96f200000000b6L,
|
||||
0x96f400000002c1L,
|
||||
0x96f6000000012bL,
|
||||
0x96fa000000008fL,
|
||||
0x97040000000209L,
|
||||
0x970f0000000295L,
|
||||
0x97120000000061L,
|
||||
0x97150000000341L,
|
||||
0x971e00000002cfL,
|
||||
0x9724000000003aL,
|
||||
0x972f0000000069L,
|
||||
0x973500000002adL,
|
||||
0x973b0000000175L,
|
||||
0x973d000000013aL,
|
||||
0x974600000001e5L,
|
||||
0x974900000002a8L,
|
||||
0x974a0000000029L,
|
||||
0x9750000000024eL,
|
||||
0x975700000002d5L,
|
||||
0x9778000000002fL,
|
||||
0x977e0000000055L,
|
||||
0x978900000002a5L,
|
||||
0x979200000001e9L,
|
||||
0x979d000000018eL,
|
||||
0x979e000000012fL,
|
||||
0x97a50000000253L,
|
||||
0x97ab0000000075L,
|
||||
0x97ac00000000b5L,
|
||||
0x97e900000002ceL,
|
||||
0x97f20000000093L,
|
||||
0x97f50000000194L,
|
||||
0x980a0000000248L,
|
||||
0x980b00000002aaL,
|
||||
0x9812000000026dL,
|
||||
0x98180000000174L,
|
||||
0x981b0000000256L,
|
||||
0x982e00000001a4L,
|
||||
0x98490000000270L,
|
||||
0x984e0000000114L,
|
||||
0x9877000000020cL,
|
||||
0x98860000000131L,
|
||||
0x988b0000000054L,
|
||||
0x98920000000138L,
|
||||
0x989a0000000082L,
|
||||
0x989f0000000182L,
|
||||
0x98a20000000263L,
|
||||
0x98ad00000001a7L,
|
||||
0x98bc000000020dL,
|
||||
0x98c40000000170L,
|
||||
0x98ca0000000073L,
|
||||
0x98de0000000064L,
|
||||
0x98fa000000006cL,
|
||||
0x98fb00000000ccL,
|
||||
0x99060000000110L,
|
||||
0x99190000000216L,
|
||||
0x991a0000000177L,
|
||||
0x992800000002c2L,
|
||||
0x993600000001a6L,
|
||||
0x994500000002e7L,
|
||||
0x99730000000042L,
|
||||
0x999b0000000070L,
|
||||
0x99b200000002f7L,
|
||||
0x99bc0000000062L,
|
||||
0x99cc0000000144L,
|
||||
0x9a2100000002faL,
|
||||
0x9a300000000063L,
|
||||
0x9a9a0000000328L,
|
||||
0x9b6f0000000291L,
|
||||
0x9cb20000000223L,
|
||||
0x9eb800000002c9L,
|
||||
0x9ebc0000000025L,
|
||||
0x9ebd0000000049L,
|
||||
0x9f85000000028eL,
|
||||
0xa121000000016eL,
|
||||
0xa1f10000000264L,
|
||||
0xa3ad0000000084L,
|
||||
0xa3d80000000225L,
|
||||
0xa49b000000004eL,
|
||||
0xa4b100000002eeL,
|
||||
0xa89a00000001daL,
|
||||
0xa9c20000000088L,
|
||||
0xaa0a0000000229L,
|
||||
0xabfe0000000096L,
|
||||
0xafb500000000d3L,
|
||||
0xb0710000000176L,
|
||||
0xb16200000001a2L,
|
||||
0xb41b0000000210L,
|
||||
0xb53c0000000116L,
|
||||
0xb55800000002d6L,
|
||||
0xb58c000000019aL,
|
||||
0xb66100000000e2L,
|
||||
0xb71e0000000047L,
|
||||
0xc08300000000faL,
|
||||
0xc301000000018aL,
|
||||
0xc75900000000c3L,
|
||||
0xc7d90000000066L,
|
||||
0xd12c0000000156L
|
||||
0x100000001c5L,
|
||||
0x90000000245L,
|
||||
0x1100000000b4L,
|
||||
0x1900000000a4L,
|
||||
0x210000000169L,
|
||||
0x2900000000b7L,
|
||||
0x3100000000baL,
|
||||
0x39000000020fL,
|
||||
0x4100000000aaL,
|
||||
0x49000000002eL,
|
||||
0x5100000001c9L,
|
||||
0x590000000181L,
|
||||
0x610000000269L,
|
||||
0x6900000000adL,
|
||||
0x7100000000a8L,
|
||||
0x7900000002aeL,
|
||||
0x810000000027L,
|
||||
0x8900000001afL,
|
||||
0x9100000000c1L,
|
||||
0x9900000000a9L,
|
||||
0xa100000000cfL,
|
||||
0xa900000001faL,
|
||||
0xb1000000002aL,
|
||||
0xb900000002f5L,
|
||||
0xc10000000261L,
|
||||
0xc9000000002cL,
|
||||
0xd1000000002dL,
|
||||
0xd90000000101L,
|
||||
0xe10000000030L,
|
||||
0xe9000000016fL,
|
||||
0xf1000000010fL,
|
||||
0xf90000000125L,
|
||||
0x1010000000115L,
|
||||
0x1090000000244L,
|
||||
0x111000000026cL,
|
||||
0x119000000024dL,
|
||||
0x12100000002d4L,
|
||||
0x12900000001c1L,
|
||||
0x13100000000b2L,
|
||||
0x1390000000241L,
|
||||
0x1410000000265L,
|
||||
0x1490000000285L,
|
||||
0x1510000000032L,
|
||||
0x15900000000b3L,
|
||||
0x1610000000281L,
|
||||
0x1690000000085L,
|
||||
0x1710000000185L,
|
||||
0x17900000001eeL,
|
||||
0x1810000000033L,
|
||||
0x1890000000081L,
|
||||
0x19100000001a5L,
|
||||
0x1990000000289L,
|
||||
0x1a10000000133L,
|
||||
0x1a900000001a1L,
|
||||
0x1b10000000124L,
|
||||
0x1b90000000105L,
|
||||
0x1c100000000a7L,
|
||||
0x1c90000000089L,
|
||||
0x1d1000000002bL,
|
||||
0x1d9000000012cL,
|
||||
0x1e1000000026fL,
|
||||
0x1e900000001e4L,
|
||||
0x1f10000000045L,
|
||||
0x1f90000000028L,
|
||||
0x20100000001ecL,
|
||||
0x209000000012dL,
|
||||
0x2110000000036L,
|
||||
0x2190000000041L,
|
||||
0x2210000000128L,
|
||||
0x22900000001ebL,
|
||||
0x23100000001edL,
|
||||
0x2390000000130L,
|
||||
0x2410000000122L,
|
||||
0x2490000000023L,
|
||||
0x25100000002a4L,
|
||||
0x2590000000026L,
|
||||
0x26100000001e7L,
|
||||
0x26900000001e8L,
|
||||
0x27100000001f0L,
|
||||
0x2790000000021L,
|
||||
0x28100000001e2L,
|
||||
0x2890000000126L,
|
||||
0x29100000001b5L,
|
||||
0x29900000001f7L,
|
||||
0x2a10000000121L,
|
||||
0x2a900000001e3L,
|
||||
0x2b100000000afL,
|
||||
0x2b90000000215L,
|
||||
0x2c100000002b6L,
|
||||
0x2c90000000149L,
|
||||
0x2d100000002a1L,
|
||||
0x2d900000001eaL,
|
||||
0x2e100000001cdL,
|
||||
0x2e90000000293L,
|
||||
0x2f10000000135L,
|
||||
0x2f90000000305L,
|
||||
0x3010000000250L,
|
||||
0x30900000001b3L,
|
||||
0x3110000000276L,
|
||||
0x31900000001adL,
|
||||
0x3210000000283L,
|
||||
0x32900000001a8L,
|
||||
0x3310000000083L,
|
||||
0x3390000000102L,
|
||||
0x34100000001a3L,
|
||||
0x349000000024aL,
|
||||
0x35100000002c3L,
|
||||
0x3590000000077L,
|
||||
0x3610000000322L,
|
||||
0x3690000000165L,
|
||||
0x37100000000e5L,
|
||||
0x3790000000205L,
|
||||
0x381000000028fL,
|
||||
0x38900000000b0L,
|
||||
0x39100000000b6L,
|
||||
0x39900000002c1L,
|
||||
0x3a1000000012bL,
|
||||
0x3a9000000008fL,
|
||||
0x3b10000000209L,
|
||||
0x3b90000000295L,
|
||||
0x3c10000000061L,
|
||||
0x3c90000000341L,
|
||||
0x3d100000002cfL,
|
||||
0x3d9000000003aL,
|
||||
0x3e10000000069L,
|
||||
0x3e900000002adL,
|
||||
0x3f10000000175L,
|
||||
0x3f9000000013aL,
|
||||
0x40100000001e5L,
|
||||
0x40900000002a8L,
|
||||
0x4110000000029L,
|
||||
0x419000000024eL,
|
||||
0x42100000002d5L,
|
||||
0x429000000002fL,
|
||||
0x4310000000055L,
|
||||
0x43900000002a5L,
|
||||
0x44100000001e9L,
|
||||
0x449000000018eL,
|
||||
0x451000000012fL,
|
||||
0x4590000000253L,
|
||||
0x4610000000075L,
|
||||
0x46900000000b5L,
|
||||
0x47100000002ceL,
|
||||
0x4790000000093L,
|
||||
0x4810000000194L,
|
||||
0x4890000000248L,
|
||||
0x49100000002aaL,
|
||||
0x499000000026dL,
|
||||
0x4a10000000174L,
|
||||
0x4a90000000256L,
|
||||
0x4b100000001a4L,
|
||||
0x4b90000000270L,
|
||||
0x4c10000000114L,
|
||||
0x4c9000000020cL,
|
||||
0x4d10000000131L,
|
||||
0x4d90000000054L,
|
||||
0x4e10000000138L,
|
||||
0x4e90000000082L,
|
||||
0x4f10000000182L,
|
||||
0x4f90000000263L,
|
||||
0x50100000001a7L,
|
||||
0x509000000020dL,
|
||||
0x5110000000170L,
|
||||
0x5190000000073L,
|
||||
0x5210000000064L,
|
||||
0x529000000006cL,
|
||||
0x53100000000ccL,
|
||||
0x5390000000110L,
|
||||
0x5410000000216L,
|
||||
0x5490000000177L,
|
||||
0x55100000002c2L,
|
||||
0x55900000001a6L,
|
||||
0x56100000002e7L,
|
||||
0x5690000000042L,
|
||||
0x5710000000070L,
|
||||
0x57900000002f7L,
|
||||
0x5810000000062L,
|
||||
0x5890000000144L,
|
||||
0x59100000002faL,
|
||||
0x5990000000063L,
|
||||
0x5a10000000328L,
|
||||
0x5a90000000291L,
|
||||
0x5b10000000223L,
|
||||
0x5b900000002c9L,
|
||||
0x5c10000000025L,
|
||||
0x5c90000000049L,
|
||||
0x5d1000000028eL,
|
||||
0x5d9000000016eL,
|
||||
0x5e10000000264L,
|
||||
0x5e90000000084L,
|
||||
0x5f10000000225L,
|
||||
0x5f9000000004eL,
|
||||
0x60100000002eeL,
|
||||
0x60900000001daL,
|
||||
0x6110000000088L,
|
||||
0x6190000000229L,
|
||||
0x6210000000096L,
|
||||
0x62900000000d3L,
|
||||
0x6310000000176L,
|
||||
0x63900000001a2L,
|
||||
0x6410000000210L,
|
||||
0x6490000000116L,
|
||||
0x65100000002d6L,
|
||||
0x659000000019aL,
|
||||
0x66100000000e2L,
|
||||
0x6690000000047L,
|
||||
0x67100000000faL,
|
||||
0x679000000018aL,
|
||||
0x68100000000c3L,
|
||||
0x6890000000066L,
|
||||
0x6910000000156L
|
||||
};
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -8,7 +8,7 @@ public final class HintScores {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Class.forName("org.sqlite.JDBC");
|
||||
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:/home/mike/dev/puzzle-generator/tools/hint/hint.sqlite")) {
|
||||
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:tools/hint/hint.sqlite")) {
|
||||
updateCrossScores(conn, HintScores::crossabilityScore, 1000);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -49,7 +49,7 @@ public class Meta {
|
||||
return new ShardLem(Lemma.pack("XXX"), -1, new String[0]);
|
||||
}
|
||||
}
|
||||
static final Path[] SHARDS = IntStream.range(0, 10).mapToObj(sId -> Path.of("/home/mike/dev/puzzle-generator/src/main/generated-sources/puzzle").resolve(sId + ".idx")).toArray(
|
||||
static final Path[] SHARDS = IntStream.range(0, 10).mapToObj(sId -> Path.of("src/main/generated-sources/puzzle").resolve(sId + ".idx")).toArray(
|
||||
Path[]::new);
|
||||
static Path shardKey(long word) {
|
||||
int L = Lemma.length(word);
|
||||
|
||||
@@ -87,14 +87,12 @@ public class SwedishGenerator {
|
||||
public static final Pick PICK_NOT_DONE = new Pick(null, null, 0);
|
||||
|
||||
@RequiredArgsConstructor
|
||||
@Getter
|
||||
@Accessors(fluent = true)
|
||||
public static final class FillStats {
|
||||
|
||||
public double simplicity;
|
||||
}
|
||||
|
||||
public static record FillResult(boolean ok, long nodes, long backtracks, int lastMRV, long elapsed, @Delegate FillStats stats) {
|
||||
public static record FillResult(boolean ok, long nodes, long backtracks, int lastMRV, long elapsed, FillStats stats) {
|
||||
|
||||
}
|
||||
|
||||
@@ -144,16 +142,17 @@ public class SwedishGenerator {
|
||||
static final long LETTER_MASK = (1L << 40) - 1; // low 40 bits
|
||||
static final long INDEX_MASK = (1L << 24) - 1; // 24 bits
|
||||
|
||||
static long pack(String word) { return pack(word.getBytes(US_ASCII)); }
|
||||
static long pack(int index, byte[] b) { return pack(b) | ((long) index << 40); }
|
||||
static long pack(String word) { return pack(word.getBytes(US_ASCII)); }
|
||||
static long packW(byte[] b) { return pack(b) /*| ((long) index << 40)*/; }
|
||||
static long pack(byte[] b) {
|
||||
long w = 0;
|
||||
for (var i = 0; i < b.length; i++) w |= ((long) b[i] & 31) << (i * 5);
|
||||
return w;
|
||||
}
|
||||
static public long from(int index, String word) { return pack(index, word.getBytes(US_ASCII)); }
|
||||
static byte byteAt(long word, int idx) { return (byte) ((word >>> (idx * 5)) & 0b11111); }
|
||||
static int length(long word) { return ((63 - numberOfLeadingZeros(word & LETTER_MASK)) / 5) + 1; }
|
||||
static public long from(String word) { return packW(word.getBytes(US_ASCII)); }
|
||||
static byte byteAt(long word, int idx) { return (byte) ((word >>> (idx * 5)) & 0b11111); }
|
||||
static int length(long word) { return ((63 - numberOfLeadingZeros(word & LETTER_MASK)) / 5) + 1; }
|
||||
static int length0(long word) { return ((63 - numberOfLeadingZeros(word & LETTER_MASK)) / 5); }
|
||||
static ThreadLocal<byte[]> BYTES = ThreadLocal.withInitial(() -> new byte[MAX_WORD_LENGTH]);
|
||||
public static String asWord(long word) {
|
||||
val len = Lemma.length(word);
|
||||
@@ -413,10 +412,10 @@ public class SwedishGenerator {
|
||||
|
||||
var tries = Math.min(MAX_TRIES_PER_SLOT, N);
|
||||
for (var t = 0; t < tries; t++) {
|
||||
double r = rng.nextFloat();
|
||||
var shardIndx = (int) (r * r * r * (N - 1));
|
||||
var w = entry.words[shardIndx];
|
||||
var lemIdx = Lemma.unpackIndex(w);
|
||||
double r = rng.nextFloat();
|
||||
var shardIdx = (int) (r * r * r * (N - 1));
|
||||
var w = entry.words[shardIdx];
|
||||
var lemIdx = Lemma.unpackIndex(w);
|
||||
if (Bit1029.get(used, lemIdx)) continue;
|
||||
low = glo;
|
||||
top = ghi;
|
||||
@@ -424,7 +423,7 @@ public class SwedishGenerator {
|
||||
|
||||
Bit1029.set(used, lemIdx);
|
||||
s.assign.w = w;
|
||||
s.assign.shardIdx = shardIndx;
|
||||
s.assign.shardIdx = shardIdx;
|
||||
if (backtrack(depth + 1)) return true;
|
||||
s.assign.w = X;
|
||||
Bit1029.clear(used, lemIdx);
|
||||
|
||||
@@ -1,40 +1,13 @@
|
||||
package puzzle;
|
||||
|
||||
import puzzle.SwedishGenerator.Lemma;
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.Closeable;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.Arrays;
|
||||
import java.util.function.LongConsumer;
|
||||
import static java.nio.charset.StandardCharsets.US_ASCII;
|
||||
|
||||
public final class CsvIndexService
|
||||
implements Closeable {
|
||||
public final class CsvIndexService {
|
||||
|
||||
static final ScopedValue<CsvIndexService> SC = ScopedValue.newInstance();
|
||||
private static final int MAGIC = 0x4C494458; // "LIDX"
|
||||
private static final int VERSION = 1;
|
||||
static int SIMPEL_IDX = 3;
|
||||
private final Path csvPath;
|
||||
private final Path idxPath;
|
||||
static int SIMPEL_IDX = 3;
|
||||
|
||||
private volatile long[] offsets; // lazy
|
||||
private volatile FileChannel csvChannel; // open once
|
||||
private final Object lock = new Object();
|
||||
|
||||
/**
 * Creates a service for a CSV file and its companion offset-index file.
 * Only the two paths are stored here; nothing is opened or read by this constructor.
 *
 * @param csvPath path kept in {@code this.csvPath}
 * @param idxPath path kept in {@code this.idxPath}
 */
public CsvIndexService(Path csvPath, Path idxPath) {
|
||||
this.csvPath = csvPath;
|
||||
this.idxPath = idxPath;
|
||||
}
|
||||
public static int lineToSimpel(String line) {
|
||||
var parts = line.split(",", 5);
|
||||
return Integer.parseInt(parts[SIMPEL_IDX].trim());
|
||||
@@ -60,195 +33,6 @@ public final class CsvIndexService
|
||||
if (Main.VERBOSE) System.err.println("Word too complex: " + line);
|
||||
return;
|
||||
}
|
||||
ok.accept(Lemma.pack(id, word.getBytes(US_ASCII)));
|
||||
ok.accept(Lemma.packW(word.getBytes(US_ASCII)));
|
||||
}
|
||||
|
||||
public static int simpel(int index) {
|
||||
try {
|
||||
if (SC.isBound())
|
||||
return lineToSimpel(SC.get().getLine(index));
|
||||
return -1;
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
throw new RuntimeException("Failed to get clues for index " + index, e);
|
||||
}
|
||||
}
|
||||
public static String[] clues(int index) {
|
||||
try {
|
||||
if (SC.isBound())
|
||||
return lineToClue(SC.get().getLine(index));
|
||||
return new String[0];
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
throw new RuntimeException("Failed to get clues for index " + index, e);
|
||||
}
|
||||
}
|
||||
/** Haal één regel op (0-based line index), met self-healing index (1x rebuild). */
|
||||
public String getLine(int lineIndex) throws IOException {
|
||||
ensureLoaded();
|
||||
|
||||
var line = readLineAt(lineIndex);
|
||||
|
||||
if (startsWithIndex(line, lineIndex)) return line;
|
||||
|
||||
// mismatch => rebuild index en nog 1x proberen
|
||||
synchronized (lock) {
|
||||
rebuildIndexLocked();
|
||||
line = readLineAt(lineIndex);
|
||||
if (startsWithIndex(line, lineIndex)) return line;
|
||||
}
|
||||
|
||||
throw new RuntimeException("Index mismatch after rebuild. Requested=" + lineIndex + ", got line=" + preview(line));
|
||||
}
|
||||
|
||||
public void ensureLoaded() throws IOException {
|
||||
if (offsets != null && csvChannel != null && csvChannel.isOpen()) return;
|
||||
|
||||
synchronized (lock) {
|
||||
if (offsets != null && csvChannel != null && csvChannel.isOpen()) return;
|
||||
|
||||
csvChannel = FileChannel.open(csvPath, StandardOpenOption.READ);
|
||||
|
||||
if (Files.exists(idxPath)) {
|
||||
try {
|
||||
offsets = readIndex(idxPath);
|
||||
return;
|
||||
} catch (IOException badIndex) {
|
||||
// fall-through -> rebuild
|
||||
}
|
||||
}
|
||||
|
||||
rebuildIndexLocked();
|
||||
}
|
||||
}
|
||||
|
||||
private void rebuildIndexLocked() throws IOException {
|
||||
var built = buildOffsets(csvPath);
|
||||
writeIndex(idxPath, built);
|
||||
offsets = built;
|
||||
}
|
||||
|
||||
private String readLineAt(int lineIndex) throws IOException {
|
||||
var local = offsets;
|
||||
if (lineIndex < 0 || lineIndex >= local.length) {
|
||||
throw new IndexOutOfBoundsException("lineIndex=" + lineIndex + ", max=" + (local.length - 1));
|
||||
}
|
||||
|
||||
long currentPos = local[lineIndex];
|
||||
|
||||
// lees in blokjes (sneller dan 1 byte) tot newline
|
||||
var buf = new byte[8192];
|
||||
var total = 0;
|
||||
var out = new byte[256];
|
||||
|
||||
while (true) {
|
||||
var bb = ByteBuffer.wrap(buf);
|
||||
var n = csvChannel.read(bb, currentPos);
|
||||
if (n < 0) break; // EOF
|
||||
currentPos += n;
|
||||
var end = n;
|
||||
|
||||
for (var i = 0; i < end; i++) {
|
||||
var b = buf[i];
|
||||
|
||||
if (b == (byte) '\n') {
|
||||
return new String(out, 0, total, StandardCharsets.UTF_8);
|
||||
}
|
||||
if (b == (byte) '\r') continue;
|
||||
|
||||
if (total == out.length) out = Arrays.copyOf(out, out.length * 2);
|
||||
out[total++] = b;
|
||||
}
|
||||
}
|
||||
|
||||
return new String(out, 0, total, StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
/** Check: begint de regel met "<lineIndex>," */
|
||||
private static boolean startsWithIndex(String line, int lineIndex) {
|
||||
if (line == null || line.isEmpty()) return false;
|
||||
|
||||
var comma = line.indexOf(',');
|
||||
if (comma <= 0) return false;
|
||||
|
||||
// snelle parse zonder split
|
||||
long v = 0;
|
||||
for (var i = 0; i < comma; i++) {
|
||||
var c = line.charAt(i);
|
||||
if (c < '0' || c > '9') return false;
|
||||
v = (v * 10) + (c - '0');
|
||||
if (v > Integer.MAX_VALUE) return false;
|
||||
}
|
||||
return v == lineIndex;
|
||||
}
|
||||
|
||||
private static String preview(String s) {
|
||||
if (s == null) return "null";
|
||||
return s.length() <= 120 ? s : s.substring(0, 120) + "...";
|
||||
}
|
||||
|
||||
/** Bouw offsets door newlines te scannen. Resultaat is exact getrimd. */
|
||||
public static long[] buildOffsets(Path path) throws IOException {
|
||||
try (var ch = FileChannel.open(path, StandardOpenOption.READ)) {
|
||||
var offs = new long[131072]; // start-capacity, groeit indien nodig
|
||||
var c = 0;
|
||||
offs[c++] = 0;
|
||||
|
||||
var buf = ByteBuffer.allocateDirect(1 << 20);
|
||||
int pos = 0;
|
||||
|
||||
while (true) {
|
||||
buf.clear();
|
||||
var n = ch.read(buf);
|
||||
if (n < 0) break;
|
||||
buf.flip();
|
||||
|
||||
for (var i = 0; i < n; i++) {
|
||||
if (buf.get(i) == (byte) '\n') {
|
||||
if (c == offs.length) offs = Arrays.copyOf(offs, offs.length * 2);
|
||||
offs[c++] = pos + i + 1;
|
||||
}
|
||||
}
|
||||
pos += n;
|
||||
}
|
||||
|
||||
return Arrays.copyOf(offs, c);
|
||||
}
|
||||
}
|
||||
|
||||
public static void writeIndex(Path out, long[] offsets) throws IOException {
|
||||
try (var dos = new DataOutputStream(new BufferedOutputStream(Files.newOutputStream(
|
||||
out, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)))) {
|
||||
dos.writeInt(MAGIC);
|
||||
dos.writeInt(VERSION);
|
||||
dos.writeInt(offsets.length);
|
||||
for (var v : offsets) dos.writeLong(v);
|
||||
}
|
||||
}
|
||||
|
||||
public static long[] readIndex(Path in) throws IOException {
|
||||
try (var dis = new DataInputStream(new BufferedInputStream(Files.newInputStream(in)))) {
|
||||
if (dis.readInt() != MAGIC) throw new IOException("Not a LIDX file");
|
||||
|
||||
var version = dis.readInt();
|
||||
if (version != VERSION) throw new IOException("Unsupported version: " + version);
|
||||
|
||||
var n = dis.readInt();
|
||||
if (n < 0) throw new IOException("Corrupt length: " + n);
|
||||
|
||||
var offsets = new long[n];
|
||||
for (var i = 0; i < n; i++) offsets[i] = dis.readLong();
|
||||
return offsets;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
synchronized (lock) {
|
||||
if (csvChannel != null) csvChannel.close();
|
||||
csvChannel = null;
|
||||
offsets = null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,213 +0,0 @@
|
||||
package puzzle;
|
||||
|
||||
import puzzle.Export.Dicts;
|
||||
import puzzle.SwedishGenerator.Dict;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
|
||||
public final class DictCodeGen {
|
||||
|
||||
/**
 * Command-line entry point; forwards the arguments unchanged to
 * {@code DictJavaGenerator.main}.
 *
 * @param args passed through as-is
 * @throws Exception whatever {@code DictJavaGenerator.main} throws
 */
public static void main(String[] args) throws Exception {
|
||||
|
||||
DictJavaGenerator.main(args); // uses your makeDict logic
|
||||
}
|
||||
/**
|
||||
* Generates Java source files for dictionary data, split by word length (2..8),
|
||||
* and further chunked to avoid "code too large" / constant pool issues.
|
||||
*
|
||||
* Output:
|
||||
* - DictDataL2.java .. DictDataL8.java (arrays chunked)
|
||||
* - DictData.java (aggregator that builds Dict)
|
||||
*
|
||||
* Usage:
|
||||
* java 'puzzle.DictCodeGen$DictJavaGenerator' [wordsFile] [outDir]
|
||||
*
|
||||
* Example:
|
||||
* java 'puzzle.DictCodeGen$DictJavaGenerator' nl_score_hints_v3.csv src/main/generated-sources/puzzle
|
||||
*/
|
||||
public final class DictJavaGenerator {
|
||||
|
||||
// tune if needed
|
||||
private static final int WORDS_CHUNK = 8_192 >>> 5; // longs per chunk
|
||||
private static final int POS_CHUNK = 8_192 >>> 5; // longs per chunk
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v3.csv");
|
||||
Path outDir = Path.of(args.length > 1 ? args[1] : "/home/mike/dev/puzzle-generator/src/main/generated-sources/puzzle");
|
||||
writeDict(wordsFile, outDir);
|
||||
}
|
||||
public static Dict writeDict(Path wordsFile, Path outDir) {
|
||||
|
||||
String pkg = "puzzle";
|
||||
|
||||
SwedishGenerator.Dict dict = null;
|
||||
try {
|
||||
dict = buildDict(wordsFile);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
try {
|
||||
Files.createDirectories(outDir);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
// emit L2..L8
|
||||
for (int L = 2; L <= 8; L++) {
|
||||
var entry = dict.index()[L];
|
||||
if (entry == null || entry.words() == null || entry.words().length == 0) {
|
||||
throw new IllegalStateException("No words for length " + L);
|
||||
}
|
||||
try {
|
||||
writeLengthClass(outDir, pkg, "DictDataL" + L, L, entry);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
// emit aggregator
|
||||
try {
|
||||
writeAggregator(outDir, pkg, "DictData", dict.length());
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
System.out.println("Generated dictionary sources into: " + outDir.toAbsolutePath());
|
||||
return dict;
|
||||
}
|
||||
|
||||
private static SwedishGenerator.Dict buildDict(Path wordsPath) throws IOException {
|
||||
var map = new LongArrayList(100_000);
|
||||
try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
|
||||
lines.forEach(line -> CsvIndexService.lineToLemma(line, map::add));
|
||||
}
|
||||
return Dicts.makeDict(map.toArray());
|
||||
}
|
||||
|
||||
private static void writeAggregator(Path outDir, String pkg, String cls, int totalLen) throws IOException {
|
||||
Path out = outDir.resolve(cls + ".java");
|
||||
try (BufferedWriter w = Files.newBufferedWriter(out, StandardCharsets.UTF_8,
|
||||
StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
|
||||
|
||||
w.write("package " + pkg + ";\n\n");
|
||||
w.write("public final class " + cls + " {\n");
|
||||
w.write(" private " + cls + "() {}\n\n");
|
||||
w.write(" public static final SwedishGenerator.Dict DICT = build();\n\n");
|
||||
w.write(" private static SwedishGenerator.Dict build() {\n");
|
||||
w.write(" SwedishGenerator.DictEntry[] idx = new SwedishGenerator.DictEntry[SwedishGenerator.MAX_WORD_LENGTH_PLUS_ONE];\n");
|
||||
w.write(" idx[2] = DictDataL2.entry();\n");
|
||||
w.write(" idx[3] = DictDataL3.entry();\n");
|
||||
w.write(" idx[4] = DictDataL4.entry();\n");
|
||||
w.write(" idx[5] = DictDataL5.entry();\n");
|
||||
w.write(" idx[6] = DictDataL6.entry();\n");
|
||||
w.write(" idx[7] = DictDataL7.entry();\n");
|
||||
w.write(" idx[8] = DictDataL8.entry();\n");
|
||||
w.write(" return new SwedishGenerator.Dict(idx, " + totalLen + ");\n");
|
||||
w.write(" }\n");
|
||||
w.write("}\n");
|
||||
}
|
||||
}
|
||||
|
||||
private static void writeLengthClass(Path outDir, String pkg, String cls, int L, SwedishGenerator.DictEntry e) throws IOException {
|
||||
Path out = outDir.resolve(cls + ".java");
|
||||
try (BufferedWriter w = Files.newBufferedWriter(out, StandardCharsets.UTF_8,
|
||||
StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
|
||||
|
||||
w.write("package " + pkg + ";\n\n");
|
||||
w.write("public final class " + cls + " {\n");
|
||||
w.write(" private " + cls + "() {}\n\n");
|
||||
|
||||
long[] words = e.words();
|
||||
|
||||
// flatten posBitsets: [rows][cols] -> flat[]
|
||||
long[][] bs = e.posBitsets();
|
||||
int rows = bs.length;
|
||||
int cols = bs[0].length;
|
||||
long[] flat = new long[rows * cols];
|
||||
int t = 0;
|
||||
for (int r = 0; r < rows; r++) {
|
||||
System.arraycopy(bs[r], 0, flat, t, cols);
|
||||
t += cols;
|
||||
}
|
||||
|
||||
w.write(" static final int LEN = " + L + ";\n");
|
||||
w.write(" static final int ROWS = " + rows + ";\n");
|
||||
w.write(" static final int COLS = " + cols + ";\n");
|
||||
w.write(" static final int WORDS_LEN = " + words.length + ";\n");
|
||||
w.write(" static final int POS_LEN = " + flat.length + ";\n\n");
|
||||
|
||||
// chunked arrays
|
||||
int wordChunks = emitLongArrayChunked(w, "WORDS", words, WORDS_CHUNK);
|
||||
int posChunks = emitLongArrayChunked(w, "POS", flat, POS_CHUNK);
|
||||
|
||||
// joiners
|
||||
emitJoiner(w, "WORDS", "WORDS", words.length, wordChunks);
|
||||
emitJoiner(w, "POS", "POS", flat.length, posChunks);
|
||||
|
||||
// entry builder
|
||||
w.write(" public static SwedishGenerator.DictEntry entry() {\n");
|
||||
w.write(" long[] words = WORDS();\n");
|
||||
w.write(" long[] flat = POS();\n");
|
||||
w.write(" long[][] pos = reshape(flat, ROWS, COLS);\n");
|
||||
w.write(" return new SwedishGenerator.DictEntry(words, pos, words.length, (words.length + 63) >>> 6);\n");
|
||||
w.write(" }\n\n");
|
||||
|
||||
// helpers
|
||||
w.write(" private static int copy(long[] dst, int at, long[] src) {\n");
|
||||
w.write(" System.arraycopy(src, 0, dst, at, src.length);\n");
|
||||
w.write(" return at + src.length;\n");
|
||||
w.write(" }\n\n");
|
||||
|
||||
w.write(" private static long[][] reshape(long[] flat, int rows, int cols) {\n");
|
||||
w.write(" long[][] out = new long[rows][cols];\n");
|
||||
w.write(" int k = 0;\n");
|
||||
w.write(" for (int r = 0; r < rows; r++) {\n");
|
||||
w.write(" System.arraycopy(flat, k, out[r], 0, cols);\n");
|
||||
w.write(" k += cols;\n");
|
||||
w.write(" }\n");
|
||||
w.write(" return out;\n");
|
||||
w.write(" }\n");
|
||||
|
||||
w.write("}\n");
|
||||
}
|
||||
}
|
||||
|
||||
/** Emits baseName_0..k arrays and returns chunkCount. */
|
||||
private static int emitLongArrayChunked(BufferedWriter w, String baseName, long[] data, int chunkSize) throws IOException {
|
||||
int chunks = (data.length + chunkSize - 1) / chunkSize;
|
||||
for (int ci = 0; ci < chunks; ci++) {
|
||||
int from = ci * chunkSize;
|
||||
int to = Math.min(data.length, from + chunkSize);
|
||||
|
||||
w.write(" static final long[] " + baseName + "_" + ci + " = new long[] {\n");
|
||||
for (int i = from; i < to; i++) {
|
||||
w.write(" " + toLongLiteral(data[i]) + (i + 1 < to ? "," : "") + "\n");
|
||||
}
|
||||
w.write(" };\n\n");
|
||||
}
|
||||
return chunks;
|
||||
}
|
||||
|
||||
private static void emitJoiner(BufferedWriter w, String funcName, String baseName, int totalLen, int chunks) throws IOException {
|
||||
w.write(" static long[] " + funcName + "() {\n");
|
||||
w.write(" long[] out = new long[" + totalLen + "];\n");
|
||||
w.write(" int k = 0;\n");
|
||||
for (int ci = 0; ci < chunks; ci++) {
|
||||
w.write(" k = copy(out, k, " + baseName + "_" + ci + ");\n");
|
||||
}
|
||||
w.write(" return out;\n");
|
||||
w.write(" }\n\n");
|
||||
}
|
||||
|
||||
private static String toLongLiteral(long v) {
|
||||
// compact unsigned hex literal
|
||||
return "0x" + Long.toUnsignedString(v, 16) + "L";
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,10 +1,8 @@
|
||||
package puzzle;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import puzzle.DictCodeGen.DictJavaGenerator;
|
||||
import lombok.val;
|
||||
import puzzle.Export.Dicts;
|
||||
import puzzle.Export.IntListDTO;
|
||||
import puzzle.SwedishGenerator.Dict;
|
||||
import puzzle.SwedishGenerator.Lemma;
|
||||
|
||||
import java.io.*;
|
||||
@@ -12,23 +10,20 @@ import java.nio.ByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
||||
public final class DictJavaGeneratorMulti {
|
||||
|
||||
// Smaller = more files, but safer for javac/class limits.
|
||||
private static final int WORDS_CHUNK = 8_192;
|
||||
private static final int POS_CHUNK = 8_192;
|
||||
@Test
|
||||
public void dictCodeGen15() {
|
||||
System.out.println(DictData.DICT);
|
||||
}
|
||||
public static void main(String[] args) throws Exception {
|
||||
Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v3.csv");
|
||||
Path outDir = Path.of(args.length > 1 ? args[1] : "/home/mike/dev/puzzle-generator/src/main/generated-sources/puzzle");
|
||||
String pkg = "puzzle";
|
||||
Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v3.csv");
|
||||
Path outDir = Path.of(args.length > 1 ? args[1] : "src/main/generated-sources/puzzle");
|
||||
String pkg = "puzzle";
|
||||
HashMap<Path, ShardBuilder> builders = new HashMap<Path, ShardBuilder>(16);
|
||||
|
||||
SwedishGenerator.Dict dict = buildDict(wordsFile);
|
||||
SwedishGenerator.Dict dict = buildDict(wordsFile, builders);
|
||||
|
||||
Files.createDirectories(outDir);
|
||||
|
||||
@@ -43,117 +38,60 @@ public final class DictJavaGeneratorMulti {
|
||||
|
||||
// Aggregator
|
||||
writeAggregator(outDir, pkg, "DictData", dict.length());
|
||||
var csv = Paths.get("nl_score_hints_v3.csv");
|
||||
var idx = Paths.get("nl_score_hints_v3.idx");
|
||||
//var csv = Paths.get("nl_score_hints_v3.csv");
|
||||
//var idx = Paths.get("nl_score_hints_v3.idx");
|
||||
|
||||
ScopedValue.where(CsvIndexService.SC, new CsvIndexService(csv, idx)).run(() -> generateHintShards(dict, outDir));
|
||||
//ScopedValue.where(CsvIndexService.SC, new CsvIndexService(csv, idx)).run(() -> generateHintShards(dict, builders, outDir));
|
||||
System.out.println("Generated sources into: " + outDir.toAbsolutePath());
|
||||
}
|
||||
|
||||
private static SwedishGenerator.Dict buildDict(Path wordsPath) throws IOException {
|
||||
private static SwedishGenerator.Dict buildDict(Path wordsPath, HashMap<Path, ShardBuilder> builders) throws IOException {
|
||||
var map = new LongArrayList(100_000);
|
||||
try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
|
||||
lines.forEach(line -> CsvIndexService.lineToLemma(line, map::add));
|
||||
lines.forEach(line -> {
|
||||
CsvIndexService.lineToLemma(line, w -> {
|
||||
long len = Lemma.length0(w);
|
||||
|
||||
String word = Lemma.asWord(w);
|
||||
String[] clues = CsvIndexService.lineToClue(line);
|
||||
int simpel = CsvIndexService.lineToSimpel(line);
|
||||
|
||||
// serialize to: WORD \t SIMPEL \t JSON \n
|
||||
// (if you want to store an already-serialized JSON string: use Gson.toJson(clues))
|
||||
String json = Meta.GSON.toJson(clues);
|
||||
String recStr = word + "\t" + simpel + "\t" + json + "\n";
|
||||
byte[] rec = recStr.getBytes(StandardCharsets.UTF_8);
|
||||
|
||||
var key = Meta.shardKey(w);
|
||||
ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
|
||||
try {
|
||||
long index = ((long) sb.addRecord(rec) << 3) | len;
|
||||
map.add(w | (index << 40));
|
||||
} catch (IOException e) {
|
||||
throw new UncheckedIOException(e);
|
||||
}
|
||||
});
|
||||
|
||||
});
|
||||
}
|
||||
return Dicts.makeDict(map.toArray());
|
||||
}
|
||||
|
||||
static final int VERSION = 1;
|
||||
static String wordFromLine(String line) {
|
||||
// ID,WORD,*,*,"JSON"
|
||||
var parts = line.split(",", 5);
|
||||
return parts[1].trim();
|
||||
}
|
||||
|
||||
static final class ShardBuilder {
|
||||
|
||||
final IntListDTO offsets = new IntListDTO(4096);
|
||||
final ByteArrayOutputStream data = new ByteArrayOutputStream(1 << 20); // grows
|
||||
void addRecord(byte[] rec) throws IOException {
|
||||
offsets.add(data.size());
|
||||
int addRecord(byte[] rec) throws IOException {
|
||||
var size = data.size();
|
||||
val currSize = offsets.size();
|
||||
offsets.add(size);
|
||||
data.write(rec);
|
||||
return currSize;
|
||||
}
|
||||
}
|
||||
static void generateHintShards(Path csv, Path outDir) throws IOException {
|
||||
Files.createDirectories(outDir);
|
||||
|
||||
var builders = new java.util.HashMap<String, ShardBuilder>(256);
|
||||
|
||||
try (var lines = Files.lines(csv, StandardCharsets.UTF_8)) {
|
||||
lines.forEach(line -> {
|
||||
if (line == null || line.isBlank()) return;
|
||||
|
||||
String word = wordFromLine(line);
|
||||
String[] clues = CsvIndexService.lineToClue(line);
|
||||
int simpel = CsvIndexService.lineToSimpel(line);
|
||||
|
||||
// serialize to: WORD \t JSON \n
|
||||
// (als je al JSON string wilt bewaren: gebruik Gson.toJson(clues))
|
||||
String json = Meta.GSON.toJson(clues);
|
||||
String recStr = word + "\t" + simpel + "\t" + json + "\n";
|
||||
byte[] rec = recStr.getBytes(StandardCharsets.UTF_8);
|
||||
|
||||
String key = Meta.shardKey(word);
|
||||
ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
|
||||
try {
|
||||
sb.addRecord(rec);
|
||||
} catch (IOException e) {
|
||||
throw new UncheckedIOException(e);
|
||||
}
|
||||
});
|
||||
} catch (UncheckedIOException uioe) {
|
||||
throw uioe.getCause();
|
||||
}
|
||||
|
||||
// flush all shards to disk as <key>.idx (e.g. 6Z.idx)
|
||||
for (var e : builders.entrySet()) {
|
||||
writeIndexedShard(outDir.resolve(e.getKey() + ".idx"), e.getValue());
|
||||
}
|
||||
}
|
||||
static void generateHintShards(Dict dict, Path outDir) {
|
||||
try {
|
||||
Files.createDirectories(outDir);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
var builders = new java.util.HashMap<Path, ShardBuilder>(256);
|
||||
|
||||
for (var index : dict.index()) {
|
||||
long[] words = index.words();
|
||||
for (int shardIdx = 0; shardIdx < words.length; shardIdx++) {
|
||||
var w = words[shardIdx];
|
||||
String word = Lemma.asWord(w);
|
||||
int wIdx = Lemma.unpackIndex(w);
|
||||
String[] clues = CsvIndexService.clues(wIdx);
|
||||
int simpel = CsvIndexService.simpel(wIdx);
|
||||
|
||||
// serialize to: WORD \t JSON \n
|
||||
// (als je al JSON string wilt bewaren: gebruik Gson.toJson(clues))
|
||||
String json = Meta.GSON.toJson(clues);
|
||||
String recStr = word + "\t" + simpel + "\t" + json + "\n";
|
||||
byte[] rec = recStr.getBytes(StandardCharsets.UTF_8);
|
||||
|
||||
var key = Meta.shardKey(w);
|
||||
ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
|
||||
try {
|
||||
sb.addRecord(rec);
|
||||
} catch (IOException e) {
|
||||
throw new UncheckedIOException(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// flush all shards to disk as <key>.idx (e.g. 6Z.idx)
|
||||
for (var e : builders.entrySet()) {
|
||||
try {
|
||||
writeIndexedShard(e.getKey(), e.getValue());
|
||||
} catch (IOException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
static void writeIndexedShard(Path out, ShardBuilder sb) throws IOException {
|
||||
int n = sb.offsets.size();
|
||||
int[] offs = sb.offsets.toArray();
|
||||
|
||||
@@ -126,25 +126,67 @@ public class ExportFormatTest {
|
||||
}
|
||||
}
|
||||
@Test
|
||||
void testIndex() {
|
||||
var csv = Paths.get("nl_score_hints_v3.csv");
|
||||
var idx = Paths.get("nl_score_hints_v3.idx");
|
||||
void testShardToClue() {
|
||||
for (int length = 2; length <= 8; length++) {
|
||||
val entry = DictData.DICT.index()[length];
|
||||
if (entry == null) continue;
|
||||
val words = entry.words();
|
||||
for (int i = 0; i < Math.min(words.length, 5); i++) {
|
||||
val wordVal = words[i];
|
||||
val word = Lemma.asWord(wordVal);
|
||||
val assigned = new Assign(wordVal, i);
|
||||
val shard = Meta.shardKey(assigned.w);
|
||||
val clueRec = Meta.readRecord(shard, i);
|
||||
|
||||
try (var svc = new CsvIndexService(csv, idx)) {
|
||||
System.out.println(svc.getLine(1319));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
assertNotNull(clueRec);
|
||||
assertEquals(word, Lemma.asWord(clueRec.w()));
|
||||
assertTrue(clueRec.simpel() >= 0);
|
||||
assertTrue(clueRec.clues().length > 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testShardToClue() {
|
||||
val index = 1;
|
||||
val word = DictData.DICT.index()[3].words()[index];
|
||||
val assigned = new Assign(word, index);
|
||||
val lemma = Lemma.unpackIndex(word);
|
||||
var word1 = Lemma.asWord(word);
|
||||
val shard = Meta.shardKey(assigned.w);
|
||||
val clue = Meta.readRecord(shard, index);
|
||||
assertNotNull(clue);
|
||||
void testSpecificWords() {
|
||||
// These words are known to be in the CSV and likely in the dictionary
|
||||
String[] testWords = {"EEN", "NAAR", "IEDEREEN"};
|
||||
for (String wStr : testWords) {
|
||||
long w = Lemma.pack(wStr);
|
||||
int L = wStr.length();
|
||||
var entry = DictData.DICT.index()[L];
|
||||
if (entry == null) continue;
|
||||
|
||||
// Find index of word in entry
|
||||
int idx = -1;
|
||||
long[] words = entry.words();
|
||||
for (int i = 0; i < words.length; i++) {
|
||||
if (Lemma.asWord(words[i]).equals(wStr)) {
|
||||
idx = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (idx != -1) {
|
||||
val shard = Meta.shardKey(w);
|
||||
val clueRec = Meta.readRecord(shard, idx);
|
||||
assertNotNull(clueRec);
|
||||
assertEquals(wStr, Lemma.asWord(clueRec.w()));
|
||||
// Check some expected complexity values (from CSV head output, column 3)
|
||||
if (wStr.equals("EEN")) {
|
||||
assertEquals(451, clueRec.simpel());
|
||||
assertEquals("het getal 1", clueRec.clues()[0]);
|
||||
}
|
||||
if (wStr.equals("NAAR")) {
|
||||
assertEquals(497, clueRec.simpel());
|
||||
assertEquals("in de richting van", clueRec.clues()[0]);
|
||||
}
|
||||
if (wStr.equals("IEDEREEN")) {
|
||||
assertEquals(501, clueRec.simpel());
|
||||
assertEquals("elke persoon", clueRec.clues()[0]);
|
||||
}
|
||||
|
||||
assertTrue(clueRec.clues().length > 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -47,7 +47,7 @@ public class MainTest {
|
||||
this.tries = 1;
|
||||
this.verbose = false;
|
||||
}};
|
||||
static final Dict dict = loadDict(opts.wordsPath);
|
||||
static final Dict dict = DictData.DICT;//loadDict(opts.wordsPath);
|
||||
public static Dict loadDict(String wordsPath) {
|
||||
var map = new LongArrayList(100_000);
|
||||
try (var lines = Files.lines(Path.of(wordsPath), StandardCharsets.UTF_8)) {
|
||||
@@ -192,7 +192,7 @@ public class MainTest {
|
||||
"1 \n" +
|
||||
"1 \n" +
|
||||
"3 3 \n" +
|
||||
"3 0 3 \n" +
|
||||
"3 0 3 \n" +
|
||||
"3 \n" +
|
||||
"3 \n" +
|
||||
"222 3");
|
||||
@@ -200,8 +200,8 @@ public class MainTest {
|
||||
var slots = Masker.extractSlots(mask, dict.index());
|
||||
val slotInfo = Masker.scoreSlots(new int[slots.length], slots);
|
||||
var grid = mask.toGrid();
|
||||
var filled = fillMask(rng, slotInfo, grid, false);
|
||||
// val res = new PuzzleResult(new Clued(mask), new Gridded(grid), slotInfo, filled).exportFormatFromFilled(0, new Rewards(0, 0, 0));
|
||||
// var filled = fillMask(rng, slotInfo, grid, false);
|
||||
// val res = new PuzzleResult(new Clued(mask), new Gridded(grid), slotInfo, filled).exportFormatFromFilled(0, new Rewards(0, 0, 0));
|
||||
}
|
||||
@Test
|
||||
void testFiller() {
|
||||
@@ -236,7 +236,7 @@ public class MainTest {
|
||||
int foundSeed = -1;
|
||||
for (int i = 0; i < 50; i++) {
|
||||
int seed = opts.seed + i;
|
||||
res = Main.attempt(new Rng(seed), dict, opts);
|
||||
res = Main.attempt(new Rng(seed), DictData.DICT, opts);
|
||||
if (res != null && res.filled().ok()) {
|
||||
foundSeed = seed;
|
||||
System.out.println("[DEBUG_LOG] Seed found: " + seed);
|
||||
|
||||
@@ -8,6 +8,7 @@ import puzzle.Export.Dicts;
|
||||
import puzzle.Export.Gridded;
|
||||
import puzzle.Export.IntListDTO;
|
||||
import puzzle.Export.LetterVisit.LetterAt;
|
||||
import puzzle.Masker.Slot;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@@ -26,39 +27,39 @@ public class SwedishGeneratorTest {
|
||||
public static Context get() { return CTX.get(); }
|
||||
}
|
||||
|
||||
static final long TEST = Lemma.from(0, "TEST");
|
||||
static final long TEST = Lemma.from("TEST");
|
||||
static final long[] WORDS = new long[]{
|
||||
Lemma.from(1, "AT"),
|
||||
Lemma.from(2, "CAT"),
|
||||
Lemma.from(3, "DOGS"),
|
||||
Lemma.from(4, "APPLE"),
|
||||
Lemma.from(5, "APPLY"),
|
||||
Lemma.from(6, "BANAN"),
|
||||
Lemma.from(7, "BANANA"),
|
||||
Lemma.from(8, "BANANAS"),
|
||||
Lemma.from(9, "BANANASS") // length 8
|
||||
Lemma.from("AT"),
|
||||
Lemma.from("CAT"),
|
||||
Lemma.from("DOGS"),
|
||||
Lemma.from("APPLE"),
|
||||
Lemma.from("APPLY"),
|
||||
Lemma.from("BANAN"),
|
||||
Lemma.from("BANANA"),
|
||||
Lemma.from("BANANAS"),
|
||||
Lemma.from("BANANASS") // length 8
|
||||
};
|
||||
static final long l2a = Lemma.from(10, "IN");
|
||||
static final long l4a = Lemma.from(11, "INER");
|
||||
static final long l6a = Lemma.from(12, "INEREN");
|
||||
static final long l7a = Lemma.from(13, "INERENA");
|
||||
static final long l8a = Lemma.from(14, "INERENAE");
|
||||
static final long l1 = Lemma.from(15, "APPLE");
|
||||
static final long l2 = Lemma.from(16, "AXE");
|
||||
static final long l2a = Lemma.from("IN");
|
||||
static final long l4a = Lemma.from("INER");
|
||||
static final long l6a = Lemma.from("INEREN");
|
||||
static final long l7a = Lemma.from("INERENA");
|
||||
static final long l8a = Lemma.from("INERENAE");
|
||||
static final long l1 = Lemma.from("APPLE");
|
||||
static final long l2 = Lemma.from("AXE");
|
||||
|
||||
static final long[] WORDS2 = new long[]{ Lemma.from(17, "IN"),
|
||||
Lemma.from(18, "APPLE"),
|
||||
Lemma.from(19, "APPLY"),
|
||||
Lemma.from(20, "BANAN"),
|
||||
Lemma.from(21, "INE"),
|
||||
Lemma.from(22, "INER"),
|
||||
Lemma.from(23, "INEREN"),
|
||||
Lemma.from(24, "INERENA"),
|
||||
Lemma.from(25, "INERENAE") };
|
||||
static final long ABC = Lemma.from(26, "ABC");
|
||||
static final long ABD = Lemma.from(27, "ABD");
|
||||
static final long AZ = Lemma.from(28, "AZ");
|
||||
static final long AB = Lemma.from(29, "AB");
|
||||
static final long[] WORDS2 = new long[]{ Lemma.from("IN"),
|
||||
Lemma.from("APPLE"),
|
||||
Lemma.from("APPLY"),
|
||||
Lemma.from("BANAN"),
|
||||
Lemma.from("INE"),
|
||||
Lemma.from("INER"),
|
||||
Lemma.from("INEREN"),
|
||||
Lemma.from("INERENA"),
|
||||
Lemma.from("INERENAE") };
|
||||
static final long ABC = Lemma.from("ABC");
|
||||
static final long ABD = Lemma.from("ABD");
|
||||
static final long AZ = Lemma.from("AZ");
|
||||
static final long AB = Lemma.from("AB");
|
||||
static final byte LETTER_A = ((byte) 'A') & 31;
|
||||
static final byte LETTER_B = ((byte) 'B') & 31;
|
||||
static final byte LETTER_C = ((byte) 'C') & 31;
|
||||
@@ -139,7 +140,7 @@ public class SwedishGeneratorTest {
|
||||
@Test
|
||||
void testPatternForSlotAllLetters() {
|
||||
var grid = new Gridded(createEmpty());
|
||||
var key = Masker.Slot.packSlotKey(OFF_0_0, CLUE_RIGHT);
|
||||
var key = Slot.packSlotKey(OFF_0_0, CLUE_RIGHT);
|
||||
val clues = Masker.Clues.createEmpty();
|
||||
clues.setClueLo(IDX_0_0.lo, CLUE_RIGHT);
|
||||
GridBuilder.placeWord(grid.grid(), grid.grid().g, key, (1L << OFF_0_1) | (1L << OFF_0_2) | (1L << OFF_0_3), 0L, ABC);
|
||||
@@ -152,9 +153,9 @@ public class SwedishGeneratorTest {
|
||||
@Test
|
||||
void testPatternForSlotMixed() {
|
||||
var grid = createEmpty();
|
||||
GridBuilder.placeWord(grid, grid.g, Masker.Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_0_0, 0, Lemma.from(0, "A"));
|
||||
GridBuilder.placeWord(grid, grid.g, Masker.Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_2_0, 0, Lemma.from(0, "C"));
|
||||
var key = Masker.Slot.packSlotKey(OFF_1_0, CLUE_RIGHT);
|
||||
GridBuilder.placeWord(grid, grid.g, Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_0_0, 0, Lemma.from("A"));
|
||||
GridBuilder.placeWord(grid, grid.g, Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_2_0, 0, Lemma.from("C"));
|
||||
var key = Slot.packSlotKey(OFF_1_0, CLUE_RIGHT);
|
||||
var pattern = patternForSlot(grid.lo, grid.hi, grid.g, key, 7L, 0L);
|
||||
assertEquals(14081L, pattern);
|
||||
}
|
||||
@@ -162,7 +163,7 @@ public class SwedishGeneratorTest {
|
||||
@Test
|
||||
void testPatternForSlotAllDashes() {
|
||||
var grid = createEmpty();
|
||||
var key = Masker.Slot.packSlotKey(1 << Masker.Slot.BIT_FOR_DIR, CLUE_RIGHT);
|
||||
var key = Slot.packSlotKey(1 << Slot.BIT_FOR_DIR, CLUE_RIGHT);
|
||||
var pattern = patternForSlot(grid.lo, grid.hi, grid.g, key, 7L, 0L);
|
||||
assertEquals(0L, pattern);
|
||||
}
|
||||
@@ -170,8 +171,8 @@ public class SwedishGeneratorTest {
|
||||
@Test
|
||||
void testPatternForSlotSingleLetter() {
|
||||
var grid = createEmpty();
|
||||
GridBuilder.placeWord(grid, grid.g, Masker.Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_0_0, 0, Lemma.from(0, "A"));
|
||||
var key = Masker.Slot.packSlotKey(1, CLUE_RIGHT);
|
||||
GridBuilder.placeWord(grid, grid.g, Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_0_0, 0, Lemma.from("A"));
|
||||
var key = Slot.packSlotKey(1, CLUE_RIGHT);
|
||||
var pattern = patternForSlot(grid.lo, grid.hi, grid.g, key, 7L, 0L);
|
||||
assertEquals(1L, pattern);
|
||||
}
|
||||
@@ -196,7 +197,7 @@ public class SwedishGeneratorTest {
|
||||
@Test
|
||||
void testGrid() {
|
||||
var grid = new Gridded(createEmpty());
|
||||
GridBuilder.placeWord(grid.grid(), grid.grid().g, Masker.Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_0_0, 0, Lemma.from(0, "A"));
|
||||
GridBuilder.placeWord(grid.grid(), grid.grid().g, Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_0_0, 0, Lemma.from("A"));
|
||||
val arr = grid.stream(Masker.Clues.createEmpty()).collect(Collectors.toMap(LetterAt::index, LetterAt::letter));
|
||||
assertEquals(1, arr.size());
|
||||
assertEquals(LETTER_A, arr.get(OFF_0_0));
|
||||
@@ -232,11 +233,11 @@ public class SwedishGeneratorTest {
|
||||
|
||||
@Test
|
||||
void testSlot() {
|
||||
System.out.println("[DEBUG_LOG] Slot.BIT_FOR_DIR = " + Masker.Slot.BIT_FOR_DIR);
|
||||
System.out.println("[DEBUG_LOG] Slot.BIT_FOR_DIR = " + Slot.BIT_FOR_DIR);
|
||||
// key = (r << 8) | (c << 4) | d
|
||||
var offset = OFF_2_3;
|
||||
System.out.println("[DEBUG_LOG] Grid.offset(2, 3) = " + offset);
|
||||
var key = Masker.Slot.packSlotKey(offset, CLUE_DOWN);
|
||||
var key = Slot.packSlotKey(offset, CLUE_DOWN);
|
||||
System.out.println("[DEBUG_LOG] key = " + key);
|
||||
long lo = 0;
|
||||
// pos 0: (2, 5)
|
||||
@@ -246,10 +247,10 @@ public class SwedishGeneratorTest {
|
||||
// pos 2: (4, 5)
|
||||
lo |= 1L << OFF_4_5;
|
||||
|
||||
System.out.println("[DEBUG_LOG] s.dir() = " + Masker.Slot.dir(key));
|
||||
assertEquals(OFF_2_3, Masker.Slot.clueIndex(key));
|
||||
assertEquals(CLUE_DOWN, Masker.Slot.dir(key));
|
||||
assertFalse(Masker.Slot.horiz(key));
|
||||
System.out.println("[DEBUG_LOG] s.dir() = " + Slot.dir(key));
|
||||
assertEquals(OFF_2_3, Slot.clueIndex(key));
|
||||
assertEquals(CLUE_DOWN, Slot.dir(key));
|
||||
assertFalse(Slot.horiz(key));
|
||||
var cells = Gridded.walk((byte) key, lo, 0L).toArray();
|
||||
assertEquals(2, SwedishGenerator.IT[cells[0]].r());
|
||||
assertEquals(3, SwedishGenerator.IT[cells[1]].r());
|
||||
@@ -258,8 +259,8 @@ public class SwedishGeneratorTest {
|
||||
assertEquals(5, SwedishGenerator.IT[cells[1]].c());
|
||||
assertEquals(5, SwedishGenerator.IT[cells[2]].c());
|
||||
|
||||
assertTrue(Masker.Slot.horiz(CLUE_RIGHT)); // right
|
||||
assertFalse(Masker.Slot.horiz(CLUE_DOWN)); // down
|
||||
assertTrue(Slot.horiz(CLUE_RIGHT)); // right
|
||||
assertFalse(Slot.horiz(CLUE_DOWN)); // down
|
||||
}
|
||||
|
||||
static long packPattern(String s) {
|
||||
@@ -295,9 +296,9 @@ public class SwedishGeneratorTest {
|
||||
assertEquals(1, slots.length);
|
||||
var s = slots[0];
|
||||
|
||||
assertTrue(Masker.Slot.length(s.lo(), s.hi()) >= 2);
|
||||
assertEquals(OFF_0_0, Masker.Slot.clueIndex(s.key()));
|
||||
assertEquals(CLUE_RIGHT, Masker.Slot.dir(s.key()));
|
||||
assertTrue(Slot.length(s.lo(), s.hi()) >= 2);
|
||||
assertEquals(OFF_0_0, Slot.clueIndex(s.key()));
|
||||
assertEquals(CLUE_RIGHT, Slot.dir(s.key()));
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -336,7 +337,7 @@ public class SwedishGeneratorTest {
|
||||
void testPlaceWord() {
|
||||
var grid = new Gridded(createEmpty());
|
||||
// Slot at OFF_0_0 length 3, horizontal (right)
|
||||
var key = Masker.Slot.packSlotKey(0, CLUE_RIGHT);
|
||||
var key = Slot.packSlotKey(0, CLUE_RIGHT);
|
||||
var lo = (1L << OFF_0_0) | (1L << OFF_0_1) | (1L << OFF_0_2);
|
||||
val hi = 0L;
|
||||
var w1 = ABC;
|
||||
@@ -362,7 +363,7 @@ public class SwedishGeneratorTest {
|
||||
|
||||
// 4. Partial placement then conflict (rollback)
|
||||
grid = new Gridded(createEmpty());
|
||||
GridBuilder.placeWord(grid.grid(), grid.grid().g, Masker.Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_0_2, 0, Lemma.from(0, "X")); // Conflict at the end
|
||||
GridBuilder.placeWord(grid.grid(), grid.grid().g, Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_0_2, 0, Lemma.from("X")); // Conflict at the end
|
||||
assertFalse(GridBuilder.placeWord(grid.grid(), grid.grid().g, key, lo, hi, w1));
|
||||
map = grid.stream(Masker.Clues.createEmpty()).collect(Collectors.toMap(LetterAt::index, LetterAt::letter));
|
||||
assertEquals(1, map.size());
|
||||
@@ -373,7 +374,7 @@ public class SwedishGeneratorTest {
|
||||
void testBacktrackingHelpers() {
|
||||
var grid = new Gridded(createEmpty());
|
||||
// Slot at 0,1 length 2
|
||||
var key = Masker.Slot.packSlotKey(0, CLUE_RIGHT);
|
||||
var key = Slot.packSlotKey(0, CLUE_RIGHT);
|
||||
var lo = (1L << OFF_0_1) | (1L << OFF_0_2);
|
||||
var w = AZ;
|
||||
val low = grid.grid().lo;
|
||||
@@ -401,8 +402,8 @@ public class SwedishGeneratorTest {
|
||||
assertTrue(Slotinfo.increasing(CLUE_DOWN)); // Down
|
||||
assertFalse(Slotinfo.increasing(CLUE_UP)); // Up
|
||||
|
||||
assertTrue(Slotinfo.increasing(Masker.Slot.packSlotKey(0, CLUE_RIGHT)));
|
||||
assertFalse(Slotinfo.increasing(Masker.Slot.packSlotKey(0, CLUE_LEFT)));
|
||||
assertTrue(Slotinfo.increasing(Slot.packSlotKey(0, CLUE_RIGHT)));
|
||||
assertFalse(Slotinfo.increasing(Slot.packSlotKey(0, CLUE_LEFT)));
|
||||
|
||||
// 2. Test slotScore
|
||||
val counts = new byte[SIZE];
|
||||
|
||||
Reference in New Issue
Block a user