introduce bitloops

This commit is contained in:
mike
2026-01-20 19:00:27 +01:00
parent 58b8b57688
commit dadde53f76
28 changed files with 42000 additions and 41654 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -14,6 +14,6 @@ public final class DictData {
idx[6] = DictDataL6.entry(); idx[6] = DictDataL6.entry();
idx[7] = DictDataL7.entry(); idx[7] = DictDataL7.entry();
idx[8] = DictDataL8.entry(); idx[8] = DictDataL8.entry();
return new puzzle.SwedishGenerator.Dict(idx, 16413); return new puzzle.SwedishGenerator.Dict(idx, 16445);
} }
} }

View File

@@ -6,7 +6,7 @@ public final class DictDataL3 {
static final int LEN = 3; static final int LEN = 3;
static final int ROWS = 78; static final int ROWS = 78;
static final int COLS = 8; static final int COLS = 8;
static final int WORDS_LEN = 487; static final int WORDS_LEN = 490;
static final int POS_LEN = 624; static final int POS_LEN = 624;
private static long[] words() { private static long[] words() {

View File

@@ -7,131 +7,131 @@ public final class DictDataL3P0 {
0x2000000000001010L, 0x2000000000001010L,
0x0L, 0x0L,
0x800300e020008000L, 0x800300e020008000L,
0x1000000f800000L, 0x2000000f800000L,
0x0L, 0x0L,
0xc000000000100000L, 0x800000L,
0x1L, 0xeL,
0x0L, 0x0L,
0x2000002000L, 0x2000002000L,
0x8003800018001000L, 0x8003800018001000L,
0x2030001L, 0x2030001L,
0x20000000000000L, 0x40000000000000L,
0x3800000030000L, 0xe0000000c0000L,
0x600000L, 0x3000000L,
0x300003c0001eL, 0x180001e0000f0L,
0x808010000L, 0x4040080000L,
0x0L, 0x0L,
0x4000000000000L, 0x4000000000000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x1800000L, 0xc000000L,
0xc00000000000L, 0x6000000000000L,
0x1000420000L, 0x8002100000L,
0x20000020020182L, 0x20000020020182L,
0x80000e0002000L, 0x80000e0002000L,
0x4010000000000L, 0x4010000000000L,
0x1c000007000000fL, 0x38000007000000fL,
0x40000001c0000L, 0x10000000700000L,
0x2000000L, 0x10000000L,
0x1000000000000060L, 0x8000000000000300L,
0x20L, 0x100L,
0x40400e0000000001L, 0x40400e0000000001L,
0x40L, 0x40L,
0x8060108000000L, 0x8060108000000L,
0x200000000000070L, 0x400000000000070L,
0x8000000200000L, 0x20000000800000L,
0x4000000L, 0x20000000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x4000L, 0x4000L,
0x0L, 0x0L,
0x80000000L, 0x80000000L,
0x10000000000000L, 0x40000000000000L,
0x18000000L, 0xc0000000L,
0x2001000000000080L, 0x8000000000400L,
0x2480000000L, 0x12400000001L,
0x0L, 0x0L,
0x100018000L, 0x100018000L,
0x40002L, 0x40002L,
0x400000000000080L, 0x1800000000000080L,
0x20000000400000L, 0x80000001000000L,
0x60000000L, 0x300000000L,
0x400e00000c000000L, 0x70000060000000L,
0x840000L, 0x4200002L,
0x80000841040004L, 0x80000841040004L,
0x10000600020000L, 0x10000600020000L,
0x10000000000400L, 0x10000000000400L,
0x1800000300000000L, 0x6000000300000000L,
0x3c0000000000000L, 0xf00000000000000L,
0x380000000L, 0x1c00000000L,
0x10000070000700L, 0x80000380003800L,
0x1c0L, 0xe00L,
0x0L, 0x0L,
0x0L, 0x0L,
0x80200000000L, 0x80200000000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x8000000000000000L,
0x0L, 0x0L,
0x4L,
0x11000000000L, 0x11000000000L,
0x0L, 0x0L,
0x80800L, 0x80800L,
0x0L, 0x0L,
0x400000000000000L, 0x1000000000000000L,
0x400000000L, 0x2000000000L,
0x20000080000800L, 0x100000400004000L,
0x215080200L, 0x10a8401000L,
0x1000080000200L, 0x1000080000200L,
0x800000180L, 0x800000180L,
0x100cL, 0x100cL,
0x2000001c00000100L, 0x8000001c00000100L,
0x3800000000800000L, 0xe000000002000000L,
0x3800000000L, 0x1c000000000L,
0x7000L, 0x38000L,
0x100000L, 0x800000L,
0x300100000000000L, 0x300100000000000L,
0x70001c0000L, 0x70001c0000L,
0x60000000000010L, 0x60000000000010L,
0xc00000e000000200L, 0xe000000200L,
0x400000000f000000L, 0x3c000003L,
0x1fc000000000L, 0xfe0000000003L,
0x40000000018000L, 0x2000000000c0000L,
0x400L, 0x2000L,
0x8002000400004440L, 0x8002000400004440L,
0x20008000200001L, 0x20008000200001L,
0x4000000L, 0x4000000L,
0x30000000400L, 0x30000000400L,
0x8000000010000003L, 0x4000000cL,
0xe00000000007L, 0x700000000003cL,
0x3000e0000L, 0x1800700000L,
0x100000003L, 0x800000018L,
0x4400000480028L, 0x4400000480028L,
0x0L, 0x0L,
0x180100400000000L, 0x180100400000000L,
0x1800L, 0x40000001800L,
0x20000004L, 0x80000010L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x2008800L, 0x2008800L,
0xc0010000400202L, 0xc0010000400202L,
0x200800000000L, 0x200800000000L,
0x40000000000L, 0x80000000000L,
0x0L, 0x0L,
0x8L, 0x40L,
0x0L, 0x0L,
0x800L, 0x4000L,
0x400000000000000L, 0x400000000000000L,
0x300020000800004L, 0x300020000800004L,
0x20L, 0x20L,
0x80000000000L, 0x100000000000L,
0x40000038L, 0x1000000e0L,
0x70000000001f0L, 0x38000000000f80L,
0x180000c00100000L, 0xc00006000800000L,
0x201000L, 0x1008000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
@@ -139,55 +139,55 @@ public final class DictDataL3P0 {
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x20000000L, 0x100000000L,
0x8000000000000L, 0x8000000000000L,
0x408L, 0x408L,
0xe00400050100040L, 0xe00400050100040L,
0xf0000001e000L, 0x1e0000001e000L,
0x780000040L, 0x1e00000100L,
0x78000000000600L, 0x3c0000000003000L,
0x1000000000L, 0x8000000000L,
0x0L, 0x0L,
0x800000000000000L, 0x800000000000000L,
0x0L, 0x0L,
0x1000000000000000L, 0x1000000000000000L,
0x3000000060000L, 0x6000000060000L,
0x3800000080L, 0xe000000200L,
0x80000000003800L, 0x40000000001c000L,
0x0L, 0x0L,
0x4L, 0x20L,
0x1010a00000100000L, 0x1010a00000100000L,
0x400000000000000L, 0x400000000000000L,
0x2000801080000000L, 0x2000801080000000L,
0x4000000180000L, 0x8000000180000L,
0x3c000000700L, 0xf0000001c00L,
0x1c000L, 0xe0000L,
0x200002000200000L, 0x1000010001000000L,
0x2000008L, 0x10000040L,
0x800000L, 0x800000L,
0x800000000000000L, 0x800000000000000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x100000000020000L, 0x800000000100000L,
0x0L, 0x0L,
0x10L, 0x80L,
0x10000L, 0x10000L,
0xc0000000810L, 0xc0000000810L,
0x4000000000000080L, 0x4000000000000080L,
0x200000L, 0x200000L,
0xc0000001800L, 0x300000006000L,
0x600000000000000L, 0x3000000000000000L,
0x400004000000000L, 0x2000020000000000L,
0x0L, 0x0L,
0x431c200000L, 0x431c200000L,
0x1000000001000000L, 0x1000000001000000L,
0x200000L, 0x200000L,
0x8000000400000L, 0x10000000400000L,
0x700000006000L, 0x1c00000018000L,
0x18000000000c0000L, 0xc000000000600000L,
0x800018000000000L, 0x40000c0000000000L,
0x2000L, 0x10000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
@@ -195,31 +195,31 @@ public final class DictDataL3P0 {
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x4000000000L, 0x20000000000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x2000000000000000L, 0x0L,
0x60000000000L, 0x300000000001L,
0x40004000L, 0x200020000L,
0x8000000000L, 0x8000000000L,
0x6000700006000020L, 0x6000700006000020L,
0x1c06300L, 0x1c06300L,
0x0L, 0x0L,
0x8000L, 0x20000L,
0x0L, 0x0L,
0x80000000000L, 0x400000000000L,
0x8000L, 0x40000L,
0x100840000c010692L, 0x100840000c010692L,
0xa12186922081f080L, 0xa12186922081f080L,
0x2210024044c2120L, 0x2210024044c2120L,
0x3860196411822801L, 0xe0c0316411822801L,
0x8861944853042349L, 0x218651214c108d24L,
0xa00204989800010L, 0x5001024c4c000085L,
0x80400100408902L, 0x402000802044810L,
0x4000c0200L, 0x2000601000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
@@ -227,15 +227,15 @@ public final class DictDataL3P0 {
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x20000L,
0x0L,
0x0L,
0x8000L,
0x10000000000000L,
0x0L,
0x100000L, 0x100000L,
0x0L, 0x0L,
0x800L, 0x0L,
0x8000L,
0x20000000000000L,
0x0L,
0x800000L,
0x0L,
0x4000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
@@ -247,11 +247,11 @@ public final class DictDataL3P0 {
0x8787bc015100606dL, 0x8787bc015100606dL,
0x630011b060034L, 0x630011b060034L,
0x5c58291bd0004000L, 0x5c58291bd0004000L,
0x40882083a068d586L, 0x9104083a068d586L,
0x48008000040c810L, 0x1200200001032041L,
0x2003008410240001L, 0x18042081200008L,
0x28080000000L, 0x140400000001L,
0x2004000L, 0x10020000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
@@ -263,11 +263,11 @@ public final class DictDataL3P0 {
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x200000000000000L, 0x400000000000000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x100000L, 0x800000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
@@ -275,23 +275,23 @@ public final class DictDataL3P0 {
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x10L, 0x80L,
0x1ee00a20100L, 0x1ee00a20100L,
0x1000480044200800L, 0x1000480044200800L,
0x2080824000100204L, 0x2080824000100204L,
0x500020800010218L, 0x1200060800010218L,
0x731420808c811420L, 0xcc50820232045080L,
0x100c43302002ca60L, 0x8062198100165302L,
0x480114441010028cL, 0x4008a22080801460L,
0x100800428L, 0x804002142L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x8200000000000000L, 0x1000000000000000L,
0x0L, 0x4L,
0x8000L, 0x8000L,
0x0L, 0x0L,
0x0L, 0x0L,
@@ -303,15 +303,15 @@ public final class DictDataL3P0 {
0x1000L, 0x1000L,
0x40L, 0x40L,
0x8000000000L, 0x8000000000L,
0x1000004000020L, 0x2000004000020L,
0x0L, 0x0L,
0x4000000004400000L, 0x22000000L,
0x0L, 0x2L,
0x80000000L, 0x400000000L,
0x0L, 0x0L,
0x200L, 0x200L,
0x0L, 0x0L,
0x40000000000L, 0x80000000000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
@@ -327,18 +327,18 @@ public final class DictDataL3P0 {
0x300010825c0000L, 0x300010825c0000L,
0x464801248018050bL, 0x464801248018050bL,
0x100500002a11c83L, 0x100500002a11c83L,
0x8004401040140000L, 0x8801040140000L,
0x24313000a0082L, 0x90c4c0028020aL,
0x4108c0202093486L, 0x208460101049a430L,
0x2172a92225873470L, 0xb9549112c39a380L,
0x381c002041L, 0x1c0e0010209L,
0x0L, 0x0L,
0x400000L, 0x400000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x2000000000L, 0x8000000000L,
0x0L, 0x0L,
0x4000000000000L, 0x20000000000000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
@@ -352,8 +352,8 @@ public final class DictDataL3P0 {
0x0L, 0x0L,
0x2000028000000L, 0x2000028000000L,
0x8000000L, 0x8000000L,
0x200000L, 0x800000L,
0x100L, 0x800L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
@@ -362,7 +362,7 @@ public final class DictDataL3P0 {
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x8000000L, 0x40000000L,
0x0L, 0x0L,
0x800000000000000L, 0x800000000000000L,
0x0L, 0x0L,
@@ -371,15 +371,15 @@ public final class DictDataL3P0 {
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x4000000000L, 0x20000000000L,
0x20000000L, 0x20000000L,
0x890004800000000L, 0x890004800000000L,
0x1020058L, 0x1020058L,
0x2800000000000L, 0x5000000000000L,
0x8000420100004L, 0x20001080400010L,
0x80e0100040000008L, 0x700800200000040L,
0x1408001842284001L, 0xa04000c21142000cL,
0x261619186L, 0x130b0c8c30L,
0x0L, 0x0L,
0x0L, 0x0L,
0x8000000000000000L, 0x8000000000000000L,
@@ -393,7 +393,7 @@ public final class DictDataL3P0 {
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x100000000000000L, 0x800000000000000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
@@ -423,19 +423,19 @@ public final class DictDataL3P0 {
0x800000000000000L, 0x800000000000000L,
0x400a00L, 0x400a00L,
0x4001000000000L, 0x4001000000000L,
0x1000002000048L, 0x2000002000048L,
0x2000000000L, 0x8000000000L,
0x400000L, 0x2000000L,
0x200000000000000L, 0x1000000000000000L,
0x20000000L, 0x100000000L,
0x40000000L, 0x40000000L,
0x0L, 0x0L,
0x2000000L, 0x2000000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x8010004000000200L, 0x80020000001000L,
0x8000000000L, 0x40000000004L,
0x10300000L, 0x81800000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
@@ -443,54 +443,54 @@ public final class DictDataL3P0 {
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x4000000000L, 0x20000000000L,
0x0L, 0x0L,
0x80820208000000L, 0x80820208000000L,
0x400040100000002L, 0x400040100000002L,
0x2000L, 0x2000L,
0x1000010004000000L, 0x4000040010000000L,
0x40000L, 0x200000L,
0x800004L, 0x4000020L,
0x0L, 0x0L,
0x30120000248148L, 0x30120000248148L,
0x4000021L, 0x4000021L,
0xc010c080b8000800L, 0xc010c080b8000800L,
0x12041080400000L, 0x24081080400000L,
0x400400000020000L, 0x1001000000080000L,
0x100000000000000L, 0x800000000000000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x40000L, 0x40000L,
0x0L, 0x0L,
0x200000L, 0x800000L,
0x4008008004000000L, 0x40040020000000L,
0x40c0002100008000L, 0x600010800040002L,
0x200000045L, 0x100000022aL,
0x4040000100400000L, 0x4040000100400000L,
0x60001020040000L, 0x60001020040000L,
0x8000000404040L, 0x8000000404040L,
0x0L, 0x0L,
0x0L, 0x0L,
0x1010000004400L, 0x8080000022000L,
0x100000010020L, 0x800000080100L,
0x10002L, 0x80010L,
0x0L, 0x0L,
0x0L, 0x0L,
0x8000L, 0x8000L,
0x800000L, 0x800000L,
0x8000000000000L, 0x20000000000000L,
0x0L, 0x0L,
0x2000000400000L, 0x10000002000000L,
0x4000880L, 0x20004400L,
0x0L, 0x0L,
0x4402000000L, 0x4402000000L,
0x40200000000000L, 0x40200000000000L,
0x800000100000500L, 0x2000000100000500L,
0x4000L, 0x10000L,
0x0L, 0x0L,
0x2000000L, 0x10000000L,
0x0L, 0x0L,
0x1ec00000000L, 0x1ec00000000L,
0x0L, 0x0L,
@@ -499,55 +499,55 @@ public final class DictDataL3P0 {
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x800428L, 0x4002140L,
0x4000002000000L, 0x4000002000000L,
0x8102000140002040L, 0x8102000140002040L,
0x2000L, 0x2000L,
0x4080000008004010L, 0x100040008004010L,
0xc080008101000100L, 0x200020404000401L,
0x220441082011000L, 0x1102208410088006L,
0x2001200024321200L, 0x9000121909000L,
0x0L, 0x1L,
0x210000000L, 0x210000000L,
0x5042090020400L, 0x5042090020400L,
0x801000000000190L, 0x801000000000190L,
0x40008020080000L, 0x80008020080000L,
0x20600a1038400400L, 0x81802840e1001000L,
0x884800030020010L, 0x4424000180100080L,
0x408810001000408L, 0x2044080008002040L,
0x8000L, 0x40000L,
0x2000000081000000L, 0x2000000081000000L,
0x8018000000000L, 0x8018000000000L,
0x10000L, 0x10000L,
0x100000008800L, 0x200000008800L,
0x804000202040200L, 0x2010000808100800L,
0x40002100002000L, 0x200010800010000L,
0x8040002L, 0x40200010L,
0x210L, 0x1080L,
0x483200000012691L, 0x483200000012691L,
0x4010400800004100L, 0x4010400800004100L,
0x2020110840000000L, 0x2020110840000000L,
0x4288a0840340280L, 0x1051120840340280L,
0x1220000011b022L, 0x4880000046c088L,
0x2000000440080000L, 0x2200400000L,
0x20040000000800L, 0x100200000004001L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x200000000000002L, 0x400000000000002L,
0x0L, 0x0L,
0x100L, 0x800L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x400000000000000L, 0x400000000000000L,
0x1024L, 0x1024L,
0x200001001000L, 0x400001001000L,
0x4880000040L, 0x12200000100L,
0x2220800008801L, 0x11104000044009L,
0x910404610000140L, 0x4882023080000a00L,
0x40485100L, 0x202428800L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
@@ -559,34 +559,34 @@ public final class DictDataL3P0 {
0x840000000000L, 0x840000000000L,
0x800000000001016L, 0x800000000001016L,
0x100004600000000L, 0x100004600000000L,
0x9004010600000001L, 0x4008010600000001L,
0x100000000008L, 0x400000000022L,
0x80L, 0x400L,
0x0L, 0x0L,
0x0L, 0x0L,
0x9100000024001800L, 0x9100000024001800L,
0x180000a88000L, 0x180000a88000L,
0x2800220010a0009L, 0x2800220010a0009L,
0x2100402010020024L, 0x8200802010020024L,
0x100800400000080L, 0x402001000000200L,
0x1400100000800022L, 0xa000800004000110L,
0x8004020000080000L, 0x20100000400000L,
0x1020000L, 0x8100004L,
0x208480008924026L, 0x208480008924026L,
0x3200200001110088L, 0x3200200001110088L,
0x1002080000000200L, 0x1002080000000200L,
0x4004010000L, 0x800004004010000L,
0x201040040880815L, 0x804100102202054L,
0x20910004cL, 0x1048800260L,
0x808c0004090L, 0x404600020480L,
0x0L, 0x0L,
0x1000080000L, 0x1000080000L,
0x0L, 0x0L,
0xa00400L, 0xa00400L,
0x0L, 0x0L,
0x0L, 0x0L,
0x80000200000L, 0x400001000000L,
0x2000L, 0x10000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
@@ -602,8 +602,8 @@ public final class DictDataL3P0 {
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x1000001000000001L, 0x8000008000000008L,
0x2000L, 0x10000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x4000000L, 0x4000000L,
@@ -611,7 +611,7 @@ public final class DictDataL3P0 {
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x3d02000000L, 0x1e810000000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
@@ -619,7 +619,7 @@ public final class DictDataL3P0 {
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,
0x88040000L, 0x440200000L,
0x0L, 0x0L,
0x0L, 0x0L,
0x0L, 0x0L,

View File

@@ -238,259 +238,262 @@ public final class DictDataL3W0 {
0x73a00000030acL, 0x73a00000030acL,
0x742000000482dL, 0x742000000482dL,
0x74a000000392dL, 0x74a000000392dL,
0x75200000015afL, 0x7520000002d2eL,
0x75a0000003830L, 0x75a00000015afL,
0x7620000003432L, 0x7620000003830L,
0x76a00000040b2L, 0x76a0000003432L,
0x7720000004df2L, 0x77200000040b2L,
0x77a0000003ab2L, 0x77a0000004df2L,
0x7820000000593L, 0x7820000003ab2L,
0x78a00000016b3L, 0x78a0000000593L,
0x79200000049f4L, 0x79200000016b3L,
0x79a00000038b7L, 0x79a00000049f4L,
0x7a20000001461L, 0x7a200000038b7L,
0x7aa0000003822L, 0x7aa0000001461L,
0x7b20000003024L, 0x7b20000003822L,
0x7ba0000002ca4L, 0x7ba0000003024L,
0x7c20000004d24L, 0x7c20000002ca4L,
0x7ca0000003ce5L, 0x7ca0000004d24L,
0x7d20000003927L, 0x7d20000003ce5L,
0x7da0000002428L, 0x7da00000050a7L,
0x7e20000004828L, 0x7e20000003927L,
0x7ea0000004c2bL, 0x7ea0000002428L,
0x7f20000002cacL, 0x7f20000004828L,
0x7fa00000049ecL, 0x7fa0000004c2bL,
0x802000000502dL, 0x8020000002cacL,
0x80a00000039edL, 0x80a00000049ecL,
0x81200000052aeL, 0x812000000502dL,
0x81a0000004830L, 0x81a00000039edL,
0x82200000050b0L, 0x82200000052aeL,
0x82a0000003930L, 0x82a0000004830L,
0x8320000004032L, 0x83200000050b0L,
0x83a0000004df3L, 0x83a0000003930L,
0x8420000002c34L, 0x8420000004032L,
0x84a0000003434L, 0x84a0000004df3L,
0x8520000003134L, 0x8520000002c34L,
0x85a00000050b6L, 0x85a0000003434L,
0x8620000003936L, 0x8620000003134L,
0x86a0000003837L, 0x86a00000050b6L,
0x87200000024b7L, 0x8720000003936L,
0x87a00000038baL, 0x87a0000003837L,
0x8820000003922L, 0x88200000024b7L,
0x88a00000015e2L, 0x88a00000038baL,
0x8920000003424L, 0x8920000003922L,
0x89a00000051e4L, 0x89a00000015e2L,
0x8a20000003aa4L, 0x8a20000003424L,
0x8aa0000001a45L, 0x8aa00000051e4L,
0x8b200000030a7L, 0x8b20000003aa4L,
0x8ba000000512bL, 0x8ba0000001a45L,
0x8c20000002c2cL, 0x8c200000030a7L,
0x8ca000000342cL, 0x8ca000000512bL,
0x8d2000000112cL, 0x8d20000002c2cL,
0x8da000000312cL, 0x8da000000342cL,
0x8e2000000302dL, 0x8e2000000112cL,
0x8ea00000032aeL, 0x8ea000000312cL,
0x8f20000005030L, 0x8f2000000302dL,
0x8fa0000004132L, 0x8fa00000032aeL,
0x9020000002df2L, 0x9020000005030L,
0x90a00000035f2L, 0x90a0000004132L,
0x9120000004eb2L, 0x9120000002df2L,
0x91a0000004033L, 0x91a00000035f2L,
0x92200000031f3L, 0x9220000004eb2L,
0x92a0000000613L, 0x92a0000004033L,
0x9320000004034L, 0x93200000031f3L,
0x93a0000002d34L, 0x93a0000000613L,
0x94200000011f4L, 0x9420000004034L,
0x94a00000031f4L, 0x94a0000002d34L,
0x9520000005036L, 0x95200000011f4L,
0x95a00000030b6L, 0x95a00000031f4L,
0x9620000004837L, 0x9620000005036L,
0x96a0000003937L, 0x96a00000030b6L,
0x97200000015f7L, 0x9720000004837L,
0x97a0000004c22L, 0x97a0000003937L,
0x9820000005022L, 0x98200000015f7L,
0x98a00000039e2L, 0x98a0000004c22L,
0x9920000003524L, 0x9920000005022L,
0x99a00000022a5L, 0x99a00000039e2L,
0x9a20000003926L, 0x9a20000003524L,
0x9aa0000003027L, 0x9aa00000022a5L,
0x9b20000003028L, 0x9b20000003926L,
0x9ba0000002ca8L, 0x9ba0000003027L,
0x9c20000004d28L, 0x9c20000003028L,
0x9ca0000005128L, 0x9ca0000002ca8L,
0x9d200000014aaL, 0x9d20000004d28L,
0x9da000000342bL, 0x9da0000005128L,
0x9e2000000112bL, 0x9e200000014aaL,
0x9ea000000312bL, 0x9ea000000342bL,
0x9f20000002d2cL, 0x9f2000000112bL,
0x9fa0000002c2dL, 0x9fa000000312bL,
0xa0200000040adL, 0xa02000000402cL,
0xa0a0000004dedL, 0xa0a0000002d2cL,
0xa1200000051edL, 0xa120000002c2dL,
0xa1a00000052afL, 0xa1a00000040adL,
0xa220000003030L, 0xa220000004dedL,
0xa2a0000004d30L, 0xa2a00000051edL,
0xa320000005130L, 0xa3200000052afL,
0xa3a00000049f0L, 0xa3a0000003030L,
0xa420000003e50L, 0xa420000004d30L,
0xa4a0000000932L, 0xa4a0000005130L,
0xa520000001df2L, 0xa5200000049f0L,
0xa5a0000004133L, 0xa5a0000003e50L,
0xa620000002df3L, 0xa620000000932L,
0xa6a00000035f3L, 0xa6a0000001df2L,
0xa720000001d34L, 0xa720000004133L,
0xa7a0000004134L, 0xa7a0000002df3L,
0xa820000002df4L, 0xa8200000035f3L,
0xa8a0000003135L, 0xa8a0000001d34L,
0xa9200000010b7L, 0xa920000004134L,
0xa9a00000039f7L, 0xa9a0000002df4L,
0xaa20000005061L, 0xaa20000003135L,
0xaaa00000054a2L, 0xaaa00000010b7L,
0xab20000000582L, 0xab200000039f7L,
0xaba0000004c23L, 0xaba0000005061L,
0xac20000005023L, 0xac200000054a2L,
0xaca0000002de4L, 0xaca0000000582L,
0xad20000001985L, 0xad20000004c23L,
0xada0000005026L, 0xada0000005023L,
0xae200000030a6L, 0xae20000002de4L,
0xaea0000003127L, 0xaea0000001985L,
0xaf20000003aa7L, 0xaf20000005026L,
0xafa0000002c28L, 0xafa00000030a6L,
0xb020000003428L, 0xb020000003127L,
0xb0a00000051e8L, 0xb0a0000003aa7L,
0xb1200000038aaL, 0xb120000002c28L,
0xb1a000000402bL, 0xb1a0000003428L,
0xb220000002d2bL, 0xb2200000051e8L,
0xb2a000000352bL, 0xb2a00000038aaL,
0xb32000000082cL, 0xb32000000402bL,
0xb3a00000018acL, 0xb3a0000002d2bL,
0xb420000001d2cL, 0xb42000000352bL,
0xb4a000000412cL, 0xb4a000000082cL,
0xb520000002decL, 0xb5200000018acL,
0xb5a00000055ecL, 0xb5a0000001d2cL,
0xb620000004eacL, 0xb62000000412cL,
0xb6a000000402dL, 0xb6a0000002decL,
0xb720000002d2dL, 0xb7200000055ecL,
0xb7a00000031edL, 0xb7a0000004eacL,
0xb820000001cb0L, 0xb82000000402dL,
0xb8a00000040b0L, 0xb8a0000002d2dL,
0xb920000003130L, 0xb9200000031edL,
0xb9a0000001932L, 0xb9a0000001cb0L,
0xba200000009f2L, 0xba200000040b0L,
0xbaa0000002eb2L, 0xbaa0000003130L,
0xbb200000036b2L, 0xbb20000001932L,
0xbba00000032b3L, 0xbba00000009f2L,
0xbc200000016f5L, 0xbc20000002eb2L,
0xbca0000002c36L, 0xbca00000036b2L,
0xbd20000004df6L, 0xbd200000032b3L,
0xbda0000003037L, 0xbda00000016f5L,
0xbe20000004d37L, 0xbe20000002c36L,
0xbea00000038b9L, 0xbea0000004df6L,
0xbf20000001981L, 0xbf20000003037L,
0xbfa0000000aa1L, 0xbfa0000004d37L,
0xc020000005ea1L, 0xc0200000038b9L,
0xc0a0000003422L, 0xc0a0000001981L,
0xc120000001122L, 0xc120000000aa1L,
0xc1a0000003122L, 0xc1a0000005ea1L,
0xc2200000051e2L, 0xc220000003422L,
0xc2a0000001de4L, 0xc2a0000001122L,
0xc3200000041e4L, 0xc320000003122L,
0xc3a0000005126L, 0xc3a00000051e2L,
0xc420000004028L, 0xc420000001de4L,
0xc4a0000002d28L, 0xc4a00000041e4L,
0xc5200000031e8L, 0xc520000005126L,
0xc5a000000382aL, 0xc5a0000004028L,
0xc620000002debL, 0xc620000002d28L,
0xc6a00000055ebL, 0xc6a00000031e8L,
0xc7200000052abL, 0xc72000000382aL,
0xc7a000000182cL, 0xc7a0000002debL,
0xc820000001decL, 0xc8200000055ebL,
0xc8a0000002dedL, 0xc8a00000052abL,
0xc9200000035edL, 0xc92000000182cL,
0xc9a0000004eadL, 0xc9a0000001decL,
0xca20000002d30L, 0xca20000002dedL,
0xcaa0000002eb4L, 0xcaa00000035edL,
0xcb20000002022L, 0xcb20000004eadL,
0xcba00000011e2L, 0xcba0000002d30L,
0xcc200000031e2L, 0xcc20000002eb4L,
0xcca00000026a2L, 0xcca0000002022L,
0xcd20000002de7L, 0xcd200000011e2L,
0xcda0000003667L, 0xcda00000031e2L,
0xce20000004128L, 0xce200000026a2L,
0xcea0000002de8L, 0xcea0000002de7L,
0xcf200000052a8L, 0xcf20000003667L,
0xcfa00000050aaL, 0xcfa0000004128L,
0xd02000000182dL, 0xd020000002de8L,
0xd0a00000041edL, 0xd0a00000052a8L,
0xd120000004130L, 0xd1200000050aaL,
0xd1a00000052b0L, 0xd1a000000182dL,
0xd220000005eb2L, 0xd2200000041edL,
0xd2a00000019f4L, 0xd2a0000004130L,
0xd320000004136L, 0xd3200000052b0L,
0xd3a00000008b7L, 0xd3a0000005eb2L,
0xd4200000031f7L, 0xd4200000019f4L,
0xd4a0000004cb9L, 0xd4a0000004136L,
0xd520000003939L, 0xd5200000008b7L,
0xd5a00000051faL, 0xd5a00000031f7L,
0xd620000001d22L, 0xd620000004cb9L,
0xd6a0000002de2L, 0xd6a0000003939L,
0xd720000004023L, 0xd7200000051faL,
0xd7a00000031e3L, 0xd7a0000001d22L,
0xd820000002d26L, 0xd820000002de2L,
0xd8a00000021e7L, 0xd8a0000004023L,
0xd920000004e07L, 0xd9200000031e3L,
0xd9a00000032a7L, 0xd9a0000002d26L,
0xda200000041e8L, 0xda200000021e7L,
0xdaa00000039eaL, 0xdaa0000004e07L,
0xdb200000019ecL, 0xdb200000032a7L,
0xdba0000001830L, 0xdba00000041e8L,
0xdc200000041f0L, 0xdc200000039eaL,
0xdca0000000554L, 0xdca00000019ecL,
0xdd200000032b6L, 0xdd20000001830L,
0xdda0000004137L, 0xdda00000041f0L,
0xde20000005ea4L, 0xde20000000554L,
0xdea0000002de6L, 0xdea00000032b6L,
0xdf20000001927L, 0xdf20000004137L,
0xdfa0000004d49L, 0xdfa0000005ea4L,
0xe0200000019edL, 0xe020000002de6L,
0xe0a0000001eadL, 0xe0a0000001927L,
0xe120000001ab3L, 0xe120000004d49L,
0xe1a0000002934L, 0xe1a00000019edL,
0xe220000003515L, 0xe220000001eadL,
0xe2a0000002924L, 0xe2a0000001ab3L,
0xe3200000019e8L, 0xe320000002934L,
0xe3a00000022a8L, 0xe3a0000003515L,
0xe4200000042a8L, 0xe420000002924L,
0xe4a000000342aL, 0xe4a00000019e8L,
0xe52000000292cL, 0xe5200000022a8L,
0xe5a000000206fL, 0xe5a00000042a8L,
0xe6200000042b0L, 0xe62000000342aL,
0xe6a0000005df7L, 0xe6a000000292cL,
0xe7200000040b9L, 0xe72000000206fL,
0xe7a00000032baL, 0xe7a00000042b0L,
0xe820000001ea2L, 0xe820000005df7L,
0xe8a0000004c43L, 0xe8a00000040b9L,
0xe920000006427L, 0xe9200000032baL,
0xe9a000000402aL, 0xe9a0000001ea2L,
0xea200000008ebL, 0xea20000004c43L,
0xeaa0000000ab0L, 0xeaa0000006427L,
0xeb200000042a3L, 0xeb2000000402aL,
0xeba0000002927L, 0xeba00000008ebL,
0xec20000004eaaL, 0xec20000000ab0L,
0xeca00000060b4L, 0xeca00000042a3L,
0xed200000021eaL, 0xed20000002927L,
0xeda00000065e2L, 0xeda0000004eaaL,
0xee200000009eaL, 0xee200000060b4L,
0xeea00000006b1L, 0xeea00000021eaL,
0xef200000042b9L, 0xef200000065e2L,
0xefa0000006586L, 0xefa00000009eaL,
0xf02000000612dL, 0xf0200000006b1L,
0xf0a0000001aaaL, 0xf0a00000042b9L,
0xf120000006026L, 0xf120000006586L,
0xf1a00000061e2L, 0xf1a000000612dL,
0xf2200000061e3L, 0xf220000001aaaL,
0xf2a00000061e6L, 0xf2a0000006026L,
0xf320000000e98L 0xf3200000061e2L,
0xf3a00000061e3L,
0xf4200000061e6L,
0xf4a0000000e98L
}; };
} }
} }

View File

@@ -6,7 +6,7 @@ public final class DictDataL4 {
static final int LEN = 4; static final int LEN = 4;
static final int ROWS = 104; static final int ROWS = 104;
static final int COLS = 15; static final int COLS = 15;
static final int WORDS_LEN = 930; static final int WORDS_LEN = 932;
static final int POS_LEN = 1560; static final int POS_LEN = 1560;
private static long[] words() { private static long[] words() {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -6,7 +6,7 @@ public final class DictDataL5 {
static final int LEN = 5; static final int LEN = 5;
static final int ROWS = 130; static final int ROWS = 130;
static final int COLS = 38; static final int COLS = 38;
static final int WORDS_LEN = 2394; static final int WORDS_LEN = 2399;
static final int POS_LEN = 4940; static final int POS_LEN = 4940;
private static long[] words() { private static long[] words() {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -6,7 +6,7 @@ public final class DictDataL6 {
static final int LEN = 6; static final int LEN = 6;
static final int ROWS = 156; static final int ROWS = 156;
static final int COLS = 38; static final int COLS = 38;
static final int WORDS_LEN = 2381; static final int WORDS_LEN = 2382;
static final int POS_LEN = 5928; static final int POS_LEN = 5928;
private static long[] words() { private static long[] words() {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -5,9 +5,9 @@ public final class DictDataL7 {
static final int LEN = 7; static final int LEN = 7;
static final int ROWS = 182; static final int ROWS = 182;
static final int COLS = 98; static final int COLS = 99;
static final int WORDS_LEN = 6270; static final int WORDS_LEN = 6281;
static final int POS_LEN = 17836; static final int POS_LEN = 18018;
private static long[] words() { private static long[] words() {
return DictDataL7W0.get(); return DictDataL7W0.get();

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -6,7 +6,7 @@ public final class DictDataL8 {
static final int LEN = 8; static final int LEN = 8;
static final int ROWS = 208; static final int ROWS = 208;
static final int COLS = 61; static final int COLS = 61;
static final int WORDS_LEN = 3879; static final int WORDS_LEN = 3889;
static final int POS_LEN = 12688; static final int POS_LEN = 12688;
private static long[] words() { private static long[] words() {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -0,0 +1,123 @@
package puzzle;
import lombok.val;
import puzzle.DictJavaGeneratorMulti.CsvIndexService;
import puzzle.Meta.ShardRec;
import puzzle.SwedishGenerator.Lemma;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.function.LongConsumer;
import static java.nio.charset.StandardCharsets.US_ASCII;
import static puzzle.SwedishGenerator.*;
/**
 * Builds the clue shard files from the hints CSV and runs a few demo lookups.
 *
 * <p>Two files are written, both starting with a 12-byte header
 * ({@code magic, version, record count}, three ints in {@code Meta.ORDER}):
 * <ul>
 *   <li>{@code Meta.shardData}: header, then one int offset per record (relative to the start of
 *       the data section), then the records themselves as UTF-8 text
 *       {@code word \t simpel \t JSON-clues}, concatenated without separators;</li>
 *   <li>{@code Meta.shardMap}: header, then one long key ({@code ShardRec.w()}) per record in
 *       ascending order, so the position of a key equals the record index in the data file.</li>
 * </ul>
 */
public class BuildClueAndSimpelIndex {
    /** Size in bytes of the header (magic, version, record count) that starts both files. */
    private static final int HEADER_BYTES = 3 * Integer.BYTES;

    // --- Demo main ---
    /**
     * Reads {@code nl_score_hints_v4.csv} from the working directory, writes the shard files and
     * prints the lookup result for a handful of sample words.
     *
     * @param args ignored
     * @throws Exception if the CSV cannot be read or parsed, or a shard file cannot be written or read
     */
    public static void main(String[] args) throws Exception {
        val records = buildDict(Path.of("nl_score_hints_v4.csv"));
        buildShard(records);
        for (var qRaw : List.of("FIETS", "huis", "kiwi")) {
            var q = Meta.normWord(qRaw);
            var w = Lemma.from(q);
            // A negative index is treated as "key not present in the map file".
            var i = Meta.findIndexInMapMmap(Meta.shardMap, w);
            System.out.println("\nQuery: " + qRaw + " (norm=" + q + ") w=" + w + " -> i=" + i);
            if (i >= 0) {
                var rec = Meta.readRecord(Meta.shardData, w, i);
                System.out.println(" simpel=" + rec.simpel());
                System.out.println(" clues=" + Arrays.toString(rec.clues()));
            } else {
                System.out.println(" NOT FOUND");
            }
        }
        System.out.println("\nFiles written to: " + Meta.dir);
        System.out.println(" " + Meta.shardData);
        System.out.println(" " + Meta.shardMap);
    }

    // --- Build demo files: shard.data + shard.map ---
    /**
     * Writes {@code Meta.shardData} and {@code Meta.shardMap} for the given records.
     *
     * <p>The caller's list is not modified; a sorted copy is used internally.
     *
     * @param records records to store, in any order
     * @throws IOException if either file cannot be created or written
     * @throws ArithmeticException if the encoded records or the tables no longer fit in the
     *         {@code int} sizes used by the file format
     */
    static void buildShard(List<ShardRec> records) throws IOException {
        records = new ArrayList<>(records);
        // The map is sorted by w; record index i == position in this sorted list.
        records.sort(Comparator.comparingLong(ShardRec::w));
        var n = records.size();
        List<byte[]> recBytes = new ArrayList<>(n);
        var offsets = new int[n];
        var off = 0;
        for (var i = 0; i < n; i++) {
            var r = records.get(i);
            // Write out the actual word + simpel + JSON hints.
            var line = r.word() + "\t" + r.simpel() + "\t" + Meta.GSON.toJson(r.clues());
            var bytes = line.getBytes(StandardCharsets.UTF_8);
            recBytes.add(bytes);
            offsets[i] = off;
            // Fail loudly instead of wrapping around and emitting a corrupt offset table.
            off = Math.addExact(off, bytes.length);
        }
        var headerSize = (long) HEADER_BYTES;
        var tableSize = (long) n * Integer.BYTES;
        var dataStart = headerSize + tableSize;
        try (var ch = FileChannel.open(Meta.shardData, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
            writeHeader(ch, Meta.SHARD_MAGIC, n);
            var table = ByteBuffer.allocate(Math.multiplyExact(n, Integer.BYTES)).order(Meta.ORDER);
            for (var i = 0; i < n; i++) {
                table.putInt(offsets[i]);
            }
            table.flip();
            writeFully(ch, table);
            // Header and table are fully written, so this is where the data section begins.
            ch.position(dataStart);
            for (var b : recBytes) {
                writeFully(ch, ByteBuffer.wrap(b));
            }
        }
        try (var ch = FileChannel.open(Meta.shardMap, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
            writeHeader(ch, Meta.MAP_MAGIC, n);
            var keys = ByteBuffer.allocate(Math.multiplyExact(n, Long.BYTES)).order(Meta.ORDER);
            for (var r : records) {
                keys.putLong(r.w());
            }
            keys.flip();
            writeFully(ch, keys);
        }
    }

    /** Writes the 12-byte file header: {@code magic}, {@code Meta.VERSION} and the record count. */
    private static void writeHeader(FileChannel ch, int magic, int count) throws IOException {
        var hdr = ByteBuffer.allocate(HEADER_BYTES).order(Meta.ORDER);
        hdr.putInt(magic).putInt(Meta.VERSION).putInt(count);
        hdr.flip();
        writeFully(ch, hdr);
    }

    /** Writes every remaining byte of {@code buf}; a single {@code write} call may stop short. */
    private static void writeFully(FileChannel ch, ByteBuffer buf) throws IOException {
        while (buf.hasRemaining()) {
            ch.write(buf);
        }
    }

    /**
     * Parses the hints CSV into shard records, one per line.
     *
     * @param wordsPath UTF-8 CSV file; each line is split on the first three commas
     * @return the parsed records in file order
     * @throws IOException if the file cannot be read
     * @throws IllegalArgumentException if a line is malformed (message includes the line number)
     */
    private static List<ShardRec> buildDict(Path wordsPath) throws IOException {
        var recs = new ArrayList<ShardRec>();
        try (var reader = Files.newBufferedReader(wordsPath, StandardCharsets.UTF_8)) {
            var lineNo = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNo++;
                recs.add(parseRecord(line, lineNo));
            }
        }
        return recs;
    }

    /** Converts one CSV line into a record, reporting {@code lineNo} in every validation error. */
    private static ShardRec parseRecord(String line, int lineNo) {
        var parts = line.split(",", 4);
        if (parts.length < 4) {
            throw new IllegalArgumentException(
                    "Line " + lineNo + ": expected 4 comma-separated columns but found " + parts.length + ": '" + line + "'");
        }
        var word = parts[0].trim();
        long w = SwedishGenerator.Lemma.from(word.getBytes(US_ASCII));
        // The word must be recoverable from its long key, otherwise lookups by key would be ambiguous.
        if (!word.equals(SwedishGenerator.Lemma.asWord(w, Export.BYTES.get()))) {
            throw new IllegalArgumentException(
                    "Line " + lineNo + ": word '" + word + "' does not round-trip through Lemma (w=" + w + ")");
        }
        var rawClue = parts[3].trim();
        // Undo CSV quoting: strip the surrounding quotes and collapse doubled quotes.
        if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
            rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
        }
        var clues = Meta.GSON.fromJson(rawClue, String[].class);
        int simpel;
        try {
            simpel = Integer.parseInt(parts[CsvIndexService.SIMPEL_IDX].trim());
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException(
                    "Line " + lineNo + ": simpel column is not an integer for word '" + word + "'", e);
        }
        return new ShardRec(word, w, simpel, clues);
    }
}

View File

@@ -1,135 +0,0 @@
package puzzle;
import lombok.val;
import puzzle.DictJavaGeneratorMulti.CsvIndexService;
import puzzle.Meta.ShardLem;
import puzzle.Meta.ShardRec;
import puzzle.SwedishGenerator.Lemma;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.function.LongConsumer;
import static java.nio.charset.StandardCharsets.US_ASCII;
/**
 * Demo builder for the shard files: parses the CSV word list, writes the shard data file and the
 * shard map file, and looks up a few sample words to show the result.
 */
public class BuildMeta2 {
    /** Size in bytes of a file header: magic, version and record count, one int each. */
    private static final int HEADER_BYTES = 3 * Integer.BYTES;

    /**
     * Packs the first comma-separated field of {@code line} into a lemma and hands it to
     * {@code ok}.
     *
     * @param line one CSV line; must not be blank
     * @param ok receiver of the packed lemma
     * @throws IllegalArgumentException if {@code line} is blank
     */
    public static void lineToLemma(String line, LongConsumer ok) {
        if (line.isBlank()) {
            throw new IllegalArgumentException("Empty line");
        }
        // Only the first field is needed, so there is no reason to split any further.
        String word = line.split(",", 2)[0].trim();
        ok.accept(SwedishGenerator.Lemma.from(word.getBytes(US_ASCII)));
    }

    /**
     * Writes the two shard files for {@code records}.
     *
     * <p>The data file holds a 12-byte header (magic, version, record count), a table with one
     * int offset per record, and then the records themselves, concatenated without separators.
     * Offsets are relative to the first byte after the table. Each record is the UTF-8 text
     * {@code word TAB simpel TAB json-clues}.
     *
     * <p>The map file holds a 12-byte header followed by the {@code w} key of every record as a
     * long, in ascending order; the position of a key in the map equals the record index in the
     * data file.
     *
     * @param shardData path of the data file to create or overwrite
     * @param shardMap path of the map file to create or overwrite
     * @param records records to write; the list itself is not modified
     * @throws IOException if writing either file fails
     * @throws ArithmeticException if a record offset or a table size does not fit in an int
     */
    static void buildShard(Path shardData, Path shardMap, List<ShardRec> records) throws IOException {
        // Sort a private copy so the caller's list keeps its order.
        // The map is sorted by w; record index i equals the position in this sorted list.
        List<ShardRec> sorted = new ArrayList<>(records);
        sorted.sort(Comparator.comparingLong(ShardRec::w));
        int n = sorted.size();

        List<byte[]> recBytes = new ArrayList<>(n);
        int[] offsets = new int[n];
        // Accumulate in a long so a data section beyond 2 GiB is detected instead of wrapping.
        long off = 0;
        for (int i = 0; i < n; i++) {
            ShardRec r = sorted.get(i);
            // Store the real word, the simpel value and the clues as JSON.
            String line = r.word() + "\t" + r.simpel() + "\t" + Meta.GSON.toJson(r.clues());
            byte[] bytes = line.getBytes(StandardCharsets.UTF_8);
            recBytes.add(bytes);
            offsets[i] = Math.toIntExact(off);
            off += bytes.length;
        }

        long tableSize = (long) n * Integer.BYTES;
        long dataStart = HEADER_BYTES + tableSize;

        try (FileChannel ch = FileChannel.open(shardData,
                StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
            ByteBuffer hdr = ByteBuffer.allocate(HEADER_BYTES).order(Meta.ORDER);
            hdr.putInt(Meta.SHARD_MAGIC).putInt(Meta.VERSION).putInt(n);
            hdr.flip();
            writeFully(ch, hdr);

            ByteBuffer table = ByteBuffer.allocate(Math.multiplyExact(n, Integer.BYTES)).order(Meta.ORDER);
            for (int offset : offsets) {
                table.putInt(offset);
            }
            table.flip();
            writeFully(ch, table);

            ch.position(dataStart);
            for (byte[] b : recBytes) {
                writeFully(ch, ByteBuffer.wrap(b));
            }
        }

        try (FileChannel ch = FileChannel.open(shardMap,
                StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
            ByteBuffer hdr = ByteBuffer.allocate(HEADER_BYTES).order(Meta.ORDER);
            hdr.putInt(Meta.MAP_MAGIC).putInt(Meta.VERSION).putInt(n);
            hdr.flip();
            writeFully(ch, hdr);

            ByteBuffer keys = ByteBuffer.allocate(Math.multiplyExact(n, Long.BYTES)).order(Meta.ORDER);
            for (ShardRec r : sorted) {
                keys.putLong(r.w());
            }
            keys.flip();
            writeFully(ch, keys);
        }
    }

    /** Writes the whole remaining content of {@code buf}, repeating the write until it is drained. */
    private static void writeFully(FileChannel ch, ByteBuffer buf) throws IOException {
        while (buf.hasRemaining()) {
            ch.write(buf);
        }
    }

    /**
     * Builds the shard files from {@code nl_score_hints_v4.csv} and prints the lookup result for
     * a few sample words.
     *
     * @param args ignored
     * @throws Exception if reading the CSV or writing or reading the shard files fails
     */
    public static void main(String[] args) throws Exception {
        List<ShardRec> records = buildDict(Path.of("nl_score_hints_v4.csv"));
        buildShard(Meta.shardData, Meta.shardMap, records);
        for (String qRaw : List.of("FIETS", "huis", "kiwi")) {
            String q = Meta.normWord(qRaw);
            long w = Lemma.from(q);
            int i = Meta.findIndexInMapMmap(Meta.shardMap, w);
            System.out.println("\nQuery: " + qRaw + " (norm=" + q + ") w=" + w + " -> i=" + i);
            if (i >= 0) {
                ShardLem rec = Meta.readRecord(Meta.shardData, w, i);
                System.out.println(" simpel=" + rec.simpel());
                System.out.println(" clues=" + Arrays.toString(rec.clues()));
            } else {
                System.out.println(" NOT FOUND");
            }
        }
        System.out.println("\nFiles written to: " + Meta.dir);
        System.out.println(" " + Meta.shardData);
        System.out.println(" " + Meta.shardMap);
    }

    /**
     * Demo main: looks up a few sample words through {@code Meta.lookup} and prints the records.
     *
     * @param args ignored
     * @throws Exception if the lookup fails
     */
    public static void main1(String[] args) throws Exception {
        for (String qRaw : List.of("FIETS", "HUIS", "KIWI")) {
            // NOTE(review): 3897 is OR-ed in from bit 43 upwards; presumably the record-index
            // part of a packed key - confirm against Lemma.pack.
            long w = Lemma.from(qRaw) | (3897L << 43L);
            ShardLem rec = Meta.lookup(w);
            System.out.println(rec);
        }
    }

    /**
     * Reads the CSV word list and turns every line into a {@code ShardRec}.
     *
     * @param wordsPath UTF-8 CSV file to read
     * @return one record per line, in file order
     * @throws IOException if the file cannot be opened
     */
    private static List<ShardRec> buildDict(Path wordsPath) throws IOException {
        var recs = new ArrayList<ShardRec>();
        try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
            lines.forEach(line -> lineToLemma(line, w -> {
                String word = SwedishGenerator.Lemma.asWord(w, Export.BYTES.get());
                String[] clues = CsvIndexService.lineToClue(line);
                int simpel = CsvIndexService.lineToSimpel(line);
                recs.add(new ShardRec(word, w, simpel, clues));
            }));
        }
        return recs;
    }
}

View File

@@ -5,7 +5,6 @@ import lombok.Getter;
import lombok.NoArgsConstructor; import lombok.NoArgsConstructor;
import lombok.experimental.Accessors; import lombok.experimental.Accessors;
import lombok.val; import lombok.val;
import puzzle.DictJavaGeneratorMulti.DictEntryDTO.IntListDTO;
import puzzle.SwedishGenerator.Dict; import puzzle.SwedishGenerator.Dict;
import puzzle.SwedishGenerator.DictEntry; import puzzle.SwedishGenerator.DictEntry;
import puzzle.SwedishGenerator.Lemma; import puzzle.SwedishGenerator.Lemma;
@@ -16,14 +15,14 @@ public final class DictJavaGeneratorMulti {
// Smaller = more files, but safer for javac/class limits. // Smaller = more files, but safer for javac/class limits.
private static final int WORDS_CHUNK = 8_192; private static final int WORDS_CHUNK = 8_192;
private static final int POS_CHUNK = 8_192; private static final int POS_CHUNK = 8_192;
public static final int THRESS = 800;
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v4.csv"); Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v4.csv");
Path outDir = Path.of(args.length > 1 ? args[1] : "src/main/generated-sources/puzzle/dict" + THRESS); Path outDir = Path.of(args.length > 1 ? args[1] : "src/main/generated-sources/puzzle/dict" + THRESS);
String pkg = "puzzle.dict" + THRESS; String pkg = "puzzle.dict" + THRESS;
HashMap<String, ShardBuilder> builders = new HashMap<String, ShardBuilder>(16); HashMap<String, ShardBuilder> builders = new HashMap<String, ShardBuilder>(16);
SwedishGenerator.Dict dict = buildDict(wordsFile, builders); SwedishGenerator.Dict dict = buildDict(wordsFile, builders, THRESS);
Files.createDirectories(outDir); Files.createDirectories(outDir);
@@ -41,34 +40,30 @@ public final class DictJavaGeneratorMulti {
System.out.println("Generated sources into: " + outDir.toAbsolutePath()); System.out.println("Generated sources into: " + outDir.toAbsolutePath());
} }
public static final int THRESS = 800;
static String shardKey(long word) { static String shardKey(long word) {
return ""+Lemma.unpackSize(word) + 1; return "" + Lemma.unpackSize(word) + 1;
} }
private static SwedishGenerator.Dict buildDict(Path wordsPath, HashMap<String, ShardBuilder> builders) throws IOException { private static SwedishGenerator.Dict buildDict(Path wordsPath, HashMap<String, ShardBuilder> builders, int thress) throws IOException {
var map = new LongArrayList(100_000); var map = new LongArrayList(100_000);
try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) { try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
lines.forEach(line -> { lines.forEach(line -> {
CsvIndexService.lineToLemma(line, w -> { var parts = line.split(",", 4);
String word = Lemma.asWord(w, Export.BYTES.get()); var word = parts[0].trim();
String[] clues = CsvIndexService.lineToClue(line); long w = SwedishGenerator.Lemma.from(word.getBytes(US_ASCII));
int simpel = CsvIndexService.lineToSimpel(line); if (!word.equals(SwedishGenerator.Lemma.asWord(w, Export.BYTES.get()))) {
throw new RuntimeException();
// serialize to: WORD \t JSON \n }
// (als je al JSON string wilt bewaren: gebruik Gson.toJson(clues)) int score = Integer.parseInt(parts[1].trim());
String json = Meta.GSON.toJson(clues); var simpel = Integer.parseInt(parts[CsvIndexService.SIMPEL_IDX].trim());
String recStr = word + "\t" + simpel + "\t" + json + "\n"; if (score < 1 || simpel > thress) {
byte[] rec = recStr.getBytes(StandardCharsets.UTF_8); if (Main.VERBOSE) System.err.println("Word too complex: " + line);
return;
var key = shardKey(w); }
ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
try {
map.add(Lemma.pack(w, sb.addRecord(rec)));
} catch (IOException e) {
throw new UncheckedIOException(e);
}
});
var key = shardKey(w);
ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
map.add(Lemma.pack(w, sb.addRecord()));
}); });
} }
return Dicts.makeDict(map.toArray()); return Dicts.makeDict(map.toArray());
@@ -114,13 +109,10 @@ public final class DictJavaGeneratorMulti {
static final class ShardBuilder { static final class ShardBuilder {
final IntListDTO offsets = new IntListDTO(4096); int c;
final ByteArrayOutputStream data = new ByteArrayOutputStream(1 << 20); // grows int addRecord() {
int addRecord(byte[] rec) throws IOException { val currSize = c;
var size = data.size(); c++;
val currSize = offsets.size();
offsets.add(size);
data.write(rec);
return currSize; return currSize;
} }
} }