introduce bitloops
This commit is contained in:
@@ -300,12 +300,16 @@ public record Export() {
|
|||||||
var index = new DictEntryDTO[SwedishGenerator.MAX_WORD_LENGTH_PLUS_ONE];
|
var index = new DictEntryDTO[SwedishGenerator.MAX_WORD_LENGTH_PLUS_ONE];
|
||||||
Arrays.setAll(index, DictEntryDTO::new);
|
Arrays.setAll(index, DictEntryDTO::new);
|
||||||
for (var lemma : wordz) {
|
for (var lemma : wordz) {
|
||||||
var L = Lemma.length(lemma);
|
var L = Lemma.unpackSize(lemma) + 1;//Lemma.unpackSize(lemma) + 2;
|
||||||
|
val entry = index[L];
|
||||||
var entry = index[L];
|
val idx = entry.words().size();
|
||||||
var idx = entry.words().size();
|
val pos = entry.pos();
|
||||||
entry.words().add(lemma);
|
entry.words().add(lemma);
|
||||||
for (var i = 0; i < L; i++) entry.pos()[i][Lemma.byteAt(lemma, i) - 1].add(idx);
|
int i = 0;
|
||||||
|
for (long w = lemma & Lemma.LETTER_MASK; w != 0; w >>>= 5, i++) {
|
||||||
|
pos[i][(int) ((w & 31) - 1)].add(idx);
|
||||||
|
}
|
||||||
|
// for (i = 0; i < L; i++) entry.pos()[i][Lemma.byteAt(lemma, i) - 1].add(idx);
|
||||||
}
|
}
|
||||||
for (int i = SwedishGenerator.MIN_LEN; i < index.length; i++) if (index[i].words().size() <= 0) throw new RuntimeException("No words for length " + i);
|
for (int i = SwedishGenerator.MIN_LEN; i < index.length; i++) if (index[i].words().size() <= 0) throw new RuntimeException("No words for length " + i);
|
||||||
return new Dict(Arrays.stream(index).map(i -> {
|
return new Dict(Arrays.stream(index).map(i -> {
|
||||||
|
|||||||
@@ -378,8 +378,8 @@ public record Masker(Rng rng, int[] stack, Clues cache) {
|
|||||||
|
|
||||||
for (var k = 0; k < offspring; k++) {
|
for (var k = 0; k < offspring; k++) {
|
||||||
if (Thread.currentThread().isInterrupted()) break;
|
if (Thread.currentThread().isInterrupted()) break;
|
||||||
var p1 = pop.get(rng.randint(pop.size() - 1));
|
var p1 = rng.rand(pop);
|
||||||
var p2 = pop.get(rng.randint(pop.size() - 1));
|
var p2 = rng.rand(pop);
|
||||||
var child = crossover(p1.grid, p2.grid);
|
var child = crossover(p1.grid, p2.grid);
|
||||||
children.add(new GridAndFit(hillclimb(child, clueSize, 70)));
|
children.add(new GridAndFit(hillclimb(child, clueSize, 70)));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -43,24 +43,16 @@ public class Meta {
|
|||||||
buf.flip();
|
buf.flip();
|
||||||
var string = StandardCharsets.UTF_8.decode(buf).toString();
|
var string = StandardCharsets.UTF_8.decode(buf).toString();
|
||||||
val parts = string.split("\t", 3);
|
val parts = string.split("\t", 3);
|
||||||
return new ShardLem(Lemma.pack(parts[0]), Integer.parseInt(parts[1]), GSON.fromJson(parts[2], String[].class));
|
return new ShardLem(Lemma.from(parts[0]), Integer.parseInt(parts[1]), GSON.fromJson(parts[2], String[].class));
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
return new ShardLem(Lemma.pack("XXX"), -1, new String[0]);
|
return new ShardLem(Lemma.from("XXX"), -1, new String[0]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
static final Path[] SHARDS = IntStream.range(0, 10).mapToObj(sId -> Path.of("src/main/generated-sources/puzzle").resolve(sId + ".idx")).toArray(
|
static final Path[] SHARDS = IntStream.range(0, 10).mapToObj(sId -> Path.of("src/main/generated-sources/puzzle").resolve(sId + ".idx")).toArray(
|
||||||
Path[]::new);
|
Path[]::new);
|
||||||
static Path shardKey(long word) {
|
static Path shardKey(long word) {
|
||||||
int L = Lemma.length(word);
|
return SHARDS[Lemma.unpackSize(word) + 1];
|
||||||
return SHARDS[L];
|
|
||||||
}
|
|
||||||
static String shardKey(String word) {
|
|
||||||
int L = word.length();
|
|
||||||
char ch = word.charAt(0);
|
|
||||||
if (ch < 'A' || ch > 'Z') ch = '_';
|
|
||||||
///return "" + L + ch; // e.g. "6Z"
|
|
||||||
return "" + L; // e.g. "6Z"
|
|
||||||
}
|
}
|
||||||
static int readIntAt(FileChannel ch, long pos) throws IOException {
|
static int readIntAt(FileChannel ch, long pos) throws IOException {
|
||||||
ByteBuffer b = ByteBuffer.allocate(4);
|
ByteBuffer b = ByteBuffer.allocate(4);
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import lombok.experimental.Delegate;
|
|||||||
import lombok.val;
|
import lombok.val;
|
||||||
import precomp.Neighbors9x8;
|
import precomp.Neighbors9x8;
|
||||||
import precomp.Neighbors9x8.rci;
|
import precomp.Neighbors9x8.rci;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import static java.lang.Long.*;
|
import static java.lang.Long.*;
|
||||||
import static java.lang.Long.numberOfTrailingZeros;
|
import static java.lang.Long.numberOfTrailingZeros;
|
||||||
@@ -33,7 +34,6 @@ import static java.nio.charset.StandardCharsets.US_ASCII;
|
|||||||
@SuppressWarnings("ALL")
|
@SuppressWarnings("ALL")
|
||||||
public record SwedishGenerator() {
|
public record SwedishGenerator() {
|
||||||
|
|
||||||
public static final long GT_1_OFFSET_53_BIT = 0x3E00000000000000L;
|
|
||||||
public static final long X = 0L;
|
public static final long X = 0L;
|
||||||
public static final int LOG_EVERY_MS = 200;
|
public static final int LOG_EVERY_MS = 200;
|
||||||
public static final int BAR_LEN = 22;
|
public static final int BAR_LEN = 22;
|
||||||
@@ -55,13 +55,12 @@ public record SwedishGenerator() {
|
|||||||
public static final byte DASH = (byte) C_DASH;
|
public static final byte DASH = (byte) C_DASH;
|
||||||
public static final long RANGE_0_SIZE = (long) SIZE_MIN_1 - 0L + 1L;
|
public static final long RANGE_0_SIZE = (long) SIZE_MIN_1 - 0L + 1L;
|
||||||
public static final long RANGE_0_624 = 624L - 0L + 1L;
|
public static final long RANGE_0_624 = 624L - 0L + 1L;
|
||||||
public static final int CLUE_INDEX_MAX_SIZE = (288 | 3) + 1;
|
|
||||||
public static int clamp(int x, int a, int b) { return Math.max(a, Math.min(b, x)); }
|
public static int clamp(int x, int a, int b) { return Math.max(a, Math.min(b, x)); }
|
||||||
interface Bit1029 {
|
interface Bit1029 {
|
||||||
|
|
||||||
static long[] bit1029() { return new long[2048]; }
|
static long[] bit1029() { return new long[2048]; }
|
||||||
private static int wordIndex(int bitIndex) { return bitIndex >> 6; }
|
private static int wordIndex(int bitIndex) { return bitIndex >> 6; }
|
||||||
static boolean get(long[] bits, int bitIndex) { return (bits[wordIndex(bitIndex)] & 1L << bitIndex) != 0L; }
|
static boolean get(long[] bits, int bitIndex) { return (bits[wordIndex(bitIndex)] & 1L << bitIndex) != X; }
|
||||||
static void set(long[] bits, int bitIndex) { bits[wordIndex(bitIndex)] |= 1L << bitIndex; }
|
static void set(long[] bits, int bitIndex) { bits[wordIndex(bitIndex)] |= 1L << bitIndex; }
|
||||||
static void clear(long[] bits, int bitIndex) { bits[wordIndex(bitIndex)] &= ~(1L << bitIndex); }
|
static void clear(long[] bits, int bitIndex) { bits[wordIndex(bitIndex)] &= ~(1L << bitIndex); }
|
||||||
}
|
}
|
||||||
@@ -120,7 +119,9 @@ public record SwedishGenerator() {
|
|||||||
}*/
|
}*/
|
||||||
return b;
|
return b;
|
||||||
}
|
}
|
||||||
public int randint(int max) { return (int) (((nextU32() & 0xFFFFFFFFL) % ((long) max - 0L + 1L))); }
|
public <T> T rand(T[] p) { return p[(int) (((nextU32() & 0xFFFFFFFFL) % ((long) p.length /*- 0L*/ /*+ 1L*/)))]; }
|
||||||
|
public <T> T rand(List<T> p) { return p.get((int) (((nextU32() & 0xFFFFFFFFL) % ((long) p.size() /*- 0L*/ /*+ 1L*/)))); }
|
||||||
|
public int randint(int max) { return (int) (((nextU32() & 0xFFFFFFFFL) % ((long) max /*- 0L*/ /*+ 1L*/))); }
|
||||||
public int randint0_SIZE() { return (int) (((nextU32() & 0xFFFFFFFFL) % RANGE_0_SIZE)); }
|
public int randint0_SIZE() { return (int) (((nextU32() & 0xFFFFFFFFL) % RANGE_0_SIZE)); }
|
||||||
public int randint0_624() { return (int) (((nextU32() & 0xFFFFFFFFL) % RANGE_0_624)); }
|
public int randint0_624() { return (int) (((nextU32() & 0xFFFFFFFFL) % RANGE_0_624)); }
|
||||||
public double nextFloat() { return (nextU32() & 0xFFFFFFFFL) / 4294967295.0; }
|
public double nextFloat() { return (nextU32() & 0xFFFFFFFFL) / 4294967295.0; }
|
||||||
@@ -142,26 +143,34 @@ public record SwedishGenerator() {
|
|||||||
static final long LETTER_MASK = (1L << 40) - 1; // low 40 bits
|
static final long LETTER_MASK = (1L << 40) - 1; // low 40 bits
|
||||||
static final long INDEX_MASK = (1L << 24) - 1; // 24 bits
|
static final long INDEX_MASK = (1L << 24) - 1; // 24 bits
|
||||||
|
|
||||||
static long pack(String word) { return pack(word.getBytes(US_ASCII)); }
|
static long from(byte[] word) { return packShiftIn(word) | ((long) (word.length - 1) << 40); }
|
||||||
static long packW(byte[] b) { return pack(b) /*| ((long) index << 40)*/; }
|
static long pack(long w, int shardIndex) { return w | (((long) shardIndex) << 43) | ((long) length0(w)) << 40; }
|
||||||
static long pack(byte[] b) {
|
/* static long pack(byte[] b) {
|
||||||
long w = 0;
|
long w = 0;
|
||||||
for (var i = 0; i < b.length; i++) w |= ((long) b[i] & 31) << (i * 5);
|
for (var i = 0; i < b.length; i++) w |= ((long) b[i] & 31) << (i * 5);
|
||||||
return w;
|
return w;
|
||||||
|
}*/
|
||||||
|
static long packShiftIn(byte[] b) {
|
||||||
|
long w = 0;
|
||||||
|
for (int i = b.length - 1; i >= 0; i--) w = (w << 5) | ((long) b[i] & 31);
|
||||||
|
return w;
|
||||||
}
|
}
|
||||||
static public long from(String word) { return packW(word.getBytes(US_ASCII)); }
|
static public long from(String word) { return packShiftIn(word.getBytes(US_ASCII)) | ((long) (word.length() - 1) << 40); }
|
||||||
static byte byteAt(long word, int idx) { return (byte) ((word >>> (idx * 5)) & 0b11111); }
|
static byte byteAt(long word, int idx) { return (byte) ((word >>> ((long) idx * 5L)) & 0b11111L); }
|
||||||
static int length(long word) { return ((63 - numberOfLeadingZeros(word & LETTER_MASK)) / 5) + 1; }
|
|
||||||
static int length0(long word) { return ((63 - numberOfLeadingZeros(word & LETTER_MASK)) / 5); }
|
static int length0(long word) { return ((63 - numberOfLeadingZeros(word & LETTER_MASK)) / 5); }
|
||||||
static ThreadLocal<byte[]> BYTES = ThreadLocal.withInitial(() -> new byte[MAX_WORD_LENGTH]);
|
static ThreadLocal<byte[]> BYTES = ThreadLocal.withInitial(() -> new byte[MAX_WORD_LENGTH]);
|
||||||
public static String asWord(long word) {
|
public static String asWord(long word) {
|
||||||
val len = Lemma.length(word);
|
var b = BYTES.get();
|
||||||
var b = BYTES.get();//new byte[Lemma.length(word)];
|
int bi = 0;
|
||||||
for (int i = 0, bi = 0; i < len * 5; bi++, i += 5) b[bi] = (byte) (((word >>> i) & 31) | 64);
|
|
||||||
return new String(b, 0, 0, len);
|
for (long w = word & LETTER_MASK; w != 0; w >>>= 5) {
|
||||||
|
b[bi++] = (byte) ((w & 31) | 64); // neem laagste 5 bits
|
||||||
|
}
|
||||||
|
return new String(b, 0, bi, US_ASCII);
|
||||||
}
|
}
|
||||||
static int unpackIndex(long w) { return (int) (w >>> 40); }
|
static int unpackIndex(long w) { return (int) (w >>> 40); }
|
||||||
static int unpackShardIndex(long w) { return (int) (w >>> 43); }
|
static int unpackShardIndex(long w) { return (int) (w >>> 43); }
|
||||||
|
static int unpackSize(long w) { return (int) (w >>> 40) & 7; }
|
||||||
static int unpackLetters(long w) { return (int) (w & LETTER_MASK); }
|
static int unpackLetters(long w) { return (int) (w & LETTER_MASK); }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -33,6 +33,6 @@ public final class CsvIndexService {
|
|||||||
if (Main.VERBOSE) System.err.println("Word too complex: " + line);
|
if (Main.VERBOSE) System.err.println("Word too complex: " + line);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
ok.accept(Lemma.packW(word.getBytes(US_ASCII)));
|
ok.accept(Lemma.from(word.getBytes(US_ASCII)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -46,8 +46,6 @@ public final class DictJavaGeneratorMulti {
|
|||||||
try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
|
try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
|
||||||
lines.forEach(line -> {
|
lines.forEach(line -> {
|
||||||
CsvIndexService.lineToLemma(line, w -> {
|
CsvIndexService.lineToLemma(line, w -> {
|
||||||
long len = Lemma.length0(w);
|
|
||||||
|
|
||||||
String word = Lemma.asWord(w);
|
String word = Lemma.asWord(w);
|
||||||
String[] clues = CsvIndexService.lineToClue(line);
|
String[] clues = CsvIndexService.lineToClue(line);
|
||||||
int simpel = CsvIndexService.lineToSimpel(line);
|
int simpel = CsvIndexService.lineToSimpel(line);
|
||||||
@@ -61,8 +59,7 @@ public final class DictJavaGeneratorMulti {
|
|||||||
var key = Meta.shardKey(w);
|
var key = Meta.shardKey(w);
|
||||||
ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
|
ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
|
||||||
try {
|
try {
|
||||||
long index = ((long) sb.addRecord(rec) << 3) | len;
|
map.add(Lemma.pack(w, sb.addRecord(rec)));
|
||||||
map.add(w | (index << 40));
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new UncheckedIOException(e);
|
throw new UncheckedIOException(e);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -151,7 +151,7 @@ public class ExportFormatTest {
|
|||||||
// These words are known to be in the CSV and likely in the dictionary
|
// These words are known to be in the CSV and likely in the dictionary
|
||||||
String[] testWords = { "EEN", "NAAR", "IEDEREEN" };
|
String[] testWords = { "EEN", "NAAR", "IEDEREEN" };
|
||||||
for (String wStr : testWords) {
|
for (String wStr : testWords) {
|
||||||
long w = Lemma.pack(wStr);
|
long w = Lemma.from(wStr);
|
||||||
int L = wStr.length();
|
int L = wStr.length();
|
||||||
var entry = DictData.DICT.index()[L];
|
var entry = DictData.DICT.index()[L];
|
||||||
if (entry == null) continue;
|
if (entry == null) continue;
|
||||||
|
|||||||
@@ -48,15 +48,6 @@ public class MainTest {
|
|||||||
this.verbose = false;
|
this.verbose = false;
|
||||||
}};
|
}};
|
||||||
static final Dict dict = DictData.DICT;//loadDict(opts.wordsPath);
|
static final Dict dict = DictData.DICT;//loadDict(opts.wordsPath);
|
||||||
public static Dict loadDict(String wordsPath) {
|
|
||||||
var map = new LongArrayList(100_000);
|
|
||||||
try (var lines = Files.lines(Path.of(wordsPath), StandardCharsets.UTF_8)) {
|
|
||||||
lines.forEach(line -> CsvIndexService.lineToLemma(line, map::add));
|
|
||||||
return Dicts.makeDict(map.toArray());
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException("Failed to load dictionary from " + wordsPath, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@Test
|
@Test
|
||||||
void testExtractSlots() {
|
void testExtractSlots() {
|
||||||
|
|
||||||
|
|||||||
@@ -187,7 +187,7 @@ public class SwedishGeneratorTest {
|
|||||||
assertEquals(val1, rng2.nextU32());
|
assertEquals(val1, rng2.nextU32());
|
||||||
|
|
||||||
for (var i = 0; i < 100; i++) {
|
for (var i = 0; i < 100; i++) {
|
||||||
var r = rng.randint(5);
|
var r = rng.randint(6);
|
||||||
assertTrue(r >= 0 && r <= 5);
|
assertTrue(r >= 0 && r <= 5);
|
||||||
var f = rng.nextFloat();
|
var f = rng.nextFloat();
|
||||||
assertTrue(f >= 0.0 && f <= 1.0);
|
assertTrue(f >= 0.0 && f <= 1.0);
|
||||||
@@ -217,8 +217,8 @@ public class SwedishGeneratorTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testLemmaAndDict() {
|
void testLemmaAndDict() {
|
||||||
Assertions.assertEquals(Lemma.pack("APPLE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(l1));
|
Assertions.assertEquals(Lemma.packShiftIn("APPLE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(l1));
|
||||||
assertEquals(5, Lemma.length(l1));
|
assertEquals(4, Lemma.unpackSize(l1));
|
||||||
assertEquals(LETTER_A, Lemma.byteAt(l1, 0));
|
assertEquals(LETTER_A, Lemma.byteAt(l1, 0));
|
||||||
|
|
||||||
var dict = Dicts.makeDict(new long[]{ l1, l2, l2a, l4a, l6a, l7a, l8a });
|
var dict = Dicts.makeDict(new long[]{ l1, l2, l2a, l4a, l6a, l7a, l8a });
|
||||||
@@ -228,7 +228,7 @@ public class SwedishGeneratorTest {
|
|||||||
|
|
||||||
var entry3 = dict.index()[3];
|
var entry3 = dict.index()[3];
|
||||||
assertEquals(1, entry3.words().length);
|
assertEquals(1, entry3.words().length);
|
||||||
assertEquals(Lemma.pack("AXE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(entry3.words()[0]));
|
assertEquals(Lemma.packShiftIn("AXE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(entry3.words()[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|||||||
Reference in New Issue
Block a user