introduce bitloops

This commit is contained in:
mike
2026-01-20 09:47:55 +01:00
parent a764f45041
commit 8b7827cfc2
10 changed files with 252 additions and 94 deletions

View File

@@ -1,59 +1,127 @@
package puzzle;
import module java.base;
import com.google.gson.Gson;
import lombok.val;
import puzzle.SwedishGenerator.Lemma;
import static puzzle.SwedishGenerator.THRESS;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.Locale;
public class Meta {
// NOTE(review): this span reads like a rendered diff with the +/- markers stripped. The
// duplicated GSON / VERSION declarations, the package-private ShardLem record and the dangling
// readRecord header further down are presumably pre-change lines — confirm against the real file.
static final Gson GSON = new Gson();
private static final int VERSION = 1;
static record ShardLem(long w, int simpel, String[] clues) { }
// Shared Gson instance; readRecord uses it to parse the clue array of a record.
static final Gson GSON = new Gson();
// Format version that both the shard header and the map header are compared against.
static final int VERSION = 1;
// Magic number expected as the first int of a shard data file.
static final int SHARD_MAGIC = 0x49445831; // "IDX1"
static ShardLem readRecord(Path shardFile, int i) {
// Magic number expected as the first int of a map file.
static final int MAP_MAGIC = 0x4D415031; // "MAP1"
// Byte order applied to the buffers that read header, offset and key integers.
static final ByteOrder ORDER = ByteOrder.BIG_ENDIAN;
// Record shape that additionally carries the word text; not referenced in the visible code.
static record ShardRec(String word, long w, int simpel, String[] clues) { }
// Absolute, normalized form of the empty relative path.
static final Path projectRoot = Path.of("").toAbsolutePath().normalize(); // current working dir
// Directory holding the shard files, resolved against projectRoot.
static final Path dir = projectRoot.resolve("src/main/resources/shards");
// Record data file passed to readRecord by lookup.
static final Path shardData = dir.resolve("shard0.data");
// Key map file passed to findIndexInMapMmap by lookup.
static final Path shardMap = dir.resolve("shard0.map");
/**
 * Brings a word into its canonical upper-case form.
 *
 * <p>Important: the same normalization has to be applied when building and when querying.
 *
 * @param s the word to normalize; must not be {@code null}
 * @return {@code s} upper-cased using {@link Locale#ROOT}
 */
static String normWord(String s) {
    final Locale casingLocale = Locale.ROOT;
    final String upperCased = s.toUpperCase(casingLocale);
    return upperCased;
}
// --- Lookup: w -> i using mmap ---
/**
 * Finds the position of {@code target} among the keys stored in a map file.
 *
 * <p>Layout as read here: a 12-byte header (magic, version, key count {@code n}, each a 4-byte
 * int in {@code ORDER}) followed by {@code n} 8-byte keys. The keys are binary-searched with
 * signed {@code long} comparison, so they must be stored in ascending order for the result to
 * be meaningful.
 *
 * @param mapFile path of the map file to search
 * @param target  key to look for
 * @return the zero-based index of {@code target}, or {@code -1} if it is not present
 * @throws IOException if the file cannot be opened or mapped, its magic/version do not match,
 *                     or it is too short for its header or for the key count it declares
 */
static int findIndexInMapMmap(Path mapFile, long target) throws IOException {
    final int headerBytes = 12; // magic + version + count, 4 bytes each
    final int keyBytes = 8;
    try (FileChannel ch = FileChannel.open(mapFile, StandardOpenOption.READ)) {
        long size = ch.size();
        // A file shorter than the header is corrupt; report it as such instead of letting the
        // absolute getInt calls below fail with IndexOutOfBoundsException.
        if (size < headerBytes) throw new IOException("Bad map file: truncated header");
        MappedByteBuffer mbb = (MappedByteBuffer) ch.map(FileChannel.MapMode.READ_ONLY, 0, size).order(ORDER);
        int magic = mbb.getInt(0);
        int ver = mbb.getInt(4);
        int n = mbb.getInt(8);
        if (magic != MAP_MAGIC || ver != VERSION) throw new IOException("Bad map file");
        // The declared key count must be non-negative and fit inside the file. The check is
        // done in long arithmetic so a huge n cannot overflow; once it passes, every offset
        // computed below is within the (int-addressable) mapping.
        if (n < 0 || headerBytes + (long) n * keyBytes > size) {
            throw new IOException("Bad map file: key count " + n + " does not fit in " + size + " bytes");
        }
        int lo = 0, hi = n - 1;
        while (lo <= hi) {
            int mid = (lo + hi) >>> 1; // unsigned shift avoids overflow of lo + hi
            int off = headerBytes + mid * keyBytes;
            long key = mbb.getLong(off);
            if (key < target) lo = mid + 1;
            else if (key > target) hi = mid - 1;
            else return mid;
        }
        return -1;
    }
}
// --- Read record i from shard.data (your format) ---
/**
 * Reads record {@code i} from a shard data file and decodes it into a {@link ShardLem}.
 *
 * <p>Layout as read by this method: a 12-byte header (magic, version, record count {@code n}),
 * then a table of {@code n} 4-byte offsets, then the record payloads. Offsets are relative to
 * the start of the payload area; a record ends where the next one begins, and the last record
 * ends at the end of the file. A payload is UTF-8 text split on tabs into at most three fields:
 * the word, an integer, and a JSON array of clue strings.
 *
 * <p>NOTE(review): the body below appears to interleave pre-change and post-change lines of a
 * diff (two {@code hdr} declarations, two {@code offIp} declarations, two "Bad shard" checks and
 * a {@code catch} branch returning a placeholder) — confirm which lines belong to the current
 * version before compiling.
 *
 * @param shardFile path of the shard data file
 * @param i         zero-based record index
 * @return the decoded record
 * @throws IOException               if the file cannot be read or its magic/version do not match
 * @throws IndexOutOfBoundsException if {@code i} is outside {@code [0, n)}
 */
static ShardLem readRecord(Path shardFile, int i) throws IOException {
try (FileChannel ch = FileChannel.open(shardFile, StandardOpenOption.READ)) {
// Header: three ints — magic, version, record count.
ByteBuffer hdr = ByteBuffer.allocate(12);
ByteBuffer hdr = ByteBuffer.allocate(12).order(ORDER);
// NOTE(review): the return value of read is not checked here — confirm a short read
// cannot occur for these files.
ch.read(hdr);
hdr.flip();
int magic = hdr.getInt();
int ver = hdr.getInt();
int n = hdr.getInt();
if (magic != SHARD_MAGIC || ver != VERSION) throw new IOException("Bad shard");
if (magic != SHARD_MAGIC || ver != VERSION) throw new IOException("Bad shard file");
if (i < 0 || i >= n) throw new IndexOutOfBoundsException();
// The offset table starts right after the header; payloads start after the n table entries.
long tableStart = 12L;
long dataStart = 12L + (long) n * 4L;
// Start of record i, relative to dataStart.
int offI = readIntAt(ch, tableStart + (long) i * 4L);
int offIp = (i + 1 < n) ? readIntAt(ch, tableStart + (long) (i + 1) * 4L)
: (int) (ch.size() - dataStart);
// End of record i: the next record's start, or the end of the file for the last record.
int offIp = (i + 1 < n)
? readIntAt(ch, tableStart + (long) (i + 1) * 4L)
: (int) (ch.size() - dataStart);
int len = offIp - offI;
ByteBuffer buf = ByteBuffer.allocate(len);
ch.position(dataStart + offI);
ch.read(buf);
buf.flip();
var string = StandardCharsets.UTF_8.decode(buf).toString();
val parts = string.split("\t", 3);
return new ShardLem(Lemma.from(parts[0]), Integer.parseInt(parts[1]), GSON.fromJson(parts[2], String[].class));
} catch (Exception e) {
return new ShardLem(Lemma.from("XXX"), -1, new String[0]);
String s = StandardCharsets.UTF_8.decode(buf).toString();
// Limit 3 keeps any tabs inside the JSON field intact.
String[] parts = s.split("\t", 3);
// The word is normalized with normWord before being handed to Lemma.from.
long w = Lemma.from(normWord(parts[0]));
int simpel = Integer.parseInt(parts[1]);
String[] clues = GSON.fromJson(parts[2], String[].class);
return new ShardLem(w, simpel, clues);
}
}
/**
 * Ten index-file paths, {@code 0.idx} through {@code 9.idx}, located under
 * {@code src/main/generated-sources/puzzle}, or under its {@code dict<THRESS>} subdirectory
 * when {@code THRESS} is non-zero.
 */
static final Path[] SHARDS = IntStream.range(0, 10)
        .mapToObj(sId -> {
            String subDir = (THRESS == 0) ? "" : "/dict" + THRESS;
            Path base = Path.of("src/main/generated-sources/puzzle" + subDir);
            return base.resolve(sId + ".idx");
        })
        .toArray(Path[]::new);
/**
 * Picks the index file for a packed word.
 *
 * @param word packed word value, as understood by {@code Lemma.unpackSize}
 * @return the {@code SHARDS} entry at position {@code Lemma.unpackSize(word) + 1}
 */
static Path shardKey(long word) {
    final int slot = Lemma.unpackSize(word) + 1;
    return SHARDS[slot];
}
/**
 * Reads one 4-byte int from the channel at an absolute position.
 *
 * <p>The channel's own position is moved to {@code pos} before reading, so the channel position
 * changes as a result of this call.
 *
 * <p>NOTE(review): the two declarations of {@code b} below look like the pre-change and
 * post-change line of a diff; only the second applies {@code ORDER} — confirm which one is live.
 *
 * @param ch  open channel to read from
 * @param pos absolute byte position of the int
 * @return the int decoded from the four bytes read
 * @throws IOException if positioning or reading fails
 */
static int readIntAt(FileChannel ch, long pos) throws IOException {
ByteBuffer b = ByteBuffer.allocate(4);
ByteBuffer b = ByteBuffer.allocate(4).order(ORDER);
ch.position(pos);
// NOTE(review): the number of bytes actually read is not checked — verify that a short read
// (fewer than 4 bytes) cannot happen for the files this is used on.
ch.read(b);
b.flip();
return b.getInt();
}
// --- Demo main ---
/**
 * Looks up the record for a packed word: finds its index in {@code shardMap} via
 * {@code Lemma.pack43(w)} and, when present, reads that record from {@code shardData}.
 *
 * <p>Progress is printed to standard output (query, resolved index, and either the record's
 * fields or "NOT FOUND").
 *
 * <p>Unchecked exceptions, including the "NOT FOUND" signal, propagate unchanged; they are not
 * caught, printed and re-wrapped by this method's own handler. Checked exceptions such as
 * {@link java.io.IOException} are wrapped in a {@link RuntimeException} with the original as
 * cause.
 *
 * @param w packed word value
 * @return the record stored for {@code w}
 * @throws RuntimeException with message {@code "NOT FOUND"} if the map has no entry for
 *                          {@code w}, or wrapping any checked exception raised while reading
 */
public static ShardLem lookup(long w) {
    try {
        int i = findIndexInMapMmap(shardMap, Lemma.pack43(w));
        val qRaw = Lemma.asWord(w, new byte[8]);
        System.out.println("\nQuery: " + qRaw + " w=" + w + " -> i=" + i);
        if (i < 0) {
            System.out.println(" NOT FOUND");
            throw new RuntimeException("NOT FOUND");
        }
        ShardLem rec = readRecord(shardData, i);
        System.out.println(" simpel=" + rec.simpel());
        System.out.println(" clues=" + Arrays.toString(rec.clues()));
        return rec;
    } catch (RuntimeException e) {
        // Already unchecked (this includes the NOT FOUND signal above): rethrow as-is so the
        // caller sees the original exception rather than a wrapper around it.
        throw e;
    } catch (Exception e) {
        // Checked failure (e.g. I/O): surface it unchecked, keeping the cause.
        throw new RuntimeException(e);
    }
}
/**
 * One decoded shard record, as produced by {@code readRecord} and returned by {@code lookup}.
 *
 * @param w      value {@code Lemma.from} yields for the record's word field
 * @param simpel integer parsed from the record's second tab-separated field
 * @param clues  clue strings parsed from the record's JSON array field
 */
public record ShardLem(long w, int simpel, String[] clues) { }
}