introduce bitloops
This commit is contained in:
@@ -1,59 +1,127 @@
|
||||
package puzzle;
|
||||
|
||||
import module java.base;
|
||||
import com.google.gson.Gson;
|
||||
import lombok.val;
|
||||
import puzzle.SwedishGenerator.Lemma;
|
||||
import static puzzle.SwedishGenerator.THRESS;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.MappedByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.Arrays;
|
||||
import java.util.Locale;
|
||||
|
||||
public class Meta {
|
||||
|
||||
static final Gson GSON = new Gson();
|
||||
private static final int VERSION = 1;
|
||||
|
||||
static record ShardLem(long w, int simpel, String[] clues) { }
|
||||
static final Gson GSON = new Gson();
|
||||
static final int VERSION = 1;
|
||||
|
||||
static final int SHARD_MAGIC = 0x49445831; // "IDX1"
|
||||
static ShardLem readRecord(Path shardFile, int i) {
|
||||
static final int MAP_MAGIC = 0x4D415031; // "MAP1"
|
||||
|
||||
static final ByteOrder ORDER = ByteOrder.BIG_ENDIAN;
|
||||
|
||||
static record ShardRec(String word, long w, int simpel, String[] clues) { }
|
||||
|
||||
static final Path projectRoot = Path.of("").toAbsolutePath().normalize(); // current working dir
|
||||
static final Path dir = projectRoot.resolve("src/main/resources/shards");
|
||||
static final Path shardData = dir.resolve("shard0.data");
|
||||
static final Path shardMap = dir.resolve("shard0.map");
|
||||
static String normWord(String s) {
|
||||
// belangrijk: zelfde normalisatie bij build en query
|
||||
return s.toUpperCase(Locale.ROOT);
|
||||
}
|
||||
|
||||
// --- Lookup: w -> i using mmap ---
|
||||
static int findIndexInMapMmap(Path mapFile, long target) throws IOException {
|
||||
try (FileChannel ch = FileChannel.open(mapFile, StandardOpenOption.READ)) {
|
||||
MappedByteBuffer mbb = (MappedByteBuffer) ch.map(FileChannel.MapMode.READ_ONLY, 0, ch.size()).order(ORDER);
|
||||
|
||||
int magic = mbb.getInt(0);
|
||||
int ver = mbb.getInt(4);
|
||||
int n = mbb.getInt(8);
|
||||
if (magic != MAP_MAGIC || ver != VERSION) throw new IOException("Bad map file");
|
||||
|
||||
int lo = 0, hi = n - 1;
|
||||
while (lo <= hi) {
|
||||
int mid = (lo + hi) >>> 1;
|
||||
int off = 12 + mid * 8;
|
||||
long key = mbb.getLong(off);
|
||||
|
||||
if (key < target) lo = mid + 1;
|
||||
else if (key > target) hi = mid - 1;
|
||||
else return mid;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// --- Read record i from shard.data (your format) ---
|
||||
static ShardLem readRecord(Path shardFile, int i) throws IOException {
|
||||
try (FileChannel ch = FileChannel.open(shardFile, StandardOpenOption.READ)) {
|
||||
ByteBuffer hdr = ByteBuffer.allocate(12);
|
||||
ByteBuffer hdr = ByteBuffer.allocate(12).order(ORDER);
|
||||
ch.read(hdr);
|
||||
hdr.flip();
|
||||
int magic = hdr.getInt();
|
||||
int ver = hdr.getInt();
|
||||
int n = hdr.getInt();
|
||||
if (magic != SHARD_MAGIC || ver != VERSION) throw new IOException("Bad shard");
|
||||
if (magic != SHARD_MAGIC || ver != VERSION) throw new IOException("Bad shard file");
|
||||
if (i < 0 || i >= n) throw new IndexOutOfBoundsException();
|
||||
|
||||
long tableStart = 12L;
|
||||
long dataStart = 12L + (long) n * 4L;
|
||||
|
||||
int offI = readIntAt(ch, tableStart + (long) i * 4L);
|
||||
int offIp = (i + 1 < n) ? readIntAt(ch, tableStart + (long) (i + 1) * 4L)
|
||||
: (int) (ch.size() - dataStart);
|
||||
int offIp = (i + 1 < n)
|
||||
? readIntAt(ch, tableStart + (long) (i + 1) * 4L)
|
||||
: (int) (ch.size() - dataStart);
|
||||
|
||||
int len = offIp - offI;
|
||||
ByteBuffer buf = ByteBuffer.allocate(len);
|
||||
ch.position(dataStart + offI);
|
||||
ch.read(buf);
|
||||
buf.flip();
|
||||
var string = StandardCharsets.UTF_8.decode(buf).toString();
|
||||
val parts = string.split("\t", 3);
|
||||
return new ShardLem(Lemma.from(parts[0]), Integer.parseInt(parts[1]), GSON.fromJson(parts[2], String[].class));
|
||||
} catch (Exception e) {
|
||||
return new ShardLem(Lemma.from("XXX"), -1, new String[0]);
|
||||
|
||||
String s = StandardCharsets.UTF_8.decode(buf).toString();
|
||||
String[] parts = s.split("\t", 3);
|
||||
|
||||
long w = Lemma.from(normWord(parts[0]));
|
||||
int simpel = Integer.parseInt(parts[1]);
|
||||
String[] clues = GSON.fromJson(parts[2], String[].class);
|
||||
return new ShardLem(w, simpel, clues);
|
||||
}
|
||||
}
|
||||
static final Path[] SHARDS = IntStream.range(0, 10).mapToObj(sId -> Path.of("src/main/generated-sources/puzzle" + (THRESS == 0 ? "" : "/dict" + THRESS)).resolve(sId + ".idx"))
|
||||
.toArray(
|
||||
Path[]::new);
|
||||
static Path shardKey(long word) {
|
||||
return SHARDS[Lemma.unpackSize(word) + 1];
|
||||
}
|
||||
|
||||
static int readIntAt(FileChannel ch, long pos) throws IOException {
|
||||
ByteBuffer b = ByteBuffer.allocate(4);
|
||||
ByteBuffer b = ByteBuffer.allocate(4).order(ORDER);
|
||||
ch.position(pos);
|
||||
ch.read(b);
|
||||
b.flip();
|
||||
return b.getInt();
|
||||
}
|
||||
|
||||
// --- Lookup: packed word -> record (map search, then shard read) ---
|
||||
public static ShardLem lookup(long w) {
|
||||
try {
|
||||
int i = findIndexInMapMmap(shardMap, Lemma.pack43(w));
|
||||
val qRaw = Lemma.asWord(w, new byte[8]);
|
||||
System.out.println("\nQuery: " + qRaw + " w=" + w + " -> i=" + i);
|
||||
if (i >= 0) {
|
||||
ShardLem rec = readRecord(shardData, i);
|
||||
System.out.println(" simpel=" + rec.simpel());
|
||||
System.out.println(" clues=" + Arrays.toString(rec.clues()));
|
||||
return rec;
|
||||
} else {
|
||||
System.out.println(" NOT FOUND");
|
||||
throw new RuntimeException("NOT FOUND");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
public record ShardLem(long w, int simpel, String[] clues) { }
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user