255 lines
8.5 KiB
Java
255 lines
8.5 KiB
Java
package puzzle;
|
|
|
|
import puzzle.SwedishGenerator.Lemma;
|
|
import java.io.BufferedInputStream;
|
|
import java.io.BufferedOutputStream;
|
|
import java.io.Closeable;
|
|
import java.io.DataInputStream;
|
|
import java.io.DataOutputStream;
|
|
import java.io.IOException;
|
|
import java.nio.ByteBuffer;
|
|
import java.nio.channels.FileChannel;
|
|
import java.nio.charset.StandardCharsets;
|
|
import java.nio.file.Files;
|
|
import java.nio.file.Path;
|
|
import java.nio.file.StandardOpenOption;
|
|
import java.util.Arrays;
|
|
import java.util.function.LongConsumer;
|
|
import static java.nio.charset.StandardCharsets.US_ASCII;
|
|
|
|
public final class CsvIndexService
|
|
implements Closeable {
|
|
|
|
static final ScopedValue<CsvIndexService> SC = ScopedValue.newInstance();
|
|
private static final int MAGIC = 0x4C494458; // "LIDX"
|
|
private static final int VERSION = 1;
|
|
static int SIMPEL_IDX = 3;
|
|
private final Path csvPath;
|
|
private final Path idxPath;
|
|
|
|
private volatile long[] offsets; // lazy
|
|
private volatile FileChannel csvChannel; // open once
|
|
private final Object lock = new Object();
|
|
|
|
public CsvIndexService(Path csvPath, Path idxPath) {
|
|
this.csvPath = csvPath;
|
|
this.idxPath = idxPath;
|
|
}
|
|
public static int lineToSimpel(String line) {
|
|
var parts = line.split(",", 5);
|
|
return Integer.parseInt(parts[SIMPEL_IDX].trim());
|
|
}
|
|
public static String[] lineToClue(String line) {
|
|
if (line.isBlank()) throw new RuntimeException("Empty line");
|
|
var parts = line.split(",", 5);
|
|
var rawClue = parts[4].trim();
|
|
if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
|
|
rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
|
|
}
|
|
return Meta.GSON.fromJson(rawClue, String[].class);
|
|
}
|
|
public static void lineToLemma(String line, LongConsumer ok) {
|
|
if (line.isBlank()) {
|
|
throw new RuntimeException("Empty line");
|
|
}
|
|
var parts = line.split(",", 5);
|
|
var id = Integer.parseInt(parts[0].trim());
|
|
var word = parts[1].trim();
|
|
int score = Integer.parseInt(parts[2].trim());
|
|
if (score < 1) {
|
|
if (Main.VERBOSE) System.err.println("Word too complex: " + line);
|
|
return;
|
|
}
|
|
ok.accept(Lemma.pack(id, word.getBytes(US_ASCII)));
|
|
}
|
|
|
|
public static int simpel(int index) {
|
|
try {
|
|
if (SC.isBound())
|
|
return lineToSimpel(SC.get().getLine(index));
|
|
return -1;
|
|
} catch (Exception e) {
|
|
e.printStackTrace();
|
|
throw new RuntimeException("Failed to get clues for index " + index, e);
|
|
}
|
|
}
|
|
public static String[] clues(int index) {
|
|
try {
|
|
if (SC.isBound())
|
|
return lineToClue(SC.get().getLine(index));
|
|
return new String[0];
|
|
} catch (Exception e) {
|
|
e.printStackTrace();
|
|
throw new RuntimeException("Failed to get clues for index " + index, e);
|
|
}
|
|
}
|
|
/** Haal één regel op (0-based line index), met self-healing index (1x rebuild). */
|
|
public String getLine(int lineIndex) throws IOException {
|
|
ensureLoaded();
|
|
|
|
var line = readLineAt(lineIndex);
|
|
|
|
if (startsWithIndex(line, lineIndex)) return line;
|
|
|
|
// mismatch => rebuild index en nog 1x proberen
|
|
synchronized (lock) {
|
|
rebuildIndexLocked();
|
|
line = readLineAt(lineIndex);
|
|
if (startsWithIndex(line, lineIndex)) return line;
|
|
}
|
|
|
|
throw new RuntimeException("Index mismatch after rebuild. Requested=" + lineIndex + ", got line=" + preview(line));
|
|
}
|
|
|
|
public void ensureLoaded() throws IOException {
|
|
if (offsets != null && csvChannel != null && csvChannel.isOpen()) return;
|
|
|
|
synchronized (lock) {
|
|
if (offsets != null && csvChannel != null && csvChannel.isOpen()) return;
|
|
|
|
csvChannel = FileChannel.open(csvPath, StandardOpenOption.READ);
|
|
|
|
if (Files.exists(idxPath)) {
|
|
try {
|
|
offsets = readIndex(idxPath);
|
|
return;
|
|
} catch (IOException badIndex) {
|
|
// fall-through -> rebuild
|
|
}
|
|
}
|
|
|
|
rebuildIndexLocked();
|
|
}
|
|
}
|
|
|
|
private void rebuildIndexLocked() throws IOException {
|
|
var built = buildOffsets(csvPath);
|
|
writeIndex(idxPath, built);
|
|
offsets = built;
|
|
}
|
|
|
|
private String readLineAt(int lineIndex) throws IOException {
|
|
var local = offsets;
|
|
if (lineIndex < 0 || lineIndex >= local.length) {
|
|
throw new IndexOutOfBoundsException("lineIndex=" + lineIndex + ", max=" + (local.length - 1));
|
|
}
|
|
|
|
long currentPos = local[lineIndex];
|
|
|
|
// lees in blokjes (sneller dan 1 byte) tot newline
|
|
var buf = new byte[8192];
|
|
var total = 0;
|
|
var out = new byte[256];
|
|
|
|
while (true) {
|
|
var bb = ByteBuffer.wrap(buf);
|
|
var n = csvChannel.read(bb, currentPos);
|
|
if (n < 0) break; // EOF
|
|
currentPos += n;
|
|
var end = n;
|
|
|
|
for (var i = 0; i < end; i++) {
|
|
var b = buf[i];
|
|
|
|
if (b == (byte) '\n') {
|
|
return new String(out, 0, total, StandardCharsets.UTF_8);
|
|
}
|
|
if (b == (byte) '\r') continue;
|
|
|
|
if (total == out.length) out = Arrays.copyOf(out, out.length * 2);
|
|
out[total++] = b;
|
|
}
|
|
}
|
|
|
|
return new String(out, 0, total, StandardCharsets.UTF_8);
|
|
}
|
|
|
|
/** Check: begint de regel met "<lineIndex>," */
|
|
private static boolean startsWithIndex(String line, int lineIndex) {
|
|
if (line == null || line.isEmpty()) return false;
|
|
|
|
var comma = line.indexOf(',');
|
|
if (comma <= 0) return false;
|
|
|
|
// snelle parse zonder split
|
|
long v = 0;
|
|
for (var i = 0; i < comma; i++) {
|
|
var c = line.charAt(i);
|
|
if (c < '0' || c > '9') return false;
|
|
v = (v * 10) + (c - '0');
|
|
if (v > Integer.MAX_VALUE) return false;
|
|
}
|
|
return v == lineIndex;
|
|
}
|
|
|
|
private static String preview(String s) {
|
|
if (s == null) return "null";
|
|
return s.length() <= 120 ? s : s.substring(0, 120) + "...";
|
|
}
|
|
|
|
/** Bouw offsets door newlines te scannen. Resultaat is exact getrimd. */
|
|
public static long[] buildOffsets(Path path) throws IOException {
|
|
try (var ch = FileChannel.open(path, StandardOpenOption.READ)) {
|
|
var offs = new long[131072]; // start-capacity, groeit indien nodig
|
|
var c = 0;
|
|
offs[c++] = 0;
|
|
|
|
var buf = ByteBuffer.allocateDirect(1 << 20);
|
|
int pos = 0;
|
|
|
|
while (true) {
|
|
buf.clear();
|
|
var n = ch.read(buf);
|
|
if (n < 0) break;
|
|
buf.flip();
|
|
|
|
for (var i = 0; i < n; i++) {
|
|
if (buf.get(i) == (byte) '\n') {
|
|
if (c == offs.length) offs = Arrays.copyOf(offs, offs.length * 2);
|
|
offs[c++] = pos + i + 1;
|
|
}
|
|
}
|
|
pos += n;
|
|
}
|
|
|
|
return Arrays.copyOf(offs, c);
|
|
}
|
|
}
|
|
|
|
public static void writeIndex(Path out, long[] offsets) throws IOException {
|
|
try (var dos = new DataOutputStream(new BufferedOutputStream(Files.newOutputStream(
|
|
out, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)))) {
|
|
dos.writeInt(MAGIC);
|
|
dos.writeInt(VERSION);
|
|
dos.writeInt(offsets.length);
|
|
for (var v : offsets) dos.writeLong(v);
|
|
}
|
|
}
|
|
|
|
public static long[] readIndex(Path in) throws IOException {
|
|
try (var dis = new DataInputStream(new BufferedInputStream(Files.newInputStream(in)))) {
|
|
if (dis.readInt() != MAGIC) throw new IOException("Not a LIDX file");
|
|
|
|
var version = dis.readInt();
|
|
if (version != VERSION) throw new IOException("Unsupported version: " + version);
|
|
|
|
var n = dis.readInt();
|
|
if (n < 0) throw new IOException("Corrupt length: " + n);
|
|
|
|
var offsets = new long[n];
|
|
for (var i = 0; i < n; i++) offsets[i] = dis.readLong();
|
|
return offsets;
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public void close() throws IOException {
|
|
synchronized (lock) {
|
|
if (csvChannel != null) csvChannel.close();
|
|
csvChannel = null;
|
|
offsets = null;
|
|
}
|
|
}
|
|
|
|
}
|