package puzzle; import puzzle.SwedishGenerator.Lemma; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.Closeable; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; import java.util.Arrays; import java.util.function.LongConsumer; import static java.nio.charset.StandardCharsets.US_ASCII; public final class CsvIndexService implements Closeable { static final ScopedValue SC = ScopedValue.newInstance(); private static final int MAGIC = 0x4C494458; // "LIDX" private static final int VERSION = 1; static int SIMPEL_IDX = 3; private final Path csvPath; private final Path idxPath; private volatile long[] offsets; // lazy private volatile FileChannel csvChannel; // open once private final Object lock = new Object(); public CsvIndexService(Path csvPath, Path idxPath) { this.csvPath = csvPath; this.idxPath = idxPath; } public static int lineToSimpel(String line) { var parts = line.split(",", 5); return Integer.parseInt(parts[SIMPEL_IDX].trim()); } public static String[] lineToClue(String line) { if (line.isBlank()) throw new RuntimeException("Empty line"); var parts = line.split(",", 5); var rawClue = parts[4].trim(); if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) { rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\""); } return Meta.GSON.fromJson(rawClue, String[].class); } public static void lineToLemma(String line, LongConsumer ok) { if (line.isBlank()) { throw new RuntimeException("Empty line"); } var parts = line.split(",", 5); var id = Integer.parseInt(parts[0].trim()); var word = parts[1].trim(); int score = Integer.parseInt(parts[2].trim()); if (score < 1) { if (Main.VERBOSE) System.err.println("Word too complex: " + line); return; } ok.accept(Lemma.pack(id, word.getBytes(US_ASCII))); } public static int simpel(int index) { try { if (SC.isBound()) return lineToSimpel(SC.get().getLine(index)); return -1; } catch (Exception e) { e.printStackTrace(); throw new RuntimeException("Failed to get clues for index " + index, e); } } public static String[] clues(int index) { try { if (SC.isBound()) return lineToClue(SC.get().getLine(index)); return new String[0]; } catch (Exception e) { e.printStackTrace(); throw new RuntimeException("Failed to get clues for index " + index, e); } } /** Haal één regel op (0-based line index), met self-healing index (1x rebuild). */ public String getLine(int lineIndex) throws IOException { ensureLoaded(); var line = readLineAt(lineIndex); if (startsWithIndex(line, lineIndex)) return line; // mismatch => rebuild index en nog 1x proberen synchronized (lock) { rebuildIndexLocked(); line = readLineAt(lineIndex); if (startsWithIndex(line, lineIndex)) return line; } throw new RuntimeException("Index mismatch after rebuild. Requested=" + lineIndex + ", got line=" + preview(line)); } public void ensureLoaded() throws IOException { if (offsets != null && csvChannel != null && csvChannel.isOpen()) return; synchronized (lock) { if (offsets != null && csvChannel != null && csvChannel.isOpen()) return; csvChannel = FileChannel.open(csvPath, StandardOpenOption.READ); if (Files.exists(idxPath)) { try { offsets = readIndex(idxPath); return; } catch (IOException badIndex) { // fall-through -> rebuild } } rebuildIndexLocked(); } } private void rebuildIndexLocked() throws IOException { var built = buildOffsets(csvPath); writeIndex(idxPath, built); offsets = built; } private String readLineAt(int lineIndex) throws IOException { var local = offsets; if (lineIndex < 0 || lineIndex >= local.length) { throw new IndexOutOfBoundsException("lineIndex=" + lineIndex + ", max=" + (local.length - 1)); } long currentPos = local[lineIndex]; // lees in blokjes (sneller dan 1 byte) tot newline var buf = new byte[8192]; var total = 0; var out = new byte[256]; while (true) { var bb = ByteBuffer.wrap(buf); var n = csvChannel.read(bb, currentPos); if (n < 0) break; // EOF currentPos += n; var end = n; for (var i = 0; i < end; i++) { var b = buf[i]; if (b == (byte) '\n') { return new String(out, 0, total, StandardCharsets.UTF_8); } if (b == (byte) '\r') continue; if (total == out.length) out = Arrays.copyOf(out, out.length * 2); out[total++] = b; } } return new String(out, 0, total, StandardCharsets.UTF_8); } /** Check: begint de regel met "," */ private static boolean startsWithIndex(String line, int lineIndex) { if (line == null || line.isEmpty()) return false; var comma = line.indexOf(','); if (comma <= 0) return false; // snelle parse zonder split long v = 0; for (var i = 0; i < comma; i++) { var c = line.charAt(i); if (c < '0' || c > '9') return false; v = (v * 10) + (c - '0'); if (v > Integer.MAX_VALUE) return false; } return v == lineIndex; } private static String preview(String s) { if (s == null) return "null"; return s.length() <= 120 ? s : s.substring(0, 120) + "..."; } /** Bouw offsets door newlines te scannen. Resultaat is exact getrimd. */ public static long[] buildOffsets(Path path) throws IOException { try (var ch = FileChannel.open(path, StandardOpenOption.READ)) { var offs = new long[131072]; // start-capacity, groeit indien nodig var c = 0; offs[c++] = 0; var buf = ByteBuffer.allocateDirect(1 << 20); int pos = 0; while (true) { buf.clear(); var n = ch.read(buf); if (n < 0) break; buf.flip(); for (var i = 0; i < n; i++) { if (buf.get(i) == (byte) '\n') { if (c == offs.length) offs = Arrays.copyOf(offs, offs.length * 2); offs[c++] = pos + i + 1; } } pos += n; } return Arrays.copyOf(offs, c); } } public static void writeIndex(Path out, long[] offsets) throws IOException { try (var dos = new DataOutputStream(new BufferedOutputStream(Files.newOutputStream( out, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)))) { dos.writeInt(MAGIC); dos.writeInt(VERSION); dos.writeInt(offsets.length); for (var v : offsets) dos.writeLong(v); } } public static long[] readIndex(Path in) throws IOException { try (var dis = new DataInputStream(new BufferedInputStream(Files.newInputStream(in)))) { if (dis.readInt() != MAGIC) throw new IOException("Not a LIDX file"); var version = dis.readInt(); if (version != VERSION) throw new IOException("Unsupported version: " + version); var n = dis.readInt(); if (n < 0) throw new IOException("Corrupt length: " + n); var offsets = new long[n]; for (var i = 0; i < n; i++) offsets[i] = dis.readLong(); return offsets; } } @Override public void close() throws IOException { synchronized (lock) { if (csvChannel != null) csvChannel.close(); csvChannel = null; offsets = null; } } }