Gather data
This commit is contained in:
59
tools/hint/jsonl-to-sqlite.mjs
Normal file
59
tools/hint/jsonl-to-sqlite.mjs
Normal file
@@ -0,0 +1,59 @@
|
||||
// jsonl-to-sqlite.mjs
|
||||
import fs from 'node:fs'
|
||||
import readline from 'node:readline'
|
||||
import Database from 'better-sqlite3'
|
||||
|
||||
// Imports a JSON Lines file into a SQLite table, one row per valid line.
//
// Usage: node jsonl-to-sqlite.mjs <file.jsonl> [out.sqlite] [table]
//
// Each non-blank line is validated with JSON.parse and stored verbatim
// (trimmed) in the table's `json` TEXT column. Invalid lines are reported on
// stderr with their line number and skipped.

const jsonlPath = process.argv[2]
const dbPath = process.argv[3] ?? 'out.sqlite'
const table = process.argv[4] ?? 'events'

if (!jsonlPath) {
  console.error('Usage: node jsonl-to-sqlite.mjs <file.jsonl> [out.sqlite] [table]')
  process.exit(1)
}

// Number of rows buffered before they are written in a single transaction.
const BATCH_SIZE = 1000

// The table name comes from the command line and cannot be bound as a
// statement parameter, so it is quoted as an SQLite identifier (embedded
// double quotes doubled). This keeps names containing spaces, dashes or
// reserved words working and prevents the name from being parsed as SQL.
const quotedTable = `"${ table.replace(/"/g, '""') }"`

const db = new Database(dbPath)

try {
  db.pragma('journal_mode = WAL')

  db.exec(`
    CREATE TABLE IF NOT EXISTS ${ quotedTable }
    (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      json TEXT NOT NULL
    );
  `)

  const insert = db.prepare(`INSERT INTO ${ quotedTable }(json)
    VALUES (?)`)

  // Wrapping the per-row inserts in one transaction makes each batch a
  // single commit instead of one commit per row.
  const insertMany = db.transaction((rows) => {
    for (const r of rows) insert.run(r)
  })

  const rl = readline.createInterface({
    input: fs.createReadStream(jsonlPath, { encoding: 'utf8' }),
    // Treat \r\n as a single line break regardless of timing between chunks.
    crlfDelay: Infinity
  })

  let batch = []
  let lineNo = 0

  for await (const line of rl) {
    lineNo++
    const trimmed = line.trim()
    if (!trimmed) continue

    try {
      // Parsed only to validate; the original text is what gets stored.
      JSON.parse(trimmed)
    } catch (e) {
      console.warn(`Skipping invalid JSON on line ${ lineNo }: ${ e.message }`)
      continue
    }

    batch.push(trimmed)

    if (batch.length >= BATCH_SIZE) {
      insertMany(batch)
      batch = []
    }
  }

  // Flush whatever is left over from the last, partially filled batch.
  if (batch.length) insertMany(batch)

  console.log(`Done. Imported into ${ dbPath }, table=${ table }`)
} finally {
  // Always release the connection, including when an import step throws.
  db.close()
}
|
||||
Reference in New Issue
Block a user