60 lines
1.3 KiB
JavaScript
60 lines
1.3 KiB
JavaScript
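// jsonl-to-sqlite.mjs
// Loads a JSON Lines file into a SQLite table: every non-empty line that parses
// as JSON is stored verbatim in a `json` TEXT column; invalid lines are skipped
// with a warning.
// Usage: node jsonl-to-sqlite.mjs <file.jsonl> [out.sqlite] [table]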
|
|
import fs from 'node:fs'
|
|
import readline from 'node:readline'
|
|
import Database from 'better-sqlite3'
|
|
|
|
// Command-line arguments:
//   argv[2]  path to the input .jsonl file (required)
//   argv[3]  path to the SQLite database file (default: out.sqlite)
//   argv[4]  destination table name (default: events)
const jsonlPath = process.argv[2]
const dbPath = process.argv[3] ?? 'out.sqlite'
const table = process.argv[4] ?? 'events'

if (!jsonlPath) {
  console.error('Usage: node jsonl-to-sqlite.mjs <file.jsonl> [out.sqlite] [table]')
  process.exit(1)
}

// Fail fast on an unreadable input so we do not create an empty database file
// (and table) before discovering there is nothing to import.
try {
  fs.accessSync(jsonlPath, fs.constants.R_OK)
} catch (err) {
  console.error(`Cannot read ${jsonlPath}: ${err.message}`)
  process.exit(1)
}

// Number of rows written per transaction.
const BATCH_SIZE = 1000

// The table name comes from the command line and cannot be bound as a SQL
// parameter, so quote it as an identifier (doubling any embedded double
// quotes) instead of splicing it into the statement as raw SQL.
const quotedTable = `"${table.replace(/"/g, '""')}"`

const db = new Database(dbPath)
db.pragma('journal_mode = WAL')

db.exec(`
  CREATE TABLE IF NOT EXISTS ${quotedTable}
  (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    json TEXT NOT NULL
  );
`)

const insert = db.prepare(`INSERT INTO ${quotedTable}(json) VALUES (?)`)

// Inserts every row of `rows` inside a single transaction.
const insertMany = db.transaction((rows) => {
  for (const r of rows) insert.run(r)
})

const rl = readline.createInterface({
  input: fs.createReadStream(jsonlPath, { encoding: 'utf8' }),
  crlfDelay: Infinity
})

let batch = []
let lineNo = 0

try {
  for await (const line of rl) {
    lineNo++
    const trimmed = line.trim()
    if (!trimmed) continue

    try {
      JSON.parse(trimmed) // validate only; the original text is what gets stored
    } catch (e) {
      console.warn(`Skipping invalid JSON on line ${lineNo}: ${e.message}`)
      continue
    }

    batch.push(trimmed)
    if (batch.length >= BATCH_SIZE) {
      insertMany(batch)
      batch = []
    }
  }

  // Flush the final, partially filled batch.
  if (batch.length) insertMany(batch)
} finally {
  // Release the line reader and the database handle even if an insert throws.
  rl.close()
  db.close()
}

console.log(`Done. Imported into ${dbPath}, table=${table}`)
|