问题描述
有一本结构化的 Excel 工作簿:每张工作表都有唯一的名称,每张工作表的第一行包含互不重复的列名,其余各行包含数据值。
已有一种解决方案:将每个工作表导出为 csv 文件,再用 sqlite3 的 .import 命令导入。但这种方式很慢,而且太费精力。
-- Import the semicolon-separated file log.csv into the table `log`.
.mode csv
.separator ";"
BEGIN TRANSACTION;
-- Drop any previous copy of the table so the import starts from scratch.
drop table if exists log;
.import "log.csv" log
COMMIT;
我们想将数据从 Excel 导入 sqlite3 数据库:每个工作表对应一张表,工作表的行号用作记录的 ID。由于第一行包含列名,因此 id = 行号 - 1。解决方案:
'use strict';
var XLSX = require('xlsx');
const sql3 = require('sqlite3').verbose();
// Imports every worksheet of the workbook into its own SQLite table.
// The first row of a sheet's used range supplies the column names, every
// following row becomes one record; `id` is assigned by AUTOINCREMENT.
var workbook = XLSX.readFile( 'D:\\excel.xlsx' );
var db = new sql3.Database( 'D:\\database.db' );

/**
 * Quotes a sheet or column name for use as an SQLite identifier.
 * Embedded backticks are doubled so the name cannot end the quoting early.
 * @param {*} name raw name taken from the workbook
 * @returns {string} backtick-quoted identifier
 */
function quoteIdent( name ) {
  return "`" + String( name ).replace( /`/g, "``" ) + "`";
}

/**
 * Chooses the declared type of a column from its header value.
 * NOTE(review): this inspects the header text, not the data below it, so
 * ordinary column names always yield TEXT. The expression is kept exactly
 * as it was so existing schemas do not change - confirm whether the type
 * should rather be derived from the first data row.
 * @param {*} header value of the header cell
 * @returns {string} "INTEGER", "REAL" or "TEXT"
 */
function columnType( header ) {
  return ( header * 1.0 == header ? ( ( header * 1 + "" ) == header ? "INTEGER" : "REAL" ) : "TEXT" );
}

/**
 * Reads the raw value of one cell.
 * @param {object} cells worksheet object keyed by A1-style addresses
 * @param {number} r zero-based row index
 * @param {number} c zero-based column index
 * @returns {*} the cell's `v` value, or "" when the cell is absent or has no value
 */
function readCell( cells, r, c ) {
  var cell = cells[ XLSX.utils.encode_cell( { r: r, c: c } ) ];
  return ( typeof cell == "undefined" || typeof cell.v == "undefined" ) ? "" : cell.v;
}

db.serialize( function () {
  db.run( "BEGIN TRANSACTION" );
  for( var sheet of workbook.SheetNames ) {
    console.log( "Sheet: " + sheet );
    var cells = workbook.Sheets[ sheet ];
    // A sheet without a used range has no header row, so there is nothing to create.
    if( !cells || !cells['!ref'] ) {
      continue;
    }
    // decode_range yields numeric, zero-based bounds and also copes with a
    // single-cell reference such as "A1" (no colon).
    var range = XLSX.utils.decode_range( cells['!ref'] );
    var table = quoteIdent( sheet );

    // Header row: collect the column list and the column definitions.
    var columns = [];
    var definitions = "";
    for( var c = range.s.c; c <= range.e.c; c++ ) {
      var header = readCell( cells, range.s.r, c );
      columns.push( quoteIdent( header ) );
      definitions += "," + quoteIdent( header ) + " " + columnType( header );
    }

    // drop table
    db.run( "DROP TABLE IF EXISTS " + table + ";" );
    // create table
    db.run( "CREATE TABLE " + table + " ( `id` INTEGER PRIMARY KEY AUTOINCREMENT" + definitions + " );" );
    console.log( "Columns: `id`" + definitions );

    // prepare insert: one positional placeholder per column
    var placeholders = columns.map( function () { return "?"; } ).join( "," );
    var insert = db.prepare(
      "INSERT INTO " + table + " ( " + columns.join( "," ) + " ) VALUES ( " + placeholders + " );"
    );

    // Data rows: everything below the header row.
    for( var r = range.s.r + 1; r <= range.e.r; r++ ) {
      var values = [];
      for( var col = range.s.c; col <= range.e.c; col++ ) {
        values.push( readCell( cells, r, col ) );
      }
      //insert prepared values (the log shows the 1-based spreadsheet row)
      console.log( "Row: " + ( r + 1 ),"Values: ",values );
      insert.run( values );
    }
    insert.finalize();
  }
  db.run("COMMIT");
  // Every statement has been finalized above, so the handle can be released.
  db.close();
});
这个方案对我来说运行良好,但处理大量数据时有点慢。我读过关于 better-sqlite3 的文章。使用该软件包,这个过程会更快吗?
解决方法
最后,我在流程中改用了 better-sqlite3。差别非常大:转换在几秒钟内就能完成,而不是几分钟。
'use strict';
var XLSX = require('xlsx');
const sql3 = require('better-sqlite3');
// Imports every worksheet of the workbook into its own SQLite table using
// the synchronous better-sqlite3 API. The first row of a sheet's used range
// supplies the column names, every following row becomes one record whose
// values are stored as strings; `id` is assigned by AUTOINCREMENT.
var workbook = XLSX.readFile( './perform.xlsx' );
var db = new sql3( './perform.db' );

/**
 * Quotes a sheet or column name for use as an SQLite identifier.
 * Embedded backticks are doubled so the name cannot end the quoting early.
 * @param {*} name raw name taken from the workbook
 * @returns {string} backtick-quoted identifier
 */
function quoteIdent( name ) {
  return "`" + String( name ).replace( /`/g, "``" ) + "`";
}

/**
 * Chooses the declared type of a column from its header value.
 * NOTE(review): this inspects the header text, not the data below it, so
 * ordinary column names always yield TEXT. The expression is kept exactly
 * as it was so existing schemas do not change - confirm whether the type
 * should rather be derived from the first data row.
 * @param {*} header value of the header cell
 * @returns {string} "INTEGER", "REAL" or "TEXT"
 */
function columnType( header ) {
  return ( header * 1.0 == header ? ( ( header * 1 + "" ) == header ? "INTEGER" : "REAL" ) : "TEXT" );
}

/**
 * Reads the raw value of one cell.
 * @param {object} cells worksheet object keyed by A1-style addresses
 * @param {number} r zero-based row index
 * @param {number} c zero-based column index
 * @returns {*} the cell's `v` value, or "" when the cell is absent or has no value
 */
function readCell( cells, r, c ) {
  var cell = cells[ XLSX.utils.encode_cell( { r: r, c: c } ) ];
  return ( typeof cell == "undefined" || typeof cell.v == "undefined" ) ? "" : cell.v;
}

try {
  // One transaction for the whole workbook: avoids a commit per inserted row.
  db.exec( "BEGIN TRANSACTION" );
  for( var sheet of workbook.SheetNames ) {
    console.log( "Sheet: " + sheet );
    var cells = workbook.Sheets[ sheet ];
    // A sheet without a used range has no header row, so there is nothing to create.
    if( !cells || !cells['!ref'] ) {
      continue;
    }
    // decode_range yields numeric, zero-based bounds and also copes with a
    // single-cell reference such as "A1" (no colon).
    var range = XLSX.utils.decode_range( cells['!ref'] );
    var table = quoteIdent( sheet );

    // Header row: collect the column list and the column definitions.
    var columns = [];
    var definitions = "";
    for( var c = range.s.c; c <= range.e.c; c++ ) {
      var header = readCell( cells, range.s.r, c );
      columns.push( quoteIdent( header ) );
      definitions += "," + quoteIdent( header ) + " " + columnType( header );
    }

    // drop table
    db.exec( "DROP TABLE IF EXISTS " + table + ";" );
    // create table
    db.exec( "CREATE TABLE " + table + " ( `id` INTEGER PRIMARY KEY AUTOINCREMENT" + definitions + " );" );
    console.log( "Columns: `id`" + definitions );

    // prepare insert: one positional placeholder per column
    var placeholders = columns.map( function () { return "?"; } ).join( "," );
    var insert = db.prepare(
      "INSERT INTO " + table + " ( " + columns.join( "," ) + " ) VALUES ( " + placeholders + " );"
    );

    // Data rows: everything below the header row.
    for( var r = range.s.r + 1; r <= range.e.r; r++ ) {
      var values = [];
      for( var col = range.s.c; col <= range.e.c; col++ ) {
        // values are bound as strings, exactly as before
        values.push( readCell( cells, r, col ) + "" );
      }
      //insert prepared values (the log shows the 1-based spreadsheet row)
      console.log( "Row: " + ( r + 1 ),"Values: ",values );
      insert.run( values );
    }
  }
  db.exec( "COMMIT" );
} finally {
  // Always release the handle. On the error path the transaction is still
  // open, and SQLite rolls back an open transaction when the connection closes.
  db.close();
}