将数据从Excel工作簿导入Node.js中的sqlite3数据库

问题描述

有一个结构化的 Excel 工作簿:每张工作表都有唯一的名称,每张工作表的第一行包含唯一的列名,其余各行包含数据值。

一种解决方案是将每张工作表导出为 CSV 文件,再用 sqlite3 命令行的 .import 命令导入;但这种方式很慢,而且过于费力。

-- sqlite3 shell script: recreate table `log` and load it from the
-- semicolon-separated file log.csv inside a single transaction.
.mode csv
.separator ";"
BEGIN TRANSACTION;
drop table if exists log;
.import "log.csv" log
COMMIT;

我们想将数据从 Excel 导入 sqlite3 数据库:每张工作表对应一张表,记录的 id 由工作表的行号决定。由于第一行是列名,因此 id = 行号 - 1。我的解决方案如下:

'use strict';
/*
 * Import every worksheet of an Excel workbook into a SQLite database with the
 * asynchronous `sqlite3` driver.
 *
 * Each sheet becomes one table named after the sheet. The first row of the
 * sheet's used range supplies the column names; every following row is
 * inserted as one record. An auto-incrementing `id` column is added to each
 * table. All sheets are parsed before the database is touched, so a malformed
 * sheet aborts the run before any table is dropped.
 */
const XLSX = require('xlsx');
const sql3 = require('sqlite3').verbose();

/**
 * Quote a sheet or column name as a SQLite identifier.
 * Embedded backticks are doubled so the name cannot terminate the quoting.
 *
 * @param {*} name - Raw name; it is converted to a string first.
 * @returns {string} The backtick-quoted identifier.
 */
function quoteIdentifier( name ) {
  return '`' + String( name ).replace( /`/g, '``' ) + '`';
}

/**
 * Choose the declared column type from the header text: numeric-looking
 * headers give INTEGER or REAL, everything else gives TEXT.
 *
 * NOTE(review): the type is derived from the column *name*, not from the data
 * in the column, so in practice almost every column is declared TEXT. This
 * mirrors the behaviour the script always had - confirm whether inferring the
 * type from the data rows would be preferable.
 *
 * @param {string} header - Header cell text.
 * @returns {string} 'INTEGER', 'REAL' or 'TEXT'.
 */
function columnType( header ) {
  const text = String( header ).trim();
  const numeric = text === '' ? NaN : Number( text );
  if ( !Number.isFinite( numeric ) ) {
    return 'TEXT';
  }
  return Number.isInteger( numeric ) ? 'INTEGER' : 'REAL';
}

/**
 * Extract the header row and the data rows from one worksheet.
 *
 * @param {Object} worksheet - Worksheet object taken from `workbook.Sheets`.
 * @param {string} sheetName - Sheet name, used only in error messages.
 * @returns {?{headers: string[], rows: Array<{number: number, values: Array}>}}
 *   `null` when the sheet has no used range; otherwise the column names and
 *   one entry per data row (`number` is the 1-based spreadsheet row).
 * @throws {Error} When a header cell inside the used range is empty.
 */
function readSheet( worksheet, sheetName ) {
  if ( !worksheet || !worksheet['!ref'] ) {
    return null;
  }
  // decode_range copes with single-cell refs ("A1") and multi-letter columns.
  const range = XLSX.utils.decode_range( worksheet['!ref'] );

  // Empty cells are absent from the worksheet object; they are read as ''.
  const valueAt = function ( r, c ) {
    const cell = worksheet[ XLSX.utils.encode_cell( { r: r, c: c } ) ];
    return ( !cell || cell.v === undefined ) ? '' : cell.v;
  };

  const headers = [];
  for ( let c = range.s.c; c <= range.e.c; c++ ) {
    const header = String( valueAt( range.s.r, c ) );
    if ( header.trim() === '' ) {
      throw new Error(
        'Sheet "' + sheetName + '": header cell ' +
        XLSX.utils.encode_cell( { r: range.s.r, c: c } ) + ' is empty'
      );
    }
    headers.push( header );
  }

  const rows = [];
  for ( let r = range.s.r + 1; r <= range.e.r; r++ ) {
    const values = [];
    for ( let c = range.s.c; c <= range.e.c; c++ ) {
      values.push( valueAt( r, c ) );
    }
    rows.push( { number: r + 1, values: values } );
  }
  return { headers: headers, rows: rows };
}

const workbook = XLSX.readFile( 'D:\\excel.xlsx' );

// Parse everything up front so that a bad sheet fails before any SQL runs.
const tables = workbook.SheetNames.map( function ( name ) {
  return { name: name, data: readSheet( workbook.Sheets[ name ], name ) };
} );

const db = new sql3.Database( 'D:\\database.db' );

db.serialize( function () {
  db.run( "BEGIN TRANSACTION" );
  for ( const table of tables ) {
    console.log( "Sheet: " + table.name );
    if ( table.data === null ) {
      console.log( "Skipped: sheet is empty" );
      continue;
    }

    const tableId = quoteIdentifier( table.name );
    const columnIds = table.data.headers.map( quoteIdentifier );
    const columnDefs = table.data.headers.map( function ( header ) {
      return quoteIdentifier( header ) + ' ' + columnType( header );
    } );
    const placeholders = table.data.headers.map( function () { return '?'; } );

    // Recreate the table from scratch on every run.
    db.run( 'DROP TABLE IF EXISTS ' + tableId + ';' );
    db.run(
      'CREATE TABLE ' + tableId +
      ' ( `id` INTEGER PRIMARY KEY AUTOINCREMENT, ' + columnDefs.join( ', ' ) + ' );'
    );
    console.log( "Columns: " + [ '`id`' ].concat( columnDefs ).join( ',' ) );

    // One prepared statement per sheet, reused for every data row.
    const insert = db.prepare(
      'INSERT INTO ' + tableId + ' ( ' + columnIds.join( ', ' ) +
      ' ) VALUES ( ' + placeholders.join( ', ' ) + ' );'
    );
    for ( const row of table.data.rows ) {
      console.log( "Row: " + row.number, "Values: ", row.values );
      insert.run( row.values );
    }
    insert.finalize();
  }
  db.run( "COMMIT" );
  // Queued after COMMIT by serialize(); releases the database handle.
  db.close();
});

这对我来说运行良好,但处理大量数据时有点慢。我读过一些关于 better-sqlite3 的文章。改用这个包之后,这个过程会更快吗?

解决方法

最后,我在流程中改用了 better-sqlite3。差别很大:转换在几秒钟内就能完成,而不再需要几分钟。

'use strict';
/*
 * Import every worksheet of an Excel workbook into a SQLite database with the
 * synchronous `better-sqlite3` driver.
 *
 * Each sheet becomes one table named after the sheet. The first row of the
 * sheet's used range supplies the column names; every following row is
 * inserted as one record, with all values stored as strings. An
 * auto-incrementing `id` column is added to each table. The whole import runs
 * inside one transaction, so it is either applied completely or not at all.
 */
const XLSX = require('xlsx');
const sql3 = require('better-sqlite3');

/**
 * Quote a sheet or column name as a SQLite identifier.
 * Embedded backticks are doubled so the name cannot terminate the quoting.
 *
 * @param {*} name - Raw name; it is converted to a string first.
 * @returns {string} The backtick-quoted identifier.
 */
function quoteIdentifier( name ) {
  return '`' + String( name ).replace( /`/g, '``' ) + '`';
}

/**
 * Choose the declared column type from the header text: numeric-looking
 * headers give INTEGER or REAL, everything else gives TEXT.
 *
 * NOTE(review): the type is derived from the column *name*, not from the data
 * in the column, so in practice almost every column is declared TEXT. This
 * mirrors the behaviour the script always had - confirm whether inferring the
 * type from the data rows would be preferable.
 *
 * @param {string} header - Header cell text.
 * @returns {string} 'INTEGER', 'REAL' or 'TEXT'.
 */
function columnType( header ) {
  const text = String( header ).trim();
  const numeric = text === '' ? NaN : Number( text );
  if ( !Number.isFinite( numeric ) ) {
    return 'TEXT';
  }
  return Number.isInteger( numeric ) ? 'INTEGER' : 'REAL';
}

/**
 * Extract the header row and the data rows from one worksheet.
 *
 * @param {Object} worksheet - Worksheet object taken from `workbook.Sheets`.
 * @param {string} sheetName - Sheet name, used only in error messages.
 * @returns {?{headers: string[], rows: Array<{number: number, values: string[]}>}}
 *   `null` when the sheet has no used range; otherwise the column names and
 *   one entry per data row (`number` is the 1-based spreadsheet row).
 * @throws {Error} When a header cell inside the used range is empty.
 */
function readSheet( worksheet, sheetName ) {
  if ( !worksheet || !worksheet['!ref'] ) {
    return null;
  }
  // decode_range copes with single-cell refs ("A1") and multi-letter columns.
  const range = XLSX.utils.decode_range( worksheet['!ref'] );

  // Every value is stringified before binding; empty cells become ''.
  const textAt = function ( r, c ) {
    const cell = worksheet[ XLSX.utils.encode_cell( { r: r, c: c } ) ];
    return ( !cell || cell.v === undefined ) ? '' : String( cell.v );
  };

  const headers = [];
  for ( let c = range.s.c; c <= range.e.c; c++ ) {
    const header = textAt( range.s.r, c );
    if ( header.trim() === '' ) {
      throw new Error(
        'Sheet "' + sheetName + '": header cell ' +
        XLSX.utils.encode_cell( { r: range.s.r, c: c } ) + ' is empty'
      );
    }
    headers.push( header );
  }

  const rows = [];
  for ( let r = range.s.r + 1; r <= range.e.r; r++ ) {
    const values = [];
    for ( let c = range.s.c; c <= range.e.c; c++ ) {
      values.push( textAt( r, c ) );
    }
    rows.push( { number: r + 1, values: values } );
  }
  return { headers: headers, rows: rows };
}

const workbook = XLSX.readFile( './perform.xlsx' );

// Parse everything up front so that a bad sheet fails before any SQL runs.
const tables = workbook.SheetNames.map( function ( name ) {
  return { name: name, data: readSheet( workbook.Sheets[ name ], name ) };
} );

const db = new sql3( './perform.db' );

// db.transaction() wraps the function in BEGIN/COMMIT and rolls back if it
// throws; a single transaction also avoids one commit per inserted row.
const importTables = db.transaction( function ( pending ) {
  for ( const table of pending ) {
    console.log( "Sheet: " + table.name );
    if ( table.data === null ) {
      console.log( "Skipped: sheet is empty" );
      continue;
    }

    const tableId = quoteIdentifier( table.name );
    const columnIds = table.data.headers.map( quoteIdentifier );
    const columnDefs = table.data.headers.map( function ( header ) {
      return quoteIdentifier( header ) + ' ' + columnType( header );
    } );
    const placeholders = table.data.headers.map( function () { return '?'; } );

    // Recreate the table from scratch on every run.
    db.exec( 'DROP TABLE IF EXISTS ' + tableId + ';' );
    db.exec(
      'CREATE TABLE ' + tableId +
      ' ( `id` INTEGER PRIMARY KEY AUTOINCREMENT, ' + columnDefs.join( ', ' ) + ' );'
    );
    console.log( "Columns: " + [ '`id`' ].concat( columnDefs ).join( ',' ) );

    // One prepared statement per sheet, reused for every data row.
    const insert = db.prepare(
      'INSERT INTO ' + tableId + ' ( ' + columnIds.join( ', ' ) +
      ' ) VALUES ( ' + placeholders.join( ', ' ) + ' );'
    );
    for ( const row of table.data.rows ) {
      console.log( "Row: " + row.number, "Values: ", row.values );
      insert.run( row.values );
    }
  }
} );

try {
  importTables( tables );
} finally {
  // Release the database handle even when the import fails.
  db.close();
}