问题描述
我想在 scala 中创建一个数组缓冲区,而无需在开始时使用数据类型对其进行实例化。我想检查一个条件,然后动态地将类型传递给它。查看给定的代码。
// Original (broken) attempt: tries to pick the buffer's element type at runtime.
// This cannot compile for several reasons, annotated below.
def rowGen(startNumber:Int,tableIdentifier:String,NumRows:Int)={
// `[_]` is an existential type: the element type is unknown to the compiler,
// so no `+=` with a concrete tuple can ever type-check. Initializing to null
// is also non-idiomatic Scala. (A method-level [T] fails too: T is fixed by
// the caller, not chosen inside the body.)
var tmpArrayBuffer:collection.mutable.ArrayBuffer[_]=null // I tried [T] here. That didn't work either.
tableIdentifier match {
case value if value==baseTable => tmpArrayBuffer= new collection.mutable.ArrayBuffer[(String,String,String)]()
case value if value==batchTable => tmpArrayBuffer= new collection.mutable.ArrayBuffer[(String,String)]()
}
// NOTE: `startNumber to startNumber+NumRows` is inclusive -> NumRows+1 iterations.
for (currentNum <- startNumber to startNumber+NumRows)
tableIdentifier match {
// Also inconsistent: the baseTable buffer was declared as a 3-tuple above,
// but 4 values are appended here (and 4 columns named below).
case value if value==baseTable => tmpArrayBuffer+=(s"col1-${currentNum}",s"col2-${currentNum}",s"col3-${currentNum}",s"col4-${currentNum}")
case value if value==batchTable => tmpArrayBuffer+=(s"col1-${currentNum}",s"col2-${currentNum}")
}
tableIdentifier match {
case value if value==baseTable => tmpArrayBuffer.toSeq.toDF("col1","col2","col3","col4")
case value if value==batchTable => tmpArrayBuffer.toSeq.toDF("col1","col2")
}
}
请帮我解决这个问题:根据条件实例化 ArrayBuffer[(String,String,String)] 或 ArrayBuffer[(String,String)]。
解决方法
我只想在匹配中定义数组缓冲区:
import org.apache.spark.sql.DataFrame
// Table identifiers rowGen matches on to pick the row shape.
val baseTable = "baseTable"
val batchTable = "batchTable"
/** Builds a demo DataFrame whose column set depends on `tableIdentifier`.
  *
  * @param startNumber     first number used in the generated cell values
  * @param tableIdentifier either `baseTable` (4 columns) or `batchTable` (2 columns);
  *                        any other value makes the match throw a MatchError
  * @param NumRows         note: the range is inclusive on both ends, so
  *                        NumRows+1 rows are produced (matches the sample
  *                        output shown below for NumRows = 5)
  */
def rowGen(startNumber:Int,tableIdentifier:String,NumRows:Int) : DataFrame = {
  tableIdentifier match {
    case `baseTable` => {
      // FIX: the element type must be a 4-tuple — the original declared a
      // 3-tuple buffer but appended 4 values and named 4 columns, which
      // does not compile. Also `val`: the reference itself is never reassigned.
      val tmpArrayBuffer = new collection.mutable.ArrayBuffer[(String,String,String,String)]
      for (currentNum <- startNumber to startNumber+NumRows){
        tmpArrayBuffer += ((s"col1-${currentNum}",s"col2-${currentNum}",s"col3-${currentNum}",s"col4-${currentNum}"))
      }
      tmpArrayBuffer.toSeq.toDF("col1","col2","col3","col4")
    }
    case `batchTable` => {
      val tmpArrayBuffer = new collection.mutable.ArrayBuffer[(String,String)]
      for (currentNum <- startNumber to startNumber+NumRows) {
        tmpArrayBuffer += ((s"col1-${currentNum}",s"col2-${currentNum}"))
      }
      tmpArrayBuffer.toSeq.toDF("col1","col2")
    }
  }
}
scala> rowGen(1,"batchTable",5).show
+------+------+
| col1| col2|
+------+------+
|col1-1|col2-1|
|col1-2|col2-2|
|col1-3|col2-3|
|col1-4|col2-4|
|col1-5|col2-5|
|col1-6|col2-6|
+------+------+
scala> rowGen(1,"baseTable",5).show
+------+------+------+------+
| col1| col2| col3| col4|
+------+------+------+------+
|col1-1|col2-1|col3-1|col4-1|
|col1-2|col2-2|col3-2|col4-2|
|col1-3|col2-3|col3-3|col4-3|
|col1-4|col2-4|col3-4|col4-4|
|col1-5|col2-5|col3-5|col4-5|
|col1-6|col2-6|col3-6|col4-6|
+------+------+------+------+
或者,正如评论所建议的,使用 Seq.newBuilder
更好:
import org.apache.spark.sql.DataFrame
// Table identifiers rowGen matches on to pick the row shape.
val baseTable = "baseTable"
val batchTable = "batchTable"
/** Same generator, but using an immutable-Seq builder instead of a mutable
  * ArrayBuffer — no `var`, no `null`, no wildcard types.
  *
  * FIX: the snippet as posted was garbled (the loop bodies were lost in the
  * paste, leaving invalid fragments like `Seq.newBuilder[(String,s"col4-...")`),
  * and its signature dropped `tableIdentifier` while still matching on it.
  * Reconstructed below to mirror the ArrayBuffer version exactly.
  *
  * Note the inclusive range: NumRows+1 rows are produced, as in the sample output.
  */
def rowGen(startNumber:Int,tableIdentifier:String,NumRows:Int) : DataFrame = {
  tableIdentifier match {
    case `baseTable` =>
      val builder = Seq.newBuilder[(String,String,String,String)]
      for (currentNum <- startNumber to startNumber+NumRows) {
        builder += ((s"col1-${currentNum}",s"col2-${currentNum}",s"col3-${currentNum}",s"col4-${currentNum}"))
      }
      builder.result().toDF("col1","col2","col3","col4")
    case `batchTable` =>
      val builder = Seq.newBuilder[(String,String)]
      for (currentNum <- startNumber to startNumber+NumRows) {
        builder += ((s"col1-${currentNum}",s"col2-${currentNum}"))
      }
      builder.result().toDF("col1","col2")
  }
}