问题描述
我有一个 .Rdata 格式的数据集——这种格式我以前从未用过。我想把数据导出为 csv 或类似格式的文件,以便在 Python 中使用。我试过 write.csv、write.table 等函数,它们看起来都写出了文件,但打开后文件完全是空白的。我还尝试过在导出之前先把数据转换为数据框(data frame),同样没有成功。
在 R 中导入文件后,数据显示为 Large array(1499904 elements,11.5 Mb),其属性如下:
> attributes(data.station)
$`dim`
[1] 12 31 288 7 2
$dimnames
$dimnames[[1]]
[1] "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"
$dimnames[[2]]
[1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" "20" "21"
[22] "22" "23" "24" "25" "26" "27" "28" "29" "30" "31"
$dimnames[[3]]
[1] "" "00:05:00" "00:10:00" "00:15:00" "00:20:00" "00:25:00" "00:30:00" "00:35:00" "00:40:00"
[10] "00:45:00" "00:50:00" "00:55:00" "01:00:00" "01:05:00" "01:10:00" "01:15:00" "01:20:00" "01:25:00"
[19] "01:30:00" "01:35:00" "01:40:00" "01:45:00" "01:50:00" "01:55:00" "02:00:00" "02:05:00" "02:10:00"
[28] "02:15:00" "02:20:00" "02:25:00" "02:30:00" "02:35:00" "02:40:00" "02:45:00" "02:50:00" "02:55:00"
[37] "03:00:00" "03:05:00" "03:10:00" "03:15:00" "03:20:00" "03:25:00" "03:30:00" "03:35:00" "03:40:00"
[46] "03:45:00" "03:50:00" "03:55:00" "04:00:00" "04:05:00" "04:10:00" "04:15:00" "04:20:00" "04:25:00"
[55] "04:30:00" "04:35:00" "04:40:00" "04:45:00" "04:50:00" "04:55:00" "05:00:00" "05:05:00" "05:10:00"
[64] "05:15:00" "05:20:00" "05:25:00" "05:30:00" "05:35:00" "05:40:00" "05:45:00" "05:50:00" "05:55:00"
[73] "06:00:00" "06:05:00" "06:10:00" "06:15:00" "06:20:00" "06:25:00" "06:30:00" "06:35:00" "06:40:00"
[82] "06:45:00" "06:50:00" "06:55:00" "07:00:00" "07:05:00" "07:10:00" "07:15:00" "07:20:00" "07:25:00"
[91] "07:30:00" "07:35:00" "07:40:00" "07:45:00" "07:50:00" "07:55:00" "08:00:00" "08:05:00" "08:10:00"
[100] "08:15:00" "08:20:00" "08:25:00" "08:30:00" "08:35:00" "08:40:00" "08:45:00" "08:50:00" "08:55:00"
[109] "09:00:00" "09:05:00" "09:10:00" "09:15:00" "09:20:00" "09:25:00" "09:30:00" "09:35:00" "09:40:00"
[118] "09:45:00" "09:50:00" "09:55:00" "10:00:00" "10:05:00" "10:10:00" "10:15:00" "10:20:00" "10:25:00"
[127] "10:30:00" "10:35:00" "10:40:00" "10:45:00" "10:50:00" "10:55:00" "11:00:00" "11:05:00" "11:10:00"
[136] "11:15:00" "11:20:00" "11:25:00" "11:30:00" "11:35:00" "11:40:00" "11:45:00" "11:50:00" "11:55:00"
[145] "12:00:00" "12:05:00" "12:10:00" "12:15:00" "12:20:00" "12:25:00" "12:30:00" "12:35:00" "12:40:00"
[154] "12:45:00" "12:50:00" "12:55:00" "13:00:00" "13:05:00" "13:10:00" "13:15:00" "13:20:00" "13:25:00"
[163] "13:30:00" "13:35:00" "13:40:00" "13:45:00" "13:50:00" "13:55:00" "14:00:00" "14:05:00" "14:10:00"
[172] "14:15:00" "14:20:00" "14:25:00" "14:30:00" "14:35:00" "14:40:00" "14:45:00" "14:50:00" "14:55:00"
[181] "15:00:00" "15:05:00" "15:10:00" "15:15:00" "15:20:00" "15:25:00" "15:30:00" "15:35:00" "15:40:00"
[190] "15:45:00" "15:50:00" "15:55:00" "16:00:00" "16:05:00" "16:10:00" "16:15:00" "16:20:00" "16:25:00"
[199] "16:30:00" "16:35:00" "16:40:00" "16:45:00" "16:50:00" "16:55:00" "17:00:00" "17:05:00" "17:10:00"
[208] "17:15:00" "17:20:00" "17:25:00" "17:30:00" "17:35:00" "17:40:00" "17:45:00" "17:50:00" "17:55:00"
[217] "18:00:00" "18:05:00" "18:10:00" "18:15:00" "18:20:00" "18:25:00" "18:30:00" "18:35:00" "18:40:00"
[226] "18:45:00" "18:50:00" "18:55:00" "19:00:00" "19:05:00" "19:10:00" "19:15:00" "19:20:00" "19:25:00"
[235] "19:30:00" "19:35:00" "19:40:00" "19:45:00" "19:50:00" "19:55:00" "20:00:00" "20:05:00" "20:10:00"
[244] "20:15:00" "20:20:00" "20:25:00" "20:30:00" "20:35:00" "20:40:00" "20:45:00" "20:50:00" "20:55:00"
[253] "21:00:00" "21:05:00" "21:10:00" "21:15:00" "21:20:00" "21:25:00" "21:30:00" "21:35:00" "21:40:00"
[262] "21:45:00" "21:50:00" "21:55:00" "22:00:00" "22:05:00" "22:10:00" "22:15:00" "22:20:00" "22:25:00"
[271] "22:30:00" "22:35:00" "22:40:00" "22:45:00" "22:50:00" "22:55:00" "23:00:00" "23:05:00" "23:10:00"
[280] "23:15:00" "23:20:00" "23:25:00" "23:30:00" "23:35:00" "23:40:00" "23:45:00" "23:50:00" "23:55:00"
$dimnames[[4]]
[1] "tempinf" "tempf" "humidityin" "humidity" "solarradiation" "hourlyrainin"
[7] "windspeedmph"
$dimnames[[5]]
[1] "2020" "2021"
有关如何处理此问题的任何建议?谢谢!
解决方法
您必须将数组展平才能写入。首先,我们为您的数据创建一个可重现的示例:
# Dimension labels of the example array: consecutive runs of capital letters
# of length 2, 3, 4, 5 and 6 (one run per dimension).
dnames <- list(
  LETTERS[1:2],
  LETTERS[3:5],
  LETTERS[6:9],
  LETTERS[10:14],
  LETTERS[15:20]
)
# One distinct integer per cell, so every value can be traced after flattening.
x <- seq_len(prod(lengths(dnames)))
y <- array(x, dim = lengths(dnames), dimnames = dnames)
str(y)
#  int [1:2, 1:3, 1:4, 1:5, 1:6] 1 2 3 4 5 6 7 8 9 10 ...
#  - attr(*, "dimnames")=List of 5
#   ..$ : chr [1:2] "A" "B"
#   ..$ : chr [1:3] "C" "D" "E"
#   ..$ : chr [1:4] "F" "G" "H" "I"
#   ..$ : chr [1:5] "J" "K" "L" "M" ...
#   ..$ : chr [1:6] "O" "P" "Q" "R" ...
# Same kind of listing as shown for data.station in the question.
attributes(y)
# $dim
# [1] 2 3 4 5 6
#
# $dimnames
# $dimnames[[1]]
# [1] "A" "B"
#
# $dimnames[[2]]
# [1] "C" "D" "E"
#
# $dimnames[[3]]
# [1] "F" "G" "H" "I"
#
# $dimnames[[4]]
# [1] "J" "K" "L" "M" "N"
#
# $dimnames[[5]]
# [1] "O" "P" "Q" "R" "S" "T"
现在我们将数组展平并将其写入文件:
# Flatten the array to long format: one row per cell, with one factor column
# per dimension (Var1..Var5) and the cell value in the Freq column.
z <- as.data.frame(as.table(y))
str(z)
# 'data.frame': 720 obs. of 6 variables:
#  $ Var1: Factor w/ 2 levels "A","B": 1 2 1 2 1 2 1 2 1 2 ...
#  $ Var2: Factor w/ 3 levels "C","D","E": 1 1 2 2 3 3 1 1 2 2 ...
#  $ Var3: Factor w/ 4 levels "F","G","H","I": 1 1 1 1 1 1 2 2 2 2 ...
#  $ Var4: Factor w/ 5 levels "J","K","L","M",..: 1 1 1 1 1 1 1 1 1 1 ...
#  $ Var5: Factor w/ 6 levels "O","P","Q","R",..: 1 1 1 1 1 1 1 1 1 1 ...
#  $ Freq: int 1 2 3 4 5 6 7 8 9 10 ...
# Save the long table as CSV in the working directory, without row names.
write.csv(z, file = "dfz.csv", row.names = FALSE)
最后我们读取文件并将其转换回数组:
# Read the long-format CSV back and rebuild the array.
# Every column is read as character so that dimension labels are kept verbatim
# rather than being converted to numbers, logicals or sorted factor levels.
a <- read.csv("dfz.csv", colClasses = "character")
is_value <- names(a) == "Freq"
# Give the value column back its natural type (integer in this example).
cell_values <- type.convert(a[["Freq"]], as.is = TRUE)
# Labels of each dimension in order of first appearance in the file; for a
# file written from as.data.frame(<array>) this is the original label order.
dim_labels <- unname(lapply(a[!is_value], unique))
# Position of every CSV row inside the array: one index column per dimension.
cell_index <- do.call(cbind, unname(Map(match, a[!is_value], dim_labels)))
# Fill an all-NA array by matrix indexing. Unlike tabulating with xtabs(),
# this keeps the label order (xtabs() orders by sorted factor levels, e.g.
# "Apr" before "Jan") and keeps missing values as NA instead of turning them
# into 0 under the default na.action.
b <- array(NA, dim = lengths(dim_labels), dimnames = dim_labels)
b[cell_index] <- cell_values
str(b)
#  int [1:2, 1:3, 1:4, 1:5, 1:6] 1 2 3 4 5 6 7 8 9 10 ...
#  - attr(*, "dimnames")=List of 5
#   ..$ : chr [1:2] "A" "B"
#   ..$ : chr [1:3] "C" "D" "E"
#   ..$ : chr [1:4] "F" "G" "H" "I"
#   ..$ : chr [1:5] "J" "K" "L" "M" ...
#   ..$ : chr [1:6] "O" "P" "Q" "R" ...
# The round trip reproduces the array that was written out.
identical(b, y)
# [1] TRUE