R 求助:我的累积百分比函数无法正常工作

问题描述

最近的某些更改(可能是 R 本身,也可能是其他地方)导致我之前可以正常运行的函数无法工作。该函数旨在生成两列,给出调查中某个给定得分(见 df2$V1)所对应的累积百分位数(见 df2$CumPercent)。我对手动版本做了一些修改,现在它可以按预期运行;但当我在函数中套用相同的逻辑时,却抛出错误,提示找不到 Var1 变量。有人知道这里可能是哪里出了问题吗?

# Sample survey scores used by the examples below.
# NOTE(review): the original declaration claimed 145 rows
# (row.names = c(NA, -145L)) while listing only 32 values, which produces a
# corrupt data frame whose nrow() disagrees with its column length.
# data.frame() derives the row count from the data itself. Any printed results
# elsewhere that assume 145 observations will not match this shortened sample.
df5 <- data.frame(
  MyVariable = c(
    4.66666666666667, 2.16666666666667, 5.66666666666667, 4.5,
    5.16666666666667, 1, 3.83333333333333, 2,
    4, 2.33333333333333, 5.5, 2.66666666666667,
    2.83333333333333, 4.33333333333333, 5.33333333333333, 3.66666666666667,
    5, 3, 4.83333333333333, 1.33333333333333,
    3.5, 4.66666666666667, 3.16666666666667, 4.16666666666667,
    6.16666666666667, 1.83333333333333, 3.33333333333333, 1.5,
    5.83333333333333, 6, 2.5, 4
  )
)

# Manual (non-function) version of the cumulative percent logic; this one works
# as intended.

# Every 0.01 step from 0 to 7, held in a one-column data frame named V1.
PercentilesRaw <- data.frame(V1 = seq(from = 0, to = 7, by = 0.01))

# Tabulate how often each distinct score occurs in the column.
df <- data.frame(table(df5$MyVariable))

# table() yields factor levels, so go via character to recover the numbers.
df[["Var1"]] <- as.numeric(as.character(df[["Var1"]]))

V1 <- df[["Var1"]] # distinct scores
Frequency <- df[["Freq"]] # number of occurrences of each score

# Running total of the counts, then expressed as a percentage rounded to 2 dp.
CumSum <- cumsum(Frequency)
CumPercent <- round(CumSum / sum(Frequency) * 100, 2)

# Pair each (rounded) score with its cumulative percentage and store as a df.
output <- cbind(round(V1, 2), CumPercent)
df2 <- data.frame(output)

#Now attempt to convert into a function.
# Function version of the manual cumulative-percent steps (the attempt posted in
# the question). Takes a vector `x` of scores and is meant to build a data
# frame pairing each distinct score with its cumulative percentage.
# NOTE: the "Var1" lookups are deliberately kept as posted. Inside the function
# data.frame(table(x)) names its first column "x", not "Var1", so the lookup
# fails -- this is the error the question is asking about.
cpave1 <- function(x) {
  PercentilesRaw <- data.frame(seq(from = 0, to = 7, by = .01)) # Every increment of percentile
  colnames(PercentilesRaw)[colnames(PercentilesRaw) == "seq.from...0..to...7..by...0.01."] <- "V1" # Rename percentile column
  df <- data.frame(table(x)) # Count the number of original values in the column
  df[, "Var1"] <- as.numeric(as.character(df[, "Var1"])) # table() produces factor levels, so convert to numeric
  V1 <- df[, "Var1"] # Vector of distinct scores
  Frequency <- df[, "Freq"] # Vector of counts
  CumSum <- cumsum(df[, "Freq"]) # Cumulative sum of the counts
  CumPercent <- CumSum / sum(df[, "Freq"]) * 100 # Cumulative percentage
  CumPercent <- round(CumPercent, 2) # Round to 2 dp
  output <- cbind(round(V1, 2), CumPercent) # Map the cumulative percent results to the scores
  df2 <- data.frame(output) # Convert the two columns into a df
}

# Apply the function to the MyVariable column and store what it returns.
# According to the question text, running the function is what reports that
# Var1 cannot be found.
MyVariable <- cpave1(df5$MyVariable)

解决方法

错误消息提示您的数据中没有 "Var1" 列。在函数内部,data.frame(table(x)) 会以参数名为列命名,因此该列叫作 x,而不是手动版本中的 Var1。下面是该函数更简短的改写版本,它返回相同的输出。

# Cumulative percentage for each distinct value of `x`.
# Returns a data frame with two columns:
#   V1         - the distinct values, rounded to 2 dp
#   CumPercent - cumulative share of observations in percent, rounded to 2 dp
cpave1 <- function(x) {
  # table() labels its single dimension after the argument, hence column "x";
  # type.convert() turns the tabulated labels back into their natural type.
  counts <- type.convert(data.frame(table(x)), as.is = TRUE)
  cum_pct <- cumsum(counts$Freq) / sum(counts$Freq) * 100
  data.frame(V1 = round(counts$x, 2), CumPercent = round(cum_pct, 2))
}
# Run the shortened function on the sample column; at top level the result is
# printed.
# NOTE(review): the output listed below starts at 1.38%, which does not
# correspond to the 32 values given for df5 earlier in this file -- it was
# presumably produced from a larger data set.
cpave1(df5$MyVariable)

#     V1 CumPercent
#1  1.00       1.38
#2  1.33       2.76
#3  1.50       3.45
#4  1.83       4.83
#5  2.00       6.21
#6  2.17       8.28
#7  2.33      12.41
#8  2.50      13.79
#...
,

我认为 data.table 是最好的方法。不使用函数的写法很简单:

library(data.table)

# Convert to a data.table, count rows per distinct MyVariable value, sort by
# value, then express the running count as a percentage of all rows.
df5 <- data.table(df5)
df5[, .N, by = MyVariable][
  order(MyVariable)
][
  , .(MyVariable, CumPercent = round(cumsum(N) / sum(N), 4) * 100)
]

或者,如果您想把它写成一个函数:

# Load data.table and convert df5 so that data.table's `[` syntax can be used
# on it.
library(data.table)
df5 <- data.table(df5)
# Cumulative percentage of each distinct value in one column of a data.table.
#   data    - a data.table
#   colname - name of the column to tabulate, given as a character string
# Returns a data.table with columns Values (the distinct values, ascending) and
# CumPercent (cumulative proportion of rows rounded to 4 dp, scaled to percent).
cpave2 <- function(data, colname) {
  # Grouping by get(colname) yields a grouping column literally named "get",
  # which is why the later steps refer to `get` as a column.
  data[, .N, by = get(colname)][
    order(get)
  ][
    , .(Values = get, CumPercent = round(cumsum(N) / sum(N), 4) * 100)
  ]
}

# Call the function on df5, passing the column name as a string.
cpave2(df5,'MyVariable')

相关问答

依赖报错 idea导入项目后依赖报错,解决方案:https://blog....
错误1:代码生成器依赖和mybatis依赖冲突 启动项目时报错如下...
错误1:gradle项目控制台输出为乱码 # 解决方案:https://bl...
错误还原:在查询的过程中,传入的workType为0时,该条件不起...
报错如下,gcc版本太低 ^ server.c:5346:31: 错误:‘struct...