问题描述
我的数据框是:
Account id Fcast 1 Fcast 2 Fcast 3 Diff 1 Diff 2 Diff 3
101 4000 2000 1000 1000 3000 4000
201 2900 3300 5000 100 300 2000
301 -100 5500 -800 1700 7300 1000
401 5000 8000 7100 2500 500 400
501 9000 12000 2000 15000 12000 22000
所需结果是从标记为Diff ...的列中找出最小值。
Account id Min
101 1000
201 100
301 1000
401 400
501 12000
另外,最好还能再得到一列,用来标明每行的最小值取自哪一列(即该列的列名)。
解决方法
我们可以在这里按行(MARGIN = 1)使用apply:
# Build a two-column result: the account id plus the row-wise minimum
# (MARGIN = 1) over every column whose name starts with "Diff" and a digit.
data.frame(
  AccountId = df$AccountId,
  Min = apply(
    df[grep("^Diff\\d", names(df), value = TRUE)],
    MARGIN = 1,
    FUN = min
  )
)
AccountId Min
1 101 1000
2 201 100
3 301 1000
4 401 400
5 501 12000
数据:
# Example data matching the table in the question: one row per account,
# three forecast columns and three difference columns, five values each.
df <- data.frame(
  AccountId = c(101, 201, 301, 401, 501),
  Fcast1 = c(4000, 2900, -100, 5000, 9000),
  Fcast2 = c(2000, 3300, 5500, 8000, 12000),
  Fcast3 = c(1000, 5000, -800, 7100, 2000),
  Diff1 = c(1000, 100, 1700, 2500, 15000),
  Diff2 = c(3000, 300, 7300, 500, 12000),
  Diff3 = c(4000, 2000, 1000, 400, 22000)
)
,
另一种选择是使用apply
函数:
# Row-wise minimum over the Diff columns only.
# apply() needs MARGIN = 1 to work across each row; the Fcast columns are
# left out so that a small forecast value cannot be reported as the minimum.
# Note: as in the original snippet, this overwrites `df` with the result.
diff_cols <- grep("^Diff", names(df), value = TRUE)
df <- data.frame(
  AccountId = df$AccountId,
  min = apply(df[, diff_cols, drop = FALSE], 1, min)
)
,
使用dplyr
:
library(dplyr)

# Names of the columns that take part in the minimum (those containing "Diff").
cols <- grep("Diff", names(df), value = TRUE)

df %>%
  # rowwise() makes c_across() see one row at a time, so the result does not
  # rely on Accountid being unique; Accountid is kept as an identifier column.
  rowwise(Accountid) %>%
  mutate(
    # all_of() is the supported way to select with an external character
    # vector; passing `cols` bare is deprecated tidyselect usage.
    Min = min(c_across(all_of(cols))),
    Min_name = cols[which.min(c_across(all_of(cols)))]
  ) %>%
  # Drop the row-wise grouping so later verbs are not silently applied per row.
  ungroup() %>%
  select(Accountid, Min, Min_name)
# Accountid Min Min_name
# <int> <int> <chr>
#1 101 1000 Diff1
#2 201 100 Diff1
#3 301 1000 Diff3
#4 401 400 Diff3
#5 501 12000 Diff2
数据
# Example data matching the table in the question (integer columns),
# five rows with automatic row names.
df <- structure(
  list(
    Accountid = c(101L, 201L, 301L, 401L, 501L),
    Fcast1 = c(4000L, 2900L, -100L, 5000L, 9000L),
    Fcast2 = c(2000L, 3300L, 5500L, 8000L, 12000L),
    Fcast3 = c(1000L, 5000L, -800L, 7100L, 2000L),
    Diff1 = c(1000L, 100L, 1700L, 2500L, 15000L),
    Diff2 = c(3000L, 300L, 7300L, 500L, 12000L),
    Diff3 = c(4000L, 2000L, 1000L, 400L, 22000L)
  ),
  class = "data.frame",
  row.names = c(NA, -5L)
)
,
使用data.table的解决方案
# Assumes `dt` is a data.table holding the example data, e.g.
# dt <- data.table::as.data.table(df) -- confirm against your session.
# Adds two columns by reference: the row-wise minimum of the Diff columns and
# the name of the column it came from.
# apply() needs MARGIN = 1 to scan each row, and %like% is case-sensitive, so
# the pattern must match the capitalised names Diff1, Diff2, Diff3.
dt[, `:=`(
  min_val = apply(.SD, 1, min),
  min_col = names(.SD)[apply(.SD, 1, which.min)]
), .SDcols = names(dt) %like% "^Diff"]
- 此处,`.SDcols` 选择要处理的列的子集;在本例中,即列名中含有单词 diff 的列(Diff1、Diff2、Diff3),因此使用 `%like%`。
- `.SD` 此时相当于一个只包含这些 diff 列的子数据表。