问题描述
对于以下数据集
# Example data frame: columns x1..x4 repeated for the suffixes _c1, _c2 and
# _c3; the _c2 columns are a constant 0 recycled to five rows.
mydata <- data.frame(
  x1_c1 = 1:5, x2_c1 = 2:6, x3_c1 = 3:7, x4_c1 = 4:8,
  x1_c2 = 0, x2_c2 = 0, x3_c2 = 0, x4_c2 = 0,
  x1_c3 = 1:5, x2_c3 = 2:6, x3_c3 = 3:7, x4_c3 = 4:8
)
> mydata
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1 1 2 3 4 0 0 0 0 1 2 3 4
2 2 3 4 5 0 0 0 0 2 3 4 5
3 3 4 5 6 0 0 0 0 3 4 5 6
4 4 5 6 7 0 0 0 0 4 5 6 7
5 5 6 7 8 0 0 0 0 5 6 7 8
我想从以 _c1、_c2 和 _c3 结尾的变量中,分别减去以 _c3 结尾的对应变量,然后合并所有列。这是一个选项:
# Subtract the "_c3" columns from each suffix group ("_c1", "_c2", "_c3") and
# bind the three differences together. The patterns are anchored with "$" so
# only names that END in the suffix are selected, and drop = FALSE keeps each
# selection a data frame even if a group has a single column.
# Each difference keeps the column names of its left-hand operand.
mydata_update <- cbind(
  mydata[, grep("_c1$", colnames(mydata)), drop = FALSE] -
    mydata[, grep("_c3$", colnames(mydata)), drop = FALSE],
  mydata[, grep("_c2$", colnames(mydata)), drop = FALSE] -
    mydata[, grep("_c3$", colnames(mydata)), drop = FALSE],
  mydata[, grep("_c3$", colnames(mydata)), drop = FALSE] -
    mydata[, grep("_c3$", colnames(mydata)), drop = FALSE]
)
预期结果是
> mydata_update
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
欢迎使用任何其他方法。
解决方法
匹配数据和减法部分的前缀,然后减去:
# Key every column by the part of its name before the first "_", and flag the
# columns whose names end in "_c3" (the ones to subtract).
prefix <- sub("_.+", "", names(mydata))
subsel <- endsWith(names(mydata), "_c3")
# For each column of mydata, pick the "_c3" column with the same prefix, so
# the subtrahend lines up column-by-column with mydata, then subtract.
c3_aligned <- mydata[subsel][match(prefix, prefix[subsel])]
mydata - c3_aligned
# x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
#1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
#2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
#3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
#4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
#5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
或者,如果您愿意冒一点风险,并且确定数据完整、各组列的顺序符合预期:
# Subtract the "_c3" columns as a plain matrix.
# NOTE(review): this appears to rely on the matrix values being recycled
# across the consecutive column blocks of mydata, so it is only valid when
# every block has the same size and column order as the "_c3" block — confirm.
c3_values <- as.matrix(mydata[, endsWith(names(mydata), "_c3")])
mydata - c3_values
,
我们可以使用 split.default 根据列名的子串将数据拆分成若干组(得到一个 list),然后用 grep 找到每个 list 元素中的 'c3' 列并将其减去,最后在 do.call 中用 cbind 合并 list 的各个元素
# Split the columns into groups that share a prefix (the column name with
# everything from the first "_" removed), subtract each group's "_c3" column
# from every column in that group, bind the groups back together, and finally
# reorder the columns to match mydata.
# sub() needs the replacement argument (""); without it the call fails
# because the column names are taken as the replacement and `x` is missing.
# unname() stops cbind() from prefixing column names with the group names.
out <- do.call(
  cbind,
  unname(lapply(
    split.default(mydata, sub("_.*", "", names(mydata))),
    function(x) x - x[, grep("_c3", names(x))]
  ))
)[names(mydata)]
-输出
out
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
或者我们可以使用 tidyverse
library(dplyr)
library(tidyr)
# Reshape to long form (one row per original row and suffix), subtract the
# "c3" value within each original row, then reshape back to wide form.
mydata %>%
  # rn identifies the original row so values can be regrouped after pivoting
  mutate(rn = row_number()) %>%
  # "x1_c1" -> column x1 (".value") with grp == "c1"
  pivot_longer(
    cols = -rn,
    names_to = c(".value", "grp"),
    names_sep = "_"
  ) %>%
  group_by(rn) %>%
  # within each original row, subtract the value found on the grp == 'c3' line
  mutate(across(where(is.numeric), ~ . - .[grp == 'c3'])) %>%
  ungroup() %>%
  pivot_wider(names_from = grp, values_from = x1:x4) %>%
  # keep only the original columns, in their original order (drops rn)
  select(all_of(names(mydata)))
-输出
# A tibble: 5 x 12
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
,
这是使用循环的另一种方式:
# Take the "_c3" columns as the block to subtract. The original line had
# unbalanced parentheses and no "_c3" selection, so it could not be parsed.
# drop = FALSE keeps sm a data frame even when only one column matches, so
# ncol(sm) is always defined.
sm <- mydata[, grepl("_c3", colnames(mydata)), drop = FALSE]
mydata_update <- mydata
# Walk over mydata in consecutive blocks of ncol(sm) columns and subtract sm
# from each block. This assumes the columns are laid out in such blocks, each
# in the same order as sm.
for (i in seq(1, ncol(mydata), ncol(sm))) {
  idx <- i:(i + ncol(sm) - 1)
  mydata_update[, idx] <- mydata_update[, idx, drop = FALSE] - sm
}
mydata_update
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
5 0 0 0 0 -5 -6 -7 -8 0 0 0 0