问题描述
下面,我展示了一段代码,用于在R
中生成一组项目得分。但是,似乎有很多不必要的重复才能到达最后的data
。
我想知道是否有更紧凑的方法可以在data
中实现相同的R
?
set.seed(8649)
N = 10
latent = rnorm(N)
##### generate latent responses to items
item1 = latent + rnorm(N,mean=0,sd=0.2)
item2 = latent + rnorm(N,sd=0.3)
item3 = latent + rnorm(N,sd=0.5)
item4 = latent + rnorm(N,sd=1.0)
item5 = latent + rnorm(N,sd=1.2)
##### convert latent responses to ordered categories
item1 = findInterval(item1,vec=c(-Inf,-2.5,-1,1,2.5,Inf))
item2 = findInterval(item2,Inf))
item3 = findInterval(item3,-3,-2,2,3,Inf))
item4 = findInterval(item4,Inf))
item5 = findInterval(item5,-3.5,0.5,Inf))
data = cbind(item1,item2,item3,item4,item5)
解决方法
我们可以在list
中创建第一组'item',变量部分为'sd'
# // loop over the sd vector and create the list of random numbers in a list
lst1 <- lapply(c(0.2,0.3,0.5,1,1.2),function(x)
latent + rnorm(N,mean = 0,sd = x))
# // set the names of the list if needed
names(lst1) <- paste0("item",seq_along(lst1))
使用Map
将'lst1'和vec
的相应元素作为list
('veclst')循环并应用findInterval
data.frame(Map(findInterval,lst1,vec = veclst))
# item1 item2 item3 item4 item5
#1 4 4 3 3 5
#2 3 3 3 3 5
#3 3 3 3 3 5
#4 4 4 4 3 5
#5 2 2 3 2 4
#6 3 3 3 3 3
#7 3 3 3 3 4
#8 2 2 2 2 2
#9 4 4 5 4 5
#10 3 3 3 3 4
或者用tidyverse
library(purrr)
library(dplyr)
library(stringr)
map2_dfc(c(0.2,veclst,~
findInterval(latent + rnorm(N,sd = .x),vec = .y)) %>%
set_names(str_c('item',seq_along(.)))
-输出
# A tibble: 10 x 5
# item1 item2 item3 item4 item5
# <int> <int> <int> <int> <int>
# 1 4 4 3 4 4
# 2 3 3 3 3 5
# 3 3 3 3 3 4
# 4 3 4 3 3 5
# 5 2 2 3 3 4
# 6 3 3 3 3 5
# 7 3 3 3 3 4
# 8 2 2 2 1 2
# 9 4 4 4 5 5
#10 3 3 3 3 4
更新
如果我们要创建一个函数,请确保latent
是基于函数内传递的新'N'创建的,因为它可能导致长度上的差异。在OP'帖子中显示的原始代码中,length
是10,而latent
是基于该代码创建的
make_likert <- function(N.judge = 10,item.sds,cut_points,seed = NULL){
set.seed(seed)
latent <- rnorm(N.judge)
lst1 <- lapply(item.sds,function(x) latent + rnorm(n = N.judge,sd = x))
names(lst1) <- paste0("item",seq_along(lst1))
data.frame(Map(findInterval,cut_points))
}
make_likert(N.judge=13,item.sds = item.sds,cut_points = cut_points)
# item1 item2 item3 item4 item5 item6 item7 item8 item9 item10
#1 4 3 2 3 1 3 3 5 4 4
#2 5 3 3 3 5 3 3 5 5 5
#3 2 2 3 3 3 3 3 4 3 5
#4 3 5 3 3 3 3 4 5 4 3
#5 3 1 4 3 4 3 1 2 4 4
#6 2 1 3 3 1 3 3 5 5 4
#7 3 2 3 3 3 3 5 1 5 3
#8 3 1 2 3 5 3 3 5 3 5
#9 4 3 2 3 3 4 3 1 5 5
#10 3 2 3 3 1 3 4 3 3 2
#11 4 5 1 3 5 3 1 3 5 5
#12 3 5 3 3 3 3 5 3 5 5
#13 3 4 5 3 3 3 1 1 5 3
数据
veclst <- rep(list(c(-Inf,-2.5,-1,2.5,Inf),c(-Inf,-3,-2,2,3,-3.5,Inf)),c(2,1))
,
您可以使用mapply
。例如。像这样:
mapply(findInterval,vec = v_arg,x = lapply(sig_arg,rnorm,mean = latent,n = N))
#R> [,1] [,2] [,3] [,4] [,5]
#R> [1,] 4 4 3 3 3
#R> [2,] 3 3 3 4 5
#R> [3,] 3 3 3 4 4
#R> [4,] 4 4 3 3 5
#R> [5,] 2 2 3 3 3
#R> [6,] 3 3 3 3 5
#R> [7,] 3 3 3 3 4
#R> [8,] 1 1 2 3 3
#R> [9,] 4 4 3 3 5
#R> [10,] 3 3 3 3 4
如果要使用列名,请使用例如:
mapply(findInterval,setNames(lapply(sig_arg,n = N),paste0("item",seq_along(sig_arg))),v_arg)
#R> item1 item2 item3 item4 item5
#R> [1,] 4 4 3 3 3
#R> [2,] 3 3 3 4 5
#R> [3,] 3 3 3 4 4
#R> [4,] 4 4 3 3 5
#R> [5,] 2 2 3 3 3
#R> [6,] 3 3 3 3 5
#R> [7,] 3 3 3 3 4
#R> [8,] 1 1 2 3 3
#R> [9,] 4 4 3 3 5
#R> [10,] 3 3 3 3 4
您可以将其包装到一个函数中,以便可以像这样更改N
,标准偏差和断点:
sim_scores <- function(N,sigs,cuts)
mapply(findInterval,setNames(lapply(sigs,mean = rnorm(N),seq_along(cuts))),cuts)
# use the function
sim_scores(10L,sig_arg,] 3 3 3 2 3
#R> [2,] 3 3 3 3 5
#R> [3,] 3 3 3 3 4
#R> [4,] 5 4 4 4 5
#R> [5,] 3 3 3 3 3
#R> [6,] 3 4 3 3 5
#R> [8,] 2 2 3 3 3
#R> [9,] 3 3 3 3 4
#R> [10,] 2 2 3 3 2
sim_scores(4L,sig_arg[1:2],v_arg[1:2])
#R> item1 item2
#R> [1,] 2 2
#R> [2,] 3 3
#R> [3,] 2 3
#R> [4,] 3 3
数据
sig_arg <- c(.2,.3,.5,1.5)
v_arg <- list(c(-Inf,Inf))