在R中循环一组生成数字的命令 更新数据数据

问题描述

下面,我展示了一段代码,用于在R生成一组项目得分。但是,似乎有很多不必要的重复才能到达最后的data

我想知道是否有更紧凑的方法可以在data中实现相同的R

set.seed(8649)     
N      = 10        
latent = rnorm(N)  

##### generate latent responses to items
item1 = latent + rnorm(N,mean=0,sd=0.2)  
item2 = latent + rnorm(N,sd=0.3)
item3 = latent + rnorm(N,sd=0.5)
item4 = latent + rnorm(N,sd=1.0)
item5 = latent + rnorm(N,sd=1.2)  

##### convert latent responses to ordered categories
item1 = findInterval(item1,vec=c(-Inf,-2.5,-1,1,2.5,Inf)) 
item2 = findInterval(item2,Inf))
item3 = findInterval(item3,-3,-2,2,3,Inf))  
item4 = findInterval(item4,Inf))
item5 = findInterval(item5,-3.5,0.5,Inf))

data = cbind(item1,item2,item3,item4,item5)

解决方法

我们可以在list中创建第一组'item',变量部分为'sd'

# // loop over the sd vector and create the list of random numbers in a list
lst1 <- lapply(c(0.2,0.3,0.5,1,1.2),function(x) 
           latent + rnorm(N,mean = 0,sd = x))
# // set the names of the list if needed
names(lst1) <- paste0("item",seq_along(lst1))

使用Map将'lst1'和vec的相应元素作为list('veclst')循环并应用findInterval

data.frame(Map(findInterval,lst1,vec = veclst))
#   item1 item2 item3 item4 item5
#1      4     4     3     3     5
#2      3     3     3     3     5
#3      3     3     3     3     5
#4      4     4     4     3     5
#5      2     2     3     2     4
#6      3     3     3     3     3
#7      3     3     3     3     4
#8      2     2     2     2     2
#9      4     4     5     4     5
#10     3     3     3     3     4

或者用tidyverse

library(purrr)
library(dplyr)
library(stringr)
map2_dfc(c(0.2,veclst,~ 
          findInterval(latent + rnorm(N,sd = .x),vec = .y)) %>%
  set_names(str_c('item',seq_along(.)))

-输出

# A tibble: 10 x 5
#  item1 item2 item3 item4 item5
#   <int> <int> <int> <int> <int>
# 1     4     4     3     4     4
# 2     3     3     3     3     5
# 3     3     3     3     3     4
# 4     3     4     3     3     5
# 5     2     2     3     3     4
# 6     3     3     3     3     5
# 7     3     3     3     3     4
# 8     2     2     2     1     2
# 9     4     4     4     5     5
#10     3     3     3     3     4

更新

如果我们要创建一个函数,请确保latent是基于函数内传递的新'N'创建的,因为它可能导致长度上的差异。在OP'帖子中显示的原始代码中,length是10,而latent是基于该代码创建的

make_likert <- function(N.judge = 10,item.sds,cut_points,seed = NULL){
   set.seed(seed)
   latent <- rnorm(N.judge)
   lst1 <- lapply(item.sds,function(x) latent + rnorm(n = N.judge,sd = x))
      names(lst1) <- paste0("item",seq_along(lst1))
      data.frame(Map(findInterval,cut_points))
     }
      
make_likert(N.judge=13,item.sds = item.sds,cut_points = cut_points)
#     item1 item2 item3 item4 item5 item6 item7 item8 item9 item10
#1      4     3     2     3     1     3     3     5     4      4
#2      5     3     3     3     5     3     3     5     5      5
#3      2     2     3     3     3     3     3     4     3      5
#4      3     5     3     3     3     3     4     5     4      3
#5      3     1     4     3     4     3     1     2     4      4
#6      2     1     3     3     1     3     3     5     5      4
#7      3     2     3     3     3     3     5     1     5      3
#8      3     1     2     3     5     3     3     5     3      5
#9      4     3     2     3     3     4     3     1     5      5
#10     3     2     3     3     1     3     4     3     3      2
#11     4     5     1     3     5     3     1     3     5      5
#12     3     5     3     3     3     3     5     3     5      5
#13     3     4     5     3     3     3     1     1     5      3

数据

veclst <- rep(list(c(-Inf,-2.5,-1,2.5,Inf),c(-Inf,-3,-2,2,3,-3.5,Inf)),c(2,1))
,

您可以使用mapply。例如。像这样:

mapply(findInterval,vec = v_arg,x = lapply(sig_arg,rnorm,mean = latent,n = N)) 
#R>       [,1] [,2] [,3] [,4] [,5]
#R>  [1,]    4    4    3    3    3
#R>  [2,]    3    3    3    4    5
#R>  [3,]    3    3    3    4    4
#R>  [4,]    4    4    3    3    5
#R>  [5,]    2    2    3    3    3
#R>  [6,]    3    3    3    3    5
#R>  [7,]    3    3    3    3    4
#R>  [8,]    1    1    2    3    3
#R>  [9,]    4    4    3    3    5
#R> [10,]    3    3    3    3    4

如果要使用列名,请使用例如:

mapply(findInterval,setNames(lapply(sig_arg,n = N),paste0("item",seq_along(sig_arg))),v_arg)
#R>       item1 item2 item3 item4 item5
#R>  [1,]     4     4     3     3     3
#R>  [2,]     3     3     3     4     5
#R>  [3,]     3     3     3     4     4
#R>  [4,]     4     4     3     3     5
#R>  [5,]     2     2     3     3     3
#R>  [6,]     3     3     3     3     5
#R>  [7,]     3     3     3     3     4
#R>  [8,]     1     1     2     3     3
#R>  [9,]     4     4     3     3     5
#R> [10,]     3     3     3     3     4

您可以将其包装到一个函数中,以便可以像这样更改N,标准偏差和断点:

sim_scores <- function(N,sigs,cuts)
  mapply(findInterval,setNames(lapply(sigs,mean = rnorm(N),seq_along(cuts))),cuts)

# use the function
sim_scores(10L,sig_arg,]     3     3     3     2     3
#R>  [2,]     3     3     3     3     5
#R>  [3,]     3     3     3     3     4
#R>  [4,]     5     4     4     4     5
#R>  [5,]     3     3     3     3     3
#R>  [6,]     3     4     3     3     5
#R>  [8,]     2     2     3     3     3
#R>  [9,]     3     3     3     3     4
#R> [10,]     2     2     3     3     2

sim_scores(4L,sig_arg[1:2],v_arg[1:2])
#R>      item1 item2
#R> [1,]     2     2
#R> [2,]     3     3
#R> [3,]     2     3
#R> [4,]     3     3

数据

sig_arg <- c(.2,.3,.5,1.5)
v_arg <- list(c(-Inf,Inf))