问题描述
我想创建一个汇总统计表,该表按区域报告每个变量的平均值/中位数,并在行中比较样本中的美国出生人口和移民之间以及整体之间的统计数据。我不知道可以用多种方式对表进行分组的代码。
这是我到目前为止提出的代码:
#the data frame
structure(list(AGE = c(40L,23L,24L,18L,30L,33L,32L,63L,22L,24L),FAMSIZE = c(2L,2L,3L,6L,1L,1L
),HYPERTEN = c(0,1,0),ALC = c(0,2,3,2),region_group = c("Region 4","Region 3","Region 4","Region 1","Region 2","Region 4"),PSU = c(2L,2L),IMMIGRANT = c(0,1),SAMPWEIGHT_MERGE = c(65,860.4,94.4,9146,170.8,310.4,755.2,1053.4,3964.4,706.2),STRATA = c(6296L,6165L,6296L,6224L,6045L,6083L,6029L,6073L,6287L,6247L
)),row.names = c(NA,10L),class = "data.frame")
#weighting data frame so accounts for sample design
sample_survey<- as_survey_design(A,ids=PSU,weights=SAMPWEIGHT_MERGE,strata=STRATA,nest=TRUE)
options(survey.lonely.psu="remove")
#producing desired table
out1<-sample_survey %>%
group_by(region_group) %>%
summarise("Number of drinks (mean)"=survey_mean(ALC),"Number of drinks (median)"=survey_median(ALC),"Hypertension"=survey_mean(HYPERTEN),"Family Size"=survey_mean(FAMSIZE),"Age"=survey_median(AGE))
out1=t(out1)
out1
#But here is what I hope the table can look like,such that the mean/median amongst all individuals,immigrant=0 and the immigrant=1 group are all displayed for each variable
[,1] [,2] [,3] [,4]
region_group "Region 1" "Region 2" "Region 3" "Region 4"
Number of drinks (all) "1.663778" "2.131566" "1.744107" "2.009594"
IMMIGRANT==0
IMMIGRANT==1
Number of drinks (mean)_se "0.1375124" "0.1245772" "0.0957500" "0.1199982"
Number of drinks (all) "1" "2" "1" "2"
IMMIGRANT==0
IMMIGRANT==1
Number of drinks (median)_se "0.0000000" "0.2531528" "0.0000000" "0.2533324"
Hypertension (all) "0.1340147" "0.1685102" "0.1834528" "0.1225418"
IMMIGRANT==0
IMMIGRANT==1
Hypertension_se "0.01623974" "0.01529678" "0.01463019" "0.01475651"
Family \n (all) Size "3.121062" "2.883905" "3.107202" "3.265012"
IMMIGRANT==0
IMMIGRANT==1
Family \n Size_se "0.11668906" "0.07435704" "0.08004129" "0.11138869"
Age (all) "30" "27" "30" "28"
IMMIGRANT==0
IMMIGRANT==1
Age_se "1.3615690" "1.0126110" "0.7616152" "0.7599972"
谢谢!
解决方法
您可以使用tables
package:
library(tables)
tables::tabular((ALC+HYPERTEN)*(IMMIGRANT=factor(IMMIGRANT)+1)*(weighted.mean+median)*Arguments(w = SAMPWEIGHT_MERGE)~(region=factor(region_group)),data=data)
region
IMMIGRANT Region 1 Region 2 Region 3
ALC 0 weighted.mean 2.816 0.0000 0.172
median 2.500 0.0000 1.000
1 weighted.mean NaN 0.0000 NaN
median NA 0.0000 NA
All weighted.mean 2.816 0.0000 0.172
median 2.500 0.0000 1.000
HYPERTEN 0 weighted.mean 0.000 1.0000 0.000
median 0.000 1.0000 0.000
1 weighted.mean NaN 0.0000 NaN
median NA 0.0000 NA
All weighted.mean 0.000 0.7724 0.000
median 0.000 0.5000 0.000