循环获取 R 中的 ChangePoint 数据

问题描述

这是我的示例数据:

datex <- c(rep("2018-01-18",24),rep("2018-01-19",rep("2018-01-18",24))
hourx <- c(rep(0:23,18))
country <- c(rep("indonesia",48),rep("brunei",rep("filipina",rep("kamboja",rep("laos",rep("malaysia",rep("myanmar",rep("singapura",rep("vietnam",48))
transaction <- c(1080,993,1014,1003,885,934,1068,1128,1188,1335,1218,1297,1186,1123,1139,1170,1207,1223,1145,1178,1300,1282,1209,922,1797,979,997,851,1099,962,1070,1028,1182,1431,1498,1364,1357,1115,1248,1191,1350,1276,1448,1319,1097,1131,1122,964,881,893,986,874,980,978,1127,1308,1198,1173,1090,1134,1175,1230,1125,1298,1138,941,1216,1060,926,831,975,1085,1046,1166,1519,1537,1181,1237,1235,1407,1174,1196,1355,1416,1341,1309,1132,1162,1111,948,920,1000,1077,1020,1287,1224,1263,1406,1262,1089,1169,1272,1270,1146,1280,1105,1275,1291,965,1376,953,1004,889,1084,1073,1042,1326,1599,1522,1229,1163,1091,1353,1305,1426,1362,1478,1201,1184,947,915,1074,1024,918,1041,1231,1217,1096,1307,1271,1202,1240,1113,1179,1302,1215,1106,2232,1036,924,1168,930,1116,1088,1054,1589,1526,1371,1234,1203,1157,1343,1445,1397,1238,1192,1057,1156,966,989,959,1143,912,1066,1110,1314,1107,1118,1158,1164,1385,938,1523,1010,1043,883,1155,1442,1189,1351,1264,1358,105,211,790,297,138,100,106,102,99,434,402,464,710,85,144,405,91,95,101,127,120,94,614,327,280,215,104,103,181,657,1724,22,279,418,135,87,169,174,112,125,1067,942,1065,981,944,1023,1095,1100,1079,1210,1250,1293,1165,1279,1284,1136,1254,921,3866,1019,807,1120,940,1071,1619,1524,1348,1259,1232,1313,1360,1409,1359,1212,1069,897,973,967,1005,1299,1055,1253,1535,1255,1252,1301,1233,1153,1320,945,1772,890,1039,1151,1509,1483,1265,1243,1342,1511,1373,1257,1141,827,1006,856,1076,1205,1183,1316,1228,1101,1092,1244,1147,1124,1159,929,977,850,855,1126,1160,1613,1403,1491,1108,1286,1177)
mydata <- data.frame(datex,hourx,country,transaction)

我的任务是根据我拥有的每个国家/地区获取交易的变更点数据。这是我从“印度尼西亚”国家/地区获取变更点数据的手动脚本

# Manual Script
library(changepoint)
cp_data <- subset(mydata,country == "indonesia")
cp_process <- cpt.meanvar(cp_data$transaction,method = "PELT") 
cp_index <- cpts(cp_process)+1

cp_index
# [1]  8 24 34 36

cp_result <- rbind(cp_data[cp_index[1],],cp_data[cp_index[2],cp_data[cp_index[3],cp_data[cp_index[4],])
cp_result
# datex hourx   country transaction
# 8  2018-01-18     7 indonesia        1128
# 24 2018-01-18    23 indonesia         922
# 34 2018-01-19     9 indonesia        1431
# 36 2018-01-19    11 indonesia        1498

请帮忙,我如何循环从我的数据中获取任何国家/地区的所有变更点数据?。谢谢

解决方法

您可以将您拥有的代码放在一个函数中,并将其应用于每个 country

library(changepoint)
library(dplyr)

filter_change_point <- function(transaction) {
  cp_process <- cpt.meanvar(transaction,method = "PELT") 
  cp_index <- cpts(cp_process)+1
  cp_index
}

mydata %>%
  group_by(country) %>%
  slice(filter_change_point(transaction)) %>%
  ungroup

#   datex      hourx country   transaction
#   <chr>      <int> <chr>           <dbl>
# 1 2018-01-19     9 brunei           1519
# 2 2018-01-19    12 brunei           1181
# 3 2018-01-19     9 filipina         1599
# 4 2018-01-19    11 filipina         1522
# 5 2018-01-18     7 indonesia        1128
# 6 2018-01-18    23 indonesia         922
# 7 2018-01-19     9 indonesia        1431
# 8 2018-01-19    11 indonesia        1498
# 9 2018-01-18     7 kamboja          1231
#10 2018-01-18    23 kamboja           979
# … with 23 more rows