使用R中的函数monthglm将带有月份分类变量的广义线性模型glm拟合

问题描述

问题

我有一个数据框(请参见下文),我想使用一个基于季节和年份的协变量的函数monthglm()来将一个具有月分类变量的通用线性模型(glm)拟合。

运行以下由Barnett,A.G.,Dobson,A.J. (2010) Analysing Seasonal Health Data. Springer.编写的函数(见下文)后,我继续收到此错误消息。

如果有人可以提供帮助,我将非常感激。

加载程序包

library(season)
library(MASS) # for mvrnorm
library(survival) # for coxph
library(ggplot2)

功能

monthglm<-function(formula,data,family=gaussian(),refmonth=1,monthvar='month',offsetmonth=FALSE,offsetpop=NULL){
  ## checks
  if (refmonth<1|refmonth>12){stop("Reference month must be between 1 and 12")}
  ## original call with defaults (see amer package)
  ans <- as.list(match.call())
  frmls <- formals(deparse(ans[[1]]))
  add <- which(!(names(frmls) %in% names(ans)))
  call<-as.call(c(ans,frmls[add]))
  
  monthvar=with(data,get(monthvar))
  cmonthvar=class(monthvar)
  ## If month is a character,create the numbers
  if(cmonthvar%in%c('factor','character')){
    if(cmonthvar=='character'){
      if(max(nchar(monthvar))==3){mlevels=substr(month.name,1,3)}else{mlevels=month.name}
      monthvar=factor(monthvar,levels=mlevels)
    }
    months=as.numeric(monthvar)
    data$month=months # add to data for flagleap
    months=as.factor(months)
    levels(months)[months]<-month.abb[months]
    months<-relevel(months.u,ref=month.abb[refmonth]) # set reference month
  }
  ## Transform month numbers to names
  if(cmonthvar%in%c('integer','numeric')){
    months.u<-as.factor(monthvar)  
    nums<-as.numeric(nochars(levels(months.u))) # Month numbers
    levels(months.u)[nums]<-month.abb[nums]
    months<-relevel(months.u,ref=month.abb[refmonth]) # set reference month
  }
  ## prepare data/formula
  parts<-paste(formula)
  f<-as.formula(paste(parts[2],parts[1],parts[3:length(formula)],'+months'))
  dep<-parts[2] # dependent variable
  days<-flagleap(data=data,report=FALSE,matchin=T) # get the number of days in each month
  l<-nrow(data)
  if(is.null(offsetpop)==FALSE){poff=with(data,eval(offsetpop))} else{poff=rep(1,l)} # 
  if(offsetmonth==TRUE){moff=days$ndaysmonth/(365.25/12)} else{moff=rep(1,l)} # days per month divided by average month length
  ###  data$off<-log(poff*moff)
  off<-log(poff*moff)  # 
  fit<-glm(formula=f,data=data,family=family,offset=off)
  ## return
  toret<-list()
  toret$call<-call
  toret$glm<-fit
  toret$fitted.values<-fitted(fit)
  toret$residuals<-residuals(fit)
  class(toret)<-'monthglm'
  return(toret)
}

#The levels of a factor must match the observed values. 
#If you want to change how those values print out,you need to change the labels. 

错误消息

model<-monthglm(formula=Frequency_Blue~Year+Monsoon_Season,family=gaussian,+                       offsetmonth=TRUE,data=Final_New_Blue)
Error in nochars(levels(months.u)) : Could not find function "nochars"

数据框

   structure(list(Year = c(2015L,2016L,2017L,2015L,2017L),Month = structure(c(5L,5L,4L,8L,1L,9L,7L,6L,2L,12L,11L,10L,3L,3L),.Label = c("April","August","December","Feb","Jan","July","June","Mar","May","November","October","September"
),class = "factor"),Frequency_Blue_Whales_Year_Month = c(76L,78L,66L,28L,54L,37L,39L,31L,88L,46L,23L,0L,22L,44L,30L,35L,41L,43L,65L,90L),Season = structure(c(4L,5L),.Label = c("Autumn","Spring","Summer","winter","Winter"),class = "factor")),class = "data.frame",row.names = c(NA,-36L))

参数

enter image description here

解决方法

通过以下简单的代码更改和函数调用,我就能使模型运行。我将其命名为monthglm2,以区别于包功能。调用数据df

library(season)
library(MASS) # for mvrnorm
library(survival) # for coxph
library(ggplot2)


monthglm2<-function(formula,data,family=gaussian(),refmonth=1,monthvar='month',offsetmonth=FALSE,offsetpop=NULL){
  ## checks
  if (refmonth<1|refmonth>12){stop("Reference month must be between 1 and 12")}
  ## original call with defaults (see amer package)
  ans <- as.list(match.call())
  frmls <- formals(deparse(ans[[1]]))
  add <- which(!(names(frmls) %in% names(ans)))
  call<-as.call(c(ans,frmls[add]))

  monthvar=with(data,get(monthvar))
  cmonthvar=class(monthvar)
  ## If month is a character,create the numbers
  if(cmonthvar%in%c('factor','character')){
    if(cmonthvar=='character'){
      if(max(nchar(monthvar))==3){mlevels=substr(month.name,1,3)}else{mlevels=month.name}
      monthvar=factor(monthvar,levels=mlevels)
    }
    months=as.numeric(monthvar)
    data$month=months # add to data for flagleap
    months=as.factor(months)
    levels(months)[months]<-month.abb[months]
    months<-relevel(months,ref=month.abb[refmonth]) # set reference month ### TYPO HERE,changed from months.u
  }
  ## Transform month numbers to names
  if(cmonthvar%in%c('integer','numeric')){
    months.u<-as.factor(monthvar)
    nums<-as.numeric(nochars(levels(months.u))) # Month numbers
    levels(months.u)[nums]<-month.abb[nums]
    months<-relevel(months.u,ref=month.abb[refmonth]) # set reference month
  }
  ## prepare data/formula
  parts<-paste(formula)
  f<-as.formula(paste(parts[2],parts[1],parts[3:length(formula)],'+months'))
  dep<-parts[2] # dependent variable
  days<-flagleap(data=data,report=FALSE,matchin=T) # get the number of days in each month
  l<-nrow(data)
  if(is.null(offsetpop)==FALSE){poff=with(data,eval(offsetpop))} else{poff=rep(1,l)} #
  if(offsetmonth==TRUE){moff=days$ndaysmonth/(365.25/12)} else{moff=rep(1,l)} # days per month divided by average month length
  ###  data$off<-log(poff*moff)
  off<-log(poff*moff)  #
  fit<-glm(formula=f,data=data,family=family,offset=off)
  ## return
  toret<-list()
  toret$call<-call
  toret$glm<-fit
  toret$fitted.values<-fitted(fit)
  toret$residuals<-residuals(fit)
  class(toret)<-'monthglm'
  return(toret)
}


df$year <- df$Year
monthglm2(formula=Frequency_Blue_Whales_Year_Month~Year+Season,offsetmonth=TRUE,monthvar='Month',data=df)

函数中存在另一个问题,我必须将列重命名为year。如果您查看此软件包的github,则只有一名贡献者,而没有提出任何问题。使用诸如此类的程序包是有利有弊的:它们可能具有有用的新颖方法,但是无法快速识别并解决错误。如果您继续进行季节性分析,建议您尝试学习如何在glm中直接添加季节性模型