使用 nlsLM 拟合呈现肘部/膝盖弯曲的数据集,并“强制”系数接近阈值

问题描述

由于需要使用 sestak berggren 模型(源自逻辑模型)拟合与二维扩散过程 D2 过程相关的数据集,我需要了解如何使用 nlsLM 当存在肘部/膝盖时,因为以下“简单方法不起作用”

x=c(1.000000e-05,1.070144e-05,1.208082e-05,1.456624e-05,1.861581e-05,2.490437e-05,3.407681e-05,4.696710e-05,6.474653e-05,8.870800e-05,1.206194e-04,1.624442e-04,2.172716e-04,2.882747e-04,3.794489e-04,4.956619e-04,6.427156e-04,8.275095e-04,1.058201e-03,1.344372e-03,1.697222e-03,2.129762e-03,2.657035e-03,3.296215e-03,4.067301e-03,4.992831e-03,6.098367e-03,7.412836e-03,8.968747e-03,1.080251e-02,1.295471e-02,1.547045e-02,1.839960e-02,2.179713e-02,2.572334e-02,3.024414e-02,3.543131e-02,4.136262e-02,4.812205e-02,5.579985e-02,6.449256e-02,7.430297e-02,8.533991e-02,9.771803e-02,1.115573e-01,1.269824e-01,1.441219e-01,1.631074e-01,1.840718e-01,2.071477e-01,2.324656e-01,2.601509e-01,2.903210e-01,3.230812e-01,3.585200e-01,3.967033e-01,4.376671e-01,4.814084e-01,5.278744e-01,5.769469e-01,6.284244e-01,6.819947e-01,7.371982e-01,7.933704e-01,8.495444e-01,9.042616e-01)

ynorm=c(
 1.000000e+00,8.350558e-01,6.531870e-01,4.910995e-01,3.581158e-01,2.553070e-01,1.814526e-01,1.290639e-01,9.219591e-02,6.623776e-02,4.817180e-02,3.543117e-02,2.624901e-02,1.961542e-02,1.478284e-02,1.123060e-02,8.597996e-03,6.631400e-03,5.151026e-03,4.028428e-03,3.171096e-03,2.511600e-03,2.001394e-03,1.604211e-03,1.292900e-03,1.047529e-03,8.530624e-04,6.981015e-04,5.739778e-04,4.740553e-04,3.932255e-04,3.275345e-04,2.739059e-04,2.299339e-04,1.937278e-04,1.637946e-04,1.389500e-04,1.182504e-04,1.009406e-04,8.641380e-05,7.418032e-05,6.384353e-05,5.508090e-05,4.762920e-05,4.127282e-05,3.583451e-05,3.116813e-05,2.715264e-05,2.368759e-05,2.068935e-05,1.808802e-05,1.582499e-05,1.385102e-05,1.212452e-05,1.061032e-05,9.278534e-06,8.103650e-06,7.063789e-06,6.140038e-06,5.315870e-06,4.576585e-06,3.908678e-06,3.298963e-06,2.732866e-06,2.189810e-06,1.614149e-06)


dfxy <-  data.frame(x[1:length(ynorm)],ynorm)
fn=funSel <-"co*((1-x)^m)*(x^n)"
mod_fit <- nlsLM(ynorm~eval(parse(text=fn)),start=c(co=0.5,m=-1,n=0.5),data=dfxy)
plot(dfxy$x,dfxy$y,xlim=c(0,0.001))
plot(dfxy$x,(fitted(mod_fit))[1:length(dfxy$x)],0.001))

我找到的唯一解决方案是基于 https://stackoverflow.com/a/54286595/6483091。因此,首先找到“肘部”,然后仅将回归应用于缩减的数据集。以这种方式一切都有效,但我想知道是否还有其他解决方案(调整回归参数而不是分两步进行,以某种方式让 nlsLM 使用动态一阶导数阈值“识别”曲线,但仍然强制fn 用于回归) “最大的问题是我已经知道参数的“范围””(即 使用“好”起点(接近“基本事实”同范数的系数

“基本事实”

 plot(x,yrnom) yt <- 0.973*(1-x)^(0.425)*x^(-1.008)
 lines(x,yt/max(yt))

解决方法

这是使用 nls 和双曲线拟合的解决方案:

x=c(1.000000e-05,1.070144e-05,1.208082e-05,1.456624e-05,1.861581e-05,2.490437e-05,3.407681e-05,4.696710e-05,6.474653e-05,8.870800e-05,1.206194e-04,1.624442e-04,2.172716e-04,2.882747e-04,3.794489e-04,4.956619e-04,6.427156e-04,8.275095e-04,1.058201e-03,1.344372e-03,1.697222e-03,2.129762e-03,2.657035e-03,3.296215e-03,4.067301e-03,4.992831e-03,6.098367e-03,7.412836e-03,8.968747e-03,1.080251e-02,1.295471e-02,1.547045e-02,1.839960e-02,2.179713e-02,2.572334e-02,3.024414e-02,3.543131e-02,4.136262e-02,4.812205e-02,5.579985e-02,6.449256e-02,7.430297e-02,8.533991e-02,9.771803e-02,1.115573e-01,1.269824e-01,1.441219e-01,1.631074e-01,1.840718e-01,2.071477e-01,2.324656e-01,2.601509e-01,2.903210e-01,3.230812e-01,3.585200e-01,3.967033e-01,4.376671e-01,4.814084e-01,5.278744e-01,5.769469e-01,6.284244e-01,6.819947e-01,7.371982e-01,7.933704e-01,8.495444e-01,9.042616e-01)

ynorm=c(
  1.000000e+00,8.350558e-01,6.531870e-01,4.910995e-01,3.581158e-01,2.553070e-01,1.814526e-01,1.290639e-01,9.219591e-02,6.623776e-02,4.817180e-02,3.543117e-02,2.624901e-02,1.961542e-02,1.478284e-02,1.123060e-02,8.597996e-03,6.631400e-03,5.151026e-03,4.028428e-03,3.171096e-03,2.511600e-03,2.001394e-03,1.604211e-03,1.292900e-03,1.047529e-03,8.530624e-04,6.981015e-04,5.739778e-04,4.740553e-04,3.932255e-04,3.275345e-04,2.739059e-04,2.299339e-04,1.937278e-04,1.637946e-04,1.389500e-04,1.182504e-04,1.009406e-04,8.641380e-05,7.418032e-05,6.384353e-05,5.508090e-05,4.762920e-05,4.127282e-05,3.583451e-05,3.116813e-05,2.715264e-05,2.368759e-05,2.068935e-05,1.808802e-05,1.582499e-05,1.385102e-05,1.212452e-05,1.061032e-05,9.278534e-06,8.103650e-06,7.063789e-06,6.140038e-06,5.315870e-06,4.576585e-06,3.908678e-06,3.298963e-06,2.732866e-06,2.189810e-06,1.614149e-06)


dfxy <-  data.frame(x[1:length(ynorm)],ynorm)
plot(ynorm ~ x.1.length.ynorm..,data = dfxy)
mod <- nls(ynorm ~ a/x.1.length.ynorm.. + b,data = dfxy,start = list(a = 1,b = 0))
lines(x = dfxy$x.1.length.ynorm..,y = predict(mod,newdata = dfxy$x.1.length.ynorm..))

不过,合身并不完美。我想没有连续函数来拟合直角...

根据您想将回归用于什么目的,您也可以使用 loess 回归:

dfxy <-  data.frame(x[1:length(ynorm)],ynorm)
names(dfxy) <- c("x","y")
plot(y ~ x,data = dfxy)
mod <- loess(y ~ x,span = 0.1)
lines(x = dfxy$x,newdata = dfxy$x),col = "red")

结果:

loess regression