问题描述
我使用一个栅格文件,该文件具有2015年草甘膦对大豆的全球施用量(以千克/公顷为单位)。我想计算每个国家的平均施用量,并得出以千克为单位的总施用量。
我尝试提取数据,但是当我检查数据时,它们并没有加起来,因此我需要一些错误信息。
这是我的代码:
## Load libraries and read data ----
library(raster)
library(tidyverse)
library(maptools) # To get the borders of countries
# Border of countries
data(wrld_simpl)
# Make a data frame to put my numbers on the corresponding countries
world_data_empty = data.frame(country = wrld_simpl$NAME,iso3c = wrld_simpl$ISO3)
# This is a file with the application rate of glyphosate to soybeans in 2015 in kg/ha
raster_path = "https://raw.github.com/hansronald/pesticide-data/master/APR_Soybean_Glyphosate_2015_L.tif"
sqm_to_ha_conversion = 0.0001
rast <- raster(raster_path)
## Extract the raster data ----
# Remove all the negative values of the raster
# This is because negative values correspond to things like water and I dont want them to count when adding,replace with NAs
# Then trim to remove NAs
rast_positive = rast
rast_positive <- clamp(rast,lower=0,useValues=FALSE)
rast_trimmed = trim(rast_positive)
# Multiply the kg/ha with the area of each cell to get the total kgs
rast_total_pesticide_application = rast_trimmed * area(rast_trimmed)
# Get the mean applcation rate (kg/ha) for each country
mean_application_rate_extract = raster::extract(rast_trimmed,wrld_simpl,fun = mean,na.rm = TRUE) # sp = T for keeping original dataframe
# Get the total applcation rate (kg/ha) for each country
total_application_extract = raster::extract(rast_total_pesticide_application,fun = sum,na.rm = TRUE)
# Get the total area
total_area_extract = raster::extract(area(rast_trimmed),na.rm = TRUE)
## Create the data frame ----
# Put the mean pesticide use per country in a dataframe and name the column after the pesticide
# NaNs created because all raster cells are NA in country polygon,replace with 0
mean_application_rate_extract[is.nan(mean_application_rate_extract)] = 0
mean_application_rate = data.frame(mean_application_rate = mean_application_rate_extract)
total_application = data.frame(total_application = total_application_extract)
total_area = data.frame(total_area = total_area_extract)
world_data = data.frame(world_data_empty,mean_application_rate,total_application,total_area) %>%
as_tibble()
# Check calculations by selecting a few countries and multiplying apr*area
world_data %>%
filter(iso3c %in% c("CHN","BRA","USA")) %>%
mutate(total_application_calc = mean_application_rate * total_area)
这是输出
country iso3c mean_application_rate total_application total_area total_application_calc
<fct> <fct> <dbl> <dbl> <dbl> <dbl>
1 Brazil BRA 2.00 4253187. 84469. 168653.
2 China CHN 2.09 9153007. 93254. 194736.
3 United States USA 1.93 5070446. 93889. 181164.
所以这里有一些问题。首先是total_application_calc应该等于总施用量(因为它是施用量(kg / ha)乘以总面积(ha)。
但是问题还在于整个应用程序似乎至少具有一个数量级。根据{{3}},2014年草甘膦在大豆上的总施用量为122,473,987磅,相当于55,553,266千克,而我从该数据集中获得的5,070,446千克。可以略有不同,因为它们是来自不同假设的不同来源,但是没有那么多。
有人可以在我做错事情的地方帮忙吗?
解决方法
我稍微简化了您的代码,现在我觉得数字更有意义。我不能说外部有效性问题。
library(raster)
library(maptools)
data(wrld_simpl)
r <- raster("https://raw.github.com/hansronald/Pesticide-data/master/APR_Soybean_Glyphosate_2015_L.tif")
r <- clamp(r,lower=0,useValues=FALSE)
# area in ha
a <- area(r) * 100
mean_app <- raster::extract(r,wrld_simpl,fun = mean,na.rm = TRUE)
rtot <- r * a
tot_app <- raster::extract(rtot,fun = sum,na.rm = TRUE)
我认为您在这里犯了一个错误。您需要使用非仅NA的单元格。
rarea <- mask(a,r)
tot_area <- raster::extract(rarea,na.rm = TRUE)
## not
## tot_area <- raster::extract(area(r),na.rm = TRUE)
检查结果
w <- data.frame(country = wrld_simpl$NAME,iso3c = wrld_simpl$ISO3,mean_app=mean_app,tot_app=tot_app,tot_area=tot_area)
w$tot_calc <- w$tot_area * w$mean_app
w[is.na(w)] <- 0
w[w$iso3c %in% c("CHN","BRA","USA"),]
# country iso3c mean_app tot_app tot_area tot_calc
#21 Brazil BRA 1.996631 425318662 213982181 427243427
#30 China CHN 2.088219 915300667 439036703 916804725
#209 United States USA 1.929559 507044556 263544939 508525402
global_app <- cellStats(rtot,"sum")
global_app
# 2375398749
sum(w$tot_app)
# 2367120358