问题描述
这些是文件:
[1] "MYD11C3.A2003001.006.2015182092934_LST_Day_CMG_subregion.tif"
[2] "MYD11C3.A2004001.006.2015213013933_LST_Day_CMG_subregion.tif"
[3] "MYD11C3.A2005001.006.2015243211529_LST_Day_CMG_subregion.tif"
[4] "MYD11C3.A2006001.006.2015274114332_LST_Day_CMG_subregion.tif"
[5] "MYD11C3.A2007001.006.2015309201228_LST_Day_CMG_subregion.tif"
[6] "MYD11C3.A2008001.006.2015338170025_LST_Day_CMG_subregion.tif"
[7] "MYD11C3.A2009001.006.2016001145426_LST_Day_CMG_subregion.tif"
[8] "MYD11C3.A2010001.006.2016035025512_LST_Day_CMG_subregion.tif"
[9] "MYD11C3.A2011001.006.2016053231728_LST_Day_CMG_subregion.tif"
[10] "MYD11C3.A2012001.006.2016106151313_LST_Day_CMG_subregion.tif"
[11] "MYD11C3.A2013001.006.2016189231222_LST_Day_CMG_subregion.tif"
[12] "MYD11C3.A2014001.006.2016198015925_LST_Day_CMG_subregion.tif"
[13] "MYD11C3.A2015001.006.2016223172712_LST_Day_CMG_subregion.tif"
[14] "MYD11C3.A2016001.006.2016242200237_LST_Day_CMG_subregion.tif"
[15] "MYD11C3.A2017001.006.2017032230414_LST_Day_CMG_subregion.tif"
[16] "MYD11C3.A2018001.006.2018032175447_LST_Day_CMG_subregion.tif"
[17] "MYD11C3.A2019001.006.2019035162351_LST_Day_CMG_subregion.tif"
文件名中的“A2003”…“A2019”应分别重命名为“A2002”…“A2018”,即每个年份减 1。
谢谢!
解决方法
您可以使用 stringr 包中的 str_replace():它的 replacement 参数可以是一个函数。每出现一次匹配就会调用该函数一次,其返回值将用来替换匹配到的内容。
# Lower the four-digit year that follows "A" by one. The replacement is a
# function: it receives the matched text and its return value is substituted
# for the match.
stringr::str_replace(
  vec,
  pattern = "(?<=A)\\d{4}",
  replacement = function(year) as.numeric(year) - 1
)
# [1] "MYD11C3.A2002001.006.2015182092934_LST_Day_CMG_subregion.tif"
# [2] "MYD11C3.A2003001.006.2015213013933_LST_Day_CMG_subregion.tif"
# [3] "MYD11C3.A2004001.006.2015243211529_LST_Day_CMG_subregion.tif"
数据
# Sample input: three of the file names from the question.
vec <- c(
  "MYD11C3.A2003001.006.2015182092934_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2004001.006.2015213013933_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2005001.006.2015243211529_LST_Day_CMG_subregion.tif"
)
,
我建议使用下面这种 tidyverse 方法:先按句点把字符串拆分成若干列,把需要修改的那一列(年份)处理好,然后再把各列重新拼接起来。代码如下:
library(tidyverse)
#Code
# Rebuild every file name in df$V1 with the year in its second
# period-separated field lowered by one.
df %>%
  # Split on literal periods into five pieces, V1..V5
  separate(V1, into = paste0("V", 1:5), sep = "\\.") %>%
  # V2 looks like "A2003001": read the year from fixed positions 2-5 and keep
  # whatever follows it unchanged. Deleting every "A" or "001" with gsub()
  # would mangle a year that itself contains "001" (e.g. "A2001001").
  mutate(
    V2 = paste0(
      "A",
      as.numeric(substr(V2, 2, 5)) - 1,
      substr(V2, 6, nchar(V2))
    )
  ) %>%
  # paste() is already vectorised, so rowwise() is not needed for correctness;
  # it is kept so the result is still the rowwise tibble shown in the output.
  rowwise() %>%
  # Join the pieces back together with periods and keep only the new name
  mutate(V = paste(V1, V2, V3, V4, V5, sep = ".")) %>%
  select(V)
输出:
# A tibble: 17 x 1
# Rowwise:
V
<chr>
1 MYD11C3.A2002001.006.2015182092934_LST_Day_CMG_subregion.tif
2 MYD11C3.A2003001.006.2015213013933_LST_Day_CMG_subregion.tif
3 MYD11C3.A2004001.006.2015243211529_LST_Day_CMG_subregion.tif
4 MYD11C3.A2005001.006.2015274114332_LST_Day_CMG_subregion.tif
5 MYD11C3.A2006001.006.2015309201228_LST_Day_CMG_subregion.tif
6 MYD11C3.A2007001.006.2015338170025_LST_Day_CMG_subregion.tif
7 MYD11C3.A2008001.006.2016001145426_LST_Day_CMG_subregion.tif
8 MYD11C3.A2009001.006.2016035025512_LST_Day_CMG_subregion.tif
9 MYD11C3.A2010001.006.2016053231728_LST_Day_CMG_subregion.tif
10 MYD11C3.A2011001.006.2016106151313_LST_Day_CMG_subregion.tif
11 MYD11C3.A2012001.006.2016189231222_LST_Day_CMG_subregion.tif
12 MYD11C3.A2013001.006.2016198015925_LST_Day_CMG_subregion.tif
13 MYD11C3.A2014001.006.2016223172712_LST_Day_CMG_subregion.tif
14 MYD11C3.A2015001.006.2016242200237_LST_Day_CMG_subregion.tif
15 MYD11C3.A2016001.006.2017032230414_LST_Day_CMG_subregion.tif
16 MYD11C3.A2017001.006.2018032175447_LST_Day_CMG_subregion.tif
17 MYD11C3.A2018001.006.2019035162351_LST_Day_CMG_subregion.tif
使用了一些数据:
#Data
# Input data: one column V1 holding the 17 file names from the question.
# The number of names must match the 17 rows declared in row.names.
df <- structure(
  list(
    V1 = c(
      "MYD11C3.A2003001.006.2015182092934_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2004001.006.2015213013933_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2005001.006.2015243211529_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2006001.006.2015274114332_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2007001.006.2015309201228_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2008001.006.2015338170025_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2009001.006.2016001145426_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2010001.006.2016035025512_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2011001.006.2016053231728_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2012001.006.2016106151313_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2013001.006.2016189231222_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2014001.006.2016198015925_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2015001.006.2016223172712_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2016001.006.2016242200237_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2017001.006.2017032230414_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2018001.006.2018032175447_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2019001.006.2019035162351_LST_Day_CMG_subregion.tif"
    )
  ),
  class = "data.frame",
  row.names = c(NA, -17L)
)
,
欢迎来到正则表达式(regex)的世界。
# Input data: one column V1 holding the 17 file names from the question,
# matching the 17 rows in the printed output below.
df <- structure(
  list(
    V1 = c(
      "MYD11C3.A2003001.006.2015182092934_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2004001.006.2015213013933_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2005001.006.2015243211529_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2006001.006.2015274114332_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2007001.006.2015309201228_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2008001.006.2015338170025_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2009001.006.2016001145426_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2010001.006.2016035025512_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2011001.006.2016053231728_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2012001.006.2016106151313_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2013001.006.2016189231222_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2014001.006.2016198015925_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2015001.006.2016223172712_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2016001.006.2016242200237_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2017001.006.2017032230414_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2018001.006.2018032175447_LST_Day_CMG_subregion.tif",
      "MYD11C3.A2019001.006.2019035162351_LST_Day_CMG_subregion.tif"
    )
  ),
  class = "data.frame",
  row.names = c(NA, -17L)
)
# Add two columns: the year found after ".A" lowered by one, and the file
# name with that year substituted back in.
df %>%
  as_tibble() %>%
  mutate(
    # The lookbehind anchors on ".A" so only the four digits are matched
    current_year = as.numeric(str_extract(V1, pattern = "(?<=\\.A)\\d{4}")) - 1,
    new_file_name = str_replace(
      V1,
      pattern = "(?<=\\.A)\\d{4}",
      replacement = as.character(current_year)
    )
  )
输出
V1 current_year new_file_name
<chr> <dbl> <chr>
1 MYD11C3.A2003001.006.2015182092934_LST_~ 2002 MYD11C3.A2002001.006.2015182092934_LST_Da~
2 MYD11C3.A2004001.006.2015213013933_LST_~ 2003 MYD11C3.A2003001.006.2015213013933_LST_Da~
3 MYD11C3.A2005001.006.2015243211529_LST_~ 2004 MYD11C3.A2004001.006.2015243211529_LST_Da~
4 MYD11C3.A2006001.006.2015274114332_LST_~ 2005 MYD11C3.A2005001.006.2015274114332_LST_Da~
5 MYD11C3.A2007001.006.2015309201228_LST_~ 2006 MYD11C3.A2006001.006.2015309201228_LST_Da~
6 MYD11C3.A2008001.006.2015338170025_LST_~ 2007 MYD11C3.A2007001.006.2015338170025_LST_Da~
7 MYD11C3.A2009001.006.2016001145426_LST_~ 2008 MYD11C3.A2008001.006.2016001145426_LST_Da~
8 MYD11C3.A2010001.006.2016035025512_LST_~ 2009 MYD11C3.A2009001.006.2016035025512_LST_Da~
9 MYD11C3.A2011001.006.2016053231728_LST_~ 2010 MYD11C3.A2010001.006.2016053231728_LST_Da~
10 MYD11C3.A2012001.006.2016106151313_LST_~ 2011 MYD11C3.A2011001.006.2016106151313_LST_Da~
11 MYD11C3.A2013001.006.2016189231222_LST_~ 2012 MYD11C3.A2012001.006.2016189231222_LST_Da~
12 MYD11C3.A2014001.006.2016198015925_LST_~ 2013 MYD11C3.A2013001.006.2016198015925_LST_Da~
13 MYD11C3.A2015001.006.2016223172712_LST_~ 2014 MYD11C3.A2014001.006.2016223172712_LST_Da~
14 MYD11C3.A2016001.006.2016242200237_LST_~ 2015 MYD11C3.A2015001.006.2016242200237_LST_Da~
15 MYD11C3.A2017001.006.2017032230414_LST_~ 2016 MYD11C3.A2016001.006.2017032230414_LST_Da~
16 MYD11C3.A2018001.006.2018032175447_LST_~ 2017 MYD11C3.A2017001.006.2018032175447_LST_Da~
17 MYD11C3.A2019001.006.2019035162351_LST_~ 2018 MYD11C3.A2018001.006.2019035162351_LST_Da~
,
对于基础 R 的解决方案,可以将 sapply() 与 sub() 结合使用:先提取文件名中 ".A" 之后的 4 位数字(年份),将其减 1,然后用 paste0() 重新拼上 ".A" 前缀,并用结果替换原来匹配到的部分。
# For each file name, read the four digits after ".A", subtract one, and put
# the result back in place of the original ".A" + year.
sapply(
  vec,
  function(file_name) {
    year <- as.integer(sub(".*?\\.A(\\d{4}).*", "\\1", file_name))
    sub("(\\.A\\d{4})", paste0(".A", year - 1), file_name)
  },
  # Only drops the element names for cleaner printed output
  USE.NAMES = FALSE
)
[1] "MYD11C3.A2002001.006.2015182092934_LST_Day_CMG_subregion.tif"
[2] "MYD11C3.A2003001.006.2015213013933_LST_Day_CMG_subregion.tif"
[3] "MYD11C3.A2004001.006.2015243211529_LST_Day_CMG_subregion.tif"
[4] "MYD11C3.A2005001.006.2015274114332_LST_Day_CMG_subregion.tif"
[5] "MYD11C3.A2006001.006.2015309201228_LST_Day_CMG_subregion.tif"
[6] "MYD11C3.A2007001.006.2015338170025_LST_Day_CMG_subregion.tif"
[7] "MYD11C3.A2008001.006.2016001145426_LST_Day_CMG_subregion.tif"
[8] "MYD11C3.A2009001.006.2016035025512_LST_Day_CMG_subregion.tif"
[9] "MYD11C3.A2010001.006.2016053231728_LST_Day_CMG_subregion.tif"
[10] "MYD11C3.A2011001.006.2016106151313_LST_Day_CMG_subregion.tif"
[11] "MYD11C3.A2012001.006.2016189231222_LST_Day_CMG_subregion.tif"
[12] "MYD11C3.A2013001.006.2016198015925_LST_Day_CMG_subregion.tif"
[13] "MYD11C3.A2014001.006.2016223172712_LST_Day_CMG_subregion.tif"
[14] "MYD11C3.A2015001.006.2016242200237_LST_Day_CMG_subregion.tif"
[15] "MYD11C3.A2016001.006.2017032230414_LST_Day_CMG_subregion.tif"
[16] "MYD11C3.A2017001.006.2018032175447_LST_Day_CMG_subregion.tif"
[17] "MYD11C3.A2018001.006.2019035162351_LST_Day_CMG_subregion.tif"
数据:
# Input data: the 17 file names from the question, matching the 17 results
# printed above.
vec <- c(
  "MYD11C3.A2003001.006.2015182092934_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2004001.006.2015213013933_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2005001.006.2015243211529_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2006001.006.2015274114332_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2007001.006.2015309201228_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2008001.006.2015338170025_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2009001.006.2016001145426_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2010001.006.2016035025512_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2011001.006.2016053231728_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2012001.006.2016106151313_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2013001.006.2016189231222_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2014001.006.2016198015925_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2015001.006.2016223172712_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2016001.006.2016242200237_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2017001.006.2017032230414_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2018001.006.2018032175447_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2019001.006.2019035162351_LST_Day_CMG_subregion.tif"
)