使用 XPath 的 findElement 逐行运行时正常,但在循环中运行时失败

问题描述

我想从这个网站收集电子邮件地址。为此我写了下面这个循环:逐行单独运行每个部分时可以正常工作,但把它们放在一起运行时就会失败。

library(RSelenium)


#######################################University College dublin
dep<-"https://people.ucd.ie/search?by=text"
rD <- rsDriver(browser="firefox",port=4545L,verbose=F)
remDr <- rD[["client"]]
remDr$navigate(dep)

mail<-list()

for(i in 2:65){
  if(i==2){
    webElem <- remDr$findElement(using = 'xpath','//*[@id="app"]/div/div/main/div[2]/div[3]/div[2]/div[1]/div[3]/span[2]')  
    webElem$clickElement()
    
  }else{
  w<-  paste0('//*[@id="app"]/div/div/main/div[2]/div[3]/div[2]/div[1]/div[3]/span[',i,"]/button" )
  webElem <- remDr$findElement(using = 'xpath',w)  
  webElem$clickElement()
  }
  
  for(j in 1:25){
    #click each person 25 x page
    ww<-paste0('//*[@id="app"]/div/div/main/div[2]/div[3]/div[2]/div[4]/div[',j,"]/div[1]/div[2]/div[1]/a" )
    webElem <- remDr$findElement(using = 'xpath',ww)  
    webElem$clickElement()
    #click emails
    webElem <- remDr$findElement(using = 'xpath','//*[@id="app"]/div/div/main/div[1]/div[3]/div[3]/div[2]/div[1]/div[2]/span/a')  
    ma<-webElem$getElementText()
    if(length(ma)!=0){mail<-c(mail,ma)}
    webElem$goBack() 
    rm(ma)
  }
 }

解决方法

下面是一个可行的解决方案,但它非常耗时。有没有更好的办法?

dep<-"https://people.ucd.ie/search?by=text"
rD <- rsDriver(browser="firefox",port=4545L,verbose=F)
remDr <- rD[["client"]]

mail<-list()
remDr$navigate(dep)
peo<-paste0('.userStub__userStub___ju2wK:nth-child(',1:25,') a')

for(i in 1:63){
for(j in 1:25){
  #.userStub__userStub___ju2wK:nth-child(1) a
  #.userStub__userStub___ju2wK:nth-child(2) a
  #.userStub__userStub___ju2wK:nth-child(25) a
  webElem <- remDr$findElement(using = 'css selector',peo[j])  
  webElem$clickElement()
  Sys.sleep(1) #time to load the page
  
  r<-webElem$getPageSource() #get all webpage text and selct mailto
  r<-unlist(str_split(as.character(r),'"'))
  w<-which(grepl("mailto:",r))
  
  if(length(w)!=0){
  a<-r[w]
  a<-gsub("mailto:","",a,fixed = T)
  mail<-c(mail,a)
  }
  
  #go back
  webElem <- remDr$findElement(using = 'css selector','#app > div > div > main > div.hero__hero___3_ZZJ > a')  
  webElem$clickElement()
  Sys.sleep(1) 
 # #app > div > div > main > div.hero__hero___3_ZZJ > a > div > span
  ##app > div > div > main > div.hero__hero___3_ZZJ > a
}
  
#next page
  webElem <- remDr$findElement(using = 'css selector','#app > div > div > main > div.results__resultsContainer___18wNx > div.results__paginatedUsersContainer___3PU1S > div:nth-child(3) > div:nth-child(1) > div.paginationBar__paginationItems___3UjZC > span:nth-child(6)')  
  webElem$clickElement()
  Sys.sleep(1) 
  
}