问题描述
我想从这个网站下载所有电子邮件地址。我发现要做到这一点，需要在“部门列表”下拉框中逐个选择部门名称，然后单击标题以展开隐藏的人员列表。如何一次取得完整的电子邮件地址列表？例如，针对“解剖学”（Anatomy）部门，我尝试了以下操作。
此外，有没有更好的方法：先用 CSS 选择器选出所有部门，然后循环遍历它们来获取电子邮件地址？提前致谢。
# Open the TCD people finder, select the "Anatomy" department, expand the
# result panel and collect the text of every e-mail link it contains.
library(RSelenium)

dep <- "http://peoplefinder.tcd.ie/#DEPTSEARCHRESULT" # page to scrape

# Open connection
rD <- rsDriver(browser = "firefox", port = 4545L, verbose = FALSE)
remDr <- rD[["client"]]
remDr$navigate(dep)

# Find and click on "Anatomy" (9th child of the department drop-down)
webElem <- remDr$findElement(
  using = "css selector",
  "#MainContent_UserControlSearchForPerson_DropDownListDepartments > option:nth-child(9)"
)
webElem$clickElement()

# Open the masked list; this selector matches the heading link that is
# interactable
webElem <- remDr$findElement(
  using = "css selector",
  "#DEPTSEARCHRESULT > div.panel-heading > div.panel-title.visible-md.visible-lg > h4 > a"
)
webElem$clickElement()

# The e-mail links carry ids of the form
# "..._UserControlEmailAddress_<n>_EmailLink_<n>". CSS has no wildcard inside
# an id selector, so match on the constant parts of the id with attribute
# substring selectors, and use findElements() (plural) to get every match
# instead of only the first one.
email_links <- remDr$findElements(
  using = "css selector",
  "[id*='_UserControlEmailAddress_'][id*='_EmailLink_']"
)

# getElementText() returns a one-element list; unwrap it so that `text` is a
# character vector with one entry per e-mail link (empty if nothing matched).
text <- vapply(
  email_links,
  function(link) link$getElementText()[[1]],
  character(1)
)
解决方法
我是这样解决的。您认为还有更好或更快的写法吗？
# Scrape the e-mail addresses of every department listed on the TCD people
# finder: select each department in the drop-down, wait for the page to
# reload, and harvest the "mailto:" links from the page source.

# Extract the unique e-mail addresses referenced by "mailto:" links in a page.
#
# page_source: length-one character vector holding the page HTML.
# Returns a character vector of unique addresses (possibly empty).
extract_emails <- function(page_source) {
  # Attribute values are delimited by double quotes, so splitting on '"'
  # isolates each href value as its own piece.
  pieces <- strsplit(page_source, "\"", fixed = TRUE)[[1]]
  links <- grep("mailto:", pieces, fixed = TRUE, value = TRUE)
  addresses <- gsub("mailto:", "", links, fixed = TRUE)
  # The "@" may appear URL-encoded as "%40" inside the href.
  addresses <- gsub("%40", "@", addresses, fixed = TRUE)
  unique(addresses)
}

dep <- "http://peoplefinder.tcd.ie/#DEPTSEARCHRESULT"
rD <- rsDriver(browser = "firefox", port = 4545L, verbose = FALSE)
remDr <- rD[["client"]]

# One CSS selector per drop-down entry, covering option:nth-child(8) through
# option:nth-child(225).
toclick <- paste0(
  "#MainContent_UserControlSearchForPerson_DropDownListDepartments > option:nth-child(",
  8:225,
  ")"
)

# Preallocate one slot per department instead of growing the result in the
# loop.
mail_by_dept <- vector("list", length(toclick))
for (i in seq_along(toclick)) {
  remDr$navigate(dep)
  # Click on the i-th item in the drop-down menu
  webElem <- remDr$findElement(using = "css selector", toclick[i])
  webElem$clickElement()
  Sys.sleep(3) # time to load the page
  # Read the page through the driver itself; the clicked element may no
  # longer be attached once the page has reloaded.
  page_source <- remDr$getPageSource()[[1]]
  mail_by_dept[[i]] <- extract_emails(page_source)
}

# Flat list with one address per element (same shape the script produced
# before); use unlist(mail) to get a plain character vector.
mail <- as.list(unlist(mail_by_dept, use.names = FALSE))

# NOTE: the browser session and Selenium server are left running; call
# remDr$close() and rD$server$stop() when finished.