from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import webdriverwait
from selenium.webdriver.support import expected_conditions as EC
def wait(dr, x):
element = webdriverwait(dr, 50).until(
EC.presence_of_all_elements_located((By.XPATH, x))
)
return element
from selenium import webdriver
browser = webdriver.Firefox()
browser.get("http://www.dinamalar.com/user_comments.asp? uid=14701&name=%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%E0%AE%9A%E0%AF%86%E0%AE%B2%E0%AF%8D%E0%AE%B5%E0%AE%A9%E0%AF%8D")
for elem in wait(browser, '//*[@id="commsec"]/div[2]/div[1]'):
print elem.text
这是我需要提取所有评论http://www.dinamalar.com/user_comments.asp?uid=14701&name=%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%E0%AE%9A%E0%AF%86%E0%AE%B2%E0%AF%8D%E0%AE%B5%E0%AE%A9%E0%AF%8D的链接
但我的代码只提取了前10条评论.单击按钮后,动态加载其他10条注释.如何使用python selenium提取所有这些注释
解决方法:
我们的想法是寻找页面上有多少“更多创意”元素.每次单击按钮并加载更多注释时,都会出现一个“更多创意”红色按钮.执行:
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import webdriverwait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
browser = webdriver.Firefox()
wait = webdriverwait(browser, 10)
browser.get("http://www.dinamalar.com/user_comments.asp?uid=14701&name=%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%E0%AE%9A%E0%AF%86%E0%AE%B2%E0%AF%8D%E0%AE%B5%E0%AE%A9%E0%AF%8D")
# initial wait for the page to load
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, ".morered")))
pages = 1
while True:
browser.find_elements_by_css_selector(".morered")[-1].click()
# wait for more "load more" buttons to be present
try:
wait.until(lambda browser: len(browser.find_elements_by_css_selector(".morered")) > pages)
except TimeoutException:
break # no more data loaded, exit the loop
print("Comments loaded: %d" % len(browser.find_elements_by_css_selector(".dateg")))
pages += 1
browser.close()
请注意,我还删除了URL中的额外空间.