问题描述
FirefoxOptions options = new FirefoxOptions();
^
SyntaxError: invalid Syntax
请帮助我使用自定义 User-Agent 初始化 webdriver。我希望在抓取网页时避免被识别为机器人。请使用 "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0" 作为 User-Agent。
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from time import sleep
from bs4 import BeautifulSoup
import pandas as pd
class DataExtract:
    """Hold a PhantomJS WebDriver that reports a custom User-Agent.

    The driver is created in ``__init__`` and exposed as ``self.driver``.
    """

    # Firefox 47 User-Agent string sent instead of PhantomJS's own.
    DEFAULT_USER_AGENT = (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) "
        "Gecko/20100101 Firefox/47.0"
    )
    # Location of the PhantomJS binary used when no path is supplied.
    DEFAULT_EXECUTABLE_PATH = r"C:/Pathtoexec/phantomjs/bin/phantomjs.exe"

    def __init__(self, user_agent=DEFAULT_USER_AGENT,
                 executable_path=DEFAULT_EXECUTABLE_PATH):
        """Start PhantomJS with the given User-Agent.

        Args:
            user_agent: Value reported to servers as the browser's
                User-Agent header.
            executable_path: Filesystem path of the PhantomJS executable.
        """
        # Work on a copy so the shared DesiredCapabilities.PHANTOMJS
        # template is not mutated for other drivers in the process.
        capabilities = dict(webdriver.DesiredCapabilities.PHANTOMJS)
        # PhantomJS takes its User-Agent from this page-settings capability;
        # Firefox preferences / Chrome arguments have no effect on it.
        capabilities["phantomjs.page.settings.userAgent"] = user_agent

        self.driver = webdriver.PhantomJS(
            executable_path=executable_path,
            desired_capabilities=capabilities,
            # PhantomJS counterpart of Chrome's --ignore-certificate-errors.
            # NOTE(review): Chrome's --allow-running-insecure-content has no
            # obvious PhantomJS equivalent and is not applied - confirm
            # whether it is needed.
            service_args=["--ignore-ssl-errors=true"],
        )
        self.accept_untrusted_certs = True
解决方法
在网上搜索之后,我找到了一种对我有效的方法。请告诉我如何检查 User-Agent 是否已按预期设置。
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from time import sleep
from bs4 import BeautifulSoup
import pandas as pd
# Scrape the fifth <table> of a page through PhantomJS while reporting a
# custom User-Agent, then print it as parsed by pandas.

# Copy the stock PhantomJS capabilities so the shared template stays untouched.
dcap = dict(DesiredCapabilities.PHANTOMJS)
# PhantomJS reads the User-Agent it sends from this page-settings capability.
dcap["phantomjs.page.settings.userAgent"] = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53 "
    "(KHTML,like Gecko) Chrome/15.0.87")
driver = webdriver.PhantomJS(desired_capabilities=dcap,executable_path=r"C:/PathtoExec/phantomjs.exe")
try:
    driver.get("https://www.webpagecontainingtables.com")
    # To confirm the override took effect, inspect
    # driver.execute_script("return navigator.userAgent") at this point.
    soup=BeautifulSoup(driver.page_source,'lxml')
finally:
    # Always shut the PhantomJS process down, even if loading the page fails;
    # otherwise the headless browser keeps running after the script exits.
    driver.quit()
# Index 4 selects the fifth table; raises IndexError if the page has fewer.
table = soup.find_all('table')[4]
# read_html returns a list of DataFrames (one per table found in the markup).
df = pd.read_html(str(table),header=0)
print(df)