总结
-
selenium基础
# Selenium basics: drive Chrome through a JD search, then walk the history.
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# 1. Create the browser object.
# NOTE: if the browser object is a local variable, the browser closes
# automatically when the function returns; if it is a global, the browser
# has to be closed manually.
b = webdriver.Chrome()

# 2. Open the page.
b.get('https://www.jd.com')

# 3. Read the page content.
# NOTE: page_source only contains the data the browser has loaded so far.
print(b.page_source)

# 4. Locate and operate on elements.
# 1) Input box: locate it -> type text -> press Enter.
# find_element(By.CSS_SELECTOR, ...) is used instead of
# find_element_by_css_selector, which was removed in Selenium 4.3.
# The element is not named `input`, to avoid shadowing the builtin.
search_box = b.find_element(By.CSS_SELECTOR, '#key')
search_box.send_keys('电脑')
search_box.send_keys(Keys.ENTER)

time.sleep(1)

# Locate the input box again after the search was submitted.
search_box2 = b.find_element(By.CSS_SELECTOR, '#key')
# Clear the current text before typing the next query.
search_box2.clear()
search_box2.send_keys('鼠标')

# 2) Button: locate it by class and click it.
search_btn = b.find_element(By.CSS_SELECTOR, '.button.cw-icon')
search_btn.click()

# 5. History navigation: go back twice, then forward twice.
time.sleep(1)
b.back()
time.sleep(1)
b.back()
time.sleep(1)
b.forward()
time.sleep(1)
b.forward()

# Close the browser (left disabled in the original notes).
# b.close()
-
selenium选项卡
# Selenium tabs: click a link on JD, list the window handles, switch tab.
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

b = webdriver.Chrome()
b.get('https://www.jd.com')

# Locate the <a> element of the "seckill" nav entry and click it.
# find_element(By.CSS_SELECTOR, ...) is used instead of
# find_element_by_css_selector, which was removed in Selenium 4.3.
miaosha = b.find_element(By.CSS_SELECTOR, '#navitems-group1>li>a')
miaosha.click()

# Print the handles of all open tabs.
print(b.window_handles)
time.sleep(2)

# Switch tabs: make the first handle in the list the active one.
b.switch_to.window(b.window_handles[0])
-
selenium获取网页cookie
# Selenium cookies (save): log in to Taobao by hand, then store the cookies.
import os
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

url = 'https://www.taobao.com'

b = webdriver.Chrome()
b.get(url)

# find_element(By.CSS_SELECTOR, ...) is used instead of
# find_element_by_css_selector, which was removed in Selenium 4.3.
# The element is not named `input`, to avoid shadowing the builtin.
search_box = b.find_element(By.CSS_SELECTOR, '#q')
search_box.send_keys('电脑')
search_box.send_keys(Keys.ENTER)

# The search leads to the login page; wait while the login is done by hand.
time.sleep(10)
print('人工操作结束')

# After the manual login, read the cookies and save them to a local file.
cookies = b.get_cookies()

# open(..., 'w') fails with FileNotFoundError if the directory is missing.
os.makedirs('files', exist_ok=True)

# The list is written as str(cookies), i.e. as a Python literal.
with open('files/taobao_cookies.txt', 'w', encoding='utf-8') as f:
    f.write(str(cookies))
-
selenium使用cookie
# Selenium cookies (load): restore saved Taobao cookies, then search.
import ast
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

b = webdriver.Chrome()
b.get('https://www.taobao.com')

# Load the saved cookies.
# SECURITY: the file used to be parsed with eval(), which would execute any
# code placed in it. ast.literal_eval accepts only Python literals, which is
# all a str()-dumped list of cookie dicts contains.
with open(r'./files/taobao_cookies.txt', 'r', encoding='utf-8') as f:
    py_obj = ast.literal_eval(f.read())

for obj in py_obj:
    # Only cookies whose 'secure' value is True support https requests;
    # .get() skips entries without that key instead of raising KeyError.
    if obj.get('secure'):
        b.add_cookie(obj)

time.sleep(1)

# Load the page again now that the cookies are set.
b.get('https://www.taobao.com')
# b.refresh()

# find_element(By.CSS_SELECTOR, ...) is used instead of
# find_element_by_css_selector, which was removed in Selenium 4.3.
# The element is not named `input`, to avoid shadowing the builtin.
search_box = b.find_element(By.CSS_SELECTOR, '#q')
search_box.send_keys('电脑')
search_box.send_keys(Keys.ENTER)
-
页面滚动
# Page scrolling: search JD, scroll the result page step by step, parse it.
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys


def get_net_data(max_height: int = 7000, step: int = 500) -> str:
    """Search JD for '电脑', scroll down the page, and return its HTML.

    Args:
        max_height: Scrolling stops after the first position that exceeds
            this vertical offset (in pixels).
        step: Number of pixels added to the scroll position each iteration.

    Returns:
        The browser's ``page_source`` after the last scroll.

    Raises:
        ValueError: If ``step`` is not positive (the loop would never end).
    """
    if step <= 0:
        raise ValueError('step must be a positive number of pixels')

    # The driver is stored in a module-level name so the browser object
    # outlives this function call.
    global b
    b = webdriver.Chrome()
    b.get('https://www.jd.com')

    # find_element(By.CSS_SELECTOR, ...) is used instead of
    # find_element_by_css_selector, which was removed in Selenium 4.3.
    search_box = b.find_element(By.CSS_SELECTOR, '#key')
    search_box.send_keys('电脑')
    search_box.send_keys(Keys.ENTER)
    time.sleep(1)

    # ================== scrolling ==================
    # y is the vertical position scrolled to on each iteration.
    y = 0
    while True:
        y += step
        b.execute_script(f'window.scrollTo(0, {y})')
        if y > max_height:
            break
        # Pause between scroll steps.
        time.sleep(1)

    return b.page_source


def an_data(data: str) -> None:
    """Parse the HTML in *data* and print the matched <li> items.

    Prints the number of elements matching ``#J_goodsList > ul > li``
    followed by the list of matched elements.
    """
    # Reference selector from the original notes:
    # #J_goodsList > ul > li:nth-child(1)
    soup = BeautifulSoup(data, 'lxml')
    li_list = soup.select('#J_goodsList > ul > li')
    print(len(li_list))
    print(li_list)


if __name__ == '__main__':
    an_data(get_net_data())