# Baidu image-search scraper: fetches JSON result pages concurrently with a
# process pool and saves each thumbnail to disk, named by the MD5 of its bytes.
import requests
from urllib.parse import urlencode
from multiprocessing import Pool  # process pool for concurrent page fetches
from requests.exceptions import RequestException
import re  # FIX: was commented out, but page_re() calls re.compile/re.findall
import json
from hashlib import md5


def page_get(url):
    """GET *url* and return the response text, or None on failure/non-200."""
    try:
        html = requests.get(url)
        if html.status_code == 200:
            return html.text
        return None
    except RequestException:
        print('请求失败')  # "request failed"
        return None


def page_html(pn):
    """Fetch one search-result page at offset *pn* and print every parsed item."""
    data = {
        'tn': 'resultjson_com',
        'ipn': 'rj',
        'ct': 201326592,
        'is': '',
        'fp': 'result',
        'queryWord': '清晰图片',
        'cl': 2,
        'lm': -1,
        'ie': 'utf-8',
        'oe': 'utf-8',
        'adpicid': '',
        'st': -1,
        'z': 0,
        'ic': 0,
        'hd': 0,
        'latest': 0,
        'copyright': 0,
        'word': '清晰图片',
        's': '',
        'se': '',
        'tab': '',
        'width': 1920,
        'height': 1080,
        'face': '',
        'istype': '',
        'qc': '',
        'nc': 1,
        'fr': '',
        'expermode': '',
        'force': '',
        'pn': pn,   # result offset (30 per page)
        'rn': 30,   # results per page
        'gsm': '1e',
        '1561179768452': '',
    }
    url = 'https://image.baidu.com/search/acjson?' + urlencode(data)
    html = page_get(url)
    if html is None:
        # FIX: page_get returns None on failure; re.findall(None) would raise
        return
    for item in page_re(html):
        print(item)


def page_re(html):
    """Yield {'名称': title, 'img': thumb_url} dicts parsed from *html*,
    downloading each thumbnail as a side effect."""
    pattern = re.compile(
        '.*?fromPageTitle":"(.*?)",.*?thumbURL":"(.*?)",'
        '.*?middleURL":"(.*?)",.*?hoverURL":"(.*?)",',
        re.S)
    for item in re.findall(pattern, html):
        write_to(item[1])  # item[1] is the thumbURL
        yield {
            '名称': item[0],
            'img': item[1],
        }


def write_to(url):  # FIX: renamed from typo 'wrire_to' (internal helper, all callers in this file)
    """Download the image at *url* and hand its bytes to save_img()."""
    try:
        html = requests.get(url)
        if html.status_code == 200:
            save_img(html.content)
        return None
    except RequestException:
        print('请求失败')  # "request failed"
        return None


def save_img(content):  # FIX: renamed from typo 'asve_img' (internal helper, all callers in this file)
    """Write raw image bytes to the target folder, named by the MD5 of the
    content so identical images dedupe to a single file."""
    # NOTE(review): destination directory is hard-coded and must already exist
    # (the original os.path.exists check was commented out and 'os' unimported).
    file_path = '{0}/{1}.{2}'.format(
        r'C:\Users\Administrator\Desktop\img', md5(content).hexdigest(), 'jpg')
    with open(file_path, 'wb') as f:
        # FIX: removed redundant f.close() — the 'with' block closes the file
        f.write(content)


def main():
    """Scrape 10 result pages (offsets 0, 30, ..., 270) using a process pool."""
    pool = Pool()
    pool.map(page_html, [i * 30 for i in range(10)])
    pool.close()  # FIX: shut down worker processes once the map completes
    pool.join()


if __name__ == '__main__':
    main()