如何从文本文件读取多个 URL,而不是在脚本中硬编码单个 URL?

问题描述

如何从文本文件中读取多个 URL,而不是像下面的脚本那样只硬编码一个 URL?

from concurrent.futures import ThreadPoolExecutor

import requests

from timer import timer

# requests requires an explicit scheme; a bare host name is rejected
# with MissingSchema before any connection is attempted.
URL = 'https://www.url1.com'


def fetch(session, url):
    """Request *url* through *session* and print the response body.

    The object returned by ``session.get(url)`` is used as a context
    manager, so it is released as soon as the body has been printed.
    """
    with session.get(url) as response:
        body = response.text
        print(body)


@timer(1,5)
def main():
    """Send 100 concurrent GET requests for ``URL`` over one shared session."""
    request_count = 100
    with ThreadPoolExecutor(max_workers=100) as executor, requests.Session() as session:
        shared_sessions = [session] * request_count
        targets = [URL] * request_count
        executor.map(fetch, shared_sessions, targets)
        # Wait for every queued request while the session is still open.
        executor.shutdown(wait=True)

解决方法

您可以先把文本文件中的网址读入一个列表,然后用 executor.map 把每个 (会话, 网址) 元组传给函数:

from concurrent.futures import ThreadPoolExecutor
import requests
#from timer import timer


#########  create test file

# Sample URLs, one per line, used to build a throwaway input file.
URLsTest = '''
https://www.yahoo.com
https://www.oracle.com
https://www.python.org
https://www.reuters.com
https://www.walmart.com
https://www.google.com
'''.strip()

with open('tmp.txt', 'w', encoding='utf-8') as f:
    f.write(URLsTest)

####################


def read_urls(path):
    """Return the URLs listed one per line in the text file at *path*.

    Surrounding whitespace is stripped and blank lines are skipped, so a
    trailing newline or empty separator lines never yield empty entries.
    """
    with open(path, 'r', encoding='utf-8') as url_file:
        stripped = (line.strip() for line in url_file)
        return [line for line in stripped if line]


# Replaces the single hard-coded URL = 'https://www.url1.com'.
urls = read_urls('tmp.txt')    # url list

def fetch(tt):  # received tuple
    """Download one URL and return it together with its body.

    Args:
        tt: A ``(session, url)`` tuple. ``session.get(url)`` must return a
            context manager whose value exposes the body as ``.text``.

    Returns:
        A ``(url, html)`` tuple, which is the shape ``main`` indexes when it
        writes the results file.
    """
    session, url = tt
    print('Processing', url)
    with session.get(url) as response:
        html = response.text
        print(url, str(len(html)))
    return url, html

#@timer(1,5)
def main():
    """Fetch every entry of ``urls`` concurrently and save the pages.

    Each result produced by ``fetch`` is expected to be a ``(url, html)``
    pair; the pairs are written to ``allresults.txt`` in the same order as
    ``urls``, each followed by a blank line.
    """
    # The session is the outer context so it stays open until the
    # executor's own exit has waited for every worker; no explicit
    # shutdown() call is needed.
    with requests.Session() as session:  # for now, just one session
        with ThreadPoolExecutor(max_workers=100) as executor:
            # tuple list (session, url), each tuple passed to fetch
            results = executor.map(fetch, [(session, u) for u in urls])
    # write all results to text file
    with open('allresults.txt', 'w', encoding='utf8') as f2:
        # Stream the records instead of growing one large string with +=.
        f2.writelines(url + '\n' + html + '\n\n' for url, html in results)
            
# Run only when executed as a script, so importing this module does not
# start any network requests.
if __name__ == '__main__':
    main()

输出

Processing https://www.yahoo.com
Processing https://www.oracle.com
Processing https://www.python.org
Processing https://www.reuters.com
Processing https://www.walmart.com
Processing https://www.google.com
https://www.python.org 48984
https://www.yahoo.com 418732
https://www.oracle.com 36374
https://www.google.com 12904
https://www.reuters.com 195139
https://www.walmart.com 250661