问题描述
下面这段代码可以部分运行,但输出结果非常混乱。我想知道如何抓取额外的数据,以及如何整理输出格式。
from urllib.request import Request,urlopen
from bs4 import BeautifulSoup
import re,random,ctypes
import requests,os
from time import sleep
import beepy as beep
from time import strftime
import datetime
# Pool of browser User-Agent values; one is picked at random per run.
# Each entry is the bare header value (not Python source text), so it can be
# placed directly into a headers dict.
user_agent_list = [
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
]
header = {'User-Agent': random.choice(user_agent_list)}
line = "https://bscscan.com/address/0x639AD7c49EC616a64e074c21a58608C0d843A8a3"

# `headers=` must be passed by keyword: the second positional argument of
# requests.get() is `params` (the query string), not the request headers.
# The timeout keeps the script from hanging forever on a stalled connection.
contractpage = requests.get(line, headers=header, timeout=30)
# Fail with a clear HTTP error instead of an AttributeError on a missing
# element further down when the server returns an error page.
contractpage.raise_for_status()
ca = BeautifulSoup(contractpage.content, 'html.parser')

# Each lookup takes the full text of one element of the address page.
contractcreator = ca.find(id='ContentPlaceHolder1_trContract').get_text()
tokenname = ca.find(id='ContentPlaceHolder1_tr_tokeninfo').get_text()
transcount = ca.find('p', class_='mr-2 mb-2').get_text()
tokencount = ca.find(id='ContentPlaceHolder1_tokenbalance').get_text()

print (contractcreator)
print ("Token Name: ",tokenname)
print ("Trans Count: ",transcount)
print ("Token Count: ",tokencount)
当前输出:#-- 非常非常混乱,缺少一些预期数据
ContractCreator:
0x7ab96edb99e1faa06238609947792038520f1a3c at txn 0x51a8db6ac707dcd9644b5400b533c9bbe95243054c9c67e8a8aeeab38c7f7e79
Token Name:
TokenTracker:
TripCandy (CANDY)
Trans Count:
Latest 25 from a total of 2,878 transactions
Token Count:
Token:
$726.10
3
Could not find any matches! Token display limit reached. Click to Show more
BEP-20 Tokens (3) minereum BSC (MNEB)150,000 MNEB Neftipedia (NFT)1 NFT$0.01@0.0086TripCandy (CANDY)76,581.46551862 CANDY$726.09@0.0095
期望输出:
ContractCreator: 0x7ab96edb99e1faa06238609947792038520f1a3c
Txn: 0x51a8db6ac707dcd9644b5400b533c9bbe95243054c9c67e8a8aeeab38c7f7e79
Token Name: TripCandy (CANDY)
Trans Count: 2,875
Balance: 0.498586644749540253 BNB #-- needed additional data
Tokens ValCount: $732.78 / 3 #-- needed additional data and formatting
Token List: BEP-20 Tokens (3) #-- data from the dropdown Box
minereum BSC (MNEB)150,000 MNEB
Neftipedia (NFT)1 - $0.01 @ 0.0086
TripCandy (CANDY)76,581.46551862 - $732.77 @ 0.0096
解决方法
import requests
from bs4 import BeautifulSoup
from pprint import pp
# Request headers carrying a desktop Firefox User-Agent string.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0"}

# Addresses to look up; extend this list to query more than one.
adds = ["0x639AD7c49EC616a64e074c21a58608C0d843A8a3"]
def main(url):
    """Fetch each address page and pretty-print the fields scraped from it.

    Args:
        url: URL template with one ``{}`` placeholder; it is formatted with
            each entry of the module-level ``adds`` list in turn.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within the timeout.
        IndexError, AttributeError: If an expected element is missing from
            the downloaded page.
    """
    # One session for all addresses so the headers are set once.
    with requests.Session() as req:
        req.headers.update(headers)
        for add in adds:
            r = req.get(url.format(add), timeout=30)
            # Surface a blocked/error response as an HTTP error rather than
            # as an opaque IndexError from the positional lookups below.
            r.raise_for_status()
            soup = BeautifulSoup(r.text, 'lxml')
            goal = soup.select('div.card-body')
            # NOTE(review): the positional indices below are tied to the
            # page's markup; re-check them if the layout changes.
            first_card, second_card = goal[0], goal[1]
            hash_links = second_card.select('a.hash-tag')
            data = {
                'ContractCreator': hash_links[0].text,
                'Txn': hash_links[1].text,
                'Token Name': second_card.select('a')[3].text,
                'Trans Count': soup.select_one('p.mr-2 a').text,
                'Balance': first_card.select_one('.col-md-8').get_text(strip=True),
                # Only the first two text fragments are kept, joined by " / ".
                'Tokens ValCount': " / ".join(
                    list(first_card.select_one('.position-relative').stripped_strings)[:2]
                ),
                'Token List': [
                    x.get_text(strip=True)
                    for x in soup.select('.list.list-unstyled strong,.list-name')
                ],
            }
            pp(data)


main('https://bscscan.com/address/{}')
输出:
{'ContractCreator': '0x7ab96edb99e1faa06238609947792038520f1a3c','Txn': '0x51a8db6ac707dcd9644b5400b533c9bbe95243054c9c67e8a8aeeab38c7f7e79','Token Name': 'TripCandy (CANDY)','Trans Count': '2,880','Balance': '0.498586644749540253 BNB','Tokens ValCount': '$727.19 / 3','Token List': ['BEP-20 Tokens','Minereum BSC (MNEB)','Neftipedia (NFT)','TripCandy (CANDY)']}