Python BeautifulSoup 在 span 标签内抓取到了错误的数据

问题描述

我需要从以下 URL 的页面中抓取一些额外的数据:https://bscscan.com/tx/0x86d46163d35626df273bbed53cbdba26959d8ab7eb536af0f526dfa937c75dde。但我抓取的部分字段返回了错误的数据。

from bs4 import BeautifulSoup
from urllib import request
from urllib.request import Request,urlopen

# Script from the question: download one BscScan transaction page and print
# six fields extracted from it. Two lookups (blockconf, toaddr) are the ones
# the question reports as returning the wrong data.
url = 'https://bscscan.com/tx/0x86d46163d35626df273bbed53cbdba26959d8ab7eb536af0f526dfa937c75dde'
headers = {'User-Agent': 'Mozilla/5.0'}

# Send the request with the custom User-Agent header and parse the body.
req = Request(url,headers=headers)
html = urlopen(req).read()
soup = BeautifulSoup(html,'html.parser')

# NOTE: per the reported output, this span's text is only the confirmations
# part ("792 Block Confirmations"); the block number is not included.
blockconf = soup.find('span',class_='u-label u-label--xs u-label--badge-in u-label--secondary ml-1').text
fromaddr = soup.find('span',id='spanFromAdd').text
# NOTE: per the reported output, this lookup yields a raw address instead of
# the wanted "PancakeSwap: Router v2" label.
toaddr = soup.find('span',class_='hash-tag text-truncate hash-tag-custom-from tooltip-address').text
transcount = soup.find('span',class_='badge badge-pill badge-secondary align-midle').text

val = soup.find('span',class_='u-label u-label--value u-label--secondary text-dark rounded mr-1').text
transfee = soup.find('span',id='ContentPlaceHolder1_spanTxFee').text

# The first print passes two arguments, so print() inserts one extra space
# between the label and the value; the others concatenate into one string.
print ("Block Number:   ",str(blockconf))
print("From Address:    " + str(fromaddr))
print ("To Address:      " + str(toaddr))
print ("Transfer Count:  " + str(transcount))
print("Value:           " + str(val))
print("Transaction Fee: " + str(transfee))

当前输出

Block Number:    792 Block Confirmations #-- wrong data
From Address:    0xf3b1c9f3bbc351d90c7a539b26d789af57c869f7
To Address:      0x53a01b184df7ac3c5f839c65fd120c693dda274c #-- wrong data
Transfer Count:  8
Value:           0 BNB
Transaction Fee: 0.00529533 BNB ($1.60)

想要的输出

Block Number:    9428747 792 Block Confirmations  #-- wanted data
From Address:    0xf3b1c9f3bbc351d90c7a539b26d789af57c869f7
To Address:      PancakeSwap: Router v2  #-- wanted data
Transfer Count:  8
Value:           0 BNB
Transaction Fee: 0.00529533 BNB ($1.60)

解决方法

可以尝试下面的代码(注意:(PancakeSwap: Router v2) 是我从 "Interacted With (To):" 这一行抓取的):

from bs4 import BeautifulSoup
from urllib import request
from urllib.request import Request,urlopen

# Download one BscScan transaction page and print six fields extracted from it.
url = "https://bscscan.com/tx/0x86d46163d35626df273bbed53cbdba26959d8ab7eb536af0f526dfa937c75dde"
headers = {"User-Agent": "Mozilla/5.0"}

req = Request(url, headers=headers)
# Use the response as a context manager so the connection is closed as soon
# as the body has been read, instead of being left open.
with urlopen(req) as response:
    html = response.read()
soup = BeautifulSoup(html, "html.parser")

# The matched span holds only the "... Block Confirmations" text; reading the
# parent's text returns the block number together with it.
blockconf = soup.find(
    "span",
    class_="u-label u-label--xs u-label--badge-in u-label--secondary ml-1",
).parent.text.strip()
fromaddr = soup.find("span", id="spanFromAdd").text
# Take the ".mr-1" element that follows #contractCopy as a sibling; it holds
# the "(PancakeSwap: Router v2)" label rather than the raw address.
toaddr = soup.select_one("#contractCopy ~ .mr-1").text
transcount = soup.find(
    "span",
    class_="badge badge-pill badge-secondary align-midle",
).text

val = soup.find(
    "span",
    class_="u-label u-label--value u-label--secondary text-dark rounded mr-1",
).text
transfee = soup.find("span", id="ContentPlaceHolder1_spanTxFee").text

# Labels are padded so that all values start in the same column.
print(f"Block Number:    {blockconf}")
print(f"From Address:    {fromaddr}")
print(f"To Address:      {toaddr}")
print(f"Transfer Count:  {transcount}")
print(f"Value:           {val}")
print(f"Transaction Fee: {transfee}")

打印:

Block Number:    9428747 3013 Block Confirmations
From Address:    0xf3b1c9f3bbc351d90c7a539b26d789af57c869f7
To Address:      (PancakeSwap: Router v2)
Transfer Count:  8
Value:           0 BNB
Transaction Fee: 0.00529533 BNB ($1.60)

编辑:如果想得到 0x10ed43c718714eb63d5aa57b78b54704e256024e (PancakeSwap: Router v2) 这样的结果,请改用下面的写法:

#...

# Build "<address> <label>": the text of #contractCopy followed by the text of
# the ".mr-1" sibling after it, separated by a single space.
contract_text = soup.select_one("#contractCopy").text
label_text = soup.select_one("#contractCopy ~ .mr-1").text
toaddr = " ".join((contract_text, label_text))

#...