问题描述
import os
import re
import sys
import time
from urllib.request import urlopen
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup as BS
# Working directory and start timestamp, captured when the script starts.
path = os.getcwd()
time_st = time.time()
url = "URL"

# Regular expressions (raw strings, so "\d" reaches the regex engine
# unchanged instead of being read as a string escape).
id2 = re.compile(r"gr\d")
fig = re.compile(r"fig\d")

# Download the page and parse it with the built-in HTML parser.
html = requests.get(url).text
sp = BS(html, "html.parser")

# Every <figure> tag whose id attribute matches the `fig` pattern.
J_fig = sp.find_all("figure", {"id": fig})
print(J_fig)

# A distinct loop name keeps the compiled `fig` pattern from being overwritten.
# NOTE(review): .get("src") yields None for a tag that has no `src` attribute;
# a <figure> normally carries none, so the URL presumably lives on a nested
# <img> -- confirm against the page markup.
for figure_tag in J_fig:
    print(figure_tag.get("src"))
这是我使用的代码,我只想用它获取 src 地址。首先,print(J_fig) 运行良好。
然而,它输出的是 figure 标签的全部代码。所以,当我运行上面的循环时,输出只是“None”。
如果只想获取 src 数据,该怎么办?谢谢。
附:我也试过 print(fig['src']),同样不起作用。
解决方法
以下是对您评论中内容的回答:
import requests
from bs4 import BeautifulSoup
from pprint import pp
def main(url):
    """Pretty-print the image URLs found in the page's ``.imageblock`` elements.

    For every element matching the CSS class ``imageblock``, the ``src`` of
    its first ``<img>`` descendant is collected.

    Args:
        url: Address of the page to download and parse.
    """
    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'lxml')
    # Skip blocks with no <img>, or whose <img> has no src, instead of
    # failing with TypeError / KeyError on the first such block.
    goal = [
        block.img['src']
        for block in soup.select('.imageblock')
        if block.img is not None and block.img.has_attr('src')
    ]
    pp(goal)


main('https://h-glacier.tistory.com/entry/Python-파이썬-BeautifulSoup4를-이용해-웹-크롤링-예제-만들어-보기')
输出:
['https://blog.kakaocdn.net/dn/Lpy7b/btqD6REn6D3/UWaaWCuCBKimtzAHp7lJ71/img.png','https://blog.kakaocdn.net/dn/djSmqZ/btqD6lr4aYI/9XXKO277XFN8REnBWKAU80/img.png','https://blog.kakaocdn.net/dn/ed2ktw/btqD7Hg6lAo/3n12aVBRPiJUiMMhdCaoG0/img.png','https://blog.kakaocdn.net/dn/Mapl1/btqD53yuVe3/Gb7VfyZ1PoKnCSGffkgV2K/img.png','https://blog.kakaocdn.net/dn/7Vged/btqD4y6X9dn/92pKptURb0VHu232pYAxqk/img.png','https://blog.kakaocdn.net/dn/lwzPC/btqD8hbbIbK/jnedJ6lBgKFxUUMzBytSD0/img.png','https://blog.kakaocdn.net/dn/dPJydy/btqD6lyOmpr/PXLYckP40Mvn6JqI1rQsrk/img.png','https://blog.kakaocdn.net/dn/begZOh/btqD8inCSyt/dkV15mFKXFeFlpxDCWQA71/img.png','https://blog.kakaocdn.net/dn/lLVFC/btqD8iOOeF8/dWfpa68wKNXPUboXgcZbz1/img.png','https://blog.kakaocdn.net/dn/bBAlHO/btqD7XKUr8k/gbaOmtBDJS8i9kbIRJKko1/img.png','https://blog.kakaocdn.net/dn/dMyQri/btqD5JAm4m6/htHrr9R8R0YDVhZG0rRJd0/img.png','https://blog.kakaocdn.net/dn/cskT4k/btqD4x780hK/KTjb7CM7st6As4M43aoMX0/img.png','https://blog.kakaocdn.net/dn/do1c5R/btqD6lFFEcH/D5mUX6YQVqEimkeVR7mTnK/img.png','https://blog.kakaocdn.net/dn/eg5W5P/btqD4yeW9Tt/ratG7YkUekSQKsaktxisF0/img.png','https://blog.kakaocdn.net/dn/pFxu2/btqD8PyQre4/8IpreDBy0exCHkSTRb8ne1/img.png']
这是对您的初始帖子的回答:
import requests
from bs4 import BeautifulSoup
from pprint import pp
def main(url):
    """Print the first ``<figure>`` element whose ``id`` starts with ``og``.

    Args:
        url: Address of the page to download and parse.
    """
    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'lxml')
    # select() already returns a list of matches; no copy is needed.
    goal = soup.select('figure[id^=og]')
    # Guard the empty case so a page without a match does not raise IndexError.
    if not goal:
        print('No <figure> with an id starting with "og" was found.')
        return
    print(goal[0])


main('https://h-glacier.tistory.com/entry/Python-파이썬-BeautifulSoup4를-이용해-웹-크롤링-예제-만들어-보기')
在该输出中,您找不到 src 属性。