问题描述
我正在尝试修改一个 Anki 插件的 .py 文件,以便能够从牛津学习者词典(Oxford Learner's Dictionaries)中解析单词。
from urllib.parse import urlencode

# Search endpoint template; its only placeholder is the query string.
URL = "https://www.oxfordlearnersdictionaries.com/search/english/?{}"

# Encode the phrase as a query string (the space is encoded as '+').
query = dict(q="come along")
qs = urlencode(query)

search_url = URL.format(qs)
print(search_url)
我的输出:
"https://www.oxfordlearnersdictionaries.com/search/english/?q=come+along"
但实际上输出应该是这样的:
"https://www.oxfordlearnersdictionaries.com/definition/english/come-along?q=come+along"
我是初学者,该如何解决这个问题?
# -*- coding:utf-8 -*-
import random
# The package-level star import stays ahead of the explicit imports below so
# that those explicit names keep taking precedence over anything it exports.
from ..base import *
import requests
from bs4 import BeautifulSoup
from time import sleep
from random import randint
from urllib.parse import urlencode
@register(u'Oxford_Article')
class Oxford_Article(WebService):
    """Web service that fetches an Oxford Learner's Dictionaries entry page.

    The markup inside the page's ``div#entryContent`` element is exposed as
    the ``Article`` field.
    """

    def __init__(self):
        # Restored from ``init``: the double underscores had been stripped, so
        # the base-class initialiser was never chained.
        super(Oxford_Article, self).__init__()

    def _get_from_api(self):
        """Download the entry page and cache its content as ``Article``.

        Returns:
            Whatever ``self.cache_this`` returns for the result dict. The
            ``Article`` value stays an empty string when the page has no
            ``div#entryContent`` element.
        """
        # Random pause of 1-3 seconds before each request.
        sleep(randint(1, 3))

        # NOTE(review): the phrase is hard-coded; presumably it should come
        # from the word being queried -- confirm against the base class.
        param = {"q": "come along"}
        # Encode the query string while the phrase still contains its spaces.
        qs = urlencode(param)
        # The path segment uses hyphens where the query string uses '+'.
        slug = param['q'].replace(' ', '-')
        URL = "https://www.oxfordlearnersdictionaries.com/definition/english/{}?{}"
        data = self.get_response(URL.format(slug, qs))
        soup = parse_html(data)
        result = {'Article': u''}

        element = soup.find('div', id='entryContent')
        # Check for a missing element *before* touching it: find() returns
        # None when the div is absent, and calling .select() on None raised
        # AttributeError in the previous version.
        if element is not None:
            # Drop embedded <script> tags so they do not end up in the field.
            for script in element.select('script'):
                script.extract()
            result['Article'] = u''.join(str(e) for e in element.contents)
        return self.cache_this(result)

    @export([u'entryContent', u'Article definition'])
    def fld_definate(self):
        """Return the cached ``Article`` field."""
        return self._get_field('Article')
解决方法
您可以这样做:
from urllib.parse import urlencode

# Entry-page template: first placeholder is the hyphenated headword,
# second is the encoded query string.
URL = "https://www.oxfordlearnersdictionaries.com/definition/english/{}?{}"

param = {"q": "come along"}
# Encode the query string while the phrase still contains its spaces ...
qs = urlencode(param)
# ... then hyphenate the phrase in place for use as the path segment.
param['q'] = '-'.join(param['q'].split(' '))

entry_url = URL.format(param['q'], qs)
print(entry_url)
如果您在解析数据时遇到错误,
只需替换这段代码:
# NOTE(review): neither setNames nor keep_names appears in the plugin code
# shown above; presumably this line was pasted here by mistake -- confirm
# which code the answer actually means to replace.
setNames(keep_names,keep_names)
使用此代码
# Fetch the entry page through the service's own get_response() helper,
# then hand the returned data to parse_html().
data = self.get_response(URL.format(param['q'],qs))
soup = parse_html(data)
也不要忘记导入此
# NOTE(review): this is not an import statement; it fetches the page with
# urllib.request (which none of the snippets shown imports) and parses the
# response with BeautifulSoup.
# No parser name is passed to BeautifulSoup here -- TODO confirm that is intended.
page = urllib.request.urlopen(URL.format(param['q'],qs))
soup = BeautifulSoup(page)