问题描述
我收到错误“ NameError:未定义名称'urllib'”。但是我正在尝试导入urlopen,但是它是灰色的(在Intellij中第二行代码是灰色的,因为它认为这是不必要的)。我导入不正确吗?还是我的代码中还有其他原因导致错误?
请查看我的代码并提供帮助。
from html.parser import HTMLParser
from urllib.request import urlopen
class ContentParser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
self.tagList = []
self.wordDictionary = {}
def handle_starttag(self,tag,attrs):
self.tagList.append(tag)
def handle_endtag(self,tag):
self.tagList.pop()
def handle_data(self,data):
if len(self.tagList) > 0 and (self.tagList[-1]) not in ["script"]:
self.AddContents(data)
def AddContents(self,data):
data = self.RemoveSpecialChar(data)
contents = data.split()
for word in contents:
word = word.decode("utf-8")
if word in self.wordDictionary:
self.wordDictionary[word] += 1
else:
self.wordDitionary[word] = 1
def RemoveSpecialChar(self,data):
data = str.encode(data)
ret = data
for char in data:
if not ((char >= ord("a") and char <= ord("z")) or (char >= ord("A"))):
if char != ord(""):
ret = ret.replace(bytes(chr(char),"utf-8"),b"")
return ret
def PrintTopUsedWords(self,count):
sortedDic = sorted(self.wordDictionary.items(),key=lambda x: x[1],reverse=True)
rangeN = count
if len(sortedDic) < count:
rangeN = len(sortedDic)
for i in range(rangeN):
print(sortedDic[i])
def getWords(url):
response = urllib.request.urlopen(url)
html = response.read()
wordParser = ContentParser()
wordParser.feed(html.decode("utf-8"))
wordParser.PrintTopUsedWords(25)
getWords("https://law.depaul.edu/Pages/default.aspx")
解决方法
在Python 3中,最简单的方法是做
import urllib.request
data = urllib.request.urlopen("http://google.com")