Python html.parser 模块，HTMLParser.close() 实例源码
我们从 Python 开源项目中提取了以下 14 个代码示例，用于说明如何使用 html.parser.HTMLParser.close()。
def _get_eol_list(self) -> typing.List[str]:
    """Scrape the FreeBSD website and return a list of EOL releases.

    Fetches ``self.eol_url``, feeds the decoded body to an ``EOLParser``
    and returns the parser's ``eol_releases``.

    Returns:
        The EOL releases collected by the parser, or an empty list when
        the HTTP status code is not 200.
    """
    request = urllib.request.Request(
        self.eol_url,
        headers={
            "Accept-Charset": "utf-8"
        }
    )
    with urllib.request.urlopen(request) as response:  # nosec: B310
        if response.getcode() != 200:  # noqa: T484
            # NOTE(review): the warning object is constructed but not
            # raised; presumably it reports through the logger on
            # construction -- confirm against iocage.lib.errors.
            iocage.lib.errors.distributionEOLWarningDownloadFailed(
                logger=self.logger,
                level="warning"
            )
            return []

        parser = EOLParser()
        # Undecodable bytes are dropped rather than failing the scrape.
        data = response.read().decode("utf-8", "ignore")
        # HTMLParser's method is lowercase ``feed``; ``Feed`` does not exist.
        parser.feed(data)
        # close() forces processing of any buffered, unterminated input.
        parser.close()
        return parser.eol_releases
def _run_check(self, source, expected_events, collector=None):
    """Feed *source* to a parser and fail unless the events match.

    Args:
        source: Iterable of string chunks; each chunk is fed separately.
        expected_events: Events the parser is expected to have recorded.
        collector: Parser to use; defaults to ``self.get_collector()``.
    """
    if collector is None:
        collector = self.get_collector()
    parser = collector
    for s in source:
        # HTMLParser's method is lowercase ``feed``; ``Feed`` does not exist.
        parser.feed(s)
    # close() flushes any input still buffered inside the parser.
    parser.close()
    events = parser.get_events()
    if events != expected_events:
        self.fail("received events did not match expected events\n"
                  "Expected:\n" + pprint.pformat(expected_events) +
                  "\nReceived:\n" + pprint.pformat(events))
def _parse_error(self, source):
    """Assert that parsing *source* raises ``html.parser.HTMLParseError``.

    NOTE(review): ``HTMLParseError`` was removed from ``html.parser`` in
    Python 3.5, so this helper only works on older interpreters.
    """
    def parse(source=source):
        parser = self.get_collector()
        # HTMLParser's method is lowercase ``feed``; ``Feed`` does not exist.
        parser.feed(source)
        parser.close()
    self.assertRaises(html.parser.HTMLParseError, parse)
def test_broken_comments(self):
    """Check that malformed ``<!...>`` constructs are reported as comments."""
    source = ('<! not really a comment >'
              '<! not a comment either -->'
              '<! -- close enough -->'
              '<!><!<-- this was an empty comment>'
              '<!!! another bogus comment !!!>')
    # One ('comment', text) event per bogus construct, in input order.
    expected = [
        ('comment', ' not really a comment '),
        ('comment', ' not a comment either --'),
        ('comment', ' -- close enough --'),
        ('comment', ''),
        ('comment', '<-- this was an empty comment'),
        ('comment', '!! another bogus comment !!!'),
    ]
    self._run_check(source, expected)
def _run_check(self, source, expected_events, collector=None):
    """Feed *source* to a parser and fail unless the events match.

    Args:
        source: Iterable of string chunks; each chunk is fed separately.
        expected_events: Events the parser is expected to have recorded.
        collector: Parser to use; defaults to ``self.get_collector()``.
    """
    # ``source`` and ``expected_events`` are read below, so they must be
    # parameters; callers pass them positionally before ``collector``.
    if collector is None:
        collector = self.get_collector()
    parser = collector
    for s in source:
        # HTMLParser's method is lowercase ``feed``; ``Feed`` does not exist.
        parser.feed(s)
    # close() flushes any input still buffered inside the parser.
    parser.close()
    events = parser.get_events()
    if events != expected_events:
        self.fail("received events did not match expected events\n"
                  "Expected:\n" + pprint.pformat(expected_events) +
                  "\nReceived:\n" + pprint.pformat(events))
def test_broken_comments(self):
    """Check that malformed ``<!...>`` constructs are reported as comments."""
    source = ('<! not really a comment >'
              '<! not a comment either -->'
              '<! -- close enough -->'
              '<!><!<-- this was an empty comment>'
              '<!!! another bogus comment !!!>')
    # One ('comment', text) event per bogus construct, in input order.
    expected = [
        ('comment', ' not really a comment '),
        ('comment', ' not a comment either --'),
        ('comment', ' -- close enough --'),
        ('comment', ''),
        ('comment', '<-- this was an empty comment'),
        ('comment', '!! another bogus comment !!!'),
    ]
    self._run_check(source, expected)
def _run_check(self, source, expected_events, collector=None):
    """Feed *source* to a parser and fail unless the events match.

    Args:
        source: Iterable of string chunks; each chunk is fed separately.
            Its ``repr`` is included in the failure message.
        expected_events: Events the parser is expected to have recorded.
        collector: Parser to use; defaults to ``self.get_collector()``.
    """
    # ``source`` and ``expected_events`` are read below, so they must be
    # parameters; callers pass them positionally before ``collector``.
    if collector is None:
        collector = self.get_collector()
    parser = collector
    for s in source:
        # HTMLParser's method is lowercase ``feed``; ``Feed`` does not exist.
        parser.feed(s)
    # close() flushes any input still buffered inside the parser.
    parser.close()
    events = parser.get_events()
    if events != expected_events:
        self.fail("received events did not match expected events" +
                  "\nSource:\n" + repr(source) +
                  "\nExpected:\n" + pprint.pformat(expected_events) +
                  "\nReceived:\n" + pprint.pformat(events))
def _parse_error(self, source):
    """Assert that parsing *source* raises ``html.parser.HTMLParseError``.

    The parse is also expected to emit a ``DeprecationWarning``.

    NOTE(review): ``HTMLParseError`` was removed from ``html.parser`` in
    Python 3.5, so this helper only works on older interpreters.
    """
    def parse(source=source):
        parser = self.get_collector()
        # HTMLParser's method is lowercase ``feed``; ``Feed`` does not exist.
        parser.feed(source)
        parser.close()
    with self.assertRaises(html.parser.HTMLParseError):
        with self.assertWarns(DeprecationWarning):
            parse()
def test_broken_comments(self):
    """Check that malformed ``<!...>`` constructs are reported as comments."""
    source = ('<! not really a comment >'
              '<! not a comment either -->'
              '<! -- close enough -->'
              '<!><!<-- this was an empty comment>'
              '<!!! another bogus comment !!!>')
    # One ('comment', text) event per bogus construct, in input order.
    expected = [
        ('comment', ' not really a comment '),
        ('comment', ' not a comment either --'),
        ('comment', ' -- close enough --'),
        ('comment', ''),
        ('comment', '<-- this was an empty comment'),
        ('comment', '!! another bogus comment !!!'),
    ]
    self._run_check(source, expected)
def test_convert_charrefs_dropped_text(self):
    """Check that trailing text is reported even without calling close()."""
    # #23144: make sure that all the events are triggered when
    # convert_charrefs is True, even if we don't call .close()
    parser = EventCollector(convert_charrefs=True)
    # before the fix, bar & baz was missing
    # HTMLParser's method is lowercase ``feed``; ``Feed`` does not exist.
    parser.feed("foo <a>link</a> bar & baz")
    self.assertEqual(
        parser.get_events(),
        [('data', 'foo '), ('starttag', 'a', []), ('data', 'link'),
         ('endtag', 'a'), ('data', ' bar & baz')]
    )
def _run_check(self, source, expected_events, collector=None):
    """Feed *source* to a parser and fail unless the events match.

    Args:
        source: Iterable of string chunks; each chunk is fed separately.
            Its ``repr`` is included in the failure message.
        expected_events: Events the parser is expected to have recorded.
        collector: Parser to use; defaults to ``self.get_collector()``.
    """
    # ``source`` and ``expected_events`` are read below, so they must be
    # parameters; callers pass them positionally before ``collector``.
    if collector is None:
        collector = self.get_collector()
    parser = collector
    for s in source:
        # HTMLParser's method is lowercase ``feed``; ``Feed`` does not exist.
        parser.feed(s)
    # close() flushes any input still buffered inside the parser.
    parser.close()
    events = parser.get_events()
    if events != expected_events:
        self.fail("received events did not match expected events" +
                  "\nSource:\n" + repr(source) +
                  "\nExpected:\n" + pprint.pformat(expected_events) +
                  "\nReceived:\n" + pprint.pformat(events))
def _parse_error(self, source):
    """Assert that parsing *source* raises ``html.parser.HTMLParseError``.

    The parse is also expected to emit a ``DeprecationWarning``.

    NOTE(review): ``HTMLParseError`` was removed from ``html.parser`` in
    Python 3.5, so this helper only works on older interpreters.
    """
    def parse(source=source):
        parser = self.get_collector()
        # HTMLParser's method is lowercase ``feed``; ``Feed`` does not exist.
        parser.feed(source)
        parser.close()
    with self.assertRaises(html.parser.HTMLParseError):
        with self.assertWarns(DeprecationWarning):
            parse()