问题描述
我尝试了以下三种在网上找到的方法,在 Python 中读取 .conll 文件,但只得到了我看不懂的错误报告。我也了解到 .conll 文件有不同的类型,但我不知道我的数据集属于哪一种。我该如何判断?是否有必要根据 .conll 的具体类型来实现加载?是否有一种简单的方法来处理 .conll 文件?
from pyconll import load_from_file
data = load_from_file("wnut17train.conll")
ParseError Traceback (most recent call last)
<ipython-input-15-9d2ceebeda92> in <module>()
1 import pyconll
----> 2 data = pyconll.load_from_file("wnut17train.conll")
3 data
/usr/local/lib/python3.6/dist-packages/pyconll/load.py in load_from_file(filename)
46 """
47 with open(filename,encoding='utf-8') as f:
---> 48 c = Conll(f)
49
50 return c
/usr/local/lib/python3.6/dist-packages/pyconll/unit/conll.py in __init__(self,it)
30 self._sentences = []
31
---> 32 for sentence in pyconll._parser.iter_sentences(it):
33 self._sentences.append(sentence)
34
/usr/local/lib/python3.6/dist-packages/pyconll/_parser.py in iter_sentences(lines_it)
51 sent_lines.append(line)
52 elif sent_lines:
---> 53 sentence = _create_sentence(sent_lines)
54 sent_lines.clear()
55
/usr/local/lib/python3.6/dist-packages/pyconll/_parser.py in _create_sentence(sent_lines)
22 """
23 sent_source = '\n'.join(sent_lines)
---> 24 sentence = Sentence(sent_source)
25
26 return sentence
/usr/local/lib/python3.6/dist-packages/pyconll/unit/sentence.py in __init__(self,source)
77 self._Meta[k] = None
78 else:
---> 79 token = Token(line)
80 self._tokens.append(token)
81
/usr/local/lib/python3.6/dist-packages/pyconll/unit/token.py in __init__(self,source,empty)
661 error_msg = 'The number of columns per token line must be 10. Invalid token: {}'.format(
662 source)
--> 663 raise ParseError(error_msg)
664
665 # Assign all the field values from the line to internal equivalents.
ParseError: The number of columns per token line must be 10. Invalid token: @paulwalk O
from conllu import parse
train = parse("wnut17train.conll",fields=["id","form","lemma","postag"])
ParseException Traceback (most recent call last)
<ipython-input-21-7be24f5d9e1f> in <module>()
1 from conllu import parse
2
----> 3 train = parse("wnut17train.conll",fields=["id","form","lemma","postag"])
/usr/local/lib/python3.6/dist-packages/conllu/__init__.py in parse(data,fields,field_parsers,Metadata_parsers)
18 fields=fields,
19 field_parsers=field_parsers,
---> 20 Metadata_parsers=Metadata_parsers
21 ))
22
/usr/local/lib/python3.6/dist-packages/conllu/__init__.py in parse_incr(in_file,Metadata_parsers)
36 fields=fields,
37 field_parsers=field_parsers,
---> 38 Metadata_parsers=Metadata_parsers
39 ))
40
/usr/local/lib/python3.6/dist-packages/conllu/parser.py in parse_token_and_Metadata(data,Metadata_parsers)
94 Metadata[key] = value
95 else:
---> 96 tokens.append(parse_line(line,field_parsers))
97
98 return tokens,Metadata
/usr/local/lib/python3.6/dist-packages/conllu/parser.py in parse_line(line,field_parsers)
118
119 if len(line_split) == 1:
--> 120 raise ParseException("Invalid line format,line must contain either tabs or two spaces.")
121
122 data = Token()
ParseException: Invalid line format,line must contain either tabs or two spaces.
from nltk.corpus.reader import ConllChunkCorpusReader
TRAIN = ConllChunkCorpusReader("wnut17train.conll",('NP','VP','PP’),tagset="wsj”,encoding="utf-8”)
File "<ipython-input-26-9362233e7aa7>",line 2
TRAIN = ConllChunkCorpusReader("wnut17train.conll",('NP','VP','PP’),tagset="wsj”,encoding="utf-8”)
^
SyntaxError: EOL while scanning string literal
解决方法
暂未找到能够解决该问题的有效方法,小编正在努力寻找和整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)