问题描述
我正在尝试在概率CFG上使用CYK算法构建解析树。这是我的概率CFG
pcfg = PCFG.fromstring("""S -> NP VP [1.0]
NP -> DT NN [0.5]
NP -> NP PP [0.25]
NP -> 'John' [0.1]
NP -> 'I' [0.15]
PP -> P NP [1.0]
VP -> V [0.4]
VP -> Vt NP [0.4]
VP -> VP PP [0.2]
V -> 'sleeps' [0.5]
V -> 'laughs' [0.5]
Vt -> 'saw' [0.7]
Vt -> 'ate' [0.3]
P -> 'with' [0.7]
P -> 'under' [0.3]
DT -> 'the' [0.7]
DT -> 'a' [0.3]
NN -> 'man' [0.7]
NN -> 'woman' [0.2]
NN -> 'telescope' [0.1]
""")
这是CYK构建解析树的代码:
class PCFGParser():
def __init__(self,grammar):
#Initialize Nonterminals and the Unary and Binary rules
self.unary_rules = []
self.binary_rules =[]
self.N =['S','NP','PP','VP','V','Vt','P','DT','NN'] # List of non-terminals from the grammar
for rule in grammar.productions():
if len(rule)==1:
self.unary_rules.append(rule)
elif len(rule)==2:
self.binary_rules.append(rule)
def q(self,X,Y,Z):
#Returns probabilities for Binary rules
print('binary_func_called')
print("X : ","TYPE X :",type(X))
print("Y : ","TYPE Y :",type(Y))
print("Z : ",Z,"TYPE Z :",type(Z))
for rule in self.binary_rules:
print('RULE LHS : ',rule.lhs(),'RULE LHS TYPE :',type(str(rule.lhs())))
print('RULE RHS 0 : ',rule.rhs()[0],'RULE RHS 0 TYPE : ',type(str(rule.rhs()[0])))
print('RULE RHS 1 : ',rule.rhs()[1],type(str(rule.rhs()[1])))
if str(rule.lhs())==X and str(rule.rhs()[0])==Y and str(rule.rhs()[1])==Z:
print('working- returns prob')
return rule.prob()
else:
print(' not satisfied')
return 0
def q_unary(self,W):
#Returns probabilities for Unary rules
for rule in self.unary_rules:
if str(rule.lhs())==X and rule.rhs()[0]==W:
return rule.prob()
return 0
def parse(self,sentence):
#Calls the CYK Algorithm and stores the parse tree in a JSON Format
sentence = sentence.strip()
print (json.dumps(self.CKY(sentence.split(' '))))
def CKY(self,x):
#Returns Tree for a grammar in Chomsky-normal Form
n = len(x) # length of sentence x
pi = defaultdict(float) # DP table pi
bp = {} # back pointers
# Base case
for i in range(n):
w = x[i]
for X in self.N:
pi[i,i,X] = self.q_unary(X,w)
# Recursive case
for l in range(1,n):
for i in range(n-l):
j = i + l
for X in self.N:
max_score = 0
args = None
for R in self.binary_rules: # search only within the rules with non-zero probability
if str(R.lhs()) == X: # consider rules which start from X
Y,Z= R.rhs()
Y = str(Y)
Z = str(Z)
for s in range(i,j):
if pi[i,s,Y] and pi[s + 1,j,Z]: # calculate score if both pi entries have non-zero score
score = self.q(X,Z) * pi[i,Y] * pi[s + 1,Z]
if max_score < score:
max_score = score
args = Y,s
if max_score: # update table and back pointers
pi[i,X] = max_score
bp[i,X] = args
# Backtrack to retrieve the tree
if pi[0,n-1,'S']:
return self.backtrack_tree(x,bp,'S')
else: # if start symbol is not 'S'
max_score = 0
args = None
for X in self.N:
print(pi[0,X])
if max_score < pi[0,X]:
print('if - working')
max_score = pi[0,X]
args = 0,X
return self.backtrack_tree(x,*args)
def backtrack_tree(self,sentence,X):
#Recurse to get the parse tree
if i == j:
return [X,sentence[i]]
else:
Y,s = bp[i,X]
return [X,self.backtrack_tree(sentence,Y),s+1,Z)]
parser = PCFGParser(pcfg)
parser.parse('the man sleeps')
binary_func_called
X : NP TYPE X : <class 'str'>
Y : DT TYPE Y : <class 'str'>
Z : NN TYPE Z : <class 'str'>
RULE LHS : S RULE LHS TYPE : <class 'str'>
RULE RHS 0 : NP RULE RHS 0 TYPE : <class 'str'>
RULE RHS 1 : VP RULE RHS 0 TYPE : <class 'str'>
not satisfied
RULE LHS : NP RULE LHS TYPE : <class 'str'>
RULE RHS 0 : DT RULE RHS 0 TYPE : <class 'str'>
RULE RHS 1 : NN RULE RHS 0 TYPE : <class 'str'>
working- returns prob
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-16-b0f35f4387c1> in <module>
----> 1 parser.parse('the man sleeps')
<ipython-input-11-33bb4cfed9b6> in parse(self,sentence)
57
58 sentence = sentence.strip()
---> 59 print (json.dumps(self.CKY(sentence.split(' '))))
60
61
<ipython-input-11-33bb4cfed9b6> in CKY(self,x)
109 max_score = pi[0,X]
110 args = 0,X
--> 111 return self.backtrack_tree(x,*args)
112
113
TypeError: backtrack_tree() argument after * must be an iterable,not nonetype
我遵循的算法:
有人可以帮我解决这个问题吗?我正确地遵循了上述算法,为什么会失败?我已尽力而为,但仍不知道如何解决此问题。递归似乎存在问题,因为它对S-> NP VP返回的概率为0。
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)