问题描述
hmencoder.py
import sys
import operator
class node:
def __init__ (self,element,num,leftChild,rightChild):
self.element = element #eg "a"
self.num = num #eg 2
self.leftChild = leftChild
self.rightChild = rightChild
def countChar (text): #counting num of occurance of each char and return a dict
charDict = {}
for i in text:
if i == "\n":
continue
if charDict.get(i,None) == None:
charDict[i] = 1
else:
charDict[i] += 1
sortedCharDict = dict (sorted (charDict.items(),key = operator.itemgetter(1),reverse = False))
#method of sorting dictionary from https://www.w3resource.com/python-exercises/dictionary/python-data-type-dictionary-exercise-1.PHP
return sortedCharDict
def readfile (argv): #extract text from the input file
file = open (argv,"r")
text = file.read()
file.close()
return text
def getBaseNode (charDict):
trees = []
for key in charDict:
trees.append(node(key,charDict[key],None,None))
return trees
def buildTree (trees):
if len(trees) == 1:
return trees
else:
if trees[0].element <= trees[1].element:
left = trees[0]
right = trees[1]
else:
left = trees[1]
right = trees[0]
new = node (left.element + right.element,left.num + right.num,left,right)
trees = trees[2:]
if not trees:
trees = trees + [new]
else:
trees = [new] + trees
for i in range(1,len(trees)):
if new.num <= trees[i].num:
trees[0],trees[i - 1] = trees [i - 1],trees[0]
break
return buildTree (trees)
def treeSearchCoding (tree,target,code):
if target == tree.element:
return code
if target not in tree.element:
return ""
elif target in tree.leftChild.element:
return treeSearchCoding (tree.leftChild,code + "0")
else:
return treeSearchCoding (tree.rightChild,code + "1")
def fetchCode (huffmanTree):
codeDict = {}
for i in range (32,127):
target = chr (i)
code = treeSearchCoding (huffmanTree,"")
if code != "":
codeDict [target] = code
return codeDict
def printCodeTable (codeDict,charDict):
ave = 0
symNum = 0
for i in codeDict:
ave += len(codeDict[i]) * charDict[i]
symNum += charDict[i]
ave = round (ave / symNum,3)
file = open ("code.txt","w")
for j in codeDict:
if j == " ":
msg = "space: " + codeDict[j] + "\n"
else:
msg = j + ": " + codeDict[j] + "\n"
file.write (msg)
msg = "Ave = " + str(ave) + " bits per symbol"
file.write (msg)
file.close()
def printEncodeMsg (text,codeDict):
msg = ""
count = 0
for i in text:
if i not in codeDict:
continue
else:
msg += codeDict[i]
file = open ("encodemsg.txt","w")
for j in msg:
file.write(j)
count += 1
if count == 80:
file.write("\n")
count = 0
file.close()
def main (argv):
text = readfile (argv)
charDict = countChar (text)
trees = getBaseNode (charDict)
trees = buildTree (trees)
huffmanTree = trees[0]
codeDict = fetchCode (huffmanTree)
printCodeTable (codeDict,charDict)
printEncodeMsg (text,codeDict)
if __name__ == "__main__":
main(sys.argv[1])
我正在对一个文本文件“input.txt”进行编码,该项目将使用霍夫曼树对文件中的所有可打印符号(不包括“\n”)进行编码。然后它会打印出一个列表,显示文件“code.txt”中的所有符号及其代码。以及“encodemsg.txt”中的编码消息。它根据我的算法生成了结果。但是,我的程序生成的每个符号的平均位数比示例高一点。我认为问题可能出在 buildTree 函数中。但我不确定。你能帮我找出什么是关闭的并删除代码的最后几位吗?谢谢你。您可以通过 >>py hmencoder.py input.txt
尝试代码解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)