关于哈夫曼树编码调试

问题描述

hmencoder.py

import sys
import operator

class node:
    """One vertex of the Huffman tree: a symbol string, its weight and two subtrees."""

    def __init__ (self,element,num,leftChild,rightChild):
        # Symbol text (e.g. "a") together with its occurrence count (e.g. 2).
        self.element, self.num = element, num
        # Child subtrees; callers pass None for both when the node has no children.
        self.leftChild, self.rightChild = leftChild, rightChild

def countChar (text):
    """Count the occurrences of every character in ``text`` except newlines.

    Returns a dict mapping character -> count whose items are ordered by
    ascending count. The sort is stable, so characters with equal counts
    keep the order in which they were first seen.
    """
    tally = {}
    for symbol in text:
        if symbol != "\n":
            tally[symbol] = tally.get(symbol, 0) + 1
    # dictionary sorting approach taken from
    # https://www.w3resource.com/python-exercises/dictionary/python-data-type-dictionary-exercise-1.PHP
    ordered = sorted(tally.items(), key=operator.itemgetter(1))
    return dict(ordered)

def readfile (argv):
    """Return the complete text of the file whose path is ``argv``.

    The file is opened in text mode ("r") with the platform default
    encoding. A ``with`` block guarantees the handle is closed even when
    reading fails part-way (the previous open/read/close sequence leaked
    the handle in that case).

    Raises OSError (e.g. FileNotFoundError) if the file cannot be opened.
    """
    with open (argv,"r") as source:
        return source.read()

def getBaseNode (charDict):
    """Turn every (symbol, count) entry of ``charDict`` into a childless node.

    The returned list follows the iteration order of ``charDict``.
    """
    return [node(symbol, count, None, None) for symbol, count in charDict.items()]

def buildTree (trees):
    """Merge a list of nodes into a single Huffman tree.

    Repeatedly removes the two lightest trees, joins them under a new
    parent whose ``num`` is the sum of both weights and whose ``element``
    is the concatenation of both elements, and puts the parent back at its
    sorted position.

    Parameters:
        trees: list of node objects (normally the output of getBaseNode).

    Returns:
        A list: empty for empty input, otherwise a one-element list holding
        the root (callers read ``result[0]``).

    Fixes over the previous version:
      * The merged node used to be *swapped* with the element in front of
        its target slot, which scrambled the order of the lighter trees,
        and it stayed at the front when it was heavier than everything
        else. Later merges therefore did not always pick the two lightest
        trees, giving codes longer than optimal. It is now inserted at its
        real sorted position.
      * An empty list no longer raises IndexError.
      * Iteration replaces recursion, so large alphabets cannot hit the
        recursion limit.
    """
    if len(trees) <= 1:
        return trees
    # Stable sort by weight: input that is already ascending keeps its order,
    # input that is not is made safe for the "take the first two" step below.
    forest = sorted(trees, key=lambda tree: tree.num)
    while len(forest) > 1:
        first, second = forest[0], forest[1]
        # Child order is decided by comparing the element strings; this only
        # affects which side gets "0"/"1", never the code lengths.
        if first.element <= second.element:
            left, right = first, second
        else:
            left, right = second, first
        new = node (left.element + right.element,left.num + right.num,left,right)
        forest = forest[2:]
        # Insert in front of the first tree that is at least as heavy, so the
        # list stays sorted by weight.
        position = 0
        while position < len(forest) and forest[position].num < new.num:
            position += 1
        forest.insert(position, new)
    return forest

def treeSearchCoding (tree,target,code):
    """Return the bit string leading from ``tree`` down to the node for ``target``.

    Parameters:
        tree:   node to start from; each visited node must expose ``element``,
                ``leftChild`` and ``rightChild``.
        target: the symbol to look for.
        code:   bits accumulated so far (pass "" at the root).

    Returns:
        ``code`` extended with "0" for every step to a left child and "1" for
        every step to a right child, or "" when ``target`` does not occur in
        the current node's ``element``.

    Fix: the recursive calls used to omit ``target`` (two arguments for a
    three-parameter function) and raised TypeError as soon as the search had
    to descend. The descent is now a loop that carries ``target`` along.
    """
    while target != tree.element:
        if target not in tree.element:
            return ""
        # Relies on a parent's element containing the elements of its children.
        if target in tree.leftChild.element:
            tree = tree.leftChild
            code = code + "0"
        else:
            tree = tree.rightChild
            code = code + "1"
    return code

def fetchCode (huffmanTree):
    """Build the symbol -> bit-string table for the printable ASCII range.

    Every character with code point 32..126 is looked up in ``huffmanTree``
    with treeSearchCoding; characters that are not in the tree are left out.
    Keys appear in ascending code-point order.

    Fixes:
      * ``target`` is now passed to treeSearchCoding; the previous call
        supplied only two of the three arguments and raised TypeError.
      * When the tree consists of a single node, the search yields an empty
        code for its symbol, which used to drop the symbol from the table.
        It now receives the one-bit code "0".
    """
    codeDict = {}
    for codePoint in range (32,127):
        target = chr (codePoint)
        code = treeSearchCoding (huffmanTree,target,"")
        if code == "" and target == huffmanTree.element:
            # Only possible when the root itself is the symbol's node.
            code = "0"
        if code != "":
            codeDict [target] = code
    return codeDict
    
def printCodeTable (codeDict,charDict):
    """Write the code table and the average code length to "code.txt".

    Parameters:
        codeDict: symbol -> bit string.
        charDict: symbol -> occurrence count; must contain every key of
                  ``codeDict``.

    One line "<symbol>: <code>" is written per entry (the space character is
    spelled "space"), followed by "Ave = <value> bits per symbol" where the
    value is the count-weighted mean code length rounded to 3 decimals.

    Fixes: an empty ``codeDict`` used to raise ZeroDivisionError (the average
    is now reported as 0.0), and the file is closed even if writing fails.
    """
    totalBits = 0
    symNum = 0
    for symbol, code in codeDict.items():
        totalBits += len(code) * charDict[symbol]
        symNum += charDict[symbol]
    # Guard against an empty table: there is nothing to average.
    ave = round (totalBits / symNum,3) if symNum else 0.0
    lines = []
    for symbol, code in codeDict.items():
        label = "space" if symbol == " " else symbol
        lines.append(label + ": " + code + "\n")
    lines.append("Ave = " + str(ave) + " bits per symbol")
    with open ("code.txt","w") as file:
        file.writelines (lines)

def printEncodeMsg (text,codeDict):
    """Encode ``text`` with ``codeDict`` and write the bits to "encodemsg.txt".

    Characters of ``text`` that have no entry in ``codeDict`` are skipped.
    The bit string is written in rows of 80 characters; every complete row
    is followed by a newline (including the last one when the total length
    is an exact multiple of 80), a shorter final row is not.

    The file content is unchanged from the previous version. The bits are
    now joined in one pass and written row by row instead of character by
    character, and the file is closed even if writing fails.
    """
    rowWidth = 80
    msg = "".join(codeDict[symbol] for symbol in text if symbol in codeDict)
    with open ("encodemsg.txt","w") as file:
        for start in range(0, len(msg), rowWidth):
            row = msg[start:start + rowWidth]
            file.write(row)
            if len(row) == rowWidth:
                file.write("\n")

def main (argv):
    """Run the encoder on the file at path ``argv``.

    Reads the text, counts its characters, builds the tree, derives the code
    table and finally writes the table and the encoded message through
    printCodeTable and printEncodeMsg.
    """
    content = readfile (argv)
    frequencies = countChar (content)
    # buildTree hands back a list; its first entry is used as the root.
    root = buildTree (getBaseNode (frequencies))[0]
    codes = fetchCode (root)
    printCodeTable (codes,frequencies)
    printEncodeMsg (content,codes)
    
    
if __name__ == "__main__":
    # Script entry point: the first command-line argument is the input file.
    # Without it, exit with a usage hint instead of a bare IndexError.
    if len(sys.argv) < 2:
        sys.exit("usage: py hmencoder.py <input file>")
    main(sys.argv[1])

我正在对文本文件“input.txt”进行编码：该程序使用哈夫曼树对文件中所有可打印字符（不包括“\n”）进行编码，然后把所有字符及其编码的列表写入文件“code.txt”，并把编码后的消息写入“encodemsg.txt”。程序按照我的算法生成了结果，但是每个符号的平均位数比示例答案略高。我认为问题可能出在 buildTree 函数中，但我不能确定。你能帮我找出哪里出了问题，并把编码中多出来的那几位去掉吗？谢谢。您可以通过 >>py hmencoder.py input.txt 运行并尝试代码。

解决方法

暂未找到可以解决该程序问题的有效方法，小编正在努力寻找和整理中！

如果你已经找到好的解决方法，欢迎将解决方案带上本链接一起发送给小编。

小编邮箱：dio#foxmail.com（将 # 修改为 @）

huffman-code python