问题描述
我正在尝试从头开始制作Naive Bayes分类器,以便在Python中过滤垃圾邮件。当我尝试训练课程时,我不断收到此错误:
Traceback (most recent call last):
File "/Users/francescacape/Desktop/testnb.py",line 97,in <module>
nb.train(traindata)
File "/Users/francescacape/Desktop/testnb.py",line 36,in train
self._hamdocs += 1
AttributeError: 'NLPNaiveBayes' object has no attribute '_hamdocs'
我是编程的新手,这是我建立的第一个类。我不知道为什么会这样,因为属性是在构造函数下定义的。抱歉,我的代码有点长。
from collections import defaultdict
import math
from nltk.tokenize import word_tokenize
import glob
class NLPNaiveBayes:
def _init__(self):
self._spamdocs = 0
self._hamdocs = 0
self.totaldocs = self._spamdocs + self._hamdocs
self.spamwordcount = defaultdict(int)
self.hamwordcount = defaultdict(int)
self.spamwords = {}
self.hamwords = {}
self.totalwords = set()
self.totalspamwords = []
self.totalhamwords = []
self.priorlogham = math.log(self._hamdocs / self.totaldocs)
self.priorlogspam = math.log(self._spamdocs / self.totaldocs)
@staticmethod
def preprocessing(message):
toks = list(word_tokenize(message))
words = [word.islower() for word in toks if word.isalnum()]
return set(words)
def train(self,data):
for message,cat in data:
if is_spam:
self._spamdocs += 1
self.totalspamwords.append(message)
else:
self._hamdocs += 1
self.totalhamwords.append(message)
words = self.preprocessing(message)
for word in words:
if is_spam:
self.totalwords.update(word)
self.spamwords.update(word)
self.spamwordcount[word] += 1
else:
self.totalwords.update(word)
self.hamwords.update(word)
self.hamwordcount[word] += 1
for word,count in self.totalspamwords.item():
self.spamwords[word] = math.log((int(count) + 1)) / (self._spamdocs + self.totaldocs)
for word,count in self.totalhamwords.item():
self.hamwords[word] = math.log((int(count) + 1)) / (self._hamdocs + self.totaldocs)
nb = NLPNaiveBayes()
nb.train(traindata)
解决方法
如果这是您代码的精确副本,那么我认为您的init方法名称不正确-应该__init__
-前后两个下划线。您似乎只有一个下划线(_init__
之后)。
由于名称错误,这意味着在构造实例时将不会执行该操作。