AttributeError: 解析 CNN 源时对象没有属性“已发布”

问题描述

自从解析 CNN.RSS 站点后，我就遇到了这个问题。它只得到前 7 个条目，然后我遇到了这个问题。下面是我的日志 .. 请帮帮我 :(。谢谢

这是我的代码：

from win32com.client.dynamic import dispatch,ERRORS_BAD_CONTEXT,winerror
import ghostscript,locale,time,fnmatch,shutil,os
from datetime import datetime,timedelta
def convert_to_1_3(old_pdf_file,PS_filename):
    ERRORS_BAD_CONTEXT.append(winerror.E_NOTIMPL)
    PDSaveFull = 0x01
    PDSaveLinearized = 0x04
    PDDocOldVersion = 0x0080
    PDDocNeedsSave = 0x0001
    SAVEFLAG = PDDocNeedsSave|PDSaveLinearized
    try:
        src =os.path.abspath(old_pdf_file) 
        avDoc = dispatch('AcroExch.AVDoc')
        if avDoc.Open(src," "):
            pdDoc = avDoc.GetPDDoc()
            pdDoc.Save(SAVEFLAG,PS_filename)
    except Exception as e:
        print(str(e))
    finally:
        avDoc.Close(-1)
       
def ps2pdf(ps_input_path,pdf_output_path):
    args1 = ["ps2pdf",# actual value doesn't matter
            "-sBATCH","-snopAUSE","-sSAFER","-sDEVICE=pdfwrite","-dCompatibilityLevel=1.3","-sOutputFile=" + pdf_output_path,ps_input_path]
    encoding = locale.getpreferredencoding()
    args1 = [a.encode(encoding) for a in args1]
    ghostscript.Ghostscript(*args1)
    ghostscript.cleanup()

def read_config():
    with open('/PDF_Flattening/tech_code.config','r') as lookupfile:
        for line in lookupfile:
            tech= [word[1:-1] for word in line.split(",")]
            return tech

def main(input_file):
    try:
        tom_date=(datetime.today()+timedelta(days=1)).strftime("%d%m%Y")
        today_date=(datetime.Now()).strftime("%d%m%Y")
        for dirpath,dirnames,filenames in os.walk(inputpath):
            structure = os.path.join('c:\\',dirpath[len(inputpath):])
            if not os.path.isdir(structure):
                os.mkdir(structure)
            for file in filenames:
                if not os.path.isfile(os.path.join(structure,file)):
                    for techcode in read_config():
                        pattterns=(tom_date+'_'+techcode+'*',today_date+'_'+techcode+'*')
                        if [pat for pat in pattterns if fnmatch.fnmatch(file,pat)]:
                            if not file.endswith('qxd.pdf'):                                                    
                                PS_filename='ps'.join(str(file).split('pdf'))
                                PDF_filename='pdf'.join(str(file).split('ps'))
                                old_pdf_file= os.path.join(dirpath,file)              
                                convert_to_1_3(old_pdf_file,os.path.join(dirpath,PS_filename))
                                ps2pdf(os.path.join(dirpath,PS_filename),os.path.join(structure,PDF_filename))
                                os.remove(os.path.join(dirpath,PS_filename))
                    else: 
                        shutil.copy2(os.path.join(dirpath,file),file))                        
    except Exception as e:
        print(str(e))

if __name__=="__main__":
    inputpath = "/121rawfile"
    main(inputpath)

Log

我的日志： https://pastebin.com/vMJSXD0J

解决方法

要调试它，您应该首先检查 published 是否是 keys 的 news 的一部分。

>>> news.keys()
dict_keys(['title','title_detail','summary','summary_detail','links','link','id','guidislink','published','published_parsed','media_content'])

根据此 published 是 keys 的 news 的一部分，因此您的代码应该可以正常工作。

>>> import feedparser
>>> url = "http://rss.cnn.com/rss/edition.rss"
>>> feed = feedparser.parse(url)
>>> for news in feed.entries:
    print(news.published)

    
Thu,11 Mar 2021 04:53:36 GMT
Thu,11 Mar 2021 03:21:32 GMT
Wed,10 Mar 2021 12:54:12 GMT
Thu,11 Mar 2021 05:13:03 GMT
Wed,10 Mar 2021 23:46:07 GMT
Wed,10 Mar 2021 17:56:03 GMT
Thu,11 Mar 2021 05:50:56 GMT
Thu,11 Mar 2021 00:37:19 GMT
Thu,11 Mar 2021 04:44:57 GMT
Wed,10 Mar 2021 03:46:09 GMT
Wed,10 Mar 2021 13:24:02 GMT
Thu,11 Mar 2021 05:37:44 GMT
Thu,11 Mar 2021 01:48:41 GMT
Wed,10 Mar 2021 17:13:52 GMT
Thu,11 Mar 2021 03:43:19 GMT
Thu,11 Mar 2021 05:11:13 GMT
'
'
etc

PS - 这是在 Python 3.9 上实现的

feedparser python-3.x