问题描述
自从解析 CNN.RSS 站点后,我就遇到了这个问题。它只得到前 7 个条目,然后我遇到了这个问题。下面是我的日志 .. 请帮帮我 :(。谢谢
这是我的代码:
from win32com.client.dynamic import dispatch,ERRORS_BAD_CONTEXT,winerror
import ghostscript,locale,time,fnmatch,shutil,os
from datetime import datetime,timedelta
def convert_to_1_3(old_pdf_file, PS_filename):
    """Re-save a PDF through Acrobat's COM interface, writing the result to
    *PS_filename*.

    Parameters
    ----------
    old_pdf_file : str
        Path of the source PDF; resolved to an absolute path before opening.
    PS_filename : str
        Destination path handed to Acrobat's ``PDDoc.Save``.

    Errors from the COM call are printed, not raised (best-effort behaviour
    preserved from the original).
    """
    # Acrobat reports E_NOTIMPL for some calls; treat it as non-fatal.
    ERRORS_BAD_CONTEXT.append(winerror.E_NOTIMPL)
    PDSaveFull = 0x01            # defined for reference; not used in SAVEFLAG
    PDSaveLinearized = 0x04
    PDDocOldVersion = 0x0080     # defined for reference; not used in SAVEFLAG
    PDDocNeedsSave = 0x0001
    SAVEFLAG = PDDocNeedsSave | PDSaveLinearized
    # Fix: bind avDoc before the try block so the finally clause cannot raise
    # NameError when dispatch() itself fails.
    avDoc = None
    try:
        src = os.path.abspath(old_pdf_file)
        avDoc = dispatch('AcroExch.AVDoc')
        if avDoc.Open(src, " "):
            pdDoc = avDoc.GetPDDoc()
            pdDoc.Save(SAVEFLAG, PS_filename)
    except Exception as e:
        print(str(e))
    finally:
        # -1 closes without prompting; only close a document we actually opened.
        if avDoc is not None:
            avDoc.Close(-1)
def ps2pdf(ps_input_path, pdf_output_path):
    """Convert a PostScript file to a PDF 1.3 file using Ghostscript.

    Parameters
    ----------
    ps_input_path : str
        Path of the input PostScript file.
    pdf_output_path : str
        Path of the PDF to write.
    """
    args = [
        "ps2pdf",  # argv[0]; actual value doesn't matter
        # Fix: Ghostscript's control switches are -d flags and are
        # case-sensitive. The original "-sBATCH", "-snopAUSE" and "-sSAFER"
        # were invalid and silently ignored, so Ghostscript could stall
        # waiting for interactive input instead of batch-converting.
        "-dBATCH",
        "-dNOPAUSE",
        "-dSAFER",
        "-sDEVICE=pdfwrite",
        "-dCompatibilityLevel=1.3",
        "-sOutputFile=" + pdf_output_path,
        ps_input_path,
    ]
    # The ghostscript binding expects byte strings in the locale encoding.
    encoding = locale.getpreferredencoding()
    args = [a.encode(encoding) for a in args]
    ghostscript.Ghostscript(*args)
    ghostscript.cleanup()
def read_config(config_path='/PDF_Flattening/tech_code.config'):
    """Return the list of technology codes from the lookup file.

    Each line of the file is comma-separated tokens; the first and last
    character of every token (the surrounding quotes) are stripped.

    Parameters
    ----------
    config_path : str
        Path of the config file. Defaults to the original hard-coded
        location so existing callers are unaffected.

    Returns
    -------
    list[str]
        Codes parsed from the file's (last) line, or [] for an empty file.
    """
    # Fix: initialise so an empty file returns [] instead of raising
    # NameError on the return statement.
    tech = []
    with open(config_path, 'r') as lookupfile:
        for line in lookupfile:
            tech = [word[1:-1] for word in line.split(",")]
    return tech
def main(input_file):
    """Mirror *input_file*'s directory tree under ``c:\\`` and flatten
    matching PDFs down to PDF version 1.3.

    Files are selected when their name starts with today's or tomorrow's
    date (``DDMMYYYY``) followed by ``_`` and a technology code from the
    config file. Non-``qxd.pdf`` files are converted via an intermediate
    PostScript file; ``qxd.pdf`` files are copied as-is.

    Parameters
    ----------
    input_file : str
        Root directory to walk.

    Any exception is printed, not raised (best-effort behaviour preserved).
    """
    try:
        tom_date = (datetime.today() + timedelta(days=1)).strftime("%d%m%Y")
        # Fix: datetime.Now does not exist (AttributeError at runtime);
        # use datetime.today() like the line above.
        today_date = datetime.today().strftime("%d%m%Y")
        # Fix: walk the parameter instead of relying on the module-level
        # global `inputpath` (the function previously ignored its argument).
        for dirpath, dirnames, filenames in os.walk(input_file):
            # Mirror the relative sub-path under c:\.
            structure = os.path.join('c:\\', dirpath[len(input_file):])
            if not os.path.isdir(structure):
                os.mkdir(structure)
            for file in filenames:
                # Skip files already produced in the mirror tree.
                if not os.path.isfile(os.path.join(structure, file)):
                    for techcode in read_config():
                        patterns = (tom_date + '_' + techcode + '*',
                                    today_date + '_' + techcode + '*')
                        if any(fnmatch.fnmatch(file, pat) for pat in patterns):
                            if not file.endswith('qxd.pdf'):
                                # foo.pdf -> foo.ps (and back) by substring swap.
                                PS_filename = 'ps'.join(str(file).split('pdf'))
                                PDF_filename = 'pdf'.join(str(file).split('ps'))
                                old_pdf_file = os.path.join(dirpath, file)
                                convert_to_1_3(old_pdf_file,
                                               os.path.join(dirpath, PS_filename))
                                ps2pdf(os.path.join(dirpath, PS_filename),
                                       os.path.join(structure, PDF_filename))
                                # Remove the intermediate PostScript file.
                                os.remove(os.path.join(dirpath, PS_filename))
                            else:
                                # Fix: the original line had an unbalanced ')'
                                # (SyntaxError) and passed a bare filename,
                                # which would copy into the CWD; copy into the
                                # mirrored folder like the conversion branch.
                                shutil.copy2(os.path.join(dirpath, file),
                                             os.path.join(structure, file))
    except Exception as e:
        print(str(e))
if __name__ == "__main__":
    # NOTE(review): main() also references the module-level name `inputpath`
    # in its walk loop, so this assignment must stay before the call.
    inputpath = "/121rawfile"
    main(inputpath)
我的日志: https://pastebin.com/vMJSXD0J
解决方法
要调试它,您应该首先检查 `published` 是否是 `news` 的 keys 的一部分。
>>> news.keys()
dict_keys(['title','title_detail','summary','summary_detail','links','link','id','guidislink','published','published_parsed','media_content'])
由此可见,`published` 确实是 `news` 的 keys 之一,因此您的代码应该可以正常工作。
>>> import feedparser
>>> url = "http://rss.cnn.com/rss/edition.rss"
>>> feed = feedparser.parse(url)
>>> for news in feed.entries:
print(news.published)
Thu,11 Mar 2021 04:53:36 GMT
Thu,11 Mar 2021 03:21:32 GMT
Wed,10 Mar 2021 12:54:12 GMT
Thu,11 Mar 2021 05:13:03 GMT
Wed,10 Mar 2021 23:46:07 GMT
Wed,10 Mar 2021 17:56:03 GMT
Thu,11 Mar 2021 05:50:56 GMT
Thu,11 Mar 2021 00:37:19 GMT
Thu,11 Mar 2021 04:44:57 GMT
Wed,10 Mar 2021 03:46:09 GMT
Wed,10 Mar 2021 13:24:02 GMT
Thu,11 Mar 2021 05:37:44 GMT
Thu,11 Mar 2021 01:48:41 GMT
Wed,10 Mar 2021 17:13:52 GMT
Thu,11 Mar 2021 03:43:19 GMT
Thu,11 Mar 2021 05:11:13 GMT
……(其余条目省略)
PS - 这是在 Python 3.9 上实现的