Python 爬取豆瓣电影 Top250

import urllib.request
import ssl
import re
import xlwt
import dbutils
import xlrd
from xlutils.copy import copy
def getContent(ye):
    """Download one page of the Douban movie Top 250 list and return its HTML.

    Args:
        ye: Value substituted into the ``start`` query parameter of the list
            URL (the caller passes 0, 25, 50, ...).

    Returns:
        The response body decoded as UTF-8 text.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36",
        'Connection': 'keep-alive',
    }
    url = "https://movie.douban.com/top250?start=%s&filter=" % ye
    # Request object (URL + request headers).
    req = urllib.request.Request(url, headers=headers)
    # Fetch the page body; the ``with`` block closes the HTTP response even
    # if reading fails, instead of leaving the socket open.
    with urllib.request.urlopen(req) as response:
        raw_page = response.read()

    return raw_page.decode("utf-8")
 
# print(getContent(ye))
 
def getItem(content):
    """Return every ``alt="..."`` attribute value in *content* except the last.

    Args:
        content: HTML text to scan.

    Returns:
        A list of the captured ``alt`` values in document order, with the
        final match dropped (presumably a non-movie image on the page --
        confirm against the live markup). The list is empty when the text
        contains zero or one match.
    """
    matches = re.findall(r'alt="(.*?)"', content)
    # Slicing instead of ``pop()`` keeps a page without any ``alt`` attribute
    # from raising IndexError.
    return matches[:-1]
#
# content = getContent(ye)
# print(getItem(content))
 
def saveExcel():
    """Create "豆瓣.xls" containing one sheet with only the header row.

    The sheet is named "豆瓣250" and row 0 holds the column titles. The file
    is written to the current working directory.
    """
    workbook = xlwt.Workbook()
    worksheet = workbook.add_sheet("豆瓣250")
    # NOTE(review): the header text means "book title", while the crawler
    # fetches movie.douban.com -- confirm the intended label.
    column_titles = ["书名"]
    column = 0
    for title in column_titles:
        worksheet.write(0, column, title)
        column += 1
    workbook.save("豆瓣.xls")
# content = getContent()
# list = getItem(content)
# saveExcel(list)
def wb(list,x):
    """Write the given values into column 0 of "豆瓣.xls", starting at row *x*.

    Args:
        list: Iterable of cell values; item ``k`` is written to row ``x + k``.
        x: Zero-based index of the first row to write.
    """
    # Open the existing workbook, then make a writable copy of it.
    source_book = xlrd.open_workbook("豆瓣.xls")
    writable_book = copy(source_book)
    # Fetch the sheet by index.
    first_sheet = writable_book.get_sheet(0)
    for offset, value in enumerate(list):
        first_sheet.write(x + offset, 0, value)
    # Saving under the same file name replaces the previous file.
    writable_book.save("豆瓣.xls")
def ye():
    """Crawl all list pages, storing the titles in the spreadsheet and the database.

    Creates the spreadsheet via ``saveExcel()``, then for each ``start``
    offset 0, 25, ..., 225 downloads the page, extracts the titles, appends
    them to the sheet and inserts each one into table ``tb_use``.

    Returns:
        The string "完成" once every page has been processed.
    """
    start = 0
    row = 1  # Row 0 holds the header written by saveExcel().
    saveExcel()
    while start < 250:
        content = getContent(start)
        titles = getItem(content)
        wb(titles, row)
        for title in titles:
            # Titles come from scraped HTML. Escape backslashes and single
            # quotes so a title containing them cannot end the SQL string
            # literal early (broken statement / SQL injection).
            safe_title = title.replace("\\", "\\\\").replace("'", "''")
            sql = "insert into tb_use(name) values ('%s');" % safe_title
            dbutils.insertData(sql)
        start += 25
        # Advance by what was actually written so the next page neither
        # overwrites rows nor leaves a gap when a page yields a count other
        # than 25.
        row += len(titles)
    return "完成"
# Module-level entry statement: run the whole crawl and print the status
# string that ye() returns.
print(ye())

import pymysql.cursors
 
# Open a connection
def getConnect():
    """Open and return a new MySQL connection using the hard-coded settings.

    The driver's import name is the all-lowercase ``pymysql``. Python module
    names are case-sensitive, so the previous ``pyMysqL`` spelling could not
    be resolved.

    Returns:
        The connection object returned by ``pymysql.connect``.
    """
    # NOTE(review): the database name below may be a mis-cased copy of the
    # module name as well -- confirm against the actual schema name.
    conn = pymysql.connect(host="", user="root", password="123", database="pyMysqL", charset="utf8")
    return conn
# Close a connection
def closeConnect(cursor,conn):
    """Close *cursor* and then *conn*, skipping any that is falsy.

    Args:
        cursor: Object with a ``close()`` method, or a falsy value to skip.
        conn: Object with a ``close()`` method, or a falsy value to skip.
    """
    for resource in (cursor, conn):
        if resource:
            resource.close()
 
# Insert data
def insertData(sql, params=None):
    """Execute one statement on a fresh connection and commit it.

    Args:
        sql: The SQL statement to execute.
        params: Optional arguments handed to ``cursor.execute`` as its second
            argument, so callers can use driver-side placeholders instead of
            building the statement by string formatting. Defaults to ``None``,
            which executes *sql* as given.

    Returns:
        ``True`` if the cursor reports more than zero affected rows,
        otherwise ``False``.

    Raises:
        Any exception raised while executing or committing is propagated
        after the cursor and connection have been closed.
    """
    conn = getConnect()
    cursor = None
    try:
        cursor = conn.cursor()
        cursor.execute(sql, params)
        conn.commit()
        # Read the row count while the cursor is still open.
        count = cursor.rowcount
    finally:
        # Release the cursor and connection even when the statement fails.
        closeConnect(cursor, conn)
    return count > 0

相关文章

功能概要:(目前已实现功能)公共展示部分:1.网站首页展示...
大体上把Python中的数据类型分为如下几类: Number(数字) ...
开发之前第一步,就是构造整个的项目结构。这就好比作一幅画...
源码编译方式安装Apache首先下载Apache源码压缩包,地址为ht...
前面说完了此项目的创建及数据模型设计的过程。如果未看过,...
python中常用的写爬虫的库有urllib2、requests,对于大多数比...