刚接触用Python解析数据,发现十分的方便
先来了解一下json.loads()、json.dumps()和json.dump()、json.load()四个函数
json.loads()、json.dumps()和json.dump()、json.load()分别是两组不一样用法
带s的用于数据类型的转换,不带s的用于操做文件。
json.loads()、json.dumps()概念理解
json自己是字符串,经过json.loads()、json.dumps()能够进行字典和字符串的转换。
由于浏览器不支持字典方式显示,若是请求过来的类型是字典,必须经过json.dumps()函数将字典转换为字符串以后,才可展现。
json.dump()和json.load()概念理解
1.通过接口解析json数据提取各个城市的实时疫情数据,没有在脚本里进行定时,不知道该怎么写定时任务(查百度也弄不明白),就在linux系统设置了,一天执行一次脚本
# -*- coding: utf-8 -*- import requests from bs4 import BeautifulSoup import json import time from pyMysqL import * def mes(): url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?from=timeline&isappinstalled=0' #请求地址 headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36 SLbrowser/6.0.1.6181'}#创建头部信息 resp = requests.get(url,headers = headers) #发送网络请求 content=resp.content.decode('utf-8') print("content"+content) soup = BeautifulSoup(content, 'html.parser') print(soup) listA = soup.find_all(name='script',attrs={"id":"getAreaStat"}) account =str(listA) mes = account.replace('[<script id="getAreaStat">try { window.getAreaStat = ', '') mes=mes.replace('}catch(e){}</script>]','') #mes=account[52:-21] #json.loads 用于解码 JSON 数据。该函数返回 Python 字段的数据类型。 messages_json = json.loads(mes) print("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") print(messages_json) times=time.strftime('%Y-%m-%d %H:%M:%s', time.localtime(time.time())) print(times) provinceList=[] cityList=[] lenth=total() total() sheng = len(messages_json) con=sheng+(lenth)#算出数据库已有的条数+今天省份的条数,才是城市的开始id print(sheng) print(len(messages_json)) print(con) for item in messages_json: lenth+=1 provinceName=item['provinceName'] NowCount = item['currentConfirmedCount'] confirmedCount=item['confirmedCount'] SUSPECTedCount=item['SUSPECTedCount'] curedCount=item['curedCount'] deadCount=item['deadCount'] cities=item['cities'] provinceList.append((lenth,times,provinceName,None,NowCount,confirmedCount,SUSPECTedCount,curedCount,deadCount)) for i in cities: con+=1 provinceName = item['provinceName'] cityName=i['cityName'] NowCount = i['currentConfirmedCount'] confirmedCount = i['confirmedCount'] SUSPECTedCount = item['SUSPECTedCount'] curedCount = i['curedCount'] deadCount = i['deadCount'] cityList.append((con,times,provinceName,cityName,NowCount,confirmedCount,SUSPECTedCount,curedCount,deadCount)) insert(provinceList,cityList) return NULL def insert(provinceList, cityList): provinceTuple=tuple(provinceList) cityTuple=tuple(cityList) cursor = db.cursor() sql = "insert into China values (%s,%s,%s,%s,%s,%s,%s,%s,%s) " try: cursor.executemany(sql,provinceTuple) print("插入成功") db.commit() except Exception as e: print(e) db.rollback() try: cursor.executemany(sql,cityTuple) print("插入成功") db.commit() except Exception as e: print(e) db.rollback() cursor.close() def total(): sql= "select * from China" cursor = db.cursor() try: cursor.execute(sql) results = cursor.fetchall() lenth = len(results) db.commit() return lenth except: print('执行失败,进入回调1') db.rollback() # 连接数据库的方法 def connectDB(): try: db = connect(host='localhost', port=3306, user='root', password='123456', db='db2',charset='utf8') print("数据库连接成功") return db except Exception as e: print(e) return NULL if __name__ == '__main__': db=connectDB() mes()
sql:
CREATE TABLE `china` ( `id` int(11) NOT NULL, `time` datetime NULL DEFAULT NULL, `provinceName` varchar(255) CHaraCTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '省,直辖市', `cityName` varchar(255) CHaraCTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '市', `NowCount` varchar(255) CHaraCTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '现存确诊', `confirmedCount` varchar(255) CHaraCTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '总共确诊', `SUSPECTedCount` varchar(255) CHaraCTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '疑似', `curedCount` varchar(255) CHaraCTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '治愈', `deadCount` varchar(255) CHaraCTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '死亡', PRIMARY KEY (`id`) USING BTREE ) ENGINE = InnoDB CHaraCTER SET = utf8mb4 COLLATE = utf8mb4_bin ROW_FORMAT = Dynamic; SET FOREIGN_KEY_CHECKS = 1;
2.是一个本地的json文件,解析,然后数据入库。和上面其实差不多
# coding:utf8 import requests from bs4 import BeautifulSoup import json import time from pyMysqL import * def mes(): with open(r"D:\alldata.json", 'r', encoding='UTF-8') as load_f: load_dict = json.load(load_f) print(load_dict) load_dict = load_dict['stations'] # 拆第一层花括号 print(len(load_dict)) con = len(load_dict) print(load_dict[0]['id']) # data_raw = pd.DataFrame(columns=load_dict.keys()) # data_raw = data_raw.append(load_dict, ignore_index=True) provinceList = [] lenth = select() print("aaaaaaaaa") print(select()) for item in load_dict: print(lenth) lenth = lenth + 1 print("bbbbbbbbbbbbbbbbbbbbbb") print(lenth) id = item['id'] type = item['type'] name = item['name'] dev_id = item['dev_id'] no = item['no'] partner = item['partner'] enabled = item['enabled'] agent = item['agent'] total = item['total'] used = item['used'] free = item['free'] root = item['root'] # agent_list = item['agent_list'] # agent_list_public = ','.join(agent_list) # print(agent_list_public) parent = item['parent'] owner = item['owner'] longitude = item['longitude'] latitude = item['latitude'] address = item['address'] region = item['region'] vendor = item['vendor'] country = item['country'] province = item['province'] district = item['district'] zipcoce = item['zipcoce'] style = item['style'] rate = item['rate'] rate_group = item['rate_group'] package = item['package'] lot_rate_group = item['lot_rate_group'] contact_phone = item['contact_phone'] contact_name = item['contact_name'] memo = item['memo'] stat_total = item['stat']['total'] stat_busy = item['stat']['busy'] stat_idle = item['stat']['idle'] lot_rate = item['lot_rate'] tags = item['tags'] price = item['price'] #数组转字符串 tags_public = ','.join(tags) print(tags_public) if not lot_rate: print("Empty") else: print("Not Empty") print("========================================================================================================") if lot_rate==None: print("没有内容") print(lot_rate) provinceList.append( (id, lenth, type, name, dev_id, no, partner, enabled, agent, total, used, free, root, None, parent, owner, longitude, latitude, address, region, vendor, country, province, district, zipcoce, style, rate, rate_group, package, lot_rate_group, contact_phone, contact_name, memo, stat_total, stat_busy, stat_idle, None, None, None, None, None, None, None, None, None, None, None, None, None, tags_public, price)) else: print("有内容") print(lot_rate) lot_rate_id = lot_rate['id'] lot_rate_name = lot_rate['name'] lot_rate_agent = lot_rate['agent'] lot_rate_unit = lot_rate['unit'] lot_rate_free_time = lot_rate['free_time'] lot_rate_cycle = lot_rate['cycle'] lot_rate_max_perday = lot_rate['max_perday'] lot_rate_max_permonth = lot_rate['max_permonth'] lot_rate_creator = lot_rate['creator'] lot_rate_created_time = lot_rate['created_time'] lot_rate_update_time = lot_rate['update_time'] lot_rate_memo = lot_rate['memo'] #lot_rate如果有内容 该字段置为1 provinceList.append( (id, lenth, type, name, dev_id, no, partner, enabled, agent, total, used, free, root, None, parent, owner, longitude, latitude, address, region, vendor, country, province, district, zipcoce, style, rate, rate_group, package, lot_rate_group, contact_phone, contact_name, memo, stat_total, stat_busy, stat_idle, 1, lot_rate_id, lot_rate_name, lot_rate_agent, lot_rate_unit, lot_rate_free_time, lot_rate_cycle, lot_rate_max_perday, lot_rate_max_permonth, lot_rate_creator,lot_rate_created_time, lot_rate_update_time, lot_rate_memo, tags_public, price)) print("相数") print(provinceList) insert(provinceList) return NULL def insert(provinceList): provinceTuple = tuple(provinceList) cursor = db.cursor() sql = "insert into park_db values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) " try: cursor.executemany(sql, provinceTuple) print("插入成功") db.commit() except Exception as e: print(e) db.rollback() cursor.close() def select(): sql = "select * from park_db" cursor = db.cursor() try: cursor.execute(sql) results = cursor.fetchall() lenth = len(results) db.commit() return lenth except: print('执行失败,进入回调1') db.rollback() def connectDB(): try: db = connect(host='localhost', port=3306, user='root', password='123456', db='db3', charset='utf8') print("数据库连接成功") return db except Exception as e: print(e) return NULL if __name__ == '__main__': db = connectDB() mes()
我也忘了当时是参考了哪篇博客了,反正作者看着,联系本人删除即可