问题描述
import requests
import pandas as pd
import json
import time
def search_endpoint_connect(bearer_token, query, st, et, next_token):
    """Fetch one page of results from the Twitter v2 full-archive search endpoint.

    Parameters
    ----------
    bearer_token : str
        OAuth2 bearer token for the Twitter API (Academic Research access).
    query : str
        Search query in Twitter's query syntax.
    st, et : str
        Start and end time, ``YYYY-MM-DDTHH:MM:SSZ`` format.
    next_token : str or None
        Pagination token from the previous page's ``meta``; ``None`` requests
        the first page.

    Returns
    -------
    dict
        The parsed JSON response body.

    Raises
    ------
    Exception
        With ``(status_code, body_text)`` when the HTTP status is not 200.
    """
    headers = {"Authorization": "Bearer {}".format(bearer_token)}
    query_params = {
        'query': query,
        'start_time': st,
        'end_time': et,
        'max_results': 100,
        # BUG FIX: 'user.fields' is ignored by the API unless the matching
        # expansion is requested; 'author_id' makes the endpoint return the
        # user objects (username, location, ...) under 'includes.users',
        # and 'geo.place_id' returns the place objects under 'includes.places'.
        'expansions': 'author_id,geo.place_id',
        'tweet.fields': 'id,text,author_id,created_at,geo,lang,public_metrics,in_reply_to_user_id,referenced_tweets',
        'user.fields': 'created_at,location,profile_image_url',
    }
    # Pass the pagination token as a regular query parameter instead of
    # hand-building the URL (the original mixed a manual query string with
    # requests' params= merging).
    if next_token is not None:
        query_params['next_token'] = next_token
    url = "https://api.twitter.com/2/tweets/search/all"
    response = requests.request("GET", url, params=query_params, headers=headers)
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()
def main(bearer_token, n, fn, sq, st, et):
    """Download tweets matching *sq* between *st* and *et* into ``<fn>.csv``.

    Parameters
    ----------
    bearer_token : str
        OAuth2 bearer token, forwarded to ``search_endpoint_connect``.
    n : int
        Maximum number of tweets to download; 0 means no limit.
    fn : str
        Output CSV base name (``.csv`` is appended).
    sq : str
        Search query.
    st, et : str
        Start and end time, ``YYYY-MM-DDTHH:MM:SSZ`` format.

    Fixes over the original:
    - signature now accepts the start time (the script-level call passes six
      arguments, but the old signature only took five);
    - the query and time window are actually forwarded to
      ``search_endpoint_connect`` (they were silently dropped before);
    - the response meta key is ``'meta'``, not ``'Meta'``;
    - ``next_token`` is initialised before first use;
    - the rate-limit counter is reset after the cooldown sleep;
    - the last page of results is written out instead of being discarded.
    """
    rl_count = 0        # requests issued since the last cooldown
    count = 0           # tweets written so far
    next_token = None   # pagination token; None requests the first page
    first = True        # first write creates the file with a header row
    while True:
        # Full-archive search allows 300 requests per 15-minute window;
        # back off proactively once we hit that many requests.
        if rl_count == 300:
            print('Rate limit cooldown 10 mins.')
            time.sleep(600)
            rl_count = 0
        if n != 0 and count >= n:
            break
        json_response = search_endpoint_connect(bearer_token, sq, st, et, next_token)
        rl_count += 1
        meta = json_response.get('meta', {})
        result_count = meta.get('result_count', 0)
        next_token = meta.get('next_token')  # None on the final page
        if result_count:
            df = pd.json_normalize(json_response['data'])
            # Fix the column order; columns absent from the response come out
            # as all-NaN so appended pages stay aligned.
            df = df.reindex(columns=['id','text','public_metrics.retweet_count','public_metrics.favourite_count','created_at','user.id','lang','public_metrics.reply_count','public_metrics.like_count','location','in_reply_to_user_id','authorid.username','place','geo.place_id','geo.coordinates.type','geo.coordinates.coordinates','referenced_tweets','referenced_tweets.id'])
            if first:
                df.to_csv('%s.csv' % fn, index=False)
            else:
                df.to_csv('%s.csv' % fn, mode='a', encoding='utf-8', index=False, header=None)
            first = False
            count += result_count
            print('Tweets downloaded: ' + str(count))
            time.sleep(1)  # be polite between paginated requests
        if next_token is None:
            break  # no further pages
# --- Script configuration ------------------------------------------------
# Enter your bearer token (Twitter API v2, full-archive search access).
bearer_token = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
# Number of tweets to download. Enter 0 for no limit.
no_of_tweets = 20
# Name of the output CSV file, WITHOUT the .csv extension.
file_name = 'downloaded_tweets6'
# Search query. Syntax reference:
# https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/build-a-query
search_query = '(travel OR goa) lang:en point_radius:[-74.014746 40.730610 20mi]'
# Example of an alternative radius (kept from the original for reference):
#point_radius:[23.8.050 -80.180374 16mi]
# Beginning date/time in YYYY-MM-DDTHH:MM:SSZ format.
start_time = "2019-11-27T00:00:00Z"
# Ending date/time in YYYY-MM-DDTHH:MM:SSZ format.
end_time = "2019-11-29T00:00:00Z"
#point_radius= "-41.287336,174.761070,20mi"
# NOTE(review): six arguments are passed here — confirm that main()'s
# signature accepts (bearer_token, n, fn, sq, st, et) in this order.
main(bearer_token,no_of_tweets,file_name,search_query,start_time,end_time)
我无法获取推文的用户信息（即用户名和用户位置）。相应的文档可以在以下链接中找到："https://developer.twitter.com/en/docs/twitter-api/expansions" 和 "https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-all"。请帮忙，提前致谢！
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)