问题描述
我正在尝试找到最接近特定纬度和经度的站点。
# import requests
import json
# import matplotlib.pyplot as plt
# import seaborn as sns
# from pandasdmx import Request
# from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
import pandas as pd
import numpy as np
import geopandas as gpd
import geopy.distance
import time
import csv
# import sys,os
# from sqlalchemy import create_engine
# import pymssql
# import sqlalchemy as db
# from functools import reduce
# import folium # map rendering library
# from sklearn.neighbors import NearestNeighbors
property_data_set = pd.read_csv('prop_data_set.csv')
shape_file_trains = "./shapefile/ptv_metro_train_station.shp"
trains_shape = gpd.read_file(shape_file_trains)
mwarp=property_data_set
# cartesian product so we get all combinations
dfdist = (mwarp.assign(foo=1).merge(trains_shape.assign(foo=1),on="foo")
# calc distance in km between each suburb and each train station
.assign(km=lambda dfa: dfa.apply(lambda r:
geopy.distance.geodesic(
(r["LATITUDE"],r["LONGITUDE"]),(r["lat"],r["lon"])).km,axis=1))
# reduce number of columns to make it more digestable
.loc[:,["postcode","address_street_full","STOP_ID","STOP_NAME","km"]]
# sort so shortest distance station from a suburb is first
.sort_values(["postcode","suburb","km"])
# good practice
.reset_index(drop=True)
)
# finally pick out stations nearest to suburb
# this can easily be joined back to source data frames as postcode and STOP_ID have been maintained
dfnearest = dfdist.groupby(["postcode","suburb"])\
.agg({"STOP_ID":"first","STOP_NAME":"first","km":"first"}).reset_index()
# print(dfnearest.to_string(index=False))
dfnearest.to_csv("distances_station")
print(dfnearest)
这是我当前正在使用的代码。我的计算机正在努力进行此分析,因为有11,000行数据。我想批量处理它,但不确定100%采取哪种最佳方法。任何人都做过类似的事情并且了解最佳途径吗?谢谢
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)