问题描述
我创建了一个较小的虚构数据集。我已将Location_1和Location_2各拆分为两列,总共得到四列。现在,我需要对这些列使用geodesic。在测试运行时,我可以手动对单个观测值逐一计算距离,但我似乎无法让它作用于整列数据,也无法为距离创建新列。
下面的代码会一直运行到最后一行,并在那里引发错误。它反映了我对原始数据集所做的处理;原始数据集包含成千上万条观测值,我无法共享。加粗的那一行(应指代码中被注释掉的那一行)也会引发错误,但那是另一个不同的错误。
# Build a small stand-in dataset: a column of place names and a column of category labels.
# NOTE(review): "Place_1" lists six values but "Places" only four; the output shown
# later has six rows, so two "Places" entries were probably lost — confirm.
places_data = pd.DataFrame(
{"Place_1": ["disneyland Park","Empire State Building","Yosemite Park","disney World Park","Rockefeller Tower","Grand Canyon"],"Places": ["Peaches","Apples","Peaches","Peaches"]}
)
# Copy the frame; the "Sites" and geocoded helper columns are added to the copy.
other_places = places_data.copy()
# Assign a comparison site to each row according to its category label.
other_places.loc[(other_places["Places"] == "Peaches"),"Sites"] = "Georgia Aquarium"
other_places.loc[(other_places["Places"] == "Apples"),"Sites"] = "World of Coca-Cola"
# Geocode each name and store loc.point as a tuple, or None when geocode returns
# nothing. `geolocator` is defined outside this snippet.
other_places["Loc_1"] = other_places["Place_1"].apply(geolocator.geocode).apply(lambda loc: tuple(loc.point) if loc else None)
other_places["Loc_2"] = other_places["Sites"].apply(geolocator.geocode).apply(lambda loc: tuple(loc.point) if loc else None)
# Map the geocoded tuples back onto places_data, keyed by place name / category label.
places_data['Loc_1'] = places_data.Place_1.map(dict(other_places[['Place_1','Loc_1']].to_numpy()))
places_data['Loc_2'] = places_data.Places.map(dict(other_places[['Places','Loc_2']].to_numpy()))
# Split each point tuple into three separate columns (Lat_*, Long_*, Alt_*).
places_data[['Lat_1','Long_1','Alt_1']] = pd.DataFrame(places_data['Loc_1'].tolist(),index = places_data.index)
places_data[['Lat_2','Long_2','Alt_2']] = pd.DataFrame(places_data['Loc_2'].tolist(),index = places_data.index)
# Earlier attempt that passed the tuple columns directly; kept for reference because
# the question reports that it fails with a different error.
#places_data["distance"] = geodesic(places_data["Loc_1"],places_data["Loc_2"]).miles
# Failing attempt: whole Series objects are passed as coordinates, and the traceback
# shows geopy evaluating `latitude or 0.0` on a Series, which raises
# "ValueError: The truth value of a Series is ambiguous".
places_data["distance"] = geodesic(
(places_data["Lat_1"],places_data["Long_1"]),(places_data["Lat_2"],places_data["Long_2"])
).miles
# Manual call for a single pair of scalar coordinates; this form works.
# NOTE(review): the second point combines the latitude of "World of Coca-Cola" with
# the longitude of "disneyland Park" — this looks unintended; confirm.
geodesic(
(geolocator.geocode("disneyland Park").latitude,geolocator.geocode("disneyland Park").longitude),(geolocator.geocode("World of Coca-Cola").latitude,geolocator.geocode("disneyland Park").longitude)
)
返回:Distance(5.629067391427556)
ValueError:Series 的真值不明确。请使用 a.empty,a.bool(),a.item(),a.any() 或 a.all()。
这是错误:
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-772-f5a592d7d527> in <module>()
22 places_data["distance"] = geodesic(
23 (places_data["Lat_1"],---> 24 (places_data["Lat_2"],places_data["Long_2"])
25 ).miles
7 frames /usr/local/lib/python3.6/dist-packages/geopy/distance.py in
__init__(self,*args,**kwargs)
387 kwargs.pop('iterations',0)
388 major,minor,f = self.ELLIPSOID
--> 389 super(geodesic,self).__init__(*args,**kwargs)
390
391 def set_ellipsoid(self,ellipsoid):
/usr/local/lib/python3.6/dist-packages/geopy/distance.py in
__init__(self,**kwargs)
162 elif len(args) > 1:
163 for a,b in util.pairwise(args):
--> 164 kilometers += self.measure(a,b)
165
166 kilometers += units.kilometers(**kwargs)
/usr/local/lib/python3.6/dist-packages/geopy/distance.py in measure(self,a,b)
408 # Call geographiclib routines for measure and destination
409 def measure(self,b):
--> 410 a,b = Point(a),Point(b)
411 lat1,lon1 = a.latitude,a.longitude
412 lat2,lon2 = b.latitude,b.longitude
/usr/local/lib/python3.6/dist-packages/geopy/point.py in __new__(cls,latitude,longitude,altitude)
169 )
170 else:
--> 171 return cls.from_sequence(seq)
172
173 if single_arg:
/usr/local/lib/python3.6/dist-packages/geopy/point.py in from_sequence(cls,seq)
408 raise ValueError('When creating a Point from sequence,it '
409 'must not have more than 3 items.')
--> 410 return cls(*args)
411
412 @classmethod
/usr/local/lib/python3.6/dist-packages/geopy/point.py in __new__(cls,altitude)
181
182 latitude,altitude = \
--> 183 _normalize_coordinates(latitude,altitude)
184
185 self = super(Point,cls).__new__(cls)
/usr/local/lib/python3.6/dist-packages/geopy/point.py in
_normalize_coordinates(latitude,altitude)
63
64 def _normalize_coordinates(latitude,altitude):
---> 65 latitude = float(latitude or 0.0)
66 longitude = float(longitude or 0.0)
67 altitude = float(altitude or 0.0)
/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py in
__nonzero__(self) 1477 def __nonzero__(self): 1478 raise ValueError(
-> 1479 f"The truth value of a {type(self).__name__} is ambiguous. " 1480 "Use a.empty,a.bool(),a.item(),a.any() or a.all()." 1481 )
ValueError: The truth value of a Series is ambiguous. Use a.empty,a.any() or a.all().
解决方法
在列表推导式中用zip将Loc_1和Loc_2两列配对,并为每一对loc_1和loc_2计算geodesic距离:
# Pair up the two point columns row by row and compute each pair's geodesic
# distance in miles; geodesic takes one point per argument, not a whole Series.
places_data['Distance'] = [
    geodesic(origin, destination).miles
    for origin, destination in zip(places_data['Loc_1'], places_data['Loc_2'])
]
0 1920.542230
1 748.136742
2 1587.254446
3 406.942672
4 1918.193488
5 1575.644170
Name: Distance,dtype: float64