如何遍历两个 DataFrame 中的数据并保留第一个 DataFrame 的索引?

问题描述

我有两个 DataFrame,它们都带有 "latitude"(纬度)和 "longitude"(经度)列。我需要:

  • 计算 df1 中第 1 个纬度/经度对与 df2 中所有纬度/经度对之间的距离,
  • 取这些距离中的最小值,并将结果存入字典(以 df1 的索引为键),
  • 转到 df1 中的第 2 个纬度/经度对,
  • 重复上述步骤,直到遍历完 df1。

这是我目前的进展:

def distance(df1, df2=School_usable):
    """Map each row of ``df1`` to its distance to the closest row of ``df2``.

    Both frames must provide ``latitude`` and ``longitude`` columns; the
    values are converted from degrees to radians before use. ``df1`` must
    also provide ``code_postal``, which is used only to order the rows.

    Args:
        df1: DataFrame whose rows are the query points.
        df2: DataFrame holding the candidate points. Defaults to the
            module-level ``School_usable``.

    Returns:
        A dict keyed by the index labels of ``df1`` (visited in
        ``code_postal`` order). Each value is the smallest haversine distance
        from that row to any row of ``df2``, rounded to two decimals. The
        value is NaN when ``df2`` has no rows.
    """
    # Sorting df1 only fixes the insertion order of the result dict. df2 is
    # not sorted because the minimum over all of its rows is order-independent.
    df1 = df1.sort_values(by="code_postal", ascending=True)

    lat1 = np.radians(np.asarray(df1["latitude"], dtype=float))
    lon1 = np.radians(np.asarray(df1["longitude"], dtype=float))
    lat2 = np.radians(np.asarray(df2["latitude"], dtype=float))
    lon2 = np.radians(np.asarray(df2["longitude"], dtype=float))

    # Loop-invariant part of the haversine formula.
    cos_lat2 = np.cos(lat2)

    result = {}
    for index, phi, lam in zip(df1.index, lat1, lon1):
        if lat2.size == 0:
            # No candidates: there is no minimum to report.
            result[index] = float("nan")
            continue

        # Haversine term for this df1 point against every df2 point at once.
        a = (
            0.5
            - np.cos(lat2 - phi) / 2
            + np.cos(phi) * cos_lat2 * (1 - np.cos(lon2 - lam)) / 2
        )
        # Guard against rounding pushing `a` marginally outside [0, 1].
        a = np.clip(a, 0.0, 1.0)

        # 12742 is the scale factor used by the original formula
        # (Earth's mean diameter in km, i.e. 2 * 6371).
        all_distances = 12742 * np.arcsin(np.sqrt(a))
        result[index] = round(float(all_distances.min()), 2)

    return result

# NOTE(review): `distance` declares `df1` without a default value, so this bare
# call raises TypeError; pass the first DataFrame explicitly.
distance()

解决方法

# 问题已解决,最终代码如下:

from math import cos,asin,sqrt,pi

def distance(l1, L1, l2, L2):
    """Return the haversine great-circle distance between two points.

    Args:
        l1: Longitude of the first point, in degrees.
        L1: Latitude of the first point, in degrees.
        l2: Longitude of the second point, in degrees.
        L2: Latitude of the second point, in degrees.

    Returns:
        The distance rounded to two decimals, in the unit implied by the
        12742 scale factor (Earth's mean diameter in km, so kilometres).
    """
    p = pi / 180  # degrees -> radians
    a = (
        0.5
        - cos((L2 - L1) * p) / 2
        + cos(L1 * p) * cos(L2 * p) * (1 - cos((l2 - l1) * p)) / 2
    )
    # Rounding can push `a` a hair outside [0, 1] (e.g. near-antipodal
    # points), which would make sqrt/asin raise ValueError. Plain comparisons
    # are used instead of min/max so that a NaN input still yields NaN.
    if a < 0.0:
        a = 0.0
    elif a > 1.0:
        a = 1.0
    return round(12742 * asin(sqrt(a)), 2)

def minDistance(df1, df2):
    """Return, per row of ``df1``, the distance to the nearest same-postal-code row of ``df2``.

    Both frames must provide ``code_postal``, ``latitude`` and ``longitude``
    columns. Only rows of ``df2`` whose ``code_postal`` equals that of the
    ``df1`` row are considered.

    Args:
        df1: DataFrame whose rows are the query points.
        df2: DataFrame holding the candidate points.

    Returns:
        A list with one entry per row of ``df1``, ordered by ascending
        ``code_postal`` and NOT by the original row order of ``df1``. An
        entry is ``1e308`` when ``df2`` has no row with the same postal code.
    """
    # The output order follows this sort, so it is kept. df2 needs no sorting
    # because every matching row is inspected below.
    df1 = df1.sort_values(by="code_postal", ascending=True)

    zc1 = np.array(df1["code_postal"])
    zc2 = np.array(df2["code_postal"])

    lat1 = np.array(df1["latitude"])
    lat2 = np.array(df2["latitude"])
    lon1 = np.array(df1["longitude"])
    lon2 = np.array(df2["longitude"])

    minDistances = []
    for z1, l1, L1 in zip(zc1, lon1, lat1):
        minDist = 1e308  # sentinel reported when no candidate matches
        for index in np.where(zc2 == z1)[0]:
            # distance() takes (lon1, lat1, lon2, lat2); the previous call
            # passed only two arguments and raised TypeError.
            d = distance(l1, L1, lon2[index], lat2[index])
            if d < minDist:
                minDist = d
        minDistances.append(minDist)
    return minDistances