问题描述
我想快速(例如
每次搜索的输入:
- 二维空间中的一组固定点(图像中的黑点)。这在搜索中保持不变,因此可以存储在有效的数据结构中。 1 到 1000 之间的点数。
- 每次搜索的位置都不同的点(图像中的红点)。抽象地说,这将 2D 空间划分为 4 个象限(由图像中的红线分隔),就像笛卡尔坐标中的原点一样。
- 来自每个红色象限中最接近红点的黑点(在图像中用蓝色圈出)。
输出通常应该是 4 个点,每个象限一个。但也有一些可能的边缘情况:
我知道行不通的事情:
编辑:接受答案的代码版本和时间
def trial1(quadneighbori,printout = False,seedn = None,Npts = 1000,Niter = 1000):
if seedn != None: np.random.seed(seedn) # random seed
# Generate Npts (x,y) coordinates where x and y are standard normal
dataset = np.random.randn(Npts,2)
for n in range(Niter):
# Generate random pixel (x,y) coordinates where x and y are standard normal
red = np.random.randn(1,2)
dst,i = quadneighbori(dataset,red)
if printout: print(dst,i)
def quadneighbor1(dataset,red):
dst = np.zeros(4)
closest = np.zeros((4,2))
# Work out a Boolean mask for the 4 quadrants
right_exclu = dataset[:,0] > red[0,0]
top_exclu = dataset[:,1] > red[0,1]
Q1 = np.logical_and( top_exclu,right_exclu)
Q2 = np.logical_and(~top_exclu,right_exclu)
Q3 = np.logical_and(~top_exclu,~right_exclu)
Q4 = np.logical_and( top_exclu,~right_exclu)
Qs = [Q1,Q2,Q3,Q4]
for Qi in range(4):
# Boolean mask to select points in this quadrant
thisquad = dataset[Qs[Qi]]
if len(thisquad)==0: continue # if no points,move on to next quadrant
# Calculate distance of each point in dataset to red point
distances = cdist(thisquad,red,'sqeuclidean')
# Choose nearest
idx = np.argmin(distances)
dst[Qi] = distances[idx]
closest[Qi] = thisquad[idx]
return dst,closest
# numba turns 1.53s trial1 to 4.12ms trial1
@nb.jit(nopython=True,fastmath=True)
def quadneighbor2(dataset,red):
redX,redY = red[0,0],red[0,1]
# distance to and index of nearest point in each quadrant
dst = np.zeros(4) + 1.0e308 # Start large! Update with something smaller later
idx = np.zeros(4,dtype=np.uint32)
for i in nb.prange(dataset.shape[0]):
# Get this point's x,y
x,y = dataset[i,dataset[i,1]
# Get quadrant of this point (minus 1)
if x>redX:
if y>redY:
Qi = 0
else:
Qi = 1
else:
if y>redY:
Qi = 3
else:
Qi = 2
# Get distance (squared) of this point - square root is slow and unnecessary
d = (x-redX)*(x-redX) + (y-redY)*(y-redY)
# Update if nearest
if d<dst[Qi]:
dst[Qi] = d
idx[Qi] = i
return dst,idx
%timeit trial1(quadneighbor1)
111 ms ± 3.21 ms per loop (mean ± std. dev. of 7 runs,10 loops each)
%timeit trial1(quadneighbor2)
4.12 ms ± 79.6 µs per loop (mean ± std. dev. of 7 runs,100 loops each)
PS:cdist
与 numba
不兼容,但调整该行以删除 cdist
并将 @nb.jit
添加到 quadneighbor1
可加快 trial1(quadneighbor1)
从 111 毫秒到 30.1 毫秒。
解决方法
更新答案
我已将原始答案修改为在 numba
下运行。现在它可以在 3 微秒内处理 1,000 个点的数据集 - 与最初的 1 毫秒左右相比,速度要快得多。
#!/usr/bin/env python3
import random,sys
import numpy as np
import numba as nb
@nb.jit(nopython=True,fastmath=True)
def QuadNearestNeighbour(dataset,redX,redY):
# Distance to and index of nearest point in each quadrant
dst = np.zeros(4) + 1.0e308 # Start large! Update with something smaller later
idx = np.zeros(4,dtype=np.uint32)
for i in nb.prange(dataset.shape[0]):
# Get this point's x,y
x,y = dataset[i,0],dataset[i,1]
# Get quadrant of this point (minus 1)
if x>redX:
if y>redY:
Q = 0
else:
Q = 1
else:
if y>redY:
Q = 3
else:
Q = 2
# Get distance (squared) of this point - square root is slow and unnecessary
d = (x-redX)*(x-redX) + (y-redY)*(y-redY)
# Update if nearest
if d<dst[Q]:
dst[Q] = d
idx[Q] = i
return dst,idx
# Number of points
Npts = 1000
# Generate the Npts random X,Y points
dataset = np.random.rand(Npts,2)
# Generate random red pixel (X,Y)
redX,redY = random.random(),random.random()
res = QuadNearestNeighbour(dataset,redY)
print(res)
原答案
对于 1,000 个点,这在 1 毫秒内运行 - 为红点在 1,000 多个不同位置计时。
这将生成一个包含 1,000 个点的单个数据集,然后生成 1,000 个红色中心并对每个中心进行 4 个象限。它在我的 MacBook 上运行不到一秒,所以每个红色中心为 1 毫秒。
我相信可以通过去掉无关的 print
语句来进一步优化它,而不是计算出仅用于说明的东西,也许使用 numba
但这足以一天。
我没有添加大量错误检查或边缘情况,这不是生产质量代码。
#!/usr/bin/env python3
import random
import numpy as np
from scipy.spatial.distance import cdist
# Let's get some repeatable randomness
np.random.seed(42)
# Number of points,number of iterations
Npts,Niter = 1000,1000
# Generate the Npts random X,2)
# Run lots of iterations for better timing
for n in range(Niter):
# Generate random red pixel (X,Y)
red = np.random.rand(1,2)
# Work out a Boolean mask for the 4 quadrants
above = dataset[:,0]>red[0,0]
right = dataset[:,1]>red[0,1]
Q1 = np.logical_and(above,right) # top-right
Q2 = np.logical_and(above,~right) # top-left
Q3 = np.logical_and(~above,~right) # bottom-left
Q4 = np.logical_and(~above,right) # bottom-right
Q = 1
for quadrant in [Q1,Q2,Q3,Q4]:
print(f'Quadrant {Q}: ')
# Boolean mask to select points in this quadrant
thisQuad = dataset[quadrant]
l = len(thisQuad)
if l==0:
print('No points in quadrant')
continue
# print(f'nPoints in quadrant: {l}')
# print(f'Points: {dataset[quadrant]}')
# Calculate distance of each point in dataset to red point
distances = cdist(thisQuad,red,'sqeuclidean')
# Choose nearest
idx = np.argmin(distances)
print(f'Index: {idx},point: {thisQuad[idx]},distance: {distances[idx]}')
Q += 1