问题描述
我正在尝试一种算法,以O(N)时间对字符串列表进行排序,我认为我的方法有点类似于Radix Sort。但是,我没有尝试获取排序列表,而是试图像这样获取“排序索引”:
输入:[“ AT#”,“ AC#”,“ A#”]
输出:[2,1,0]
但是,我的算法输出[1,1,0]好像它认为“ AT#”和“ AC#”相同
这是我尝试过的代码
def two_key_count_sort(L,longest_word,sigma = 6):
'''
Sorting a two key value list L based on counting sort
:param L: list containing triplets (p,s,v) (primary key,secondary key and value)
:param longest_word: length of the longest word in the sorting
:param sigma : Alphabet Size,default is 6 indicating #,A,C,N,G,T
:return: a sorted list L'
'''
# Generate Count^p and B^p
count_p = [0 for i in range(longest_word)]
projection_dic = {"#":0,"A":1,"C":2,"N":3,"G":4,"T":5}
primary_string = True
for i in L:
primary_key = i[0]
if isinstance(primary_key,str):
count_p[projection_dic[primary_key]] += 1
else:
count_p[primary_key] += 1
primary_string = False
b_p = [0 for i in range(longest_word)]
for i in range(1,len(count_p)):
b_p[i] = b_p[i - 1] + count_p[i - 1]
# Sorting L by secondary key
# Generate Count^s and B^s
count_s = [0 for i in range(sigma)]
seconday_string = True
for i in L:
secondary_key = i[1]
if isinstance(secondary_key,str):
count_s[projection_dic[secondary_key]] += 1
else:
count_s[secondary_key] += 1
seconday_string = False
b_s = [0 for i in range(sigma)]
for i in range(1,len(count_s)):
b_s[i] = b_s[i - 1] + count_s[i - 1]
#Calculate L_s -> L sorted by secondary key
L_s = [(None,None,None) for i in range(len(L))]
for i in range(len(L)):
if seconday_string:
L_s[b_s[projection_dic[L[i][1]]]] = L[i]
b_s[projection_dic[L[i][1]]] += 1
else:
L_s[b_s[L[i][1]]] = L[i]
b_s[L[i][1]] += 1
# Compute L' Using L_s and B^p
L_prime = [(None,None) for i in range(len(L_s))]
for k in range(len(L_s)):
if not primary_string:
L_prime[b_p[L_s[k][0]]] = L_s[k]
b_p[L_s[k][0]] += 1
else:
L_prime[b_p[projection_dic[L_s[k][0]]]] = L_s[k]
b_p[projection_dic[L_s[k][0]]] += 1
return L_prime
def sort_variable_length_string(S,sigma = 6):
'''
Sorting variable length strings in L in linear time
:param S: list of strings to be sroted
:param sigma: alphabet size of L,T
:return: sorted strings
'''
# get the largest length in string L
longest_word = max([len(i) for i in S])
n = len(S)
N = sum([len(i) for i in S])
# Initialization (p = 1)
# Generate a block pointer array B[1 .. n]
count = [0 for i in range(sigma)]
projection_dic = {"#": 0,"A": 1,"C": 2,"N": 3,"G": 4,"T": 5}
for i in range(n):
count[projection_dic[S[i][0]]] += 1
B = [0 for i in range(n)]
for i in range(n):
index = projection_dic[S[i][0]]
for j in range(index):
B[i] += count[j]
# Generate list L and sort it in linear time
L = [(None,None) for i in range(N)]
L_index = 0
for i in range(len(S)):
for j in range(len(S[i])):
L[L_index] = (j,S[i][j],i)
L_index += 1
L_prime = two_key_count_sort(L,longest_word)
L_i = [[] for i in range(longest_word)]
L_i_index = 0
for i in L_prime:
if i[0] == L_i_index:
L_i[L_i_index].append(i)
else:
L_i_index += 1
L_i[L_i_index].append(i)
# Update/Refinement (p > 1)
# Initialize Q in O(|L^p|) time using two scans over L^p
for p in range(1,longest_word):
T_k = [0 for i in range(len(L_i[p]))]
for k in range(len(L_i[p])):
T_k[k] = B[L_i[p][k][2]]
# Initialize Q and C
Q = [None for i in range(n)]
C = [None for i in range(n)]
for i in T_k:
Q[i] = -1
for k in range(len(L_i[p])):
if Q[T_k[k]] == -1:
Q[T_k[k]] = T_k[k]
C[T_k[k]] = L_i[p][k][1]
# Update
S = [0 for i in range(n)]
for k in range(len(L_i[p])):
if C[T_k[k]] == L_i[p][k][1]:
B[L_i[p][k][2]] = Q[T_k[k]]
S[T_k[k]] += 1
else:
Q[T_k[k]] += S[T_k[k]]
S[T_k[k]] = 0
C[T_k[k]] = L_i[p][k][1]
B[L_i[p][k][2]] = Q[T_k[k]]
return B
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)