计算字符串的相似度VB2005——思索之二

在完成“计算字符串的相似度(VB2005)——思索之一”之后,照例对程序进行了一番测试。第一次找了两个相似的字符串,长度分别为1517。速度和结果都比较满意。这也印证了算法的正确性。第二次找了两个相似的字符串,长度分别为15001507。嗯,直接跳出错误,说是堆栈错误。实际上是由于递归嵌套出了问题。采用递归算法,只是理论上有效,便于理解,实际应用中会出现各种限制。如本例,嵌套约1000层的时候就超过了系统的限制。必须想一个解决之道。

仔细观察,可以发现用数学性的语言描述就是

Fnm=GFnm),Fn+1m),Fnm+1))

这个可以简化为递推,由于递推可以放在一个函数内,就解决了系统的递归限制。代码赋予其后。用的是VB2005

Public Class clsCalculateStringDistanceEx

Implements IDistance

Private mStringA As String

Private mStringB As String

Private mIsSame As Boolean

Private mDic As Dictionary(Of String,Integer)

Private Function CalculateStringDistance(ByVal StartLower As Integer) As Integer

Dim LA As Integer = mStringA.Length

Dim LB As Integer = mStringB.Length

Dim i As Integer,j As Integer

Dim T1 As Integer,T2 As Integer,T3 As Integer

AddToDic(LA + 1,LB + 1,0)

For i = LA To StartLower Step -1

AddToDic(i,LA - i + 1)

Next

For i = LB To StartLower Step -1

AddToDic(LA + 1,i,LB - i + 1)

Next

For i = LA To StartLower Step -1

For j = LB To StartLower Step -1

If mStringA.Chars(i - 1) = mStringB.Chars(j - 1) Then

AddToDic(i,j,GetFromDic(i + 1,j + 1))

Else

T1 = GetFromDic(i + 1,j)

T2 = GetFromDic(i,j + 1)

T3 = GetFromDic(i + 1,j + 1)

AddToDic(i,Min(T1,T2,T3) + 1)

End If

Next

Next

Return GetFromDic(StartLower,StartLower)

End Function

Private Sub AddToDic(ByVal S1 As Integer,ByVal S2 As Integer,ByVal Value As Integer)

mDic.Add(S1 & "," & S2,Value)

End Sub

Private Function GetFromDic(ByVal S1 As Integer,ByVal S2 As Integer) As Integer

Return mDic(S1 & "," & S2)

End Function

Private Function Min(ByVal ParamArray M() As Integer) As Integer

Dim i As Integer,J As Integer

J = M(0)

For i = 1 To M.GetUpperBound(0)

If M(i) < J Then J = M(i)

Next

Return J

End Function

Public Function CalculateStringDistance() As Integer _

Implements IDistance.CalculateStringDistance

If mStringA.Length = 0 Then Return mStringB.Length

If mStringB.Length = 0 Then Return mStringA.Length

mDic = New Dictionary(Of String,Integer)

mIsSame = True

Dim i As Integer,j As Integer

For i = 1 To Min(mStringA.Length,mStringB.Length)

If mStringA.Chars(i - 1) <> mStringB.Chars(i - 1) Then

mIsSame = False

j = i

Exit For

End If

Next

If mIsSame = False Then

Return CalculateStringDistance(j)

Else

Return Math.Abs(mStringA.Length - mStringB.Length)

End If

End Function

Public ReadOnly Property DicCount() As Integer _

Implements IDistance.DicCount

Get

Return mDic.Count

End Get

End Property

Public Sub SetString(ByVal S1 As String,ByVal S2 As String) _

Implements IDistance.SetString

mStringA = S1

mStringB = S2

End Sub

End Class

相关文章

Format[$] ( expr [ , fmt ] ) format 返回变体型 format$ 强...
VB6或者ASP 格式化时间为 MM/dd/yyyy 格式,竟然没有好的办...
在项目中添加如下代码:新建窗口来显示异常信息。 Namespace...
转了这一篇文章,原来一直想用C#做k3的插件开发,vb没有C#用...
Sub 分列() ‘以空格为分隔符,连续空格只算1个。对所选...
  窗体代码 1 Private Sub Text1_OLEDragDrop(Data As Dat...