用各种分隔符将地址拆分为街道地址,城市,州,邮政编码和国家/地区

问题描述

我的数据在Excel中。我有几张数据,其中地址始终位于每张纸的同一列中。地址格式的示例包括

华盛顿特区15街15号1155号西北套房20005美国
4600 Emperor Blvd#200 Durham,NC 27703-8577 US
200 Stevens Drive费城,PA 19113美国
505 City Parkway西奥兰治,CA 92868美国
550 S Caldwell St,夏洛特,NC 28202-2633 US
1643 NW 136th Ave Ste H200 Sunrise,FL 33323-2857 US

我已经尝试过下面的代码,但是此时在代码“ sCity = Trim(Mid $(rCell.Value,Len(sAddress)+ 1,lStatePos-Len(sAddress)-1))”中出现错误

有人可以帮我弄清楚如何解决此问题吗?

Sub SplitAddresses()

    Dim vaStates As Variant
    Dim vaStreets As Variant
    Dim i As Long
    Dim rCell As Range
    Dim sAddress As String
    Dim sCity As String,sstate As String
    Dim sZip As String
    Dim lStreetPos As Long,lStatePos As Long

    vaStates = Array(“ AL “,“ AK “,“ AZ “,“ AR “,“ CA “,“ CO “,“ CT “,“ DE “,“ DC “,“ FL “,“ GA “,“ HI “,“ ID “,“ IL “,“ IN “,“ IA “,“ KS “,“ KY “,“ LA “,“ ME “,“ MD “,“ MA “,“ MI “,“ MN “,“ MS “,“ MO “,“ MT “,“ NE “,“ NV “,“ NH “,“ NJ “,“ NM “,“ NY “,“ NC “,“ ND “,“ OH “,“ OK “,“ OR “,“ PA “,“ RI “,“ SC “,“ SD “,“ TN “,“ TX “,“ UT “,“ VT “,“ VA “,“ WA “,“ WV “,“ WI “,“ WY “,“ GU “,“ PR “)
    vaStreets = Array(" CR "," BLVD "," RD "," ST "," AVE "," CT ")

    For Each rCell In Sheet1.Range("A1:A5").Cells
        sAddress = "": sCity = "": sZip = "": sstate = ""
        For i = LBound(vaStreets) To UBound(vaStreets)
            lStreetPos = InStr(1,rCell.Value,vaStreets(i))
            If lStreetPos > 0 Then
                sAddress = Trim(Left$(rCell.Value,lStreetPos + Len(vaStreets(i)) - 1))
                Exit For
            End If
        Next i

        For i = LBound(vaStates) To UBound(vaStates)
            lStatePos = InStr(1,vaStates(i))
            If lStatePos > 0 Then
                sCity = Trim(Mid$(rCell.Value,Len(sAddress) + 1,lStatePos - Len(sAddress) - 1))
                sstate = Trim(Mid$(rCell.Value,lStatePos + 1,Len(vaStates(i)) - 1))
                sZip = Trim(Mid$(rCell.Value,lStatePos + Len(vaStates(i)),Len(rCell.Value)))
                Exit For
            End If
        Next i

        rCell.Offset(0,1).Value = "'" & sAddress
        rCell.Offset(0,2).Value = "'" & sCity
        rCell.Offset(0,3).Value = "'" & sstate
        rCell.Offset(0,4).Value = "'" & sZip

    Next rCell

End Sub

这是我得到的错误error_image

解决方法

拆分逻辑中存在一些不一致之处,这还不包括您必须将大写街道数组也与Ucase()字符串值进行比较。

但是,好消息-由于您似乎采用了随后的地址逻辑,即将城市,州/省/自治区(+)括在最后冒号分隔符周围,因此可以尝试以下代码:

Option Explicit             ' declaration head of code module
Enum c                      ' define column constants
    [_Start] = 0
    add1
    City
    State
    Zip
End Enum

Sub SplitAddresses()
With Sheet1
    'define dataset
    Dim lastRow As Long: lastRow = .Range("A" & .Rows.Count).End(xlUp).Row
    Dim rng As Range: Set rng = .Range("A2:A" & lastRow)
    'assign to variant datafield array (provide for 4 columns: Add+City+State+ZIP)
    Dim v: v = rng.Resize(columnsize:=4).Value2
    'split data
    doSplit v
    'write split results to any target,e.g. B:B
    .Range("B2").Resize(UBound(v),4) = v
End With
End Sub

帮助过程doSplit

Sub doSplit(data)
Dim i As Long
For i = LBound(data) To UBound(data)
    Dim curAddress As String: curAddress = data(i,c.add1)
    
    Dim tokens,tmp
    tokens = Split(curAddress,",")
    
    '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    'a) analyze string part after last ","
    '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    tmp = Split(Trim(tokens(UBound(tokens))) & " "," ",2)
    'aa) add State + Zip (to columns 3..4)
    data(i,c.State) = tmp(0): data(i,c.Zip) = tmp(1)
    
    '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    'b) analyze first string part
    '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    tmp = Split(tokens(UBound(tokens) - 1)," ")
    'data(i,c.City) = tmp(UBound(tmp))   '<< only for 1-word city names
     data(i,c.City) = getCity(tmp)       '<< see edit below
    'bb) add City + Address
    data(i,c.add1) = Split(curAddress,data(i,c.City),2)(0)
    data(i,c.add1) = Replace(data(i,c.add1),"")
Next i
End Sub

帮助功能 //由于@RonRosenfeld的评论而进行编辑

由于会有由复合词组成的城市名称,因此必须将上面子区域中的城市字符串分配从data(r,c.City) = tmp(UBound(tmp))更改为

    data(r,c.City) = getCity(tmp)  ' << function call

功能getCity()

包括对common开头部分的检查,如“北部”,“西部”或“新”,以避免至少检查带有复合城市名称的详尽列表。必须在附加列表cities中定义所有其他需要的具有多个单词的城市名称:

Function getCity(tmp) As String
'Purp.: return valid city names of either one or two parts
'[1]Definitions
    'a) List common first parts of city names like "West" in "West Orange"
        Dim common$: common = "North,West,South,East,Grand,New"
    'b) List all other needed cities consisting of compound words
        Dim cities$: cities = "Sterling Heights,Ann Arbor"
'[2]Get potential city name
    'a) Define tmp indices of potential city tokens
        Dim first&: first = UBound(tmp) - 1
        Dim secnd&: secnd = UBound(tmp)
    'b) Build city name as compound string of tmp tokens
        Dim City As String
        City = Trim(IIf(first < 0,"",tmp(first) & " ") & tmp(secnd))
'[3]Check common first parts plus additional cities list
    'a) Check for common name parts like e.g. "West" in "West Orange"
        If InStr(common & ",tmp(first) & ",") Then getCity = City: Exit Function
    'b) Check rest in listed cities and return function result
        getCity = IIf(InStr(cities,City) > 0,City,tmp(secnd))
End Function

includes 2-word city names

,

在您的评论中,有一个返回字符来描述城市的街道地址,以及地址的常规格式:street|City,State Zip Country该算法变得更加简单,因为一系列Split函数可以分开地址部分。

我还使用了Type语句-不必要,但是使代码更清晰,IMO。 根据格式,某些Trim语句可能不是必需的,但它们不会受到损害。

请注意,您可以更改数据源和结果位置的范围/表格以适合您的特定要求。

编辑:我刚读完您的评论,即从街道地址返回市区之前,可能会有多个returns

.street的代码已相应更改

Option Explicit
Type Address
    street As String
    city As String
    state As String
    zip As String
    country As String
End Type
Sub splitAddresses()
    Dim wsSrc As Worksheet,wsRes As Worksheet,rRes As Range
    Dim vSrc As Variant,vRes As Variant
    Dim myAdr As Address
    Dim v,w,x,y
    Dim I As Long
    
Set wsSrc = Worksheets("sheet1")

'read into vba array for faster processing
With wsSrc
    vSrc = .Range(.Cells(2,1),.Cells(.Rows.Count,1).End(xlUp))
End With

Set wsRes = Worksheets("Sheet1")
    Set rRes = wsRes.Cells(1,3)


ReDim vRes(0 To UBound(vSrc),1 To 5)

'Headers
    vRes(0,1) = "Street"
    vRes(0,2) = "City"
    vRes(0,3) = "State"
    vRes(0,4) = "Zip"
    vRes(0,5) = "Country"
    
For I = 1 To UBound(vSrc)
    v = Split(vSrc(I,vbLf)
    With myAdr
        y = v
        ReDim Preserve y(UBound(y) - 1)
        .street = WorksheetFunction.Trim(Join(y," "))

    w = Split(Trim(v(UBound(v))),")
        .city = w(0)
    
    x = Split(Trim(w(1)))
        .state = Trim(x(0))
        .zip = Trim(x(1))
        .country = Trim(x(2))
    
    vRes(I,1) = .street
    vRes(I,2) = .city
    vRes(I,3) = .state
    vRes(I,4) = .zip
    vRes(I,5) = .country
End With

Set rRes = rRes.Resize(rowsize:=UBound(vRes,1) + 1,columnsize:=UBound(vRes,2))
With rRes
    .EntireColumn.Clear
    .Value = vRes
    .Rows(1).Font.Bold = True
    .Columns(4).NumberFormat = "@"
    .EntireColumn.AutoFit
End With
    
Next I

End Sub

enter image description here