问题描述
Sub WebScraping()
Dim XMLPage As New MSXML2.XMLHTTP60
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim Records As MSHTML.IHTMLElementCollection
Dim Record As MSHTML.IHTMLElement
Dim HTMLims As MSHTML.IHTMLElementCollection
Dim HTMLIm As MSHTML.IHTMLElement
Dim URL As String
Dim RowNum As Integer: RowNum = 1
Dim NumPage As Integer
Sheets("Sheet1").Range("a1:z10000").ClearContents
For NumPage = 1 To 4
URL = "https://www.worldathletics.org/records/toplists/sprints/100-metres/outdoor/men/senior/2020?page=" & NumPage & ""
XMLPage.Open "Get",URL,False
XMLPage.setRequestHeader "Content-Type","text/xml"
XMLPage.send
HTMLDoc.body.innerHTML = XMLPage.responseText
Set Records = HTMLDoc.getElementById("toplists").getElementsByTagName("table")(0).getElementsByTagName("tbody")(0).getElementsByTagName("tr")
For Each Record In Records
Set HTMLims = Record.getElementsByTagName("td")
For Each HTMLIm In HTMLims
Sheets("Sheet1").Cells(RowNum,1).Value = HTMLims.Item(0).innerText
Sheets("Sheet1").Cells(RowNum,2).Value = HTMLims.Item(1).innerText
Sheets("Sheet1").Cells(RowNum,3).Value = HTMLims.Item(2).innerText
Sheets("Sheet1").Cells(RowNum,4).Value = HTMLims.Item(3).innerText
Sheets("Sheet1").Cells(RowNum,5).Value = HTMLims.Item(4).innerText
Sheets("Sheet1").Cells(RowNum,6).Value = HTMLims.Item(5).innerText
Sheets("Sheet1").Cells(RowNum,7).Value = HTMLims.Item(6).innerText
Sheets("Sheet1").Cells(RowNum,9).Value = HTMLims.Item(8).innerText
Sheets("Sheet1").Cells(RowNum,10).Value = HTMLims.Item(9).innerText
Sheets("Sheet1").Cells(RowNum,11).Value = HTMLims.Item(10).innerText
Next HTMLIm
RowNum = RowNum + 1
Next Record
Next NumPage
End Sub
但是当我想插入代码以获取运动员的身份证时,我遇到了麻烦(也可以单独使用):
'Athletes' codes
RowNum = 1
Set HTMLims = HTMLDoc.getElementsByTagName("a")
For Each HTMLIm In HTMLims
If Left(HTMLIm.getAttribute("href"),24) = "about:/athletes/athlete=" Then
Sheets("Sheet1").Cells(RowNum,12).Value = Right(HTMLIm.getAttribute("href"),Len(HTMLIm.getAttribute("href")) - _
(InStr(HTMLIm.getAttribute("href"),"=")))
RowNum = RowNum + 1
End If
Next HTMLIm
请问有人可以帮助我如何将第二个代码插入其中吗?
谢谢。
解决方法
尝试一次将ID号包含在内。我使用了一个小技巧,以便让您在第3列中的不同数字旁边获得+
或-
符号。您可以合并其余部分,因为我在此处粘贴了相关部分。>
部分合并的代码:
Sub WebScraping()
Const Url As String = "https://www.worldathletics.org/records/toplists/sprints/100-metres/outdoor/men/senior/2020?page="
Dim XMLPage As New XMLHTTP60,HTMLDoc As New HTMLDocument
Dim Record As Object,NumPage As Integer
Dim RowNum As Integer,Ws As Worksheet,I&
Set Ws = ThisWorkbook.Worksheets("Sheet1")
For NumPage = 1 To 2
XMLPage.Open "Get",Url & NumPage,False
XMLPage.setRequestHeader "Content-Type","text/xml"
XMLPage.send
HTMLDoc.body.innerHTML = XMLPage.responseText
For Each Record In HTMLDoc.getElementsByTagName("table")(0).getElementsByTagName("tbody")(0).getElementsByTagName("tr")
RowNum = RowNum + 1: Ws.Cells(RowNum,1).Value = Record.getElementsByTagName("td").Item(0).innerText
Ws.Cells(RowNum,2).Value = Record.getElementsByTagName("td").Item(1).innerText
Ws.Cells(RowNum,3).Value = "'" & Record.getElementsByTagName("td").Item(2).innerText
Ws.Cells(RowNum,4).Value = Record.getElementsByTagName("td").Item(3).innerText
On Error Resume Next
Ws.Cells(RowNum,5).Value = Split(Record.getElementsByTagName("td").Item(3).getElementsByTagName("a")(0).getAttribute("href"),"=")(1)
On Error GoTo 0
Next Record
Next NumPage
End Sub