问题描述
我正在尝试从政府网站中提取一个数字,我已经做了很多谷歌搜索,但我有点迷失了想法,我下面的代码返回了一个数字,但这不是我想要的数字,我也不完全是知道为什么。
我想从“按地区划分的病例(整个大流行病)”表“洛杉矶上层”部分和“海上绍森德”病例编号中减去该数字。
https://coronavirus.data.gov.uk/details/cases
我从网上某处窃取了此代码,并试图用我在网站 F12 部分找到的班级编号进行复制。
Sub ExtractLastValue()
Set objIE = CreateObject("InternetExplorer.Application")
objIE.Top = 0
objIE.Left = 0
objIE.Width = 800
objIE.Height = 600
objIE.Visible = True
objIE.Navigate ("https://coronavirus.data.gov.uk/details/cases")
Do
DoEvents
Loop Until objIE.readystate = 4
MsgBox objIE.document.getElementsByClassName("sc-bYEvPH khGBIg govuk-table__cell govuk-table__cell--numeric ")(0).innerText
Set objIE = nothing
End Sub
解决方法
数据来自官方 API,当您点击 Upper Tier 面板时,会在该页面上动态返回 json 响应。
查看并使用 API 指南 这里:
https://coronavirus.data.gov.uk/details/developers-guide
您可以按照 API 文档中的指导直接发出 xhr 请求,然后使用 json 解析器来处理响应。对于您的请求,它将类似于以下内容:
https://coronavirus.data.gov.uk/api/v1/data?filters=areaName=Southend-on-Sea&areaType=utla&latestBy=cumCasesByPublishDate&structure=
{"date":"date","areaName":"areaName","cumCasesByPublishDate":"cumCasesByPublishDate","cumCasesByPublishDateRate":"cumCasesByPublishDateRate"}
XHR:
一个使用 jsonconverter.bas 作为 json 解析器的工作示例
Option Explicit
Public Sub GetCovidNumbers()
Dim http As Object,json As Object
Set http = CreateObject("MSXML2.XMLHTTP")
With http
.Open "GET","https://coronavirus.data.gov.uk/api/v1/data?filters=areaName=Southend-on-Sea&areaType=utla&latestBy=cumCasesByPublishDate&structure={""date"":""date"",""areaName"":""areaName"",""cumCasesByPublishDate"":""cumCasesByPublishDate"",""cumCasesByPublishDateRate"":""cumCasesByPublishDateRate""}",False
.setRequestHeader "User-Agent","Mozilla/5.0"
.send
Set json = JsonConverter.ParseJson(.responseText)("data")(1)
End With
With ActiveSheet
Dim arr()
arr = json.Keys
.Cells(1,1).Resize(1,UBound(arr) + 1) = arr
arr = json.Items
.Cells(2,UBound(arr) + 1) = arr
End With
End Sub
Json 库(在上述解决方案中使用):
我使用 jsonconverter.bas。从 here 下载原始代码并添加到名为 JsonConverter 的标准模块。然后,您需要转到 VBE > 工具 > 引用 > 添加对 Microsoft 脚本运行时的引用。从复制的代码中删除顶部的 Attribute 行。
Internet Explorer:
您可以执行一个更慢、更复杂的 Internet Explorer 解决方案,您需要在出现 utla 选项时选择该选项,然后从表中选择所需的值:
Option Explicit
Public Sub GetCovidNumbers()
'Tools references Microsoft Internet Controls and Microsoft HTML Object Library
Dim ie As SHDocVw.InternetExplorer,t As Date,ele As Object
Const MAX_WAIT_SEC As Long = 10
Set ie = New SHDocVw.InternetExplorer
With ie
.Visible = True
.Navigate2 "https://coronavirus.data.gov.uk/details/cases"
While .Busy Or .ReadyState <> READYSTATE_COMPLETE: DoEvents: Wend
t = Timer 'timed loop for element to be present to click on (to get utla)
Do
On Error Resume Next
Set ele = .Document.querySelector("#card-cases_by_area_whole_pandemic [aria-label='Upper tier LA']")
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While ele Is Nothing
If ele Is Nothing Then Exit Sub
ele.Click
While .Busy Or .ReadyState <> READYSTATE_COMPLETE: DoEvents: Wend
Dim table As MSHTML.HTMLTable,datetime As String,result()
Set table = .Document.querySelector("table[download='cumCasesByPublishDate,cumCasesByPublishDateRate']")
datetime = .Document.querySelector("time").getAttribute("datetime")
result = GetDataForUtla("Southend-on-Sea",datetime,table)
With ActiveSheet
.Cells(1,4) = Array("Datetime","Area","Cases","Rate per 100,000 population")
.Cells(2,UBound(result) + 1) = result
End With
.Quit
End With
End Sub
Public Function GetDataForUtla(ByVal utla As String,ByVal datetime As String,ByVal table As MSHTML.HTMLTable) As Variant
Dim row As MSHTML.HTMLTableRow,i As Long
For Each row In table.Rows
If InStr(row.outerHTML,utla) > 0 Then
Dim arr(4)
arr(0) = datetime
For i = 0 To 2
arr(i + 1) = row.Children(i).innerText
Next
GetDataForUtla = arr
Exit Function
End If
Next
GetDataForUtla = Array("Not found")
End Function
参考文献: