在 VBA 中优化 HTML 解析

问题描述

我有一些 VBA 代码可以向 https://nt3-s.zacks.com/fundamreports/Default.aspx 发出请求并将所有数据提取到工作表中。我的问题是我的解析技术需要很长时间。有没有人对我如何改进下面的代码有什么建议?也许我应该使用 QuerySelectorAll() 或其他一些方法...

Sub Financials(ticker,SheetName As String)
Dim XMLPage As New MSXML2.XMLHTTP60
Dim RP1,RP2,QP1,QP2,QP3,QP4,QP5,QP6 As String
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim HTMLRow,HTMLCell As MSHTML.IHTMLElement
Dim i As Integer
Dim r,c,offset,shift As Integer

'Application.ScreenUpdating = False

shift = 50

' STANDARDIZED FINANCIAL STATEMENTS

' Generate income statements
'============================

XMLPage.Open "Post","https://nt3-s.zacks.com/fundamreports/Default.aspx",False

' Header
XMLPage.setRequestHeader "Content-Type","application/x-www-form-urlencoded"

' Body Parameters
' required parameters so every table value can be editted
RP1 = "__VIEWSTATE=%2FwEPDwUJNDg5NjgzODkyD2QWAgIDD2QWAgIBD2QWBAIPDxAPFgIeB0VuYWJsZWRnZGRkZAITDxAPFgIfAGdkZGRkGAEFHl9fQ29udHJvbHNSZXF1aXJlUG9zdEJhY2tLZXlfXxYBBQhidG5FeGNlbHC%2FJVg5ST2AoHtph1XAsql0bObX"

' String is too long to be typed in one line...
a1 = "%2FwEWdALpx6COCwKm07CqBwKSh%2B%2FQCgL96MW%2BBgLN6MW%2BBgLav7PDDQLCv%2FPADQLNv%2FPADQLcv%2FPADQLfv%2FPADQLev%2FPADQLZv%2FPADQLYv%2FPADQLbv%2FPADQLav%2FPADQLFv%2FPADQLduMOkAwLq0v%2FjCQLq0uO8AQLq0teZCgLq0rvyAwLq0q%2FPDALq0pOoBALq0oeFDQLq0uvhBgLq0p%2BJAwLq0oPiDALX%2B53NDwLX%2B4GmBwLX%2B%2FUCAtf72d8JAtf7zbgBAtf7sZUKAtf7pe4DAtf7icsMAtf7vfIJAtf7oc8CApOYyLkHApOYvBICk5ig7wkCk5iUyAICk5j4pAoCk5jsgQMCk5jQ2gwCk5jEtwQCk5jo3gICk5jcuwoC%2BKHuog0C%2BKHS%2FwYC%2BKHG2A8C%2BKGqtQcC%2BKGeDgL4oYLrCQL4ofbHAgL4odqgCgL4oY7ICAL4ofIkAuW2jIwDAuW28OgMAuW25MUFAsTR38AHAvO744cNAvO7%2F9gFAvO7y%2F0OAvO7p5YHAvO7s6sIAvO7j0wC87ub4QkC87v3hQIC87uD7QcC87ufhggCzpKBqQsCzpKdwgMCzpLp5gQCzpLFuw0CzpLR3AUCzpKt8Q4CzpK5igcCzpKVrwgCzpKhlg0CzpK9qwYCivHU3Q"
a2 = "MCivGg9gQCivG8iw0CivGIrAYCivHkwA4CivHw5QcCivHMvggCivHYUwKK8fS6BgKK8cDfDgLhyPLGCQLhyM6bAgLhyNq8CwLhyLbRAwLhyILqBALhyJ6PDQLhyOqjBgLhyMbedgLhyJKsDALhyO7ABAL835dobwL83%2ByMCAL83%2FihAQLnzLDSBAL4zLDSBAL5zLDSBAL6zLDSBAL8zLDSBAL9zLDSBAK3wpPeDgKFwpPeDgKuh8zbAwKhh8zbAwLR2eHwDAKFt7SHCcQGh8eHqGGqe%2F6U9Wz%2FpSk9FF8d"

RP2 = "&__EVENTVALIDATION=" & a1 & a2

' Query Parameters
QP1 = "&tbTicker=" & ticker
QP2 = "&ddBasis=Q"
QP3 = "&ddPeriod=0"
QP4 = "&ddFrom=" & CStr(Year(DateAdd("yyyy",-10,Date)))
QP5 = "&ddTo=" & CStr(Year(Date))
QP6 = "&ddFormType=0"

XMLPage.send RP1 & RP2 & QP1 & QP2 & QP3 & QP4 & QP5 & QP6

HTMLDoc.body.innerHTML = XMLPage.responseText

r = 1
c = 1
offset = 0 * shift

Sheets(SheetName).Select

Cells.Select
Selection.ClearContents

'Extract data from table
For Each HTMLRow In HTMLDoc.getElementsByTagName("tr")
    For Each HTMLCell In HTMLRow.getElementsByTagName("th")
        Cells(r,c + offset).value = HTMLCell.innerText
        c = c + 1
    Next HTMLCell
    For Each HTMLCell In HTMLRow.getElementsByTagName("td")
        Cells(r,c + offset).value = HTMLCell.innerText
        c = c + 1
    Next HTMLCell
c = 1
r = r + 1
Next HTMLRow

' Generate balance statements
'======================================
'Request
XMLPage.Open "Post","application/x-www-form-urlencoded"

QP6 = "&ddFormType=1"

XMLPage.send RP1 & RP2 & QP1 & QP2 & QP3 & QP4 & QP5 & QP6

HTMLDoc.body.innerHTML = XMLPage.responseText

r = 1
c = 1
offset = 1 * shift

'Extract data from table
For Each HTMLRow In HTMLDoc.getElementsByTagName("tr")
    For Each HTMLCell In HTMLRow.getElementsByTagName("th")
        Cells(r,c + offset).value = HTMLCell.innerText
        c = c + 1
    Next HTMLCell
c = 1
r = r + 1
Next HTMLRow

编辑:我更改了代码以包含有关我的程序的更多详细信息。该子将一次收集多个公司的损益表、资产负债表、现金流量和其他指标。我包括了几个我如何使用 POST 方法解析数据的实例。

解决方法

通过每次迭代计算,将 Application.Calculation = xlManual 添加到宏的开头,将 Application.Calculation = xlAutomatic 添加到末尾,这将防止在每次循环后更新计算。