问题描述
这是此问题解决方案的后续:How to extract data from HTML page source of (a tab within) a webpage?
我正在尝试对 finance.yahoo.com/quote/AAPL/cash-flow?p=AAPL 上的现金流量表(Cash Flow Statement)做同样的事情,使用以下 JSON 路径:
.getJSONObject("dispatcher")
.getJSONObject("stores")
.getJSONObject("QuoteSummaryStore")
.getJSONObject("cashflowStatementHistory")
.getJSONArray("cashflowStatements");
然后尝试提取键 trailingFreeCashFlow 的值,但是失败了,报错为 "No value for trailingFreeCashFlow"。
public static Map<String,Map<String,String>> getCashFlowTableNames() {
final Map<String,String> cashFlow = new LinkedHashMap<String,String>() {
{
put("trailingFreeCashFlow","trailingFreeCashFlow");
}
};
Map<String,String>> allTableNames = new LinkedHashMap<String,String>>() {
{
put("cashFlow",cashFlow);
}
};
return allTableNames;
}
和
public static String getCashFlowYear(String requestURL) throws IOException {
String userAgent1 = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/69.0.3497.100 Safari/537.36 OPR/56.0.3051.43";
try {
Document doc1 = Jsoup.connect(requestURL).userAgent(userAgent1).get();
Elements scriptTags = doc1.getElementsByTag("script");
String re = "root\\.App\\.main\\s*\\=\\s*(.*?);\\s*\\}\\(this\\)\\)\\s*;";
for (Element script : scriptTags) {
Pattern pattern = Pattern.compile(re,Pattern.DOTALL);
Matcher matcher = pattern.matcher(script.html());
if (matcher.find()) {
String data = matcher.group(1);
//Log.e("CashFlowData",data);
JSONObject jo = new JSONObject(data);
JSONArray table = getCashFlowTable(jo);
JSONArray tableQ = getCashFlowTableQ(jo);
Map<String,String>> tableNames = getCashFlowTableNames();
String[] dates = getDates(table);
String[] datesQ = getDates(tableQ); //works
List<String> tableData = new ArrayList<>();
for (Map.Entry<String,String>> tableEntry : tableNames.entrySet()) {
tableData.add(tableEntry.getKey());
tableData.addAll(Arrays.asList(dates));
for (Map.Entry<String,String> row1 : tableEntry.getValue().entrySet()) {
String[] tableRow1 = getRow(table,row1.getValue());
tableData.add(row1.getKey());
for (String column : tableRow1) {
tableData.add(column);
}
}
}
cashFlowData = TextUtils.join(" ",tableData);
cashFlowData = cashFlowData.replaceAll("[^a-zA-Z0-9 /-]","");
cashFlowData = cashFlowData.trim().replaceAll("(?<=[A-Za-z])\\s+(?=[A-Za-z])","");
Log.e("cashFlowData",cashFlowData);
List<String> tableDataQ = new ArrayList<>();
for (Map.Entry<String,String>> tableEntry : tableNames.entrySet()) {
tableDataQ.add(tableEntry.getKey());
tableDataQ.addAll(Arrays.asList(datesQ));
for (Map.Entry<String,String> row1 : tableEntry.getValue().entrySet()) {
String[] tableRow1 = getRow(tableQ,row1.getValue());
tableDataQ.add(row1.getKey());
for (String column : tableRow1) {
tableDataQ.add(column);
}
}
}
cashFlowDataQ = TextUtils.join(" ",tableDataQ);
cashFlowDataQ = cashFlowDataQ.replaceAll("[^a-zA-Z0-9 /-]","");
cashFlowDataQ = cashFlowDataQ.trim().replaceAll("(?<=[A-Za-z])\\s+(?=[A-Za-z])","");
//Log.e("balanceDATAQ",balanceDataQ);
}
}
} catch (Exception e) {
Log.e("err","err",e);
}
return cashFlowData;
}
有什么建议吗?
谢谢!
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)