问题描述
问题:无法从 https://apps.irs.gov/app/fatcaFfiList/data/FFIListFull.csv 下载 csv,出现以下错误。
Connecting : https://apps.irs.gov/app/fatcaFfiList/data/FFIListFull.csv
java.net.ProtocolException: Server redirected too many times (20)
    at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1636)
    at sun.net.www.protocol.https.HttpsURLConnectionImpl.getInputStream(HttpsURLConnectionImpl.java:254)
    at FileDownload.downloadFile(FileDownload.java:86)
    at FileDownload.main(FileDownload.java:54)
目标:使用 Java 下载 csv
注意:在 URLConnection conn = uURL.openConnection(proxy); 之前添加 HttpURLConnection.setFollowRedirects(false); 并不能解决问题。添加这行代码后,下载得到的文件变为 0 字节。
重现问题:
java被BAT脚本调用
java -Xmx1024m -classpath "%LIB%\*" -Dbasepath=D:\ FileDownload https://apps.irs.gov/app/fatcaFfiList/data/FFIListFull.csv NoUser nopassword FFIListFull.csv webproxy 8080>> log.txt
FileDownload.class
用于文件下载
import java.io.*;
import java.net.*;
/**
 * Command-line utility that downloads one URL to a local file, optionally
 * through an HTTP proxy, printing progress and the elapsed time to standard
 * output.
 *
 * <p>Usage:
 * <pre>
 * FileDownload &lt;URL&gt; &lt;USERNAME&gt; &lt;PASSWORD&gt; &lt;OUTPUT FILE&gt;
 * FileDownload &lt;URL&gt; &lt;USERNAME&gt; &lt;PASSWORD&gt; &lt;OUTPUT FILE&gt; &lt;PROXY HOST&gt; &lt;PROXY PORT&gt;
 * </pre>
 */
public class FileDownload {

    /** Size in bytes of the buffer used to copy the response body to disk. */
    private static final int BUFFER_SIZE = 8192;

    /** A progress line is printed each time this many more bytes have been written. */
    private static final long STATUS_INTERVAL_BYTES = 1024L * 1024L;

    /**
     * Program entry point. Parses the arguments, runs the download and prints
     * the elapsed time. Exits with status 1 on a usage error or an I/O failure.
     *
     * @param args either four arguments (direct connection) or six arguments
     *             (connection through an HTTP proxy), as shown in the usage text
     */
    public static void main(String[] args) {
        boolean isError = false;
        String iURL = "";
        String iUSR = "";
        String ipwD = "";
        String IoUT = "";
        String iPXY = null;
        int iPPT = -1;

        if (args.length == 4 || args.length == 6) {
            iURL = args[0];
            iUSR = args[1];
            ipwD = args[2];
            IoUT = args[3];
            if (args.length == 6) {
                iPXY = args[4];
                iPPT = strToInt(args[5]);
                // Reject an unparsable or out-of-range port here; otherwise it
                // only fails later inside InetSocketAddress with an unchecked exception.
                if (iPPT < 0 || iPPT > 65535) {
                    isError = true;
                }
            }
        } else {
            isError = true;
        }

        if (isError) {
            System.err
                    .println("Usage: FileDownload <URL> <USERNAME> <PASSWORD> <OUTPUT FILE>\nFileDownload <URL> <USERNAME> <PASSWORD> <OUTPUT FILE> <PROXY HOST> <PROXY PORT>");
            System.exit(1);
        }

        long lStart = System.currentTimeMillis();
        try {
            System.out.printf("Connecting : %s \n", iURL);
            downloadFile(iURL, iUSR, ipwD, IoUT, iPXY, iPPT);
        } catch (IOException e) {
            // Reported on stdout (not stderr) as in the original program.
            e.printStackTrace(System.out);
            System.exit(1);
        }
        long lEnd = System.currentTimeMillis();
        long lUsed = lEnd - lStart;
        // Milliseconds to whole seconds: divide by 1000, not 1024.
        int iSec = (int) (lUsed / 1000L);
        System.out.printf("Time Used: %d msec (i.e. %d min %d sec)\n", lUsed, iSec / 60, iSec % 60);
    }

    /**
     * Downloads {@code iURL} into the file {@code IoUTFile}.
     *
     * <p>Installs a default {@link Authenticator} that answers every
     * authentication challenge with the given credentials, and installs a
     * default {@link CookieManager} if no {@link CookieHandler} is set yet.
     *
     * @param iURL      URL to download
     * @param iUSR      user name offered when the server or proxy asks for credentials
     * @param ipwD      password offered together with {@code iUSR}
     * @param IoUTFile  path of the file to write
     * @param proxyHost HTTP proxy host; {@code null} or empty means a direct connection
     * @param proxyPort HTTP proxy port; ignored when {@code proxyHost} is {@code null} or empty
     * @throws IOException if the connection fails, the server answers with a
     *                     non-2xx status, or the output file cannot be written
     */
    public static void downloadFile(String iURL, String iUSR, String ipwD, String IoUTFile, String proxyHost,
            int proxyPort) throws IOException {
        final String USER = iUSR;
        final String PASS = ipwD;
        Authenticator.setDefault(new Authenticator() {
            @Override
            protected PasswordAuthentication getPasswordAuthentication() {
                char[] secret = (PASS == null) ? new char[0] : PASS.toCharArray();
                return new PasswordAuthentication(USER, secret);
            }
        });

        // HttpURLConnection only sends cookies back when a CookieHandler is
        // installed. A server that sets a cookie and then redirects until the
        // client returns it otherwise loops until the JDK gives up with
        // "Server redirected too many times (20)".
        if (CookieHandler.getDefault() == null) {
            CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
        }

        URL uURL = new URL(iURL);
        Proxy proxy;
        if (proxyHost == null || proxyHost.length() == 0) {
            proxy = Proxy.NO_PROXY;
        } else {
            SocketAddress addr = new InetSocketAddress(proxyHost, proxyPort);
            proxy = new Proxy(Proxy.Type.HTTP, addr);
        }

        URLConnection conn = uURL.openConnection(proxy);
        if (conn instanceof HttpURLConnection) {
            // Fail loudly on anything but success, so that e.g. an unfollowed
            // 3xx response does not silently produce a 0-byte output file.
            int status = ((HttpURLConnection) conn).getResponseCode();
            if (status / 100 != 2) {
                throw new IOException("Unexpected HTTP response code " + status + " for " + iURL);
            }
        }

        InputStream in = conn.getInputStream();
        try {
            // Prefer the length reported by this response; only issue the extra
            // HEAD request when the GET response did not carry a Content-Length.
            int fileSize = conn.getContentLength();
            if (fileSize < 0) {
                fileSize = getFileSize(uURL, proxy);
            }
            downloadFile(in, fileSize, IoUTFile);
        } finally {
            in.close();
        }
    }

    /**
     * Copies {@code in} to the file {@code outFile}, printing a progress line
     * roughly every {@link #STATUS_INTERVAL_BYTES} bytes and once at the end.
     *
     * <p>NOTE(review): the original file calls this overload but does not
     * define it; this is a reconstruction based on that call site and on
     * {@code printDownloadStatus}. The stream is not closed here; the caller owns it.
     *
     * @param in       stream to read until end of stream
     * @param fileSize expected total size in bytes, or a value {@code <= 0} if unknown
     * @param outFile  path of the file to create or overwrite
     * @throws IOException if reading or writing fails
     */
    private static void downloadFile(InputStream in, int fileSize, String outFile) throws IOException {
        OutputStream out = new BufferedOutputStream(new FileOutputStream(outFile));
        try {
            byte[] buffer = new byte[BUFFER_SIZE];
            long downloaded = 0L;
            long nextReport = STATUS_INTERVAL_BYTES;
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
                downloaded += read;
                if (downloaded >= nextReport) {
                    printDownloadStatus(downloaded, fileSize);
                    nextReport += STATUS_INTERVAL_BYTES;
                }
            }
            out.flush();
            printDownloadStatus(downloaded, fileSize);
        } finally {
            out.close();
        }
    }

    /**
     * Prints one progress line. A percentage is included only when the total
     * size is known ({@code totalBytes > 0}).
     *
     * @param downloadedBytes number of bytes written so far
     * @param totalBytes      expected total, or a value {@code <= 0} if unknown
     */
    private static void printDownloadStatus(long downloadedBytes, long totalBytes) {
        if (totalBytes > 0) {
            System.out.printf("%s out of %s ( %.1f%% ) downloaded\n", formatBytes(downloadedBytes),
                    formatBytes(totalBytes), downloadedBytes * 100.0D / totalBytes);
        } else {
            System.out.printf("%s downloaded\n", formatBytes(downloadedBytes));
        }
    }

    /**
     * Formats a byte count with one decimal place, using binary multiples
     * (1 KB = 1024 bytes). A unit is chosen only when the value is strictly
     * greater than its threshold, so exactly 1024 is shown as bytes.
     *
     * @param iBytes byte count
     * @return text such as {@code "2.0 KB"} or {@code "512.0 Bytes"}
     */
    private static String formatBytes(long iBytes) {
        String res;
        if (iBytes > 1073741824L) {
            res = String.format("%.1f GB", iBytes / 1024.0D / 1024.0D / 1024.0D);
        } else if (iBytes > 1048576L) {
            res = String.format("%.1f MB", iBytes / 1024.0D / 1024.0D);
        } else if (iBytes > 1024L) {
            res = String.format("%.1f KB", iBytes / 1024.0D);
        } else {
            res = String.format("%.1f Bytes", (double) iBytes);
        }
        return res;
    }

    /**
     * Asks the server for the size of {@code url} with a {@code HEAD} request.
     *
     * @param url   resource to query
     * @param proxy proxy to connect through
     * @return the reported content length, or -1 if it is unknown or the request failed
     */
    private static int getFileSize(URL url, Proxy proxy) {
        HttpURLConnection conn = null;
        try {
            conn = (HttpURLConnection) url.openConnection(proxy);
            conn.setRequestMethod("HEAD");
            conn.getInputStream();
            return conn.getContentLength();
        } catch (IOException e) {
            e.printStackTrace(System.out);
            return -1;
        } finally {
            // openConnection may have thrown before conn was assigned.
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /**
     * Parses a decimal integer.
     *
     * @param str text to parse
     * @return the parsed value, or -1 if {@code str} is not a valid integer
     */
    private static int strToInt(String str) {
        int res = -1;
        try {
            res = Integer.parseInt(str);
        } catch (NumberFormatException e) {
            e.printStackTrace();
        }
        return res;
    }
}
解决方法
找到了一个替代方案:使用嵌入在 BAT 脚本中的 JScript 进行下载。但这并不是 Java 解决方案。