使用 jsoup 登录网站并解析 HTML

问题描述

问题是:每次登录网站后,它都会跳转到主页。但我想在登录后连接到另一个页面,解析其 HTML 并从中下载文件;然而最后一次连接返回的文档却是登录页面,提示需要重新登录。

try {
            // Step 1: request the portal page that contains the login form.
            Connection.Response response = Jsoup.connect("https://10.232.1.220/123Mobile/Portal/")
                    .userAgent(USER_AGENT)
                    .sslSocketFactory(utilService.socketFactory())
                    .method(Connection.Method.GET)
                    .execute();

            // Step 2: locate the login form, fill in the credentials and submit it.
            FormElement loginForm = (FormElement)response.parse().select("form[class=login-form]").first();
            checkElement("Login Form",loginForm);
            Element loginField = loginForm.select("input[name=UserName]").first();
            checkElement("Login Field",loginField);
            loginField.val(internalConstant.getEbcUsename());
            Element passwordField = loginForm.select("input[name=Password]").first();
            checkElement("Password Field",passwordField);
            passwordField.val(internalConstant.getEbcPassword());
            // The cookies received with the login page are sent along with the form
            // submission. After this statement `response` refers to the POST response,
            // so the login-page response (and its cookies) is no longer reachable.
            response = loginForm.submit()
                     .cookies(response.cookies())
                     .userAgent(USER_AGENT)
                     .method(Connection.Method.POST)
                     .sslSocketFactory(utilService.socketFactory())
                     .followRedirects(false)
                     .execute();

            logger.info("home html: " + response.parse());

            // Step 3: request the report page.
            // NOTE(review): only the cookies returned by the login POST are forwarded
            // here; any cookie that was set solely by the initial GET is not included.
            Document targetPage = Jsoup.connect("https://10.232.1.220/123Mobile/Portal/Reports/TransactionReport")
                    .userAgent(USER_AGENT)
                    .sslSocketFactory(utilService.socketFactory())
                    .cookies(response.cookies())
                    .get();

            logger.info("target document html: " + targetPage.html());

            // NOTE(review): `ebcFile` is not declared anywhere in this snippet; it is
            // presumably extracted from `targetPage` by code not shown - confirm.
            if (ebcFile.isEmpty()) {
                logger.error("file not found");

                throw new ResponseStatusException(HttpStatus.NOT_FOUND,"Ebc File not found in website");
            }

            // this.FileDownloaderService.downloadExcelFile(ebcFile);
        } catch (IOException e) {
            // Log the exception itself (with its stack trace) through the logger and
            // keep it as the cause of the rethrown exception so the root failure is
            // not lost.
            logger.error("download error " + e.getMessage(), e);
            throw new ResponseStatusException(HttpStatus.NOT_FOUND,"Failed to download file", e);
        }

解决方法

问题在于:提交登录表单后得到的响应,其 cookie 中不包含会话 ID(session id)和请求令牌(request token)这两个 cookie。因此,我把最初请求登录页面时响应中的这两个 cookie 取出,补充到提交后得到的 cookie 集合里,再用合并后的 cookie 去请求目标页面。

// Step 1: request the login page and remember the cookies that only this first
// response carries.
Connection.Response response = Jsoup.connect(internalConstant.getLoginURL())
                    .userAgent(USER_AGENT)
                    .sslSocketFactory(utilService.socketFactory())
                    .method(Connection.Method.GET)
                    .execute();

            // NOTE(review): this writes cookie values (including the session id) to the log.
            logger.info("response cookies after parse login page: " + response.cookies());
            // Placeholders: replace both values with the real cookie names used by the
            // site. They must be two different names, otherwise the second put() below
            // overwrites the first.
            String sessionCookieName = "your session id cookie name";
            String requestTokenCookieName = "your request token cookie name";
            String sessionId = response.cookie(sessionCookieName);
            String requestToken = response.cookie(requestTokenCookieName);

            // Step 2: locate the login form, fill in the credentials and submit it.

            FormElement loginForm = (FormElement)response.parse().select("form[class=login-form]").first();
            checkElement("Login Form",loginForm);
            Element loginField = loginForm.select("input[name=UserName]").first();
            checkElement("Login Field",loginField);
            loginField.val(internalConstant.getEbcUsename());

            Element passwordField = loginForm.select("input[name=Password]").first();
            checkElement("Password Field",passwordField);
            passwordField.val(internalConstant.getEbcPassword());
            response = loginForm.submit()
                     .cookies(response.cookies())
                     .userAgent(USER_AGENT)
                     .method(Connection.Method.POST)
                     .sslSocketFactory(utilService.socketFactory())
                     .execute();

            // Step 3: start from the cookies of the submit response and add the two
            // cookies saved from the login page. A cookie that was absent from the
            // login page is skipped rather than stored as null.
            Map<String,String> cookies = response.cookies();
            if (requestToken != null) {
                cookies.put(requestTokenCookieName,requestToken);
            }
            if (sessionId != null) {
                cookies.put(sessionCookieName,sessionId);
            }

            logger.info("response cookies after submit: " + cookies);


            // Step 4: request the target document with the merged cookie set.

             response = Jsoup.connect(internalConstant.getURL())
                    .userAgent(USER_AGENT)
                    .sslSocketFactory(utilService.socketFactory())
                    .cookies(cookies)
                    .method(Connection.Method.GET)
                    .execute();