HtmlUnit 的工具类

2017-10-27  本文已影响0人  adminerator

package com.luyn.http.htmlunit;

import java.io.InputStream;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import com.gargoylesoftware.htmlunit.DefaultCredentialsProvider;
import com.gargoylesoftware.htmlunit.HttpMethod;
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.ProxyConfig;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebRequest;
import com.gargoylesoftware.htmlunit.WebResponse;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.util.Cookie;
import com.gargoylesoftware.htmlunit.util.NameValuePair;
import com.luyn.http.proxy.ProxyPojo;
import com.luyn.io.IoUtil;

/**
 * Helper methods around HtmlUnit's {@link WebClient}: one-shot GET/POST requests that
 * return the response body as bytes, plus helpers for configuring a client, setting a
 * proxy and managing cookies.
 *
 * <p>The one-shot request methods are static and create (and close) their own client.
 * The configuration, proxy and cookie helpers are instance methods that operate on a
 * caller-supplied client.
 */
public class HtmlunitUtils {

    /**
     * Sends a GET request with a freshly created, default-configured {@link WebClient}.
     *
     * @param url absolute URL to fetch
     * @return the response body, or {@code null} when the status code is not 200
     * @throws Exception if the URL is malformed or the request fails
     */
    public static byte[] sendGetRequest(String url) throws Exception {
        // The client is only needed for this one request; close it once the body has
        // been copied out so it is not leaked.
        try (WebClient webClient = new WebClient()) {
            WebRequest webRequest = new WebRequest(new URL(url));
            webRequest.setHttpMethod(HttpMethod.GET);
            return sendRequest(webClient, webRequest);
        }
    }

    /**
     * Sends a POST request with a freshly created, default-configured {@link WebClient}.
     *
     * @param url    absolute URL to post to
     * @param params request parameters to send; may be {@code null} or empty
     * @return the response body, or {@code null} when the status code is not 200
     * @throws Exception if the URL is malformed or the request fails
     */
    public static byte[] sendPostRequest(String url, Map<String, String> params) throws Exception {
        // Same lifecycle as the GET variant: the client must not outlive the call.
        try (WebClient webClient = new WebClient()) {
            WebRequest webRequest = new WebRequest(new URL(url));
            webRequest.setHttpMethod(HttpMethod.POST);
            if (params != null) {
                for (Entry<String, String> param : params.entrySet()) {
                    webRequest.getRequestParameters()
                            .add(new NameValuePair(param.getKey(), param.getValue()));
                }
            }
            return sendRequest(webClient, webRequest);
        }
    }

    /**
     * Executes the request and extracts the response body.
     *
     * <p>For HTML pages the method waits for background JavaScript and returns the page
     * serialized via {@code asXml()}; for any other content it returns the raw body bytes.
     * The response is always cleaned up, even when reading the body fails.
     *
     * @param webClient  client used to execute the request
     * @param webRequest request to execute
     * @return the body bytes, or {@code null} when the status code is not 200
     * @throws Exception if the request or reading the body fails
     */
    private static byte[] sendRequest(WebClient webClient, WebRequest webRequest) throws Exception {
        Page page = webClient.getPage(webRequest);
        WebResponse webResponse = page.getWebResponse();
        try {
            int status = webResponse.getStatusCode();

            System.out.println("Charset : " + webResponse.getContentCharset());
            System.out.println("ContentType : " + webResponse.getContentType());

            if (status != 200) {
                return null;
            }

            if (page.isHtmlPage()) {
                // Wait up to 10 seconds for background JavaScript (remote script
                // loading, DOM manipulation) before serializing the page.
                webClient.waitForBackgroundJavaScript(10000);
                // NOTE(review): getBytes() encodes with the platform default charset;
                // kept as-is because callers may decode with the same default.
                return ((HtmlPage) page).asXml().getBytes();
            }

            // Non-HTML content: copy the raw stream, closing it even on failure.
            try (InputStream bodyStream = webResponse.getContentAsStream()) {
                return IoUtil.toByteFromInputStream(bodyStream);
            }
        } finally {
            // Release the response's resources on every path, including exceptions.
            webResponse.cleanUp();
        }
    }

    /**
     * Applies the crawler defaults to a client: JavaScript on, CSS off, redirects on,
     * script errors not thrown, and the given timeout.
     *
     * @param webClient client to configure
     * @param timeout   value passed to {@code WebClientOptions.setTimeout}
     */
    public void configWebClient(WebClient webClient, int timeout) {
        // 1. Enable JavaScript.
        webClient.getOptions().setJavaScriptEnabled(true);
        // 2. Disable CSS to avoid the follow-up stylesheet requests used for rendering.
        webClient.getOptions().setCssEnabled(false);
        // 3. Follow redirects.
        webClient.getOptions().setRedirectEnabled(true);
        // 4. Do not throw when a script on the page fails.
        webClient.getOptions().setThrowExceptionOnScriptError(false);
        // 5. Request timeout.
        webClient.getOptions().setTimeout(timeout);
    }

    /**
     * Routes the client through the given proxy and, when the proxy has a username,
     * registers its credentials with the client's credentials provider.
     *
     * @param webClient client to configure
     * @param proxy     proxy host, port and optional username/password
     */
    public void setProxy(WebClient webClient, ProxyPojo proxy) {
        ProxyConfig proxyConfig = webClient.getOptions().getProxyConfig();
        proxyConfig.setProxyHost(proxy.getIp());
        proxyConfig.setProxyPort(proxy.getPort());

        // A proxy without authentication has no username; passing null on to
        // addCredentials would be rejected, so only register when one is present.
        if (proxy.getUsername() != null) {
            DefaultCredentialsProvider credentialsProvider =
                    (DefaultCredentialsProvider) webClient.getCredentialsProvider();
            credentialsProvider.addCredentials(proxy.getUsername(), proxy.getPassword());
        }
    }

    /**
     * Enables cookies on the client and adds one cookie per map entry for the domain.
     * Does nothing when {@code cookies} is {@code null} or empty.
     *
     * @param webClient client whose cookie manager receives the cookies
     * @param domain    domain the cookies belong to
     * @param cookies   cookie name to value mapping; may be {@code null}
     */
    public void setCookies(WebClient webClient, String domain, Map<String, String> cookies) {
        if (cookies == null || cookies.isEmpty()) {
            return;
        }
        webClient.getCookieManager().setCookiesEnabled(true);
        for (Entry<String, String> c : cookies.entrySet()) {
            Cookie cookie = new Cookie(domain, c.getKey(), c.getValue());
            webClient.getCookieManager().addCookie(cookie);
        }
    }

    /**
     * Collects the cookies currently held by the client's cookie manager.
     *
     * @param webClient client to read cookies from
     * @return a new map of cookie name to value; when several cookies share a name,
     *         the one iterated last wins
     */
    public Map<String, String> getResponseCookies(WebClient webClient) {
        Set<Cookie> cookies = webClient.getCookieManager().getCookies();
        Map<String, String> responseCookies = new HashMap<>();
        for (Cookie c : cookies) {
            responseCookies.put(c.getName(), c.getValue());
        }
        return responseCookies;
    }

    /**
     * Removes all cookies from the client's cookie manager.
     *
     * @param webClient client whose cookies are cleared
     */
    public void clearCookies(WebClient webClient) {
        webClient.getCookieManager().clearCookies();
    }

    // Disabled example: drive a page by executing JavaScript on it (fills in a login
    // form and clicks the submit button). Kept for reference only.
    //
    // public void doWeb(Page page) {
    // if (page instanceof HtmlPage) {
    // StringBuilder js = new StringBuilder();
    // js.append("document.getElementsByName('username')[1].value='").append(WeiboAccount.USERNAME)
    // .append("';");
    // js.append("document.getElementsByName('password')[1].value='").append(WeiboAccount.PASSWORD)
    // .append("';");
    // js.append("document.getElementsByClassName('W_btn_g')[1].click();");
    // HtmlPage htmlPage = (HtmlPage) page;
    // htmlPage.executeJavaScript(js.toString());
    // }
    // }
}

上一篇下一篇

猜你喜欢

热点阅读