中级04 - Java Web
2019-08-26 本文已影响0人
晓风残月1994
一、Java Web初体验
1. 使用Java发送HTTP请求
可以借助第三方库,比如 httpclient,接着使用 IOUtils 操作 IO 流将消息实体转化为 HTML 字符串:
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import java.io.IOException;
import java.io.InputStream;
/**
 * Minimal demo that issues an HTTP GET with Apache HttpClient and prints the
 * status line followed by the response body decoded as UTF-8.
 */
public class HTTPRequest {
    /**
     * Sends a GET request to {@code http://localhost:8080/auth} and prints the
     * status line and, when present, the response body.
     *
     * @param args ignored
     * @throws IOException if the request fails or the body cannot be read
     */
    public static void main(String[] args) throws IOException {
        HttpGet httpGet = new HttpGet("http://localhost:8080/auth");
        // Present ourselves as a regular desktop browser.
        httpGet.addHeader("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36");

        // try-with-resources closes the response first and then the client,
        // even when reading the body throws.
        try (CloseableHttpClient httpclient = HttpClients.createDefault();
             CloseableHttpResponse response = httpclient.execute(httpGet)) {
            System.out.println(response.getStatusLine());

            HttpEntity entity1 = response.getEntity();
            if (entity1 == null) {
                // The response carried no body; nothing more to print.
                return;
            }

            InputStream is = entity1.getContent();
            String html = IOUtils.toString(is, "UTF-8");
            System.out.println(html);

            // Make sure the body is fully consumed before the response is closed.
            EntityUtils.consume(entity1);
        }
    }
}
2. 使用爬虫抓取指定仓库中的Pull request信息
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Scrapes the first page of pull requests of a GitHub repository with jsoup.
 */
public class Crawler {
    /**
     * Number of leading characters dropped from a row's {@code id} attribute
     * before the remainder is parsed as the pull request number.
     * NOTE(review): presumably the ids look like {@code issue_<number>}; confirm
     * against the current GitHub markup.
     */
    private static final int ID_PREFIX_LENGTH = 6;

    /** Plain value holder for the pull request fields extracted from one row. */
    static class GitHubPullRequest {
        // Number of the pull request
        int number;
        // Title of the pull request
        String title;
        // GitHub id of the pull request's author
        String author;

        GitHubPullRequest(int number, String title, String author) {
            this.number = number;
            this.title = title;
            this.author = author;
        }

        /** Returns number, title and author, each terminated by a newline. */
        @Override
        public String toString() {
            return number + "\n" + title + "\n" + author + "\n";
        }
    }

    /**
     * Fetches {@code https://github.com/<repo>/pulls} and extracts one entry per
     * {@code .js-issue-row} element. Each extracted entry is also printed to
     * standard output. Rows that do not have the expected id, title link or
     * author link are skipped instead of aborting the whole crawl.
     *
     * @param repo repository name, e.g. {@code "golang/go"} or {@code "gradle/gradle"}
     * @return the pull requests that could be parsed from the page, in page order
     * @throws IOException if the page cannot be fetched
     */
    public static List<GitHubPullRequest> getFirstPageOfPullRequests(String repo) throws IOException {
        Document doc = Jsoup.connect("https://github.com/" + repo + "/pulls").get();
        List<GitHubPullRequest> results = new ArrayList<>();
        for (Element row : doc.select(".js-issue-row")) {
            GitHubPullRequest pr = parseRow(row);
            if (pr == null) {
                continue;
            }
            System.out.println(pr);
            results.add(pr);
        }
        return results;
    }

    /** Converts one row element into a pull request, or returns null if the row is malformed. */
    private static GitHubPullRequest parseRow(Element row) {
        String id = row.attr("id");
        // first() yields null for an empty selection, unlike get(0) which throws.
        Element titleLink = row.select(".js-navigation-open").first();
        Element authorLink = row.select(".muted-link").first();
        if (id.length() <= ID_PREFIX_LENGTH || titleLink == null || authorLink == null) {
            return null;
        }
        try {
            int number = Integer.parseInt(id.substring(ID_PREFIX_LENGTH));
            return new GitHubPullRequest(number, titleLink.text(), authorLink.text());
        } catch (NumberFormatException e) {
            // The id suffix is not a number, so this row is not a pull request entry.
            return null;
        }
    }

    /** Crawls the first pull request page of {@code golang/go}. */
    public static void main(String[] args) throws IOException {
        getFirstPageOfPullRequests("golang/go");
    }
}
二、关于HTTP
HTTP是互联网世界的基石。
2. HTTP常用方法
- GET
- POST
- PUT
- DELETE
- ...
3. HTTP Status
- 1xx 告知请求的处理进度和情况
- 2xx 成功
- 3xx 重定向,客户端需要进一步操作才能完成请求
- 4xx 客户端错误
- 5xx 服务器错误
彩蛋1:
https://http.cat/
彩蛋2:
HTTP Status 418
彩蛋3:
HTTP Status 6xx
4. HTTP重要的请求header
- Accept*
- Cookie
- User-Agent
- Referer
5. HTTP重要的响应header
- Content-Type
- Set-Cookie
6. HTTP body
- HTTP request body
- 表单
- key-value 对
- HTTP response body
- JSON
- HTML/XML
- 二进制(图片/下载文件)
7. 其它
HTTP 消息头的字段名(header name)是不区分大小写的
HTTP 是无状态的,每个请求之间都是相互独立的;要在多个请求之间保持状态,最常用的做法是使用 cookie 这一客户端存储技术来持久化客户端状态(HTTP cookies - MDN)
1. 模拟登录后携带cookie获取用户信息
import com.alibaba.fastjson.JSON;
import org.apache.commons.io.IOUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
/**
 * Demonstrates logging in over HTTP and reusing the session cookie on a
 * follow-up request.
 */
public class Crawler {
    /**
     * Posts the credentials as JSON to the login endpoint, then requests
     * {@code /auth} with the same client so that any cookie set by the login
     * response is sent along. The body of the second response is printed and
     * returned.
     *
     * @param username login name sent in the JSON body
     * @param password password sent in the JSON body
     * @return body of the {@code /auth} response decoded as UTF-8
     * @throws IOException if either request fails or the body cannot be read
     */
    public static String loginAndGetResponse(String username, String password) throws IOException {
        // Build a client with a cookie store; try-with-resources releases its
        // connections when the method returns or throws.
        try (CloseableHttpClient httpclient =
                     HttpClients.custom().setDefaultCookieStore(new BasicCookieStore()).build()) {
            // Attempt the login so that the server can set its cookie.
            HttpPost httpPost = new HttpPost("http://example.com/auth/login");
            Map<String, String> map = new HashMap<>();
            map.put("username", username);
            map.put("password", password);
            // Encode explicitly as UTF-8: the single-argument StringEntity
            // constructor falls back to ISO-8859-1 and corrupts other characters.
            httpPost.setEntity(new StringEntity(JSON.toJSONString(map), "UTF-8"));
            httpPost.setHeader("Content-Type", "application/json");
            httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36");
            httpclient.execute(httpPost).close();

            // Fetch the auth information, carrying the cookie from the login.
            HttpGet httpGet = new HttpGet("http://example.com/auth");
            try (CloseableHttpResponse authResponse = httpclient.execute(httpGet)) {
                InputStream is = authResponse.getEntity().getContent();
                String authResponseBody = IOUtils.toString(is, "UTF-8");
                System.out.println(authResponseBody);
                return authResponseBody;
            }
        }
    }

    /** Runs the login flow with demo credentials. */
    public static void main(String[] args) throws IOException {
        loginAndGetResponse("admin", "admin");
    }
}