Java操作Solr —— SolrJ
2020-07-08 本文已影响0人
xiaogp
摘要:Solr,Java,分页,游标
引入依赖
在 pom.xml 中添加 SolrJ 以及它所依赖的 commons-logging:
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-solrj</artifactId>
<version>4.10.0</version>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.1.1</version>
</dependency>
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
/**
 * Runnable SolrJ examples against a single Solr core: insert, delete, query, sort,
 * start/rows paging, filter queries, counting, and term vs. phrase queries.
 *
 * <p>Every example takes the server to talk to as a parameter; {@link #main} picks which
 * example runs and releases the shared client afterwards.
 */
public class solrj_test {
    private static final String SOLR_BASE_URL = "http://localhost:8080/solr/collection3";
    static HttpSolrServer solrServer = new HttpSolrServer(SOLR_BASE_URL);

    /**
     * Runs the selected example and then shuts the shared client down.
     *
     * @param args unused
     * @throws IOException if communication with the Solr server fails
     * @throws SolrServerException if the Solr server reports an error
     */
    public static void main(String[] args) throws IOException, SolrServerException {
        try {
            // Demo switchboard: uncomment the example you want to run.
            // insertTest(solrServer);
            // deleteTest(solrServer);
            // queryTest(solrServer);
            // sorttest(solrServer);
            // insertManyTest(solrServer);
            // pageTest(solrServer);
            // filterTest(solrServer);
            // countTest(solrServer);
            query2Test(solrServer);
        } finally {
            // Release the client's underlying HTTP connection pool even if the example throws.
            solrServer.shutdown();
        }
    }

    /**
     * Inserts a single document and commits it.
     *
     * @param solrServer client for the target core
     * @throws IOException if communication with the Solr server fails
     * @throws SolrServerException if the Solr server reports an error
     */
    public static void insertTest(HttpSolrServer solrServer) throws IOException, SolrServerException {
        // Build the document to index.
        SolrInputDocument document = new SolrInputDocument();
        document.addField("id", "test");
        document.addField("ent", "测试solrj");
        // Send it to the server, then commit so it becomes searchable.
        solrServer.add(document);
        solrServer.commit();
    }

    /**
     * Inserts several documents in one update request and commits them.
     *
     * @param solrServer client for the target core
     * @throws IOException if communication with the Solr server fails
     * @throws SolrServerException if the Solr server reports an error
     */
    public static void insertManyTest(HttpSolrServer solrServer) throws IOException, SolrServerException {
        String[][] data = {{"test2", "b"}, {"test3", "d"}};
        List<SolrInputDocument> documents = new ArrayList<SolrInputDocument>(data.length);
        for (String[] row : data) {
            SolrInputDocument document = new SolrInputDocument();
            document.addField("id", row[0]);
            document.addField("ent", row[1]);
            documents.add(document);
        }
        // One batched request instead of one HTTP round trip per document.
        solrServer.add(documents);
        solrServer.commit();
    }

    /**
     * Deletes the documents matching {@code id:test} and commits the deletion.
     *
     * @param solrServer client for the target core
     * @throws IOException if communication with the Solr server fails
     * @throws SolrServerException if the Solr server reports an error
     */
    public static void deleteTest(HttpSolrServer solrServer) throws IOException, SolrServerException {
        solrServer.deleteByQuery("id:test");
        // Commit so the deletion becomes visible.
        solrServer.commit();
    }

    /**
     * Runs a match-all query and prints the returned page of documents.
     *
     * @param solrServer client for the target core
     * @throws SolrServerException if the query fails
     */
    public static void queryTest(HttpSolrServer solrServer) throws SolrServerException {
        SolrQuery params = new SolrQuery("*:*");
        // Execute the query and read the result set from the response.
        SolrDocumentList list = solrServer.query(params).getResults();
        System.out.println("一共获取了" + list.size() + "条结果:");
        printDocuments(list, "ent:");
    }

    /**
     * Runs a match-all query sorted by {@code id} descending and prints the returned page.
     *
     * @param solrServer client for the target core
     * @throws SolrServerException if the query fails
     */
    public static void sorttest(HttpSolrServer solrServer) throws SolrServerException {
        SolrQuery params = new SolrQuery("*:*");
        params.setSort("id", SolrQuery.ORDER.desc);
        SolrDocumentList list = solrServer.query(params).getResults();
        System.out.println("一共获取了" + list.size() + "条结果:");
        printDocuments(list, "ent:");
    }

    /**
     * Demonstrates start/rows paging: skips the first 10 matches and prints the next 5.
     *
     * @param solrServer client for the target core
     * @throws SolrServerException if the query fails
     */
    public static void pageTest(HttpSolrServer solrServer) throws SolrServerException {
        SolrQuery params = new SolrQuery("*:*");
        params.setStart(10);
        params.setRows(5);
        SolrDocumentList list = solrServer.query(params).getResults();
        System.out.println("一共获取了" + list.size() + "条结果:");
        printDocuments(list, "ent: ");
    }

    /**
     * Demonstrates a filter query: match-all restricted by the range {@code id:[2 TO *]}.
     *
     * @param solrServer client for the target core
     * @throws SolrServerException if the query fails
     */
    public static void filterTest(HttpSolrServer solrServer) throws SolrServerException {
        SolrQuery params = new SolrQuery("*:*");
        params.setFilterQueries("id:[2 TO *]");
        printDocuments(solrServer.query(params).getResults(), "ent: ");
    }

    /**
     * Prints the total number of documents matching a match-all query.
     *
     * @param solrServer client for the target core
     * @throws SolrServerException if the query fails
     */
    public static void countTest(HttpSolrServer solrServer) throws SolrServerException {
        SolrQuery params = new SolrQuery("*:*");
        // numFound is the total hit count, independent of how many rows were returned.
        long foundNum = solrServer.query(params).getResults().getNumFound();
        System.out.println(foundNum);
    }

    /**
     * Runs the same search on the {@code ent} field twice: first as a bare term query
     * (labelled "fuzzy" in the output), then as a quoted phrase query (labelled "exact").
     * How much the two differ depends on how the {@code ent} field is analyzed in the schema.
     *
     * @param solrServer client for the target core
     * @throws SolrServerException if either query fails
     */
    public static void query2Test(HttpSolrServer solrServer) throws SolrServerException {
        System.out.println("模糊查询");
        SolrQuery termQuery = new SolrQuery("ent:通辽城投");
        printDocuments(solrServer.query(termQuery).getResults(), "ent: ");

        System.out.println("精确查询");
        SolrQuery phraseQuery = new SolrQuery("ent:\"通辽城投\"");
        printDocuments(solrServer.query(phraseQuery).getResults(), "ent: ");
    }

    /**
     * Prints the {@code id} and {@code ent} fields of every document in the list.
     *
     * @param documents documents to print
     * @param entLabel label printed before the {@code ent} value; passed in because the
     *     examples historically differ in whether a space follows the colon
     */
    private static void printDocuments(SolrDocumentList documents, String entLabel) {
        for (SolrDocument solrDocument : documents) {
            System.out.println("id: " + solrDocument.getFieldValue("id"));
            System.out.println(entLabel + solrDocument.getFieldValue("ent"));
        }
    }
}
批量查询数据
使用翻页
ublic static SolrDocumentList getBatchData(String dataString) throws SolrServerException {
SolrDocumentList solrDocuments = new SolrDocumentList();
long total = 0;
SolrQuery params = new SolrQuery();
params.set("q", "*:*");
params.set("fq", String.format("inputtime: %s*", dataString));
params.set("fl", "ID,inputtime,nerent,title,pubdate,TEXT");
QueryResponse res = SolrUtils.getInstance().solrServer.query(params);
if (!res.getResults().isEmpty()) {
total = res.getResults().getNumFound();
}
if (total == 0) {
LOGGER.info(String.format("当前日期没有需要处理的增量数据!日期:%s", dataString));
// System.exit(0);
} else {
LOGGER.info(String.format("当前日期数据量:%d", total));
}
long batch = total / 1000 + 1;
// 翻页查询所有数据
for (int i = 0; i < batch; i++) {
params.set("start", i * 1000);
params.set("rows", 1000);
QueryResponse batchRes = SolrUtils.getInstance().solrServer.query(params);
if (!batchRes.getResults().isEmpty()) {
solrDocuments.addAll(batchRes.getResults());
LOGGER.info(String.format("日期 %s %d / %d", dataString, solrDocuments.size(), total));
}
}
return solrDocuments;
}
使用游标
分页读取的方式在大数据量的情况下,在solr里面表现并不是特别好,因为它随时可能发生OOM异常。
在solr里面通过rows和start参数可以非常方便地分页读取,但是如果start=1000000、rows=10,那么solr会将前面100万条数据的索引信息读取到内存里面,这样一来,非常耗内存。
游标是无状态的,不会维护索引数据在内存里面,仅仅记录最后一个doc的计算值类似md5,然后每一次读取,都会如此记录最后一个值的mark,下一次通过这个mark便能快速的定位到第二页上,如此往复,便能完成整个数据的读取。而且耗费内存非常少。
游标的缺点是游标一旦读取了,就不能再返回上一次的位置了;另外,使用游标时排序条件中必须包含主键(uniqueKey)字段(升序或降序均可),并且主键值不能重复。
/**
 * Fetches every document whose {@code inputtime} starts with the given prefix, using
 * cursor-based paging ({@code cursorMark}) in pages of 1000 sorted by {@code ID} ascending.
 *
 * @param dataString prefix matched against the {@code inputtime} field (a date string,
 *     judging by the log messages)
 * @return all matching documents, restricted to the requested field list; empty when
 *     nothing matches
 * @throws SolrServerException if any query fails
 */
public static SolrDocumentList getBatchData2(String dataString) throws SolrServerException {
    SolrDocumentList solrDocuments = new SolrDocumentList();

    SolrQuery params = new SolrQuery();
    params.set("q", "*:*");
    params.set("fq", String.format("inputtime: %s*", dataString));
    params.set("fl", "ID,inputtime,nerent,title,pubdate,TEXT");

    // Count probe: rows=0 returns only the total hit count, without shipping documents.
    params.setRows(0);
    long total = SolrUtils.getInstance().solrServer.query(params).getResults().getNumFound();
    if (total == 0) {
        LOGGER.info(String.format("当前日期没有需要处理的增量数据!日期:%s", dataString));
        // Nothing to read, so skip the cursor walk entirely.
        return solrDocuments;
    }
    LOGGER.info(String.format("当前日期数据量:%d", total));

    params.setRows(1000);
    // Cursor paging needs a sort on the unique key; presumably ID is that field — confirm
    // against the schema.
    params.setSort("ID", SolrQuery.ORDER.asc);

    String cursorMark = CursorMarkParams.CURSOR_MARK_START;
    while (true) {
        params.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
        QueryResponse batchRes = SolrUtils.getInstance().solrServer.query(params);
        // Appending an empty page is a no-op, so no emptiness check is needed.
        solrDocuments.addAll(batchRes.getResults());

        String nextCursorMark = batchRes.getNextCursorMark();
        // The server repeats the same mark once the result set is exhausted; a missing
        // mark is treated as the end as well rather than dereferenced.
        if (nextCursorMark == null || nextCursorMark.equals(cursorMark)) {
            break;
        }
        cursorMark = nextCursorMark;
    }
    return solrDocuments;
}