Lucene入门1:索引的增、查
2018-03-29 本文已影响0人
半帅气
因工作需要接触Lucene,今天对Lucene索引的增删改查做了初步了解,本文先记录其中的新增与查询。先用起来,再根据需要逐步深入了解其原理。
1. 新增索引
步骤:
1)创建Directory,指定索引存放目录
2)创建索引写入配置对象,指定分词器
3)创建索引写入对象IndexWriter
4)创建Document对象,存储索引
5)为Document添加Field
6)添加Document到Index
/**
 * Tutorial helper that builds a small Lucene index from hard-coded sample data.
 *
 * <p>Each sample document has four fields: {@code id}, {@code author},
 * {@code article} and {@code content} (the latter read from a file on disk).
 */
public class luceneIndexUtil {
    // Sample data: element i of every array describes the same document.
    private int[] ids = {1, 2, 3};
    private String[] authors = {"jason", "neo", "rzexin"};
    private String[] articles = {"vim can do everything!", "go go", "blockchain"};
    private String[] file_paths = {"/tmp/lucene/a.txt", "/tmp/lucene/b.txt", "/tmp/lucene/c.txt"};

    // Directory in which the index files are written.
    private static String INDEX_PATH = "/tmp/lucene/index.1";

    /**
     * Creates the index under {@code INDEX_PATH}, adding one document per sample entry.
     *
     * <p>The directory and the writer are managed with try-with-resources so that
     * both are closed (and the index write lock released) even when adding a
     * document fails, e.g. because one of the sample files does not exist.
     *
     * @throws IOException if the index directory cannot be opened, a sample file
     *         cannot be read, or writing to the index fails
     */
    public void createIndex() throws IOException {
        // 2) Writer configuration with the analyzer to use. The documents contain
        //    Chinese text, so the Chinese analyzer is used instead of the default
        //    StandardAnalyzer (needs the lucene-analyzers-smartcn dependency in pom.xml).
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new SmartChineseAnalyzer());

        // 1) Open the Directory that stores the index; 3) create the IndexWriter.
        //    Resources are closed in reverse order: writer first, then directory.
        try (FSDirectory dir = FSDirectory.open(Paths.get(INDEX_PATH));
             IndexWriter indexWriter = new IndexWriter(dir, indexWriterConfig)) {
            for (int i = 0; i < ids.length; ++i) {
                // 4) One Document per sample entry.
                Document doc = new Document();

                // 5) Add the fields.
                // IntPoint: indexes the int value but does not store it.
                doc.add(new IntPoint("id", ids[i]));
                // StoredField: stores the value so it can be read back from a hit.
                doc.add(new StoredField("id", ids[i]));
                // StringField: indexed as a single token (not analyzed);
                // Field.Store.YES keeps the original value retrievable.
                doc.add(new StringField("author", authors[i], Field.Store.YES));
                // TextField: indexed and analyzed;
                // Field.Store.NO makes it searchable but not retrievable.
                doc.add(new TextField("article", articles[i], Field.Store.NO));
                // Reader-based TextField: file content is indexed but not stored.
                // NOTE(review): FileReader decodes with the platform default charset;
                // confirm the sample files match it (matters for Chinese text).
                doc.add(new TextField("content", new FileReader(new File(file_paths[i]))));

                // 6) Add the Document to the index.
                indexWriter.addDocument(doc);
            }
        }
    }
}
测试:
@Test
public void testCreateIndex() throws IOException {
    // Build the sample index; the test passes when no exception is thrown.
    new luceneIndexUtil().createIndex();
}
执行后,会在索引目录(/tmp/lucene/index.1)下生成索引文件。
2. 搜索
步骤:
1)创建Directory
2)创建IndexReader
3)创建IndexSearcher
4)创建搜索的Query
5)返回TopDocs
6)获取ScoreDoc对象
7)获取具体Document对象
8)从Document对象中获取需要值
/**
 * Searches the index under {@code INDEX_PATH} for documents whose
 * {@code content} field matches the keyword and prints up to 10 hits.
 *
 * <p>The directory, reader and analyzer are managed with try-with-resources so
 * they are released even when parsing the query or searching fails.
 *
 * @throws IOException if the index cannot be opened or read
 * @throws ParseException if the query string cannot be parsed
 */
public void searchIndex() throws IOException, ParseException {
    // 1) Open the Directory; 2) open an IndexReader on it.
    try (FSDirectory dir = FSDirectory.open(Paths.get(INDEX_PATH));
         IndexReader indexReader = DirectoryReader.open(dir);
         SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer()) {
        // 3) Create the IndexSearcher.
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);

        // 4) Build the Query: documents whose "content" contains the keyword.
        QueryParser parser = new QueryParser("content", analyzer);
        Query query = parser.parse("天空");
        // Alternative: search the "article" field instead, e.g.
        //   new QueryParser("article", new StandardAnalyzer()).parse("Go")

        // 5) Run the search, keeping at most the top 10 hits.
        TopDocs topDocs = indexSearcher.search(query, 10);

        // 6) Iterate over the ScoreDoc entries of the result.
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            // 7) Load the stored Document for this hit.
            Document doc = indexSearcher.doc(scoreDoc.doc);
            // 8) Read the wanted values. "article" and "content" are indexed
            //    without being stored in createIndex, so they print as null.
            System.out.printf("id:%s, author:%s, article:%s, content:%s\n",
                    doc.get("id"), doc.get("author"),
                    doc.get("article"), doc.get("content"));
        }
    }
}
测试:
@Test
public void testSearchIndex() throws IOException, ParseException {
    // Run the sample search; hits are printed to standard output.
    new luceneIndexUtil().searchIndex();
}