lucene的增删改查

2019-03-26  本文已影响0人  刘书生

lucene全文检索,还是直接看代码吧

lucene依赖

<!-- Lucene core and the modules it is used with here (query parser, common analyzers) -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>7.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>7.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>7.6.0</version>
        </dependency>
        <!-- Chinese word-segmentation analyzer -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-smartcn</artifactId>
            <version>7.6.0</version>
        </dependency>

IndexReader与IndexSearcher工厂类
新版本中统一通过 DirectoryReader 来创建 IndexReader,这一点和老版本很不一样,而网上很多教程讲的还是 Lucene 的老版本。

package cn.wgd.zmx.utils;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;

import java.io.File;
import java.io.IOException;

/**
 * Maintains a single shared {@link DirectoryReader} and the {@link IndexSearcher}
 * built on top of it.
 *
 * <p>The reader is opened from the supplied {@link IndexWriter} on first use and is
 * refreshed with {@link DirectoryReader#openIfChanged(DirectoryReader)} on later calls,
 * which is cheaper than opening a brand-new reader every time.
 *
 * <p>All access is serialized on a private lock. When a refresh produces a new reader
 * the previous one is closed, so callers should not keep using a reader or searcher
 * obtained from an earlier call; request a fresh one for every search instead.
 */
public class LuceneFactory {

    /** Private monitor guarding the two cached instances below. */
    private static final Object LOCK = new Object();

    private static DirectoryReader indexReader = null;

    private static IndexSearcher indexSearcher = null;

    /**
     * Returns the shared reader, opening it on first use and refreshing it when the
     * index has changed since the reader was opened.
     *
     * @param indexWriter writer used to open the reader on the first call
     * @return the current, never-null shared reader
     * @throws java.io.UncheckedIOException if the reader cannot be opened or refreshed
     */
    public static DirectoryReader getIndexReader(IndexWriter indexWriter) {
        synchronized (LOCK) {
            try {
                if (indexReader == null) {
                    indexReader = DirectoryReader.open(indexWriter);
                } else {
                    // openIfChanged returns null when nothing changed; in that case the
                    // current reader is still valid and must be kept (and not closed).
                    DirectoryReader refreshed = DirectoryReader.openIfChanged(indexReader);
                    if (refreshed != null) {
                        DirectoryReader stale = indexReader;
                        // Publish the new reader first so a failing close() cannot
                        // leave the cache pointing at a closed reader.
                        indexReader = refreshed;
                        stale.close();
                    }
                }
            } catch (IOException e) {
                throw new java.io.UncheckedIOException("Failed to open or refresh the index reader", e);
            }
            return indexReader;
        }
    }

    /**
     * Returns a searcher over the current shared reader. A new searcher is created only
     * when none exists yet or when the underlying reader has been replaced by a refresh.
     *
     * @param indexWriter writer used to open the reader on the first call
     * @return a never-null searcher bound to the current shared reader
     * @throws java.io.UncheckedIOException if the reader cannot be opened or refreshed
     */
    public static IndexSearcher getIndexSearch(IndexWriter indexWriter) {
        synchronized (LOCK) {
            // The monitor is reentrant, so the nested call below does not deadlock.
            DirectoryReader current = getIndexReader(indexWriter);
            if (indexSearcher == null || indexSearcher.getIndexReader() != current) {
                indexSearcher = new IndexSearcher(current);
            }
            return indexSearcher;
        }
    }
}

官网上提到,使用 DirectoryReader.open(indexWriter) 创建 IndexReader 更快;而每次都重新创建 IndexReader 的开销非常大,所以上面使用了单例模式。当索引发生变化时,我们不直接重新创建,而是使用官网推荐的 DirectoryReader.openIfChanged:先判断索引是否有变化,如果有变化,就在旧 IndexReader 的基础上创建新的 IndexReader(没有变化时该方法返回 null,应继续使用旧的 IndexReader),然后再通过新的 IndexReader 创建 IndexSearcher,这样开销会比较小。

package cn.wgd.zmx.utils;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.io.IOException;
import java.nio.file.Paths;

/**
 * Static helper for adding, querying, updating and deleting documents in a single
 * on-disk Lucene index.
 *
 * <p>One {@link IndexWriter} is shared by all operations and stays open for the lifetime
 * of the class; call {@link #close()} once when the application shuts down. Searches
 * obtain their {@link IndexSearcher} from {@link LuceneFactory} on every call.
 *
 * <p>Possible optimization: buffer index updates in memory and flush them to disk in
 * batches instead of committing after every operation.
 */
public class LuceneUtils {

    private static final String INDEX_PATH = "indexDir/";

    /** Chinese word-segmentation analyzer used when indexing. */
    private static final SmartChineseAnalyzer smartChineseAnalyzer = new SmartChineseAnalyzer();

    /** Writer configuration built around the analyzer above. */
    private static final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(smartChineseAnalyzer);

    /** Where the index is stored: {@code indexDir/}, relative to the working directory. */
    private static final Directory directory;

    /** The single shared writer; it is intentionally not closed by individual operations. */
    private static final IndexWriter indexWriter;

    static {
        try {
            directory = FSDirectory.open(Paths.get(INDEX_PATH));
            indexWriter = new IndexWriter(directory, indexWriterConfig);
        } catch (IOException e) {
            // Fail class initialization with the real cause instead of continuing with
            // null fields and hitting an unrelated NullPointerException later.
            throw new ExceptionInInitializerError(e);
        }
    }

    public LuceneUtils(){
    }

    /**
     * Adds a document to the index and commits the change.
     *
     * @param doc the document to index
     * @throws IOException if writing or committing fails
     */
    public static void addIndex(Document doc) throws IOException {
        indexWriter.addDocument(doc);
        indexWriter.commit();
    }

    /**
     * Runs a term query and concatenates the requested field of every hit.
     *
     * @param term  the exact term to look up
     * @param n     maximum number of hits to collect
     * @param field name of the field to read from each matching document
     * @return the concatenated field representations, or an empty string when nothing matches
     * @throws IOException    if reading the index fails
     * @throws ParseException declared for backward compatibility with existing callers
     */
    public static String findIndex(Term term, Integer n, String field) throws IOException, ParseException {
        // Ask the factory on every call rather than caching a searcher at class load.
        IndexSearcher searcher = LuceneFactory.getIndexSearch(indexWriter);

        TopDocs topDocs = searcher.search(new TermQuery(term), n);
        ScoreDoc[] hits = topDocs.scoreDocs;

        if (hits == null || hits.length == 0) {
            System.out.println("索引不存在!");
            return "";
        }

        StringBuilder result = new StringBuilder();
        for (ScoreDoc hit : hits) {
            Document doc = searcher.doc(hit.doc);
            // NOTE(review): this appends the field object's string form (or "null" when the
            // field is absent), exactly as before; doc.get(field) may be what was intended.
            result.append(doc.getField(field));
        }
        return result.toString();
    }

    /**
     * Deletes every document matching the given term and commits the change.
     *
     * @param term the term identifying the documents to delete
     * @throws IOException if deleting or committing fails
     */
    public static void delIndex(Term term) throws IOException {
        indexWriter.deleteDocuments(new TermQuery(term));
        indexWriter.commit();
    }

    /**
     * Replaces the documents matching the given term with the supplied document and
     * commits the change.
     *
     * @param query the term identifying the documents to replace
     * @param doc   the replacement document
     * @throws IOException if updating or committing fails
     */
    public static void updateIndex(Term query, Document doc) throws IOException {
        indexWriter.updateDocument(query, doc);
        indexWriter.commit();
    }

    /**
     * Forces merging of segments that contain deleted documents, then commits.
     * Failures are reported on standard error and otherwise ignored, as before, because
     * the method signature does not allow a checked exception.
     */
    public static void forceDelete(){
        try {
            indexWriter.forceMergeDeletes();
            indexWriter.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes the shared writer and the index directory. Call once at shutdown; no other
     * method of this class should be used afterwards.
     *
     * @throws IOException if closing the writer or the directory fails
     */
    public static void close() throws IOException {
        try {
            indexWriter.close();
        } finally {
            directory.close();
        }
    }

}

上面带有注释

接下来看一下测试结果:对比将 IndexReader 维护成单例和没有维护成单例这两种情况。


image.png image.png

从上面可以很明显地看出:第一次因为需要创建索引,两者速度差不多;但从后面几次读取可以明显看出,速度有提升。

需要测试代码的私信我,不想在文章里面贴的太长,影响阅读

上一篇 下一篇

猜你喜欢

热点阅读