lucene

2018-10-12 · 本文已影响 0 人 · 作者：冰与河豚鱼

创建索引库

public static void main(String[] args) throws IOException{
    //创建一个IndexWriter对象
    IndexWriter indexWriter = new IndexWriter(
    //索引库的创建路径
    FSDirectory.open(Paths.get("索引存储路径"))，
    //索引配置，文本解析器
    new IndexWriterConfig(new StandardAnalyzer())
    );

    //把原始的文档转换成Document对象（把文档的信息封装到Field属性里面去）
    File file = new File("");
    Document doc = new convertToDocument(file);

    //使用IndexWriter把Document对象写到索引库里卖弄
    indexWriter.addDocument(doc);

    indexWriter.close();

}

//把原始的文档转化为Document对象
/**
 * Converts a file on disk into a Lucene {@code Document}.
 *
 * <p>The document carries four pieces of information: name, path, size and content.
 *
 * @param file the file to convert
 * @return a document with the fields {@code name}, {@code path}, {@code size} and {@code content}
 * @throws FileNotFoundException if {@code file} cannot be opened for reading
 */
private static Document convertToDocument(File file) throws FileNotFoundException {
    Document doc = new Document();

    // Name: tokenized for search and stored so it can be shown in results.
    Field nameField = new TextField("name", file.getName(), Store.YES);
    // Path: stored only.
    Field pathField = new StoredField("path", file.getAbsolutePath());
    // Size: LongPoint makes the value queryable; the extra StoredField makes it
    // retrievable from a hit, because a LongPoint alone is not stored.
    Field sizeField = new LongPoint("size", file.length());
    Field storedSizeField = new StoredField("size", file.length());
    // Content: passed as a Reader, so it is tokenized but not stored.
    // NOTE(review): FileReader decodes with the platform default charset on older JDKs.
    Field contentField = new TextField("content", new BufferedReader(new FileReader(file)));

    doc.add(nameField);
    doc.add(pathField);
    doc.add(sizeField);
    doc.add(storedSizeField);
    doc.add(contentField);
    return doc;
}

对文件夹下所有文件进行索引

public class LuceneDirectoryIndex{
    private static final String DATA_PATH = "";
    private static final String INDEX_PATH = "";
    public static void main(String[] args) throws IOException{
    //把目录里面的文件都遍历出来
    File file = new File(DATA_PATH);
    // File[] files = file.listFiles();  //列出目录下的所有文件

  List<File> files = new ArrayList<>();
  
  //遍历文件（遍历目录和子目录，速度非常快）
    Files.walkFileTree(Paths.get(DATA_PATH),new SimpleFileVisitor<Path>){
       
       //遍历到每个文件的时候，visitFile方法都会被调用
       @Override
       public FileVisitResult(Path file,BasicFileAttributes atte) throws IOException{
           IF(file.getFileName().toString().endWith(".docx"));  //只查询word文件
           
           files.add(file.toFile());

           return FileVisitResult.CONTINUE;
       }
    }

    //对每个文件进行索引
    IndexWriter indexWriter = createIndexWriter();
    for(File f:files){
       indexWriter.addDocument(convertToDocument(f));
    }
       indexWriter.close();
    }

    //创建一个索引写入器
    private static IndexWriter createIndexWriter() throws IOException{
        IndexWriter indexWriter = new IndexWriter(
          FSDirectory.open(Paths.get("索引存储路径"))，
          new IndexWriterConfig(new StandardAnalyzer()));
      return indexWriter;
    }

    //把原始的文档转化为Document对象
    private static Document convertToDocument(File file)throws FileNotFoundException{
    Document doc = new Document();

    //把文件的每一个信息封装到Field对象里面去
    //名字，路径，大小，内容
    Field nameField = new TextField("name",file.getName(),Store.YES);
    Field pathField = new StoreField("path",file.getAbsolutePath());
    Field sizeField = new LongPoint("size",fiel.length());
    Field contentField = new TextField("content",new BufferedReader(new File(file)));

    doc.add(nameField);
    doc.add(pathField);
    doc.add(sizeField);
    doc.add(contentField);
}
}

文档搜索

/**
 * Searches the Lucene index at {@link #INDEX_PATH} for documents whose {@code name}
 * field contains the term {@code spring} and prints the top hits.
 */
public class LuceneSearcher {

    /** Directory where the index is stored (placeholder; set before running). */
    private static final String INDEX_PATH = "";

    /**
     * Runs the query and prints up to ten matching documents.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the index cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // Resources are closed in reverse order: reader first, then directory.
        try (FSDirectory directory = FSDirectory.open(Paths.get(INDEX_PATH));
             IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);

            TopDocs topDocs = searcher.search(new TermQuery(new Term("name", "spring")), 10);
            for (ScoreDoc hit : topDocs.scoreDocs) {
                // Internal id of the hit (comparable to a primary key in a database).
                int id = hit.doc;
                Document doc = searcher.doc(id);

                // NOTE(review): a value is only returned for fields that were stored at
                // index time; fields that were indexed without being stored yield null.
                String name = doc.get("name");
                String path = doc.get("path");
                String size = doc.get("size");
                String content = doc.get("content");

                String msg = "name: %s path: %s size: %s content: %s \n";
                // printf applies the format string; println does not accept format arguments.
                System.out.printf(msg, name, path, size, content);
            }
        }
    }
}

lucene

创建索引库

对文件夹下所有文件进行索引

文档搜索

猜你喜欢

热点阅读