ES支持中文&&全拼&&拼音首字母搜索

2019-06-13  本文已影响0人  箭飞天

环境

搜索模板

先新建索引模板,后续创建名称匹配 goods* 的索引时会直接套用该模板,省事方便。

PUT _template/goods
{
  "index_patterns":"goods*",
  "settings": {
    "index.number_of_replicas": "1",
    "index.number_of_shards": "5",
    "index.translog.flush_threshold_size": "512mb",
    "index.translog.sync_interval": "60s",
    "index.codec": "best_compression",
    "analysis": {
      "filter": {
        "edge_ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 50
        },
        "simple_pinyin_filter": {
          "type": "pinyin",
          "keep_first_letter": true,
          "keep_separate_first_letter": false,
          "keep_full_pinyin": false,
          "keep_original": false,
          "limit_first_letter_length": 50,
          "lowercase": true
        },
        "full_pinyin_filter": {
          "type": "pinyin",
          "keep_first_letter": false,
          "keep_separate_first_letter": false,
          "keep_full_pinyin": true,
          "none_chinese_pinyin_tokenize": true,
          "keep_original": false,
          "limit_first_letter_length": 50,
          "lowercase": true
        }
      },
      "char_filter": {
        "charconvert": {
          "type": "mapping",
          "mappings_path": "char_filter_text.txt"
        }
      },
      "tokenizer": {
        "ik_max_word": {
          "type": "ik_max_word",
          "use_smart": true
        }
      },
      "analyzer": {
        "ngramIndexAnalyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "edge_ngram_filter",
            "lowercase"
          ],
          "char_filter": [
            "charconvert"
          ]
        },
        "ngramSearchAnalyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase"
          ],
          "char_filter": [
            "charconvert"
          ]
        },
        "ikIndexAnalyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "char_filter": [
            "charconvert"
          ]
        },
        "ikSearchAnalyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "char_filter": [
            "charconvert"
          ]
        },
        "simplePinyinIndexAnalyzer": {
          "tokenizer": "keyword",
          "filter": [
            "simple_pinyin_filter",
            "edge_ngram_filter",
            "lowercase"
          ]
        },
        "simplePinyinSearchAnalyzer": {
          "tokenizer": "keyword",
          "filter": [
            "simple_pinyin_filter",
            "lowercase"
          ]
        },
        "fullPinyinIndexAnalyzer": {
          "tokenizer": "keyword",
          "filter": [
            "full_pinyin_filter",
            "edge_ngram_filter",
            "lowercase"
          ]
        },
        "fullPinyinSearchAnalyzer": {
          "tokenizer": "keyword",
          "filter": [
            "full_pinyin_filter",
            "lowercase"
          ]
        }
      }
    }
  }
}

新建 char_filter_text.txt 存于 ES_HOME/config 目录下

新建索引

goods_v1 将使用上述模板(goods*),可直接使用模板中定义的 analyzer

DELETE goods_v1

PUT /goods_v1
{
  "mappings": {
    "doc":{
      "properties" : {
        "id":{
          "type":"long"
        },
        "name" : {
          "type": "text", 
          "analyzer": "ikIndexAnalyzer",
          "fields": {
            "ngram": {
              "type": "text", 
              "analyzer": "ngramIndexAnalyzer"
            },
            "SPY": {
              "type": "text", 
              "analyzer": "simplePinyinIndexAnalyzer"
            },
            "FPY": {
              "type": "text", 
              "analyzer": "fullPinyinIndexAnalyzer"
            }
          }
        },
        "update_time" : {
          "type":"date"
        },
        "deleted" : {
          "type":"boolean"
        }
      }
    }
  }
}

PUT /goods_v1/_alias/goods
{ 
  "is_write_index":true
}

filter&&结果id倒序

本搜索只做简单过滤,并且按id倒序返回,不需要计算相似度

GET goods/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "filter": [
            {
              "term": {
                "deleted": {
                  "value": false,
                  "boost": 1
                }
              }
            },
            {
              "bool": {
                "should": [
                  {
                    "match": {
                      "name.ngram": {
                        "query": "水果",
                        "operator": "OR",
                        "analyzer": "ngramSearchAnalyzer",
                        "prefix_length": 0,
                        "max_expansions": 50,
                        "fuzzy_transpositions": true,
                        "lenient": false,
                        "zero_terms_query": "NONE",
                        "auto_generate_synonyms_phrase_query": true,
                        "boost": 5
                      }
                    }
                  },
                  {
                    "term": {
                      "name.SPY": {
                        "value": "水果",
                        "boost": 1
                      }
                    }
                  },
                  {
                    "wildcard": {
                      "name.SPY": {
                        "wildcard": "*水果*",
                        "boost": 0.8
                      }
                    }
                  },
                  {
                    "match_phrase": {
                      "name.FPY": {
                        "query": "水果",
                        "analyzer": "fullPinyinSearchAnalyzer",
                        "slop": 0,
                        "zero_terms_query": "NONE",
                        "boost": 1
                      }
                    }
                  },
                  {
                    "match": {
                      "name": {
                        "query": "水果",
                        "operator": "OR",
                        "analyzer": "ikSearchAnalyzer",
                        "prefix_length": 0,
                        "max_expansions": 50,
                        "minimum_should_match": "100%",
                        "fuzzy_transpositions": true,
                        "lenient": false,
                        "zero_terms_query": "NONE",
                        "auto_generate_synonyms_phrase_query": true,
                        "boost": 1
                      }
                    }
                  }
                ],
                "adjust_pure_negative": true,
                "boost": 1
              }
            }
          ],
          "adjust_pure_negative": true,
          "boost": 1
        }
      },
      "boost": 1
    }
  },
  "sort": [
    {
      "id": {
        "order": "desc"
      }
    }
  ]
}

根据score返回结果

使用 dis_max 组合多个 query:每个文档取各子查询中最高的相似度得分(score)作为该文档的得分,结果按得分从高到低返回。

GET goods/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "dis_max": {
            "tie_breaker": 0,
            "queries": [
              {
                "match": {
                  "name.ngram": {
                    "query": "水果",
                    "operator": "OR",
                    "analyzer": "ngramSearchAnalyzer",
                    "prefix_length": 0,
                    "max_expansions": 50,
                    "fuzzy_transpositions": true,
                    "lenient": false,
                    "zero_terms_query": "NONE",
                    "auto_generate_synonyms_phrase_query": true,
                    "boost": 5
                  }
                }
              },
              {
                "term": {
                  "name.SPY": {
                    "value": "水果",
                    "boost": 1
                  }
                }
              },
              {
                "wildcard": {
                  "name.SPY": {
                    "wildcard": "*水果*",
                    "boost": 0.8
                  }
                }
              },
              {
                "match_phrase": {
                  "name.FPY": {
                    "query": "水果",
                    "analyzer": "fullPinyinSearchAnalyzer",
                    "slop": 0,
                    "zero_terms_query": "NONE",
                    "boost": 1
                  }
                }
              },
              {
                "match": {
                  "name": {
                    "query": "水果",
                    "operator": "OR",
                    "analyzer": "ikSearchAnalyzer",
                    "prefix_length": 0,
                    "max_expansions": 50,
                    "minimum_should_match": "100%",
                    "fuzzy_transpositions": true,
                    "lenient": false,
                    "zero_terms_query": "NONE",
                    "auto_generate_synonyms_phrase_query": true,
                    "boost": 1
                  }
                }
              }
            ],
            "boost": 1
          }
        }
      ],
      "filter": [
        {
          "term": {
            "deleted": {
              "value": false,
              "boost": 1
            }
          }
        }
      ],
      "adjust_pure_negative": true,
      "boost": 1
    }
  }
}

部分相关java代码

/**
 * Base request object shared by search requests.
 *
 * <p>NOTE(review): {@code page} and {@code size} are presumably the page index and the page
 * size used for pagination; neither is read by the query-building code shown alongside this
 * class, so confirm against the caller.
 */
public class SearchRequest {
    public int page;
    public int size;
}

/**
 * Search request for goods; adds the search keyword to {@link SearchRequest}.
 */
public class GoodsRequest extends SearchRequest {
    // Text to search for; it is matched against the "name" field and its sub-fields.
    public String keyword;
}

/**
 * Combines a query with the base filter clauses.
 *
 * <p>Starts from the bool query returned by {@code getBaseFilterQueryBuilder()} and, when
 * {@code baseQueryBuilder} is not {@code null}, adds it as a {@code must} clause.
 *
 * @param baseQueryBuilder query to add as a {@code must} clause; skipped when {@code null}
 * @param request the search request; not read here because the base filter takes no arguments
 * @return the bool query holding the base filter and, if given, the {@code must} clause
 */
private BoolQueryBuilder addFilter(QueryBuilder baseQueryBuilder, GoodsRequest request) {
    // getBaseFilterQueryBuilder() is declared without parameters, so it is called without any.
    BoolQueryBuilder boolQueryBuilder = getBaseFilterQueryBuilder();
    if (baseQueryBuilder != null) {
        boolQueryBuilder.must(baseQueryBuilder);
    }
    return boolQueryBuilder;
}

/**
 * Creates the bool query that the searches start from.
 *
 * @return a new bool query whose only clause is a filter requiring {@code deleted} to be false
 */
private BoolQueryBuilder getBaseFilterQueryBuilder() {
    return new BoolQueryBuilder().filter(QueryBuilders.termQuery("deleted", false));
}

/**
 * Builds the keyword part of the search as a bool query of five {@code should} clauses over
 * the {@code name} field and its {@code ngram}, {@code SPY} and {@code FPY} sub-fields.
 *
 * @param keyword the text to search for
 * @return the bool query, or {@code null} when {@code keyword} is empty
 */
private BoolQueryBuilder getKeywordQueryBuilder(String keyword) {
    if (StringUtils.isEmpty(keyword)) {
        return null;
    }

    // Match on the edge-ngram sub-field; this clause carries the highest boost.
    QueryBuilder ngramMatch = QueryBuilders
        .matchQuery("name.ngram", keyword)
        .analyzer("ngramSearchAnalyzer")
        .boost(5f);

    // Exact term on the pinyin-first-letter sub-field.
    TermQueryBuilder initialsExact = QueryBuilders.termQuery("name.SPY", keyword);

    // "Contains" match on the pinyin-first-letter sub-field, boosted slightly lower.
    QueryBuilder initialsContains = QueryBuilders
        .wildcardQuery("name.SPY", "*" + keyword + "*")
        .boost(0.8f);

    // Phrase match on the full-pinyin sub-field.
    QueryBuilder fullPinyinPhrase = QueryBuilders
        .matchPhraseQuery("name.FPY", keyword)
        .analyzer("fullPinyinSearchAnalyzer");

    // Match on the IK-analyzed name; every term produced from the keyword must match.
    QueryBuilder ikMatch = QueryBuilders
        .matchQuery("name", keyword)
        .analyzer("ikSearchAnalyzer")
        .minimumShouldMatch("100%");

    return QueryBuilders.boolQuery()
        .should(ngramMatch)
        .should(initialsExact)
        .should(initialsContains)
        .should(fullPinyinPhrase)
        .should(ikMatch);
}

/**
 * Builds a search that returns the goods satisfying the search conditions, sorted by
 * {@code id} in descending order.
 *
 * <p>The base filter and, when present, the keyword query are both applied as filter clauses,
 * and the whole bool query is wrapped in a constant-score query.
 *
 * @param request the search request whose {@code keyword} is searched for
 * @return the search source holding the constant-score query and the descending id sort
 */
private SearchSourceBuilder getSearchBuilder(GoodsRequest request) {
    BoolQueryBuilder filters = getBaseFilterQueryBuilder();

    // The keyword query is null when no keyword was given; then only the base filter applies.
    BoolQueryBuilder keywordClause = getKeywordQueryBuilder(request.keyword);
    if (keywordClause != null) {
        filters.filter(keywordClause);
    }

    return new SearchSourceBuilder()
        .query(QueryBuilders.constantScoreQuery(filters))
        .sort(SortBuilders.fieldSort("id").order(SortOrder.DESC));
}

/**
 * Builds a search that returns results by relevance.
 *
 * <p>The five keyword queries are combined with {@code dis_max}, and the result is passed
 * through {@code addFilter} together with the request.
 *
 * @param request the search request whose {@code keyword} is searched for; it was previously
 *     referenced in the body without being declared
 * @return the search source holding the filtered {@code dis_max} query
 */
private SearchSourceBuilder getSearchBuilderWithScore(GoodsRequest request) {
    // dis_max scores each document by its best-scoring sub-query.
    DisMaxQueryBuilder disMaxQueryBuilder = QueryBuilders.disMaxQuery();

    // Match on the edge-ngram sub-field; this clause carries the highest boost.
    QueryBuilder ngramSearchBuilder = QueryBuilders
        .matchQuery("name.ngram", request.keyword)
        .analyzer("ngramSearchAnalyzer")
        .boost(5f);
    disMaxQueryBuilder.add(ngramSearchBuilder);

    // Exact term on the pinyin-first-letter sub-field.
    TermQueryBuilder simplePinYinQueryBuilder = QueryBuilders.termQuery("name.SPY", request.keyword);
    disMaxQueryBuilder.add(simplePinYinQueryBuilder);

    // "Contains" match on the pinyin-first-letter sub-field, boosted slightly lower.
    QueryBuilder simplePinYinContainQueryBuilder = QueryBuilders
        .wildcardQuery("name.SPY", "*" + request.keyword + "*")
        .boost(0.8f);
    disMaxQueryBuilder.add(simplePinYinContainQueryBuilder);

    // Phrase match on the full-pinyin sub-field.
    QueryBuilder fullPingYinQueryBuilder = QueryBuilders
        .matchPhraseQuery("name.FPY", request.keyword)
        .analyzer("fullPinyinSearchAnalyzer");
    disMaxQueryBuilder.add(fullPingYinQueryBuilder);

    // Match on the IK-analyzed name; every term produced from the keyword must match.
    QueryBuilder containSearchBuilder = QueryBuilders
        .matchQuery("name", request.keyword)
        .analyzer("ikSearchAnalyzer")
        .minimumShouldMatch("100%");
    disMaxQueryBuilder.add(containSearchBuilder);

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    searchSourceBuilder.query(addFilter(disMaxQueryBuilder, request));

    return searchSourceBuilder;
}

遇到问题

---未完待续---

参考

上一篇 下一篇

猜你喜欢

热点阅读