Elasticsearch实现模糊搜索、keyword忽略大小写
2021-10-11 本文已影响0人
AC编程
目标
1、ngram分词器Elasticsearch实现模糊搜索
2、keyword忽略大小写
一、代码
Controller
/**
 * Creates the "member" index through {@code esMemberService}.
 *
 * @return a {@code Result} wrapping the Boolean returned by the service call
 * @throws Exception propagated from the service call
 */
@PostMapping("createIndex")
@ApiOperation(value="创建索引")
public Result<Boolean> createIndex() throws Exception {
    return Result.success(esMemberService.createIndex("member"));
}
Service
/**
 * Creates the given index with the settings and mapping built by
 * {@code packageSetting()} and {@code packageMapping()}.
 *
 * @param index name of the index to create
 * @return the value returned by {@code createIndexSetting}
 */
@Override
public Boolean createIndex(String index) {
    // Arguments are evaluated left to right: settings are built first, then the mapping.
    return createIndexSetting(index, packageSetting(), packageMapping());
}
/**
 * Builds the index mapping as a JSON {@code XContentBuilder}.
 *
 * @return the populated builder, or {@code null} if building it threw an exception
 */
private XContentBuilder packageMapping(){
    try {
        // Shared by both date fields below.
        final String dateFormat = "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis";

        XContentBuilder builder = XContentFactory.jsonBuilder();
        builder.startObject();
        builder.field("dynamic", true);
        builder.startObject("properties");

        // id: stored as long, not indexed
        builder.startObject("id");
        builder.field("type", "long");
        builder.field("index", false);
        builder.endObject();

        // account: keyword made case-insensitive through the "lowercase" normalizer
        builder.startObject("markId");
        builder.field("type", "keyword");
        builder.field("normalizer", "lowercase");
        builder.endObject();

        // nickname: fuzzy, case-insensitive search through the "ngram" analyzer
        builder.startObject("nickName");
        builder.field("type", "text");
        builder.field("analyzer", "ngram");
        builder.endObject();

        // avatar: not indexed
        builder.startObject("iconUrl");
        builder.field("type", "text");
        builder.field("index", false);
        builder.endObject();

        // gender
        builder.startObject("sex");
        builder.field("type", "keyword");
        builder.endObject();

        builder.startObject("mobile");
        builder.field("type", "keyword");
        builder.endObject();

        // latitude / longitude
        builder.startObject("location");
        builder.field("type", "geo_point");
        builder.endObject();

        // address
        builder.startObject("address");
        builder.field("type", "text");
        builder.endObject();

        builder.startObject("openMobile");
        builder.field("type", "keyword");
        builder.endObject();

        builder.startObject("birthday");
        builder.field("type", "date");
        builder.field("format", dateFormat);
        builder.endObject();

        builder.startObject("createTime");
        builder.field("type", "date");
        builder.field("format", dateFormat);
        builder.endObject();

        builder.endObject(); // properties
        builder.endObject(); // root
        return builder;
    } catch (Exception e) {
        e.printStackTrace();
        // Callers receive null when the mapping could not be built.
        return null;
    }
}
/**
 * Builds the index settings: an ngram-based analyzer and a lowercase normalizer.
 * <ul>
 *   <li>ngram: splits English words into letter-level grams</li>
 *   <li>{@code field("filter","lowercase")}: makes searches case-insensitive</li>
 *   <li>{@code index.max_ngram_diff}: allowed difference between min_gram and max_gram</li>
 *   <li>normalizer: makes keyword fields case-insensitive</li>
 * </ul>
 * https://www.elastic.co/guide/en/elasticsearch/reference/6.8/analysis-ngram-tokenizer.html
 * https://www.elastic.co/guide/en/elasticsearch/reference/6.0/normalizer.html
 *
 * @return the populated builder, or {@code null} if building it threw an exception
 */
private XContentBuilder packageSetting() {
    try {
        XContentBuilder builder = XContentFactory.jsonBuilder();
        builder.startObject();
        builder.field("index.max_ngram_diff","5");
        builder.startObject("analysis");

        // analyzer "ngram": custom tokenizer followed by lowercasing
        builder.startObject("analyzer");
        builder.startObject("ngram");
        builder.field("tokenizer","my_tokenizer");
        builder.field("filter","lowercase");
        builder.endObject();
        builder.endObject();

        // tokenizer "my_tokenizer": grams of 1 to 3 characters
        builder.startObject("tokenizer");
        builder.startObject("my_tokenizer");
        builder.field("type","ngram");
        builder.field("min_gram","1");
        builder.field("max_gram","3");
        builder.endObject();
        builder.endObject();

        // normalizer "lowercase": referenced by keyword fields in the mapping
        builder.startObject("normalizer");
        builder.startObject("lowercase");
        builder.field("type","custom");
        builder.field("filter","lowercase");
        builder.endObject();
        builder.endObject();

        builder.endObject(); // analysis
        builder.endObject(); // root
        return builder;
    } catch (Exception e) {
        e.printStackTrace();
        // Callers receive null when the settings could not be built.
        return null;
    }
}
/**
 * Creates an index, applying the settings and mapping when they are provided.
 *
 * @param indexName name of the index to create
 * @param settings  index settings; skipped when {@code null}
 * @param mapping   index mapping; skipped when {@code null}
 * @return {@code response.isAcknowledged()} from the create-index call,
 *         or {@code false} if any exception was thrown
 */
protected Boolean createIndexSetting(String indexName, XContentBuilder settings,XContentBuilder mapping) {
    try {
        CreateIndexRequest request = buildCreateIndexRequest(indexName);
        if (settings != null) {
            request.settings(settings);
        }
        if (mapping != null) {
            request.mapping(mapping);
        }
        CreateIndexResponse response = client.indices().create(request, COMMON_OPTIONS);
        log.info("是否所有节点都已确认请求: " + response.isAcknowledged());
        log.info("指示是否在超时之前为索引中的每个分片启动了必要数量的分片副本: " + response.isShardsAcknowledged());
        return response.isAcknowledged();
    } catch (Exception e) {
        // Report through the logger already used above instead of printStackTrace,
        // so the failure and the index name reach the application log.
        log.error("创建索引失败: " + indexName, e);
        return false;
    }
}
二、JSON格式
PUT member
{
"settings": {
"index.max_ngram_diff":"5",
"analysis": {
"analyzer": {
"ngram": {
"tokenizer": "my_tokenizer",
"filter": "lowercase"
}
},
"tokenizer": {
"my_tokenizer": {
"type": "ngram",
"min_gram": 1,
"max_gram": 3
}
}
}
}
}
三、参数说明
3.1 filter:lowercase
大小写兼容搜索,即字段内容为alan
,搜索alan
、ALAN
、Alan
都可以搜索出来。
3.2 min_gram、max_gram
根据min_gram
以及max_gram
指定切分时最小几个字符、最大几个字符。长度越短,能匹配到的文档越多,但匹配质量也越差;长度越长,匹配越精确。
如min_gram
为1,max_gram
为1,对于Quick
这个单词,就会变成[ Q,u,i,c,k]。按关键字Qui
搜索,关键字就会被拆分成Q
,u
、i
三个字母去搜索,可能就会搜索出:Quick
、Query
、your
、like
等单词。
如min_gram
为1,max_gram
为3,对于Quick
这个单词,就会变成[ Q, Qu, Qui, u, ui, uic, i, ic, ick, c, ck, k ]。按关键字Qui
搜索,只会去匹配包含Qui
的单词,因此搜索结果只有Quick
3.3 index.max_ngram_diff
min_gram
默认值为1,max_gram
默认值为2,min_gram
与max_gram
的差值默认最大为1,如果设置值时差值大于1,需要先设置index.max_ngram_diff
参数。