基于Mynlp拼音流切分实现ES拼音查询功能
2020-09-02 本文已影响0人
菠萝炒鸡蛋
需求:
基于ES实现一个多字段匹配查询功能, 要求支持谐音和拼音识别
开发思路:
采用pinyin分词插件, 实现谐音查询功能
通过正则表达式识别出查询关键词中的字母元素, 然后用Mynlp的拼音流切分功能生成拼音查询关键词, 查询时指定stop分词器
代码
设置索引setting
{
  "index.max_ngram_diff": 10,
  "number_of_shards": 5,
  "auto_expand_replicas": "0-3",
  "analysis": {
    "analyzer": {
      "onechar": {
        "tokenizer": "onechar",
        "filter": ["lowercase"]
      },
      "ngram": {
        "tokenizer": "ngram",
        "filter": ["lowercase"]
      },
      "mynlp": {
        "tokenizer": "mynlp"
      }
    },
    "tokenizer": {
      "onechar": {
        "type": "ngram",
        "min_gram": 1,
        "max_gram": 1
      },
      "ngram": {
        "type": "ngram",
        "min_gram": 2,
        "max_gram": 8
      },
      "mynlp": {
        "type": "mynlp-core",
        "mode": "atom",
        "subWord": "smart",
        "personName": true,
        "punctuation": true,
        "stopword": false
      }
    }
  }
}
创建动态模板, 用于指定各字段的分词器:
{
  "dynamic_templates": [
    {
      "pinyin": {
        "match": "*_py",
        "mapping": {
          "type": "text",
          "analyzer": "mynlp",
          "fields": {
            "py": {
              "type": "text",
              "analyzer": "pinyin"
            },
            "pyf": {
              "type": "text",
              "analyzer": "pinyin-fuzzy"
            },
            "pyh": {
              "type": "text",
              "analyzer": "pinyin-head"
            },
            "pyfk": {
              "type": "text",
              "analyzer": "pinyin-fuzzy-keyword"
            },
            "pyhk": {
              "type": "text",
              "analyzer": "pinyin-head-keyword"
            },
            "pyk": {
              "type": "text",
              "analyzer": "pinyin-keyword"
            }
          }
        }
      }
    },
    {
      "strings": {
        "match_mapping_type": "string",
        "mapping": {
          "analyzer": "mynlp",
          "type": "text"
        }
      }
    }
  ]
}
实体类数据结构:
/**
 * Document entity stored in the index named `GlobalConst.projectName + "bot_function"`.
 *
 * Index settings are read from `setting.json` and the mapping from `mapping.json`.
 * The `_py` suffix on [name_py], [category_py] and [department_py] is deliberate and
 * must be kept: presumably `mapping.json` holds the dynamic template matching `*_py`,
 * which is what gives these fields their pinyin sub-fields (verify against the
 * deployed mapping).
 *
 * @property id document identifier; nullable
 * @property no numeric field of the entry
 * @property name_py name of the entry
 * @property category_py category of the entry
 * @property department_py department of the entry
 * @property url URL associated with the entry
 */
@Document(indexName = GlobalConst.projectName + "bot_function")
@Setting(settingPath = "setting.json")
@Mapping(mappingPath = "mapping.json")
data class BotFunction(
    @Id val id: String?,
    val no: Int,
    val name_py: String,
    val category_py: String,
    val department_py: String,
    val url: String
)
DSL语句
{
  "bool": {
    "should": [
      {
        "query_string": {
          "query": "?0",
          "fields": ["name_py", "name_py.py"],
          "boost": 4
        }
      },
      {
        "query_string": {
          "analyzer": "stop",
          "query": "?1",
          "fields": ["name_py.py", "department_py.py", "category_py.py"],
          "boost": 2
        }
      },
      {
        "query_string": {
          "query": "?0",
          "fields": ["department_py", "department_py.py"],
          "boost": 2
        }
      },
      {
        "query_string": {
          "query": "?0",
          "fields": ["category_py", "category_py.py"],
          "boost": 1
        }
      }
    ]
  }
}