multi-fields

2020-01-04 本文已影响0人秦汉邮侠

版本

6.4.0

多字段

经常需要将同一个字段用于不同的用途，比如：全文检索、排序和聚合等。多字段（multi-fields）正是为此而设计的：通过fields参数，同一个字段可以按多种方式建立索引。
样例一

PUT my_index
{
  "mappings": {
    "_doc": {
      "properties": {
        "city": {
          "type": "text",
          "fields": {
            "raw": { 
              "type":  "keyword"
            }
          }
        }
      }
    }
  }
}

PUT my_index/_doc/1
{
  "city": "New York"
}

PUT my_index/_doc/2
{
  "city": "York"
}

PUT my_index/_doc/3
{
  "city": "York"
}

GET my_index/_search
{
  "query": {
    "match": {
      "city": "york" 
    }
  },
  "sort": {
    "city.raw": "asc" 
  },
  "aggs": {
    "Cities": {
      "terms": {
        "field": "city.raw" 
      }
    }
  }
}

结果
{
  "took": 9,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 3,
    "max_score": null,
    "hits": [
      {
        "_index": "my_index",
        "_type": "_doc",
        "_id": "1",
        "_score": null,
        "_source": {
          "city": "New York"
        },
        "sort": [
          "New York"
        ]
      },
      {
        "_index": "my_index",
        "_type": "_doc",
        "_id": "2",
        "_score": null,
        "_source": {
          "city": "York"
        },
        "sort": [
          "York"
        ]
      },
      {
        "_index": "my_index",
        "_type": "_doc",
        "_id": "3",
        "_score": null,
        "_source": {
          "city": "York"
        },
        "sort": [
          "York"
        ]
      }
    ]
  },
  "aggregations": {
    "Cities": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "York",
          "doc_count": 2
        },
        {
          "key": "New York",
          "doc_count": 1
        }
      ]
    }
  }
}

关于sort
上面的sort字段是对搜索结果的排序，不是对聚合结果的排序
样例二

GET my_index/_search
{
  "size": 0, 
  "query": {
    "match": {
      "city": "york" 
    }
  },
  "sort": {
    "city.raw": "asc" 
  },
  "aggs": {
    "Cities": {
      "terms": {
        "field": "city.raw" 
      }
    }
  }
}

结果
{
  "took": 4,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 3,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "Cities": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "York",
          "doc_count": 2
        },
        {
          "key": "New York",
          "doc_count": 1
        }
      ]
    }
  }
}

关于size=0
size=0相当于不展示文档的数据，只展示聚合后的数据
terms桶聚合默认按doc_count从大到小的顺序排列

多字段多分词器

对于英文搜索，用户希望根据单词能检索到内容，同时也希望通过词根能检索到内容

样例一

PUT my_index1
{
  "mappings": {
    "_doc": {
      "properties": {
        "text": { 
          "type": "text",
          "fields": {
            "english": { 
              "type":     "text",
              "analyzer": "english"
            }
          }
        }
      }
    }
  }
}

PUT my_index1/_doc/1
{ "text": "quick brown fox" } 

PUT my_index1/_doc/2
{ "text": "quick brown foxes" } 

GET my_index1/_search
{
  "query": {
    "multi_match": {
      "query": "quick brown foxes",
      "fields": [ 
        "text",
        "text.english"
      ],
      "type": "most_fields" 
    }
  }
}

结果

{
  "took": 7,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 1.7260926,
    "hits": [
      {
        "_index": "my_index1",
        "_type": "_doc",
        "_id": "2",
        "_score": 1.7260926,
        "_source": {
          "text": "quick brown foxes"
        }
      },
      {
        "_index": "my_index1",
        "_type": "_doc",
        "_id": "1",
        "_score": 1.4384105,
        "_source": {
          "text": "quick brown fox"
        }
      }
    ]
  }
}

text字段没有显式指定分词器，默认使用标准分词器（standard），将字符串切分成单词
对text.english字段配置英文分词器（english），将字符串切分成单词后，再提取词根（例如foxes会被还原为fox）
多字段多分词器一般要配置most_fields，它会把所有匹配字段的得分合并计算，匹配的字段越多，文档得分越高
关于中文拼音简拼搜索，请参考https://www.jianshu.com/writer#/notebooks/18277751/notes/50120900

multi-fields

版本

多字段

多字段多分词器

猜你喜欢

热点阅读