Es

017.Elasticsearch搜索操作入门篇

2020-07-03  本文已影响0人  CoderJed

1. 多种搜索方式

1.1 Query String Search:在请求URL中包括search的参数

# 语法
curl -X GET "ip:port/index_name/type_name/_search?q=field:value&sort=field:desc"

# 举例
# 查看全部文档
curl -X GET "node01:9200/shop/product/_search"
# 搜索商品名称中包含“Toothpaste”的商品,而且按照price降序排序
curl -X GET "node01:9200/shop/product/_search?q=name:Toothpaste&sort=price:desc"

# 扩展
GET /index_name/type_name/_search?q=key:value
GET /index_name/type_name/_search?q=+key:value
GET /index_name/type_name/_search?q=-key:value

# name的值包含"Tom"
GET /test_index/test_type/_search?q=name:Tom
# 无论哪个field,只要其值包含"Tom"即可
GET /test_index/test_type/_search?q=Tom
# =+ 与 = 的效果相同
GET /test_index/test_type/_search?q=+name:Tom
# =- 的意思是:排除field中包含指定词条的document(按词条精确匹配,例如本例不会排除name为Tommy的document)
GET /test_index/test_type/_search?q=-name:Tom

假设有如下的document:

{
    "name": "Tom",
    "age": 25,
    "sex": "male",
    "country": "China"
}

在插入这条数据后,ES会自动将多个field的值全部以字符串的方式串联起来,组成一个长字符串,作为_all field的值,同时建立索引。本例中,_all field的值为"Tom 25 male China"。当使用Query String Search(GET /test_index/test_type/_search?q=Tom)且不指定field时,实际上并不会去逐一比较每个field的值,而是直接与_all field的分词结果做比较。注意:_all field自ES 6.0起已被废弃,并在7.0中被移除;在这些版本中,不指定field的查询会改为对所有可搜索的字段进行查询。

适用于临时在命令行使用一些工具(比如curl)快速地发出请求,来检索想要的信息;但是如果查询请求很复杂,就很难构建,因此在生产环境中很少使用Query String Search

1.2 Query DSL(Domain Specific Language):查询领域专用语言

1.3 查询结果元数据分析

{
  "took": 6,
  "timed_out": false,
  "_shards": {
    "total": 6,
    "successful": 6,
    "failed": 0
  },
  "hits": {
    "total": 10,
    "max_score": 1,
    "hits": [
      {
        "_index": ".kibana",
        "_type": "config",
        "_id": "5.2.0",
        "_score": 1,
        "_source": {
          "buildNum": 14695
        }
      }
    ]
  }
}

2. 词条查询(term)和全文检索(full text)

2.1 准备测试数据

# text:用于全文检索,该类型的字段将通过分词器进行分词
# keyword:不分词,只能搜索该字段完整的值
PUT /shop
{
  "mappings": {
    "product": {
      "properties": {
        "name": {
          "type": "keyword"
        },
        "desc": {
          "type": "text"
        }
      }
    }
  }
}

PUT shop/product/1
{
    "name": "Yunnanbaiyao Toothpaste",
    "desc": "Yunnanbaiyao Toothpaste"
}

PUT /shop/product/2
{
    "name": "Darlie Toothpaste",
    "desc": "Darlie Toothpaste"
}

PUT /shop/product/3
{
    "name": "ZhongHua Toothpaste",
    "desc": "ZhongHua Toothpaste"
}

2.2 term测试

# 词条查询

# 没有结果
POST /shop/product/_search
{
    "query": {
        "term": {
            "name": "Toothpaste"
        }
    }
}

# 有结果(1条)
POST /shop/product/_search
{
    "query": {
        "term": {
            "name": "Darlie Toothpaste"
        }
    }
}

# 没有结果
POST /shop/product/_search
{
    "query": {
        "term": {
            "desc": "Toothpaste"
        }
    }
}

# 没有结果
# 这个没有结果,是因为"Darlie Toothpaste"分词后分成了"darlie"和"toothpaste"
# 所以完全匹配"Darlie Toothpaste"是无法匹配到的
POST /shop/product/_search
{
    "query": {
        "term": {
            "desc": "Darlie Toothpaste"
        }
    }
}


# 有3条结果
POST /shop/product/_search
{
    "query": {
        "term": {
            "desc": "toothpaste"
        }
    }
}

2.3 terms测试

# 没有数据
POST /shop/product/_search
{
    "query": {
        "terms": {
            "name": ["Darlie", "Toothpaste"]
        }
    }
}

# 两条结果
POST /shop/product/_search
{
    "query": {
        "terms": {
            "name": ["Darlie Toothpaste", "Yunnanbaiyao Toothpaste"]
        }
    }
}

# 一条结果
POST /shop/product/_search
{
    "query": {
        "terms": {
            "name": ["Darlie", "Yunnanbaiyao Toothpaste"]
        }
    }
}

# 没有结果
POST /shop/product/_search
{
    "query": {
        "terms": {
            "desc": ["Darlie Toothpaste", "Yunnanbaiyao Toothpaste"]
        }
    }
}

# 1个结果
POST /shop/product/_search
{
    "query": {
        "terms": {
            "desc": ["darlie", "Yunnanbaiyao Toothpaste"]
        }
    }
}

2.4 match测试

# 有结果(1条)
GET /shop/product/_search
{
    "query": {
        "match": {
            "name": "Darlie Toothpaste"
        }
    }
}

# 没有结果
GET /shop/product/_search
{
    "query": {
        "match": {
            "name": "Toothpaste"
        }
    }
}

# 有结果(3条)
GET /shop/product/_search
{
    "query": {
        "match": {
            "desc": "Toothpaste"
        }
    }
}

# 有结果(3条)
GET /shop/product/_search
{
    "query": {
        "match": {
            "desc": "Darlie Toothpaste"
        }
    }
}

2.5 multi_match测试

PUT /shop/product/5
{
    "name": "Apple Toothpaste",
    "desc": "Apple Darlie"
}

PUT /shop/product/6
{
    "name": "Orage Darlie",
    "desc": "Orage"
}

# 3条结果
POST /shop/product/_search
{
    "query": {
        "multi_match": {
            "query": "Darlie",
            "fields": ["name", "desc"]
        }
    }
}

# 4条结果
POST /shop/product/_search
{
    "query": {
        "multi_match": {
            "query": "Orage Darlie",
            "fields": ["name", "desc"]
        }
    }
}

2.6 match_phrase测试

# 有结果(1条)
POST /shop/product/_search
{
    "query": {
        "match_phrase": {
            "desc": "Darlie Toothpaste"
        }
    }
}

# 有结果(3条)
POST /shop/product/_search
{
    "query": {
        "match_phrase": {
            "desc": "Toothpaste"
        }
    }
}

# 没有结果
POST /shop/product/_search
{
    "query": {
        "match_phrase": {
            "name": "Toothpaste"
        }
    }
}

# 有结果(1条)
POST /shop/product/_search
{
    "query": {
        "match_phrase": {
            "name": "Darlie Toothpaste"
        }
    }
}

2.7 match_all测试

# 查询全部数据
POST /shop/product/_search
{
    "query": {
        "match_all": {}
    }
}

GET /shop/product/_search
{
    "query": {
        "match_all": {
            "desc": "Darlie Toothpaste"
        }
    }
}

# 结果
{
    "error": {
        "root_cause": [
            {
                "type": "parsing_exception",
                "reason": "[5:13] [match_all] unknown field [desc], parser not found",
                "line": 5,
                "col": 13
            }
        ],
        "type": "parsing_exception",
        "reason": "[5:13] [match_all] unknown field [desc], parser not found",
        "line": 5,
        "col": 13,
        "caused_by": {
            "type": "x_content_parse_exception",
            "reason": "[5:13] [match_all] unknown field [desc], parser not found"
        }
    },
    "status": 400
}

# 分页
GET /shop/product/_search
{
    "query": {
        "match_all": {}
    },
    "from": 0,
    "size": 10
}

2.8 match_phrase_prefix测试

PUT /shop/product/7
{
    "name": "Darlie Pro Toothpaste",
    "desc": "Darlie Pro Toothpaste"
}

# 有结果(1条)
POST /shop/product/_search
{
    "query": {
        "match_phrase_prefix": {
            "name": "Darlie Toothpaste"
        }
    }
}

# 没有结果
POST /shop/product/_search
{
    "query": {
        "match_phrase_prefix": {
            "name": "Darlie"
        }
    }
}

# 有结果(2条)
POST /shop/product/_search
{
    "query": {
        "match_phrase_prefix": {
            "desc": "Darlie"
        }
    }
}

# 有结果(1条)
POST /shop/product/_search
{
    "query": {
        "match_phrase_prefix": {
            "desc": "Darlie Pro"
        }
    }
}

# 有结果(1条)
POST /shop/product/_search
{
    "query": {
        "match_phrase_prefix": {
            "desc": "Darlie Toothpaste"
        }
    }
}

2.9 总结

| 查询类型 | keyword | text |
| --- | --- | --- |
| term | 完全匹配才返回 | 完全匹配分词后的单词才返回 |
| terms | 传入多个字符串,返回那些可以完全匹配的结果 | 每个传入的单词,在分词后的所有单词中进行匹配,完全匹配才返回 |
| match_all | 查询全部数据,不能传入任何字段条件 | 查询全部数据,不能传入任何字段条件 |
| match | 完全匹配才返回 | 对输入字符串进行分词,指定的字段文本分词后的词语中包含任意一个输入字符串的分词词语,就算匹配,就可以作为结果返回 |
| multi_match | 指定的多个字段中任意一个完全匹配即返回 | 对输入字符串进行分词,任意一个指定的字段文本分词后的词语中包含任意一个输入字符串的分词词语,就算匹配,就可以作为结果返回 |
| match_phrase | 完全匹配才返回 | 对输入字符串进行分词,指定的字段文本分词后的词语必须按相同顺序、相邻地包含全部分词词语(即包含完整的短语),才可以算匹配,才能作为结果返回 |
| match_phrase_prefix | 完全匹配才返回 | 输入一个单词,例如"hello",只要指定的字段文本分词后的词语中有一个词语是以"hello"作为前缀,就算匹配;输入一个短语,例如"hello world tom",那么先匹配分词后的词语中包含短语"hello world"的文档,然后在这些文档中过滤,只要紧跟在该短语之后的词语以"tom"开头,就算匹配 |

3. 范围查询

GET /company/employee/_search
{
  "query": {
    "range": {
      "age": {
        "gte": 31
      }
    }
  }
}

4. query上下文和filter上下文

一般来说,如果希望越符合搜索条件的document排名越靠前(即需要按相关度评分排序,把最匹配的数据先返回),就把搜索条件放在query上下文中;如果只是要根据条件筛选出一部分数据,并不关注其相关度排名,就放到filter上下文中。filter上下文不计算相关度评分,且结果可以被缓存,因此通常比query上下文更快。

5. 多条件组合查询

# 语法
GET /index_name/type_name/_search
{
    "query": {
        "bool": {
            "must": [],
            "should": [],
            ...
        }
    }
}
    
# 示例
POST _search
{
  "query": {
    "bool" : {
      "must" : {
        "term" : { "user" : "kimchy" }
      },
      "filter": {
        "term" : { "tag" : "tech" }
      },
      "must_not" : {
        "range" : {
          "age" : { "gte" : 10, "lte" : 20 }
        }
      },
      "should" : [
        { "term" : { "tag" : "wow" } },
        { "term" : { "tag" : "elasticsearch" } }
      ],
      "minimum_should_match" : 1
    }
  }
}

# bool中可以放置的内容
must,must_not,should,filter

6. 自定义排序规则

默认情况下,是按照"_score"降序排序的,使用"sort"参数来自定义排序规则

GET /company/employee/_search
{
  "query": {
    "range": {
      "age": {
        "gte": 30
      }
    }
  },
  "sort": [
    {
      "join_date": {
        "order": "desc"
      }
    }
  ]
}

GET /company/employee/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "range": {
          "age": {
            "gte": 30
          }
        }
      }
    }
  },
  "sort": [
    {
      "join_date": {
        "order": "asc"
      }
    }
  ]
}

7. DSL校验

构建好一个复杂的查询之后,可以先校验一下语句,通过返回的异常信息来修改语句,校验语法如下:

GET /test_index/test_type/_validate/query?explain
{
  "query": {
    "math": { # 这里故意把match写为了math
      "test_field": "test"
    }
  }
}

{
  "valid": false,
  "error": "org.elasticsearch.common.ParsingException: no [query] registered for [math]"
}

GET /test_index/test_type/_validate/query?explain
{
  "query": {
    "match": {
      "test_field": "test"
    }
  }
}

{
  "valid": true,
  "_shards": {
    "total": 1,
    "successful": 1,
    "failed": 0
  },
  "explanations": [
    {
      "index": "test_index",
      "valid": true,
      "explanation": "+test_field:test #(#_type:test_type)"
    }
  ]
}

上一篇 下一篇

猜你喜欢

热点阅读