Elasticsearch原理
2019-03-10 本文已影响0人
诺之林
目录
Installation
docker run -d --name elasticsearch -p 9200:9200 -p 9300:9300 elasticsearch:6.6.1
安装ElasticSearch Head Chrome extension
Inverted Index
- Forward Index: 从文档的角度看其中的关键词 (即: 文档 => 关键词)
文档ID | 文档内容 |
---|---|
1 | 人工智能成为互联网大会焦点 |
2 | 互联网的未来在人工智能 |
- Inverted Index: 从关键词的角度看包含该关键词的文档 (即: 关键词 => 文档)
关键词 | 倒排记录表 |
---|---|
人工智能 | 1 -> 2 |
能 | 1 |
成为 | 1 |
互联网 | 1 -> 2 |
大会 | 1 |
焦点 | 1 |
的 | 2 |
未来 | 2 |
在 | 2 |
Analyzer
curl -X PUT 'http://localhost:9200/test_index'
# curl -X DELETE 'localhost:9200/test_index'
curl -X POST 'http://localhost:9200/test_index/test_doc/_mapping' -H 'Content-Type:application/json' -d '
{
"properties": {
"content": {
"type": "text"
}
}
}
'
curl -X POST 'http://localhost:9200/test_index/test_doc/1' -H 'Content-Type:application/json' -d '
{
"content":"人工智能成为互联网大会焦点"
}
'
curl -X POST 'http://localhost:9200/test_index/test_doc/_search?pretty' -H 'Content-Type:application/json' -d '
{
"query": {
"match": {
"content": "互联网"
}
}
}'
{
"took" : 12,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 0.8630463,
"hits" : [
{
"_index" : "test_index",
"_type" : "test_doc",
"_id" : "1",
"_score" : 0.8630463,
"_source" : {
"content" : "人工智能成为互联网大会焦点"
}
}
]
}
}
curl -X POST 'localhost:9200/test_index/_analyze?pretty' -H 'Content-Type:application/json' -d '
{
"field": "content",
"text": "互联网"
}
'
{
"tokens" : [
{
"token" : "互",
"start_offset" : 0,
"end_offset" : 1,
"type" : "<IDEOGRAPHIC>",
"position" : 0
},
{
"token" : "联",
"start_offset" : 1,
"end_offset" : 2,
"type" : "<IDEOGRAPHIC>",
"position" : 1
},
{
"token" : "网",
"start_offset" : 2,
"end_offset" : 3,
"type" : "<IDEOGRAPHIC>",
"position" : 2
}
]
}
IK Analysis
# 安装方法1
docker exec -it elasticsearch /bin/bash
elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v6.6.1/elasticsearch-analysis-ik-6.6.1.zip
# 安装方法2
wget -P ~/Downloads https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v6.6.1/elasticsearch-analysis-ik-6.6.1.zip
mkdir ~/Downloads/ik
unzip ~/Downloads/elasticsearch-analysis-ik-6.6.1.zip -d ~/Downloads/ik
docker cp ~/Downloads/ik elasticsearch:/usr/share/elasticsearch/plugins
docker exec -i elasticsearch chown elasticsearch -R /usr/share/elasticsearch/plugins/ik
# 无论使用哪种安装方法, 安装完成后都需要在宿主机上重启容器, 插件才会被加载
docker restart elasticsearch
curl -X PUT 'http://localhost:9200/ik_index'
# curl -X DELETE 'localhost:9200/ik_index'
curl -X POST 'http://localhost:9200/ik_index/ik_doc/_mapping' -H 'Content-Type:application/json' -d '
{
"properties": {
"content": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_max_word"
}
}
}
'
curl -X POST 'http://localhost:9200/ik_index/ik_doc/1' -H 'Content-Type:application/json' -d '
{
"content":"人工智能成为互联网大会焦点"
}
'
curl -X POST 'http://localhost:9200/ik_index/ik_doc/_search?pretty' -H 'Content-Type:application/json' -d '
{
"query": {
"match": {
"content": "互联网"
}
}
}
'
{
"took" : 31,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 0.8630463,
"hits" : [
{
"_index" : "ik_index",
"_type" : "ik_doc",
"_id" : "1",
"_score" : 0.8630463,
"_source" : {
"content" : "人工智能成为互联网大会焦点"
}
}
]
}
}
curl -X POST 'localhost:9200/ik_index/_analyze?pretty' -H 'Content-Type:application/json' -d '
{
"field": "content",
"text": "互联网"
}
'
{
"tokens" : [
{
"token" : "互联网",
"start_offset" : 0,
"end_offset" : 3,
"type" : "CN_WORD",
"position" : 0
},
{
"token" : "互联",
"start_offset" : 0,
"end_offset" : 2,
"type" : "CN_WORD",
"position" : 1
},
{
"token" : "联网",
"start_offset" : 1,
"end_offset" : 3,
"type" : "CN_WORD",
"position" : 2
}
]
}