Elasticsearch 更改已有字段的数据类型,清洗数据
2019-11-15 本文已影响0人
蒙嘉
背景:
Elasticsearch具有动态mapping的功能,如果没有显式设置字段的映射,Elasticsearch将会对字段进行动态映射。但是有时候动态映射的结果不符合业务需求,比如毫秒时间戳会被Elasticsearch自动映射为long类型,而业务使用中需要date类型。由于已有字段的映射类型无法直接修改,只能按新映射新建索引,再通过reindex把数据迁移过去。
目标:更改索引test_001中字段time的数据类型:由long更改为date
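1、新建动态索引模板,此时time字段为long类型(注意:模板中使用的string类型、_default_映射以及template键属于Elasticsearch 5.x的写法,6.0及以后版本已废弃或移除,需改用text/keyword和index_patterns)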
put http://localhost:9200/_template/test
{
"template": "test_*",
"settings": {
"index.refresh_interval": "5s",
"index.translog.durability": "async",
"index.translog.flush_threshold_size": "512m",
"index.number_of_replicas": 0,
"index.routing.allocation.total_shards_per_node": 1,
"index.number_of_shards": 1,
"index.unassigned.node_left.delayed_timeout": "30m"
},
"mappings": {
"_default_": {
"dynamic_templates": [
{
"regix_long_string": {
"match_pattern": "regex",
"mapping": {
"type": "long"
},
"match_mapping_type": "string",
"match": ".*(?i)(num|cnt|count)$"
}
},
{
"regix_double_string": {
"match_pattern": "regex",
"mapping": {
"type": "double"
},
"match_mapping_type": "string",
"match": ".*(?i)(amount|price)$"
}
},
{
"longs_as_strings": {
"unmatch": "*_text",
"mapping": {
"type": "long"
},
"match_mapping_type": "string",
"match": "long_*"
}
},
{
"double_as_strings": {
"unmatch": "*_text",
"mapping": {
"type": "double"
},
"match_mapping_type": "string",
"match": "double_*"
}
},
{
"date_as_strings": {
"unmatch": "*_text",
"mapping": {
"type": "date"
},
"match_mapping_type": "string",
"match": "date_*"
}
},
{
"strings": {
"mapping": {
"type": "string",
"fields": {
"raw": {
"ignore_above": 256,
"index": "not_analyzed",
"type": "string"
},
"keyword": {
"type": "keyword"
}
}
},
"match_mapping_type": "string"
}
}
],
"_all": {
"enabled": false
},
"properties": {
"time": {
"index": true,
"store": true,
"type": "long"
}
}
}
}
}
2、存入样本数据
post http://localhost:9200/test_001/test
{
"name":"forest",
"age":26,
"time":1573701009309
}
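3、将模板中time字段的type由long改为date,其余内容与步骤1相同,然后重新PUT该模板以覆盖旧模板。注意:模板只对之后新建的索引生效,已存在的test_001的映射不会改变;date类型的默认format包含epoch_millis,因此毫秒时间戳(如1573701009309)可以直接写入。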
put http://localhost:9200/_template/test
4、将test_001数据reindex到test_001_new(test_001_new不存在时会被自动创建,由于索引名匹配test_*,会套用步骤3更新后的模板,time字段即为date类型)
post http://localhost:9200/_reindex
{
"source": {
"index": "test_001"
},
"dest": {
"index": "test_001_new"
}
}
5、删除旧索引 test_001(删除前请先确认test_001_new中的文档数量与test_001一致,且time字段的映射已为date)
delete http://localhost:9200/test_001
6、将test_001_new数据reindex到test_001(test_001已被删除,此时会按更新后的模板自动重建,time字段为date类型;确认数据无误后可删除临时索引test_001_new)
post http://localhost:9200/_reindex
{
"source": {
"index": "test_001_new"
},
"dest": {
"index": "test_001"
}
}