Python 操作ES
1、python查询es数据的语句
https://blog.csdn.net/greatxiang888/article/details/103829973
from elasticsearch import Elasticsearch
import pandas as pd
# Create the client for the Elasticsearch node used throughout these notes.
es = Elasticsearch("172.16.163.8", timeout=360)

# Exact-value (term) query on the "flag" field, returning at most 1000 hits.
# Replace the "term" clause with {"match_all": {}} to fetch every document.
get_body = {
    "query": {
        "term": {
            "flag": "1",
        },
    },
    "size": 1000,
}

# The matching documents live under hits.hits in the search response.
data = es.search(index="word2vec_index", body=get_body)['hits']['hits']
print(len(data))
for i in data:
    print(i)

if data:
    print(data)
    # One row per hit; the '_source' column holds each stored document.
    df = pd.DataFrame(data)
    print(df)
    # Expand the '_source' dicts into their own columns.
    df_source = pd.DataFrame(list(df['_source']))
    print(df_source)
    # Keep only the fields of interest.
    df_field = df_source[['content', 'esHour', 'id', 'sysId', 'flag']]
    print(df_field)
else:
    # With no hits the DataFrame would have no '_source' column, so the
    # DataFrame steps are skipped instead of raising KeyError.
    print('为空')
2、插入ES
# Sample document to write; no explicit id is passed to index().
insert_body = {
    "content": "测试",
    "createTime": 1632801190617,
    "esDate": "2021-09-28",
    "esHour": "11",
    "flag": "1",
}
es.index(index="word2vec_index", body=insert_body)
3、ES删除索引
# Delete the index named 'word2vec_index' via the indices API.
es.indices.delete(index='word2vec_index')
4、ES删除查询的所有结果
# The delete-by-query body uses match_all, i.e. the query selects every
# document in the target index.
delete_body = {"query": {"match_all": {}}}
es.delete_by_query(index="word2vec_index", body=delete_body)
5、python直接操作Elasticsearch7(https://blog.csdn.net/weixin_41979456/article/details/111932972)
https://www.cnblogs.com/lshan/p/15510018.html
6、# es 之term和match区别
区别
- match是经过analyzer的,也就是说,文档首先被分析器处理了。根据不同的分析器,分析的结果稍有不同。然后再根据分词结果进行匹配
- term则不经过分词,它是直接去倒排索引中查找精确的值。
7、ES批量插入(https://www.cnblogs.com/midworld/p/13670001.html)
def insert_es_list(es_list, list_type):
    """Bulk-write one document per item into the ``word2vec_index`` index.

    Every document stores the item under a type-specific field and sets
    ``is_grab`` to 0 (0 = not crawled yet, 1 = already crawled).

    Args:
        es_list: Iterable of identifiers to write.
        list_type: 1 stores each item under ``user_id`` (user info);
            2 stores each item under ``item_id`` (weibo content).
            Any other value produces no documents.

    Returns:
        None.
    """
    # The two list types differ only in the field that holds the identifier.
    if list_type == 1:
        id_field = "user_id"
    elif list_type == 2:
        id_field = "item_id"
    else:
        return

    body = [
        {
            "_index": "word2vec_index",
            "_source": {
                id_field: item,
                "is_grab": 0,
            },
        }
        for item in es_list
    ]
    if not body:
        # Nothing to write: skip creating a client and issuing a bulk call.
        return

    # Imported here because the file only imports Elasticsearch at the top;
    # without this, `helpers` is an undefined name.
    from elasticsearch import helpers

    es = Elasticsearch("172.16.163.8", timeout=360)
    helpers.bulk(es, body)
8、ES单个写入
# Placeholder index name; replace with the real target index.
# (Plain ASCII quotes are required: typographic quotes are a syntax error.)
index_name = 'xxx'
body = {
    'web_id': '2396815443/L2FtVry33',
    'w_is_grab': 1,
}
# Write a single document; the response is kept in `res`.
res = es.index(index=index_name, body=body)
9、ES查询
select_body = {
    "query": {
        "term": {
            # Placeholder: fill in the field name (key) and the exact value
            # to match. ASCII quotes replace the typographic ones, which do
            # not parse.
            '': '',
        },
    },
}
data = es.search(index="es_weibo_check", body=select_body)['hits']['hits']
print(data)
# Take the stored document of the first hit; None when nothing matched,
# instead of raising IndexError on an empty hit list.
is_grab = data[0].get('_source') if data else None
10、ES删除某条记录
# Delete the single document with the given id from 'word2vec_index'.
es.delete(index="word2vec_index",id='HYRoUX0BTwtGDMe14Euk')
11、条件查询数量:
# The host string is left empty in these notes; supply the real address.
es = Elasticsearch("", timeout=360)

# Term query on the "source" field; use {"match_all": {}} instead to count
# every document in the index.
get_body = {"query": {"term": {"source": "苏宁易购"}}}

# count() is called with the same query body shape as search().
data = es.count(index="es_table", body=get_body)
print(data)
12、python 连接ES,填入密码
# Build the node URL from settings and authenticate with HTTP basic auth.
es_url = f"http://{settings.ES_HOST}:{settings.ES_PORT}"
client = Elasticsearch(
    hosts=[es_url],
    http_auth=(settings.ES_USER, settings.ES_PASSWORD),
)
13、按时间倒序排序
# Term query on "source", sorted by create_time in descending order.
suning_body = {
    "query": {
        "term": {"source": "苏宁易购"},
    },
    "sort": [
        {"create_time": {"order": "desc"}},
    ],
}
suning_data = es.search(index=es_index, body=suning_body)
14、and 组合条件,where a and b
# bool/must combines the clauses with AND semantics: both term conditions
# have to hold for a document to match.
weibo_body = {
    "query": {
        "bool": {
            "must": [
                {"term": {"source": "新浪微博"}},
                {"term": {"emotion": 2}},
            ],
        },
    },
}
weibo_data = es.search(index=es_index, body=weibo_body)
15、and 条件写法,.keyword是不分词的意思
# AND of two conditions: a term clause on "source" plus a match clause on
# the "content.keyword" sub-field with the full sentence as its value.
body = {
    "query": {
        "bool": {
            "must": [
                {"term": {"source": "苏宁易购"}},
                {"match": {"content.keyword": "用户未及时作出评价,系统默认好评"}},
            ],
        },
    },
}
es.search(index=es_index, body=body)
16、