mongoDB+ES

mongodb插入字段,更新数据,查询限制(python)

2019-11-12  本文已影响0人  布口袋_天晴了

插入字段--关键语句

db.your_table_name.update(
#更新条件
{"_id": ent['_id']}, 
#更新内容
{'$set': {'case_reason': final_key}}, 
multi=True)

查询限制--关键语句(只投影需要的字段,并用limit限制返回的文档数量;随后根据_id字段,逐条更新新加入的case_reason字段的值)

db.chongqing.find({},{"content.title":1,}).limit(100)
db.your_table_name.find({},{"your_segement":1,}).limit(your_count)

具体代码

"""
date:2019-11-12
author:bukoudai_tianqingle
function: add a "case_reason" field to each MongoDB document
"""
import pymysql
import re
import pymongo

class init_MongoDB:
    """Small wrapper that owns a ``pymongo.MongoClient`` and hands out databases.

    When a password is supplied, the credentials are given to the client
    itself and authenticated against the ``admin`` database with the
    SCRAM-SHA-1 mechanism. Without a password the client connects
    unauthenticated, even if ``user`` is set.
    """

    def __init__(self, host=None, port=None, user=None, pwd=None):
        """Create the underlying client.

        Args:
            host: Host name or MongoDB URI passed to ``MongoClient``.
            port: Port passed to ``MongoClient``.
            user: User name; only used when ``pwd`` is not ``None``.
            pwd: Password; ``None`` disables authentication.
        """
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd

        auth_options = {}
        if self.pwd is not None:
            # Database.authenticate() is deprecated in PyMongo 3.x and was
            # removed in 4.0; passing the credentials to MongoClient is the
            # supported way to log in.
            auth_options = {
                "username": self.user,
                "password": self.pwd,
                "authSource": "admin",
                "authMechanism": "SCRAM-SHA-1",
            }
        self.client = pymongo.MongoClient(self.host, self.port, **auth_options)

    def connection(self, database):
        """Return the database named ``database`` from the client."""
        return self.client[database]


class CaseReason():
    """Tag every document of ``law.chongqing`` with a ``case_reason`` field.

    Instantiating the class runs the whole job: the keyword list is loaded
    from ``mongo_anyou_class.txt`` and then each document's ``content.title``
    is matched against it.
    """

    # A keyword line looks like "<prefix>12、<keyword>" ...
    _NUMBERED_ITEM = re.compile(r'(.*)(\d+、)(.*)')
    # ... or like "<prefix>(12)<keyword>".
    _BRACKETED_ITEM = re.compile(r'(.*)[(](\d+)[)](.*)')

    # Value stored when no keyword occurs in the title.
    _NOT_FOUND = 'not find'

    def __init__(self):
        self.get_class()
        self.get_mongodb_data()

    def get_class(self):
        """Load the keywords from ``mongo_anyou_class.txt`` into ``self.key_list``.

        The text after the item number of each matching line is taken as the
        keyword. Surrounding whitespace (including a stray ``\\r``) is
        stripped, and lines whose keyword is empty are ignored.
        """
        self.key_list = []
        with open('mongo_anyou_class.txt', 'r', encoding='utf8') as f:
            for line in f:
                # The numbered form takes precedence over the bracketed form.
                match = (self._NUMBERED_ITEM.match(line)
                         or self._BRACKETED_ITEM.match(line))
                if not match:
                    continue
                key_word = match.group(3).strip()
                if key_word:
                    self.key_list.append(key_word)

    @staticmethod
    def _pick_case_reason(title, key_list):
        """Return the longest keyword contained in ``title``.

        Ties are resolved in favour of the keyword that comes first in
        ``key_list``. ``'not find'`` is returned when ``title`` is not a
        string or contains none of the keywords.
        """
        if not isinstance(title, str):
            return CaseReason._NOT_FOUND
        matches = [key for key in key_list if key and key in title]
        if not matches:
            return CaseReason._NOT_FOUND
        # max() keeps the first of several equally long candidates.
        return max(matches, key=len)

    def get_mongodb_data(self):
        """Write ``case_reason`` on every document of ``law.chongqing``.

        Only ``_id`` and ``content.title`` are fetched. Documents without a
        usable title receive ``'not find'`` instead of aborting the run.
        Progress is printed as ``<position> <total>`` for each document.
        """
        mongodb = init_MongoDB(host='your_mongo_host', port=27017, user='admin')
        db = mongodb.connection("law")  # database name
        collection = db.chongqing
        # Cursor.count() and Collection.update() are deprecated and were
        # removed in PyMongo 4; count_documents()/update_one() replace them.
        all_count = collection.count_documents({})
        entities = collection.find({}, {"content.title": 1})
        for pos, ent in enumerate(entities):
            content = ent.get('content')
            title = content.get('title') if isinstance(content, dict) else None
            print(pos, all_count)
            case_reason = self._pick_case_reason(title, self.key_list)
            # _id is unique, so a single-document update is sufficient.
            collection.update_one(
                {"_id": ent['_id']},
                {'$set': {'case_reason': case_reason}},
            )

# Script entry: constructing CaseReason runs the whole job, because its
# __init__ loads the keyword list and then processes the MongoDB documents.
CaseReason()
上一篇 下一篇

猜你喜欢

热点阅读