NLP小工具

2022-01-01  本文已影响0人  WritingHere

日常用NLP脚本备份

机器翻译

服务端代码(Flask + Helsinki-NLP/opus-mt-en-zh 英译中模型)如下:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import torch
import json
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from flask import Flask, request

# Flask application object that the route decorator and app.run() use.
app = Flask(__name__)

# Run on the first CUDA GPU when one is available, otherwise on the CPU.
_device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device = torch.device(_device_name)

# Load the tokenizer and seq2seq model from the same checkpoint (judging by
# its name, an English -> Chinese translation model) once at import time so
# that every request reuses them.
_checkpoint = "Helsinki-NLP/opus-mt-en-zh"
tokenizer = AutoTokenizer.from_pretrained(_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(_checkpoint)
model = model.to(device)

@app.route("/", methods=['POST'])
def index():
    """Translate the text carried in a JSON POST body.

    The request body must be a JSON object with a ``text`` field holding
    either a single string or a list of strings, for example
    ``{"text": ["I love you."]}``.

    Returns:
        A JSON document ``{"result": [...]}`` containing the decoded model
        output for each input string. A request that is not valid JSON, has
        no ``text`` field, or whose ``text`` is not a string / list of
        strings gets a 400 response with an ``error`` message instead of an
        unhandled exception.
    """
    # The body is JSON, so label it as such instead of Flask's default HTML.
    json_headers = {'Content-Type': 'application/json; charset=utf-8'}

    # silent=True yields None for a missing/invalid JSON body rather than
    # raising, so all malformed input is handled in one place below.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'text' not in payload:
        error = {'error': 'request body must be a JSON object with a "text" field'}
        return json.dumps(error), 400, json_headers

    text = payload['text']
    if isinstance(text, str):
        # Accept a bare string as a one-element batch.
        text = [text]
    if not isinstance(text, list) or not all(isinstance(t, str) for t in text):
        error = {'error': '"text" must be a string or a list of strings'}
        return json.dumps(error), 400, json_headers
    if not text:
        # Nothing to translate; skip the model call entirely.
        return json.dumps({'result': []}), 200, json_headers

    # prepare_seq2seq_batch is deprecated in transformers; calling the
    # tokenizer directly is the supported replacement. Padding makes the
    # batch rectangular and truncation keeps every sequence within the
    # 512-token limit the original code enforced by slicing.
    batch = tokenizer(
        text,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=512,
    ).to(device)

    # Inference only: make sure no autograd state is recorded.
    with torch.no_grad():
        translation = model.generate(**batch)
    result = tokenizer.batch_decode(translation, skip_special_tokens=True)
    # ensure_ascii=False keeps the Chinese output readable instead of \u escapes.
    return json.dumps({'result': result}, ensure_ascii=False), 200, json_headers

if __name__ == '__main__':
    # Start Flask's built-in server only when this file is executed directly;
    # '0.0.0.0' makes it listen on every network interface, port 9100.
    app.run(port=9100, host='0.0.0.0')

客户端代码main.py如下:

#!/usr/bin/env python

import json

import yaml
import requests as rq

# Sentences to translate; they are sent as a list under the 'text' key.
text = ['Oh, god, this is great! The plane is gone, so it looks like I\'m stuck here with you guys.', 'I love you.']
headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}
data = {'text': text}

# POST the JSON-encoded payload to the local translation service.
# `headers` is a module-level variable here, so it is referenced directly
# (the original `self.headers` raised NameError outside of a class).
a = rq.post('http://127.0.0.1:9100', data=json.dumps(data), headers=headers)
print(a.text)

TopK算法


import heapq

class PriorityQueueTopK:
    """Keep the ``k`` highest-priority items seen so far.

    Items are stored in a min-heap of ``(priority, insertion_index, item)``
    tuples, so the lowest retained priority is always at the root and can be
    evicted cheaply when a better item arrives. The insertion index breaks
    priority ties, which means the items themselves are never compared.
    """

    def __init__(self, k=10):
        """Create an empty queue.

        Args:
            k (int, optional): Maximum number of items to retain.
                Defaults to 10. With ``k <= 0`` nothing is ever retained.
        """
        self._queue = []
        self._index = 0
        self.k = k

    def push(self, item, priority=None):
        """Offer ``item`` to the queue.

        Args:
            item: The value to store.
            priority: Ranking key; larger means better. Defaults to the
                item itself.

        The item is kept if fewer than ``k`` items are stored, or if its
        priority is strictly greater than the lowest retained priority (in
        which case that lowest item is dropped). Otherwise it is discarded.
        """
        if priority is None:
            priority = item
        if self.k <= 0:
            # A non-positive capacity retains nothing; without this guard the
            # root lookup below would raise IndexError on the empty heap.
            return
        entry = (priority, self._index, item)
        if len(self._queue) < self.k:
            heapq.heappush(self._queue, entry)
            self._index += 1
        elif priority > self._queue[0][0]:
            # Queue is full: replace the current minimum in one heap operation.
            heapq.heapreplace(self._queue, entry)
            self._index += 1

    def pop(self):
        """Remove and return the retained item with the lowest priority.

        Raises:
            IndexError: If the queue is empty.
        """
        return heapq.heappop(self._queue)[-1]

    def topk(self):
        """Return the retained items as a new list.

        The list follows the internal heap layout, so it is not sorted by
        priority; sort it if an ordered result is needed.
        """
        return [entry[-1] for entry in self._queue]
# Demo: keep the 5 largest of 10 random integers.
# `random` is imported here because this snippet uses it but never imported it.
import random

k = 5
items = [random.randint(1, 10) for _ in range(10)]
print(items)
pq = PriorityQueueTopK(k)
for item in items:
    pq.push(item)
res = pq.topk()
print(res)
上一篇下一篇

猜你喜欢

热点阅读