Network Programming Homework 3

2018-03-28 本文已影响19人什锦甜

website: http://iems5703.albertauyeung.com/assignment-3

Assignment 3 - An HTTP-based Movie Search Engine

Full Mark: 100 (10% of the course assessment scheme)
Deadline: 24th March, 2018
Submission: See instructions below
NOTE: The assignment must be finished using Python

Overview

In this assignment, you will implement a simple movie search engine with some social functions. The server will provide APIs (application programming interfaces) for clients to use functions such as searching for a movie or leaving a comment. Both the server and the client program should be implemented in Python.

In this assignment, we will use a dataset from the Internet Movie Database (IMDb) to populate the content of our search engine. The dataset can be found on Kaggle (https://www.kaggle.com/PromptCloudHQ/imdb-data/data). A copy of the data in CSV format can also be found here: http://iems5703.albertauyeung.com/files/imdb_top1000.csv.

Solution

server.py

from flask import Flask
from flask import request
from flask import current_app
from flask import jsonify
import json
import nltk
import pandas as pd
import datetime
import time
from os import sys

def _build_inverted_index(texts):
    """Map every token to the ascending list of row positions containing it.

    Each text is tokenized once with ``nltk.word_tokenize``.  A row position
    is recorded at most once per token, and keys appear in the order the
    tokens are first seen.
    """
    index = {}
    for row, text in enumerate(texts):
        # dict.fromkeys drops repeated tokens within one text while keeping
        # their first-seen order, so a row is never listed twice for a word.
        for word in dict.fromkeys(nltk.word_tokenize(text)):
            index.setdefault(word, []).append(row)
    return index


def data_prepocess(dataset):
    """Build word -> row-position indexes for the titles and the actors.

    The previous implementation rescanned every row for every vocabulary
    word (duplicates included); this version produces the same mapping in a
    single pass over each column.

    Args:
        dataset: Object exposing ``Title`` and ``Actors`` as iterables of
            strings (in this file, the pandas DataFrame loaded from the CSV).

    Returns:
        A ``(title_index, actor_index)`` tuple of dicts.  Each dict maps a
        token to the list of positions (0-based, ascending) of the rows whose
        title / actors text contains that token.
    """
    title_index = _build_inverted_index(dataset.Title)
    actor_index = _build_inverted_index(dataset.Actors)
    return title_index, actor_index

# The port to listen on is taken from the first command-line argument.
port = int(sys.argv[1])
# Load the movie table from the CSV file with pandas.
dataset = pd.read_csv("imdb_top1000.csv")
# Give every row its own empty list in a new 'comments' column.
dataset['comments'] = pd.Series([[] for i in range(dataset.shape[0])])

# Data preprocessing: build the word -> row-position indexes for titles and actors.
title_index, actor_index = data_prepocess(dataset)

# Initialization: create the Flask app and attach the table and both indexes
# to it as attributes.
app = Flask(__name__)
app.dataset = dataset
app.title_index = title_index
app.actor_index = actor_index

@app.route('/search', methods=['GET'])
def search():
    """Search movies by a single word and return the matches as JSON.

    Query-string parameters:
        query: The word to look up in the indexes.
        attribute: ``'title'``, ``'actor'`` or ``'both'`` (union of the two).
        sortby: Column to sort by; it is passed through ``str.capitalize()``
            before being matched against the selected columns.
        order: ``'ascending'`` sorts ascending; any other value sorts
            descending.

    Returns:
        The matching rows serialized with ``DataFrame.to_json()``.  A word
        that is not in the index yields an empty result.  An unknown
        ``attribute`` or a missing/unknown ``sortby`` yields a JSON error
        body with HTTP status 400 (these cases used to raise and produce a
        500 response).
    """
    # Read the request parameters, e.g. /search?query=some_value&...
    query = request.args.get("query")
    attribute = request.args.get("attribute")
    sortby = request.args.get("sortby")
    order = request.args.get("order")

    # dict.get keeps an unknown (or missing) query word from raising KeyError.
    title_hits = title_index.get(query, [])
    actor_hits = actor_index.get(query, [])
    if attribute == 'title':
        moive_index = title_hits
    elif attribute == 'actor':
        moive_index = actor_hits
    elif attribute == 'both':
        moive_index = list(set(title_hits) | set(actor_hits))
    else:
        return jsonify({'error': "attribute must be 'title', 'actor' or 'both'"}), 400

    if sortby is None:
        return jsonify({'error': 'sortby is required'}), 400

    print(moive_index)
    selected_columns = ['Actors', 'Director', 'Genre', 'Rating', 'Revenue (Millions)',
                        'Title', 'Year', 'id']
    current_app.dataset['id'] = current_app.dataset['Rank'] - 1
    movies = current_app.dataset.iloc[moive_index][selected_columns]

    sort_column = sortby.capitalize()
    if sort_column not in movies.columns:
        return jsonify({'error': 'cannot sort by ' + sortby}), 400
    movies = movies.sort_values(by=[sort_column], ascending=(order == 'ascending'))

    # Return a JSON response.
    print(movies)
    return movies.to_json()

@app.route('/movie/<movie_id>', methods=['GET'])
def movie(movie_id):
    """Return the full record of one movie as JSON.

    Args:
        movie_id: Row position of the movie, taken from the URL as text.

    Returns:
        The selected fields of that row serialized with ``Series.to_json()``.
        A non-integer id yields HTTP 400 and an id outside
        ``0 <= id < number of rows`` yields HTTP 404, both with a JSON error
        body.  Negative ids are rejected instead of wrapping around to the
        end of the table.
    """
    # SECURITY: this used to be eval(movie_id), which executes arbitrary
    # Python taken straight from the URL.  int() accepts the same decimal ids
    # without evaluating anything.
    try:
        movie_id = int(movie_id)
    except ValueError:
        return jsonify({'error': 'movie_id must be an integer'}), 400
    if not 0 <= movie_id < len(current_app.dataset):
        return jsonify({'error': 'movie not found'}), 404

    current_app.dataset['id'] = current_app.dataset['Rank'] - 1
    movie = current_app.dataset.iloc[movie_id]
    selected_columns = ['Actors', 'Description', 'Director', 'Genre','Metascore','Rank', 'Rating', 
                        'Revenue (Millions)','Runtime (Minutes)', 'Title','Votes', 'Year','comments', 'id']
    movie = movie[selected_columns]
    print(movie)
    return movie.to_json()

@app.route('/comment', methods=['POST'])
def comment():
    """Attach a user comment to a movie and return the updated record.

    Form fields:
        user_name: Name of the commenting user.
        movie_id: Row position of the movie, sent as text.
        comment: The comment text.

    Returns:
        The selected fields of the movie row, including the updated
        ``comments`` list, serialized with ``Series.to_json()``.  A missing
        or non-integer ``movie_id`` yields HTTP 400 and an id outside
        ``0 <= id < number of rows`` yields HTTP 404, both with a JSON error
        body.
    """
    user_name = request.form.get("user_name")
    text = request.form.get("comment")
    # SECURITY: this used to be eval(request.form.get("movie_id")), which
    # executes arbitrary Python supplied by the client.  int() accepts the
    # same decimal ids without evaluating anything; TypeError covers a
    # missing field (None).
    try:
        movie_id = int(request.form.get("movie_id"))
    except (TypeError, ValueError):
        return jsonify({'error': 'movie_id must be an integer'}), 400
    if not 0 <= movie_id < len(current_app.dataset):
        return jsonify({'error': 'movie not found'}), 404
    print(user_name, movie_id, text)

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    current_app.dataset['id'] = current_app.dataset['Rank'] - 1
    comment_info = {'user_name':user_name, 'timestamp':timestamp, 'comment':text}
    # Each cell of the 'comments' column holds a list; append to it in place.
    current_app.dataset['comments'].iloc[movie_id].append(comment_info)

    movie = current_app.dataset.iloc[movie_id]
    selected_columns = ['Actors', 'Description', 'Director', 'Genre','Metascore','Rank', 'Rating', 
                        'Revenue (Millions)','Runtime (Minutes)', 'Title','Votes', 'Year','comments', 'id']
    movie = movie[selected_columns]
    print(movie)
    return movie.to_json()

if __name__ == "__main__":
    # Start the Flask server on the port parsed from the command line;
    # `port` was already converted to int when it was read.
    app.run(port=port)

client.py

import json
import requests
from os import sys
# r = requests.get('http://127.0.0.1:5000/search', params = values)
# r = requests.get('http://127.0.0.1:5000/search')

def search(port, query, attribute, sortby, order):
    """Query the server's /search endpoint and pretty-print the matches.

    The query word is normalized with ``lower().capitalize()`` before it is
    sent.  The response is expected to be column-oriented JSON
    (``{column: {row_key: value}}``); it is turned into a list with one dict
    per row key found under ``'id'`` and printed with a 4-space indent.

    Args:
        port: Server port as a string (it is concatenated into the URL).
        query: Word to search for.
        attribute: Which attribute to search, sent to the server unchanged.
        sortby: Sort field, sent to the server unchanged.
        order: Sort order, sent to the server unchanged.
    """
    params = {
        'query': query.lower().capitalize(),
        'attribute': attribute,
        'sortby': sortby,
        'order': order,
    }
    response = requests.get('http://127.0.0.1:' + port + '/search', params=params)
    columns = dict(response.json())

    # Pivot from {column: {row_key: value}} to one dict per row.
    rows = [
        {name: cells[row_key] for name, cells in columns.items()}
        for row_key in columns['id']
    ]
    print(json.dumps(rows, indent=4))

def moive(port, movie_id):
    """Fetch one movie from the server's /movie/<id> endpoint and pretty-print it.

    Args:
        port: Server port as a string (it is concatenated into the URL).
        movie_id: Movie id as a string; it is placed in the URL path and also
            sent as the ``movie_id`` query parameter.
    """
    url = 'http://127.0.0.1:' + port + '/movie/' + movie_id
    response = requests.get(url, params={'movie_id': movie_id})
    record = dict(response.json())
    print(json.dumps(record, indent=4))

def comment(port, user_name, movie_id):
    """Prompt for a comment, post it to the server and pretty-print the reply.

    Args:
        port: Server port as a string (it is concatenated into the URL).
        user_name: Name to attach to the comment.
        movie_id: Id of the movie being commented on.
    """
    text = input("What is your comment?\n")
    payload = {'user_name': user_name, 'movie_id': movie_id, 'comment': text}
    response = requests.post('http://127.0.0.1:' + port + '/comment', data=payload)
    record = dict(response.json())
    print(json.dumps(record, indent=4))

if __name__ == '__main__':
    # Usage: client.py <ip> <port> <search|movie|comment> <arguments...>
    # NOTE: IP is read but not used; the request helpers always contact
    # 127.0.0.1.
    IP = sys.argv[1]
    port = sys.argv[2]
    choice = sys.argv[3]
    if choice == 'search':
        # <query> <attribute> <sortby> <order>
        search(port, sys.argv[4], sys.argv[5], sys.argv[6], sys.argv[7])
    elif choice == 'movie':
        # <movie_id>
        moive(port, sys.argv[4])
    elif choice == 'comment':
        # <user_name> <movie_id>
        comment(port, sys.argv[4], sys.argv[5])

Network programming Homework3

Assignment 3 - An HTTP-based Movie Search Engine

Overview

Solution

猜你喜欢

热点阅读