Network Programming Homework 3
2018-03-28 本文已影响19人
什锦甜
website: http://iems5703.albertauyeung.com/assignment-3
Assignment 3 - An HTTP-based Movie Search Engine
- Full Mark: 100 (10% of the course assessment scheme)
- Deadline: 24th March, 2018
- Submission: See instructions below
- NOTE: The assignment must be finished using Python
Overview
In this assignment, you will implement a simple movie search engine with some social functions. The server will provide APIs (application programming interfaces) for clients to use functions such as searching for a movie or leaving a comment. Both the server and the client program should be implemented in Python.
In this assignment, we will use a dataset from the Internet Movie Database (IMDb) to populate the content of our search engine. The dataset can be found on Kaggle (https://www.kaggle.com/PromptCloudHQ/imdb-data/data). A copy of the data in CSV format can also be found here: http://iems5703.albertauyeung.com/files/imdb_top1000.csv.
Solution
server.py
from flask import Flask
from flask import request
from flask import current_app
from flask import jsonify
import json
import nltk
import pandas as pd
import datetime
import time
from os import sys
def _build_index(texts):
    """Map each token to the ascending, duplicate-free rows containing it.

    Args:
        texts: Iterable of strings; the position of a string in the
            iteration is used as its row number.

    Returns:
        A plain dict of token -> list of row positions.
    """
    index = {}
    for row, text in enumerate(texts):
        for token in nltk.word_tokenize(text):
            rows = index.setdefault(token, [])
            # Rows are visited in increasing order, so a token repeated
            # inside the same text appears as the current last entry.
            if not rows or rows[-1] != row:
                rows.append(row)
    return index


def data_prepocess(dataset):
    """Build inverted indexes over the movie titles and the actor strings.

    Every entry of ``dataset.Title`` and ``dataset.Actors`` is split with
    ``nltk.word_tokenize``. Tokens are stored exactly as the tokenizer
    produced them, so lookups in the returned dicts are case-sensitive.

    The indexes are built in a single pass over the texts instead of
    rescanning every document once per vocabulary occurrence.

    Args:
        dataset: Object exposing iterable ``Title`` and ``Actors``
            attributes of strings.

    Returns:
        A ``(title_index, actor_index)`` tuple; each is a plain dict mapping
        a token to the ascending list of row positions whose text contains
        that token.
    """
    return _build_index(dataset.Title), _build_index(dataset.Actors)
# TCP port for the server, read from the first command-line argument.
port = int(sys.argv[1])
# Load the movie table from the CSV file (path is relative to the working directory).
dataset = pd.read_csv("imdb_top1000.csv")
# Add a 'comments' column holding a separate empty list for every row.
dataset['comments'] = pd.Series([[] for i in range(dataset.shape[0])])
# Build the token -> row-position indexes for titles and actors.
title_index, actor_index = data_prepocess(dataset)
# Create the Flask application and attach the table and indexes to it.
app = Flask(__name__)
app.dataset = dataset
app.title_index = title_index
app.actor_index = actor_index
@app.route('/search', methods=['GET'])
def search():
    """Search movies by a single token and return the matches as JSON.

    Query-string parameters:
        query: Token to look up; matching is exact and case-sensitive.
        attribute: ``title``, ``actor`` or ``both`` (union of the two).
        sortby: Optional column to sort by. The value is capitalised to
            find the column (``year`` -> ``Year``); ``revenue`` is accepted
            as an alias for ``Revenue (Millions)``.
        order: ``ascending`` sorts ascending; anything else descending.

    Returns:
        The selected columns serialised with ``DataFrame.to_json()``
        (column -> {row label -> value}). A query with no match yields
        empty columns. An unknown ``attribute`` or ``sortby`` yields a
        400 response with an ``error`` message.
    """
    query = request.args.get("query")
    attribute = request.args.get("attribute")
    sortby = request.args.get("sortby")
    order = request.args.get("order")

    # .get() keeps an unknown token from raising KeyError (HTTP 500).
    if attribute == 'title':
        rows = title_index.get(query, [])
    elif attribute == 'actor':
        rows = actor_index.get(query, [])
    elif attribute == 'both':
        rows = sorted(set(title_index.get(query, []))
                      | set(actor_index.get(query, [])))
    else:
        return jsonify(error="attribute must be 'title', 'actor' or 'both'"), 400

    selected_columns = ['Actors', 'Director', 'Genre', 'Rating', 'Revenue (Millions)',
                        'Title', 'Year', 'id']
    # Work on a copy so the shared table is not rewritten on every request.
    movies = current_app.dataset.iloc[rows].copy()
    movies['id'] = movies['Rank'] - 1
    movies = movies[selected_columns]

    if sortby:
        aliases = {'revenue': 'Revenue (Millions)', 'id': 'id'}
        column = aliases.get(sortby.lower(), sortby.capitalize())
        if column not in movies.columns:
            return jsonify(error="cannot sort by '{}'".format(sortby)), 400
        movies = movies.sort_values(by=[column], ascending=(order == 'ascending'))

    # Serve the pandas-generated JSON with the proper content type.
    return current_app.response_class(movies.to_json(), mimetype='application/json')
@app.route('/movie/<movie_id>', methods=['GET'])
def movie(movie_id):
    """Return the full record of one movie as JSON.

    Args:
        movie_id: URL segment holding the zero-based row position of the
            movie (the ``id`` value reported by the other endpoints).

    Returns:
        The selected fields of the row serialised with ``Series.to_json()``;
        a 400 response when ``movie_id`` is not an integer, or a 404
        response when it is outside the table.
    """
    # SECURITY: the original called eval() on this URL segment, which lets
    # any client execute arbitrary Python on the server. Parse it strictly.
    try:
        position = int(movie_id)
    except ValueError:
        return jsonify(error='movie_id must be an integer'), 400

    table = current_app.dataset
    # Reject negatives explicitly: iloc would otherwise count from the end.
    if not 0 <= position < len(table):
        return jsonify(error='movie not found'), 404

    selected_columns = ['Actors', 'Description', 'Director', 'Genre', 'Metascore', 'Rank', 'Rating',
                        'Revenue (Millions)', 'Runtime (Minutes)', 'Title', 'Votes', 'Year',
                        'comments', 'id']
    record = table.iloc[position].copy()
    record['id'] = record['Rank'] - 1
    record = record[selected_columns]
    return current_app.response_class(record.to_json(), mimetype='application/json')
@app.route('/comment', methods=['POST'])
def comment():
    """Attach a user comment to a movie and return the updated record.

    Form fields:
        user_name: Name stored with the comment.
        movie_id: Zero-based row position of the movie.
        comment: Comment text.

    Returns:
        The selected fields of the row (including the new comment)
        serialised with ``Series.to_json()``; a 400 response when
        ``movie_id`` is missing or not an integer, or a 404 response when
        it is outside the table.
    """
    user_name = request.form.get("user_name")
    comment_text = request.form.get("comment")

    # SECURITY: the original called eval() on this form field, which lets
    # any client execute arbitrary Python on the server. Parse it strictly.
    try:
        position = int(request.form.get("movie_id", ""))
    except ValueError:
        return jsonify(error='movie_id must be an integer'), 400

    table = current_app.dataset
    # Reject negatives explicitly: iloc would otherwise count from the end.
    if not 0 <= position < len(table):
        return jsonify(error='movie not found'), 404

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    comment_info = {'user_name': user_name, 'timestamp': timestamp, 'comment': comment_text}
    # The cell holds a list object; appending mutates it in place, so the
    # comment persists in the shared table.
    table['comments'].iloc[position].append(comment_info)

    selected_columns = ['Actors', 'Description', 'Director', 'Genre', 'Metascore', 'Rank', 'Rating',
                        'Revenue (Millions)', 'Runtime (Minutes)', 'Title', 'Votes', 'Year',
                        'comments', 'id']
    record = table.iloc[position].copy()
    record['id'] = record['Rank'] - 1
    record = record[selected_columns]
    return current_app.response_class(record.to_json(), mimetype='application/json')
if __name__ == "__main__":
    # Run the Flask server on the port read from the command line;
    # `port` is already an int at this point.
    app.run(port=port)
client.py
import json
import requests
from os import sys
# r = requests.get('http://127.0.0.1:5000/search', params = values)
# r = requests.get('http://127.0.0.1:5000/search')
# Seconds to wait for the server before giving up on a request.
_REQUEST_TIMEOUT = 10


def _base_url(host, port):
    """Return the ``http://host:port`` prefix shared by every endpoint."""
    return 'http://{}:{}'.format(host, port)


def search(port, query, attribute, sortby, order, host='127.0.0.1'):
    """Query the /search endpoint and print the matches as a JSON list.

    Args:
        port: Server port.
        query: Search word; it is capitalised (first letter upper, rest
            lower) before being sent.
        attribute: Forwarded as the ``attribute`` parameter.
        sortby: Forwarded as the ``sortby`` parameter.
        order: Forwarded as the ``order`` parameter.
        host: Server address; defaults to the local machine.

    Raises:
        requests.RequestException: On connection failure, timeout or a
            non-2xx response.
    """
    values = {'query': query.capitalize(), 'attribute': attribute,
              'sortby': sortby, 'order': order}
    r = requests.get(_base_url(host, port) + '/search', params=values,
                     timeout=_REQUEST_TIMEOUT)
    r.raise_for_status()
    table = dict(r.json())
    # The reply is column-oriented ({column: {row: value}}); pivot it into
    # one dict per movie, following the row order of the 'id' column.
    movies = [{feature: column[row] for feature, column in table.items()}
              for row in table['id']]
    print(json.dumps(movies, indent=4))


def moive(port, movie_id, host='127.0.0.1'):
    """Fetch one movie from /movie/<movie_id> and print it as JSON.

    Args:
        port: Server port.
        movie_id: Identifier placed in the URL path.
        host: Server address; defaults to the local machine.

    Raises:
        requests.RequestException: On connection failure, timeout or a
            non-2xx response.
    """
    r = requests.get('{}/movie/{}'.format(_base_url(host, port), movie_id),
                     timeout=_REQUEST_TIMEOUT)
    r.raise_for_status()
    print(json.dumps(dict(r.json()), indent=4))


def comment(port, user_name, movie_id, host='127.0.0.1'):
    """Prompt for a comment, post it to /comment and print the reply.

    Args:
        port: Server port.
        user_name: Name submitted with the comment.
        movie_id: Identifier of the movie being commented on.
        host: Server address; defaults to the local machine.

    Raises:
        requests.RequestException: On connection failure, timeout or a
            non-2xx response.
    """
    text = input("What is your comment?\n")
    values = {'user_name': user_name, 'movie_id': movie_id, 'comment': text}
    r = requests.post(_base_url(host, port) + '/comment', data=values,
                      timeout=_REQUEST_TIMEOUT)
    r.raise_for_status()
    print(json.dumps(dict(r.json()), indent=4))


if __name__ == '__main__':
    usage = ('usage: client.py IP PORT search QUERY ATTRIBUTE SORTBY ORDER\n'
             '       client.py IP PORT movie MOVIE_ID\n'
             '       client.py IP PORT comment USER_NAME MOVIE_ID')
    # Minimum length of sys.argv for each command.
    required = {'search': 8, 'movie': 5, 'comment': 6}
    choice = sys.argv[3] if len(sys.argv) > 3 else None
    if choice not in required or len(sys.argv) < required[choice]:
        sys.exit(usage)

    IP = sys.argv[1]
    port = sys.argv[2]
    try:
        if choice == 'search':
            search(port, sys.argv[4], sys.argv[5], sys.argv[6], sys.argv[7], host=IP)
        elif choice == 'movie':
            moive(port, sys.argv[4], host=IP)
        else:
            comment(port, sys.argv[4], sys.argv[5], host=IP)
    except requests.RequestException as exc:
        # Report network/HTTP failures without a traceback.
        sys.exit('request failed: {}'.format(exc))