Rewritten in Go and Python

This commit is contained in:
Aykhan Shahsuvarov 2024-11-06 01:25:27 +04:00
parent 9f22d9678d
commit d8449237bb
50 changed files with 3824 additions and 879 deletions

5
.gitignore vendored
View File

@ -1,4 +1 @@
IMDB_data_sets
.venv
.ipynb_checkpoints
__pycache__
*.env

View File

@ -1,332 +0,0 @@
from pathlib import Path
import pandas as pd
import numpy as np
from time import time
from exceptions import (
FileExistException,
FileNotExistException
)
BASE_DIR = Path(__file__).resolve().parent


class DTO:
    """Filter raw IMDB TSV dumps into smaller per-title tables.

    Source files are read in chunks from ``read_dir``; results can be written
    as CSV files under ``save_dir`` with :meth:`df2csv`.
    """

    def __init__(
        self,
        save_dir=(BASE_DIR / 'IMDB_data_sets/filtered/'),
        read_dir=(BASE_DIR / 'IMDB_data_sets/'),
        default_chunksize: int = 3_000_000
    ) -> None:
        """
        Parameters
        ----------
        save_dir : str or pathlib.Path, optional
            Folder location to save files (default is BASE_DIR / 'IMDB_data_sets/filtered/').
            The folder is created when it does not exist yet.
        read_dir : str or pathlib.Path, optional
            Folder location to read files from (default is BASE_DIR / 'IMDB_data_sets/')
        default_chunksize : int, optional
            Default value to be used when chunksize is not given in methods that take
            chunksize parameters (default is 3_000_000)
        """
        # Coerce to Path so plain strings work as well as Path objects.
        self.save_dir = Path(save_dir)
        self.save_dir.mkdir(parents=True, exist_ok=True)
        self.read_dir = Path(read_dir)
        self.default_chunksize = default_chunksize

    def timing_decorator(func):
        """Wrap *func* so that every call prints its wall-clock duration."""
        # Imported locally: the decorator runs while the class body executes.
        from functools import wraps

        @wraps(func)  # keep the wrapped method's name and docstring
        def wrapper(*args, **kwargs):
            start_time = time()
            result = func(*args, **kwargs)
            print(f"Function {func.__name__} took {time() - start_time} seconds to run.")
            return result
        return wrapper

    def is_exist(self, file_dir: Path) -> None:
        """
        Parameters
        ----------
        file_dir : pathlib.Path
            File path

        Raises
        ------
        FileExistException
            If the file exists
        """
        if file_dir.is_file():
            raise FileExistException(f"file is exist: {file_dir}")

    def is_not_exist(self, file_dir: Path) -> None:
        """
        Parameters
        ----------
        file_dir : pathlib.Path
            File path

        Raises
        ------
        FileNotExistException
            If the file does not exist
        """
        if not file_dir.is_file():
            raise FileNotExistException(f"file is not exist: {file_dir}")

    def df2csv(
        self,
        df: pd.DataFrame,
        name: str,
        overwrite: bool = False,
        index: bool = False
    ) -> None:
        """
        Save a DataFrame as a CSV file inside ``save_dir``.

        Parameters
        ----------
        df : DataFrame
            DataFrame object you want to save
        name : str
            The file name to save the DataFrame object under
        overwrite : bool, optional
            When True, overwrite if file exists (default is False)
        index : bool, optional
            Whether to save the index column (default is False)

        Raises
        ------
        FileExistException
            If the overwrite parameter is false and the file exists
        """
        target = self.save_dir / name
        if not overwrite:
            self.is_exist(target)
        df.to_csv(target, index=index)

    @timing_decorator
    def filter_tconst(
        self,
        name: str,
        title_types: list[str] = None,
        chunksize: int = None
    ) -> list[str]:
        """
        Collect the tconst of every row whose titleType is in ``title_types``.

        Parameters
        ----------
        name : str
            Name of the basics file to be read
        title_types : list, optional
            'titleType' values of the rows to keep (default is ['movie', 'tvMovie'])
        chunksize : int, optional
            Chunk size for reading data (default is self.default_chunksize (default is 3_000_000)).

        Returns
        -------
        list
            A list of tconst

        Raises
        ------
        FileNotExistException
            If the file does not exist
        """
        self.is_not_exist(self.read_dir / name)
        # None sentinel instead of a mutable list default.
        if title_types is None:
            title_types = ['movie', 'tvMovie']
        if chunksize is None:
            chunksize = self.default_chunksize
        tconst_list = []
        with pd.read_csv(
                self.read_dir / name,
                sep=r'\t',
                chunksize=chunksize,
                engine='python',
                usecols=['tconst', 'titleType'],
                dtype={'tconst': str, 'titleType': str},
                na_values='\\N') as reader:
            for chunk in reader:
                wanted = chunk['titleType'].isin(title_types)
                tconst_list.extend(chunk.loc[wanted, 'tconst'])
        return tconst_list

    def get_tconst(self, name: str) -> list[str]:
        """
        Read a previously saved tconst CSV file from ``save_dir``.

        Parameters
        ----------
        name : str
            Name of the tconst file to be read

        Returns
        -------
        list
            A list of tconst

        Raises
        ------
        FileNotExistException
            If the file does not exist
        """
        self.is_not_exist(self.save_dir / name)
        return list(pd.read_csv(self.save_dir / name, usecols=['tconst'], dtype={'tconst': str})['tconst'])

    @timing_decorator
    def filter_principal(
        self,
        name: str,
        tconst_list: list[str],
        category_list: list[str] = None,
        chunksize: int = None
    ) -> pd.DataFrame:
        """
        Gather, per tconst, the lists of nconst and category found in the principals file.

        Parameters
        ----------
        name : str
            Name of the principals file to be read
        tconst_list : list
            List of tconst (It can be obtained by the filter_tconst or get_tconst method).
        category_list : list, optional
            List of categories of rows to be selected (default is ['actress', 'actor', 'director', 'writer']).
        chunksize : int, optional
            Chunk size for reading data (default is self.default_chunksize (default is 3_000_000)).

        Returns
        -------
        DataFrame
            A DataFrame object with columns tconst, nconst, and category.

        Raises
        ------
        FileNotExistException
            If the file does not exist
        """
        self.is_not_exist(self.read_dir / name)
        # None sentinel instead of a mutable list default.
        if category_list is None:
            category_list = ['actress', 'actor', 'director', 'writer']
        if chunksize is None:
            chunksize = self.default_chunksize
        # Start every requested tconst with empty lists so titles without any
        # matching principal still appear in the result.
        df = pd.DataFrame({
            'tconst': tconst_list,
            'nconst': np.empty((len(tconst_list), 0)).tolist(),
            'category': np.empty((len(tconst_list), 0)).tolist()
        })
        with pd.read_csv(self.read_dir / name,
                         sep=r'\t',
                         chunksize=chunksize,
                         engine='python',
                         usecols=['tconst', 'nconst', 'category']) as reader:
            for chunk in reader:
                # `@name` lets DataFrame.query read the local variables.
                chunk = chunk.query("(tconst in @tconst_list) and (category in @category_list)")
                chunk_group = chunk.groupby('tconst', as_index=False).agg(
                    {'nconst': lambda x: list(x), 'category': lambda x: list(x)}
                )
                # Summing the list-valued cells concatenates the lists per tconst.
                df = pd.concat([df, chunk_group]).groupby('tconst', as_index=False).agg(sum)
                del chunk_group
        return df

    @timing_decorator
    def filter_rating(
        self,
        name: str,
        tconst_list: list[str],
        chunksize: int = None
    ) -> pd.DataFrame:
        """
        Select the rating rows that belong to the given titles.

        Parameters
        ----------
        name : str
            Name of the ratings file to be read
        tconst_list : list
            List of tconst (It can be obtained by the filter_tconst or get_tconst method).
        chunksize : int, optional
            Chunk size for reading data (default is self.default_chunksize (default is 3_000_000)).

        Returns
        -------
        DataFrame
            A DataFrame object with columns tconst, averageRating and numVotes.

        Raises
        ------
        FileNotExistException
            If the file does not exist
        """
        self.is_not_exist(self.read_dir / name)
        if chunksize is None:
            chunksize = self.default_chunksize
        df = pd.DataFrame({'tconst': tconst_list})
        with pd.read_csv(
                self.read_dir / name,
                sep=r'\t',
                chunksize=chunksize,
                engine='python',
                usecols=['tconst', 'averageRating', 'numVotes'],
                dtype={'tconst': str, 'averageRating': np.float16, 'numVotes': int},
                na_values='\\N') as reader:
            for chunk in reader:
                # first() keeps the first non-null value per column for each tconst.
                df = pd.concat([df, chunk.query("tconst in @tconst_list")]).groupby('tconst', as_index=False).first()
        return df

    @timing_decorator
    def filter_basic(
        self,
        name: str,
        tconst_list: list[str],
        chunksize: int = None
    ) -> pd.DataFrame:
        """
        Select the basics rows that belong to the given titles.

        Parameters
        ----------
        name : str
            Name of the basics file to be read
        tconst_list : list
            List of tconst (It can be obtained by the filter_tconst or get_tconst method).
        chunksize : int, optional
            Chunk size for reading data (default is self.default_chunksize (default is 3_000_000)).

        Returns
        -------
        DataFrame
            A DataFrame object with columns tconst, startYear and genres.

        Raises
        ------
        FileNotExistException
            If the file does not exist
        """
        self.is_not_exist(self.read_dir / name)
        if chunksize is None:
            chunksize = self.default_chunksize
        df = pd.DataFrame({'tconst': tconst_list})
        with pd.read_csv(self.read_dir / name,
                         sep=r'\t',
                         chunksize=chunksize,
                         engine='python',
                         usecols=['tconst', 'startYear', 'genres'],
                         dtype={'tconst': str, 'startYear': 'Int16', 'genres': str},
                         na_values='\\N') as reader:
            for chunk in reader:
                # first() keeps the first non-null value per column for each tconst.
                df = pd.concat([df, chunk.query("tconst in @tconst_list")]).groupby('tconst', as_index=False).first()
        return df

20
Makefile Normal file
View File

@ -0,0 +1,20 @@
# Do not echo recipe commands while they run.
.SILENT:
# Generate the Go message and gRPC code from ./protos into ./server/pkg/proto.
proto-generate-go:
protoc \
-I./protos \
--go_out=./server/pkg/proto \
--go_opt=paths=source_relative \
--go-grpc_out=./server/pkg/proto \
--go-grpc_opt=paths=source_relative \
./protos/*.proto
# Generate the Python message code, .pyi stubs and gRPC code from ./protos
# into ./recommender/proto (uses the grpc_tools Python module).
proto-generate-py:
python -m grpc_tools.protoc \
-I./protos \
--python_out=./recommender/proto \
--pyi_out=./recommender/proto \
--grpc_python_out=./recommender/proto \
./protos/*.proto
# Run both generators.
proto-generate: proto-generate-go proto-generate-py

View File

@ -0,0 +1,4 @@
POSTGRES_USER=admin
POSTGRES_PASSWORD=admin
POSTGRES_DB=imdb
POSTGRES_PORT=5432

96
docker-compose.yml Normal file
View File

@ -0,0 +1,96 @@
services:
# PostgreSQL database; credentials and database name come from ./config/postgres/.env.
postgres:
image: postgres:17.0-alpine
container_name: "movier-postgres"
hostname: "movier-postgres"
# NOTE(review): publishes the database on host port 5432 — confirm this is wanted outside development.
ports:
- "5432:5432"
volumes:
- postgres_data:/var/lib/postgresql/data
env_file:
- ./config/postgres/.env
# `$$` keeps a literal `$` so the variables are expanded by the shell inside the container.
healthcheck:
test: [ "CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}" ]
interval: 5s
timeout: 5s
retries: 5
start_period: 10s
init: true
# One-shot job: applies the SQL migrations mounted at /migrations once postgres is healthy.
migrate:
image: migrate/migrate:v4.18.1
container_name: "movier-migrate"
hostname: "movier-migrate"
env_file:
- ./config/postgres/.env
environment:
- POSTGRES_HOST=movier-postgres
volumes:
- ./server/pkg/storage/postgresql/migrations:/migrations
init: true
depends_on:
postgres:
condition: service_healthy
# Run through `sh -c` so the connection URL is built from the container's environment.
entrypoint: [ "/bin/sh", "-c", "migrate -path=/migrations/ -database postgres://$${POSTGRES_USER}:$${POSTGRES_PASSWORD}@$${POSTGRES_HOST}:$${POSTGRES_PORT}/$${POSTGRES_DB}?sslmode=disable up" ]
# One-shot job: runs `movier download` then `movier filter` after migrations completed.
# This service also builds the movier:latest image from ./server.
etl:
image: movier:latest
build: ./server
container_name: "movier-etl"
hostname: "movier-etl"
env_file:
- ./config/postgres/.env
environment:
- POSTGRES_HOST=movier-postgres
init: true
depends_on:
postgres:
condition: service_healthy
migrate:
condition: service_completed_successfully
entrypoint: [ "/bin/sh", "-c", "./movier download && ./movier filter" ]
# Python gRPC recommender; starts only after the etl job has finished successfully.
recommender:
build: ./recommender
container_name: "movier-recommender"
hostname: "movier-recommender"
env_file:
- ./config/postgres/.env
environment:
- POSTGRES_HOST=movier-postgres
- GRPC_PORT=50051
init: true
depends_on:
postgres:
condition: service_healthy
etl:
condition: service_completed_successfully
# The probe is called without a service name, so it checks the server-wide health status.
healthcheck:
test: [ "CMD-SHELL", "/bin/grpc_health_probe -addr=:$${GRPC_PORT}" ]
interval: 5s # how often to check the health
timeout: 5s # how long to wait for a response
retries: 5 # how many retries before marking as unhealthy
start_period: 10s # initial delay before starting health checks
entrypoint: [ "uv", "run", "main.py" ]
# HTTP server (`movier serve`) published on port 8080; waits for a healthy recommender.
# NOTE(review): there is no `build:` key here — the movier:latest image is the one
# built by the etl service, so it must already exist when this service starts.
server:
image: movier:latest
container_name: "movier-server"
hostname: "movier-server"
ports:
- "8080:8080"
env_file:
- ./config/postgres/.env
environment:
- POSTGRES_HOST=movier-postgres
- BASE_URL=http://localhost:8080
- RECOMMENDER_SERVICE_GRPC_HOST=movier-recommender
- RECOMMENDER_SERVICE_GRPC_PORT=50051
init: true
depends_on:
recommender:
condition: service_healthy
entrypoint: [ "./movier", "serve" ]
volumes:
postgres_data:

View File

@ -1,3 +0,0 @@
class FileExistException(Exception):
    """Raised when a file that must not exist yet is already present."""
class FileNotExistException(Exception):
    """Raised when a required file is missing."""

View File

@ -1,517 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"from sklearn.metrics.pairwise import cosine_similarity\n",
"from ast import literal_eval\n",
"from functools import reduce"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"input_film = 'tt0816692'\n",
"\n",
"trained = {'basics': {}, 'principals': {}, 'ratings': {}}\n",
"\n",
"def drop_by_tconst(df, tconst: str, inplace=True) -> pd.DataFrame:\n",
" return df.drop(df[df['tconst'] == tconst].index, inplace=inplace)\n",
"\n",
"# Basics\n",
"\n",
"df = pd.read_csv('./IMDB_data_sets/filtered/basics.csv', dtype={'tconst': str, 'startYear': 'Int16', 'genres': str})\n",
"df['genres'].fillna('', inplace=True)\n",
"\n",
"cv = CountVectorizer(dtype=np.int8, token_pattern=\"(?u)[\\w'-]+\")\n",
"count_matrix = cv.fit_transform(df['genres'])\n",
"\n",
"trained['basics']['genres'] = pd.DataFrame(\n",
" {\n",
" 'genres': cosine_similarity(count_matrix[df[df['tconst'] == input_film].index[0]], count_matrix)[0],\n",
" 'tconst': df['tconst']\n",
" }\n",
" )\n",
"\n",
"drop_by_tconst(trained['basics']['genres'], input_film)\n",
"\n",
"trained['basics']['genres'].sort_values(ascending=False, by='genres', inplace=True, ignore_index=True)\n",
"trained['basics']['genres'].drop('genres', axis=1, inplace=True)\n",
"\n",
"\n",
"year = int(df[df['tconst'] == input_film].startYear.iloc[0])\n",
"\n",
"trained['basics']['years'] = pd.DataFrame(\n",
" {\n",
" 'years': df['startYear'],\n",
" 'tconst': df['tconst']\n",
" }\n",
")\n",
"\n",
"drop_by_tconst(trained['basics']['years'], input_film)\n",
"trained['basics']['years'].sort_values(by='years', key=lambda x: abs(year-x), inplace=True, ignore_index=True)\n",
"trained['basics']['years'].drop('years', axis=1, inplace=True)\n",
"trained['basics']['years'].reset_index(names='years_index', inplace=True)\n",
"\n",
"# Principals\n",
"\n",
"df = pd.read_csv('./IMDB_data_sets/filtered/principals.csv', dtype={'tconst': str, 'nconst': str}, usecols=['tconst', 'nconst'])\n",
"df.nconst = df.nconst.apply(lambda n: ','.join(literal_eval(n)))\n",
"\n",
"cv = CountVectorizer(dtype=np.int8, token_pattern=\"(?u)[\\w'-]+\")\n",
"count_matrix = cv.fit_transform(df['nconst'])\n",
"\n",
"trained['principals']['nconst'] = pd.DataFrame(\n",
" {\n",
" 'nconst': cosine_similarity(count_matrix[df[df['tconst'] == input_film].index[0]], count_matrix)[0],\n",
" 'tconst': df['tconst']\n",
" }\n",
" )\n",
"\n",
"drop_by_tconst(trained['principals']['nconst'], input_film)\n",
"trained['principals']['nconst'].sort_values(ascending=False, by='nconst', inplace=True, ignore_index=True)\n",
"trained['principals']['nconst'].drop('nconst', axis=1, inplace=True)\n",
"trained['principals']['nconst'].reset_index(names='nconst_index', inplace=True)\n",
"\n",
"# Ratings\n",
"\n",
"df = pd.read_csv('./IMDB_data_sets/filtered/ratings.csv', dtype={'tconst': str, 'averageRating': float, 'numVotes': 'Int64'})\n",
"\n",
"rating = float(df[df['tconst'] == input_film].averageRating.iloc[0])\n",
"votes = int(df[df['tconst'] == input_film].numVotes.iloc[0])\n",
"\n",
"drop_by_tconst(df, input_film)\n",
"\n",
"trained['ratings']['ratings'] = df.sort_values(by='averageRating', key=lambda x: abs(rating-x), ignore_index=True)\n",
"trained['ratings']['ratings'].drop(['averageRating', 'numVotes'], axis=1, inplace=True)\n",
"trained['ratings']['ratings'].reset_index(names='ratings_index', inplace=True)\n",
"\n",
"df.drop('averageRating', axis=1, inplace=True)\n",
"\n",
"trained['ratings']['votes'] = df.sort_values(by='numVotes', key=lambda x: abs(votes-x), ignore_index=True)\n",
"trained['ratings']['votes'].drop('numVotes', axis=1, inplace=True)\n",
"trained['ratings']['votes'].reset_index(names='votes_index', inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"merged = reduce(lambda left, right: pd.merge(\n",
" left,\n",
" right,\n",
" on=['tconst'],\n",
" how='outer'\n",
" ), [\n",
" trained['basics']['genres'],\n",
" trained['basics']['years'],\n",
" trained['principals']['nconst'],\n",
" trained['ratings']['ratings'],\n",
" trained['ratings']['votes']\n",
" ])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>tconst</th>\n",
" <th>years_index</th>\n",
" <th>nconst_index</th>\n",
" <th>ratings_index</th>\n",
" <th>votes_index</th>\n",
" <th>average</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>tt4255564</td>\n",
" <td>744690</td>\n",
" <td>297616</td>\n",
" <td>669670</td>\n",
" <td>669670</td>\n",
" <td>476329.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>tt2203897</td>\n",
" <td>27293</td>\n",
" <td>9705</td>\n",
" <td>602978</td>\n",
" <td>602978</td>\n",
" <td>248591.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>tt0355627</td>\n",
" <td>344502</td>\n",
" <td>708640</td>\n",
" <td>318038</td>\n",
" <td>205177</td>\n",
" <td>315271.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>tt15387378</td>\n",
" <td>710214</td>\n",
" <td>98486</td>\n",
" <td>540358</td>\n",
" <td>540358</td>\n",
" <td>377883.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>tt5155340</td>\n",
" <td>72975</td>\n",
" <td>386406</td>\n",
" <td>103102</td>\n",
" <td>152733</td>\n",
" <td>143044.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>777383</th>\n",
" <td>tt1230211</td>\n",
" <td>189871</td>\n",
" <td>599826</td>\n",
" <td>278434</td>\n",
" <td>25539</td>\n",
" <td>374210.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>777384</th>\n",
" <td>tt12302076</td>\n",
" <td>149946</td>\n",
" <td>599825</td>\n",
" <td>483066</td>\n",
" <td>483066</td>\n",
" <td>498657.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>777385</th>\n",
" <td>tt1230206</td>\n",
" <td>189885</td>\n",
" <td>599823</td>\n",
" <td>301969</td>\n",
" <td>25847</td>\n",
" <td>378981.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>777386</th>\n",
" <td>tt1230179</td>\n",
" <td>255769</td>\n",
" <td>599809</td>\n",
" <td>483065</td>\n",
" <td>483065</td>\n",
" <td>519818.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>777387</th>\n",
" <td>tt9916754</td>\n",
" <td>39373</td>\n",
" <td>777387</td>\n",
" <td>777387</td>\n",
" <td>777387</td>\n",
" <td>629784.2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>777388 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" tconst years_index nconst_index ratings_index votes_index \n",
"0 tt4255564 744690 297616 669670 669670 \\\n",
"1 tt2203897 27293 9705 602978 602978 \n",
"2 tt0355627 344502 708640 318038 205177 \n",
"3 tt15387378 710214 98486 540358 540358 \n",
"4 tt5155340 72975 386406 103102 152733 \n",
"... ... ... ... ... ... \n",
"777383 tt1230211 189871 599826 278434 25539 \n",
"777384 tt12302076 149946 599825 483066 483066 \n",
"777385 tt1230206 189885 599823 301969 25847 \n",
"777386 tt1230179 255769 599809 483065 483065 \n",
"777387 tt9916754 39373 777387 777387 777387 \n",
"\n",
" average \n",
"0 476329.2 \n",
"1 248591.0 \n",
"2 315271.8 \n",
"3 377883.8 \n",
"4 143044.0 \n",
"... ... \n",
"777383 374210.6 \n",
"777384 498657.4 \n",
"777385 378981.8 \n",
"777386 519818.8 \n",
"777387 629784.2 \n",
"\n",
"[777388 rows x 6 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"merged['average'] = (merged.index*20 + merged.years_index*20 + merged.nconst_index*20 + merged.ratings_index*20 + merged.votes_index*20) / (5*20)\n",
"merged"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>tconst</th>\n",
" <th>years_index</th>\n",
" <th>nconst_index</th>\n",
" <th>ratings_index</th>\n",
" <th>votes_index</th>\n",
" <th>average</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8695</th>\n",
" <td>tt2338151</td>\n",
" <td>7775</td>\n",
" <td>12586</td>\n",
" <td>23860</td>\n",
" <td>1151</td>\n",
" <td>10813.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>tt3659388</td>\n",
" <td>49654</td>\n",
" <td>98</td>\n",
" <td>25758</td>\n",
" <td>79</td>\n",
" <td>15120.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8501</th>\n",
" <td>tt1754656</td>\n",
" <td>30993</td>\n",
" <td>116</td>\n",
" <td>46193</td>\n",
" <td>3247</td>\n",
" <td>17810.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7374</th>\n",
" <td>tt2103281</td>\n",
" <td>11453</td>\n",
" <td>27910</td>\n",
" <td>49618</td>\n",
" <td>347</td>\n",
" <td>19340.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7549</th>\n",
" <td>tt2358592</td>\n",
" <td>54985</td>\n",
" <td>17633</td>\n",
" <td>12668</td>\n",
" <td>9182</td>\n",
" <td>20403.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>758545</th>\n",
" <td>tt13334656</td>\n",
" <td>700841</td>\n",
" <td>672174</td>\n",
" <td>500543</td>\n",
" <td>500543</td>\n",
" <td>626529.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>758577</th>\n",
" <td>tt13336544</td>\n",
" <td>700845</td>\n",
" <td>672184</td>\n",
" <td>500576</td>\n",
" <td>500576</td>\n",
" <td>626551.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>758587</th>\n",
" <td>tt13335546</td>\n",
" <td>700843</td>\n",
" <td>672231</td>\n",
" <td>500564</td>\n",
" <td>500564</td>\n",
" <td>626557.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>758590</th>\n",
" <td>tt13335152</td>\n",
" <td>700842</td>\n",
" <td>672247</td>\n",
" <td>500557</td>\n",
" <td>500557</td>\n",
" <td>626558.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>777387</th>\n",
" <td>tt9916754</td>\n",
" <td>39373</td>\n",
" <td>777387</td>\n",
" <td>777387</td>\n",
" <td>777387</td>\n",
" <td>629784.2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>777388 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" tconst years_index nconst_index ratings_index votes_index \n",
"8695 tt2338151 7775 12586 23860 1151 \\\n",
"14 tt3659388 49654 98 25758 79 \n",
"8501 tt1754656 30993 116 46193 3247 \n",
"7374 tt2103281 11453 27910 49618 347 \n",
"7549 tt2358592 54985 17633 12668 9182 \n",
"... ... ... ... ... ... \n",
"758545 tt13334656 700841 672174 500543 500543 \n",
"758577 tt13336544 700845 672184 500576 500576 \n",
"758587 tt13335546 700843 672231 500564 500564 \n",
"758590 tt13335152 700842 672247 500557 500557 \n",
"777387 tt9916754 39373 777387 777387 777387 \n",
"\n",
" average \n",
"8695 10813.4 \n",
"14 15120.6 \n",
"8501 17810.0 \n",
"7374 19340.4 \n",
"7549 20403.4 \n",
"... ... \n",
"758545 626529.2 \n",
"758577 626551.6 \n",
"758587 626557.8 \n",
"758590 626558.6 \n",
"777387 629784.2 \n",
"\n",
"[777388 rows x 6 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"merged.sort_values(by='average')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "08dff0a1cb2e37beec5bc340112a669cde11fa0a1a1e2fde92884d26090bd6fc"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

23
main.py
View File

@ -1,23 +0,0 @@
from IMDB_DTO import DTO
from time import time
import pandas as pd
def main() -> None:
    """Run the whole filtering pipeline and write one CSV file per table.

    Steps: collect the wanted tconst values, save and reload them, then
    filter the basics, principals and ratings dumps down to those titles.
    """
    dto = DTO()

    tconst_list = dto.filter_tconst(name='title.basics.tsv')
    dto.df2csv(df=pd.DataFrame({'tconst': tconst_list}), name='tconst.csv')
    tconst_list = dto.get_tconst('tconst.csv')

    df = dto.filter_basic(name='title.basics.tsv', tconst_list=tconst_list)
    dto.df2csv(df=df, name='basics.csv')
    # Drop the frame before building the next one to keep peak memory down.
    del df

    df = dto.filter_principal(name='title.principals.tsv', tconst_list=tconst_list)
    dto.df2csv(df=df, name='principals_comma.csv', overwrite=True)
    del df

    df = dto.filter_rating(name='title.ratings.tsv', tconst_list=tconst_list)
    dto.df2csv(df=df, name='ratings.csv', overwrite=True)
    del df


if __name__ == '__main__':
    main()

53
protos/recommender.proto Normal file
View File

@ -0,0 +1,53 @@
syntax = "proto3";
package recommender;
option go_package = "github.com/aykhans/movier/server/pkg/proto";
// Recommender exposes a single unary RPC.
service Recommender {
// Takes one Request and returns one Response.
rpc GetRecommendations(Request) returns (Response) {}
}
// Filter holds six independent bounds (votes, year, rating; min and max each).
// Every bound is wrapped in its own single-member oneof so that a receiver can
// tell "not set" apart from a zero value by checking the oneof for presence.
// NOTE(review): whether the bounds are inclusive is not defined in this file.
message Filter {
oneof min_votes_oneof {
uint32 min_votes = 1;
}
oneof max_votes_oneof {
uint32 max_votes = 2;
}
oneof min_year_oneof {
uint32 min_year = 3;
}
oneof max_year_oneof {
uint32 max_year = 4;
}
oneof min_rating_oneof {
float min_rating = 5;
}
oneof max_rating_oneof {
float max_rating = 6;
}
}
// Weight carries one unsigned integer per feature (year, rating, genres, nconsts).
// NOTE(review): presumably the relative importance of each feature; the valid
// range and scale are not defined in this file — confirm against the recommender.
message Weight {
uint32 year = 1;
uint32 rating = 2;
uint32 genres = 3;
uint32 nconsts = 4;
}
// Request is the input of Recommender.GetRecommendations.
message Request {
// Title identifiers the request is based on (presumably the input movies).
repeated string tconsts = 1;
// Presumably the number of recommendations wanted — confirm against the recommender.
uint32 n = 2;
Filter filter = 3;
Weight weight = 4;
}
// Response is the output of Recommender.GetRecommendations.
message Response {
// Zero or more recommended movies.
repeated RecommendedMovie movies = 1;
}
// RecommendedMovie is a single entry of Response.movies.
message RecommendedMovie {
// Identifier of the recommended title.
string tconst = 1;
// NOTE(review): presumably names of the weights that produced this match — confirm.
repeated string weights = 2;
}

View File

@ -0,0 +1,4 @@
.venv
.ipynb_checkpoints
__pycache__
*.ipynb

4
recommender/.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
.venv
.ipynb_checkpoints
__pycache__
*.ipynb

View File

@ -0,0 +1 @@
3.12

19
recommender/Dockerfile Normal file
View File

@ -0,0 +1,19 @@
# Image for the recommender service: Python 3.12.3 on Debian bookworm (slim).
FROM python:3.12.3-slim-bookworm
# Tell uv to byte-compile packages when it installs them.
ENV UV_COMPILE_BYTECODE=1
# Copy the uv and uvx binaries out of the ghcr.io/astral-sh/uv image.
# NOTE(review): `latest` is unpinned, so rebuilds may pick up a different uv version.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# curl is used below to download grpc_health_probe.
# NOTE(review): the apt package lists stay in this layer — consider removing them.
RUN apt-get update && apt-get install -y curl
# Install grpc_health_probe at a pinned version into /bin.
# NOTE(review): the linux-amd64 binary is hard-coded — it will not run on other architectures.
ENV GRPC_HEALTH_PROBE_VERSION=v0.4.35
RUN curl -L -o /bin/grpc_health_probe \
https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-amd64 && \
chmod +x /bin/grpc_health_probe
WORKDIR /app
# Copy the whole build context, then install dependencies with uv without updating the lockfile.
COPY . .
RUN uv sync --frozen

38
recommender/config.py Normal file
View File

@ -0,0 +1,38 @@
import os
def get_postgres_dsn():
    """Build the PostgreSQL connection URI from the POSTGRES_* environment variables.

    Reads POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_HOST, POSTGRES_PORT and
    POSTGRES_DB. User, password and database name are percent-encoded so that
    characters such as ``@``, ``:`` or ``/`` cannot corrupt the URI.

    Returns:
        str: ``postgres://user:password@host:port/dbname?sslmode=disable``

    Raises:
        ValueError: If a variable is not set or POSTGRES_PORT is not an integer.
    """
    from urllib.parse import quote

    def require(name):
        # Return the value of a mandatory environment variable.
        value = os.getenv(name)
        if value is None:
            raise ValueError(f'{name} is not set')
        return value

    user = require('POSTGRES_USER')
    password = require('POSTGRES_PASSWORD')
    host = require('POSTGRES_HOST')
    raw_port = require('POSTGRES_PORT')
    try:
        port = int(raw_port)
    except ValueError as err:
        raise ValueError('POSTGRES_PORT is not an integer') from err
    dbname = require('POSTGRES_DB')

    credentials = f'{quote(user, safe="")}:{quote(password, safe="")}'
    return f'postgres://{credentials}@{host}:{port}/{quote(dbname, safe="")}?sslmode=disable'
def get_grpc_port():
    """Return the gRPC listen port configured in the GRPC_PORT environment variable.

    Returns:
        int: The port number, between 0 and 65535.

    Raises:
        ValueError: If GRPC_PORT is not set, is not an integer, or is outside
            the valid port range.
    """
    raw_port = os.getenv('GRPC_PORT')
    if raw_port is None:
        raise ValueError('GRPC_PORT is not set')
    try:
        port = int(raw_port)
    except ValueError as err:
        raise ValueError('GRPC_PORT is not an integer') from err
    # Reject values that can never be bound instead of failing later at startup.
    if not 0 <= port <= 65535:
        raise ValueError('GRPC_PORT is out of range (0-65535)')
    return port

114
recommender/main.py Normal file
View File

@ -0,0 +1,114 @@
from sys import path
path.append('./proto')
from concurrent import futures
from time import sleep
import threading
from recommend import Recommender, Weight, Filter
from config import get_postgres_dsn, get_grpc_port
import psycopg2
from proto import recommender_pb2, recommender_pb2_grpc
import grpc
from grpc_reflection.v1alpha import reflection
from grpc_health.v1 import health
from grpc_health.v1 import health_pb2
from grpc_health.v1 import health_pb2_grpc
postgres_dsn = get_postgres_dsn()
class RecommenderServicer(recommender_pb2_grpc.RecommenderServicer):
    """gRPC servicer that answers GetRecommendations using a PostgreSQL connection."""

    @staticmethod
    def _fail(context, code, error):
        """Record *code* and the error text on *context*; return an empty Response."""
        context.set_code(code)
        context.set_details(str(error))
        return recommender_pb2.Response()

    def GetRecommendations(self, request: recommender_pb2.Request, context):
        """Handle one GetRecommendations call.

        Status codes set on failure:
            INVALID_ARGUMENT: building the Recommender raised ValueError.
            NOT_FOUND: get_recommendations raised ValueError.
            INTERNAL: any other exception, including a failed database connection.

        Returns:
            recommender_pb2.Response: The recommended movies, or an empty
            response when a failure status was set.
        """
        filter_msg = request.filter

        def optional(field):
            # Each bound sits in a oneof named `<field>_oneof`; an unset oneof
            # means the caller did not provide that bound.
            return getattr(filter_msg, field) if filter_msg.HasField(f'{field}_oneof') else None

        try:
            recommender = Recommender(
                filter_=Filter(
                    min_votes=optional('min_votes'),
                    max_votes=optional('max_votes'),
                    min_year=optional('min_year'),
                    max_year=optional('max_year'),
                    min_rating=optional('min_rating'),
                    max_rating=optional('max_rating')
                ),
                weight=Weight(
                    year=request.weight.year,
                    rating=request.weight.rating,
                    genres=request.weight.genres,
                    nconsts=request.weight.nconsts
                )
            )
        except ValueError as e:
            return self._fail(context, grpc.StatusCode.INVALID_ARGUMENT, e)
        except Exception as e:
            return self._fail(context, grpc.StatusCode.INTERNAL, e)

        try:
            conn = psycopg2.connect(dsn=postgres_dsn)
        except Exception as e:
            return self._fail(context, grpc.StatusCode.INTERNAL, e)

        try:
            # `with conn` only ends the transaction; psycopg2 does not close the
            # connection on exit, so it is closed explicitly in `finally`.
            with conn:
                try:
                    data = recommender.get_recommendations(conn, request.tconsts, request.n)
                except ValueError as e:
                    return self._fail(context, grpc.StatusCode.NOT_FOUND, e)
                except Exception as e:
                    return self._fail(context, grpc.StatusCode.INTERNAL, e)
        finally:
            conn.close()

        movies = [
            recommender_pb2.RecommendedMovie(tconst=tconst, weights=weights)
            for tconst, weights in data.items()
        ]
        return recommender_pb2.Response(movies=movies)
def _toggle_health(health_servicer: health.HealthServicer, service: str):
    """Alternate the reported health of *service* every five seconds, forever.

    The first status reported is NOT_SERVING, then SERVING, and so on.
    This function never returns, so run it in its own thread.

    NOTE(review): a constantly flipping status makes the service look
    unhealthy half of the time — confirm this is intended outside a demo.
    """
    serving = health_pb2.HealthCheckResponse.SERVING
    not_serving = health_pb2.HealthCheckResponse.NOT_SERVING
    report_serving = False
    while True:
        health_servicer.set(service, serving if report_serving else not_serving)
        report_serving = not report_serving
        sleep(5)
def _configure_health_server(server: grpc.Server):
    """Register the standard gRPC health service on *server*.

    The "recommender.Recommender" service is reported as SERVING once.
    No status-toggling thread is started, so health checks for that
    service name get a stable answer.
    """
    health_servicer = health.HealthServicer(
        experimental_non_blocking=True,
        experimental_thread_pool=futures.ThreadPoolExecutor(max_workers=10),
    )
    health_pb2_grpc.add_HealthServicer_to_server(health_servicer, server)
    health_servicer.set(
        "recommender.Recommender",
        health_pb2.HealthCheckResponse.SERVING,
    )
def serve():
    """Start the gRPC server and block until it terminates.

    Registers the recommender servicer, server reflection and the health
    service, then listens (without TLS) on the port given by get_grpc_port().
    """
    grpc_server = grpc.server(futures.ThreadPoolExecutor(max_workers=100))
    recommender_pb2_grpc.add_RecommenderServicer_to_server(RecommenderServicer(), grpc_server)

    # Reflection lets clients discover the listed services at runtime.
    recommender_service = recommender_pb2.DESCRIPTOR.services_by_name["Recommender"]
    reflection.enable_server_reflection(
        (recommender_service.full_name, reflection.SERVICE_NAME),
        grpc_server,
    )

    listen_address = f'[::]:{get_grpc_port()}'
    grpc_server.add_insecure_port(listen_address)
    _configure_health_server(grpc_server)
    grpc_server.start()
    grpc_server.wait_for_termination()
if __name__ == '__main__':
    # Script entry point: Ctrl-C ends the process with a message instead of a traceback.
    try:
        serve()
    except KeyboardInterrupt:
        print("Shutting down server")

View File

@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# NO CHECKED-IN PROTOBUF GENCODE
# source: recommender.proto
# Protobuf Python Version: 5.27.2
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import runtime_version as _runtime_version
from google.protobuf import symbol_database as _symbol_database
from google.protobuf.internal import builder as _builder
# Check the installed protobuf runtime against the generator version recorded
# here (5.27.2) for recommender.proto before any descriptor is built.
# NOTE(review): presumably this raises on an incompatible runtime -- confirm
# against the protobuf runtime_version documentation.
_runtime_version.ValidateProtobufRuntimeVersion(
    _runtime_version.Domain.PUBLIC,
    5,
    27,
    2,
    '',
    'recommender.proto'
)
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


# Serialized file descriptor of recommender.proto (messages Filter, Weight,
# Request, Response, RecommendedMovie and the Recommender service), added to
# the default descriptor pool.
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x11recommender.proto\x12\x0brecommender\"\xf8\x01\n\x06\x46ilter\x12\x13\n\tmin_votes\x18\x01 \x01(\rH\x00\x12\x13\n\tmax_votes\x18\x02 \x01(\rH\x01\x12\x12\n\x08min_year\x18\x03 \x01(\rH\x02\x12\x12\n\x08max_year\x18\x04 \x01(\rH\x03\x12\x14\n\nmin_rating\x18\x05 \x01(\x02H\x04\x12\x14\n\nmax_rating\x18\x06 \x01(\x02H\x05\x42\x11\n\x0fmin_votes_oneofB\x11\n\x0fmax_votes_oneofB\x10\n\x0emin_year_oneofB\x10\n\x0emax_year_oneofB\x12\n\x10min_rating_oneofB\x12\n\x10max_rating_oneof\"G\n\x06Weight\x12\x0c\n\x04year\x18\x01 \x01(\r\x12\x0e\n\x06rating\x18\x02 \x01(\r\x12\x0e\n\x06genres\x18\x03 \x01(\r\x12\x0f\n\x07nconsts\x18\x04 \x01(\r\"o\n\x07Request\x12\x0f\n\x07tconsts\x18\x01 \x03(\t\x12\t\n\x01n\x18\x02 \x01(\r\x12#\n\x06\x66ilter\x18\x03 \x01(\x0b\x32\x13.recommender.Filter\x12#\n\x06weight\x18\x04 \x01(\x0b\x32\x13.recommender.Weight\"9\n\x08Response\x12-\n\x06movies\x18\x01 \x03(\x0b\x32\x1d.recommender.RecommendedMovie\"3\n\x10RecommendedMovie\x12\x0e\n\x06tconst\x18\x01 \x01(\t\x12\x0f\n\x07weights\x18\x02 \x03(\t2R\n\x0bRecommender\x12\x43\n\x12GetRecommendations\x12\x14.recommender.Request\x1a\x15.recommender.Response\"\x00\x42,Z*github.com/aykhans/movier/server/pkg/protob\x06proto3')

# Build the message/enum descriptors and classes from DESCRIPTOR and publish
# them as globals of this module.
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'recommender_pb2', _globals)
if not _descriptor._USE_C_DESCRIPTORS:
  # Only when the C descriptor implementation is not in use: record the file
  # options and, for each message and the service, its start/end byte offsets
  # inside the serialized descriptor above.
  _globals['DESCRIPTOR']._loaded_options = None
  _globals['DESCRIPTOR']._serialized_options = b'Z*github.com/aykhans/movier/server/pkg/proto'
  _globals['_FILTER']._serialized_start=35
  _globals['_FILTER']._serialized_end=283
  _globals['_WEIGHT']._serialized_start=285
  _globals['_WEIGHT']._serialized_end=356
  _globals['_REQUEST']._serialized_start=358
  _globals['_REQUEST']._serialized_end=469
  _globals['_RESPONSE']._serialized_start=471
  _globals['_RESPONSE']._serialized_end=528
  _globals['_RECOMMENDEDMOVIE']._serialized_start=530
  _globals['_RECOMMENDEDMOVIE']._serialized_end=581
  _globals['_RECOMMENDER']._serialized_start=583
  _globals['_RECOMMENDER']._serialized_end=665
# @@protoc_insertion_point(module_scope)

View File

@ -0,0 +1,60 @@
from google.protobuf.internal import containers as _containers
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union
DESCRIPTOR: _descriptor.FileDescriptor
# Typing stub for the ``Filter`` message: a min/max pair for votes, year and
# rating. Votes and year bounds are ints, rating bounds are floats; every
# constructor argument may be omitted.
class Filter(_message.Message):
    __slots__ = ("min_votes", "max_votes", "min_year", "max_year", "min_rating", "max_rating")
    MIN_VOTES_FIELD_NUMBER: _ClassVar[int]
    MAX_VOTES_FIELD_NUMBER: _ClassVar[int]
    MIN_YEAR_FIELD_NUMBER: _ClassVar[int]
    MAX_YEAR_FIELD_NUMBER: _ClassVar[int]
    MIN_RATING_FIELD_NUMBER: _ClassVar[int]
    MAX_RATING_FIELD_NUMBER: _ClassVar[int]
    min_votes: int
    max_votes: int
    min_year: int
    max_year: int
    min_rating: float
    max_rating: float
    def __init__(self, min_votes: _Optional[int] = ..., max_votes: _Optional[int] = ..., min_year: _Optional[int] = ..., max_year: _Optional[int] = ..., min_rating: _Optional[float] = ..., max_rating: _Optional[float] = ...) -> None: ...
# Typing stub for the ``Weight`` message: one integer weight per feature
# (year, rating, genres, nconsts); every constructor argument may be omitted.
class Weight(_message.Message):
    __slots__ = ("year", "rating", "genres", "nconsts")
    YEAR_FIELD_NUMBER: _ClassVar[int]
    RATING_FIELD_NUMBER: _ClassVar[int]
    GENRES_FIELD_NUMBER: _ClassVar[int]
    NCONSTS_FIELD_NUMBER: _ClassVar[int]
    year: int
    rating: int
    genres: int
    nconsts: int
    def __init__(self, year: _Optional[int] = ..., rating: _Optional[int] = ..., genres: _Optional[int] = ..., nconsts: _Optional[int] = ...) -> None: ...
# Typing stub for the ``Request`` message: a repeated list of tconst strings,
# an integer ``n``, and nested ``Filter`` / ``Weight`` messages (each of which
# the constructor also accepts as a mapping).
class Request(_message.Message):
    __slots__ = ("tconsts", "n", "filter", "weight")
    TCONSTS_FIELD_NUMBER: _ClassVar[int]
    N_FIELD_NUMBER: _ClassVar[int]
    FILTER_FIELD_NUMBER: _ClassVar[int]
    WEIGHT_FIELD_NUMBER: _ClassVar[int]
    tconsts: _containers.RepeatedScalarFieldContainer[str]
    n: int
    filter: Filter
    weight: Weight
    def __init__(self, tconsts: _Optional[_Iterable[str]] = ..., n: _Optional[int] = ..., filter: _Optional[_Union[Filter, _Mapping]] = ..., weight: _Optional[_Union[Weight, _Mapping]] = ...) -> None: ...
# Typing stub for the ``Response`` message: a repeated list of
# ``RecommendedMovie`` entries (messages or mappings accepted on construction).
class Response(_message.Message):
    __slots__ = ("movies",)
    MOVIES_FIELD_NUMBER: _ClassVar[int]
    movies: _containers.RepeatedCompositeFieldContainer[RecommendedMovie]
    def __init__(self, movies: _Optional[_Iterable[_Union[RecommendedMovie, _Mapping]]] = ...) -> None: ...
# Typing stub for the ``RecommendedMovie`` message: one tconst string plus a
# repeated list of strings named ``weights``.
class RecommendedMovie(_message.Message):
    __slots__ = ("tconst", "weights")
    TCONST_FIELD_NUMBER: _ClassVar[int]
    WEIGHTS_FIELD_NUMBER: _ClassVar[int]
    tconst: str
    weights: _containers.RepeatedScalarFieldContainer[str]
    def __init__(self, tconst: _Optional[str] = ..., weights: _Optional[_Iterable[str]] = ...) -> None: ...

View File

@ -0,0 +1,97 @@
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc
import warnings
import recommender_pb2 as recommender__pb2
# Import-time guard: refuse to load this generated module when the installed
# grpc package is older than the version the code was generated with.
GRPC_GENERATED_VERSION = '1.67.0'
GRPC_VERSION = grpc.__version__
_version_not_supported = False

try:
    from grpc._utilities import first_version_is_lower
    _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
except ImportError:
    # The comparison helper itself is missing, which is treated the same as
    # an unsupported grpc version.
    _version_not_supported = True

if _version_not_supported:
    raise RuntimeError(
        f'The grpc package installed is at version {GRPC_VERSION},'
        + f' but the generated code in recommender_pb2_grpc.py depends on'
        + f' grpcio>={GRPC_GENERATED_VERSION}.'
        + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
        + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
    )
class RecommenderStub(object):
    """Client-side stub for the ``recommender.Recommender`` service.

    Exposes ``GetRecommendations`` as a unary-unary callable bound to the
    channel given at construction time.
    """

    def __init__(self, channel):
        """Constructor.

        Binds ``GetRecommendations`` to the method path
        '/recommender.Recommender/GetRecommendations', serializing ``Request``
        messages and deserializing ``Response`` messages.

        Args:
            channel: A grpc.Channel.
        """
        self.GetRecommendations = channel.unary_unary(
                '/recommender.Recommender/GetRecommendations',
                request_serializer=recommender__pb2.Request.SerializeToString,
                response_deserializer=recommender__pb2.Response.FromString,
                _registered_method=True)
class RecommenderServicer(object):
    """Server-side base class for the ``recommender.Recommender`` service.

    The method below is a placeholder that reports UNIMPLEMENTED; a concrete
    servicer is expected to override it (presumably the server module's
    servicer class -- not visible from this file).
    """

    def GetRecommendations(self, request, context):
        """Default handler: mark the call UNIMPLEMENTED and raise.

        Sets the status code and details on ``context`` and then raises
        ``NotImplementedError``.
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')
def add_RecommenderServicer_to_server(servicer, server):
    """Register ``servicer`` on ``server`` as 'recommender.Recommender'.

    Wraps ``servicer.GetRecommendations`` in a unary-unary handler that
    deserializes ``Request`` and serializes ``Response``, then installs it on
    the server both as a generic handler and as registered method handlers.

    Args:
        servicer: object providing a ``GetRecommendations`` method.
        server: gRPC server the handlers are added to.
    """
    rpc_method_handlers = {
            'GetRecommendations': grpc.unary_unary_rpc_method_handler(
                    servicer.GetRecommendations,
                    request_deserializer=recommender__pb2.Request.FromString,
                    response_serializer=recommender__pb2.Response.SerializeToString,
            ),
    }
    generic_handler = grpc.method_handlers_generic_handler(
            'recommender.Recommender', rpc_method_handlers)
    server.add_generic_rpc_handlers((generic_handler,))
    server.add_registered_method_handlers('recommender.Recommender', rpc_method_handlers)
 # This class is part of an EXPERIMENTAL API.
class Recommender(object):
    """Channel-less convenience client for ``recommender.Recommender``.

    Its static method forwards to ``grpc.experimental.unary_unary`` with a
    ``target`` instead of requiring a pre-built channel.
    """

    @staticmethod
    def GetRecommendations(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        # Positional order below follows the grpc.experimental.unary_unary
        # call as generated; note ``insecure`` is passed before
        # ``call_credentials`` even though the parameters are declared in the
        # opposite order.
        return grpc.experimental.unary_unary(
            request,
            target,
            '/recommender.Recommender/GetRecommendations',
            recommender__pb2.Request.SerializeToString,
            recommender__pb2.Response.FromString,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
            _registered_method=True)

View File

@ -0,0 +1,15 @@
[project]
name = "movier"
version = "0.1.0"
description = "Movier"
readme = "README.md"
requires-python = "==3.12.3"
dependencies = [
"grpcio>=1.67.0",
"grpcio-reflection==1.67.0",
"grpcio-tools==1.67.0",
"polars==1.12.0",
"psycopg2-binary==2.9.10",
"scikit-learn==1.5.2",
"grpcio-health-checking==1.67.1",
]

441
recommender/recommend.py Normal file
View File

@ -0,0 +1,441 @@
from typing import Any
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import polars as pl
from dataclasses import dataclass
@dataclass
class Filter:
    """Optional inclusive bounds on votes, year and rating.

    A bound left as ``None`` is not validated. Validation runs on
    construction, in the order votes, year, rating.

    Raises
    ------
    ValueError: if a bound is negative, or a minimum is greater than the
        matching maximum
    """
    min_votes: int = None
    max_votes: int = None
    min_year: int = None
    max_year: int = None
    min_rating: float = None
    max_rating: float = None

    def __post_init__(self):
        # One row per min/max pair:
        # (lower, upper, "lower negative", "upper negative", "lower > upper").
        checks = (
            (
                self.min_votes,
                self.max_votes,
                "min_votes should be greater than or equal to 0",
                "max_votes should be greater than or equal to 0",
                "min_votes should be less than or equal to max_votes",
            ),
            (
                self.min_year,
                self.max_year,
                "min_year should be greater than or equal to 0",
                "max_year should be greater than or equal to 0",
                "min_year should be less than or equal to max_year",
            ),
            (
                self.min_rating,
                self.max_rating,
                "min_rating should be greater than or equal to 0",
                "max_rating should be greater than or equal to 0",
                "min_rating should be less than or equal to max_rating",
            ),
        )
        for lower, upper, lower_negative, upper_negative, inverted in checks:
            has_lower = lower is not None
            has_upper = upper is not None
            if has_lower and lower < 0:
                raise ValueError(lower_negative)
            if has_upper and upper < 0:
                raise ValueError(upper_negative)
            if has_lower and has_upper and lower > upper:
                raise ValueError(inverted)
@dataclass
class Weight:
    """Per-feature weights for year, rating, genres and nconsts.

    A weight of 0 leaves the feature out of the totals below. The weights that
    are greater than 0 must add up to exactly 100 times their count, so at
    least one weight has to be positive.

    Raises
    ------
    ValueError: if a weight is negative, if the positive weights sum to less
        than 100, or if their sum is not 100 * (number of positive weights)
    """
    year: int = 100
    rating: int = 100
    genres: int = 100
    nconsts: int = 100

    def __post_init__(self):
        weights = vars(self)
        for name, value in weights.items():
            if value < 0:
                raise ValueError(f'Weight for {name} must be greater than or equal to 0, got {value}')

        active = [value for value in weights.values() if value > 0]
        total_sum = sum(active)
        total_count = len(active)
        if total_sum < 100:
            raise ValueError(f'Total sum of weights must be at least 100, got {total_sum}')
        if total_count*100 != total_sum:
            raise ValueError(f'Total sum of weights must be {total_count*100}, got {total_sum}')
class Recommender:
def __init__(
    self,
    filter_: Filter = Filter(),
    weight: Weight = Weight()
) -> None:
    """
    Store the filter and weights and pre-build the SQL WHERE clause.

    The clause always excludes rows with empty ``genres`` or ``nconsts`` and
    then adds one condition per filter bound that is set.

    A bound counts as set when it is not ``None``. An explicit 0 is a real
    bound (``Filter`` accepts it): previously truthiness checks dropped it,
    so e.g. ``max_votes=0`` silently disabled the upper limit.

    The bounds are interpolated into the SQL text; comparing a bound with 0
    in ``Filter.__post_init__`` rejects strings with a TypeError.

    The default ``Filter()`` / ``Weight()`` instances are created once, when
    the method is defined, and are shared between calls; they are only read
    here.

    Args
    ----
    filter_: Filter - optional bounds on votes, year and rating
    weight: Weight - per-feature weights used when averaging rankings
    """
    self.filter = filter_
    self.weight = weight

    conditions = ["genres != ''", "nconsts != ''"]
    bounds = (
        ('votes', '>=', filter_.min_votes),
        ('votes', '<=', filter_.max_votes),
        ('year', '>=', filter_.min_year),
        ('year', '<=', filter_.max_year),
        ('rating', '>=', filter_.min_rating),
        ('rating', '<=', filter_.max_rating),
    )
    for column, operator, bound in bounds:
        if bound is not None:
            conditions.append(f'{column} {operator} {bound}')

    self.sql_where_clause = ''
    for condition in conditions:
        self.sql_where_clause = self.add_sql_where_clause(self.sql_where_clause, condition)
def add_sql_where_clause(self, old: str, new: str) -> str:
    """
    Return ``old`` extended with the condition ``new``.

    Args
    ----
    old: str - WHERE clause built so far ('' when there is none yet)
    new: str - condition to add

    Returns
    -------
    str: 'WHERE <new>' when ``old`` is empty, otherwise '<old> AND <new>'
    """
    if old == '':
        return f'WHERE {new}'
    return f'{old} AND {new}'
def get_ordered_year_from_sql(self, conn, reference_year: int) -> pl.DataFrame:
    """
    Rank the filtered titles by how close their year is to ``reference_year``.

    Args
    ----
    conn: psycopg2 connection object
    reference_year: int - year the titles are compared against

    Returns
    -------
    DataFrame:
        Ordered by ABS(year - reference_year), ties broken by votes
        (descending); ``year_index`` is the position in that order.
        | year_index (uint32) | tconst (str) |
        | --- | --- |
        | 0 | tt0000001 |
        | 1 | tt0000002 |
        | 2 | tt0000003 |
        | ... | ... |
    """
    query = f"""
SELECT tconst
FROM imdb
{self.sql_where_clause}
ORDER BY ABS(year - {reference_year}), votes DESC
"""
    ranked = pl.read_database(query, conn, schema_overrides={'tconst': str})
    return ranked.with_row_index('year_index')
def get_ordered_rating_from_sql(self, conn, reference_rating: int) -> pl.DataFrame:
    """
    Rank the filtered titles by how close their rating is to ``reference_rating``.

    Args
    ----
    conn: psycopg2 connection object
    reference_rating: int - rating the titles are compared against
        (NOTE(review): the caller passes the ``rating`` column of the
        reference row, which is documented as a float -- confirm the
        annotation)

    Returns
    -------
    DataFrame:
        Ordered by ABS(rating - reference_rating), ties broken by votes
        (descending); ``rating_index`` is the position in that order.
        | rating_index (uint32) | tconst (str) |
        | --- | --- |
        | 0 | tt0000001 |
        | 1 | tt0000002 |
        | 2 | tt0000003 |
        | ... | ... |
    """
    query = f"""
SELECT tconst
FROM imdb
{self.sql_where_clause}
ORDER BY ABS(rating - {reference_rating}), votes DESC
"""
    ranked = pl.read_database(query, conn, schema_overrides={'tconst': str})
    return ranked.with_row_index('rating_index')
def get_ordered_genres_from_df(self, df: pl.DataFrame, reference_genres: str) -> pl.DataFrame:
    """
    Rank titles by the cosine similarity of their genres to ``reference_genres``.

    Args
    ----
    df: DataFrame
        | tconst (str) | genres (str) | votes (uint32) |
        | --- | --- | --- |
        | tt0000001 | Drama, Romance | 123 |
        | tt0000002 | Comedy, Drama | 456 |
        | tt0000003 | Action, Drama | 789 |
        | ... | ... | ... |
    reference_genres: str - genres to calculate cosine similarities against

    Returns
    -------
    DataFrame:
        Sorted by cosine similarity (descending), then by votes (descending);
        ``genres_index`` is the position in that order.
        | genres_index (uint32) | tconst (str) |
        | --- | --- |
        | 0 | tt0000001 |
        | 1 | tt0000002 |
        | 2 | tt0000003 |
        | ... | ... |
    """
    # The row index that used to be added to ``df`` up front was never read
    # (the result frame is rebuilt from tconst/votes below), so it is gone.
    genres_cv = CountVectorizer(dtype=np.uint8, token_pattern=r"(?u)[\w'-]+")
    # The vocabulary is fitted on the candidate titles; the reference genres
    # are then vectorised with that same vocabulary.
    genres_count_matrix = genres_cv.fit_transform(df['genres'])
    genres_sims = cosine_similarity(genres_cv.transform([reference_genres]), genres_count_matrix)[0]
    return (
        pl.DataFrame(
            {
                'tconst': df['tconst'],
                'cosine_similarity': genres_sims,
                'votes': df['votes']
            }, schema={'tconst': str, 'cosine_similarity': pl.Float32, 'votes': pl.UInt32}
        )
        .sort(['cosine_similarity', 'votes'], descending=True)
        .drop(['cosine_similarity', 'votes'])
        .with_row_index('genres_index')
    )
def get_ordered_nconsts_from_df(self, df: pl.DataFrame, reference_nconsts: str) -> pl.DataFrame:
    """
    Rank titles by the cosine similarity of their nconsts to ``reference_nconsts``.

    Args
    ----
    df: DataFrame
        | tconst (str) | nconsts (str) | votes (uint32) |
        | --- | --- | --- |
        | tt0000001 | nm0000001, nm0000002 | 123 |
        | tt0000002 | nm0000001, nm0000003 | 456 |
        | tt0000003 | nm0000004, nm0000002 | 789 |
        | ... | ... | ... |
    reference_nconsts: str - nconsts to calculate cosine similarities against

    Returns
    -------
    DataFrame:
        Sorted by cosine similarity (descending), then by votes (descending);
        ``nconsts_index`` is the position in that order.
        | nconsts_index (uint32) | tconst (str) |
        | --- | --- |
        | 0 | tt0000001 |
        | 1 | tt0000002 |
        | 2 | tt0000003 |
        | ... | ... |
    """
    # The row index that used to be added to ``df`` up front was never read
    # (the result frame is rebuilt from tconst/votes below), so it is gone.
    nconsts_cv = CountVectorizer(dtype=np.uint8, token_pattern=r"(?u)[\w'-]+")
    # The vocabulary is fitted on the candidate titles; the reference nconsts
    # are then vectorised with that same vocabulary.
    nconsts_count_matrix = nconsts_cv.fit_transform(df['nconsts'])
    nconsts_sims = cosine_similarity(nconsts_cv.transform([reference_nconsts]), nconsts_count_matrix)[0]
    return (
        pl.DataFrame(
            {
                'tconst': df['tconst'],
                'cosine_similarity': nconsts_sims,
                'votes': df['votes']
            }, schema={'tconst': str, 'cosine_similarity': pl.Float32, 'votes': pl.UInt32}
        )
        .sort(['cosine_similarity', 'votes'], descending=True)
        .drop(['cosine_similarity', 'votes'])
        .with_row_index('nconsts_index')
    )
def get_main_df(self, conn) -> pl.DataFrame:
    """
    Load tconst, genres, nconsts and votes for every title passing the filter.

    Args
    ----
    conn: psycopg2 connection object

    Returns
    -------
    DataFrame:
        | tconst (str) | genres (str) | nconsts (str) | votes (uint32) |
        | --- | --- | --- | --- |
        | tt0000001 | Drama, Romance | nm0000001, nm0000002 | 123 |
        | tt0000002 | Comedy, Drama | nm0000001, nm0000003 | 456 |
        | tt0000003 | Action, Drama | nm0000004, nm0000002 | 789 |
        | ... | ... | ... | ... |
    """
    query = f"""
SELECT tconst, genres, nconsts, votes
FROM imdb
{self.sql_where_clause}
"""
    column_types = {'tconst': str, 'genres': str, 'nconsts': str, 'votes': pl.UInt32}
    return pl.read_database(query, conn, schema_overrides=column_types)
def get_row_by_tconst(self, conn, tconst: str) -> dict[str, Any]:
    """
    Fetch one title row by its tconst.

    The tconst is sent as a bound query parameter rather than being
    interpolated into the SQL text, so a crafted value cannot alter the
    statement.

    Args
    ----
    conn: psycopg2 connection object
    tconst: str - tconst to get row from database

    Returns
    -------
    dict: row from database, keyed by the column names the cursor reports
        {
            'tconst': str,
            'year': int,
            'genres': str,
            'nconsts': str,
            'rating': float,
            'votes': int
        }

    Raises
    ------
    ValueError: if tconst is not found in database
    """
    query = """
        SELECT tconst, year, genres, nconsts, rating, votes
        FROM imdb
        WHERE tconst = %s
    """
    with conn.cursor() as cursor:
        cursor.execute(query, (tconst,))
        row = cursor.fetchone()
        if row is None:
            raise ValueError(f"tconst '{tconst}' not found")
        # Read the column names while the cursor is still open.
        column_names = [column[0] for column in cursor.description]
    return dict(zip(column_names, row))
def set_average(self, column_name: str, features: list[str], merged_df: pl.DataFrame) -> pl.DataFrame:
    """
    Add a weighted average of the per-feature rank columns.

    Each ``<feature>_index`` column is multiplied by the matching attribute of
    ``self.weight``; the products are summed and divided by
    ``len(features) * 100``.

    Args
    ----
    column_name: str - name of the column that receives the average
    features: list[str] - features whose ``<feature>_index`` columns are combined
    merged_df: DataFrame - frame holding one ``<feature>_index`` column per feature

    Returns
    -------
    DataFrame: ``merged_df`` with the extra column ``column_name``
    """
    weighted_ranks = [
        merged_df[f'{name}_index'] * getattr(self.weight, name)
        for name in features
    ]
    total = weighted_ranks[0]
    for term in weighted_ranks[1:]:
        total = total + term
    divisor = len(features) * 100
    return merged_df.with_columns(**{column_name: (total / divisor)})
def get_single_recommendation(self, conn, tconst: str, features: list[str]) -> pl.DataFrame:
    """
    Rank the filtered titles against one reference title.

    One ranking is built per requested feature (year and rating in SQL,
    genres and nconsts from the main frame). With several features the
    rankings are aligned and combined by ``set_average``; with a single
    feature its rank column is copied into ``average``.

    Args
    ----
    conn: psycopg2 connection object
    tconst: str - tconst of the reference title
    features: list[str] - features to rank by

    Returns
    -------
    DataFrame: the ranking(s) plus an ``average`` column

    Raises
    ------
    ValueError: if the reference tconst is unknown or no ranking could be built
    """
    reference_row = self.get_row_by_tconst(conn, tconst)
    trained: dict[str, pl.DataFrame] = {}

    # Rankings computed by the database; empty results are left out.
    sql_rankers = (
        ('year', self.get_ordered_year_from_sql),
        ('rating', self.get_ordered_rating_from_sql),
    )
    for name, ranker in sql_rankers:
        if name not in features:
            continue
        ranked = ranker(conn, reference_row[name])
        if len(ranked) > 0:
            trained[name] = ranked

    # Rankings computed in memory; the main frame is loaded once for both.
    df_rankers = (
        ('genres', self.get_ordered_genres_from_df),
        ('nconsts', self.get_ordered_nconsts_from_df),
    )
    if any(name in features for name, _ in df_rankers):
        main_df = self.get_main_df(conn)
        if len(main_df) > 0:
            for name, ranker in df_rankers:
                if name not in features:
                    continue
                subset = pl.DataFrame(
                    {
                        'tconst': main_df['tconst'],
                        name: main_df[name],
                        'votes': main_df['votes']
                    }
                )
                trained[name] = ranker(subset, reference_row[name])

    if len(trained) == 0:
        raise ValueError("No recommendations found, try changing the filter or weight")

    if len(features) > 1:
        merged = pl.concat(trained.values(), how='align')
        return self.set_average("average", features=features, merged_df=merged)

    only_feature = features[0]
    trained_df = trained[only_feature]
    return trained_df.with_columns(average=trained_df[f'{only_feature}_index'])
def get_recommendations(self, conn, tconsts: list[str], n: int = 5) -> dict[str, list[str]]:
    """
    Recommend the ``n`` best-ranked titles for one or more reference titles.

    The reference titles themselves are excluded from the candidates. That
    exclusion is applied to ``self.sql_where_clause`` only for the duration
    of the call and is undone afterwards, so repeated calls do not pile up
    clauses.

    Args
    ----
    conn: psycopg2 connection object
    tconsts: list[str] - tconsts to get recommendations for; duplicates are
        ignored (first occurrence wins)
    n: int - number of recommendations to get

    Returns
    -------
    dict[str, list[str]]: recommended tconst -> list of names, in ascending
        order of score.
        With one reference title the names are the active features, scored
        by rank index divided by the feature's weight.
        With several reference titles the names are the reference tconsts,
        scored by the average computed for that reference title.

    Raises
    ------
    ValueError: if ``tconsts`` is empty, contains a value that is not an
        ASCII alphanumeric string, names an unknown title, or no
        recommendation could be built
    """
    # Duplicates would make the per-tconst bookkeeping below pop the same
    # column twice, so keep only the first occurrence of each tconst.
    tconsts = list(dict.fromkeys(tconsts))
    if not tconsts:
        raise ValueError("tconsts must contain at least one tconst")
    # The values end up inside SQL text, so anything that is not a plain
    # ASCII alphanumeric id is rejected.
    # NOTE(review): assumes tconsts are IMDb-style ids such as tt0000001 --
    # confirm no other id format is stored in the table.
    for tconst in tconsts:
        if not (isinstance(tconst, str) and tconst.isascii() and tconst.isalnum()):
            raise ValueError(f"invalid tconst: {tconst!r}")

    base_where_clause = self.sql_where_clause
    excluded = ', '.join(f"'{tconst}'" for tconst in tconsts)
    self.sql_where_clause = self.add_sql_where_clause(
        base_where_clause,
        f"tconst NOT IN ({excluded})"
    )
    try:
        features: list[str] = [
            name for name in ('year', 'rating', 'genres', 'nconsts')
            if getattr(self.weight, name) > 0
        ]

        responses: dict[str, list[str]] = {}
        if len(tconsts) == 1:
            merged_df = self.get_single_recommendation(conn, tconsts[0], features).sort('average')[:n]
            for row in merged_df.rows(named=True):
                row.pop('average')
                current_tconst: str = row.pop('tconst')
                for feature in features:
                    # Scale each rank by its weight so features can be compared.
                    row[feature] = row.pop(f"{feature}_index") / getattr(self.weight, feature)
                responses[current_tconst] = [
                    column for column, _ in sorted(row.items(), key=lambda item: item[1])
                ]
            return responses

        trained_dfs: dict[str, pl.DataFrame] = {}
        for tconst in tconsts:
            df = self.get_single_recommendation(conn, tconst, features)
            trained_dfs[tconst] = pl.DataFrame({
                'tconst': df['tconst'],
                f"{tconst}_average": df['average']
            })
        merged_df: pl.DataFrame = pl.concat(trained_dfs.values(), how='align')

        all_average = merged_df[f"{tconsts[0]}_average"]
        for tconst in tconsts[1:]:
            all_average = all_average + merged_df[f"{tconst}_average"]
        merged_df = merged_df.with_columns(all_average=all_average / len(tconsts)).sort('all_average')[:n]

        for row in merged_df.rows(named=True):
            row.pop('all_average')
            current_tconst = row.pop('tconst')
            for tconst in tconsts:
                row[tconst] = row.pop(f"{tconst}_average")
            responses[current_tconst] = [
                column for column, _ in sorted(row.items(), key=lambda item: item[1])
            ]
        return responses
    finally:
        # Undo the temporary exclusion so the instance can be reused.
        self.sql_where_clause = base_where_clause

283
recommender/uv.lock generated Normal file
View File

@ -0,0 +1,283 @@
version = 1
requires-python = "==3.12.3"
[[package]]
name = "grpcio"
version = "1.67.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/20/53/d9282a66a5db45981499190b77790570617a604a38f3d103d0400974aeb5/grpcio-1.67.1.tar.gz", hash = "sha256:3dc2ed4cabea4dc14d5e708c2b426205956077cc5de419b4d4079315017e9732", size = 12580022 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6e/25/6f95bd18d5f506364379eabc0d5874873cc7dbdaf0757df8d1e82bc07a88/grpcio-1.67.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:267d1745894200e4c604958da5f856da6293f063327cb049a51fe67348e4f953", size = 5089809 },
{ url = "https://files.pythonhosted.org/packages/10/3f/d79e32e5d0354be33a12db2267c66d3cfeff700dd5ccdd09fd44a3ff4fb6/grpcio-1.67.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:85f69fdc1d28ce7cff8de3f9c67db2b0ca9ba4449644488c1e0303c146135ddb", size = 10981985 },
{ url = "https://files.pythonhosted.org/packages/21/f2/36fbc14b3542e3a1c20fb98bd60c4732c55a44e374a4eb68f91f28f14aab/grpcio-1.67.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:f26b0b547eb8d00e195274cdfc63ce64c8fc2d3e2d00b12bf468ece41a0423a0", size = 5588770 },
{ url = "https://files.pythonhosted.org/packages/0d/af/bbc1305df60c4e65de8c12820a942b5e37f9cf684ef5e49a63fbb1476a73/grpcio-1.67.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4422581cdc628f77302270ff839a44f4c24fdc57887dc2a45b7e53d8fc2376af", size = 6214476 },
{ url = "https://files.pythonhosted.org/packages/92/cf/1d4c3e93efa93223e06a5c83ac27e32935f998bc368e276ef858b8883154/grpcio-1.67.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d7616d2ded471231c701489190379e0c311ee0a6c756f3c03e6a62b95a7146e", size = 5850129 },
{ url = "https://files.pythonhosted.org/packages/ae/ca/26195b66cb253ac4d5ef59846e354d335c9581dba891624011da0e95d67b/grpcio-1.67.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8a00efecde9d6fcc3ab00c13f816313c040a28450e5e25739c24f432fc6d3c75", size = 6568489 },
{ url = "https://files.pythonhosted.org/packages/d1/94/16550ad6b3f13b96f0856ee5dfc2554efac28539ee84a51d7b14526da985/grpcio-1.67.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:699e964923b70f3101393710793289e42845791ea07565654ada0969522d0a38", size = 6149369 },
{ url = "https://files.pythonhosted.org/packages/33/0d/4c3b2587e8ad7f121b597329e6c2620374fccbc2e4e1aa3c73ccc670fde4/grpcio-1.67.1-cp312-cp312-win32.whl", hash = "sha256:4e7b904484a634a0fff132958dabdb10d63e0927398273917da3ee103e8d1f78", size = 3599176 },
{ url = "https://files.pythonhosted.org/packages/7d/36/0c03e2d80db69e2472cf81c6123aa7d14741de7cf790117291a703ae6ae1/grpcio-1.67.1-cp312-cp312-win_amd64.whl", hash = "sha256:5721e66a594a6c4204458004852719b38f3d5522082be9061d6510b455c90afc", size = 4346574 },
{ url = "https://files.pythonhosted.org/packages/12/d2/2f032b7a153c7723ea3dea08bffa4bcaca9e0e5bdf643ce565b76da87461/grpcio-1.67.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:aa0162e56fd10a5547fac8774c4899fc3e18c1aa4a4759d0ce2cd00d3696ea6b", size = 5091487 },
{ url = "https://files.pythonhosted.org/packages/d0/ae/ea2ff6bd2475a082eb97db1104a903cf5fc57c88c87c10b3c3f41a184fc0/grpcio-1.67.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:beee96c8c0b1a75d556fe57b92b58b4347c77a65781ee2ac749d550f2a365dc1", size = 10943530 },
{ url = "https://files.pythonhosted.org/packages/07/62/646be83d1a78edf8d69b56647327c9afc223e3140a744c59b25fbb279c3b/grpcio-1.67.1-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:a93deda571a1bf94ec1f6fcda2872dad3ae538700d94dc283c672a3b508ba3af", size = 5589079 },
{ url = "https://files.pythonhosted.org/packages/d0/25/71513d0a1b2072ce80d7f5909a93596b7ed10348b2ea4fdcbad23f6017bf/grpcio-1.67.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e6f255980afef598a9e64a24efce87b625e3e3c80a45162d111a461a9f92955", size = 6213542 },
{ url = "https://files.pythonhosted.org/packages/76/9a/d21236297111052dcb5dc85cd77dc7bf25ba67a0f55ae028b2af19a704bc/grpcio-1.67.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e838cad2176ebd5d4a8bb03955138d6589ce9e2ce5d51c3ada34396dbd2dba8", size = 5850211 },
{ url = "https://files.pythonhosted.org/packages/2d/fe/70b1da9037f5055be14f359026c238821b9bcf6ca38a8d760f59a589aacd/grpcio-1.67.1-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:a6703916c43b1d468d0756c8077b12017a9fcb6a1ef13faf49e67d20d7ebda62", size = 6572129 },
{ url = "https://files.pythonhosted.org/packages/74/0d/7df509a2cd2a54814598caf2fb759f3e0b93764431ff410f2175a6efb9e4/grpcio-1.67.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:917e8d8994eed1d86b907ba2a61b9f0aef27a2155bca6cbb322430fc7135b7bb", size = 6149819 },
{ url = "https://files.pythonhosted.org/packages/0a/08/bc3b0155600898fd10f16b79054e1cca6cb644fa3c250c0fe59385df5e6f/grpcio-1.67.1-cp313-cp313-win32.whl", hash = "sha256:e279330bef1744040db8fc432becc8a727b84f456ab62b744d3fdb83f327e121", size = 3596561 },
{ url = "https://files.pythonhosted.org/packages/5a/96/44759eca966720d0f3e1b105c43f8ad4590c97bf8eb3cd489656e9590baa/grpcio-1.67.1-cp313-cp313-win_amd64.whl", hash = "sha256:fa0c739ad8b1996bd24823950e3cb5152ae91fca1c09cc791190bf1627ffefba", size = 4346042 },
]
[[package]]
name = "grpcio-health-checking"
version = "1.67.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "grpcio" },
{ name = "protobuf" },
]
sdist = { url = "https://files.pythonhosted.org/packages/64/dd/e3b339fa44dc75b501a1a22cb88f1af5b1f8c964488f19c4de4cfbbf05ba/grpcio_health_checking-1.67.1.tar.gz", hash = "sha256:ca90fa76a6afbb4fda71d734cb9767819bba14928b91e308cffbb0c311eb941e", size = 16775 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5c/8d/7a9878dca6616b48093d71c52d0bc79cb2dd1a2698ff6f5ce7406306de12/grpcio_health_checking-1.67.1-py3-none-any.whl", hash = "sha256:93753da5062152660aef2286c9b261e07dd87124a65e4dc9fbd47d1ce966b39d", size = 18924 },
]
[[package]]
name = "grpcio-reflection"
version = "1.67.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "grpcio" },
{ name = "protobuf" },
]
sdist = { url = "https://files.pythonhosted.org/packages/fe/69/08a7c3c3524e3af650d22bd8e0015e35bc284eae919fbb38bc2702809d07/grpcio_reflection-1.67.0.tar.gz", hash = "sha256:c47143738b1897b6ce4af5e0e338c85c9aee5fdcbb3355d368a8dcae46d8933c", size = 18818 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9c/cf/343839a5800c38a0231b78708e84db1fa4797751cb44892093ef8b215377/grpcio_reflection-1.67.0-py3-none-any.whl", hash = "sha256:2a2f6d865adecd8d5f81b1a6858252d61b1897997f0656c73807045c5e79c421", size = 22691 },
]
[[package]]
name = "grpcio-tools"
version = "1.67.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "grpcio" },
{ name = "protobuf" },
{ name = "setuptools" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e7/f8/62e15867651b72f6f95313e21d81f5f1c210b69a4cc664aecf52ec4c8a53/grpcio_tools-1.67.0.tar.gz", hash = "sha256:181b3d4e61b83142c182ec366f3079b0023509743986e54c9465ca38cac255f8", size = 5159163 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d6/b6/57e67c0244db8d7c0c312041293b806bfb1c9d66c26159e6faf39cc10356/grpcio_tools-1.67.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:dca7f053cbdb26a587d4410ddb893877c585fb60a31f22fdd128e4f7c4dab27c", size = 2307646 },
{ url = "https://files.pythonhosted.org/packages/52/43/837f08b85b04ac225aebe1d7da1a7a79fc313f231306c865b5112cef7dc4/grpcio_tools-1.67.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:de8c4f68ffa690769d84329c17c7fdd5fbe4c61b8f8a0de03f1ad8ef8bb06963", size = 5525447 },
{ url = "https://files.pythonhosted.org/packages/3e/5f/adb8b87f5c403ba53529b6645184beddfa63abf2c524a6dabaa430e6bab3/grpcio_tools-1.67.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:6e4ecb24c27a78f09fead45d4ed873805d6026124ccb6793b6fb93a490b78ddf", size = 2281767 },
{ url = "https://files.pythonhosted.org/packages/6e/cd/3d6a7971e28b96cb618abb281325517443744ecfe48aa03f27a17cd5d4e1/grpcio_tools-1.67.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:004d6ef1b5f724480f05c0bdc904bf8c78c43d633c537d99abe51b52ce0cadeb", size = 2617363 },
{ url = "https://files.pythonhosted.org/packages/2d/a9/b8f1eae3db0f1b6f9548bd2032f48cb6f1ec9bc6781436d52dff4b352fab/grpcio_tools-1.67.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9dd257072c86eb9b36791b3674a513a215ba76bbdd38fc228f0e8c6dc5ce3524", size = 2415322 },
{ url = "https://files.pythonhosted.org/packages/9b/fc/0045bf2e5c97a5ffe0ff2c9a4e4a62894402e8d7094162c2084a809c9d1c/grpcio_tools-1.67.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:a8cca551317ed26e17d13b6ee27b2bd62f5fe9b3842b4e88389deb984f995848", size = 3225044 },
{ url = "https://files.pythonhosted.org/packages/dc/73/eaf40958dd648dd98a0fbd30df2b51c5beb7ee24127c1f0bb99ea44fd435/grpcio_tools-1.67.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a7ac3b4f837c693142f6688b629d1f6408f6ab250d927159b572555f5339fe25", size = 2870418 },
{ url = "https://files.pythonhosted.org/packages/b4/77/e307e91816123444ff657bbae2269cb912f31a9390118ed371bde9d0c1f3/grpcio_tools-1.67.0-cp312-cp312-win32.whl", hash = "sha256:95feec33388e2a8f72c360a68efe6f0bfed9c771e94d21b7f2359d0010f60219", size = 940540 },
{ url = "https://files.pythonhosted.org/packages/be/2a/0c1a64e88fbc17235b68d3178be6cf4a69aea5bd1deed683c0bbd2f5e9f9/grpcio_tools-1.67.0-cp312-cp312-win_amd64.whl", hash = "sha256:50a31d035193ebe7154181eac84734e25bdcdb36adba849d3b2adf1c3b0c382b", size = 1090427 },
{ url = "https://files.pythonhosted.org/packages/1e/44/cfe3aa14158d8becffd7823d5147039378d448097fb91ec723ad8b6d60cb/grpcio_tools-1.67.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:9ecb7c2e5da052a3feaeaa83d8f2a946a8feec8a50751b0e6175da982b49ebb1", size = 2307454 },
{ url = "https://files.pythonhosted.org/packages/46/9c/99b345764b355b11f1ea7d160276e9eb9d32a1c77e4bfaa2db3de025f7d2/grpcio_tools-1.67.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:3c52164f2b9d41c6d75464bb45f45737dcb421e92e98d85d94fda100c67a24d8", size = 5518036 },
{ url = "https://files.pythonhosted.org/packages/e8/8d/f1b4378335f39f37f9b440b26e7ecaa19660eef6b438ac7d8c5ad7e96d73/grpcio_tools-1.67.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:471f58b919767290260d427dd9b760796e6208ee5fcda2f76bb8bd585ff842ec", size = 2281084 },
{ url = "https://files.pythonhosted.org/packages/27/ec/c31e5ec4d01f17e38fce03dfc3e47880bc25e1dd681fffe00fab04e21e33/grpcio_tools-1.67.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:72c6bcdf38f672721c093c92b1fb1f9a02a365acc5bd42e1c69fe6e904b26081", size = 2616930 },
{ url = "https://files.pythonhosted.org/packages/b5/2b/82f3451ad9471ba946ca0e2ff43dc3269030d5e963d86d1cfe199e07dc38/grpcio_tools-1.67.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:833b1eb9c03d28a798294523f75294055eff78fa897adf797876337b901afeb9", size = 2414635 },
{ url = "https://files.pythonhosted.org/packages/89/59/2811cb32947f21075c0b53a87aa652653154e6db3f03766e29b1f80a4bb6/grpcio_tools-1.67.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:1db92ad6ade1946fc5705eb04956fcfdb3a0a4682de8dc3fce31cb97b6e4fcb8", size = 3224330 },
{ url = "https://files.pythonhosted.org/packages/3f/25/dde7cef6e639dae24e93f4385e689f554a1d2a531d5703786a6b7b8366f3/grpcio_tools-1.67.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:38128310ded818e1044c0cd0979d76f7c0d3c3946a526a8aa39cd258624c3bf3", size = 2869633 },
{ url = "https://files.pythonhosted.org/packages/6f/dc/313bbdc01e4bd062d1e86cf667d81338670b9f44afa81a7b4e5ebf566ff4/grpcio_tools-1.67.0-cp313-cp313-win32.whl", hash = "sha256:db57930dc20ab678311727883bdb9f122daf06c14f3fd3067c9ccedb7eb056c3", size = 939997 },
{ url = "https://files.pythonhosted.org/packages/9e/07/5227eb621973b6afe7e6b3d4c637ed14069b7f5f7f45cc804c59df791304/grpcio_tools-1.67.0-cp313-cp313-win_amd64.whl", hash = "sha256:7de44d8d3bb920a4973a559f2950d03382fa4aed4880306416ffa73d24838477", size = 1089819 },
]
[[package]]
name = "joblib"
version = "1.4.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/64/33/60135848598c076ce4b231e1b1895170f45fbcaeaa2c9d5e38b04db70c35/joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e", size = 2116621 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/91/29/df4b9b42f2be0b623cbd5e2140cafcaa2bef0759a00b7b70104dcfe2fb51/joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6", size = 301817 },
]
[[package]]
name = "movier"
version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "grpcio" },
{ name = "grpcio-health-checking" },
{ name = "grpcio-reflection" },
{ name = "grpcio-tools" },
{ name = "polars" },
{ name = "psycopg2-binary" },
{ name = "scikit-learn" },
]
[package.metadata]
requires-dist = [
{ name = "grpcio", specifier = ">=1.67.0" },
{ name = "grpcio-health-checking", specifier = "==1.67.1" },
{ name = "grpcio-reflection", specifier = "==1.67.0" },
{ name = "grpcio-tools", specifier = "==1.67.0" },
{ name = "polars", specifier = "==1.12.0" },
{ name = "psycopg2-binary", specifier = "==2.9.10" },
{ name = "scikit-learn", specifier = "==1.5.2" },
]
[[package]]
name = "numpy"
version = "2.1.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/25/ca/1166b75c21abd1da445b97bf1fa2f14f423c6cfb4fc7c4ef31dccf9f6a94/numpy-2.1.3.tar.gz", hash = "sha256:aa08e04e08aaf974d4458def539dece0d28146d866a39da5639596f4921fd761", size = 20166090 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/8a/f0/385eb9970309643cbca4fc6eebc8bb16e560de129c91258dfaa18498da8b/numpy-2.1.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f55ba01150f52b1027829b50d70ef1dafd9821ea82905b63936668403c3b471e", size = 20849658 },
{ url = "https://files.pythonhosted.org/packages/54/4a/765b4607f0fecbb239638d610d04ec0a0ded9b4951c56dc68cef79026abf/numpy-2.1.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13138eadd4f4da03074851a698ffa7e405f41a0845a6b1ad135b81596e4e9958", size = 13492258 },
{ url = "https://files.pythonhosted.org/packages/bd/a7/2332679479c70b68dccbf4a8eb9c9b5ee383164b161bee9284ac141fbd33/numpy-2.1.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:a6b46587b14b888e95e4a24d7b13ae91fa22386c199ee7b418f449032b2fa3b8", size = 5090249 },
{ url = "https://files.pythonhosted.org/packages/c1/67/4aa00316b3b981a822c7a239d3a8135be2a6945d1fd11d0efb25d361711a/numpy-2.1.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:0fa14563cc46422e99daef53d725d0c326e99e468a9320a240affffe87852564", size = 6621704 },
{ url = "https://files.pythonhosted.org/packages/5e/da/1a429ae58b3b6c364eeec93bf044c532f2ff7b48a52e41050896cf15d5b1/numpy-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8637dcd2caa676e475503d1f8fdb327bc495554e10838019651b76d17b98e512", size = 13606089 },
{ url = "https://files.pythonhosted.org/packages/9e/3e/3757f304c704f2f0294a6b8340fcf2be244038be07da4cccf390fa678a9f/numpy-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2312b2aa89e1f43ecea6da6ea9a810d06aae08321609d8dc0d0eda6d946a541b", size = 16043185 },
{ url = "https://files.pythonhosted.org/packages/43/97/75329c28fea3113d00c8d2daf9bc5828d58d78ed661d8e05e234f86f0f6d/numpy-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a38c19106902bb19351b83802531fea19dee18e5b37b36454f27f11ff956f7fc", size = 16410751 },
{ url = "https://files.pythonhosted.org/packages/ad/7a/442965e98b34e0ae9da319f075b387bcb9a1e0658276cc63adb8c9686f7b/numpy-2.1.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:02135ade8b8a84011cbb67dc44e07c58f28575cf9ecf8ab304e51c05528c19f0", size = 14082705 },
{ url = "https://files.pythonhosted.org/packages/ac/b6/26108cf2cfa5c7e03fb969b595c93131eab4a399762b51ce9ebec2332e80/numpy-2.1.3-cp312-cp312-win32.whl", hash = "sha256:e6988e90fcf617da2b5c78902fe8e668361b43b4fe26dbf2d7b0f8034d4cafb9", size = 6239077 },
{ url = "https://files.pythonhosted.org/packages/a6/84/fa11dad3404b7634aaab50733581ce11e5350383311ea7a7010f464c0170/numpy-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:0d30c543f02e84e92c4b1f415b7c6b5326cbe45ee7882b6b77db7195fb971e3a", size = 12566858 },
{ url = "https://files.pythonhosted.org/packages/4d/0b/620591441457e25f3404c8057eb924d04f161244cb8a3680d529419aa86e/numpy-2.1.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:96fe52fcdb9345b7cd82ecd34547fca4321f7656d500eca497eb7ea5a926692f", size = 20836263 },
{ url = "https://files.pythonhosted.org/packages/45/e1/210b2d8b31ce9119145433e6ea78046e30771de3fe353f313b2778142f34/numpy-2.1.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f653490b33e9c3a4c1c01d41bc2aef08f9475af51146e4a7710c450cf9761598", size = 13507771 },
{ url = "https://files.pythonhosted.org/packages/55/44/aa9ee3caee02fa5a45f2c3b95cafe59c44e4b278fbbf895a93e88b308555/numpy-2.1.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:dc258a761a16daa791081d026f0ed4399b582712e6fc887a95af09df10c5ca57", size = 5075805 },
{ url = "https://files.pythonhosted.org/packages/78/d6/61de6e7e31915ba4d87bbe1ae859e83e6582ea14c6add07c8f7eefd8488f/numpy-2.1.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:016d0f6f5e77b0f0d45d77387ffa4bb89816b57c835580c3ce8e099ef830befe", size = 6608380 },
{ url = "https://files.pythonhosted.org/packages/3e/46/48bdf9b7241e317e6cf94276fe11ba673c06d1fdf115d8b4ebf616affd1a/numpy-2.1.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c181ba05ce8299c7aa3125c27b9c2167bca4a4445b7ce73d5febc411ca692e43", size = 13602451 },
{ url = "https://files.pythonhosted.org/packages/70/50/73f9a5aa0810cdccda9c1d20be3cbe4a4d6ea6bfd6931464a44c95eef731/numpy-2.1.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5641516794ca9e5f8a4d17bb45446998c6554704d888f86df9b200e66bdcce56", size = 16039822 },
{ url = "https://files.pythonhosted.org/packages/ad/cd/098bc1d5a5bc5307cfc65ee9369d0ca658ed88fbd7307b0d49fab6ca5fa5/numpy-2.1.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ea4dedd6e394a9c180b33c2c872b92f7ce0f8e7ad93e9585312b0c5a04777a4a", size = 16411822 },
{ url = "https://files.pythonhosted.org/packages/83/a2/7d4467a2a6d984549053b37945620209e702cf96a8bc658bc04bba13c9e2/numpy-2.1.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0df3635b9c8ef48bd3be5f862cf71b0a4716fa0e702155c45067c6b711ddcef", size = 14079598 },
{ url = "https://files.pythonhosted.org/packages/e9/6a/d64514dcecb2ee70bfdfad10c42b76cab657e7ee31944ff7a600f141d9e9/numpy-2.1.3-cp313-cp313-win32.whl", hash = "sha256:50ca6aba6e163363f132b5c101ba078b8cbd3fa92c7865fd7d4d62d9779ac29f", size = 6236021 },
{ url = "https://files.pythonhosted.org/packages/bb/f9/12297ed8d8301a401e7d8eb6b418d32547f1d700ed3c038d325a605421a4/numpy-2.1.3-cp313-cp313-win_amd64.whl", hash = "sha256:747641635d3d44bcb380d950679462fae44f54b131be347d5ec2bce47d3df9ed", size = 12560405 },
{ url = "https://files.pythonhosted.org/packages/a7/45/7f9244cd792e163b334e3a7f02dff1239d2890b6f37ebf9e82cbe17debc0/numpy-2.1.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:996bb9399059c5b82f76b53ff8bb686069c05acc94656bb259b1d63d04a9506f", size = 20859062 },
{ url = "https://files.pythonhosted.org/packages/b1/b4/a084218e7e92b506d634105b13e27a3a6645312b93e1c699cc9025adb0e1/numpy-2.1.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:45966d859916ad02b779706bb43b954281db43e185015df6eb3323120188f9e4", size = 13515839 },
{ url = "https://files.pythonhosted.org/packages/27/45/58ed3f88028dcf80e6ea580311dc3edefdd94248f5770deb980500ef85dd/numpy-2.1.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:baed7e8d7481bfe0874b566850cb0b85243e982388b7b23348c6db2ee2b2ae8e", size = 5116031 },
{ url = "https://files.pythonhosted.org/packages/37/a8/eb689432eb977d83229094b58b0f53249d2209742f7de529c49d61a124a0/numpy-2.1.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:a9f7f672a3388133335589cfca93ed468509cb7b93ba3105fce780d04a6576a0", size = 6629977 },
{ url = "https://files.pythonhosted.org/packages/42/a3/5355ad51ac73c23334c7caaed01adadfda49544f646fcbfbb4331deb267b/numpy-2.1.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7aac50327da5d208db2eec22eb11e491e3fe13d22653dce51b0f4109101b408", size = 13575951 },
{ url = "https://files.pythonhosted.org/packages/c4/70/ea9646d203104e647988cb7d7279f135257a6b7e3354ea6c56f8bafdb095/numpy-2.1.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4394bc0dbd074b7f9b52024832d16e019decebf86caf909d94f6b3f77a8ee3b6", size = 16022655 },
{ url = "https://files.pythonhosted.org/packages/14/ce/7fc0612903e91ff9d0b3f2eda4e18ef9904814afcae5b0f08edb7f637883/numpy-2.1.3-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:50d18c4358a0a8a53f12a8ba9d772ab2d460321e6a93d6064fc22443d189853f", size = 16399902 },
{ url = "https://files.pythonhosted.org/packages/ef/62/1d3204313357591c913c32132a28f09a26357e33ea3c4e2fe81269e0dca1/numpy-2.1.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:14e253bd43fc6b37af4921b10f6add6925878a42a0c5fe83daee390bca80bc17", size = 14067180 },
{ url = "https://files.pythonhosted.org/packages/24/d7/78a40ed1d80e23a774cb8a34ae8a9493ba1b4271dde96e56ccdbab1620ef/numpy-2.1.3-cp313-cp313t-win32.whl", hash = "sha256:08788d27a5fd867a663f6fc753fd7c3ad7e92747efc73c53bca2f19f8bc06f48", size = 6291907 },
{ url = "https://files.pythonhosted.org/packages/86/09/a5ab407bd7f5f5599e6a9261f964ace03a73e7c6928de906981c31c38082/numpy-2.1.3-cp313-cp313t-win_amd64.whl", hash = "sha256:2564fbdf2b99b3f815f2107c1bbc93e2de8ee655a69c261363a1172a79a257d4", size = 12644098 },
]
[[package]]
name = "polars"
version = "1.12.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/5f/df/55127a3099e990b45ce3a29ab6789a083451e76e7109fb754aad5525360b/polars-1.12.0.tar.gz", hash = "sha256:fb5c92de1a8f7d0a3f923fe48ea89eb518bdf55315ae917012350fa072bd64f4", size = 4090738 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6e/ae/77c7ec395d9361ae2086693af1947c9a2b21346ba3faf092bb154b735227/polars-1.12.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:8f3c4e4e423c373dda07b4c8a7ff12aa02094b524767d0ca306b1eba67f2d99e", size = 32923786 },
{ url = "https://files.pythonhosted.org/packages/97/1c/60736d5588309eb528c52538e116593cb275310bab82ba28702cd87a76d1/polars-1.12.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:aa6f9862f0cec6353243920d9b8d858c21ec8f25f91af203dea6ff91980e140d", size = 28887255 },
{ url = "https://files.pythonhosted.org/packages/5a/3e/31257118e7e087fa27c230b8fadf8ff15d521140bf58558dc889ee0c9c5e/polars-1.12.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afb03647b5160737d2119532ee8ffe825de1d19d87f81bbbb005131786f7d59b", size = 34126501 },
{ url = "https://files.pythonhosted.org/packages/ad/e6/d03053e6064d262f2ec41172a5092b08fc20d10c059dda6c9460371cfd7e/polars-1.12.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:ea96aba5eb3dab8f0e6abf05ab3fc2136b329261860ef8661d20f5456a2d78e0", size = 30479546 },
{ url = "https://files.pythonhosted.org/packages/d5/28/3d44ddf56a5c95272b202ce8aa0e9b818a1310e83525c4c29176b538ae7c/polars-1.12.0-cp39-abi3-win_amd64.whl", hash = "sha256:a228a4b320a36d03a9ec9dfe7241b6d80a2f119b2dceb1da953166655e4cf43c", size = 33790337 },
]
[[package]]
name = "protobuf"
version = "5.28.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/74/6e/e69eb906fddcb38f8530a12f4b410699972ab7ced4e21524ece9d546ac27/protobuf-5.28.3.tar.gz", hash = "sha256:64badbc49180a5e401f373f9ce7ab1d18b63f7dd4a9cdc43c92b9f0b481cef7b", size = 422479 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/c5/05163fad52d7c43e124a545f1372d18266db36036377ad29de4271134a6a/protobuf-5.28.3-cp310-abi3-win32.whl", hash = "sha256:0c4eec6f987338617072592b97943fdbe30d019c56126493111cf24344c1cc24", size = 419624 },
{ url = "https://files.pythonhosted.org/packages/9c/4c/4563ebe001ff30dca9d7ed12e471fa098d9759712980cde1fd03a3a44fb7/protobuf-5.28.3-cp310-abi3-win_amd64.whl", hash = "sha256:91fba8f445723fcf400fdbe9ca796b19d3b1242cd873907979b9ed71e4afe868", size = 431464 },
{ url = "https://files.pythonhosted.org/packages/1c/f2/baf397f3dd1d3e4af7e3f5a0382b868d25ac068eefe1ebde05132333436c/protobuf-5.28.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a3f6857551e53ce35e60b403b8a27b0295f7d6eb63d10484f12bc6879c715687", size = 414743 },
{ url = "https://files.pythonhosted.org/packages/85/50/cd61a358ba1601f40e7d38bcfba22e053f40ef2c50d55b55926aecc8fec7/protobuf-5.28.3-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:3fa2de6b8b29d12c61911505d893afe7320ce7ccba4df913e2971461fa36d584", size = 316511 },
{ url = "https://files.pythonhosted.org/packages/5d/ae/3257b09328c0b4e59535e497b0c7537d4954038bdd53a2f0d2f49d15a7c4/protobuf-5.28.3-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:712319fbdddb46f21abb66cd33cb9e491a5763b2febd8f228251add221981135", size = 316624 },
{ url = "https://files.pythonhosted.org/packages/ad/c3/2377c159e28ea89a91cf1ca223f827ae8deccb2c9c401e5ca233cd73002f/protobuf-5.28.3-py3-none-any.whl", hash = "sha256:cee1757663fa32a1ee673434fcf3bf24dd54763c79690201208bafec62f19eed", size = 169511 },
]
[[package]]
name = "psycopg2-binary"
version = "2.9.10"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/cb/0e/bdc8274dc0585090b4e3432267d7be4dfbfd8971c0fa59167c711105a6bf/psycopg2-binary-2.9.10.tar.gz", hash = "sha256:4b3df0e6990aa98acda57d983942eff13d824135fe2250e6522edaa782a06de2", size = 385764 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/49/7d/465cc9795cf76f6d329efdafca74693714556ea3891813701ac1fee87545/psycopg2_binary-2.9.10-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:880845dfe1f85d9d5f7c412efea7a08946a46894537e4e5d091732eb1d34d9a0", size = 3044771 },
{ url = "https://files.pythonhosted.org/packages/8b/31/6d225b7b641a1a2148e3ed65e1aa74fc86ba3fee850545e27be9e1de893d/psycopg2_binary-2.9.10-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9440fa522a79356aaa482aa4ba500b65f28e5d0e63b801abf6aa152a29bd842a", size = 3275336 },
{ url = "https://files.pythonhosted.org/packages/30/b7/a68c2b4bff1cbb1728e3ec864b2d92327c77ad52edcd27922535a8366f68/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3923c1d9870c49a2d44f795df0c889a22380d36ef92440ff618ec315757e539", size = 2851637 },
{ url = "https://files.pythonhosted.org/packages/0b/b1/cfedc0e0e6f9ad61f8657fd173b2f831ce261c02a08c0b09c652b127d813/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b2c956c028ea5de47ff3a8d6b3cc3330ab45cf0b7c3da35a2d6ff8420896526", size = 3082097 },
{ url = "https://files.pythonhosted.org/packages/18/ed/0a8e4153c9b769f59c02fb5e7914f20f0b2483a19dae7bf2db54b743d0d0/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f758ed67cab30b9a8d2833609513ce4d3bd027641673d4ebc9c067e4d208eec1", size = 3264776 },
{ url = "https://files.pythonhosted.org/packages/10/db/d09da68c6a0cdab41566b74e0a6068a425f077169bed0946559b7348ebe9/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cd9b4f2cfab88ed4a9106192de509464b75a906462fb846b936eabe45c2063e", size = 3020968 },
{ url = "https://files.pythonhosted.org/packages/94/28/4d6f8c255f0dfffb410db2b3f9ac5218d959a66c715c34cac31081e19b95/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dc08420625b5a20b53551c50deae6e231e6371194fa0651dbe0fb206452ae1f", size = 2872334 },
{ url = "https://files.pythonhosted.org/packages/05/f7/20d7bf796593c4fea95e12119d6cc384ff1f6141a24fbb7df5a668d29d29/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d7cd730dfa7c36dbe8724426bf5612798734bff2d3c3857f36f2733f5bfc7c00", size = 2822722 },
{ url = "https://files.pythonhosted.org/packages/4d/e4/0c407ae919ef626dbdb32835a03b6737013c3cc7240169843965cada2bdf/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:155e69561d54d02b3c3209545fb08938e27889ff5a10c19de8d23eb5a41be8a5", size = 2920132 },
{ url = "https://files.pythonhosted.org/packages/2d/70/aa69c9f69cf09a01da224909ff6ce8b68faeef476f00f7ec377e8f03be70/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c3cc28a6fd5a4a26224007712e79b81dbaee2ffb90ff406256158ec4d7b52b47", size = 2959312 },
{ url = "https://files.pythonhosted.org/packages/d3/bd/213e59854fafe87ba47814bf413ace0dcee33a89c8c8c814faca6bc7cf3c/psycopg2_binary-2.9.10-cp312-cp312-win32.whl", hash = "sha256:ec8a77f521a17506a24a5f626cb2aee7850f9b69a0afe704586f63a464f3cd64", size = 1025191 },
{ url = "https://files.pythonhosted.org/packages/92/29/06261ea000e2dc1e22907dbbc483a1093665509ea586b29b8986a0e56733/psycopg2_binary-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:18c5ee682b9c6dd3696dad6e54cc7ff3a1a9020df6a5c0f861ef8bfd338c3ca0", size = 1164031 },
{ url = "https://files.pythonhosted.org/packages/3e/30/d41d3ba765609c0763505d565c4d12d8f3c79793f0d0f044ff5a28bf395b/psycopg2_binary-2.9.10-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:26540d4a9a4e2b096f1ff9cce51253d0504dca5a85872c7f7be23be5a53eb18d", size = 3044699 },
{ url = "https://files.pythonhosted.org/packages/35/44/257ddadec7ef04536ba71af6bc6a75ec05c5343004a7ec93006bee66c0bc/psycopg2_binary-2.9.10-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e217ce4d37667df0bc1c397fdcd8de5e81018ef305aed9415c3b093faaeb10fb", size = 3275245 },
{ url = "https://files.pythonhosted.org/packages/1b/11/48ea1cd11de67f9efd7262085588790a95d9dfcd9b8a687d46caf7305c1a/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:245159e7ab20a71d989da00f280ca57da7641fa2cdcf71749c193cea540a74f7", size = 2851631 },
{ url = "https://files.pythonhosted.org/packages/62/e0/62ce5ee650e6c86719d621a761fe4bc846ab9eff8c1f12b1ed5741bf1c9b/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c4ded1a24b20021ebe677b7b08ad10bf09aac197d6943bfe6fec70ac4e4690d", size = 3082140 },
{ url = "https://files.pythonhosted.org/packages/27/ce/63f946c098611f7be234c0dd7cb1ad68b0b5744d34f68062bb3c5aa510c8/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3abb691ff9e57d4a93355f60d4f4c1dd2d68326c968e7db17ea96df3c023ef73", size = 3264762 },
{ url = "https://files.pythonhosted.org/packages/43/25/c603cd81402e69edf7daa59b1602bd41eb9859e2824b8c0855d748366ac9/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8608c078134f0b3cbd9f89b34bd60a943b23fd33cc5f065e8d5f840061bd0673", size = 3020967 },
{ url = "https://files.pythonhosted.org/packages/5f/d6/8708d8c6fca531057fa170cdde8df870e8b6a9b136e82b361c65e42b841e/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:230eeae2d71594103cd5b93fd29d1ace6420d0b86f4778739cb1a5a32f607d1f", size = 2872326 },
{ url = "https://files.pythonhosted.org/packages/ce/ac/5b1ea50fc08a9df82de7e1771537557f07c2632231bbab652c7e22597908/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909", size = 2822712 },
{ url = "https://files.pythonhosted.org/packages/c4/fc/504d4503b2abc4570fac3ca56eb8fed5e437bf9c9ef13f36b6621db8ef00/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1", size = 2920155 },
{ url = "https://files.pythonhosted.org/packages/b2/d1/323581e9273ad2c0dbd1902f3fb50c441da86e894b6e25a73c3fda32c57e/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567", size = 2959356 },
]
[[package]]
name = "scikit-learn"
version = "1.5.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "joblib" },
{ name = "numpy" },
{ name = "scipy" },
{ name = "threadpoolctl" },
]
sdist = { url = "https://files.pythonhosted.org/packages/37/59/44985a2bdc95c74e34fef3d10cb5d93ce13b0e2a7baefffe1b53853b502d/scikit_learn-1.5.2.tar.gz", hash = "sha256:b4237ed7b3fdd0a4882792e68ef2545d5baa50aca3bb45aa7df468138ad8f94d", size = 7001680 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a4/db/b485c1ac54ff3bd9e7e6b39d3cc6609c4c76a65f52ab0a7b22b6c3ab0e9d/scikit_learn-1.5.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f932a02c3f4956dfb981391ab24bda1dbd90fe3d628e4b42caef3e041c67707a", size = 12110344 },
{ url = "https://files.pythonhosted.org/packages/54/1a/7deb52fa23aebb855431ad659b3c6a2e1709ece582cb3a63d66905e735fe/scikit_learn-1.5.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3b923d119d65b7bd555c73be5423bf06c0105678ce7e1f558cb4b40b0a5502b1", size = 11033502 },
{ url = "https://files.pythonhosted.org/packages/a1/32/4a7a205b14c11225609b75b28402c196e4396ac754dab6a81971b811781c/scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd", size = 12085794 },
{ url = "https://files.pythonhosted.org/packages/c6/29/044048c5e911373827c0e1d3051321b9183b2a4f8d4e2f11c08fcff83f13/scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6", size = 12945797 },
{ url = "https://files.pythonhosted.org/packages/aa/ce/c0b912f2f31aeb1b756a6ba56bcd84dd1f8a148470526a48515a3f4d48cd/scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1", size = 10985467 },
{ url = "https://files.pythonhosted.org/packages/a4/50/8891028437858cc510e13578fe7046574a60c2aaaa92b02d64aac5b1b412/scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5", size = 12025584 },
{ url = "https://files.pythonhosted.org/packages/d2/79/17feef8a1c14149436083bec0e61d7befb4812e272d5b20f9d79ea3e9ab1/scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908", size = 10959795 },
{ url = "https://files.pythonhosted.org/packages/b1/c8/f08313f9e2e656bd0905930ae8bf99a573ea21c34666a813b749c338202f/scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3", size = 12077302 },
{ url = "https://files.pythonhosted.org/packages/a7/48/fbfb4dc72bed0fe31fe045fb30e924909ad03f717c36694351612973b1a9/scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12", size = 13002811 },
{ url = "https://files.pythonhosted.org/packages/a5/e7/0c869f9e60d225a77af90d2aefa7a4a4c0e745b149325d1450f0f0ce5399/scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f", size = 10951354 },
]
[[package]]
name = "scipy"
version = "1.14.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy" },
]
sdist = { url = "https://files.pythonhosted.org/packages/62/11/4d44a1f274e002784e4dbdb81e0ea96d2de2d1045b2132d5af62cc31fd28/scipy-1.14.1.tar.gz", hash = "sha256:5a275584e726026a5699459aa72f828a610821006228e841b94275c4a7c08417", size = 58620554 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c0/04/2bdacc8ac6387b15db6faa40295f8bd25eccf33f1f13e68a72dc3c60a99e/scipy-1.14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:631f07b3734d34aced009aaf6fedfd0eb3498a97e581c3b1e5f14a04164a456d", size = 39128781 },
{ url = "https://files.pythonhosted.org/packages/c8/53/35b4d41f5fd42f5781dbd0dd6c05d35ba8aa75c84ecddc7d44756cd8da2e/scipy-1.14.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:af29a935803cc707ab2ed7791c44288a682f9c8107bc00f0eccc4f92c08d6e07", size = 29939542 },
{ url = "https://files.pythonhosted.org/packages/66/67/6ef192e0e4d77b20cc33a01e743b00bc9e68fb83b88e06e636d2619a8767/scipy-1.14.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:2843f2d527d9eebec9a43e6b406fb7266f3af25a751aa91d62ff416f54170bc5", size = 23148375 },
{ url = "https://files.pythonhosted.org/packages/f6/32/3a6dedd51d68eb7b8e7dc7947d5d841bcb699f1bf4463639554986f4d782/scipy-1.14.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:eb58ca0abd96911932f688528977858681a59d61a7ce908ffd355957f7025cfc", size = 25578573 },
{ url = "https://files.pythonhosted.org/packages/f0/5a/efa92a58dc3a2898705f1dc9dbaf390ca7d4fba26d6ab8cfffb0c72f656f/scipy-1.14.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30ac8812c1d2aab7131a79ba62933a2a76f582d5dbbc695192453dae67ad6310", size = 35319299 },
{ url = "https://files.pythonhosted.org/packages/8e/ee/8a26858ca517e9c64f84b4c7734b89bda8e63bec85c3d2f432d225bb1886/scipy-1.14.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f9ea80f2e65bdaa0b7627fb00cbeb2daf163caa015e59b7516395fe3bd1e066", size = 40849331 },
{ url = "https://files.pythonhosted.org/packages/a5/cd/06f72bc9187840f1c99e1a8750aad4216fc7dfdd7df46e6280add14b4822/scipy-1.14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:edaf02b82cd7639db00dbff629995ef185c8df4c3ffa71a5562a595765a06ce1", size = 42544049 },
{ url = "https://files.pythonhosted.org/packages/aa/7d/43ab67228ef98c6b5dd42ab386eae2d7877036970a0d7e3dd3eb47a0d530/scipy-1.14.1-cp312-cp312-win_amd64.whl", hash = "sha256:2ff38e22128e6c03ff73b6bb0f85f897d2362f8c052e3b8ad00532198fbdae3f", size = 44521212 },
{ url = "https://files.pythonhosted.org/packages/50/ef/ac98346db016ff18a6ad7626a35808f37074d25796fd0234c2bb0ed1e054/scipy-1.14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1729560c906963fc8389f6aac023739ff3983e727b1a4d87696b7bf108316a79", size = 39091068 },
{ url = "https://files.pythonhosted.org/packages/b9/cc/70948fe9f393b911b4251e96b55bbdeaa8cca41f37c26fd1df0232933b9e/scipy-1.14.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:4079b90df244709e675cdc8b93bfd8a395d59af40b72e339c2287c91860deb8e", size = 29875417 },
{ url = "https://files.pythonhosted.org/packages/3b/2e/35f549b7d231c1c9f9639f9ef49b815d816bf54dd050da5da1c11517a218/scipy-1.14.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e0cf28db0f24a38b2a0ca33a85a54852586e43cf6fd876365c86e0657cfe7d73", size = 23084508 },
{ url = "https://files.pythonhosted.org/packages/3f/d6/b028e3f3e59fae61fb8c0f450db732c43dd1d836223a589a8be9f6377203/scipy-1.14.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0c2f95de3b04e26f5f3ad5bb05e74ba7f68b837133a4492414b3afd79dfe540e", size = 25503364 },
{ url = "https://files.pythonhosted.org/packages/a7/2f/6c142b352ac15967744d62b165537a965e95d557085db4beab2a11f7943b/scipy-1.14.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b99722ea48b7ea25e8e015e8341ae74624f72e5f21fc2abd45f3a93266de4c5d", size = 35292639 },
{ url = "https://files.pythonhosted.org/packages/56/46/2449e6e51e0d7c3575f289f6acb7f828938eaab8874dbccfeb0cd2b71a27/scipy-1.14.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5149e3fd2d686e42144a093b206aef01932a0059c2a33ddfa67f5f035bdfe13e", size = 40798288 },
{ url = "https://files.pythonhosted.org/packages/32/cd/9d86f7ed7f4497c9fd3e39f8918dd93d9f647ba80d7e34e4946c0c2d1a7c/scipy-1.14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e4f5a7c49323533f9103d4dacf4e4f07078f360743dec7f7596949149efeec06", size = 42524647 },
{ url = "https://files.pythonhosted.org/packages/f5/1b/6ee032251bf4cdb0cc50059374e86a9f076308c1512b61c4e003e241efb7/scipy-1.14.1-cp313-cp313-win_amd64.whl", hash = "sha256:baff393942b550823bfce952bb62270ee17504d02a1801d7fd0719534dfb9c84", size = 44469524 },
]
[[package]]
name = "setuptools"
version = "75.3.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ed/22/a438e0caa4576f8c383fa4d35f1cc01655a46c75be358960d815bfbb12bd/setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686", size = 1351577 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/90/12/282ee9bce8b58130cb762fbc9beabd531549952cac11fc56add11dcb7ea0/setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd", size = 1251070 },
]
[[package]]
name = "threadpoolctl"
version = "3.5.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/bd/55/b5148dcbf72f5cde221f8bfe3b6a540da7aa1842f6b491ad979a6c8b84af/threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107", size = 41936 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467", size = 18414 },
]

1
server/.dockerignore Normal file
View File

@ -0,0 +1 @@
data

1
server/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/data/

16
server/Dockerfile Normal file
View File

@ -0,0 +1,16 @@
# Build stage: compile the movier binary with the Go 1.23.2 Alpine toolchain.
FROM golang:1.23.2-alpine AS builder
WORKDIR /server
# Copy only the module manifests and fetch dependencies before the rest of
# the source tree is copied in.
COPY go.mod go.sum ./
RUN go mod download
COPY . .
# -s -w makes the linker omit the symbol table and DWARF debug information.
RUN go build -ldflags "-s -w" -o movier
# Runtime stage: only the compiled binary and the templates directory are
# carried over from the builder.
FROM alpine:latest
WORKDIR /server
COPY --from=builder /server/movier /server/movier
COPY --from=builder /server/pkg/templates /server/pkg/templates
# NOTE(review): no CMD/ENTRYPOINT is declared here; presumably the command is
# supplied by whatever launches the container — confirm.

82
server/cmd/download.go Normal file
View File

@ -0,0 +1,82 @@
package cmd
import (
"log"
"github.com/aykhans/movier/server/pkg/config"
"github.com/aykhans/movier/server/pkg/dto"
"github.com/aykhans/movier/server/pkg/utils"
"github.com/spf13/cobra"
)
// getDownloadCmd builds the `download` subcommand. Running it calls
// runDownload and terminates the process through log.Fatalln when that
// call reports an error.
func getDownloadCmd() *cobra.Command {
	return &cobra.Command{
		Use:   "download",
		Short: "Movie Data Downloader",
		Run: func(cmd *cobra.Command, args []string) {
			if err := runDownload(); err != nil {
				log.Fatalln(err)
			}
		},
	}
}
// runDownload makes sure the download and extract directories exist and then
// fetches/extracts every configured dataset.
//
// It returns the first error reported while creating the directories or while
// downloading/extracting. Previously the error returned by download was
// dropped, so a failed download was reported as success.
func runDownload() error {
	downloadPath := config.GetDownloadPath()
	extractPath := config.GetExtractPath()

	if err := utils.MakeDirIfNotExist(downloadPath); err != nil {
		return err
	}
	if err := utils.MakeDirIfNotExist(extractPath); err != nil {
		return err
	}

	// Propagate the result instead of discarding it.
	return download(downloadPath, extractPath)
}
// download walks config.DownloadConfigs and, for each entry, does the least
// work needed to end up with the extracted file:
//   - extracted file already present: skip the entry;
//   - archive already present: only extract it;
//   - otherwise: download the archive and extract it.
//
// The first error encountered aborts the loop and is returned.
//
// NOTE(review): utils.IsDirExist is used here to probe regular files; its
// implementation is not visible from this file — confirm it reports true for
// non-directory paths.
func download(
	downloadPath string,
	extractPath string,
) error {
	for _, entry := range config.DownloadConfigs {
		archiveFile := downloadPath + "/" + entry.DownloadName
		extractedFile := extractPath + "/" + entry.ExtractName

		alreadyExtracted, err := utils.IsDirExist(extractedFile)
		if err != nil {
			return err
		}
		if alreadyExtracted {
			log.Printf("File %s already extracted. Skipping...\n\n", entry.ExtractName)
			continue
		}

		alreadyDownloaded, err := utils.IsDirExist(archiveFile)
		if err != nil {
			return err
		}

		if !alreadyDownloaded {
			log.Printf("Downloading and extracting %s file...\n\n", entry.DownloadName)
			err = dto.DownloadAndExtractGz(entry.URL, archiveFile, extractedFile)
		} else {
			log.Printf("File %s already downloaded. Extracting...\n\n", entry.DownloadName)
			err = dto.ExtractGzFile(archiveFile, extractedFile)
		}
		if err != nil {
			return err
		}
	}
	return nil
}

102
server/cmd/filter.go Normal file
View File

@ -0,0 +1,102 @@
package cmd
import (
"fmt"
"log"
"time"
"github.com/aykhans/movier/server/pkg/config"
"github.com/aykhans/movier/server/pkg/dto"
"github.com/aykhans/movier/server/pkg/storage/postgresql"
"github.com/aykhans/movier/server/pkg/storage/postgresql/repository"
"github.com/spf13/cobra"
)
// getFilterCmd builds the `filter` subcommand. Running it calls runFilter and
// terminates the process through log.Fatalln when that call reports an error.
func getFilterCmd() *cobra.Command {
	return &cobra.Command{
		Use:   "filter",
		Short: "Movie Data Filter",
		Run: func(cmd *cobra.Command, args []string) {
			if err := runFilter(); err != nil {
				log.Fatalln(err)
			}
		},
	}
}
// runFilter loads the extracted IMDb TSV files into PostgreSQL in three
// stages, logging the duration of every step:
//  1. filter title.basics.tsv and insert the resulting rows;
//  2. filter title.principals.tsv for the stored tconsts and update them;
//  3. filter title.ratings.tsv for the stored tconsts and update them.
//
// The first failing step aborts the run and its error is returned. An error
// is also returned when no tconsts are found after the basics insert.
//
// NOTE(review): the handle returned by postgresql.NewDB is never closed in
// this function — confirm whether that is intentional.
func runFilter() error {
	overallStart := time.Now()
	dataDir := config.GetExtractPath()

	// Stage 1: basics.
	log.Printf("Filtering basics data...\n\n")
	stepStart := time.Now()
	basics, err := dto.FilterBasics(dataDir + "/title.basics.tsv")
	if err != nil {
		return err
	}
	log.Printf("Basics data filtered. Found %d records (%s)\n\n", len(basics), time.Since(stepStart))

	log.Printf("Inserting basics data...\n\n")
	postgresURL, err := config.NewPostgresURL()
	if err != nil {
		return err
	}
	db, err := postgresql.NewDB(postgresURL)
	if err != nil {
		return err
	}
	imdbRepo := repository.NewIMDbRepository(db)
	stepStart = time.Now()
	if err = imdbRepo.InsertMultipleBasics(basics); err != nil {
		return err
	}
	log.Printf("Basics data inserted. (%s)\n\n", time.Since(stepStart))

	// Stage 2: principals, restricted to the titles stored above.
	log.Printf("Filtering principals data...\n\n")
	tconsts, err := imdbRepo.GetAllTconsts()
	if err != nil {
		return err
	}
	if len(tconsts) == 0 {
		return fmt.Errorf("no tconsts found")
	}
	stepStart = time.Now()
	principals, err := dto.FilterPrincipals(dataDir+"/title.principals.tsv", tconsts)
	if err != nil {
		return err
	}
	log.Printf("Principals data filtered. (%s)\n\n", time.Since(stepStart))

	log.Printf("Inserting principals data...\n\n")
	stepStart = time.Now()
	if err = imdbRepo.UpdateMultiplePrincipals(principals); err != nil {
		return err
	}
	log.Printf("Principals data inserted. (%s)\n\n", time.Since(stepStart))

	// Stage 3: ratings, restricted to the same titles.
	log.Printf("Filtering ratings data...\n\n")
	stepStart = time.Now()
	ratings, err := dto.FilterRatings(dataDir+"/title.ratings.tsv", tconsts)
	if err != nil {
		return err
	}
	log.Printf("Ratings data filtered. (%s)\n\n", time.Since(stepStart))

	log.Printf("Inserting ratings data...\n\n")
	stepStart = time.Now()
	if err = imdbRepo.UpdateMultipleRatings(ratings); err != nil {
		return err
	}
	log.Printf("Ratings data inserted. (%s)\n\n", time.Since(stepStart))

	log.Printf("Filtering done! (%s)\n", time.Since(overallStart))
	return nil
}

20
server/cmd/root.go Normal file
View File

@ -0,0 +1,20 @@
package cmd
import (
"github.com/spf13/cobra"
)
// rootCmd is the top-level `movier` command. Invoked without a subcommand it
// only prints the help text.
var rootCmd = &cobra.Command{
Use: "movier",
Short: "Movie Recommendation System",
Run: func(cmd *cobra.Command, args []string) {
// The value returned by cmd.Help() is discarded.
cmd.Help()
},
}
// Execute attaches the download, filter and serve subcommands to rootCmd (in
// that order) and runs the command line, returning whatever rootCmd.Execute
// returns.
func Execute() error {
	rootCmd.AddCommand(
		getDownloadCmd(),
		getFilterCmd(),
		getServeCmd(),
	)
	return rootCmd.Execute()
}

75
server/cmd/serve.go Normal file
View File

@ -0,0 +1,75 @@
package cmd
import (
"context"
"fmt"
"log"
"net/http"
"github.com/aykhans/movier/server/pkg/config"
"github.com/aykhans/movier/server/pkg/handlers"
"github.com/aykhans/movier/server/pkg/storage/postgresql"
"github.com/aykhans/movier/server/pkg/storage/postgresql/repository"
"github.com/spf13/cobra"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
)
// getServeCmd builds the `serve` subcommand. Running it calls runServe; an
// error terminates the process through log.Fatalln, otherwise a banner line
// is printed once runServe has returned.
func getServeCmd() *cobra.Command {
	return &cobra.Command{
		Use:   "serve",
		Short: "Movie Recommendation Serve",
		Run: func(cmd *cobra.Command, args []string) {
			if err := runServe(); err != nil {
				log.Fatalln(err)
			}
			fmt.Println("Movie Recommendation Serve")
		},
	}
}
// runServe wires up the HTTP server: it connects to PostgreSQL, creates the
// gRPC client for the recommender service, registers the routes and then
// blocks in http.ListenAndServe on config.ServePort.
//
// It returns an error when configuration is missing, when the database or
// gRPC client cannot be created, or when the HTTP server stops.
//
// Changes from the previous version:
//   - db.Close is deferred only after NewDB succeeded; it used to be
//     registered before the error check, so a failed connection would call
//     Close on whatever NewDB returned alongside the error (a nil handle
//     would panic);
//   - a gRPC client creation failure is returned instead of calling
//     log.Fatalf, so the deferred db.Close still runs; the caller already
//     logs the error and exits.
func runServe() error {
	dbURL, err := config.NewPostgresURL()
	if err != nil {
		return err
	}

	db, err := postgresql.NewDB(dbURL)
	if err != nil {
		return err
	}
	defer db.Close(context.Background())

	imdbRepo := repository.NewIMDbRepository(db)

	grpcRecommenderServiceTarget, err := config.NewRecommenderServiceGrpcTarget()
	if err != nil {
		return err
	}
	conn, err := grpc.NewClient(
		grpcRecommenderServiceTarget,
		grpc.WithTransportCredentials(insecure.NewCredentials()),
	)
	if err != nil {
		return fmt.Errorf("did not connect to grpc recommender service: %w", err)
	}
	defer conn.Close()

	router := http.NewServeMux()
	imdbHandler := handlers.NewIMDbHandler(*imdbRepo, conn, config.GetBaseURL())

	// Liveness probe.
	router.HandleFunc("GET /ping", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/plain")
		w.WriteHeader(http.StatusOK)
		w.Write([]byte("pong"))
	})
	router.HandleFunc("GET /", imdbHandler.HandlerHome)
	router.HandleFunc("GET /recs", imdbHandler.HandlerGetRecommendations)

	log.Printf("serving on port %d", config.ServePort)
	// ListenAndServe only returns when the server stops, always with a
	// non-nil error.
	return http.ListenAndServe(fmt.Sprintf(":%d", config.ServePort), handlers.CORSMiddleware(router))
}

22
server/go.mod Normal file
View File

@ -0,0 +1,22 @@
module github.com/aykhans/movier/server
go 1.23.2
require (
github.com/jackc/pgx/v5 v5.7.1
github.com/spf13/cobra v1.8.1
google.golang.org/grpc v1.67.1
google.golang.org/protobuf v1.35.1
)
require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
github.com/spf13/pflag v1.0.5 // indirect
golang.org/x/crypto v0.27.0 // indirect
golang.org/x/net v0.28.0 // indirect
golang.org/x/sys v0.25.0 // indirect
golang.org/x/text v0.18.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142 // indirect
)

48
server/go.sum Normal file
View File

@ -0,0 +1,48 @@
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
github.com/jackc/pgx/v5 v5.7.1 h1:x7SYsPBYDkHDksogeSmZZ5xzThcTgRz++I5E+ePFUcs=
github.com/jackc/pgx/v5 v5.7.1/go.mod h1:e7O26IywZZ+naJtWWos6i6fvWK+29etgITqrqHLfoZA=
github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A=
golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70=
golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE=
golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg=
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34=
golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224=
golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142 h1:e7S5W7MGGLaSu8j3YjdezkZ+m1/Nm0uRVRMEMGk26Xs=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E=
google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA=
google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA=
google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

23
server/main.go Normal file
View File

@ -0,0 +1,23 @@
package main
import (
"log"
"os"
"github.com/aykhans/movier/server/cmd"
"github.com/aykhans/movier/server/pkg/config"
)
// main records the current working directory as config.BaseDir and then
// hands control to the cobra command tree. Any failure is fatal.
func main() {
	// log.SetFlags(log.LstdFlags | log.Lshortfile)
	workingDir, err := os.Getwd()
	if err != nil {
		log.Fatal(err)
	}
	config.BaseDir = workingDir

	if err := cmd.Execute(); err != nil {
		log.Fatal(err)
	}
}

105
server/pkg/config/config.go Normal file
View File

@ -0,0 +1,105 @@
package config
import (
"fmt"
"strconv"
"github.com/aykhans/movier/server/pkg/utils"
)
// DownloadConfig describes one remote gzip archive: where it is fetched from
// and the file names used for the downloaded and the extracted copy.
type DownloadConfig struct {
// URL is the address the archive is downloaded from.
URL string
// DownloadName is the file name of the downloaded archive.
DownloadName string
// ExtractName is the file name of the decompressed output.
ExtractName string
}
// DownloadConfigs lists the IMDb dataset archives (basics, principals,
// ratings) together with their local archive and extracted file names.
var DownloadConfigs = []DownloadConfig{
{
URL: "https://datasets.imdbws.com/title.basics.tsv.gz",
DownloadName: "title.basics.tsv.gz",
ExtractName: "title.basics.tsv",
},
{
URL: "https://datasets.imdbws.com/title.principals.tsv.gz",
DownloadName: "title.principals.tsv.gz",
ExtractName: "title.principals.tsv",
},
{
URL: "https://datasets.imdbws.com/title.ratings.tsv.gz",
DownloadName: "title.ratings.tsv.gz",
ExtractName: "title.ratings.tsv",
},
}
// BaseDir is the directory the template and data paths are resolved against.
// It defaults to "/" and is meant to be overwritten at start-up.
var BaseDir = "/"
// GetTemplatePath returns the templates directory located under BaseDir.
func GetTemplatePath() string {
	return fmt.Sprintf("%s/pkg/templates", BaseDir)
}
// GetDownloadPath returns the directory under BaseDir where downloaded
// archives are stored.
func GetDownloadPath() string {
	return fmt.Sprintf("%s/data/raw", BaseDir)
}
// GetExtractPath returns the directory under BaseDir where extracted files
// are stored.
func GetExtractPath() string {
	return fmt.Sprintf("%s/data/extracted", BaseDir)
}
const (
// ServePort is the TCP port the HTTP server listens on.
ServePort = 8080
)
// TitleTypes lists the titleType values that are kept when filtering the
// basics dataset.
var TitleTypes = []string{"movie", "tvMovie"}
// NconstCategories lists the principal categories that are kept when
// filtering the principals dataset.
var NconstCategories = []string{"actress", "actor", "director", "writer"}
// NewPostgresURL assembles the PostgreSQL connection URL from the
// POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_HOST, POSTGRES_PORT and
// POSTGRES_DB environment variables.
//
// The variables are checked in that order; the first one that is empty
// produces an error, as does a POSTGRES_PORT that is not an integer. The
// resulting URL always carries sslmode=disable.
//
// NOTE(review): user name and password are interpolated verbatim; values
// containing URL-reserved characters (e.g. '@', '/', ':') are not escaped.
func NewPostgresURL() (string, error) {
	// requireEnv reads one variable and rejects an empty value.
	requireEnv := func(name string) (string, error) {
		value := utils.GetEnv(name, "")
		if value == "" {
			return "", fmt.Errorf("%s env variable is not set", name)
		}
		return value, nil
	}

	username, err := requireEnv("POSTGRES_USER")
	if err != nil {
		return "", err
	}
	password, err := requireEnv("POSTGRES_PASSWORD")
	if err != nil {
		return "", err
	}
	host, err := requireEnv("POSTGRES_HOST")
	if err != nil {
		return "", err
	}
	port, err := requireEnv("POSTGRES_PORT")
	if err != nil {
		return "", err
	}
	// The port is validated before the database name is looked at.
	if _, convErr := strconv.Atoi(port); convErr != nil {
		return "", fmt.Errorf("POSTGRES_PORT env variable is not a number")
	}
	db, err := requireEnv("POSTGRES_DB")
	if err != nil {
		return "", err
	}

	return fmt.Sprintf(
		"postgres://%s:%s@%s:%s/%s?sslmode=disable",
		username, password, host, port, db,
	), nil
}
// NewRecommenderServiceGrpcTarget builds the "host:port" dial target of the
// recommender gRPC service from RECOMMENDER_SERVICE_GRPC_HOST and
// RECOMMENDER_SERVICE_GRPC_PORT.
//
// An error is returned when either variable is empty or when the port is not
// an integer.
func NewRecommenderServiceGrpcTarget() (string, error) {
	grpcHost := utils.GetEnv("RECOMMENDER_SERVICE_GRPC_HOST", "")
	if grpcHost == "" {
		return "", fmt.Errorf("RECOMMENDER_SERVICE_GRPC_HOST env variable is not set")
	}

	grpcPort := utils.GetEnv("RECOMMENDER_SERVICE_GRPC_PORT", "")
	if grpcPort == "" {
		return "", fmt.Errorf("RECOMMENDER_SERVICE_GRPC_PORT env variable is not set")
	}
	if _, convErr := strconv.Atoi(grpcPort); convErr != nil {
		return "", fmt.Errorf("RECOMMENDER_SERVICE_GRPC_PORT env variable is not a number")
	}

	return grpcHost + ":" + grpcPort, nil
}
// GetBaseURL returns the BASE_URL environment variable, falling back to the
// local development address when utils.GetEnv applies its default.
func GetBaseURL() string {
	const fallbackBaseURL = "http://localhost:8080"
	return utils.GetEnv("BASE_URL", fallbackBaseURL)
}

View File

@ -0,0 +1,31 @@
package dto
import (
"io"
"net/http"
"os"
)
// DownloadAndExtractGz downloads url into downloadFilepath and, if that
// succeeds, decompresses the archive into extractFilepath. The first error
// from either step is returned.
func DownloadAndExtractGz(url, downloadFilepath, extractFilepath string) error {
	err := Download(url, downloadFilepath)
	if err == nil {
		err = ExtractGzFile(downloadFilepath, extractFilepath)
	}
	return err
}
// Download fetches url with an HTTP GET and writes the response body to
// filepath, creating or truncating the file.
//
// It returns any error from the request, from creating the file, from
// copying the body, or — new in this version — from closing the output file.
// A failed Close on a freshly written file can mean the data never reached
// disk, so it must not be dropped.
//
// NOTE(review): the HTTP status code is not checked, so a non-2xx response
// body is written to filepath as if it were the archive.
func Download(url, filepath string) (err error) {
	resp, err := http.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	out, err := os.Create(filepath)
	if err != nil {
		return err
	}
	defer func() {
		// Report the Close error unless an earlier error is already set.
		if closeErr := out.Close(); err == nil {
			err = closeErr
		}
	}()

	_, err = io.Copy(out, resp.Body)
	return err
}

30
server/pkg/dto/extract.go Normal file
View File

@ -0,0 +1,30 @@
package dto
import (
"compress/gzip"
"io"
"os"
)
// ExtractGzFile decompresses the gzip archive gzFile into extractedFilepath,
// creating or truncating the destination.
//
// It returns any error from opening the archive, initialising the gzip
// reader, creating the destination, copying the data, or — new in this
// version — closing the destination file, since a failed Close can mean the
// extracted data never reached disk.
func ExtractGzFile(gzFile, extractedFilepath string) (err error) {
	file, err := os.Open(gzFile)
	if err != nil {
		return err
	}
	defer file.Close()

	gzReader, err := gzip.NewReader(file)
	if err != nil {
		return err
	}
	defer gzReader.Close()

	outFile, err := os.Create(extractedFilepath)
	if err != nil {
		return err
	}
	defer func() {
		// Report the Close error unless an earlier error is already set.
		if closeErr := outFile.Close(); err == nil {
			err = closeErr
		}
	}()

	_, err = io.Copy(outFile, gzReader)
	return err
}

259
server/pkg/dto/filter.go Normal file
View File

@ -0,0 +1,259 @@
package dto
import (
"bufio"
"fmt"
"math"
"os"
"slices"
"strconv"
"strings"
"github.com/aykhans/movier/server/pkg/config"
)
// FilterBasics reads the tab-separated basics file at filePath and returns
// one Basic per row whose titleType is listed in config.TitleTypes.
//
// The header row must have exactly 9 columns and contain tconst, titleType,
// startYear and genres; every data row must also have 9 columns, otherwise
// an error is returned (the offending columns are printed to stdout first).
//
// Field conversion:
//   - StartYear is 0 when the value is not an integer or does not fit in a
//     uint16. The range check is new: the int used to be narrowed
//     unchecked, so negative or oversized values silently wrapped around;
//   - Genres is "" for the `\N` placeholder, otherwise the value lower-cased
//     with spaces removed.
//
// NOTE(review): bufio.Scanner is used with its default buffer, so a line
// longer than bufio.MaxScanTokenSize makes the scan fail.
func FilterBasics(filePath string) ([]Basic, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return nil, fmt.Errorf("could not open file: %v", err)
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	columnCount := 9

	var headers []string
	if scanner.Scan() {
		headers = strings.Split(scanner.Text(), "\t")
		if len(headers) != columnCount {
			return nil, fmt.Errorf("expected %d column headers, found %d", columnCount, len(headers))
		}
	} else {
		return nil, fmt.Errorf("could not read column headers: %v", scanner.Err())
	}

	// Locate the needed columns by name; -1 marks "not found".
	var (
		tconstIndex    int = -1
		titleTypeIndex int = -1
		startYearIndex int = -1
		genresIndex    int = -1
	)
	for i, header := range headers {
		switch header {
		case "tconst":
			tconstIndex = i
		case "titleType":
			titleTypeIndex = i
		case "startYear":
			startYearIndex = i
		case "genres":
			genresIndex = i
		}
	}
	switch {
	case tconstIndex == -1:
		return nil, fmt.Errorf("column %s not found", "`tconst`")
	case titleTypeIndex == -1:
		return nil, fmt.Errorf("column %s not found", "`titleType`")
	case startYearIndex == -1:
		return nil, fmt.Errorf("column %s not found", "`startYear`")
	case genresIndex == -1:
		return nil, fmt.Errorf("column %s not found", "`genres`")
	}

	var basics []Basic
	for scanner.Scan() {
		line := scanner.Text()
		columns := strings.Split(line, "\t")
		if len(columns) != columnCount {
			fmt.Println("Columns are:", columns)
			return nil, fmt.Errorf("expected %d columns, found %d", columnCount, len(columns))
		}
		if slices.Contains(config.TitleTypes, columns[titleTypeIndex]) {
			// Unparsable or out-of-range years are stored as 0.
			var startYearUint16 uint16
			startYear, err := strconv.Atoi(columns[startYearIndex])
			if err == nil && startYear >= 0 && startYear <= math.MaxUint16 {
				startYearUint16 = uint16(startYear)
			}

			var genres string
			if columns[genresIndex] != "\\N" {
				genres = strings.ReplaceAll(strings.ToLower(columns[genresIndex]), " ", "")
			}

			basics = append(basics, Basic{
				Tconst:    columns[tconstIndex],
				StartYear: startYearUint16,
				Genres:    genres,
			})
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return basics, nil
}
// FilterPrincipals reads the tab-separated principals file at filePath and
// collects, for every tconst in tconsts, the nconst values of rows whose
// category is listed in config.NconstCategories.
//
// The header row must have exactly 6 columns and contain tconst, nconst and
// category; every data row must also have 6 columns, otherwise an error is
// returned (the offending columns are printed to stdout first).
//
// One Principal is returned per distinct requested tconst, with Nconsts
// holding the collected ids joined by commas ("" when none matched). The
// result is built by iterating a map, so its order is unspecified.
func FilterPrincipals(filePath string, tconsts []string) ([]Principal, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return nil, fmt.Errorf("could not open file: %v", err)
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	columnCount := 6

	if !scanner.Scan() {
		return nil, fmt.Errorf("could not read column headers: %v", scanner.Err())
	}
	headers := strings.Split(scanner.Text(), "\t")
	if len(headers) != columnCount {
		return nil, fmt.Errorf("expected %d column headers, found %d", columnCount, len(headers))
	}

	// Resolve column positions by header name; -1 marks "not found".
	tconstIndex, nconstIndex, categoryIndex := -1, -1, -1
	positions := map[string]*int{
		"tconst":   &tconstIndex,
		"nconst":   &nconstIndex,
		"category": &categoryIndex,
	}
	for i, header := range headers {
		if slot, known := positions[header]; known {
			*slot = i
		}
	}
	if tconstIndex == -1 {
		return nil, fmt.Errorf("column %s not found", "`tconst`")
	}
	if nconstIndex == -1 {
		return nil, fmt.Errorf("column %s not found", "`nconst`")
	}
	if categoryIndex == -1 {
		return nil, fmt.Errorf("column %s not found", "`category`")
	}

	// Seed one (empty) bucket per requested title; rows for other titles are
	// ignored.
	nconstsByTconst := make(map[string][]string)
	for _, id := range tconsts {
		nconstsByTconst[id] = []string{}
	}

	for scanner.Scan() {
		columns := strings.Split(scanner.Text(), "\t")
		if len(columns) != columnCount {
			fmt.Println("Columns are:", columns)
			return nil, fmt.Errorf("expected %d columns, found %d", columnCount, len(columns))
		}
		if !slices.Contains(config.NconstCategories, columns[categoryIndex]) {
			continue
		}
		key := columns[tconstIndex]
		if bucket, wanted := nconstsByTconst[key]; wanted {
			nconstsByTconst[key] = append(bucket, columns[nconstIndex])
		}
	}
	if scanErr := scanner.Err(); scanErr != nil {
		return nil, scanErr
	}

	var principals []Principal
	for id, bucket := range nconstsByTconst {
		principals = append(principals, Principal{
			Tconst:  id,
			Nconsts: strings.Join(bucket, ","),
		})
	}
	return principals, nil
}
// FilterRatings reads the tab-separated ratings file at filePath and returns
// one Ratings value per row whose tconst appears in tconsts, in file order.
//
// The header row must have exactly 3 columns and contain tconst,
// averageRating and numVotes; every data row must also have 3 columns,
// otherwise an error is returned (the offending columns are printed to
// stdout first).
//
// Field conversion: an unparsable averageRating or numVotes becomes 0; the
// rating is rounded to one decimal place.
//
// The requested tconsts are held in a presized map[string]struct{} set; the
// previous map[string][]string stored a slice header per title only to
// answer membership queries.
func FilterRatings(filePath string, tconsts []string) ([]Ratings, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return nil, fmt.Errorf("could not open file: %v", err)
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	columnCount := 3

	var headers []string
	if scanner.Scan() {
		headers = strings.Split(scanner.Text(), "\t")
		if len(headers) != columnCount {
			return nil, fmt.Errorf("expected %d column headers, found %d", columnCount, len(headers))
		}
	} else {
		return nil, fmt.Errorf("could not read column headers: %v", scanner.Err())
	}

	// Locate the needed columns by name; -1 marks "not found".
	var (
		tconstIndex        int = -1
		averageRatingIndex int = -1
		numVotesIndex      int = -1
	)
	for i, header := range headers {
		switch header {
		case "tconst":
			tconstIndex = i
		case "averageRating":
			averageRatingIndex = i
		case "numVotes":
			numVotesIndex = i
		}
	}
	switch {
	case tconstIndex == -1:
		return nil, fmt.Errorf("column %s not found", "`tconst`")
	case averageRatingIndex == -1:
		return nil, fmt.Errorf("column %s not found", "`averageRating`")
	case numVotesIndex == -1:
		return nil, fmt.Errorf("column %s not found", "`numVotes`")
	}

	// Membership set of the requested titles.
	wanted := make(map[string]struct{}, len(tconsts))
	for _, tconst := range tconsts {
		wanted[tconst] = struct{}{}
	}

	var ratings []Ratings
	for scanner.Scan() {
		line := scanner.Text()
		columns := strings.Split(line, "\t")
		if len(columns) != columnCount {
			fmt.Println("Columns are:", columns)
			return nil, fmt.Errorf("expected %d columns, found %d", columnCount, len(columns))
		}
		if _, ok := wanted[columns[tconstIndex]]; ok {
			rating, err := strconv.ParseFloat(columns[averageRatingIndex], 32)
			if err != nil {
				rating = 0
			}
			votes, err := strconv.Atoi(columns[numVotesIndex])
			if err != nil {
				votes = 0
			}
			ratings = append(ratings, Ratings{
				Tconst: columns[tconstIndex],
				Rating: math.Round(rating*10) / 10,
				Votes:  votes,
			})
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return ratings, nil
}

27
server/pkg/dto/models.go Normal file
View File

@ -0,0 +1,27 @@
package dto
// Basic is one filtered row of the title basics dataset.
type Basic struct {
// Tconst is the title identifier taken from the `tconst` column.
Tconst string `json:"tconst"`
// StartYear is the parsed `startYear` column.
StartYear uint16 `json:"startYear"`
// Genres is the `genres` column.
Genres string `json:"genres"`
}
// Principal groups the person identifiers collected for one title.
type Principal struct {
// Tconst is the title identifier.
Tconst string `json:"tconst"`
// Nconsts holds the person identifiers as a single string.
Nconsts string `json:"nconsts"`
}
// Ratings is one filtered row of the title ratings dataset.
type Ratings struct {
// Tconst is the title identifier.
Tconst string `json:"tconst"`
// Rating is the average rating of the title.
Rating float64 `json:"rating"`
// Votes is the number of votes of the title.
Votes int `json:"votes"`
}
// MinMax bundles lower and upper bounds for votes, year and rating.
// NOTE(review): where these bounds are computed and consumed is not visible
// from this file — presumably they are used for normalisation; confirm.
type MinMax struct {
MinVotes uint `json:"minVotes"`
MaxVotes uint `json:"maxVotes"`
MinYear uint `json:"minYear"`
MaxYear uint `json:"maxYear"`
MinRating float64 `json:"minRating"`
MaxRating float64 `json:"maxRating"`
}

58
server/pkg/dto/vector.go Normal file
View File

@ -0,0 +1,58 @@
package dto
import (
"fmt"
"math"
)
// CountVectorizer turns a document (a slice of words) into a fixed-length
// vector of per-word occurrence counts.
type CountVectorizer struct {
	// WordIndex maps every known word to its position in the vectors
	// produced by Vectorize.
	WordIndex map[string]int
}

// NewCountVectorizer returns a vectorizer with no vocabulary; call
// SetWordIndexes before Vectorize.
func NewCountVectorizer() *CountVectorizer {
	return &CountVectorizer{}
}

// SetWordIndexes rebuilds the vocabulary from docs. Words receive consecutive
// indexes in order of first appearance; empty strings are skipped. Any
// previous vocabulary is discarded.
func (cv *CountVectorizer) SetWordIndexes(docs [][]string) {
	cv.WordIndex = make(map[string]int)
	index := 0
	for _, doc := range docs {
		for _, word := range doc {
			if word == "" {
				continue
			}
			if _, exists := cv.WordIndex[word]; !exists {
				cv.WordIndex[word] = index
				index++
			}
		}
	}
}

// Vectorize returns a vector of len(cv.WordIndex) in which element i is the
// number of times the word with index i occurs in doc.
//
// Words that are not in the vocabulary (including the empty string, which
// SetWordIndexes never indexes) are ignored. Previously the failed map
// lookup yielded the zero value, so such words were counted in slot 0 — and
// with an empty vocabulary the write to slot 0 panicked.
//
// NOTE: counts are uint8 and wrap around past 255 occurrences of one word.
func (cv *CountVectorizer) Vectorize(doc []string) []uint8 {
	vector := make([]uint8, len(cv.WordIndex))
	for _, word := range doc {
		idx, known := cv.WordIndex[word]
		if !known {
			continue
		}
		vector[idx]++
	}
	return vector
}
func CosineSimilarity(a, b []uint8) (float32, error) {
if len(a) != len(b) {
return 0, fmt.Errorf("slices must have the same length")
}
var dotProduct, normA, normB float64
for i := 0; i < len(a); i++ {
x := float64(a[i])
y := float64(b[i])
dotProduct += x * y
normA += x * x
normB += y * y
}
if normA == 0 || normB == 0 {
return 0, nil
}
return float32(dotProduct / (math.Sqrt(normA) * math.Sqrt(normB))), nil
}

301
server/pkg/handlers/imdb.go Normal file
View File

@ -0,0 +1,301 @@
package handlers
import (
"fmt"
"log"
"net/http"
"strconv"
"strings"
"github.com/aykhans/movier/server/pkg/dto"
"github.com/aykhans/movier/server/pkg/proto"
"github.com/aykhans/movier/server/pkg/storage/postgresql/repository"
"github.com/aykhans/movier/server/pkg/utils"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
// IMDbHandler bundles the dependencies of the IMDb HTTP handlers.
type IMDbHandler struct {
// imdbRepo is queried by HandlerHome for the min/max bounds.
imdbRepo repository.IMDbRepository
// grpcRecommenderService is the connection HandlerGetRecommendations uses to
// create its recommender client.
grpcRecommenderService *grpc.ClientConn
// baseURL is passed to the index.html template by HandlerHome.
baseURL string
}
// NewIMDbHandler builds an IMDbHandler from the repository, the recommender
// gRPC connection and the base URL handed to the HTML templates.
func NewIMDbHandler(imdbRepo repository.IMDbRepository, grpcRecommenderService *grpc.ClientConn, baseURL string) *IMDbHandler {
	handler := new(IMDbHandler)
	handler.imdbRepo = imdbRepo
	handler.grpcRecommenderService = grpcRecommenderService
	handler.baseURL = baseURL
	return handler
}
// HandlerGetRecommendations validates the query string, forwards the request
// to the recommender gRPC service and writes the recommended movies as JSON.
//
// Query parameters:
//   - tconst (repeatable, 1-5 values): seed titles; each must start with "tt"
//     and be 9-12 bytes long. Duplicates are dropped, request order is kept.
//   - n (optional, 1-20, default 5): number of recommendations.
//   - min_votes/max_votes and min_year/max_year (optional, uint32) as well as
//     min_rating/max_rating (optional, 0-10): result filters; a max must not
//     be below the matching min.
//   - year_weight, rating_weight, genres_weight, nconsts_weight (0-400): at
//     least one is required and the supplied weights must add up to 100 times
//     the number of supplied weights.
//
// Validation failures are answered with 400 and an ErrorResponse body. gRPC
// InvalidArgument and NotFound map to 400 and 404; every other failure is
// logged and answered with 500.
func (h *IMDbHandler) HandlerGetRecommendations(w http.ResponseWriter, r *http.Request) {
	query := r.URL.Query()
	badRequest := func(err error) {
		RespondWithJSON(w, ErrorResponse{Error: err.Error()}, http.StatusBadRequest)
	}

	// The validation order (tconst, n, filters, weights) decides which error a
	// client sees first and is kept stable on purpose.
	tconsts, err := parseTconsts(query["tconst"])
	if err != nil {
		badRequest(err)
		return
	}
	n, err := parseRecommendationCount(query.Get("n"))
	if err != nil {
		badRequest(err)
		return
	}
	filter, err := buildFilter(query.Get)
	if err != nil {
		badRequest(err)
		return
	}
	weight, err := buildWeight(query.Get)
	if err != nil {
		badRequest(err)
		return
	}

	client := proto.NewRecommenderClient(h.grpcRecommenderService)
	response, err := client.GetRecommendations(r.Context(), &proto.Request{
		Tconsts: tconsts,
		N:       n,
		Filter:  filter,
		Weight:  weight,
	})
	if err != nil {
		st, ok := status.FromError(err)
		if !ok {
			log.Printf("error getting recommendations: %v", err)
			RespondWithServerError(w)
			return
		}
		switch st.Code() {
		case codes.InvalidArgument:
			RespondWithJSON(w, ErrorResponse{Error: st.Message()}, http.StatusBadRequest)
		case codes.NotFound:
			RespondWithJSON(w, ErrorResponse{Error: st.Message()}, http.StatusNotFound)
		default:
			// Includes codes.Internal: details go to the log, not the client.
			log.Printf("error getting recommendations: %v", err)
			RespondWithServerError(w)
		}
		return
	}
	RespondWithJSON(w, response.Movies, http.StatusOK)
}

// parseTconsts checks the number and shape of the requested tconsts and
// returns them de-duplicated in request order.
func parseTconsts(raw []string) ([]string, error) {
	if len(raw) < 1 || len(raw) > 5 {
		return nil, fmt.Errorf("tconsts should be between 1 and 5")
	}
	seen := make(map[string]struct{}, len(raw))
	tconsts := make([]string, 0, len(raw))
	var invalid []string
	for _, tconst := range raw {
		if _, duplicate := seen[tconst]; duplicate {
			continue
		}
		seen[tconst] = struct{}{}
		if len(tconst) < 9 || len(tconst) > 12 || !strings.HasPrefix(tconst, "tt") {
			invalid = append(invalid, tconst)
			continue
		}
		tconsts = append(tconsts, tconst)
	}
	if len(invalid) > 0 {
		// Capitalised because the text is sent verbatim to API clients.
		return nil, fmt.Errorf("Invalid tconsts: %s", strings.Join(invalid, ", "))
	}
	return tconsts, nil
}

// parseRecommendationCount parses the optional "n" parameter (1-20) and
// falls back to 5 when it is absent.
func parseRecommendationCount(raw string) (uint32, error) {
	if raw == "" {
		return 5, nil
	}
	n, err := strconv.Atoi(raw)
	if err != nil {
		return 0, fmt.Errorf("n should be an integer")
	}
	if n < 1 || n > 20 {
		return 0, fmt.Errorf("n should be greater than 0 and less than 21")
	}
	return uint32(n), nil
}

// parseUint32Param parses raw as an integer that fits into a uint32; name is
// only used in the error text.
func parseUint32Param(name, raw string) (uint32, error) {
	value, err := strconv.Atoi(raw)
	if err != nil {
		return 0, fmt.Errorf("%s should be an integer", name)
	}
	if !utils.IsUint32(value) {
		return 0, fmt.Errorf("%s should be greater than or equal to 0 and less than or equal to 4294967295", name)
	}
	return uint32(value), nil
}

// parseRatingParam parses raw as a rating between 0 and 10; name is only
// used in the error text.
func parseRatingParam(name, raw string) (float32, error) {
	value, err := strconv.ParseFloat(raw, 32)
	if err != nil {
		return 0, fmt.Errorf("%s should be a float", name)
	}
	// Written as a negated "in range" test so that NaN, which ParseFloat
	// accepts and which compares false against everything, is rejected too.
	if !(value >= 0 && value <= 10) {
		return 0, fmt.Errorf("%s should be greater than or equal to 0.0 and less than or equal to 10.0", name)
	}
	return float32(value), nil
}

// parseWeightParam parses raw as a feature weight between 0 and 400; name is
// only used in the error text.
func parseWeightParam(name, raw string) (int, error) {
	value, err := strconv.Atoi(raw)
	if err != nil {
		return 0, fmt.Errorf("%s should be an integer", name)
	}
	if value < 0 || value > 400 {
		return 0, fmt.Errorf("%s should be greater than or equal to 0 and less than or equal to 400", name)
	}
	return value, nil
}

// buildFilter assembles the optional vote, rating and year bounds. get
// returns the raw value of a query parameter ("" when absent). A bound that
// was not supplied stays unset in the returned filter.
func buildFilter(get func(string) string) (*proto.Filter, error) {
	filter := &proto.Filter{}

	if raw := get("min_votes"); raw != "" {
		value, err := parseUint32Param("min_votes", raw)
		if err != nil {
			return nil, err
		}
		filter.MinVotesOneof = &proto.Filter_MinVotes{MinVotes: value}
	}
	if raw := get("max_votes"); raw != "" {
		value, err := parseUint32Param("max_votes", raw)
		if err != nil {
			return nil, err
		}
		// GetMinVotes is 0 when min_votes was not supplied, so this never
		// rejects a lone max_votes.
		if value < filter.GetMinVotes() {
			return nil, fmt.Errorf("max_votes should be greater than or equal to min_votes")
		}
		filter.MaxVotesOneof = &proto.Filter_MaxVotes{MaxVotes: value}
	}

	if raw := get("min_rating"); raw != "" {
		value, err := parseRatingParam("min_rating", raw)
		if err != nil {
			return nil, err
		}
		filter.MinRatingOneof = &proto.Filter_MinRating{MinRating: value}
	}
	if raw := get("max_rating"); raw != "" {
		value, err := parseRatingParam("max_rating", raw)
		if err != nil {
			return nil, err
		}
		if value < filter.GetMinRating() {
			return nil, fmt.Errorf("max_rating should be greater than or equal to min_rating")
		}
		filter.MaxRatingOneof = &proto.Filter_MaxRating{MaxRating: value}
	}

	if raw := get("min_year"); raw != "" {
		value, err := parseUint32Param("min_year", raw)
		if err != nil {
			return nil, err
		}
		filter.MinYearOneof = &proto.Filter_MinYear{MinYear: value}
	}
	if raw := get("max_year"); raw != "" {
		value, err := parseUint32Param("max_year", raw)
		if err != nil {
			return nil, err
		}
		if value < filter.GetMinYear() {
			return nil, fmt.Errorf("max_year should be greater than or equal to min_year")
		}
		filter.MaxYearOneof = &proto.Filter_MaxYear{MaxYear: value}
	}

	return filter, nil
}

// buildWeight assembles the feature weights. get returns the raw value of a
// query parameter ("" when absent). At least one weight must be supplied and
// the supplied weights must add up to 100 per supplied weight.
func buildWeight(get func(string) string) (*proto.Weight, error) {
	weight := &proto.Weight{}
	features := []struct {
		param  string
		target *uint32
	}{
		{"year_weight", &weight.Year},
		{"rating_weight", &weight.Rating},
		{"genres_weight", &weight.Genres},
		{"nconsts_weight", &weight.Nconsts},
	}

	selected, total := 0, 0
	for _, feature := range features {
		raw := get(feature.param)
		if raw == "" {
			continue
		}
		value, err := parseWeightParam(feature.param, raw)
		if err != nil {
			return nil, err
		}
		*feature.target = uint32(value)
		total += value
		selected++
	}

	if selected < 1 {
		return nil, fmt.Errorf("At least one feature should be selected")
	}
	if selected*100 != total {
		return nil, fmt.Errorf("Sum of the %d features should be equal to %d", selected, selected*100)
	}
	return weight, nil
}
// HandlerHome renders index.html with the min/max bounds read from the
// repository and the configured base URL. A repository error is logged and
// answered with a server error.
func (h *IMDbHandler) HandlerHome(w http.ResponseWriter, r *http.Request) {
	bounds, err := h.imdbRepo.GetMinMax()
	if err != nil {
		log.Printf("error getting min max: %v", err)
		RespondWithServerError(w)
		return
	}

	// Field names are what the template refers to; keep them as they are.
	page := struct {
		MinMax  dto.MinMax
		BaseURL string
	}{
		MinMax:  *bounds,
		BaseURL: h.baseURL,
	}
	RespondWithHTML(w, "index.html", page, http.StatusOK)
}

View File

@ -0,0 +1,18 @@
package handlers
import "net/http"
func CORSMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Access-Control-Allow-Origin", "*")
w.Header().Set("Access-Control-Allow-Methods", "GET, OPTIONS")
w.Header().Set("Access-Control-Allow-Headers", "Content-Type")
if r.Method == http.MethodOptions {
w.WriteHeader(http.StatusOK)
return
}
next.ServeHTTP(w, r)
})
}

View File

@ -0,0 +1,64 @@
package handlers
import (
"encoding/json"
"fmt"
"html/template"
"log"
"net/http"
"strings"
"github.com/aykhans/movier/server/pkg/config"
)
// ErrorResponse is the JSON body of an error reply; it serialises to
// {"error": "<message>"}.
type ErrorResponse struct {
Error string `json:"error"`
}
func RespondWithServerError(w http.ResponseWriter) {
http.Error(w, "Internal server error", http.StatusInternalServerError)
}
func RespondWithJSON(w http.ResponseWriter, data any, statusCode int) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(statusCode)
if err := json.NewEncoder(w).Encode(data); err != nil {
log.Printf("error encoding response: %v", err)
RespondWithServerError(w)
}
}
func formatNumber(n uint) string {
s := fmt.Sprintf("%d", n)
var result strings.Builder
length := len(s)
for i, digit := range s {
if i > 0 && (length-i)%3 == 0 {
result.WriteString(",")
}
result.WriteRune(digit)
}
return result.String()
}
// RespondWithHTML renders the template file templateName from the configured
// template directory with data and writes the result with the given status
// code. The template function "formatNumber" is available to the template.
//
// The template is parsed and executed into a buffer before any header is
// sent. If either step fails the error is logged and a clean 500 is returned
// instead of statusCode followed by a half-rendered page.
func RespondWithHTML(w http.ResponseWriter, templateName string, data any, statusCode int) {
	funcMap := template.FuncMap{
		"formatNumber": formatNumber,
	}
	t, err := template.New(templateName).Funcs(funcMap).ParseFiles(config.GetTemplatePath() + "/" + templateName)
	if err != nil {
		log.Printf("error parsing template: %v", err)
		RespondWithServerError(w)
		return
	}

	// Execute may fail after producing partial output, so render into memory
	// first and only touch w once the page is complete.
	var page strings.Builder
	if err := t.Execute(&page, data); err != nil {
		log.Printf("error executing template: %v", err)
		RespondWithServerError(w)
		return
	}

	w.Header().Set("Content-Type", "text/html")
	w.WriteHeader(statusCode)
	if _, err := w.Write([]byte(page.String())); err != nil {
		// The status line is already out; all that is left is to record it.
		log.Printf("error writing response: %v", err)
	}
}

View File

@ -0,0 +1,587 @@
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.35.1
// protoc v5.28.3
// source: recommender.proto
package proto
import (
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
sync "sync"
)
const (
// Verify that this generated code is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
// Verify that runtime/protoimpl is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)
// Filter is the Go type generated for the recommender.Filter message
// (message index 0 of recommender.proto).
// Every bound is wrapped in its own single-member oneof, so a nil oneof field
// means "not set", which is distinguishable from a bound explicitly set to 0.
type Filter struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
// Types that are assignable to MinVotesOneof:
//
// *Filter_MinVotes
MinVotesOneof isFilter_MinVotesOneof `protobuf_oneof:"min_votes_oneof"`
// Types that are assignable to MaxVotesOneof:
//
// *Filter_MaxVotes
MaxVotesOneof isFilter_MaxVotesOneof `protobuf_oneof:"max_votes_oneof"`
// Types that are assignable to MinYearOneof:
//
// *Filter_MinYear
MinYearOneof isFilter_MinYearOneof `protobuf_oneof:"min_year_oneof"`
// Types that are assignable to MaxYearOneof:
//
// *Filter_MaxYear
MaxYearOneof isFilter_MaxYearOneof `protobuf_oneof:"max_year_oneof"`
// Types that are assignable to MinRatingOneof:
//
// *Filter_MinRating
MinRatingOneof isFilter_MinRatingOneof `protobuf_oneof:"min_rating_oneof"`
// Types that are assignable to MaxRatingOneof:
//
// *Filter_MaxRating
MaxRatingOneof isFilter_MaxRatingOneof `protobuf_oneof:"max_rating_oneof"`
}
// Reset overwrites x with an empty Filter and re-attaches the message info of
// message index 0 to its state.
func (x *Filter) Reset() {
*x = Filter{}
mi := &file_recommender_proto_msgTypes[0]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
// String returns the string form produced by protoimpl for x.
func (x *Filter) String() string {
return protoimpl.X.MessageStringOf(x)
}
// ProtoMessage is an empty marker method.
func (*Filter) ProtoMessage() {}
// ProtoReflect returns the reflective view of x. For a non-nil x the message
// info is stored in the message state on first use; a nil x is handed to
// MessageOf of the message info instead.
func (x *Filter) ProtoReflect() protoreflect.Message {
mi := &file_recommender_proto_msgTypes[0]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use Filter.ProtoReflect.Descriptor instead.
func (*Filter) Descriptor() ([]byte, []int) {
return file_recommender_proto_rawDescGZIP(), []int{0}
}
// GetMinVotesOneof returns the raw oneof value, or nil when m is nil.
func (m *Filter) GetMinVotesOneof() isFilter_MinVotesOneof {
if m != nil {
return m.MinVotesOneof
}
return nil
}
// GetMinVotes returns the min_votes bound, or 0 when it is unset or x is nil.
func (x *Filter) GetMinVotes() uint32 {
if x, ok := x.GetMinVotesOneof().(*Filter_MinVotes); ok {
return x.MinVotes
}
return 0
}
// GetMaxVotesOneof returns the raw oneof value, or nil when m is nil.
func (m *Filter) GetMaxVotesOneof() isFilter_MaxVotesOneof {
if m != nil {
return m.MaxVotesOneof
}
return nil
}
// GetMaxVotes returns the max_votes bound, or 0 when it is unset or x is nil.
func (x *Filter) GetMaxVotes() uint32 {
if x, ok := x.GetMaxVotesOneof().(*Filter_MaxVotes); ok {
return x.MaxVotes
}
return 0
}
// GetMinYearOneof returns the raw oneof value, or nil when m is nil.
func (m *Filter) GetMinYearOneof() isFilter_MinYearOneof {
if m != nil {
return m.MinYearOneof
}
return nil
}
// GetMinYear returns the min_year bound, or 0 when it is unset or x is nil.
func (x *Filter) GetMinYear() uint32 {
if x, ok := x.GetMinYearOneof().(*Filter_MinYear); ok {
return x.MinYear
}
return 0
}
// GetMaxYearOneof returns the raw oneof value, or nil when m is nil.
func (m *Filter) GetMaxYearOneof() isFilter_MaxYearOneof {
if m != nil {
return m.MaxYearOneof
}
return nil
}
// GetMaxYear returns the max_year bound, or 0 when it is unset or x is nil.
func (x *Filter) GetMaxYear() uint32 {
if x, ok := x.GetMaxYearOneof().(*Filter_MaxYear); ok {
return x.MaxYear
}
return 0
}
// GetMinRatingOneof returns the raw oneof value, or nil when m is nil.
func (m *Filter) GetMinRatingOneof() isFilter_MinRatingOneof {
if m != nil {
return m.MinRatingOneof
}
return nil
}
// GetMinRating returns the min_rating bound, or 0 when it is unset or x is nil.
func (x *Filter) GetMinRating() float32 {
if x, ok := x.GetMinRatingOneof().(*Filter_MinRating); ok {
return x.MinRating
}
return 0
}
// GetMaxRatingOneof returns the raw oneof value, or nil when m is nil.
func (m *Filter) GetMaxRatingOneof() isFilter_MaxRatingOneof {
if m != nil {
return m.MaxRatingOneof
}
return nil
}
// GetMaxRating returns the max_rating bound, or 0 when it is unset or x is nil.
func (x *Filter) GetMaxRating() float32 {
if x, ok := x.GetMaxRatingOneof().(*Filter_MaxRating); ok {
return x.MaxRating
}
return 0
}
// isFilter_MinVotesOneof is the interface type of Filter.MinVotesOneof; its
// unexported marker method restricts implementations to this package.
type isFilter_MinVotesOneof interface {
isFilter_MinVotesOneof()
}
// Filter_MinVotes is the wrapper assigned to Filter.MinVotesOneof to set
// min_votes (proto field number 1).
type Filter_MinVotes struct {
MinVotes uint32 `protobuf:"varint,1,opt,name=min_votes,json=minVotes,proto3,oneof"`
}
// isFilter_MinVotesOneof marks Filter_MinVotes as a member of the oneof.
func (*Filter_MinVotes) isFilter_MinVotesOneof() {}
// isFilter_MaxVotesOneof is the interface type of Filter.MaxVotesOneof; its
// unexported marker method restricts implementations to this package.
type isFilter_MaxVotesOneof interface {
isFilter_MaxVotesOneof()
}
// Filter_MaxVotes is the wrapper assigned to Filter.MaxVotesOneof to set
// max_votes (proto field number 2).
type Filter_MaxVotes struct {
MaxVotes uint32 `protobuf:"varint,2,opt,name=max_votes,json=maxVotes,proto3,oneof"`
}
// isFilter_MaxVotesOneof marks Filter_MaxVotes as a member of the oneof.
func (*Filter_MaxVotes) isFilter_MaxVotesOneof() {}
// isFilter_MinYearOneof is the interface type of Filter.MinYearOneof; its
// unexported marker method restricts implementations to this package.
type isFilter_MinYearOneof interface {
isFilter_MinYearOneof()
}
// Filter_MinYear is the wrapper assigned to Filter.MinYearOneof to set
// min_year (proto field number 3).
type Filter_MinYear struct {
MinYear uint32 `protobuf:"varint,3,opt,name=min_year,json=minYear,proto3,oneof"`
}
// isFilter_MinYearOneof marks Filter_MinYear as a member of the oneof.
func (*Filter_MinYear) isFilter_MinYearOneof() {}
// isFilter_MaxYearOneof is the interface type of Filter.MaxYearOneof; its
// unexported marker method restricts implementations to this package.
type isFilter_MaxYearOneof interface {
isFilter_MaxYearOneof()
}
// Filter_MaxYear is the wrapper assigned to Filter.MaxYearOneof to set
// max_year (proto field number 4).
type Filter_MaxYear struct {
MaxYear uint32 `protobuf:"varint,4,opt,name=max_year,json=maxYear,proto3,oneof"`
}
// isFilter_MaxYearOneof marks Filter_MaxYear as a member of the oneof.
func (*Filter_MaxYear) isFilter_MaxYearOneof() {}
// isFilter_MinRatingOneof is the interface type of Filter.MinRatingOneof; its
// unexported marker method restricts implementations to this package.
type isFilter_MinRatingOneof interface {
isFilter_MinRatingOneof()
}
// Filter_MinRating is the wrapper assigned to Filter.MinRatingOneof to set
// min_rating (proto field number 5).
type Filter_MinRating struct {
MinRating float32 `protobuf:"fixed32,5,opt,name=min_rating,json=minRating,proto3,oneof"`
}
// isFilter_MinRatingOneof marks Filter_MinRating as a member of the oneof.
func (*Filter_MinRating) isFilter_MinRatingOneof() {}
// isFilter_MaxRatingOneof is the interface type of Filter.MaxRatingOneof; its
// unexported marker method restricts implementations to this package.
type isFilter_MaxRatingOneof interface {
isFilter_MaxRatingOneof()
}
// Filter_MaxRating is the wrapper assigned to Filter.MaxRatingOneof to set
// max_rating (proto field number 6).
type Filter_MaxRating struct {
MaxRating float32 `protobuf:"fixed32,6,opt,name=max_rating,json=maxRating,proto3,oneof"`
}
// isFilter_MaxRatingOneof marks Filter_MaxRating as a member of the oneof.
func (*Filter_MaxRating) isFilter_MaxRatingOneof() {}
// Weight is the Go type generated for the recommender.Weight message
// (message index 1 of recommender.proto). It carries one unsigned weight per
// feature: year, rating, genres and nconsts.
type Weight struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
Year uint32 `protobuf:"varint,1,opt,name=year,proto3" json:"year,omitempty"`
Rating uint32 `protobuf:"varint,2,opt,name=rating,proto3" json:"rating,omitempty"`
Genres uint32 `protobuf:"varint,3,opt,name=genres,proto3" json:"genres,omitempty"`
Nconsts uint32 `protobuf:"varint,4,opt,name=nconsts,proto3" json:"nconsts,omitempty"`
}
// Reset overwrites x with an empty Weight and re-attaches the message info of
// message index 1 to its state.
func (x *Weight) Reset() {
*x = Weight{}
mi := &file_recommender_proto_msgTypes[1]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
// String returns the string form produced by protoimpl for x.
func (x *Weight) String() string {
return protoimpl.X.MessageStringOf(x)
}
// ProtoMessage is an empty marker method.
func (*Weight) ProtoMessage() {}
// ProtoReflect returns the reflective view of x. For a non-nil x the message
// info is stored in the message state on first use; a nil x is handed to
// MessageOf of the message info instead.
func (x *Weight) ProtoReflect() protoreflect.Message {
mi := &file_recommender_proto_msgTypes[1]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use Weight.ProtoReflect.Descriptor instead.
func (*Weight) Descriptor() ([]byte, []int) {
return file_recommender_proto_rawDescGZIP(), []int{1}
}
// GetYear returns the year weight, or 0 when x is nil.
func (x *Weight) GetYear() uint32 {
if x != nil {
return x.Year
}
return 0
}
// GetRating returns the rating weight, or 0 when x is nil.
func (x *Weight) GetRating() uint32 {
if x != nil {
return x.Rating
}
return 0
}
// GetGenres returns the genres weight, or 0 when x is nil.
func (x *Weight) GetGenres() uint32 {
if x != nil {
return x.Genres
}
return 0
}
// GetNconsts returns the nconsts weight, or 0 when x is nil.
func (x *Weight) GetNconsts() uint32 {
if x != nil {
return x.Nconsts
}
return 0
}
// Request is the Go type generated for the recommender.Request message
// (message index 2 of recommender.proto): the tconsts to recommend for, the
// number N, a Filter and a Weight.
type Request struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
Tconsts []string `protobuf:"bytes,1,rep,name=tconsts,proto3" json:"tconsts,omitempty"`
N uint32 `protobuf:"varint,2,opt,name=n,proto3" json:"n,omitempty"`
Filter *Filter `protobuf:"bytes,3,opt,name=filter,proto3" json:"filter,omitempty"`
Weight *Weight `protobuf:"bytes,4,opt,name=weight,proto3" json:"weight,omitempty"`
}
// Reset overwrites x with an empty Request and re-attaches the message info
// of message index 2 to its state.
func (x *Request) Reset() {
*x = Request{}
mi := &file_recommender_proto_msgTypes[2]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
// String returns the string form produced by protoimpl for x.
func (x *Request) String() string {
return protoimpl.X.MessageStringOf(x)
}
// ProtoMessage is an empty marker method.
func (*Request) ProtoMessage() {}
// ProtoReflect returns the reflective view of x. For a non-nil x the message
// info is stored in the message state on first use; a nil x is handed to
// MessageOf of the message info instead.
func (x *Request) ProtoReflect() protoreflect.Message {
mi := &file_recommender_proto_msgTypes[2]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use Request.ProtoReflect.Descriptor instead.
func (*Request) Descriptor() ([]byte, []int) {
return file_recommender_proto_rawDescGZIP(), []int{2}
}
// GetTconsts returns the tconsts, or nil when x is nil.
func (x *Request) GetTconsts() []string {
if x != nil {
return x.Tconsts
}
return nil
}
// GetN returns N, or 0 when x is nil.
func (x *Request) GetN() uint32 {
if x != nil {
return x.N
}
return 0
}
// GetFilter returns the filter, or nil when x is nil.
func (x *Request) GetFilter() *Filter {
if x != nil {
return x.Filter
}
return nil
}
// GetWeight returns the weight, or nil when x is nil.
func (x *Request) GetWeight() *Weight {
if x != nil {
return x.Weight
}
return nil
}
// Response is the Go type generated for the recommender.Response message
// (message index 3 of recommender.proto); it carries the recommended movies.
type Response struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
Movies []*RecommendedMovie `protobuf:"bytes,1,rep,name=movies,proto3" json:"movies,omitempty"`
}
// Reset overwrites x with an empty Response and re-attaches the message info
// of message index 3 to its state.
func (x *Response) Reset() {
*x = Response{}
mi := &file_recommender_proto_msgTypes[3]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
// String returns the string form produced by protoimpl for x.
func (x *Response) String() string {
return protoimpl.X.MessageStringOf(x)
}
// ProtoMessage is an empty marker method.
func (*Response) ProtoMessage() {}
// ProtoReflect returns the reflective view of x. For a non-nil x the message
// info is stored in the message state on first use; a nil x is handed to
// MessageOf of the message info instead.
func (x *Response) ProtoReflect() protoreflect.Message {
mi := &file_recommender_proto_msgTypes[3]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use Response.ProtoReflect.Descriptor instead.
func (*Response) Descriptor() ([]byte, []int) {
return file_recommender_proto_rawDescGZIP(), []int{3}
}
// GetMovies returns the movies, or nil when x is nil.
func (x *Response) GetMovies() []*RecommendedMovie {
if x != nil {
return x.Movies
}
return nil
}
// RecommendedMovie is the Go type generated for the
// recommender.RecommendedMovie message (message index 4 of recommender.proto):
// a tconst together with a list of weight strings.
// NOTE(review): the meaning of the Weights entries is defined by the
// recommender service and is not visible here — confirm before relying on it.
type RecommendedMovie struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
Tconst string `protobuf:"bytes,1,opt,name=tconst,proto3" json:"tconst,omitempty"`
Weights []string `protobuf:"bytes,2,rep,name=weights,proto3" json:"weights,omitempty"`
}
// Reset overwrites x with an empty RecommendedMovie and re-attaches the
// message info of message index 4 to its state.
func (x *RecommendedMovie) Reset() {
*x = RecommendedMovie{}
mi := &file_recommender_proto_msgTypes[4]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
// String returns the string form produced by protoimpl for x.
func (x *RecommendedMovie) String() string {
return protoimpl.X.MessageStringOf(x)
}
// ProtoMessage is an empty marker method.
func (*RecommendedMovie) ProtoMessage() {}
// ProtoReflect returns the reflective view of x. For a non-nil x the message
// info is stored in the message state on first use; a nil x is handed to
// MessageOf of the message info instead.
func (x *RecommendedMovie) ProtoReflect() protoreflect.Message {
mi := &file_recommender_proto_msgTypes[4]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use RecommendedMovie.ProtoReflect.Descriptor instead.
func (*RecommendedMovie) Descriptor() ([]byte, []int) {
return file_recommender_proto_rawDescGZIP(), []int{4}
}
// GetTconst returns the tconst, or "" when x is nil.
func (x *RecommendedMovie) GetTconst() string {
if x != nil {
return x.Tconst
}
return ""
}
// GetWeights returns the weights, or nil when x is nil.
func (x *RecommendedMovie) GetWeights() []string {
if x != nil {
return x.Weights
}
return nil
}
// File_recommender_proto is the file descriptor of recommender.proto. It is
// nil until file_recommender_proto_init has run.
var File_recommender_proto protoreflect.FileDescriptor
var file_recommender_proto_rawDesc = []byte{
0x0a, 0x11, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x2e, 0x70, 0x72,
0x6f, 0x74, 0x6f, 0x12, 0x0b, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x64, 0x65, 0x72,
0x22, 0xb4, 0x02, 0x0a, 0x06, 0x46, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x12, 0x1d, 0x0a, 0x09, 0x6d,
0x69, 0x6e, 0x5f, 0x76, 0x6f, 0x74, 0x65, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x48, 0x00,
0x52, 0x08, 0x6d, 0x69, 0x6e, 0x56, 0x6f, 0x74, 0x65, 0x73, 0x12, 0x1d, 0x0a, 0x09, 0x6d, 0x61,
0x78, 0x5f, 0x76, 0x6f, 0x74, 0x65, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x48, 0x01, 0x52,
0x08, 0x6d, 0x61, 0x78, 0x56, 0x6f, 0x74, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x08, 0x6d, 0x69, 0x6e,
0x5f, 0x79, 0x65, 0x61, 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x48, 0x02, 0x52, 0x07, 0x6d,
0x69, 0x6e, 0x59, 0x65, 0x61, 0x72, 0x12, 0x1b, 0x0a, 0x08, 0x6d, 0x61, 0x78, 0x5f, 0x79, 0x65,
0x61, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x48, 0x03, 0x52, 0x07, 0x6d, 0x61, 0x78, 0x59,
0x65, 0x61, 0x72, 0x12, 0x1f, 0x0a, 0x0a, 0x6d, 0x69, 0x6e, 0x5f, 0x72, 0x61, 0x74, 0x69, 0x6e,
0x67, 0x18, 0x05, 0x20, 0x01, 0x28, 0x02, 0x48, 0x04, 0x52, 0x09, 0x6d, 0x69, 0x6e, 0x52, 0x61,
0x74, 0x69, 0x6e, 0x67, 0x12, 0x1f, 0x0a, 0x0a, 0x6d, 0x61, 0x78, 0x5f, 0x72, 0x61, 0x74, 0x69,
0x6e, 0x67, 0x18, 0x06, 0x20, 0x01, 0x28, 0x02, 0x48, 0x05, 0x52, 0x09, 0x6d, 0x61, 0x78, 0x52,
0x61, 0x74, 0x69, 0x6e, 0x67, 0x42, 0x11, 0x0a, 0x0f, 0x6d, 0x69, 0x6e, 0x5f, 0x76, 0x6f, 0x74,
0x65, 0x73, 0x5f, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x42, 0x11, 0x0a, 0x0f, 0x6d, 0x61, 0x78, 0x5f,
0x76, 0x6f, 0x74, 0x65, 0x73, 0x5f, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x42, 0x10, 0x0a, 0x0e, 0x6d,
0x69, 0x6e, 0x5f, 0x79, 0x65, 0x61, 0x72, 0x5f, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x42, 0x10, 0x0a,
0x0e, 0x6d, 0x61, 0x78, 0x5f, 0x79, 0x65, 0x61, 0x72, 0x5f, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x42,
0x12, 0x0a, 0x10, 0x6d, 0x69, 0x6e, 0x5f, 0x72, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x5f, 0x6f, 0x6e,
0x65, 0x6f, 0x66, 0x42, 0x12, 0x0a, 0x10, 0x6d, 0x61, 0x78, 0x5f, 0x72, 0x61, 0x74, 0x69, 0x6e,
0x67, 0x5f, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x22, 0x66, 0x0a, 0x06, 0x57, 0x65, 0x69, 0x67, 0x68,
0x74, 0x12, 0x12, 0x0a, 0x04, 0x79, 0x65, 0x61, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52,
0x04, 0x79, 0x65, 0x61, 0x72, 0x12, 0x16, 0x0a, 0x06, 0x72, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x18,
0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x72, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x12, 0x16, 0x0a,
0x06, 0x67, 0x65, 0x6e, 0x72, 0x65, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x67,
0x65, 0x6e, 0x72, 0x65, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x73,
0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x73, 0x22,
0x8b, 0x01, 0x0a, 0x07, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x74,
0x63, 0x6f, 0x6e, 0x73, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x09, 0x52, 0x07, 0x74, 0x63,
0x6f, 0x6e, 0x73, 0x74, 0x73, 0x12, 0x0c, 0x0a, 0x01, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d,
0x52, 0x01, 0x6e, 0x12, 0x2b, 0x0a, 0x06, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x18, 0x03, 0x20,
0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x64, 0x65,
0x72, 0x2e, 0x46, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x52, 0x06, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72,
0x12, 0x2b, 0x0a, 0x06, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b,
0x32, 0x13, 0x2e, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x2e, 0x57,
0x65, 0x69, 0x67, 0x68, 0x74, 0x52, 0x06, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x22, 0x41, 0x0a,
0x08, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x35, 0x0a, 0x06, 0x6d, 0x6f, 0x76,
0x69, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x72, 0x65, 0x63, 0x6f,
0x6d, 0x6d, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x2e, 0x52, 0x65, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e,
0x64, 0x65, 0x64, 0x4d, 0x6f, 0x76, 0x69, 0x65, 0x52, 0x06, 0x6d, 0x6f, 0x76, 0x69, 0x65, 0x73,
0x22, 0x44, 0x0a, 0x10, 0x52, 0x65, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x64, 0x65, 0x64, 0x4d,
0x6f, 0x76, 0x69, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x18, 0x01,
0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x74, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x12, 0x18, 0x0a, 0x07,
0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x07, 0x77,
0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x32, 0x52, 0x0a, 0x0b, 0x52, 0x65, 0x63, 0x6f, 0x6d, 0x6d,
0x65, 0x6e, 0x64, 0x65, 0x72, 0x12, 0x43, 0x0a, 0x12, 0x47, 0x65, 0x74, 0x52, 0x65, 0x63, 0x6f,
0x6d, 0x6d, 0x65, 0x6e, 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x2e, 0x72, 0x65,
0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x2e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73,
0x74, 0x1a, 0x15, 0x2e, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x2e,
0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x2c, 0x5a, 0x2a, 0x67, 0x69,
0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x61, 0x79, 0x6b, 0x68, 0x61, 0x6e, 0x73,
0x2f, 0x6d, 0x6f, 0x76, 0x69, 0x65, 0x72, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2f, 0x70,
0x6b, 0x67, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
}
// file_recommender_proto_rawDescOnce guards the one-time compression done by
// file_recommender_proto_rawDescGZIP; file_recommender_proto_rawDescData
// starts out as the raw descriptor bytes and is replaced by their compressed
// form on first use.
var (
file_recommender_proto_rawDescOnce sync.Once
file_recommender_proto_rawDescData = file_recommender_proto_rawDesc
)
// file_recommender_proto_rawDescGZIP returns the GZIP-compressed descriptor
// bytes, compressing them only on the first call.
func file_recommender_proto_rawDescGZIP() []byte {
file_recommender_proto_rawDescOnce.Do(func() {
file_recommender_proto_rawDescData = protoimpl.X.CompressGZIP(file_recommender_proto_rawDescData)
})
return file_recommender_proto_rawDescData
}
// file_recommender_proto_msgTypes holds one MessageInfo per message of
// recommender.proto, indexed like file_recommender_proto_goTypes.
var file_recommender_proto_msgTypes = make([]protoimpl.MessageInfo, 5)
// file_recommender_proto_goTypes lists the Go types of the messages in the
// order of their message index; it is handed to the type builder in
// file_recommender_proto_init.
var file_recommender_proto_goTypes = []any{
(*Filter)(nil), // 0: recommender.Filter
(*Weight)(nil), // 1: recommender.Weight
(*Request)(nil), // 2: recommender.Request
(*Response)(nil), // 3: recommender.Response
(*RecommendedMovie)(nil), // 4: recommender.RecommendedMovie
}
// file_recommender_proto_depIdxs is the dependency table handed to the type
// builder: the leading entries are indexes into
// file_recommender_proto_goTypes, the trailing entries delimit the sub-lists
// as described by the per-line comments.
var file_recommender_proto_depIdxs = []int32{
0, // 0: recommender.Request.filter:type_name -> recommender.Filter
1, // 1: recommender.Request.weight:type_name -> recommender.Weight
4, // 2: recommender.Response.movies:type_name -> recommender.RecommendedMovie
2, // 3: recommender.Recommender.GetRecommendations:input_type -> recommender.Request
3, // 4: recommender.Recommender.GetRecommendations:output_type -> recommender.Response
4, // [4:5] is the sub-list for method output_type
3, // [3:4] is the sub-list for method input_type
3, // [3:3] is the sub-list for extension type_name
3, // [3:3] is the sub-list for extension extendee
0, // [0:3] is the sub-list for field type_name
}
// init builds the descriptor of recommender.proto when the package is loaded.
func init() { file_recommender_proto_init() }
// file_recommender_proto_init builds File_recommender_proto from the raw
// descriptor and the type tables. It returns immediately when the descriptor
// has already been built.
func file_recommender_proto_init() {
if File_recommender_proto != nil {
return
}
// Declare the wrapper types of the six Filter oneofs on the Filter message.
file_recommender_proto_msgTypes[0].OneofWrappers = []any{
(*Filter_MinVotes)(nil),
(*Filter_MaxVotes)(nil),
(*Filter_MinYear)(nil),
(*Filter_MaxYear)(nil),
(*Filter_MinRating)(nil),
(*Filter_MaxRating)(nil),
}
// x exists only so that reflection can report this package's import path.
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: file_recommender_proto_rawDesc,
NumEnums: 0,
NumMessages: 5,
NumExtensions: 0,
NumServices: 1,
},
GoTypes: file_recommender_proto_goTypes,
DependencyIndexes: file_recommender_proto_depIdxs,
MessageInfos: file_recommender_proto_msgTypes,
}.Build()
File_recommender_proto = out.File
// The build inputs are dropped once the descriptor has been built.
file_recommender_proto_rawDesc = nil
file_recommender_proto_goTypes = nil
file_recommender_proto_depIdxs = nil
}

View File

@ -0,0 +1,121 @@
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.5.1
// - protoc v5.28.3
// source: recommender.proto
package proto
import (
context "context"
grpc "google.golang.org/grpc"
codes "google.golang.org/grpc/codes"
status "google.golang.org/grpc/status"
)
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// Requires gRPC-Go v1.64.0 or later.
const _ = grpc.SupportPackageIsVersion9
// Recommender_GetRecommendations_FullMethodName is the full gRPC method name
// used by both the client stub and the server handler in this file.
const (
Recommender_GetRecommendations_FullMethodName = "/recommender.Recommender/GetRecommendations"
)
// RecommenderClient is the client API for Recommender service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type RecommenderClient interface {
// GetRecommendations sends in to the service and returns its Response.
GetRecommendations(ctx context.Context, in *Request, opts ...grpc.CallOption) (*Response, error)
}
// recommenderClient implements RecommenderClient on top of a gRPC connection.
type recommenderClient struct {
cc grpc.ClientConnInterface
}
// NewRecommenderClient returns a RecommenderClient that issues its calls
// through cc.
func NewRecommenderClient(cc grpc.ClientConnInterface) RecommenderClient {
return &recommenderClient{cc}
}
// GetRecommendations invokes the GetRecommendations method over the
// connection. grpc.StaticMethod() is placed in front of the caller's options.
// On failure the error from Invoke is returned together with a nil Response.
func (c *recommenderClient) GetRecommendations(ctx context.Context, in *Request, opts ...grpc.CallOption) (*Response, error) {
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
out := new(Response)
err := c.cc.Invoke(ctx, Recommender_GetRecommendations_FullMethodName, in, out, cOpts...)
if err != nil {
return nil, err
}
return out, nil
}
// RecommenderServer is the server API for Recommender service.
// All implementations must embed UnimplementedRecommenderServer
// for forward compatibility.
type RecommenderServer interface {
// GetRecommendations handles one GetRecommendations call.
GetRecommendations(context.Context, *Request) (*Response, error)
// mustEmbedUnimplementedRecommenderServer is unexported, so the interface
// can only be satisfied by embedding a type from this package.
mustEmbedUnimplementedRecommenderServer()
}
// UnimplementedRecommenderServer must be embedded to have
// forward compatible implementations.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedRecommenderServer struct{}
// GetRecommendations is the default implementation; it always fails with
// codes.Unimplemented.
func (UnimplementedRecommenderServer) GetRecommendations(context.Context, *Request) (*Response, error) {
return nil, status.Errorf(codes.Unimplemented, "method GetRecommendations not implemented")
}
// mustEmbedUnimplementedRecommenderServer satisfies the unexported method of
// RecommenderServer.
func (UnimplementedRecommenderServer) mustEmbedUnimplementedRecommenderServer() {}
// testEmbeddedByValue is called by RegisterRecommenderServer to check how this
// type was embedded.
func (UnimplementedRecommenderServer) testEmbeddedByValue() {}
// UnsafeRecommenderServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to RecommenderServer will
// result in compilation errors.
type UnsafeRecommenderServer interface {
// Same unexported marker method that RecommenderServer requires.
mustEmbedUnimplementedRecommenderServer()
}
// RegisterRecommenderServer registers srv on s as the implementation of the
// Recommender service described by Recommender_ServiceDesc.
func RegisterRecommenderServer(s grpc.ServiceRegistrar, srv RecommenderServer) {
// If the following call panics, it indicates UnimplementedRecommenderServer was
// embedded by pointer and is nil. This will cause panics if an
// unimplemented method is ever invoked, so we test this at initialization
// time to prevent it from happening at runtime later due to I/O.
if t, ok := srv.(interface{ testEmbeddedByValue() }); ok {
t.testEmbeddedByValue()
}
s.RegisterService(&Recommender_ServiceDesc, srv)
}
// _Recommender_GetRecommendations_Handler is the server-side entry point for
// the GetRecommendations RPC referenced from Recommender_ServiceDesc.
//
// It decodes the incoming message into a new Request using dec and returns
// the decode error if there is one. Without an interceptor the call goes
// straight to srv's GetRecommendations; with one, the call is wrapped in a
// handler closure and passed to the interceptor together with the server and
// full method name.
func _Recommender_GetRecommendations_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(Request)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(RecommenderServer).GetRecommendations(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: Recommender_GetRecommendations_FullMethodName,
}
// The closure re-asserts req to *Request because the interceptor hands the
// request back as an interface{} value.
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(RecommenderServer).GetRecommendations(ctx, req.(*Request))
}
return interceptor(ctx, in, info, handler)
}
// Recommender_ServiceDesc is the grpc.ServiceDesc for Recommender service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
var Recommender_ServiceDesc = grpc.ServiceDesc{
ServiceName: "recommender.Recommender",
HandlerType: (*RecommenderServer)(nil),
// The service exposes a single unary method.
Methods: []grpc.MethodDesc{
{
MethodName: "GetRecommendations",
Handler: _Recommender_GetRecommendations_Handler,
},
},
// No streaming RPCs are declared.
Streams: []grpc.StreamDesc{},
Metadata: "recommender.proto",
}

View File

@ -0,0 +1,16 @@
package postgresql
import (
"context"
"fmt"
"github.com/jackc/pgx/v5"
)
// NewDB opens a single PostgreSQL connection to dbURL using pgx.
//
// The connection attempt runs with context.Background(), so it is not tied to
// any caller-supplied deadline or cancellation. When pgx.Connect fails, a nil
// connection is returned and the underlying error is wrapped with %w.
func NewDB(dbURL string) (*pgx.Conn, error) {
	conn, err := pgx.Connect(context.Background(), dbURL)
	if err == nil {
		return conn, nil
	}
	return nil, fmt.Errorf("failed to connect to database: %w", err)
}

View File

@ -0,0 +1 @@
-- Removes the imdb table; IF EXISTS makes the statement a no-op when the
-- table is already absent.
DROP TABLE IF EXISTS imdb;

View File

@ -0,0 +1,8 @@
-- Creates the imdb table unless it already exists. Rows are keyed by tconst.
-- Every other column is NOT NULL with a default (0 or the empty string), so a
-- row can be inserted with only some columns supplied.
CREATE TABLE IF NOT EXISTS imdb (
tconst VARCHAR(12) PRIMARY KEY NOT NULL,
year SMALLINT NOT NULL DEFAULT 0,
genres TEXT NOT NULL DEFAULT '',
nconsts TEXT NOT NULL DEFAULT '',
rating REAL NOT NULL DEFAULT 0.0,
votes INTEGER NOT NULL DEFAULT 0
);

View File

@ -0,0 +1,107 @@
package repository
import (
"context"
"github.com/jackc/pgx/v5"
"github.com/aykhans/movier/server/pkg/dto"
)
// IMDbRepository groups the queries this package runs against the imdb table.
type IMDbRepository struct {
// db is the single pgx connection all repository methods execute on.
db *pgx.Conn
}
// NewIMDbRepository returns a repository whose queries run on db.
// The connection is stored as given; it is not checked or pinged here.
func NewIMDbRepository(db *pgx.Conn) *IMDbRepository {
	repo := new(IMDbRepository)
	repo.db = db
	return repo
}
// InsertMultipleBasics upserts the given basics into the imdb table.
//
// One INSERT ... ON CONFLICT statement is queued per element and the whole
// set is sent as a single pgx batch. For a tconst that already exists, year
// and genres are overwritten with the new values. The returned error is the
// one reported when the batch results are closed.
func (repo *IMDbRepository) InsertMultipleBasics(basics []dto.Basic) error {
	const upsertBasic = `INSERT INTO imdb (tconst, year, genres)
VALUES ($1, $2, $3)
ON CONFLICT (tconst) DO UPDATE
SET year = EXCLUDED.year, genres = EXCLUDED.genres`

	queued := new(pgx.Batch)
	for i := range basics {
		queued.Queue(upsertBasic, basics[i].Tconst, basics[i].StartYear, basics[i].Genres)
	}

	return repo.db.SendBatch(context.Background(), queued).Close()
}
// GetAllTconsts returns the tconst of every row in the imdb table.
//
// The first error from running the query, scanning a row, or iterating the
// result set is returned together with a nil slice. When the table has no
// rows the returned slice is nil.
func (repo *IMDbRepository) GetAllTconsts() ([]string, error) {
	const selectTconsts = "SELECT tconst FROM imdb"

	rows, queryErr := repo.db.Query(context.Background(), selectTconsts)
	if queryErr != nil {
		return nil, queryErr
	}
	defer rows.Close()

	var ids []string
	for rows.Next() {
		var id string
		if scanErr := rows.Scan(&id); scanErr != nil {
			return nil, scanErr
		}
		ids = append(ids, id)
	}

	// rows.Next() returning false may also mean iteration failed part-way.
	if iterErr := rows.Err(); iterErr != nil {
		return nil, iterErr
	}
	return ids, nil
}
// UpdateMultiplePrincipals sets the nconsts column for each given principal.
//
// One UPDATE keyed by tconst is queued per element and all of them are sent
// as a single pgx batch. The returned error is the one reported when the
// batch results are closed.
func (repo *IMDbRepository) UpdateMultiplePrincipals(principals []dto.Principal) error {
	const updateNconsts = `UPDATE imdb SET nconsts = $1 WHERE tconst = $2`

	queued := new(pgx.Batch)
	for i := range principals {
		queued.Queue(updateNconsts, principals[i].Nconsts, principals[i].Tconst)
	}

	return repo.db.SendBatch(context.Background(), queued).Close()
}
// UpdateMultipleRatings sets the rating and votes columns for each given
// rating entry.
//
// One UPDATE keyed by tconst is queued per element and all of them are sent
// as a single pgx batch. The returned error is the one reported when the
// batch results are closed.
func (repo *IMDbRepository) UpdateMultipleRatings(ratings []dto.Ratings) error {
	const updateRating = `UPDATE imdb SET rating = $1, votes = $2 WHERE tconst = $3`

	queued := new(pgx.Batch)
	for i := range ratings {
		queued.Queue(updateRating, ratings[i].Rating, ratings[i].Votes, ratings[i].Tconst)
	}

	return repo.db.SendBatch(context.Background(), queued).Close()
}
// GetMinMax returns the smallest and largest votes, year and rating values
// currently stored in the imdb table.
//
// A nil result and the scan error are returned when the row cannot be read.
//
// NOTE(review): on an empty table the aggregates evaluate to NULL; whether
// Scan accepts that depends on the field types of dto.MinMax, which are not
// visible from this file — confirm.
func (repo *IMDbRepository) GetMinMax() (*dto.MinMax, error) {
	const selectBounds = "SELECT MIN(votes), MAX(votes), MIN(year), MAX(year), MIN(rating), MAX(rating) FROM imdb LIMIT 1"

	bounds := new(dto.MinMax)
	row := repo.db.QueryRow(context.Background(), selectBounds)
	if err := row.Scan(
		&bounds.MinVotes, &bounds.MaxVotes,
		&bounds.MinYear, &bounds.MaxYear,
		&bounds.MinRating, &bounds.MaxRating,
	); err != nil {
		return nil, err
	}
	return bounds, nil
}

View File

@ -0,0 +1,357 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Movier</title>
<link href="https://cdnjs.cloudflare.com/ajax/libs/bootstrap/5.3.2/css/bootstrap.min.css" rel="stylesheet">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css">
<link rel="icon" href="https://ftp.aykhans.me/web/client/pubshares/hB6VSdCnBCr8gFPeiMuCji/browse?path=%2Fshipit.png"
type="image/x-icon">
<style>
/* Vertical spacing between tconst input rows. */
.input-wrapper {
margin-bottom: 10px;
}
/* Gap between a tconst input and its remove button. */
.btn-remove {
margin-left: 5px;
}
/* Yellow button theme used by the add-field and remove buttons. */
.btn-custom-yellow {
background-color: #f3ce13;
border-color: #f3ce13;
color: #000;
}
.btn-custom-yellow:hover {
background-color: #dbb911;
border-color: #dbb911;
color: #000;
}
/* Disabled state keeps the base colours and is only faded. */
.btn-custom-yellow:disabled {
background-color: #f3ce13;
border-color: #f3ce13;
opacity: 0.65;
}
/* Applied by the page script while exactly one tconst field is present. */
.single-input {
width: 50%;
}
</style>
</head>
<body>
<div class="container mt-5">
<div class="row justify-content-center">
<div class="col-md-8">
<!-- Recommendation form. The page script intercepts its submit event and
     renders the results into the .response row at the bottom of the form. -->
<form id="dynamicForm">
<!-- tconst inputs: starts with one field; the page script appends further
     fields (each with a remove button) inside #input-container. -->
<div id="input-container" class="row justify-content-center">
<div class="col-md-6 input-wrapper single-input">
<div class="input-group">
<input type="text" class="form-control" name="tconst" placeholder="tt0000009"
pattern="^tt[0-9]+$" minlength="9" maxlength="12" required>
</div>
</div>
</div>
<div class="mb-4 text-center">
<button type="button" id="add-field" class="btn btn-custom-yellow">
<i class="fa-solid fa-plus"></i> Add Field
</button>
</div>
<!-- Vote-count bounds. The .MinMax values are shown in the labels and used
     as the min/max limits of the inputs. -->
<div class="row text-center mb-3">
<div class="col-md-6">
<label for="min-votes">Min Votes ({{ .MinMax.MinVotes | formatNumber }})</label>
<input type="number" id="min-votes" name="min-votes" class="form-control mx-sm-3" value="1"
min="{{ .MinMax.MinVotes }}" max="{{ .MinMax.MaxVotes }}">
</div>
<div class="col-md-6">
<label for="max-votes">Max Votes ({{ .MinMax.MaxVotes | formatNumber }})</label>
<input type="number" id="max-votes" name="max-votes" class="form-control mx-sm-3"
min="{{ .MinMax.MinVotes }}" max="{{ .MinMax.MaxVotes }}">
</div>
</div>
<!-- Year bounds, limited to the .MinMax year range. -->
<div class="row text-center mb-3">
<div class="col-md-6">
<label for="min-year">Min Year ({{ .MinMax.MinYear }})</label>
<input type="number" id="min-year" name="min-year" class="form-control mx-sm-3" value="1"
min="{{ .MinMax.MinYear }}" max="{{ .MinMax.MaxYear }}">
</div>
<div class="col-md-6">
<label for="max-year">Max Year ({{ .MinMax.MaxYear }})</label>
<input type="number" id="max-year" name="max-year" class="form-control mx-sm-3"
min="{{ .MinMax.MinYear }}" max="{{ .MinMax.MaxYear }}">
</div>
</div>
<!-- Rating bounds in steps of 0.1, limited to the .MinMax rating range. -->
<div class="row text-center mb-5">
<div class="col-md-6">
<label for="min-rating">Min Rating ({{ .MinMax.MinRating }})</label>
<input type="number" step="0.1" id="min-rating" name="min-rating"
class="form-control mx-sm-3" min="{{ .MinMax.MinRating }}"
max="{{ .MinMax.MaxRating }}">
</div>
<div class="col-md-6">
<label for="max-rating">Max Rating ({{ .MinMax.MaxRating }})</label>
<input type="number" step="0.1" id="max-rating" name="max-rating"
class="form-control mx-sm-3" min="{{ .MinMax.MinRating }}"
max="{{ .MinMax.MaxRating }}">
</div>
</div>
<!-- Per-feature weights. Inputs carrying the "weight" class are the ones
     the page script sums up and reports in #weight-sum. -->
<div class="row text-center mb-4">
<div class="col-md-3">
<label for="year-weight">Year Weight</label>
<input type="number" id="year-weight" name="year-weight" class="form-control mx-sm-3 weight"
value="100" min="0" max="400">
</div>
<div class="col-md-3">
<label for="rating-weight">Rating Weight</label>
<input type="number" id="rating-weight" name="rating-weight"
class="form-control mx-sm-3 weight" value="100" min="0" max="400">
</div>
<div class="col-md-3">
<label for="genres-weight">Genres Weight</label>
<input type="number" id="genres-weight" name="genres-weight"
class="form-control mx-sm-3 weight" value="100" min="0" max="400">
</div>
<div class="col-md-3">
<label for="nconsts-weight">Nconsts Weight</label>
<input type="number" id="nconsts-weight" name="nconsts-weight"
class="form-control mx-sm-3 weight" value="100" min="0" max="400">
</div>
</div>
<div class="row text-center">
<p id="weight-sum"></p>
</div>
<!-- Number of recommendations to request. -->
<div class="row justify-content-center text-center mb-4">
<div class="col-md-3">
<label for="n">Number of Recommendations</label>
<input type="number" id="n" name="n" class="form-control mx-sm-3" value="5" min="0"
max="20">
</div>
</div>
<!-- Error text from the page script appears in .response-err; the result
     table is rendered into .response. -->
<div class="text-center mb-4">
<p class="response-err" style="color: red;"></p>
<button type="submit" class="btn btn-success">Get</button>
</div>
<div class="row response mb-5"></div>
</form>
</div>
</div>
</div>
<script src="https://cdnjs.cloudflare.com/ajax/libs/bootstrap/5.3.2/js/bootstrap.bundle.min.js"></script>
<script>
document.addEventListener('DOMContentLoaded', function () {
const container = document.getElementById('input-container');
const addButton = document.getElementById('add-field');
const form = document.getElementById('dynamicForm');
const MAX_FIELDS = 5;
// Delegated click handler for the tconst input container.
// When the click landed on (or inside) a remove button, the surrounding
// .input-wrapper is removed, the add button is re-enabled and the remaining
// wrappers are re-laid out. Any other click is ignored.
function handleRemoveClick(event) {
    const clickedRemove = event.target.closest('.btn-remove');
    if (!clickedRemove) {
        return;
    }

    const fieldWrapper = clickedRemove.closest('.input-wrapper');
    if (!fieldWrapper) {
        return;
    }

    fieldWrapper.remove();
    addButton.disabled = false;
    rearrangeInputs();
}
// Resets the classes of every tconst wrapper to the base column classes and
// marks the wrapper as 'single-input' only when it is the sole field left.
function rearrangeInputs() {
    const wrappers = container.getElementsByClassName('input-wrapper');
    const isOnlyField = wrappers.length === 1;

    // Iterate over a snapshot; the class list of each element is rewritten.
    for (const wrapper of Array.from(wrappers)) {
        wrapper.className = 'col-md-6 input-wrapper';
        wrapper.classList.toggle('single-input', isOnlyField);
    }
}
// Appends one more tconst input (with its own remove button) to the
// container, up to MAX_FIELDS inputs. The add button is disabled once the
// limit is reached.
addButton.addEventListener('click', function () {
    // getElementsByClassName returns a LIVE collection: its length changes as
    // soon as a wrapper is appended to or removed from the container.
    const inputGroups = container.getElementsByClassName('input-wrapper');
    if (inputGroups.length >= MAX_FIELDS) {
        return;
    }

    const newWrapper = document.createElement('div');
    newWrapper.className = 'col-md-6 input-wrapper';
    newWrapper.innerHTML = `
<div class="input-group">
<input type="text" class="form-control" name="tconst" placeholder="tt0000009" pattern="^tt[0-9]+$" minlength="9" maxlength="12" required>
<button type="button" class="btn btn-custom-yellow btn-remove">
<i class="fa-solid fa-minus"></i>
</button>
</div>
`;
    container.appendChild(newWrapper);

    // The collection already includes newWrapper here, so "more than one
    // field" is length > 1. The previous `length === 1` check ran after the
    // append and therefore never matched the formerly lone field, which kept
    // its 'single-input' class.
    if (inputGroups.length > 1) {
        Array.from(inputGroups).forEach(wrapper => {
            wrapper.classList.remove('single-input');
        });
    }

    if (inputGroups.length === MAX_FIELDS) {
        addButton.disabled = true;
    }
});
container.addEventListener('click', handleRemoveClick);
const weights = document.querySelectorAll('.weight');
const weightSum = document.getElementById('weight-sum');

// Recomputes the total of all weight inputs and reports it in #weight-sum.
// The expected total is 100 per weight that is greater than zero; the text is
// shown in green when the total matches and in red (with the expected value)
// when it does not. Inputs that do not parse as a number count as 0.
function calculateSum() {
    let total = 0;
    let positiveCount = 0;
    for (const input of weights) {
        const parsed = parseInt(input.value);
        total += parsed || 0;
        if (parsed > 0) {
            positiveCount += 1;
        }
    }

    const expectedTotal = positiveCount * 100;
    if (expectedTotal === total) {
        weightSum.textContent = `Total: ${total}`;
        weightSum.style.color = 'green';
        return;
    }
    weightSum.textContent = `Total: ${total} (Total weights must be ${expectedTotal})`;
    weightSum.style.color = 'red';
}

// Re-evaluate the total whenever any weight input changes.
for (const input of weights) {
    input.addEventListener('input', calculateSum);
}
// Submit handler: turns the form into a query string, requests
// `/recs` from the configured base URL and renders the result as a table.
form.addEventListener('submit', function (e) {
    // Results are fetched and rendered in place, so the native form
    // submission is suppressed.
    e.preventDefault();

    const formData = new FormData(form);
    const params = new URLSearchParams();

    // Each tconst input is sent as a repeated `tconst` query parameter.
    formData.getAll('tconst').forEach(tconst => {
        params.append('tconst', tconst);
    });

    // Remaining fields, in the order they are appended to the query. Form
    // field names use dashes while the query parameters use underscores.
    // Fields left blank are omitted from the query.
    const optionalFields = [
        'min-votes', 'max-votes',
        'min-year', 'max-year',
        'min-rating', 'max-rating',
        'year-weight', 'rating-weight', 'genres-weight', 'nconsts-weight',
        'n',
    ];
    optionalFields.forEach(fieldName => {
        const value = formData.get(fieldName);
        if (value !== '') {
            params.append(fieldName.replace(/-/g, '_'), value);
        }
    });

    const queryString = params.toString();
    const responseErr = document.querySelector('.response-err');
    const responseContainer = document.querySelector('.response');

    // Builds the result table for a non-empty array of recommendations.
    // Columns: running row number, tconst (linked to its IMDb title page),
    // then one column per entry of `weights`, headed 1..N.
    function buildResultsTable(recommendations) {
        const table = document.createElement('table');
        table.className = 'table';

        const head = document.createElement('thead');
        const headRow = document.createElement('tr');
        head.appendChild(headRow);

        const headerLabels = ['#', 'tconst'];
        for (let i = 1; i <= recommendations[0].weights.length; i++) {
            headerLabels.push(i);
        }
        headerLabels.forEach(label => {
            const headCell = document.createElement('th');
            headCell.scope = 'col';
            headCell.textContent = label;
            headRow.appendChild(headCell);
        });
        table.appendChild(head);

        const body = document.createElement('tbody');
        table.appendChild(body);

        recommendations.forEach((rec, index) => {
            const row = document.createElement('tr');

            const rowNumber = document.createElement('th');
            rowNumber.scope = 'row';
            rowNumber.textContent = index + 1;
            row.appendChild(rowNumber);

            const tconstCell = document.createElement('td');
            const tconstLink = document.createElement('a');
            tconstLink.href = `https://www.imdb.com/title/${rec.tconst}/`;
            tconstLink.target = '_blank';
            // Keep the opened page from getting a handle on this window.
            tconstLink.rel = 'noopener';
            tconstLink.textContent = rec.tconst;
            tconstCell.appendChild(tconstLink);
            row.appendChild(tconstCell);

            for (const weight of rec.weights) {
                const cell = document.createElement('td');
                cell.textContent = weight;
                row.appendChild(cell);
            }
            body.appendChild(row);
        });

        return table;
    }

    async function fetchRecommendations() {
        responseErr.textContent = '';
        try {
            const response = await fetch(`{{ .BaseURL }}/recs?${queryString}`, { method: 'GET' });
            const data = await response.json();
            if (response.status === 200) {
                // An empty (or non-array) payload has no first element to
                // derive the weight columns from; report it instead of
                // throwing on data[0].
                if (!Array.isArray(data) || data.length === 0) {
                    responseContainer.replaceChildren();
                    responseErr.textContent = 'No recommendations found';
                    return;
                }
                responseContainer.replaceChildren(buildResultsTable(data));
            } else if (response.status === 400 || response.status === 404) {
                const errorMessage = data.error || "An error occurred";
                responseErr.textContent = errorMessage;
            } else {
                console.error("Error:", response.status, response.statusText);
                // Also tell the user; the console alone is invisible to them.
                responseErr.textContent = "An error occurred";
            }
        } catch (error) {
            // Network failures and unparsable response bodies end up here.
            console.error("Error:", error);
            responseErr.textContent = "An error occurred";
        }
    }
    fetchRecommendations();
});
});
</script>
</body>
</html>

11
server/pkg/utils/env.go Normal file
View File

@ -0,0 +1,11 @@
package utils
import "os"
func GetEnv(key, default_ string) string {
value := os.Getenv(key)
if value == "" {
return default_
}
return value
}

16
server/pkg/utils/file.go Normal file
View File

@ -0,0 +1,16 @@
package utils
import "os"
// MakeDirIfNotExist creates the directory path, along with any missing
// parents, by calling os.MkdirAll with os.ModePerm. The error from
// os.MkdirAll is returned unchanged.
func MakeDirIfNotExist(path string) error {
return os.MkdirAll(path, os.ModePerm)
}
func IsDirExist(path string) (bool, error) {
if _, err := os.Stat(path); err == nil {
return true, nil
} else if !os.IsNotExist(err) {
return false, err
}
return false, nil
}

View File

@ -0,0 +1,20 @@
package utils
import (
"math"
"path/filepath"
"strconv"
)
// IsValidPath reports whether path is absolute, as decided by filepath.IsAbs.
// No other property of the path is checked; in particular it does not have to
// exist.
func IsValidPath(path string) bool {
return filepath.IsAbs(path)
}
// IsUint32 reports whether value lies in the range of a uint32, i.e.
// 0 <= value <= math.MaxUint32.
func IsUint32(value int) bool {
	if value < 0 {
		return false
	}
	// Compare as uint64: the untyped constant math.MaxUint32 does not fit in
	// an int on platforms where int is 32 bits wide, so comparing it against
	// an int directly fails to compile there.
	return uint64(value) <= math.MaxUint32
}
// IsInt reports whether value can be parsed by strconv.Atoi, i.e. whether it
// is a base-10 integer that fits in an int.
func IsInt(value string) bool {
	if _, err := strconv.Atoi(value); err != nil {
		return false
	}
	return true
}