diff --git a/.gitignore b/.gitignore index ee25eca..4f509e5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1 @@ -IMDB_data_sets -.venv -.ipynb_checkpoints -__pycache__ \ No newline at end of file +*.env \ No newline at end of file diff --git a/IMDB_DTO.py b/IMDB_DTO.py deleted file mode 100755 index 9a4abac..0000000 --- a/IMDB_DTO.py +++ /dev/null @@ -1,332 +0,0 @@ -from pathlib import Path -import pandas as pd -import numpy as np -from time import time -from exceptions import ( - FileExistException, - FileNotExistException -) - - -BASE_DIR = Path(__file__).resolve().parent - -class DTO: - def __init__( - self, - save_dir=(BASE_DIR / 'IMDB_data_sets/filtered/'), - read_dir=(BASE_DIR / 'IMDB_data_sets/'), - default_chunksize: int=3_000_000 - ) -> None: - """ - Parameters - ---------- - save_dir : str, optional - Folder location to save files (default is BASE_DIR / 'IMDB_data_sets/filtered/') - get_dir : str, optional - Folder location to get files (default is BASE_DIR / 'IMDB_data_sets/') - default_chunksize : int, optional - Default value to be used when chunksize is not given in methods that take - chunksize parameters (default is 3_000_000) - """ - - self.save_dir = save_dir - self.save_dir.mkdir(parents=True, exist_ok=True) - self.read_dir = read_dir - self.default_chunksize = default_chunksize - - def timing_decorator(func): - def wrapper(*args, **kwargs): - start_time = time() - result = func(*args, **kwargs) - print(f"Function {func.__name__} took {time() - start_time} seconds to run.") - return result - return wrapper - - def is_exist(self, file_dir: Path) -> None: - """ - Parameters - ---------- - file_dir : pathlib.Path - File path - - Raises - ------ - FileExistException - If the file exists - """ - - if file_dir.is_file(): - raise FileExistException(f"file is exist: {file_dir}") - - def is_not_exist(self, file_dir: Path) -> None: - """ - Parameters - ---------- - file_dir : pathlib.Path - File path - - Raises - ------ - FileNotExistException - If the 
file does not exist - """ - - if not file_dir.is_file(): - raise FileNotExistException(f"file is not exist: {file_dir}") - - def df2csv( - self, - df: pd.DataFrame, - name: str, - overwrite: bool=False, - index: bool=False - ) -> None: - """ - Parameters - ---------- - df : DataFrame - DataFrame object you want to save - name : str - The name you want to save the DataFrame object - overwrite : bool, optional - When True, overwrite if file exists (default is False) - index : bool, optional - Save index column or no (deafault is False) - - Raises - ------ - FileExistException - If the overwrite parameter is false and the file exists - """ - - if not overwrite: - self.is_exist(self.save_dir / name) - df.to_csv(self.save_dir / name, index=index) - - @timing_decorator - def filter_tconst( - self, - name: str, - title_types: list[str]=['movie', 'tvMovie'], - chunksize: int=None - ) -> list[str]: - """ - Parameters - ---------- - name : str - Name of the basics file to be read - title_type : list, optional - 'titleType' type of lines to be read from file (default is ['movie', 'tvMovie']) - chunksize : int - Chunk size for reading data (default is self.default_chunksize (default is 3_000_000)). 
- - Returns - ------- - list - A list of tconst - - Raises - ------ - FileNotExistException - If the file does not exist - """ - - self.is_not_exist(self.read_dir / name) - if chunksize is None: - chunksize = self.default_chunksize - - tconst_list = [] - - with pd.read_csv( - self.read_dir / name, - sep=r'\t', - chunksize=chunksize, - engine='python', - usecols=['tconst', 'titleType'], - dtype={'tconst': str, 'titleType': str}, - na_values='\\N') as reader: - - for i, r in enumerate(reader): - tconst_list += list(r[r.titleType.isin(title_types)]['tconst']) - return tconst_list - - def get_tconst(self, name: str) -> list[str]: - """ - Parameters - ---------- - name : str - Name of the tconst file to be read - - Returns - ------- - list - A list of tconst - - Raises - ------ - FileNotExistException - If the file does not exist - """ - - self.is_not_exist(self.save_dir / name) - return list(pd.read_csv(self.save_dir / name, usecols=['tconst'], dtype={'tconst': str})['tconst']) - - @timing_decorator - def filter_principal( - self, - name: str, - tconst_list: list[str], - category_list: list[str]=['actress', 'actor', 'director', 'writer'], - chunksize: int=None - ) -> pd.DataFrame: - """ - Parameters - ---------- - name : str - Name of the principals file to be read - tconst_list : list - List of tconst (It can be obtained by the get_tconst or read_tconst method). - category : list - List of categories of rows to be selected (default is ['actress', 'actor', 'director', 'writer']). - chunksize : int - Chunk size for reading data (default is self.default_chunksize (default is 3_000_000)). - - Returns - ------- - DataFrame - A DataFrame object with columns tconst, nconst, and category. 
- - Raises - ------ - FileNotExistException - If the file does not exist - """ - - self.is_not_exist(self.read_dir / name) - if chunksize is None: - chunksize = self.default_chunksize - - df = pd.DataFrame({ - 'tconst': tconst_list, - 'nconst': np.empty((len(tconst_list), 0)).tolist(), - 'category': np.empty((len(tconst_list), 0)).tolist() - }) - - # index = pd.Index(tconst_list, name='tconst') - # df = pd.DataFrame({ - # 'nconst': pd.Series(dtype='object', index=index), - # 'category': pd.Series(dtype='object', index=index) - # }) - - cnt = 0 - - with pd.read_csv(self.read_dir / name, - sep=r'\t', - chunksize=chunksize, - engine='python', - usecols=['tconst', 'nconst', 'category']) as reader: - - for i, r in enumerate(reader): - r = r.query(f"(tconst in @tconst_list) and (category in @category_list)") - r_group = r.groupby('tconst', as_index=0).agg({'nconst': lambda x: list(x), 'category': lambda x: list(x)}) - df = pd.concat([df, r_group]).groupby('tconst', as_index=0).agg(sum) - - # r_group.index.name = 'tconst' - # df.update(r_group) - del r_group - - print(cnt) - return df - - @timing_decorator - def filter_rating( - self, - name: str, - tconst_list: list[str], - chunksize: int=None - ) -> pd.DataFrame: - """ - Parameters - ---------- - name : str - Name of the ratings file to be read - tconst_list : list - List of tconst (It can be obtained by the get_tconst or read_tconst method). - chunksize : int - Chunk size for reading data (default is self.default_chunksize (default is 3_000_000)). - - Returns - ------- - DataFrame - A DataFrame object with columns tconst, and averageRating. 
- - Raises - ------ - FileNotExistException - If the file does not exist - """ - - self.is_not_exist(self.read_dir / name) - if chunksize is None: - chunksize = self.default_chunksize - - df = pd.DataFrame({'tconst': tconst_list}) - - with pd.read_csv( - self.read_dir / name, - sep=r'\t', - chunksize=chunksize, - engine='python', - usecols=['tconst', 'averageRating', 'numVotes'], - dtype={'tconst': str, 'averageRating': np.float16, 'numVotes': int}, - na_values='\\N') as reader: - - for i, r in enumerate(reader): - df = pd.concat([df, r.query("tconst in @tconst_list")]).groupby('tconst', as_index=0).first() - return df - - @timing_decorator - def filter_basic( - self, - name: str, - tconst_list: list[str], - chunksize: int=None - ) -> pd.DataFrame: - """ - Parameters - ---------- - name : str - Name of the basics file to be read - tconst_list : list - List of tconst (It can be obtained by the get_tconst or read_tconst method). - chunksize : int - Chunk size for reading data (default is self.default_chunksize (default is 3_000_000)). - - Returns - ------- - DataFrame - A DataFrame object with columns tconst, startYear and genres. 
- - Raises - ------ - FileNotExistException - If the file does not exist - """ - - self.is_not_exist(self.read_dir / name) - if chunksize is None: - chunksize = self.default_chunksize - - df = pd.DataFrame({'tconst': tconst_list}) - - with pd.read_csv(self.read_dir / name, - sep=r'\t', - chunksize=chunksize, - engine='python', - usecols=['tconst', 'startYear', 'genres'], - dtype={'tconst': str, 'startYear': 'Int16', 'genres': str}, - na_values='\\N') as reader: - - for i, r in enumerate(reader): - df = pd.concat([df, r.query("tconst in @tconst_list")]).groupby('tconst', as_index=0).first() - return df \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..dedf2b4 --- /dev/null +++ b/Makefile @@ -0,0 +1,20 @@ +.SILENT: + +proto-generate-go: + protoc \ + -I./protos \ + --go_out=./server/pkg/proto \ + --go_opt=paths=source_relative \ + --go-grpc_out=./server/pkg/proto \ + --go-grpc_opt=paths=source_relative \ + ./protos/*.proto + +proto-generate-py: + python -m grpc_tools.protoc \ + -I./protos \ + --python_out=./recommender/proto \ + --pyi_out=./recommender/proto \ + --grpc_python_out=./recommender/proto \ + ./protos/*.proto + +proto-generate: proto-generate-go proto-generate-py \ No newline at end of file diff --git a/config/postgres/.env.example b/config/postgres/.env.example new file mode 100644 index 0000000..d63866c --- /dev/null +++ b/config/postgres/.env.example @@ -0,0 +1,4 @@ +POSTGRES_USER=admin +POSTGRES_PASSWORD=admin +POSTGRES_DB=imdb +POSTGRES_PORT=5432 \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..70b3353 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,96 @@ +services: + postgres: + image: postgres:17.0-alpine + container_name: "movier-postgres" + hostname: "movier-postgres" + ports: + - "5432:5432" + volumes: + - postgres_data:/var/lib/postgresql/data + env_file: + - ./config/postgres/.env + healthcheck: + test: [ "CMD-SHELL", "pg_isready 
-U $${POSTGRES_USER} -d $${POSTGRES_DB}" ] + interval: 5s + timeout: 5s + retries: 5 + start_period: 10s + init: true + + migrate: + image: migrate/migrate:v4.18.1 + container_name: "movier-migrate" + hostname: "movier-migrate" + env_file: + - ./config/postgres/.env + environment: + - POSTGRES_HOST=movier-postgres + volumes: + - ./server/pkg/storage/postgresql/migrations:/migrations + init: true + depends_on: + postgres: + condition: service_healthy + entrypoint: [ "/bin/sh", "-c", "migrate -path=/migrations/ -database postgres://$${POSTGRES_USER}:$${POSTGRES_PASSWORD}@$${POSTGRES_HOST}:$${POSTGRES_PORT}/$${POSTGRES_DB}?sslmode=disable up" ] + + etl: + image: movier:latest + build: ./server + container_name: "movier-etl" + hostname: "movier-etl" + env_file: + - ./config/postgres/.env + environment: + - POSTGRES_HOST=movier-postgres + init: true + depends_on: + postgres: + condition: service_healthy + migrate: + condition: service_completed_successfully + entrypoint: [ "/bin/sh", "-c", "./movier download && ./movier filter" ] + + recommender: + build: ./recommender + container_name: "movier-recommender" + hostname: "movier-recommender" + env_file: + - ./config/postgres/.env + environment: + - POSTGRES_HOST=movier-postgres + - GRPC_PORT=50051 + init: true + depends_on: + postgres: + condition: service_healthy + etl: + condition: service_completed_successfully + healthcheck: + test: [ "CMD-SHELL", "/bin/grpc_health_probe -addr=:$${GRPC_PORT}" ] + interval: 5s # how often to check the health + timeout: 5s # how long to wait for a response + retries: 5 # how many retries before marking as unhealthy + start_period: 10s # initial delay before starting health checks + entrypoint: [ "uv", "run", "main.py" ] + + server: + image: movier:latest + container_name: "movier-server" + hostname: "movier-server" + ports: + - "8080:8080" + env_file: + - ./config/postgres/.env + environment: + - POSTGRES_HOST=movier-postgres + - BASE_URL=http://localhost:8080 + - 
RECOMMENDER_SERVICE_GRPC_HOST=movier-recommender + - RECOMMENDER_SERVICE_GRPC_PORT=50051 + init: true + depends_on: + recommender: + condition: service_healthy + entrypoint: [ "./movier", "serve" ] + +volumes: + postgres_data: diff --git a/exceptions.py b/exceptions.py deleted file mode 100644 index 654dd5c..0000000 --- a/exceptions.py +++ /dev/null @@ -1,3 +0,0 @@ -class FileExistException(Exception): ... - -class FileNotExistException(Exception): ... \ No newline at end of file diff --git a/imdb_recommendation.ipynb b/imdb_recommendation.ipynb deleted file mode 100755 index b1c7872..0000000 --- a/imdb_recommendation.ipynb +++ /dev/null @@ -1,517 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "from sklearn.feature_extraction.text import CountVectorizer\n", - "from sklearn.metrics.pairwise import cosine_similarity\n", - "from ast import literal_eval\n", - "from functools import reduce" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "input_film = 'tt0816692'\n", - "\n", - "trained = {'basics': {}, 'principals': {}, 'ratings': {}}\n", - "\n", - "def drop_by_tconst(df, tconst: str, inplace=True) -> pd.DataFrame:\n", - " return df.drop(df[df['tconst'] == tconst].index, inplace=inplace)\n", - "\n", - "# Basics\n", - "\n", - "df = pd.read_csv('./IMDB_data_sets/filtered/basics.csv', dtype={'tconst': str, 'startYear': 'Int16', 'genres': str})\n", - "df['genres'].fillna('', inplace=True)\n", - "\n", - "cv = CountVectorizer(dtype=np.int8, token_pattern=\"(?u)[\\w'-]+\")\n", - "count_matrix = cv.fit_transform(df['genres'])\n", - "\n", - "trained['basics']['genres'] = pd.DataFrame(\n", - " {\n", - " 'genres': cosine_similarity(count_matrix[df[df['tconst'] == input_film].index[0]], count_matrix)[0],\n", - " 'tconst': df['tconst']\n", - " }\n", - " )\n", - "\n", - 
"drop_by_tconst(trained['basics']['genres'], input_film)\n", - "\n", - "trained['basics']['genres'].sort_values(ascending=False, by='genres', inplace=True, ignore_index=True)\n", - "trained['basics']['genres'].drop('genres', axis=1, inplace=True)\n", - "\n", - "\n", - "year = int(df[df['tconst'] == input_film].startYear.iloc[0])\n", - "\n", - "trained['basics']['years'] = pd.DataFrame(\n", - " {\n", - " 'years': df['startYear'],\n", - " 'tconst': df['tconst']\n", - " }\n", - ")\n", - "\n", - "drop_by_tconst(trained['basics']['years'], input_film)\n", - "trained['basics']['years'].sort_values(by='years', key=lambda x: abs(year-x), inplace=True, ignore_index=True)\n", - "trained['basics']['years'].drop('years', axis=1, inplace=True)\n", - "trained['basics']['years'].reset_index(names='years_index', inplace=True)\n", - "\n", - "# Principals\n", - "\n", - "df = pd.read_csv('./IMDB_data_sets/filtered/principals.csv', dtype={'tconst': str, 'nconst': str}, usecols=['tconst', 'nconst'])\n", - "df.nconst = df.nconst.apply(lambda n: ','.join(literal_eval(n)))\n", - "\n", - "cv = CountVectorizer(dtype=np.int8, token_pattern=\"(?u)[\\w'-]+\")\n", - "count_matrix = cv.fit_transform(df['nconst'])\n", - "\n", - "trained['principals']['nconst'] = pd.DataFrame(\n", - " {\n", - " 'nconst': cosine_similarity(count_matrix[df[df['tconst'] == input_film].index[0]], count_matrix)[0],\n", - " 'tconst': df['tconst']\n", - " }\n", - " )\n", - "\n", - "drop_by_tconst(trained['principals']['nconst'], input_film)\n", - "trained['principals']['nconst'].sort_values(ascending=False, by='nconst', inplace=True, ignore_index=True)\n", - "trained['principals']['nconst'].drop('nconst', axis=1, inplace=True)\n", - "trained['principals']['nconst'].reset_index(names='nconst_index', inplace=True)\n", - "\n", - "# Ratings\n", - "\n", - "df = pd.read_csv('./IMDB_data_sets/filtered/ratings.csv', dtype={'tconst': str, 'averageRating': float, 'numVotes': 'Int64'})\n", - "\n", - "rating = float(df[df['tconst'] 
== input_film].averageRating.iloc[0])\n", - "votes = int(df[df['tconst'] == input_film].numVotes.iloc[0])\n", - "\n", - "drop_by_tconst(df, input_film)\n", - "\n", - "trained['ratings']['ratings'] = df.sort_values(by='averageRating', key=lambda x: abs(rating-x), ignore_index=True)\n", - "trained['ratings']['ratings'].drop(['averageRating', 'numVotes'], axis=1, inplace=True)\n", - "trained['ratings']['ratings'].reset_index(names='ratings_index', inplace=True)\n", - "\n", - "df.drop('averageRating', axis=1, inplace=True)\n", - "\n", - "trained['ratings']['votes'] = df.sort_values(by='numVotes', key=lambda x: abs(votes-x), ignore_index=True)\n", - "trained['ratings']['votes'].drop('numVotes', axis=1, inplace=True)\n", - "trained['ratings']['votes'].reset_index(names='votes_index', inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "merged = reduce(lambda left, right: pd.merge(\n", - " left,\n", - " right,\n", - " on=['tconst'],\n", - " how='outer'\n", - " ), [\n", - " trained['basics']['genres'],\n", - " trained['basics']['years'],\n", - " trained['principals']['nconst'],\n", - " trained['ratings']['ratings'],\n", - " trained['ratings']['votes']\n", - " ])" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
tconstyears_indexnconst_indexratings_indexvotes_indexaverage
0tt4255564744690297616669670669670476329.2
1tt2203897272939705602978602978248591.0
2tt0355627344502708640318038205177315271.8
3tt1538737871021498486540358540358377883.8
4tt515534072975386406103102152733143044.0
.....................
777383tt123021118987159982627843425539374210.6
777384tt12302076149946599825483066483066498657.4
777385tt123020618988559982330196925847378981.8
777386tt1230179255769599809483065483065519818.8
777387tt991675439373777387777387777387629784.2
\n", - "

777388 rows × 6 columns

\n", - "
" - ], - "text/plain": [ - " tconst years_index nconst_index ratings_index votes_index \n", - "0 tt4255564 744690 297616 669670 669670 \\\n", - "1 tt2203897 27293 9705 602978 602978 \n", - "2 tt0355627 344502 708640 318038 205177 \n", - "3 tt15387378 710214 98486 540358 540358 \n", - "4 tt5155340 72975 386406 103102 152733 \n", - "... ... ... ... ... ... \n", - "777383 tt1230211 189871 599826 278434 25539 \n", - "777384 tt12302076 149946 599825 483066 483066 \n", - "777385 tt1230206 189885 599823 301969 25847 \n", - "777386 tt1230179 255769 599809 483065 483065 \n", - "777387 tt9916754 39373 777387 777387 777387 \n", - "\n", - " average \n", - "0 476329.2 \n", - "1 248591.0 \n", - "2 315271.8 \n", - "3 377883.8 \n", - "4 143044.0 \n", - "... ... \n", - "777383 374210.6 \n", - "777384 498657.4 \n", - "777385 378981.8 \n", - "777386 519818.8 \n", - "777387 629784.2 \n", - "\n", - "[777388 rows x 6 columns]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "merged['average'] = (merged.index*20 + merged.years_index*20 + merged.nconst_index*20 + merged.ratings_index*20 + merged.votes_index*20) / (5*20)\n", - "merged" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
tconstyears_indexnconst_indexratings_indexvotes_indexaverage
8695tt233815177751258623860115110813.4
14tt36593884965498257587915120.6
8501tt17546563099311646193324717810.0
7374tt210328111453279104961834719340.4
7549tt2358592549851763312668918220403.4
.....................
758545tt13334656700841672174500543500543626529.2
758577tt13336544700845672184500576500576626551.6
758587tt13335546700843672231500564500564626557.8
758590tt13335152700842672247500557500557626558.6
777387tt991675439373777387777387777387629784.2
\n", - "

777388 rows × 6 columns

\n", - "
" - ], - "text/plain": [ - " tconst years_index nconst_index ratings_index votes_index \n", - "8695 tt2338151 7775 12586 23860 1151 \\\n", - "14 tt3659388 49654 98 25758 79 \n", - "8501 tt1754656 30993 116 46193 3247 \n", - "7374 tt2103281 11453 27910 49618 347 \n", - "7549 tt2358592 54985 17633 12668 9182 \n", - "... ... ... ... ... ... \n", - "758545 tt13334656 700841 672174 500543 500543 \n", - "758577 tt13336544 700845 672184 500576 500576 \n", - "758587 tt13335546 700843 672231 500564 500564 \n", - "758590 tt13335152 700842 672247 500557 500557 \n", - "777387 tt9916754 39373 777387 777387 777387 \n", - "\n", - " average \n", - "8695 10813.4 \n", - "14 15120.6 \n", - "8501 17810.0 \n", - "7374 19340.4 \n", - "7549 20403.4 \n", - "... ... \n", - "758545 626529.2 \n", - "758577 626551.6 \n", - "758587 626557.8 \n", - "758590 626558.6 \n", - "777387 629784.2 \n", - "\n", - "[777388 rows x 6 columns]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "merged.sort_values(by='average')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "08dff0a1cb2e37beec5bc340112a669cde11fa0a1a1e2fde92884d26090bd6fc" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/main.py b/main.py deleted file mode 100644 index e042b40..0000000 --- a/main.py +++ /dev/null @@ -1,23 +0,0 @@ -from IMDB_DTO import DTO -from time import time -import pandas as pd - - -if __name__ == '__main__': - start_time = time() - dto = DTO() - 
- tconst_list = dto.filter_tconst(name='title.basics.tsv') - dto.df2csv(df=pd.DataFrame({'tconst': tconst_list}), name='tconst.csv') - - tconst_list = dto.get_tconst('tconst.csv') - - df = dto.filter_basic(name='title.basics.tsv', tconst_list=tconst_list) - dto.df2csv(df=df, name='basics.csv') - del df - df = dto.filter_principal(name='title.principals.tsv', tconst_list=tconst_list) - dto.df2csv(df=df, name='principals_comma.csv', overwrite=True) - del df - df = dto.filter_rating(name='title.ratings.tsv', tconst_list=tconst_list) - dto.df2csv(df=df, name='ratings.csv', overwrite=1) - del df \ No newline at end of file diff --git a/protos/recommender.proto b/protos/recommender.proto new file mode 100644 index 0000000..5da0f99 --- /dev/null +++ b/protos/recommender.proto @@ -0,0 +1,53 @@ +syntax = "proto3"; + +package recommender; + +option go_package = "github.com/aykhans/movier/server/pkg/proto"; + +service Recommender { + rpc GetRecommendations(Request) returns (Response) {} +} + +message Filter { + oneof min_votes_oneof { + uint32 min_votes = 1; + } + oneof max_votes_oneof { + uint32 max_votes = 2; + } + oneof min_year_oneof { + uint32 min_year = 3; + } + oneof max_year_oneof { + uint32 max_year = 4; + } + oneof min_rating_oneof { + float min_rating = 5; + } + oneof max_rating_oneof { + float max_rating = 6; + } +} + +message Weight { + uint32 year = 1; + uint32 rating = 2; + uint32 genres = 3; + uint32 nconsts = 4; +} + +message Request { + repeated string tconsts = 1; + uint32 n = 2; + Filter filter = 3; + Weight weight = 4; +} + +message Response { + repeated RecommendedMovie movies = 1; +} + +message RecommendedMovie { + string tconst = 1; + repeated string weights = 2; +} diff --git a/recommender/.dockerignore b/recommender/.dockerignore new file mode 100644 index 0000000..c8fda29 --- /dev/null +++ b/recommender/.dockerignore @@ -0,0 +1,4 @@ +.venv +.ipynb_checkpoints +__pycache__ +*.ipynb \ No newline at end of file diff --git a/recommender/.gitignore 
b/recommender/.gitignore new file mode 100644 index 0000000..c8fda29 --- /dev/null +++ b/recommender/.gitignore @@ -0,0 +1,4 @@ +.venv +.ipynb_checkpoints +__pycache__ +*.ipynb \ No newline at end of file diff --git a/recommender/.python-version b/recommender/.python-version new file mode 100644 index 0000000..e4fba21 --- /dev/null +++ b/recommender/.python-version @@ -0,0 +1 @@ +3.12 diff --git a/recommender/Dockerfile b/recommender/Dockerfile new file mode 100644 index 0000000..93e70f2 --- /dev/null +++ b/recommender/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.12.3-slim-bookworm + +ENV UV_COMPILE_BYTECODE=1 + +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ + +RUN apt-get update && apt-get install -y curl + +ENV GRPC_HEALTH_PROBE_VERSION=v0.4.35 + +RUN curl -L -o /bin/grpc_health_probe \ + https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-amd64 && \ + chmod +x /bin/grpc_health_probe + +WORKDIR /app + +COPY . . 
+ +RUN uv sync --frozen diff --git a/recommender/config.py b/recommender/config.py new file mode 100644 index 0000000..6716890 --- /dev/null +++ b/recommender/config.py @@ -0,0 +1,38 @@ +import os + +def get_postgres_dsn(): + user = os.getenv('POSTGRES_USER', None) + if user is None: + raise ValueError('POSTGRES_USER is not set') + + password = os.getenv('POSTGRES_PASSWORD', None) + if password is None: + raise ValueError('POSTGRES_PASSWORD is not set') + + host = os.getenv('POSTGRES_HOST', None) + if host is None: + raise ValueError('POSTGRES_HOST is not set') + + port = os.getenv('POSTGRES_PORT', None) + if port is None: + raise ValueError('POSTGRES_PORT is not set') + try: + port = int(port) + except ValueError: + raise ValueError('POSTGRES_PORT is not an integer') + + dbname = os.getenv('POSTGRES_DB', None) + if dbname is None: + raise ValueError('POSTGRES_DB is not set') + + return f'postgres://{user}:{password}@{host}:{port}/{dbname}?sslmode=disable' + +def get_grpc_port(): + port = os.getenv('GRPC_PORT', None) + if port is None: + raise ValueError('GRPC_PORT is not set') + try: + port = int(port) + except ValueError: + raise ValueError('GRPC_PORT is not an integer') + return port diff --git a/recommender/main.py b/recommender/main.py new file mode 100644 index 0000000..0447432 --- /dev/null +++ b/recommender/main.py @@ -0,0 +1,114 @@ +from sys import path +path.append('./proto') + +from concurrent import futures +from time import sleep +import threading +from recommend import Recommender, Weight, Filter +from config import get_postgres_dsn, get_grpc_port + +import psycopg2 + +from proto import recommender_pb2, recommender_pb2_grpc +import grpc +from grpc_reflection.v1alpha import reflection +from grpc_health.v1 import health +from grpc_health.v1 import health_pb2 +from grpc_health.v1 import health_pb2_grpc + +postgres_dsn = get_postgres_dsn() + +class RecommenderServicer(recommender_pb2_grpc.RecommenderServicer): + def GetRecommendations(self, request: 
recommender_pb2.Request, context): + try: + recommender = Recommender( + filter_=Filter( + min_votes=request.filter.min_votes if request.filter.HasField('min_votes_oneof') else None, + max_votes=request.filter.max_votes if request.filter.HasField('max_votes_oneof') else None, + min_year=request.filter.min_year if request.filter.HasField('min_year_oneof') else None, + max_year=request.filter.max_year if request.filter.HasField('max_year_oneof') else None, + min_rating=request.filter.min_rating if request.filter.HasField('min_rating_oneof') else None, + max_rating=request.filter.max_rating if request.filter.HasField('max_rating_oneof') else None + ), + weight=Weight( + year=request.weight.year, + rating=request.weight.rating, + genres=request.weight.genres, + nconsts=request.weight.nconsts + ) + ) + except ValueError as e: + context.set_code(grpc.StatusCode.INVALID_ARGUMENT) + context.set_details(str(e)) + return recommender_pb2.Response() + except Exception as e: + context.set_code(grpc.StatusCode.INTERNAL) + context.set_details(str(e)) + return recommender_pb2.Response() + + with psycopg2.connect(dsn=postgres_dsn) as conn: + try: + data = recommender.get_recommendations(conn, request.tconsts, request.n) + except ValueError as e: + context.set_code(grpc.StatusCode.NOT_FOUND) + context.set_details(str(e)) + return recommender_pb2.Response() + except Exception as e: + context.set_code(grpc.StatusCode.INTERNAL) + context.set_details(str(e)) + return recommender_pb2.Response() + + movies = [] + for k, v in data.items(): + movies.append( + recommender_pb2.RecommendedMovie( + tconst=k, + weights=v + ) + ) + + return recommender_pb2.Response(movies=movies) + +def _toggle_health(health_servicer: health.HealthServicer, service: str): + next_status = health_pb2.HealthCheckResponse.SERVING + while True: + if next_status == health_pb2.HealthCheckResponse.SERVING: + next_status = health_pb2.HealthCheckResponse.NOT_SERVING + else: + next_status = 
health_pb2.HealthCheckResponse.SERVING + + health_servicer.set(service, next_status) + sleep(5) + +def _configure_health_server(server: grpc.Server): + health_servicer = health.HealthServicer( + experimental_non_blocking=True, + experimental_thread_pool=futures.ThreadPoolExecutor(max_workers=10), + ) + health_pb2_grpc.add_HealthServicer_to_server(health_servicer, server) + + toggle_health_status_thread = threading.Thread( + target=_toggle_health, + args=(health_servicer, "recommender.Recommender"), + daemon=True, + ) + toggle_health_status_thread.start() + +def serve(): + server = grpc.server(futures.ThreadPoolExecutor(max_workers=100)) + recommender_pb2_grpc.add_RecommenderServicer_to_server(RecommenderServicer(), server) + SERVICE_NAMES = ( + recommender_pb2.DESCRIPTOR.services_by_name["Recommender"].full_name, + reflection.SERVICE_NAME, + ) + reflection.enable_server_reflection(SERVICE_NAMES, server) + server.add_insecure_port(f'[::]:{get_grpc_port()}') + _configure_health_server(server) + server.start() + server.wait_for_termination() + +if __name__ == '__main__': + try: + serve() + except KeyboardInterrupt: + print("Shutting down server") diff --git a/recommender/proto/recommender_pb2.py b/recommender/proto/recommender_pb2.py new file mode 100644 index 0000000..cb2b19b --- /dev/null +++ b/recommender/proto/recommender_pb2.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# NO CHECKED-IN PROTOBUF GENCODE +# source: recommender.proto +# Protobuf Python Version: 5.27.2 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, + 5, + 27, + 2, + '', + 'recommender.proto' +) +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x11recommender.proto\x12\x0brecommender\"\xf8\x01\n\x06\x46ilter\x12\x13\n\tmin_votes\x18\x01 \x01(\rH\x00\x12\x13\n\tmax_votes\x18\x02 \x01(\rH\x01\x12\x12\n\x08min_year\x18\x03 \x01(\rH\x02\x12\x12\n\x08max_year\x18\x04 \x01(\rH\x03\x12\x14\n\nmin_rating\x18\x05 \x01(\x02H\x04\x12\x14\n\nmax_rating\x18\x06 \x01(\x02H\x05\x42\x11\n\x0fmin_votes_oneofB\x11\n\x0fmax_votes_oneofB\x10\n\x0emin_year_oneofB\x10\n\x0emax_year_oneofB\x12\n\x10min_rating_oneofB\x12\n\x10max_rating_oneof\"G\n\x06Weight\x12\x0c\n\x04year\x18\x01 \x01(\r\x12\x0e\n\x06rating\x18\x02 \x01(\r\x12\x0e\n\x06genres\x18\x03 \x01(\r\x12\x0f\n\x07nconsts\x18\x04 \x01(\r\"o\n\x07Request\x12\x0f\n\x07tconsts\x18\x01 \x03(\t\x12\t\n\x01n\x18\x02 \x01(\r\x12#\n\x06\x66ilter\x18\x03 \x01(\x0b\x32\x13.recommender.Filter\x12#\n\x06weight\x18\x04 \x01(\x0b\x32\x13.recommender.Weight\"9\n\x08Response\x12-\n\x06movies\x18\x01 \x03(\x0b\x32\x1d.recommender.RecommendedMovie\"3\n\x10RecommendedMovie\x12\x0e\n\x06tconst\x18\x01 \x01(\t\x12\x0f\n\x07weights\x18\x02 \x03(\t2R\n\x0bRecommender\x12\x43\n\x12GetRecommendations\x12\x14.recommender.Request\x1a\x15.recommender.Response\"\x00\x42,Z*github.com/aykhans/movier/server/pkg/protob\x06proto3') + +_globals = globals() 
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'recommender_pb2', _globals) +if not _descriptor._USE_C_DESCRIPTORS: + _globals['DESCRIPTOR']._loaded_options = None + _globals['DESCRIPTOR']._serialized_options = b'Z*github.com/aykhans/movier/server/pkg/proto' + _globals['_FILTER']._serialized_start=35 + _globals['_FILTER']._serialized_end=283 + _globals['_WEIGHT']._serialized_start=285 + _globals['_WEIGHT']._serialized_end=356 + _globals['_REQUEST']._serialized_start=358 + _globals['_REQUEST']._serialized_end=469 + _globals['_RESPONSE']._serialized_start=471 + _globals['_RESPONSE']._serialized_end=528 + _globals['_RECOMMENDEDMOVIE']._serialized_start=530 + _globals['_RECOMMENDEDMOVIE']._serialized_end=581 + _globals['_RECOMMENDER']._serialized_start=583 + _globals['_RECOMMENDER']._serialized_end=665 +# @@protoc_insertion_point(module_scope) diff --git a/recommender/proto/recommender_pb2.pyi b/recommender/proto/recommender_pb2.pyi new file mode 100644 index 0000000..be97ac7 --- /dev/null +++ b/recommender/proto/recommender_pb2.pyi @@ -0,0 +1,60 @@ +from google.protobuf.internal import containers as _containers +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union + +DESCRIPTOR: _descriptor.FileDescriptor + +class Filter(_message.Message): + __slots__ = ("min_votes", "max_votes", "min_year", "max_year", "min_rating", "max_rating") + MIN_VOTES_FIELD_NUMBER: _ClassVar[int] + MAX_VOTES_FIELD_NUMBER: _ClassVar[int] + MIN_YEAR_FIELD_NUMBER: _ClassVar[int] + MAX_YEAR_FIELD_NUMBER: _ClassVar[int] + MIN_RATING_FIELD_NUMBER: _ClassVar[int] + MAX_RATING_FIELD_NUMBER: _ClassVar[int] + min_votes: int + max_votes: int + min_year: int + max_year: int + min_rating: float + max_rating: float + def __init__(self, min_votes: _Optional[int] = 
..., max_votes: _Optional[int] = ..., min_year: _Optional[int] = ..., max_year: _Optional[int] = ..., min_rating: _Optional[float] = ..., max_rating: _Optional[float] = ...) -> None: ... + +class Weight(_message.Message): + __slots__ = ("year", "rating", "genres", "nconsts") + YEAR_FIELD_NUMBER: _ClassVar[int] + RATING_FIELD_NUMBER: _ClassVar[int] + GENRES_FIELD_NUMBER: _ClassVar[int] + NCONSTS_FIELD_NUMBER: _ClassVar[int] + year: int + rating: int + genres: int + nconsts: int + def __init__(self, year: _Optional[int] = ..., rating: _Optional[int] = ..., genres: _Optional[int] = ..., nconsts: _Optional[int] = ...) -> None: ... + +class Request(_message.Message): + __slots__ = ("tconsts", "n", "filter", "weight") + TCONSTS_FIELD_NUMBER: _ClassVar[int] + N_FIELD_NUMBER: _ClassVar[int] + FILTER_FIELD_NUMBER: _ClassVar[int] + WEIGHT_FIELD_NUMBER: _ClassVar[int] + tconsts: _containers.RepeatedScalarFieldContainer[str] + n: int + filter: Filter + weight: Weight + def __init__(self, tconsts: _Optional[_Iterable[str]] = ..., n: _Optional[int] = ..., filter: _Optional[_Union[Filter, _Mapping]] = ..., weight: _Optional[_Union[Weight, _Mapping]] = ...) -> None: ... + +class Response(_message.Message): + __slots__ = ("movies",) + MOVIES_FIELD_NUMBER: _ClassVar[int] + movies: _containers.RepeatedCompositeFieldContainer[RecommendedMovie] + def __init__(self, movies: _Optional[_Iterable[_Union[RecommendedMovie, _Mapping]]] = ...) -> None: ... + +class RecommendedMovie(_message.Message): + __slots__ = ("tconst", "weights") + TCONST_FIELD_NUMBER: _ClassVar[int] + WEIGHTS_FIELD_NUMBER: _ClassVar[int] + tconst: str + weights: _containers.RepeatedScalarFieldContainer[str] + def __init__(self, tconst: _Optional[str] = ..., weights: _Optional[_Iterable[str]] = ...) -> None: ... 
diff --git a/recommender/proto/recommender_pb2_grpc.py b/recommender/proto/recommender_pb2_grpc.py new file mode 100644 index 0000000..d6c23b2 --- /dev/null +++ b/recommender/proto/recommender_pb2_grpc.py @@ -0,0 +1,97 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc +import warnings + +import recommender_pb2 as recommender__pb2 + +GRPC_GENERATED_VERSION = '1.67.0' +GRPC_VERSION = grpc.__version__ +_version_not_supported = False + +try: + from grpc._utilities import first_version_is_lower + _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION) +except ImportError: + _version_not_supported = True + +if _version_not_supported: + raise RuntimeError( + f'The grpc package installed is at version {GRPC_VERSION},' + + f' but the generated code in recommender_pb2_grpc.py depends on' + + f' grpcio>={GRPC_GENERATED_VERSION}.' + + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}' + + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.' + ) + + +class RecommenderStub(object): + """Missing associated documentation comment in .proto file.""" + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. 
+ """ + self.GetRecommendations = channel.unary_unary( + '/recommender.Recommender/GetRecommendations', + request_serializer=recommender__pb2.Request.SerializeToString, + response_deserializer=recommender__pb2.Response.FromString, + _registered_method=True) + + +class RecommenderServicer(object): + """Missing associated documentation comment in .proto file.""" + + def GetRecommendations(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_RecommenderServicer_to_server(servicer, server): + rpc_method_handlers = { + 'GetRecommendations': grpc.unary_unary_rpc_method_handler( + servicer.GetRecommendations, + request_deserializer=recommender__pb2.Request.FromString, + response_serializer=recommender__pb2.Response.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'recommender.Recommender', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) + server.add_registered_method_handlers('recommender.Recommender', rpc_method_handlers) + + + # This class is part of an EXPERIMENTAL API. 
+class Recommender(object): + """Missing associated documentation comment in .proto file.""" + + @staticmethod + def GetRecommendations(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/recommender.Recommender/GetRecommendations', + recommender__pb2.Request.SerializeToString, + recommender__pb2.Response.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) diff --git a/recommender/pyproject.toml b/recommender/pyproject.toml new file mode 100644 index 0000000..a51b309 --- /dev/null +++ b/recommender/pyproject.toml @@ -0,0 +1,15 @@ +[project] +name = "movier" +version = "0.1.0" +description = "Movier" +readme = "README.md" +requires-python = "==3.12.3" +dependencies = [ + "grpcio>=1.67.0", + "grpcio-reflection==1.67.0", + "grpcio-tools==1.67.0", + "polars==1.12.0", + "psycopg2-binary==2.9.10", + "scikit-learn==1.5.2", + "grpcio-health-checking==1.67.1", +] diff --git a/recommender/recommend.py b/recommender/recommend.py new file mode 100644 index 0000000..1fcc276 --- /dev/null +++ b/recommender/recommend.py @@ -0,0 +1,441 @@ +from typing import Any +import numpy as np +from sklearn.feature_extraction.text import CountVectorizer +from sklearn.metrics.pairwise import cosine_similarity +import polars as pl +from dataclasses import dataclass + + +@dataclass +class Filter: + min_votes: int = None + max_votes: int = None + min_year: int = None + max_year: int = None + min_rating: float = None + max_rating: float = None + + def __post_init__(self): + if self.min_votes is not None and self.min_votes < 0: + raise ValueError("min_votes should be greater than or equal to 0") + if self.max_votes is not None and self.max_votes < 0: + raise ValueError("max_votes should be greater 
than or equal to 0") + if self.min_votes is not None and self.max_votes is not None and self.min_votes > self.max_votes: + raise ValueError("min_votes should be less than or equal to max_votes") + + if self.min_year is not None and self.min_year < 0: + raise ValueError("min_year should be greater than or equal to 0") + if self.max_year is not None and self.max_year < 0: + raise ValueError("max_year should be greater than or equal to 0") + if self.min_year is not None and self.max_year is not None and self.min_year > self.max_year: + raise ValueError("min_year should be less than or equal to max_year") + + if self.min_rating is not None and self.min_rating < 0: + raise ValueError("min_rating should be greater than or equal to 0") + if self.max_rating is not None and self.max_rating < 0: + raise ValueError("max_rating should be greater than or equal to 0") + if self.min_rating is not None and self.max_rating is not None and self.min_rating > self.max_rating: + raise ValueError("min_rating should be less than or equal to max_rating") + +@dataclass +class Weight: + year: int = 100 + rating: int = 100 + genres: int = 100 + nconsts: int = 100 + + def __post_init__(self): + total_sum = 0 + total_count = 0 + for k, v in self.__dict__.items(): + if v < 0: + raise ValueError(f'Weight for {k} must be greater than or equal to 0, got {v}') + if v > 0: + total_sum += v + total_count += 1 + + if total_sum < 100: + raise ValueError(f'Total sum of weights must be at least 100, got {total_sum}') + if total_count*100 != total_sum: + raise ValueError(f'Total sum of weights must be {total_count*100}, got {total_sum}') + +class Recommender: + def __init__( + self, + filter_: Filter = Filter(), + weight: Weight = Weight() + ) -> None: + self.filter = filter_ + self.weight = weight + self.sql_where_clause = '' + + self.sql_where_clause = self.add_sql_where_clause(self.sql_where_clause, f"genres != ''") + self.sql_where_clause = self.add_sql_where_clause(self.sql_where_clause, f"nconsts != 
''") + + if filter_.min_votes: + self.sql_where_clause = self.add_sql_where_clause(self.sql_where_clause, f'votes >= {filter_.min_votes}') + if filter_.max_votes: + self.sql_where_clause = self.add_sql_where_clause(self.sql_where_clause, f'votes <= {filter_.max_votes}') + if filter_.min_year: + self.sql_where_clause = self.add_sql_where_clause(self.sql_where_clause, f'year >= {filter_.min_year}') + if filter_.max_year: + self.sql_where_clause = self.add_sql_where_clause(self.sql_where_clause, f'year <= {filter_.max_year}') + if filter_.min_rating: + self.sql_where_clause = self.add_sql_where_clause(self.sql_where_clause, f'rating >= {filter_.min_rating}') + if filter_.max_rating: + self.sql_where_clause = self.add_sql_where_clause(self.sql_where_clause, f'rating <= {filter_.max_rating}') + + def add_sql_where_clause(self, old: str, new: str) -> None: + return f'WHERE {new}' if old == '' else f'{old} AND {new}' + + def get_ordered_year_from_sql(self, conn, reference_year: int) -> pl.DataFrame: + """ + Args + ---- + conn: psycopg2 connection object + reference_year: int - year to sort by closest + + Returns + ------- + DataFrame: + First sorted by closest year, then by number of votes (descending). + | year_index (uint32) | tconst (str) | + | --- | --- | + | 0 | tt0000001 | + | 1 | tt0000002 | + | 2 | tt0000003 | + | ... | ... | + """ + return pl.read_database( + f""" + SELECT tconst + FROM imdb + {self.sql_where_clause} + ORDER BY ABS(year - {reference_year}), votes DESC + """, + conn, schema_overrides={'tconst': str} + ).with_row_index('year_index') + + def get_ordered_rating_from_sql(self, conn, reference_rating: int) -> pl.DataFrame: + """ + Args + ---- + conn: psycopg2 connection object + reference_rating: int - rating to sort by closest + + Returns + ------- + DataFrame: + First sorted by closest rating, then by number of votes (descending). 
+ | rating_index (uint32) | tconst (str) | + | --- | --- | + | 0 | tt0000001 | + | 1 | tt0000002 | + | 2 | tt0000003 | + | ... | ... | + """ + return pl.read_database( + f""" + SELECT tconst + FROM imdb + {self.sql_where_clause} + ORDER BY ABS(rating - {reference_rating}), votes DESC + """, + conn, schema_overrides={'tconst': str} + ).with_row_index('rating_index') + + def get_ordered_genres_from_df(self, df: pl.DataFrame, reference_genres: str) -> pl.DataFrame: + """ + Args + ---- + df: DataFrame + | tconst (str) | genres (str) | votes (uint32) | + | --- | --- | --- | + | tt0000001 | Drama, Romance | 123 | + | tt0000002 | Comedy, Drama | 456 | + | tt0000003 | Action, Drama | 789 | + | ... | ... | ... | + reference_genres: str - genres to calculate cosine similarities + + Returns + ------- + DataFrame: + First sorted by cosine similarities genres (descending) and then by number of votes (descending). + | genres_index (uint32) | tconst (str) | + | --- | --- | + | 0 | tt0000001 | + | 1 | tt0000002 | + | 2 | tt0000003 | + | ... | ... 
| + """ + df = df.with_row_index('genres_index') + + genres_cv = CountVectorizer(dtype=np.uint8, token_pattern=r"(?u)[\w'-]+") + genres_count_matrix = genres_cv.fit_transform(df['genres']) + + genres_sims = cosine_similarity(genres_cv.transform([reference_genres]), genres_count_matrix)[0] + + return pl.DataFrame( + { + 'tconst': df['tconst'], + 'cosine_similarity': genres_sims, + 'votes': df['votes'] + }, schema={'tconst': str, 'cosine_similarity': pl.Float32, 'votes': pl.UInt32} + ).\ + sort(['cosine_similarity', 'votes'], descending=True).\ + drop(['cosine_similarity', 'votes']).\ + with_row_index('genres_index') + + def get_ordered_nconsts_from_df(self, df: pl.DataFrame, reference_nconsts: str) -> pl.DataFrame: + """ + Args + ---- + df: DataFrame + | tconst (str) | nconsts (str) | votes (uint32) | + | --- | --- | --- | + | tt0000001 | nm0000001, nm0000002 | 123 | + | tt0000002 | nm0000001, nm0000003 | 456 | + | tt0000003 | nm0000004, nm0000002 | 789 | + | ... | ... | ... | + reference_nconsts: str - nconsts to calculate cosine similarities + + Returns + ------- + df: DataFrame + First sorted by cosine similarities of nconsts (descending) and then by number of votes (descending). + | nconsts_index (uint32) | tconst (str) | + | --- | --- | + | 0 | tt0000001 | + | 1 | tt0000002 | + | 2 | tt0000003 | + | ... | ... 
| + """ + df = df.with_row_index('nconsts_index') + + nconsts_cv = CountVectorizer(dtype=np.uint8, token_pattern=r"(?u)[\w'-]+") + nconsts_count_matrix = nconsts_cv.fit_transform(df['nconsts']) + + nconsts_sims = cosine_similarity(nconsts_cv.transform([reference_nconsts]), nconsts_count_matrix)[0] + + return pl.DataFrame( + { + 'tconst': df['tconst'], + 'cosine_similarity': nconsts_sims, + 'votes': df['votes'] + }, schema={'tconst': str, 'cosine_similarity': pl.Float32, 'votes': pl.UInt32} + ).\ + sort(['cosine_similarity', 'votes'], descending=True).\ + drop(['cosine_similarity', 'votes']).\ + with_row_index('nconsts_index') + + def get_main_df(self, conn) -> pl.DataFrame: + """ + Args + ---- + conn: psycopg2 connection object + + Returns + ------- + DataFrame: + | tconst (str) | genres (str) | nconsts (str) | votes (uint32) | + | --- | --- | --- | --- | + | tt0000001 | Drama, Romance | nm0000001, nm0000002 | 123 | + | tt0000002 | Comedy, Drama | nm0000001, nm0000003 | 456 | + | tt0000003 | Action, Drama | nm0000004, nm0000002 | 789 | + | ... | ... | ... | ... 
| + """ + return pl.read_database( + f""" + SELECT tconst, genres, nconsts, votes + FROM imdb + {self.sql_where_clause} + """, conn, schema_overrides={'tconst': str, 'genres': str, 'nconsts': str, 'votes': pl.UInt32} + ) + + def get_row_by_tconst(self, conn, tconst: str) -> dict[str, Any]: + """ + Args + ---- + conn: psycopg2 connection object + tconst: str - tconst to get row from database + + Returns + ------- + dict: row from database + { + 'tconst': str, + 'year': int, + 'genres': str, + 'nconsts': str, + 'rating': float, + 'votes': int + } + + Raises + ------ + ValueError: if tconst is not found in database + """ + with conn.cursor() as cursor: + cursor.execute( + f""" + SELECT tconst, year, genres, nconsts, rating, votes + FROM imdb + WHERE tconst = '{tconst}' + """ + ) + row = cursor.fetchone() + if row is None: + raise ValueError(f"tconst '{tconst}' not found") + return {cursor.description[i][0]: value for i, value in enumerate(row)} + + def set_average(self, column_name: str, features: list[str], merged_df: pl.DataFrame) -> pl.DataFrame: + """ + Args + ---- + column_name: str - name of the column to store the average + features: list[str] - list of features to calculate the average + merged_df: DataFrame - merged DataFrame of all features + + Returns + ------- + DataFrame: Same DataFrame with the argument column_name added to it with the average of all features + """ + average = merged_df[f'{features[0]}_index'] * self.weight.__getattribute__(features[0]) + for feature in features[1:]: + average += merged_df[f'{feature}_index'] * self.weight.__getattribute__(feature) + + return merged_df.with_columns(**{column_name: (average / (len(features) * 100))}) + + def get_single_recommendation(self, conn, tconst: str, features: list[str]) -> pl.DataFrame: + """ + Args + ---- + conn: psycopg2 connection object + tconst: str - tconst to get recommendations + features: list[str] - list of features to calculate the average + + Returns + ------- + DataFrame: DataFrame 
with the average of all features + + Raises + ------ + ValueError: if no recommendations found + """ + reference_row = self.get_row_by_tconst(conn, tconst) + trained: dict[str, pl.DataFrame] = {} + + if 'year' in features: + df = self.get_ordered_year_from_sql(conn, reference_year=reference_row['year']) + if len(df) > 0: + trained['year'] = df + if 'rating' in features: + df = self.get_ordered_rating_from_sql(conn, reference_rating=reference_row['rating']) + if len(df) > 0: + trained['rating'] = df + if 'genres' in features or 'nconsts' in features: + main_df = self.get_main_df(conn) + if len(main_df) > 0: + if 'genres' in features: + trained['genres'] = self.get_ordered_genres_from_df( + pl.DataFrame( + { + 'tconst': main_df['tconst'], + 'genres': main_df['genres'], + 'votes': main_df['votes'] + } + ), reference_genres=reference_row['genres'] + ) + if 'nconsts' in features: + trained['nconsts'] = self.get_ordered_nconsts_from_df( + pl.DataFrame( + { + 'tconst': main_df['tconst'], + 'nconsts': main_df['nconsts'], + 'votes': main_df['votes'] + } + ), reference_nconsts=reference_row['nconsts'] + ) + + if len(trained) == 0: + raise ValueError("No recommendations found, try changing the filter or weight") + if len(features) > 1: + merged = pl.concat(trained.values(), how='align') + return self.set_average( + "average", features=features, merged_df=merged + ) + else: + trained_df = trained[features[0]] + return trained_df.with_columns( + average=trained_df[f'{features[0]}_index'] + ) + + def get_recommendations(self, conn, tconsts: list[str], n: int = 5) -> dict[str, list[str]]: + """ + Args + ---- + conn: psycopg2 connection object + tconsts: list[str] - list of tconsts to get recommendations + n: int - number of recommendations to get + + Returns + ------- + list[dict[str, list[str]]]: list of dictionaries with tconst (ascending) + as key and list of weights of columns as value (ascending) + """ + self.sql_where_clause = self.add_sql_where_clause( + 
self.sql_where_clause, + f"tconst NOT IN ({', '.join(f"'{tconst}'" for tconst in tconsts)})" + ) + + features: list[str] = [] + if self.weight.year > 0: + features.append('year') + if self.weight.rating > 0: + features.append('rating') + if self.weight.genres > 0: + features.append('genres') + if self.weight.nconsts > 0: + features.append('nconsts') + + if len(tconsts) == 1: + merged_df = self.get_single_recommendation(conn, tconsts[0], features).sort('average')[:n] + + responses: dict[str, list[str]] = dict() + for row in merged_df.rows(named=True): + row.pop('average') + t: str = row.pop('tconst') + for f in features: + row[f] = row[f"{f}_index"] / self.weight.__getattribute__(f) + row.pop(f"{f}_index") + weights: list[str] = [column for column, _ in sorted(row.items(), key=lambda item: item[1])] + responses[t] = weights + + return responses + else: + trained_dfs: dict[str, pl.DataFrame] = {} + for tconst in tconsts: + df = self.get_single_recommendation(conn, tconst, features) + trained_dfs[tconst] = pl.DataFrame({ + 'tconst': df['tconst'], + f"{tconst}_average": df['average'] + }) + + merged_df: pl.DataFrame = pl.concat(trained_dfs.values(), how='align') + + all_average = merged_df[f"{tconsts[0]}_average"] + for tconst in tconsts[1:]: + all_average += merged_df[f"{tconst}_average"] + merged_df = merged_df.with_columns(all_average=all_average / len(tconsts)).sort('all_average')[:n] + + responses: dict[str, list[str]] = dict() + for row in merged_df.rows(named=True): + row.pop('all_average') + curretn_tconst: str = row.pop('tconst') + for tconst in tconsts: + row[tconst] = row[f"{tconst}_average"] + row.pop(f"{tconst}_average") + weights: list[str] = [column for column, _ in sorted(row.items(), key=lambda item: item[1])] + responses[curretn_tconst] = weights + + return responses diff --git a/recommender/uv.lock b/recommender/uv.lock new file mode 100644 index 0000000..f55ba3b --- /dev/null +++ b/recommender/uv.lock @@ -0,0 +1,283 @@ +version = 1 +requires-python 
= "==3.12.3" + +[[package]] +name = "grpcio" +version = "1.67.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/53/d9282a66a5db45981499190b77790570617a604a38f3d103d0400974aeb5/grpcio-1.67.1.tar.gz", hash = "sha256:3dc2ed4cabea4dc14d5e708c2b426205956077cc5de419b4d4079315017e9732", size = 12580022 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/25/6f95bd18d5f506364379eabc0d5874873cc7dbdaf0757df8d1e82bc07a88/grpcio-1.67.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:267d1745894200e4c604958da5f856da6293f063327cb049a51fe67348e4f953", size = 5089809 }, + { url = "https://files.pythonhosted.org/packages/10/3f/d79e32e5d0354be33a12db2267c66d3cfeff700dd5ccdd09fd44a3ff4fb6/grpcio-1.67.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:85f69fdc1d28ce7cff8de3f9c67db2b0ca9ba4449644488c1e0303c146135ddb", size = 10981985 }, + { url = "https://files.pythonhosted.org/packages/21/f2/36fbc14b3542e3a1c20fb98bd60c4732c55a44e374a4eb68f91f28f14aab/grpcio-1.67.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:f26b0b547eb8d00e195274cdfc63ce64c8fc2d3e2d00b12bf468ece41a0423a0", size = 5588770 }, + { url = "https://files.pythonhosted.org/packages/0d/af/bbc1305df60c4e65de8c12820a942b5e37f9cf684ef5e49a63fbb1476a73/grpcio-1.67.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4422581cdc628f77302270ff839a44f4c24fdc57887dc2a45b7e53d8fc2376af", size = 6214476 }, + { url = "https://files.pythonhosted.org/packages/92/cf/1d4c3e93efa93223e06a5c83ac27e32935f998bc368e276ef858b8883154/grpcio-1.67.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d7616d2ded471231c701489190379e0c311ee0a6c756f3c03e6a62b95a7146e", size = 5850129 }, + { url = "https://files.pythonhosted.org/packages/ae/ca/26195b66cb253ac4d5ef59846e354d335c9581dba891624011da0e95d67b/grpcio-1.67.1-cp312-cp312-musllinux_1_1_i686.whl", hash = 
"sha256:8a00efecde9d6fcc3ab00c13f816313c040a28450e5e25739c24f432fc6d3c75", size = 6568489 }, + { url = "https://files.pythonhosted.org/packages/d1/94/16550ad6b3f13b96f0856ee5dfc2554efac28539ee84a51d7b14526da985/grpcio-1.67.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:699e964923b70f3101393710793289e42845791ea07565654ada0969522d0a38", size = 6149369 }, + { url = "https://files.pythonhosted.org/packages/33/0d/4c3b2587e8ad7f121b597329e6c2620374fccbc2e4e1aa3c73ccc670fde4/grpcio-1.67.1-cp312-cp312-win32.whl", hash = "sha256:4e7b904484a634a0fff132958dabdb10d63e0927398273917da3ee103e8d1f78", size = 3599176 }, + { url = "https://files.pythonhosted.org/packages/7d/36/0c03e2d80db69e2472cf81c6123aa7d14741de7cf790117291a703ae6ae1/grpcio-1.67.1-cp312-cp312-win_amd64.whl", hash = "sha256:5721e66a594a6c4204458004852719b38f3d5522082be9061d6510b455c90afc", size = 4346574 }, + { url = "https://files.pythonhosted.org/packages/12/d2/2f032b7a153c7723ea3dea08bffa4bcaca9e0e5bdf643ce565b76da87461/grpcio-1.67.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:aa0162e56fd10a5547fac8774c4899fc3e18c1aa4a4759d0ce2cd00d3696ea6b", size = 5091487 }, + { url = "https://files.pythonhosted.org/packages/d0/ae/ea2ff6bd2475a082eb97db1104a903cf5fc57c88c87c10b3c3f41a184fc0/grpcio-1.67.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:beee96c8c0b1a75d556fe57b92b58b4347c77a65781ee2ac749d550f2a365dc1", size = 10943530 }, + { url = "https://files.pythonhosted.org/packages/07/62/646be83d1a78edf8d69b56647327c9afc223e3140a744c59b25fbb279c3b/grpcio-1.67.1-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:a93deda571a1bf94ec1f6fcda2872dad3ae538700d94dc283c672a3b508ba3af", size = 5589079 }, + { url = "https://files.pythonhosted.org/packages/d0/25/71513d0a1b2072ce80d7f5909a93596b7ed10348b2ea4fdcbad23f6017bf/grpcio-1.67.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e6f255980afef598a9e64a24efce87b625e3e3c80a45162d111a461a9f92955", size = 6213542 }, + { url = 
"https://files.pythonhosted.org/packages/76/9a/d21236297111052dcb5dc85cd77dc7bf25ba67a0f55ae028b2af19a704bc/grpcio-1.67.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e838cad2176ebd5d4a8bb03955138d6589ce9e2ce5d51c3ada34396dbd2dba8", size = 5850211 }, + { url = "https://files.pythonhosted.org/packages/2d/fe/70b1da9037f5055be14f359026c238821b9bcf6ca38a8d760f59a589aacd/grpcio-1.67.1-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:a6703916c43b1d468d0756c8077b12017a9fcb6a1ef13faf49e67d20d7ebda62", size = 6572129 }, + { url = "https://files.pythonhosted.org/packages/74/0d/7df509a2cd2a54814598caf2fb759f3e0b93764431ff410f2175a6efb9e4/grpcio-1.67.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:917e8d8994eed1d86b907ba2a61b9f0aef27a2155bca6cbb322430fc7135b7bb", size = 6149819 }, + { url = "https://files.pythonhosted.org/packages/0a/08/bc3b0155600898fd10f16b79054e1cca6cb644fa3c250c0fe59385df5e6f/grpcio-1.67.1-cp313-cp313-win32.whl", hash = "sha256:e279330bef1744040db8fc432becc8a727b84f456ab62b744d3fdb83f327e121", size = 3596561 }, + { url = "https://files.pythonhosted.org/packages/5a/96/44759eca966720d0f3e1b105c43f8ad4590c97bf8eb3cd489656e9590baa/grpcio-1.67.1-cp313-cp313-win_amd64.whl", hash = "sha256:fa0c739ad8b1996bd24823950e3cb5152ae91fca1c09cc791190bf1627ffefba", size = 4346042 }, +] + +[[package]] +name = "grpcio-health-checking" +version = "1.67.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "grpcio" }, + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/64/dd/e3b339fa44dc75b501a1a22cb88f1af5b1f8c964488f19c4de4cfbbf05ba/grpcio_health_checking-1.67.1.tar.gz", hash = "sha256:ca90fa76a6afbb4fda71d734cb9767819bba14928b91e308cffbb0c311eb941e", size = 16775 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/8d/7a9878dca6616b48093d71c52d0bc79cb2dd1a2698ff6f5ce7406306de12/grpcio_health_checking-1.67.1-py3-none-any.whl", hash = 
"sha256:93753da5062152660aef2286c9b261e07dd87124a65e4dc9fbd47d1ce966b39d", size = 18924 }, +] + +[[package]] +name = "grpcio-reflection" +version = "1.67.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "grpcio" }, + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fe/69/08a7c3c3524e3af650d22bd8e0015e35bc284eae919fbb38bc2702809d07/grpcio_reflection-1.67.0.tar.gz", hash = "sha256:c47143738b1897b6ce4af5e0e338c85c9aee5fdcbb3355d368a8dcae46d8933c", size = 18818 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9c/cf/343839a5800c38a0231b78708e84db1fa4797751cb44892093ef8b215377/grpcio_reflection-1.67.0-py3-none-any.whl", hash = "sha256:2a2f6d865adecd8d5f81b1a6858252d61b1897997f0656c73807045c5e79c421", size = 22691 }, +] + +[[package]] +name = "grpcio-tools" +version = "1.67.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "grpcio" }, + { name = "protobuf" }, + { name = "setuptools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e7/f8/62e15867651b72f6f95313e21d81f5f1c210b69a4cc664aecf52ec4c8a53/grpcio_tools-1.67.0.tar.gz", hash = "sha256:181b3d4e61b83142c182ec366f3079b0023509743986e54c9465ca38cac255f8", size = 5159163 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d6/b6/57e67c0244db8d7c0c312041293b806bfb1c9d66c26159e6faf39cc10356/grpcio_tools-1.67.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:dca7f053cbdb26a587d4410ddb893877c585fb60a31f22fdd128e4f7c4dab27c", size = 2307646 }, + { url = "https://files.pythonhosted.org/packages/52/43/837f08b85b04ac225aebe1d7da1a7a79fc313f231306c865b5112cef7dc4/grpcio_tools-1.67.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:de8c4f68ffa690769d84329c17c7fdd5fbe4c61b8f8a0de03f1ad8ef8bb06963", size = 5525447 }, + { url = 
"https://files.pythonhosted.org/packages/3e/5f/adb8b87f5c403ba53529b6645184beddfa63abf2c524a6dabaa430e6bab3/grpcio_tools-1.67.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:6e4ecb24c27a78f09fead45d4ed873805d6026124ccb6793b6fb93a490b78ddf", size = 2281767 }, + { url = "https://files.pythonhosted.org/packages/6e/cd/3d6a7971e28b96cb618abb281325517443744ecfe48aa03f27a17cd5d4e1/grpcio_tools-1.67.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:004d6ef1b5f724480f05c0bdc904bf8c78c43d633c537d99abe51b52ce0cadeb", size = 2617363 }, + { url = "https://files.pythonhosted.org/packages/2d/a9/b8f1eae3db0f1b6f9548bd2032f48cb6f1ec9bc6781436d52dff4b352fab/grpcio_tools-1.67.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9dd257072c86eb9b36791b3674a513a215ba76bbdd38fc228f0e8c6dc5ce3524", size = 2415322 }, + { url = "https://files.pythonhosted.org/packages/9b/fc/0045bf2e5c97a5ffe0ff2c9a4e4a62894402e8d7094162c2084a809c9d1c/grpcio_tools-1.67.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:a8cca551317ed26e17d13b6ee27b2bd62f5fe9b3842b4e88389deb984f995848", size = 3225044 }, + { url = "https://files.pythonhosted.org/packages/dc/73/eaf40958dd648dd98a0fbd30df2b51c5beb7ee24127c1f0bb99ea44fd435/grpcio_tools-1.67.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a7ac3b4f837c693142f6688b629d1f6408f6ab250d927159b572555f5339fe25", size = 2870418 }, + { url = "https://files.pythonhosted.org/packages/b4/77/e307e91816123444ff657bbae2269cb912f31a9390118ed371bde9d0c1f3/grpcio_tools-1.67.0-cp312-cp312-win32.whl", hash = "sha256:95feec33388e2a8f72c360a68efe6f0bfed9c771e94d21b7f2359d0010f60219", size = 940540 }, + { url = "https://files.pythonhosted.org/packages/be/2a/0c1a64e88fbc17235b68d3178be6cf4a69aea5bd1deed683c0bbd2f5e9f9/grpcio_tools-1.67.0-cp312-cp312-win_amd64.whl", hash = "sha256:50a31d035193ebe7154181eac84734e25bdcdb36adba849d3b2adf1c3b0c382b", size = 1090427 }, + { url = 
"https://files.pythonhosted.org/packages/1e/44/cfe3aa14158d8becffd7823d5147039378d448097fb91ec723ad8b6d60cb/grpcio_tools-1.67.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:9ecb7c2e5da052a3feaeaa83d8f2a946a8feec8a50751b0e6175da982b49ebb1", size = 2307454 }, + { url = "https://files.pythonhosted.org/packages/46/9c/99b345764b355b11f1ea7d160276e9eb9d32a1c77e4bfaa2db3de025f7d2/grpcio_tools-1.67.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:3c52164f2b9d41c6d75464bb45f45737dcb421e92e98d85d94fda100c67a24d8", size = 5518036 }, + { url = "https://files.pythonhosted.org/packages/e8/8d/f1b4378335f39f37f9b440b26e7ecaa19660eef6b438ac7d8c5ad7e96d73/grpcio_tools-1.67.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:471f58b919767290260d427dd9b760796e6208ee5fcda2f76bb8bd585ff842ec", size = 2281084 }, + { url = "https://files.pythonhosted.org/packages/27/ec/c31e5ec4d01f17e38fce03dfc3e47880bc25e1dd681fffe00fab04e21e33/grpcio_tools-1.67.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:72c6bcdf38f672721c093c92b1fb1f9a02a365acc5bd42e1c69fe6e904b26081", size = 2616930 }, + { url = "https://files.pythonhosted.org/packages/b5/2b/82f3451ad9471ba946ca0e2ff43dc3269030d5e963d86d1cfe199e07dc38/grpcio_tools-1.67.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:833b1eb9c03d28a798294523f75294055eff78fa897adf797876337b901afeb9", size = 2414635 }, + { url = "https://files.pythonhosted.org/packages/89/59/2811cb32947f21075c0b53a87aa652653154e6db3f03766e29b1f80a4bb6/grpcio_tools-1.67.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:1db92ad6ade1946fc5705eb04956fcfdb3a0a4682de8dc3fce31cb97b6e4fcb8", size = 3224330 }, + { url = "https://files.pythonhosted.org/packages/3f/25/dde7cef6e639dae24e93f4385e689f554a1d2a531d5703786a6b7b8366f3/grpcio_tools-1.67.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:38128310ded818e1044c0cd0979d76f7c0d3c3946a526a8aa39cd258624c3bf3", size = 2869633 }, + { url = 
"https://files.pythonhosted.org/packages/6f/dc/313bbdc01e4bd062d1e86cf667d81338670b9f44afa81a7b4e5ebf566ff4/grpcio_tools-1.67.0-cp313-cp313-win32.whl", hash = "sha256:db57930dc20ab678311727883bdb9f122daf06c14f3fd3067c9ccedb7eb056c3", size = 939997 }, + { url = "https://files.pythonhosted.org/packages/9e/07/5227eb621973b6afe7e6b3d4c637ed14069b7f5f7f45cc804c59df791304/grpcio_tools-1.67.0-cp313-cp313-win_amd64.whl", hash = "sha256:7de44d8d3bb920a4973a559f2950d03382fa4aed4880306416ffa73d24838477", size = 1089819 }, +] + +[[package]] +name = "joblib" +version = "1.4.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/64/33/60135848598c076ce4b231e1b1895170f45fbcaeaa2c9d5e38b04db70c35/joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e", size = 2116621 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/29/df4b9b42f2be0b623cbd5e2140cafcaa2bef0759a00b7b70104dcfe2fb51/joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6", size = 301817 }, +] + +[[package]] +name = "movier" +version = "0.1.0" +source = { virtual = "." 
} +dependencies = [ + { name = "grpcio" }, + { name = "grpcio-health-checking" }, + { name = "grpcio-reflection" }, + { name = "grpcio-tools" }, + { name = "polars" }, + { name = "psycopg2-binary" }, + { name = "scikit-learn" }, +] + +[package.metadata] +requires-dist = [ + { name = "grpcio", specifier = ">=1.67.0" }, + { name = "grpcio-health-checking", specifier = "==1.67.1" }, + { name = "grpcio-reflection", specifier = "==1.67.0" }, + { name = "grpcio-tools", specifier = "==1.67.0" }, + { name = "polars", specifier = "==1.12.0" }, + { name = "psycopg2-binary", specifier = "==2.9.10" }, + { name = "scikit-learn", specifier = "==1.5.2" }, +] + +[[package]] +name = "numpy" +version = "2.1.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/25/ca/1166b75c21abd1da445b97bf1fa2f14f423c6cfb4fc7c4ef31dccf9f6a94/numpy-2.1.3.tar.gz", hash = "sha256:aa08e04e08aaf974d4458def539dece0d28146d866a39da5639596f4921fd761", size = 20166090 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/f0/385eb9970309643cbca4fc6eebc8bb16e560de129c91258dfaa18498da8b/numpy-2.1.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f55ba01150f52b1027829b50d70ef1dafd9821ea82905b63936668403c3b471e", size = 20849658 }, + { url = "https://files.pythonhosted.org/packages/54/4a/765b4607f0fecbb239638d610d04ec0a0ded9b4951c56dc68cef79026abf/numpy-2.1.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13138eadd4f4da03074851a698ffa7e405f41a0845a6b1ad135b81596e4e9958", size = 13492258 }, + { url = "https://files.pythonhosted.org/packages/bd/a7/2332679479c70b68dccbf4a8eb9c9b5ee383164b161bee9284ac141fbd33/numpy-2.1.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:a6b46587b14b888e95e4a24d7b13ae91fa22386c199ee7b418f449032b2fa3b8", size = 5090249 }, + { url = "https://files.pythonhosted.org/packages/c1/67/4aa00316b3b981a822c7a239d3a8135be2a6945d1fd11d0efb25d361711a/numpy-2.1.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = 
"sha256:0fa14563cc46422e99daef53d725d0c326e99e468a9320a240affffe87852564", size = 6621704 }, + { url = "https://files.pythonhosted.org/packages/5e/da/1a429ae58b3b6c364eeec93bf044c532f2ff7b48a52e41050896cf15d5b1/numpy-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8637dcd2caa676e475503d1f8fdb327bc495554e10838019651b76d17b98e512", size = 13606089 }, + { url = "https://files.pythonhosted.org/packages/9e/3e/3757f304c704f2f0294a6b8340fcf2be244038be07da4cccf390fa678a9f/numpy-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2312b2aa89e1f43ecea6da6ea9a810d06aae08321609d8dc0d0eda6d946a541b", size = 16043185 }, + { url = "https://files.pythonhosted.org/packages/43/97/75329c28fea3113d00c8d2daf9bc5828d58d78ed661d8e05e234f86f0f6d/numpy-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a38c19106902bb19351b83802531fea19dee18e5b37b36454f27f11ff956f7fc", size = 16410751 }, + { url = "https://files.pythonhosted.org/packages/ad/7a/442965e98b34e0ae9da319f075b387bcb9a1e0658276cc63adb8c9686f7b/numpy-2.1.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:02135ade8b8a84011cbb67dc44e07c58f28575cf9ecf8ab304e51c05528c19f0", size = 14082705 }, + { url = "https://files.pythonhosted.org/packages/ac/b6/26108cf2cfa5c7e03fb969b595c93131eab4a399762b51ce9ebec2332e80/numpy-2.1.3-cp312-cp312-win32.whl", hash = "sha256:e6988e90fcf617da2b5c78902fe8e668361b43b4fe26dbf2d7b0f8034d4cafb9", size = 6239077 }, + { url = "https://files.pythonhosted.org/packages/a6/84/fa11dad3404b7634aaab50733581ce11e5350383311ea7a7010f464c0170/numpy-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:0d30c543f02e84e92c4b1f415b7c6b5326cbe45ee7882b6b77db7195fb971e3a", size = 12566858 }, + { url = "https://files.pythonhosted.org/packages/4d/0b/620591441457e25f3404c8057eb924d04f161244cb8a3680d529419aa86e/numpy-2.1.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:96fe52fcdb9345b7cd82ecd34547fca4321f7656d500eca497eb7ea5a926692f", size = 20836263 
}, + { url = "https://files.pythonhosted.org/packages/45/e1/210b2d8b31ce9119145433e6ea78046e30771de3fe353f313b2778142f34/numpy-2.1.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f653490b33e9c3a4c1c01d41bc2aef08f9475af51146e4a7710c450cf9761598", size = 13507771 }, + { url = "https://files.pythonhosted.org/packages/55/44/aa9ee3caee02fa5a45f2c3b95cafe59c44e4b278fbbf895a93e88b308555/numpy-2.1.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:dc258a761a16daa791081d026f0ed4399b582712e6fc887a95af09df10c5ca57", size = 5075805 }, + { url = "https://files.pythonhosted.org/packages/78/d6/61de6e7e31915ba4d87bbe1ae859e83e6582ea14c6add07c8f7eefd8488f/numpy-2.1.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:016d0f6f5e77b0f0d45d77387ffa4bb89816b57c835580c3ce8e099ef830befe", size = 6608380 }, + { url = "https://files.pythonhosted.org/packages/3e/46/48bdf9b7241e317e6cf94276fe11ba673c06d1fdf115d8b4ebf616affd1a/numpy-2.1.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c181ba05ce8299c7aa3125c27b9c2167bca4a4445b7ce73d5febc411ca692e43", size = 13602451 }, + { url = "https://files.pythonhosted.org/packages/70/50/73f9a5aa0810cdccda9c1d20be3cbe4a4d6ea6bfd6931464a44c95eef731/numpy-2.1.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5641516794ca9e5f8a4d17bb45446998c6554704d888f86df9b200e66bdcce56", size = 16039822 }, + { url = "https://files.pythonhosted.org/packages/ad/cd/098bc1d5a5bc5307cfc65ee9369d0ca658ed88fbd7307b0d49fab6ca5fa5/numpy-2.1.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ea4dedd6e394a9c180b33c2c872b92f7ce0f8e7ad93e9585312b0c5a04777a4a", size = 16411822 }, + { url = "https://files.pythonhosted.org/packages/83/a2/7d4467a2a6d984549053b37945620209e702cf96a8bc658bc04bba13c9e2/numpy-2.1.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0df3635b9c8ef48bd3be5f862cf71b0a4716fa0e702155c45067c6b711ddcef", size = 14079598 }, + { url = 
"https://files.pythonhosted.org/packages/e9/6a/d64514dcecb2ee70bfdfad10c42b76cab657e7ee31944ff7a600f141d9e9/numpy-2.1.3-cp313-cp313-win32.whl", hash = "sha256:50ca6aba6e163363f132b5c101ba078b8cbd3fa92c7865fd7d4d62d9779ac29f", size = 6236021 }, + { url = "https://files.pythonhosted.org/packages/bb/f9/12297ed8d8301a401e7d8eb6b418d32547f1d700ed3c038d325a605421a4/numpy-2.1.3-cp313-cp313-win_amd64.whl", hash = "sha256:747641635d3d44bcb380d950679462fae44f54b131be347d5ec2bce47d3df9ed", size = 12560405 }, + { url = "https://files.pythonhosted.org/packages/a7/45/7f9244cd792e163b334e3a7f02dff1239d2890b6f37ebf9e82cbe17debc0/numpy-2.1.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:996bb9399059c5b82f76b53ff8bb686069c05acc94656bb259b1d63d04a9506f", size = 20859062 }, + { url = "https://files.pythonhosted.org/packages/b1/b4/a084218e7e92b506d634105b13e27a3a6645312b93e1c699cc9025adb0e1/numpy-2.1.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:45966d859916ad02b779706bb43b954281db43e185015df6eb3323120188f9e4", size = 13515839 }, + { url = "https://files.pythonhosted.org/packages/27/45/58ed3f88028dcf80e6ea580311dc3edefdd94248f5770deb980500ef85dd/numpy-2.1.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:baed7e8d7481bfe0874b566850cb0b85243e982388b7b23348c6db2ee2b2ae8e", size = 5116031 }, + { url = "https://files.pythonhosted.org/packages/37/a8/eb689432eb977d83229094b58b0f53249d2209742f7de529c49d61a124a0/numpy-2.1.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:a9f7f672a3388133335589cfca93ed468509cb7b93ba3105fce780d04a6576a0", size = 6629977 }, + { url = "https://files.pythonhosted.org/packages/42/a3/5355ad51ac73c23334c7caaed01adadfda49544f646fcbfbb4331deb267b/numpy-2.1.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7aac50327da5d208db2eec22eb11e491e3fe13d22653dce51b0f4109101b408", size = 13575951 }, + { url = 
"https://files.pythonhosted.org/packages/c4/70/ea9646d203104e647988cb7d7279f135257a6b7e3354ea6c56f8bafdb095/numpy-2.1.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4394bc0dbd074b7f9b52024832d16e019decebf86caf909d94f6b3f77a8ee3b6", size = 16022655 }, + { url = "https://files.pythonhosted.org/packages/14/ce/7fc0612903e91ff9d0b3f2eda4e18ef9904814afcae5b0f08edb7f637883/numpy-2.1.3-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:50d18c4358a0a8a53f12a8ba9d772ab2d460321e6a93d6064fc22443d189853f", size = 16399902 }, + { url = "https://files.pythonhosted.org/packages/ef/62/1d3204313357591c913c32132a28f09a26357e33ea3c4e2fe81269e0dca1/numpy-2.1.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:14e253bd43fc6b37af4921b10f6add6925878a42a0c5fe83daee390bca80bc17", size = 14067180 }, + { url = "https://files.pythonhosted.org/packages/24/d7/78a40ed1d80e23a774cb8a34ae8a9493ba1b4271dde96e56ccdbab1620ef/numpy-2.1.3-cp313-cp313t-win32.whl", hash = "sha256:08788d27a5fd867a663f6fc753fd7c3ad7e92747efc73c53bca2f19f8bc06f48", size = 6291907 }, + { url = "https://files.pythonhosted.org/packages/86/09/a5ab407bd7f5f5599e6a9261f964ace03a73e7c6928de906981c31c38082/numpy-2.1.3-cp313-cp313t-win_amd64.whl", hash = "sha256:2564fbdf2b99b3f815f2107c1bbc93e2de8ee655a69c261363a1172a79a257d4", size = 12644098 }, +] + +[[package]] +name = "polars" +version = "1.12.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5f/df/55127a3099e990b45ce3a29ab6789a083451e76e7109fb754aad5525360b/polars-1.12.0.tar.gz", hash = "sha256:fb5c92de1a8f7d0a3f923fe48ea89eb518bdf55315ae917012350fa072bd64f4", size = 4090738 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/ae/77c7ec395d9361ae2086693af1947c9a2b21346ba3faf092bb154b735227/polars-1.12.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:8f3c4e4e423c373dda07b4c8a7ff12aa02094b524767d0ca306b1eba67f2d99e", size = 32923786 }, + { url = 
"https://files.pythonhosted.org/packages/97/1c/60736d5588309eb528c52538e116593cb275310bab82ba28702cd87a76d1/polars-1.12.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:aa6f9862f0cec6353243920d9b8d858c21ec8f25f91af203dea6ff91980e140d", size = 28887255 }, + { url = "https://files.pythonhosted.org/packages/5a/3e/31257118e7e087fa27c230b8fadf8ff15d521140bf58558dc889ee0c9c5e/polars-1.12.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afb03647b5160737d2119532ee8ffe825de1d19d87f81bbbb005131786f7d59b", size = 34126501 }, + { url = "https://files.pythonhosted.org/packages/ad/e6/d03053e6064d262f2ec41172a5092b08fc20d10c059dda6c9460371cfd7e/polars-1.12.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:ea96aba5eb3dab8f0e6abf05ab3fc2136b329261860ef8661d20f5456a2d78e0", size = 30479546 }, + { url = "https://files.pythonhosted.org/packages/d5/28/3d44ddf56a5c95272b202ce8aa0e9b818a1310e83525c4c29176b538ae7c/polars-1.12.0-cp39-abi3-win_amd64.whl", hash = "sha256:a228a4b320a36d03a9ec9dfe7241b6d80a2f119b2dceb1da953166655e4cf43c", size = 33790337 }, +] + +[[package]] +name = "protobuf" +version = "5.28.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/74/6e/e69eb906fddcb38f8530a12f4b410699972ab7ced4e21524ece9d546ac27/protobuf-5.28.3.tar.gz", hash = "sha256:64badbc49180a5e401f373f9ce7ab1d18b63f7dd4a9cdc43c92b9f0b481cef7b", size = 422479 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/c5/05163fad52d7c43e124a545f1372d18266db36036377ad29de4271134a6a/protobuf-5.28.3-cp310-abi3-win32.whl", hash = "sha256:0c4eec6f987338617072592b97943fdbe30d019c56126493111cf24344c1cc24", size = 419624 }, + { url = "https://files.pythonhosted.org/packages/9c/4c/4563ebe001ff30dca9d7ed12e471fa098d9759712980cde1fd03a3a44fb7/protobuf-5.28.3-cp310-abi3-win_amd64.whl", hash = "sha256:91fba8f445723fcf400fdbe9ca796b19d3b1242cd873907979b9ed71e4afe868", size = 431464 }, + { url = 
"https://files.pythonhosted.org/packages/1c/f2/baf397f3dd1d3e4af7e3f5a0382b868d25ac068eefe1ebde05132333436c/protobuf-5.28.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a3f6857551e53ce35e60b403b8a27b0295f7d6eb63d10484f12bc6879c715687", size = 414743 }, + { url = "https://files.pythonhosted.org/packages/85/50/cd61a358ba1601f40e7d38bcfba22e053f40ef2c50d55b55926aecc8fec7/protobuf-5.28.3-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:3fa2de6b8b29d12c61911505d893afe7320ce7ccba4df913e2971461fa36d584", size = 316511 }, + { url = "https://files.pythonhosted.org/packages/5d/ae/3257b09328c0b4e59535e497b0c7537d4954038bdd53a2f0d2f49d15a7c4/protobuf-5.28.3-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:712319fbdddb46f21abb66cd33cb9e491a5763b2febd8f228251add221981135", size = 316624 }, + { url = "https://files.pythonhosted.org/packages/ad/c3/2377c159e28ea89a91cf1ca223f827ae8deccb2c9c401e5ca233cd73002f/protobuf-5.28.3-py3-none-any.whl", hash = "sha256:cee1757663fa32a1ee673434fcf3bf24dd54763c79690201208bafec62f19eed", size = 169511 }, +] + +[[package]] +name = "psycopg2-binary" +version = "2.9.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cb/0e/bdc8274dc0585090b4e3432267d7be4dfbfd8971c0fa59167c711105a6bf/psycopg2-binary-2.9.10.tar.gz", hash = "sha256:4b3df0e6990aa98acda57d983942eff13d824135fe2250e6522edaa782a06de2", size = 385764 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/49/7d/465cc9795cf76f6d329efdafca74693714556ea3891813701ac1fee87545/psycopg2_binary-2.9.10-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:880845dfe1f85d9d5f7c412efea7a08946a46894537e4e5d091732eb1d34d9a0", size = 3044771 }, + { url = "https://files.pythonhosted.org/packages/8b/31/6d225b7b641a1a2148e3ed65e1aa74fc86ba3fee850545e27be9e1de893d/psycopg2_binary-2.9.10-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9440fa522a79356aaa482aa4ba500b65f28e5d0e63b801abf6aa152a29bd842a", size = 3275336 }, + { url 
= "https://files.pythonhosted.org/packages/30/b7/a68c2b4bff1cbb1728e3ec864b2d92327c77ad52edcd27922535a8366f68/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3923c1d9870c49a2d44f795df0c889a22380d36ef92440ff618ec315757e539", size = 2851637 }, + { url = "https://files.pythonhosted.org/packages/0b/b1/cfedc0e0e6f9ad61f8657fd173b2f831ce261c02a08c0b09c652b127d813/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b2c956c028ea5de47ff3a8d6b3cc3330ab45cf0b7c3da35a2d6ff8420896526", size = 3082097 }, + { url = "https://files.pythonhosted.org/packages/18/ed/0a8e4153c9b769f59c02fb5e7914f20f0b2483a19dae7bf2db54b743d0d0/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f758ed67cab30b9a8d2833609513ce4d3bd027641673d4ebc9c067e4d208eec1", size = 3264776 }, + { url = "https://files.pythonhosted.org/packages/10/db/d09da68c6a0cdab41566b74e0a6068a425f077169bed0946559b7348ebe9/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cd9b4f2cfab88ed4a9106192de509464b75a906462fb846b936eabe45c2063e", size = 3020968 }, + { url = "https://files.pythonhosted.org/packages/94/28/4d6f8c255f0dfffb410db2b3f9ac5218d959a66c715c34cac31081e19b95/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dc08420625b5a20b53551c50deae6e231e6371194fa0651dbe0fb206452ae1f", size = 2872334 }, + { url = "https://files.pythonhosted.org/packages/05/f7/20d7bf796593c4fea95e12119d6cc384ff1f6141a24fbb7df5a668d29d29/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d7cd730dfa7c36dbe8724426bf5612798734bff2d3c3857f36f2733f5bfc7c00", size = 2822722 }, + { url = "https://files.pythonhosted.org/packages/4d/e4/0c407ae919ef626dbdb32835a03b6737013c3cc7240169843965cada2bdf/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = 
"sha256:155e69561d54d02b3c3209545fb08938e27889ff5a10c19de8d23eb5a41be8a5", size = 2920132 }, + { url = "https://files.pythonhosted.org/packages/2d/70/aa69c9f69cf09a01da224909ff6ce8b68faeef476f00f7ec377e8f03be70/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c3cc28a6fd5a4a26224007712e79b81dbaee2ffb90ff406256158ec4d7b52b47", size = 2959312 }, + { url = "https://files.pythonhosted.org/packages/d3/bd/213e59854fafe87ba47814bf413ace0dcee33a89c8c8c814faca6bc7cf3c/psycopg2_binary-2.9.10-cp312-cp312-win32.whl", hash = "sha256:ec8a77f521a17506a24a5f626cb2aee7850f9b69a0afe704586f63a464f3cd64", size = 1025191 }, + { url = "https://files.pythonhosted.org/packages/92/29/06261ea000e2dc1e22907dbbc483a1093665509ea586b29b8986a0e56733/psycopg2_binary-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:18c5ee682b9c6dd3696dad6e54cc7ff3a1a9020df6a5c0f861ef8bfd338c3ca0", size = 1164031 }, + { url = "https://files.pythonhosted.org/packages/3e/30/d41d3ba765609c0763505d565c4d12d8f3c79793f0d0f044ff5a28bf395b/psycopg2_binary-2.9.10-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:26540d4a9a4e2b096f1ff9cce51253d0504dca5a85872c7f7be23be5a53eb18d", size = 3044699 }, + { url = "https://files.pythonhosted.org/packages/35/44/257ddadec7ef04536ba71af6bc6a75ec05c5343004a7ec93006bee66c0bc/psycopg2_binary-2.9.10-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e217ce4d37667df0bc1c397fdcd8de5e81018ef305aed9415c3b093faaeb10fb", size = 3275245 }, + { url = "https://files.pythonhosted.org/packages/1b/11/48ea1cd11de67f9efd7262085588790a95d9dfcd9b8a687d46caf7305c1a/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:245159e7ab20a71d989da00f280ca57da7641fa2cdcf71749c193cea540a74f7", size = 2851631 }, + { url = "https://files.pythonhosted.org/packages/62/e0/62ce5ee650e6c86719d621a761fe4bc846ab9eff8c1f12b1ed5741bf1c9b/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:3c4ded1a24b20021ebe677b7b08ad10bf09aac197d6943bfe6fec70ac4e4690d", size = 3082140 }, + { url = "https://files.pythonhosted.org/packages/27/ce/63f946c098611f7be234c0dd7cb1ad68b0b5744d34f68062bb3c5aa510c8/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3abb691ff9e57d4a93355f60d4f4c1dd2d68326c968e7db17ea96df3c023ef73", size = 3264762 }, + { url = "https://files.pythonhosted.org/packages/43/25/c603cd81402e69edf7daa59b1602bd41eb9859e2824b8c0855d748366ac9/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8608c078134f0b3cbd9f89b34bd60a943b23fd33cc5f065e8d5f840061bd0673", size = 3020967 }, + { url = "https://files.pythonhosted.org/packages/5f/d6/8708d8c6fca531057fa170cdde8df870e8b6a9b136e82b361c65e42b841e/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:230eeae2d71594103cd5b93fd29d1ace6420d0b86f4778739cb1a5a32f607d1f", size = 2872326 }, + { url = "https://files.pythonhosted.org/packages/ce/ac/5b1ea50fc08a9df82de7e1771537557f07c2632231bbab652c7e22597908/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909", size = 2822712 }, + { url = "https://files.pythonhosted.org/packages/c4/fc/504d4503b2abc4570fac3ca56eb8fed5e437bf9c9ef13f36b6621db8ef00/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1", size = 2920155 }, + { url = "https://files.pythonhosted.org/packages/b2/d1/323581e9273ad2c0dbd1902f3fb50c441da86e894b6e25a73c3fda32c57e/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567", size = 2959356 }, +] + +[[package]] +name = "scikit-learn" +version = "1.5.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "joblib" }, + { name = "numpy" }, + { 
name = "scipy" }, + { name = "threadpoolctl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/37/59/44985a2bdc95c74e34fef3d10cb5d93ce13b0e2a7baefffe1b53853b502d/scikit_learn-1.5.2.tar.gz", hash = "sha256:b4237ed7b3fdd0a4882792e68ef2545d5baa50aca3bb45aa7df468138ad8f94d", size = 7001680 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/db/b485c1ac54ff3bd9e7e6b39d3cc6609c4c76a65f52ab0a7b22b6c3ab0e9d/scikit_learn-1.5.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f932a02c3f4956dfb981391ab24bda1dbd90fe3d628e4b42caef3e041c67707a", size = 12110344 }, + { url = "https://files.pythonhosted.org/packages/54/1a/7deb52fa23aebb855431ad659b3c6a2e1709ece582cb3a63d66905e735fe/scikit_learn-1.5.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3b923d119d65b7bd555c73be5423bf06c0105678ce7e1f558cb4b40b0a5502b1", size = 11033502 }, + { url = "https://files.pythonhosted.org/packages/a1/32/4a7a205b14c11225609b75b28402c196e4396ac754dab6a81971b811781c/scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd", size = 12085794 }, + { url = "https://files.pythonhosted.org/packages/c6/29/044048c5e911373827c0e1d3051321b9183b2a4f8d4e2f11c08fcff83f13/scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6", size = 12945797 }, + { url = "https://files.pythonhosted.org/packages/aa/ce/c0b912f2f31aeb1b756a6ba56bcd84dd1f8a148470526a48515a3f4d48cd/scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1", size = 10985467 }, + { url = "https://files.pythonhosted.org/packages/a4/50/8891028437858cc510e13578fe7046574a60c2aaaa92b02d64aac5b1b412/scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5", size = 
12025584 }, + { url = "https://files.pythonhosted.org/packages/d2/79/17feef8a1c14149436083bec0e61d7befb4812e272d5b20f9d79ea3e9ab1/scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908", size = 10959795 }, + { url = "https://files.pythonhosted.org/packages/b1/c8/f08313f9e2e656bd0905930ae8bf99a573ea21c34666a813b749c338202f/scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3", size = 12077302 }, + { url = "https://files.pythonhosted.org/packages/a7/48/fbfb4dc72bed0fe31fe045fb30e924909ad03f717c36694351612973b1a9/scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12", size = 13002811 }, + { url = "https://files.pythonhosted.org/packages/a5/e7/0c869f9e60d225a77af90d2aefa7a4a4c0e745b149325d1450f0f0ce5399/scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f", size = 10951354 }, +] + +[[package]] +name = "scipy" +version = "1.14.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/62/11/4d44a1f274e002784e4dbdb81e0ea96d2de2d1045b2132d5af62cc31fd28/scipy-1.14.1.tar.gz", hash = "sha256:5a275584e726026a5699459aa72f828a610821006228e841b94275c4a7c08417", size = 58620554 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/04/2bdacc8ac6387b15db6faa40295f8bd25eccf33f1f13e68a72dc3c60a99e/scipy-1.14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:631f07b3734d34aced009aaf6fedfd0eb3498a97e581c3b1e5f14a04164a456d", size = 39128781 }, + { url = "https://files.pythonhosted.org/packages/c8/53/35b4d41f5fd42f5781dbd0dd6c05d35ba8aa75c84ecddc7d44756cd8da2e/scipy-1.14.1-cp312-cp312-macosx_12_0_arm64.whl", 
hash = "sha256:af29a935803cc707ab2ed7791c44288a682f9c8107bc00f0eccc4f92c08d6e07", size = 29939542 }, + { url = "https://files.pythonhosted.org/packages/66/67/6ef192e0e4d77b20cc33a01e743b00bc9e68fb83b88e06e636d2619a8767/scipy-1.14.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:2843f2d527d9eebec9a43e6b406fb7266f3af25a751aa91d62ff416f54170bc5", size = 23148375 }, + { url = "https://files.pythonhosted.org/packages/f6/32/3a6dedd51d68eb7b8e7dc7947d5d841bcb699f1bf4463639554986f4d782/scipy-1.14.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:eb58ca0abd96911932f688528977858681a59d61a7ce908ffd355957f7025cfc", size = 25578573 }, + { url = "https://files.pythonhosted.org/packages/f0/5a/efa92a58dc3a2898705f1dc9dbaf390ca7d4fba26d6ab8cfffb0c72f656f/scipy-1.14.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30ac8812c1d2aab7131a79ba62933a2a76f582d5dbbc695192453dae67ad6310", size = 35319299 }, + { url = "https://files.pythonhosted.org/packages/8e/ee/8a26858ca517e9c64f84b4c7734b89bda8e63bec85c3d2f432d225bb1886/scipy-1.14.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f9ea80f2e65bdaa0b7627fb00cbeb2daf163caa015e59b7516395fe3bd1e066", size = 40849331 }, + { url = "https://files.pythonhosted.org/packages/a5/cd/06f72bc9187840f1c99e1a8750aad4216fc7dfdd7df46e6280add14b4822/scipy-1.14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:edaf02b82cd7639db00dbff629995ef185c8df4c3ffa71a5562a595765a06ce1", size = 42544049 }, + { url = "https://files.pythonhosted.org/packages/aa/7d/43ab67228ef98c6b5dd42ab386eae2d7877036970a0d7e3dd3eb47a0d530/scipy-1.14.1-cp312-cp312-win_amd64.whl", hash = "sha256:2ff38e22128e6c03ff73b6bb0f85f897d2362f8c052e3b8ad00532198fbdae3f", size = 44521212 }, + { url = "https://files.pythonhosted.org/packages/50/ef/ac98346db016ff18a6ad7626a35808f37074d25796fd0234c2bb0ed1e054/scipy-1.14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:1729560c906963fc8389f6aac023739ff3983e727b1a4d87696b7bf108316a79", size = 39091068 }, + { url = "https://files.pythonhosted.org/packages/b9/cc/70948fe9f393b911b4251e96b55bbdeaa8cca41f37c26fd1df0232933b9e/scipy-1.14.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:4079b90df244709e675cdc8b93bfd8a395d59af40b72e339c2287c91860deb8e", size = 29875417 }, + { url = "https://files.pythonhosted.org/packages/3b/2e/35f549b7d231c1c9f9639f9ef49b815d816bf54dd050da5da1c11517a218/scipy-1.14.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e0cf28db0f24a38b2a0ca33a85a54852586e43cf6fd876365c86e0657cfe7d73", size = 23084508 }, + { url = "https://files.pythonhosted.org/packages/3f/d6/b028e3f3e59fae61fb8c0f450db732c43dd1d836223a589a8be9f6377203/scipy-1.14.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0c2f95de3b04e26f5f3ad5bb05e74ba7f68b837133a4492414b3afd79dfe540e", size = 25503364 }, + { url = "https://files.pythonhosted.org/packages/a7/2f/6c142b352ac15967744d62b165537a965e95d557085db4beab2a11f7943b/scipy-1.14.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b99722ea48b7ea25e8e015e8341ae74624f72e5f21fc2abd45f3a93266de4c5d", size = 35292639 }, + { url = "https://files.pythonhosted.org/packages/56/46/2449e6e51e0d7c3575f289f6acb7f828938eaab8874dbccfeb0cd2b71a27/scipy-1.14.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5149e3fd2d686e42144a093b206aef01932a0059c2a33ddfa67f5f035bdfe13e", size = 40798288 }, + { url = "https://files.pythonhosted.org/packages/32/cd/9d86f7ed7f4497c9fd3e39f8918dd93d9f647ba80d7e34e4946c0c2d1a7c/scipy-1.14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e4f5a7c49323533f9103d4dacf4e4f07078f360743dec7f7596949149efeec06", size = 42524647 }, + { url = "https://files.pythonhosted.org/packages/f5/1b/6ee032251bf4cdb0cc50059374e86a9f076308c1512b61c4e003e241efb7/scipy-1.14.1-cp313-cp313-win_amd64.whl", hash = "sha256:baff393942b550823bfce952bb62270ee17504d02a1801d7fd0719534dfb9c84", 
size = 44469524 }, +] + +[[package]] +name = "setuptools" +version = "75.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ed/22/a438e0caa4576f8c383fa4d35f1cc01655a46c75be358960d815bfbb12bd/setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686", size = 1351577 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/12/282ee9bce8b58130cb762fbc9beabd531549952cac11fc56add11dcb7ea0/setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd", size = 1251070 }, +] + +[[package]] +name = "threadpoolctl" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bd/55/b5148dcbf72f5cde221f8bfe3b6a540da7aa1842f6b491ad979a6c8b84af/threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107", size = 41936 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467", size = 18414 }, +] diff --git a/server/.dockerignore b/server/.dockerignore new file mode 100644 index 0000000..6320cd2 --- /dev/null +++ b/server/.dockerignore @@ -0,0 +1 @@ +data \ No newline at end of file diff --git a/server/.gitignore b/server/.gitignore new file mode 100644 index 0000000..5fac628 --- /dev/null +++ b/server/.gitignore @@ -0,0 +1 @@ +/data/ \ No newline at end of file diff --git a/server/Dockerfile b/server/Dockerfile new file mode 100644 index 0000000..32493bf --- /dev/null +++ b/server/Dockerfile @@ -0,0 +1,16 @@ +FROM golang:1.23.2-alpine AS builder + +WORKDIR /server + +COPY go.mod go.sum ./ +RUN go mod download +COPY . . 
+ +RUN go build -ldflags "-s -w" -o movier + +FROM alpine:latest + +WORKDIR /server + +COPY --from=builder /server/movier /server/movier +COPY --from=builder /server/pkg/templates /server/pkg/templates \ No newline at end of file diff --git a/server/cmd/download.go b/server/cmd/download.go new file mode 100644 index 0000000..6664e4b --- /dev/null +++ b/server/cmd/download.go @@ -0,0 +1,82 @@ +package cmd + +import ( + "log" + + "github.com/aykhans/movier/server/pkg/config" + "github.com/aykhans/movier/server/pkg/dto" + "github.com/aykhans/movier/server/pkg/utils" + "github.com/spf13/cobra" +) + +func getDownloadCmd() *cobra.Command { + downloadCmd := &cobra.Command{ + Use: "download", + Short: "Movie Data Downloader", + Run: func(cmd *cobra.Command, args []string) { + err := runDownload() + if err != nil { + log.Fatalln(err) + } + }, + } + + return downloadCmd +} + +func runDownload() error { + downloadPath := config.GetDownloadPath() + extractPath := config.GetExtractPath() + err := utils.MakeDirIfNotExist(downloadPath) + if err != nil { + return err + } + err = utils.MakeDirIfNotExist(extractPath) + if err != nil { + return err + } + download(downloadPath, extractPath) + return nil +} + +func download( + downloadPath string, + extractPath string, +) error { + for _, downloadConfig := range config.DownloadConfigs { + extracted, err := utils.IsDirExist(extractPath + "/" + downloadConfig.ExtractName) + if err != nil { + return err + } + if extracted { + log.Printf("File %s already extracted. Skipping...\n\n", downloadConfig.ExtractName) + continue + } + + downloaded, err := utils.IsDirExist(downloadPath + "/" + downloadConfig.DownloadName) + if err != nil { + return err + } + if downloaded { + log.Printf("File %s already downloaded. 
Extracting...\n\n", downloadConfig.DownloadName) + if err := dto.ExtractGzFile( + downloadPath+"/"+downloadConfig.DownloadName, + extractPath+"/"+downloadConfig.ExtractName, + ); err != nil { + return err + } + continue + } + + log.Printf("Downloading and extracting %s file...\n\n", downloadConfig.DownloadName) + if err := dto.DownloadAndExtractGz( + downloadConfig.URL, + downloadPath+"/"+downloadConfig.DownloadName, + extractPath+"/"+downloadConfig.ExtractName, + ); err != nil { + return err + } + } + + return nil +} diff --git a/server/cmd/filter.go b/server/cmd/filter.go new file mode 100644 index 0000000..a96209f --- /dev/null +++ b/server/cmd/filter.go @@ -0,0 +1,102 @@ +package cmd + +import ( + "fmt" + "log" + "time" + + "github.com/aykhans/movier/server/pkg/config" + "github.com/aykhans/movier/server/pkg/dto" + + "github.com/aykhans/movier/server/pkg/storage/postgresql" + "github.com/aykhans/movier/server/pkg/storage/postgresql/repository" + "github.com/spf13/cobra" +) + +func getFilterCmd() *cobra.Command { + filterCmd := &cobra.Command{ + Use: "filter", + Short: "Movie Data Filter", + Run: func(cmd *cobra.Command, args []string) { + err := runFilter() + if err != nil { + log.Fatalln(err) + } + }, + } + + return filterCmd +} + +func runFilter() error { + generalStartTime := time.Now() + extractedPath := config.GetExtractPath() + + log.Printf("Filtering basics data...\n\n") + startTime := time.Now() + basics, err := dto.FilterBasics(extractedPath + "/title.basics.tsv") + if err != nil { + return err + } + log.Printf("Basics data filtered. 
Found %d records (%s)\n\n", len(basics), time.Since(startTime)) + + log.Printf("Inserting basics data...\n\n") + postgresURL, err := config.NewPostgresURL() + if err != nil { + return err + } + + db, err := postgresql.NewDB(postgresURL) + if err != nil { + return err + } + imdbRepo := repository.NewIMDbRepository(db) + startTime = time.Now() + err = imdbRepo.InsertMultipleBasics(basics) + if err != nil { + return err + } + log.Printf("Basics data inserted. (%s)\n\n", time.Since(startTime)) + + log.Printf("Filtering principals data...\n\n") + tconsts, err := imdbRepo.GetAllTconsts() + if err != nil { + return err + } + if len(tconsts) == 0 { + return fmt.Errorf("no tconsts found") + } + startTime = time.Now() + principals, err := dto.FilterPrincipals(extractedPath+"/title.principals.tsv", tconsts) + if err != nil { + return err + } + log.Printf("Principals data filtered. (%s)\n\n", time.Since(startTime)) + + log.Printf("Inserting principals data...\n\n") + startTime = time.Now() + err = imdbRepo.UpdateMultiplePrincipals(principals) + if err != nil { + return err + } + log.Printf("Principals data inserted. (%s)\n\n", time.Since(startTime)) + + log.Printf("Filtering ratings data...\n\n") + startTime = time.Now() + ratings, err := dto.FilterRatings(extractedPath+"/title.ratings.tsv", tconsts) + if err != nil { + return err + } + log.Printf("Ratings data filtered. (%s)\n\n", time.Since(startTime)) + + log.Printf("Inserting ratings data...\n\n") + startTime = time.Now() + err = imdbRepo.UpdateMultipleRatings(ratings) + if err != nil { + return err + } + log.Printf("Ratings data inserted. (%s)\n\n", time.Since(startTime)) + + log.Printf("Filtering done! 
(%s)\n", time.Since(generalStartTime)) + return nil +} diff --git a/server/cmd/root.go b/server/cmd/root.go new file mode 100644 index 0000000..e23464d --- /dev/null +++ b/server/cmd/root.go @@ -0,0 +1,20 @@ +package cmd + +import ( + "github.com/spf13/cobra" +) + +var rootCmd = &cobra.Command{ + Use: "movier", + Short: "Movie Recommendation System", + Run: func(cmd *cobra.Command, args []string) { + cmd.Help() + }, +} + +func Execute() error { + rootCmd.AddCommand(getDownloadCmd()) + rootCmd.AddCommand(getFilterCmd()) + rootCmd.AddCommand(getServeCmd()) + return rootCmd.Execute() +} diff --git a/server/cmd/serve.go b/server/cmd/serve.go new file mode 100644 index 0000000..9de67eb --- /dev/null +++ b/server/cmd/serve.go @@ -0,0 +1,79 @@ +package cmd + +import ( + "context" + "fmt" + "log" + "net/http" + + "github.com/aykhans/movier/server/pkg/config" + "github.com/aykhans/movier/server/pkg/handlers" + "github.com/aykhans/movier/server/pkg/storage/postgresql" + "github.com/aykhans/movier/server/pkg/storage/postgresql/repository" + "github.com/spf13/cobra" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +func getServeCmd() *cobra.Command { + serveCmd := &cobra.Command{ + Use: "serve", + Short: "Movie Recommendation Serve", + Run: func(cmd *cobra.Command, args []string) { + err := runServe() + if err != nil { + log.Fatalln(err) + } + fmt.Println("Movie Recommendation Serve") + }, + } + return serveCmd +} + +// runServe connects to PostgreSQL and the gRPC recommender service, registers +// the HTTP routes and serves them on config.ServePort. Configuration, database +// and ListenAndServe errors are returned; a gRPC client error exits via log.Fatalf. +func runServe() error { + dbURL, err := config.NewPostgresURL() + if err != nil { + return err + } + db, err := postgresql.NewDB(dbURL) + if err != nil { + return err + } + // Defer Close only after the error check: on failure db must not be used. + defer db.Close(context.Background()) + imdbRepo := repository.NewIMDbRepository(db) + + grpcRecommenderServiceTarget, err := config.NewRecommenderServiceGrpcTarget() + if err != nil { + return err + } + conn, err := grpc.NewClient( + grpcRecommenderServiceTarget, + grpc.WithTransportCredentials(insecure.NewCredentials()), + ) + if err != nil { + log.Fatalf("did 
not connect to grpc recommender service: %v", err) + } + defer conn.Close() + + router := http.NewServeMux() + imdbHandler := handlers.NewIMDbHandler(*imdbRepo, conn, config.GetBaseURL()) + + router.HandleFunc("GET /ping", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/plain") + w.WriteHeader(http.StatusOK) + w.Write([]byte("pong")) + }) + router.HandleFunc("GET /", imdbHandler.HandlerHome) + router.HandleFunc("GET /recs", imdbHandler.HandlerGetRecommendations) + + log.Printf("serving on port %d", config.ServePort) + err = http.ListenAndServe(fmt.Sprintf(":%d", config.ServePort), handlers.CORSMiddleware(router)) + if err != nil { + return err + } + return nil +} diff --git a/server/go.mod b/server/go.mod new file mode 100644 index 0000000..0045902 --- /dev/null +++ b/server/go.mod @@ -0,0 +1,22 @@ +module github.com/aykhans/movier/server + +go 1.23.2 + +require ( + github.com/jackc/pgx/v5 v5.7.1 + github.com/spf13/cobra v1.8.1 + google.golang.org/grpc v1.67.1 + google.golang.org/protobuf v1.35.1 +) + +require ( + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/jackc/pgpassfile v1.0.0 // indirect + github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect + github.com/spf13/pflag v1.0.5 // indirect + golang.org/x/crypto v0.27.0 // indirect + golang.org/x/net v0.28.0 // indirect + golang.org/x/sys v0.25.0 // indirect + golang.org/x/text v0.18.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142 // indirect +) diff --git a/server/go.sum b/server/go.sum new file mode 100644 index 0000000..c497745 --- /dev/null +++ b/server/go.sum @@ -0,0 +1,48 @@ +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= +github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= +github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= +github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= +github.com/jackc/pgx/v5 v5.7.1 h1:x7SYsPBYDkHDksogeSmZZ5xzThcTgRz++I5E+ePFUcs= +github.com/jackc/pgx/v5 v5.7.1/go.mod h1:e7O26IywZZ+naJtWWos6i6fvWK+29etgITqrqHLfoZA= +github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= +github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= +github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod 
h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A= +golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70= +golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= +golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= +golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= +golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142 h1:e7S5W7MGGLaSu8j3YjdezkZ+m1/Nm0uRVRMEMGk26Xs= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= +google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= +google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git 
a/server/main.go b/server/main.go new file mode 100644 index 0000000..a8cdf8c --- /dev/null +++ b/server/main.go @@ -0,0 +1,23 @@ +package main + +import ( + "log" + "os" + + "github.com/aykhans/movier/server/cmd" + "github.com/aykhans/movier/server/pkg/config" +) + +func main() { + // log.SetFlags(log.LstdFlags | log.Lshortfile) + baseDir, err := os.Getwd() + if err != nil { + log.Fatal(err) + } + config.BaseDir = baseDir + + err = cmd.Execute() + if err != nil { + log.Fatal(err) + } +} diff --git a/server/pkg/config/config.go b/server/pkg/config/config.go new file mode 100644 index 0000000..1bf9030 --- /dev/null +++ b/server/pkg/config/config.go @@ -0,0 +1,105 @@ +package config + +import ( + "fmt" + "strconv" + + "github.com/aykhans/movier/server/pkg/utils" +) + +type DownloadConfig struct { + URL string + DownloadName string + ExtractName string +} + +var DownloadConfigs = []DownloadConfig{ + { + URL: "https://datasets.imdbws.com/title.basics.tsv.gz", + DownloadName: "title.basics.tsv.gz", + ExtractName: "title.basics.tsv", + }, + { + URL: "https://datasets.imdbws.com/title.principals.tsv.gz", + DownloadName: "title.principals.tsv.gz", + ExtractName: "title.principals.tsv", + }, + { + URL: "https://datasets.imdbws.com/title.ratings.tsv.gz", + DownloadName: "title.ratings.tsv.gz", + ExtractName: "title.ratings.tsv", + }, +} + +var BaseDir = "/" + +func GetTemplatePath() string { + return BaseDir + "/pkg/templates" +} + +func GetDownloadPath() string { + return BaseDir + "/data/raw" +} + +func GetExtractPath() string { + return BaseDir + "/data/extracted" +} + +const ( + ServePort = 8080 +) + +var TitleTypes = []string{"movie", "tvMovie"} +var NconstCategories = []string{"actress", "actor", "director", "writer"} + +func NewPostgresURL() (string, error) { + username := utils.GetEnv("POSTGRES_USER", "") + if username == "" { + return "", fmt.Errorf("POSTGRES_USER env variable is not set") + } + password := utils.GetEnv("POSTGRES_PASSWORD", "") + if password == "" { 
+ return "", fmt.Errorf("POSTGRES_PASSWORD env variable is not set") + } + host := utils.GetEnv("POSTGRES_HOST", "") + if host == "" { + return "", fmt.Errorf("POSTGRES_HOST env variable is not set") + } + port := utils.GetEnv("POSTGRES_PORT", "") + if port == "" { + return "", fmt.Errorf("POSTGRES_PORT env variable is not set") + } + _, err := strconv.Atoi(port) + if err != nil { + return "", fmt.Errorf("POSTGRES_PORT env variable is not a number") + } + db := utils.GetEnv("POSTGRES_DB", "") + if db == "" { + return "", fmt.Errorf("POSTGRES_DB env variable is not set") + } + + return fmt.Sprintf( + "postgres://%s:%s@%s:%s/%s?sslmode=disable", + username, password, host, port, db, + ), nil +} + +func NewRecommenderServiceGrpcTarget() (string, error) { + host := utils.GetEnv("RECOMMENDER_SERVICE_GRPC_HOST", "") + if host == "" { + return "", fmt.Errorf("RECOMMENDER_SERVICE_GRPC_HOST env variable is not set") + } + port := utils.GetEnv("RECOMMENDER_SERVICE_GRPC_PORT", "") + if port == "" { + return "", fmt.Errorf("RECOMMENDER_SERVICE_GRPC_PORT env variable is not set") + } + _, err := strconv.Atoi(port) + if err != nil { + return "", fmt.Errorf("RECOMMENDER_SERVICE_GRPC_PORT env variable is not a number") + } + return fmt.Sprintf("%s:%s", host, port), nil +} + +func GetBaseURL() string { + return utils.GetEnv("BASE_URL", "http://localhost:8080") +} diff --git a/server/pkg/dto/download.go b/server/pkg/dto/download.go new file mode 100644 index 0000000..ab8564a --- /dev/null +++ b/server/pkg/dto/download.go @@ -0,0 +1,31 @@ +package dto + +import ( + "io" + "net/http" + "os" +) + +func DownloadAndExtractGz(url, downloadFilepath, extractFilepath string) error { + if err := Download(url, downloadFilepath); err != nil { + return err + } + return ExtractGzFile(downloadFilepath, extractFilepath) +} + +func Download(url, filepath string) error { + resp, err := http.Get(url) + if err != nil { + return err + } + defer resp.Body.Close() + + out, err := os.Create(filepath) + if 
err != nil { + return err + } + defer out.Close() + + _, err = io.Copy(out, resp.Body) + return err +} diff --git a/server/pkg/dto/extract.go b/server/pkg/dto/extract.go new file mode 100644 index 0000000..085b9c4 --- /dev/null +++ b/server/pkg/dto/extract.go @@ -0,0 +1,30 @@ +package dto + +import ( + "compress/gzip" + "io" + "os" +) + +func ExtractGzFile(gzFile, extractedFilepath string) error { + file, err := os.Open(gzFile) + if err != nil { + return err + } + defer file.Close() + + gzReader, err := gzip.NewReader(file) + if err != nil { + return err + } + defer gzReader.Close() + + outFile, err := os.Create(extractedFilepath) + if err != nil { + return err + } + defer outFile.Close() + + _, err = io.Copy(outFile, gzReader) + return err +} diff --git a/server/pkg/dto/filter.go b/server/pkg/dto/filter.go new file mode 100644 index 0000000..cda7f33 --- /dev/null +++ b/server/pkg/dto/filter.go @@ -0,0 +1,259 @@ +package dto + +import ( + "bufio" + "fmt" + "math" + "os" + "slices" + "strconv" + "strings" + + "github.com/aykhans/movier/server/pkg/config" +) + +func FilterBasics(filePath string) ([]Basic, error) { + file, err := os.Open(filePath) + if err != nil { + return nil, fmt.Errorf("could not open file: %v", err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + + columnCount := 9 + var headers []string + if scanner.Scan() { + headers = strings.Split(scanner.Text(), "\t") + if len(headers) != columnCount { + return nil, fmt.Errorf("expected %d column headers, found %d", columnCount, len(headers)) + } + } else { + return nil, fmt.Errorf("could not read column headers: %v", scanner.Err()) + } + + var ( + tconstIndex int = -1 + titleTypeIndex int = -1 + startYearIndex int = -1 + genresIndex int = -1 + ) + for i, header := range headers { + switch header { + case "tconst": + tconstIndex = i + case "titleType": + titleTypeIndex = i + case "startYear": + startYearIndex = i + case "genres": + genresIndex = i + } + } + switch { + case tconstIndex == -1: 
+ return nil, fmt.Errorf("column %s not found", "`tconst`") + case titleTypeIndex == -1: + return nil, fmt.Errorf("column %s not found", "`titleType`") + case startYearIndex == -1: + return nil, fmt.Errorf("column %s not found", "`startYear`") + case genresIndex == -1: + return nil, fmt.Errorf("column %s not found", "`genres`") + } + + var basics []Basic + for scanner.Scan() { + line := scanner.Text() + columns := strings.Split(line, "\t") + if len(columns) != columnCount { + fmt.Println("Columns are:", columns) + return nil, fmt.Errorf("expected %d columns, found %d", columnCount, len(columns)) + } + + if slices.Contains(config.TitleTypes, columns[titleTypeIndex]) { + var startYearUint16 uint16 + startYear, err := strconv.Atoi(columns[startYearIndex]) + if err != nil { + startYearUint16 = 0 + } else { + startYearUint16 = uint16(startYear) + } + + var genres string + if columns[genresIndex] == "\\N" { + genres = "" + } else { + genres = strings.ReplaceAll(strings.ToLower(columns[genresIndex]), " ", "") + } + + basics = append(basics, Basic{ + Tconst: columns[tconstIndex], + StartYear: startYearUint16, + Genres: genres, + }) + } + } + + if err := scanner.Err(); err != nil { + return nil, err + } + return basics, nil +} + +func FilterPrincipals(filePath string, tconsts []string) ([]Principal, error) { + file, err := os.Open(filePath) + if err != nil { + return nil, fmt.Errorf("could not open file: %v", err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + + columnCount := 6 + var headers []string + if scanner.Scan() { + headers = strings.Split(scanner.Text(), "\t") + if len(headers) != columnCount { + return nil, fmt.Errorf("expected %d column headers, found %d", columnCount, len(headers)) + } + } else { + return nil, fmt.Errorf("could not read column headers: %v", scanner.Err()) + } + + var ( + tconstIndex int = -1 + nconstIndex int = -1 + categoryIndex int = -1 + ) + for i, header := range headers { + switch header { + case "tconst": + tconstIndex = 
i + case "nconst": + nconstIndex = i + case "category": + categoryIndex = i + } + } + switch { + case tconstIndex == -1: + return nil, fmt.Errorf("column %s not found", "`tconst`") + case nconstIndex == -1: + return nil, fmt.Errorf("column %s not found", "`nconst`") + case categoryIndex == -1: + return nil, fmt.Errorf("column %s not found", "`category`") + } + + tconstMap := make(map[string][]string) + for _, tconst := range tconsts { + tconstMap[tconst] = []string{} + } + for scanner.Scan() { + line := scanner.Text() + columns := strings.Split(line, "\t") + if len(columns) != columnCount { + fmt.Println("Columns are:", columns) + return nil, fmt.Errorf("expected %d columns, found %d", columnCount, len(columns)) + } + + if slices.Contains(config.NconstCategories, columns[categoryIndex]) { + if _, ok := tconstMap[columns[tconstIndex]]; ok { + tconstMap[columns[tconstIndex]] = append(tconstMap[columns[tconstIndex]], columns[nconstIndex]) + } + } + } + if err := scanner.Err(); err != nil { + return nil, err + } + + var principals []Principal + for tconst, nconsts := range tconstMap { + principals = append(principals, Principal{ + Tconst: tconst, + Nconsts: strings.Join(nconsts, ","), + }) + } + return principals, nil +} + +func FilterRatings(filePath string, tconsts []string) ([]Ratings, error) { + file, err := os.Open(filePath) + if err != nil { + return nil, fmt.Errorf("could not open file: %v", err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + + columnCount := 3 + var headers []string + if scanner.Scan() { + headers = strings.Split(scanner.Text(), "\t") + if len(headers) != columnCount { + return nil, fmt.Errorf("expected %d column headers, found %d", columnCount, len(headers)) + } + } else { + return nil, fmt.Errorf("could not read column headers: %v", scanner.Err()) + } + + var ( + tconstIndex int = -1 + averageRatingIndex int = -1 + numVotesIndex int = -1 + ) + for i, header := range headers { + switch header { + case "tconst": + tconstIndex 
= i + case "averageRating": + averageRatingIndex = i + case "numVotes": + numVotesIndex = i + } + } + switch { + case tconstIndex == -1: + return nil, fmt.Errorf("column %s not found", "`tconst`") + case averageRatingIndex == -1: + return nil, fmt.Errorf("column %s not found", "`averageRating`") + case numVotesIndex == -1: + return nil, fmt.Errorf("column %s not found", "`numVotes`") + } + + tconstMap := make(map[string][]string) + for _, tconst := range tconsts { + tconstMap[tconst] = []string{} + } + var ratings []Ratings + for scanner.Scan() { + line := scanner.Text() + columns := strings.Split(line, "\t") + if len(columns) != columnCount { + fmt.Println("Columns are:", columns) + return nil, fmt.Errorf("expected %d columns, found %d", columnCount, len(columns)) + } + + if _, ok := tconstMap[columns[tconstIndex]]; ok { + rating, err := strconv.ParseFloat(columns[averageRatingIndex], 32) + if err != nil { + rating = 0 + } + + votes, err := strconv.Atoi(columns[numVotesIndex]) + if err != nil { + votes = 0 + } + + ratings = append(ratings, Ratings{ + Tconst: columns[tconstIndex], + Rating: math.Round(rating*10) / 10, + Votes: votes, + }) + } + } + if err := scanner.Err(); err != nil { + return nil, err + } + + return ratings, nil +} diff --git a/server/pkg/dto/models.go b/server/pkg/dto/models.go new file mode 100644 index 0000000..a5d7701 --- /dev/null +++ b/server/pkg/dto/models.go @@ -0,0 +1,27 @@ +package dto + +type Basic struct { + Tconst string `json:"tconst"` + StartYear uint16 `json:"startYear"` + Genres string `json:"genres"` +} + +type Principal struct { + Tconst string `json:"tconst"` + Nconsts string `json:"nconsts"` +} + +type Ratings struct { + Tconst string `json:"tconst"` + Rating float64 `json:"rating"` + Votes int `json:"votes"` +} + +type MinMax struct { + MinVotes uint `json:"minVotes"` + MaxVotes uint `json:"maxVotes"` + MinYear uint `json:"minYear"` + MaxYear uint `json:"maxYear"` + MinRating float64 `json:"minRating"` + MaxRating float64 
`json:"maxRating"` +} diff --git a/server/pkg/dto/vector.go b/server/pkg/dto/vector.go new file mode 100644 index 0000000..344d382 --- /dev/null +++ b/server/pkg/dto/vector.go @@ -0,0 +1,64 @@ +package dto + +import ( + "fmt" + "math" +) + +type CountVectorizer struct { + WordIndex map[string]int +} + +func NewCountVectorizer() *CountVectorizer { + return &CountVectorizer{} +} + +func (cv *CountVectorizer) SetWordIndexes(docs [][]string) { + cv.WordIndex = make(map[string]int) + index := 0 + for _, doc := range docs { + for _, word := range doc { + if word == "" { + continue + } + if _, exists := cv.WordIndex[word]; !exists { + cv.WordIndex[word] = index + index++ + } + } + } +} + +// Vectorize counts how often each indexed word occurs in doc; the result has +// one slot per entry in cv.WordIndex. Words absent from the index (including +// "", which SetWordIndexes skips) are ignored: a plain map lookup would return +// 0 for them and wrongly increment slot 0, or panic when the index is empty. +func (cv *CountVectorizer) Vectorize(doc []string) []uint8 { + vector := make([]uint8, len(cv.WordIndex)) + for _, word := range doc { + if idx, ok := cv.WordIndex[word]; ok { + vector[idx]++ + } + } + return vector +} + +func CosineSimilarity(a, b []uint8) (float32, error) { + if len(a) != len(b) { + return 0, fmt.Errorf("slices must have the same length") + } + var dotProduct, normA, normB float64 + for i := 0; i < len(a); i++ { + x := float64(a[i]) + y := float64(b[i]) + dotProduct += x * y + normA += x * x + normB += y * y + } + + if normA == 0 || normB == 0 { + return 0, nil + } + + return float32(dotProduct / (math.Sqrt(normA) * math.Sqrt(normB))), nil +} diff --git a/server/pkg/handlers/imdb.go b/server/pkg/handlers/imdb.go new file mode 100644 index 0000000..549e3ad --- /dev/null +++ b/server/pkg/handlers/imdb.go @@ -0,0 +1,301 @@ +package handlers + +import ( + "fmt" + "log" + "net/http" + "strconv" + "strings" + + "github.com/aykhans/movier/server/pkg/dto" + "github.com/aykhans/movier/server/pkg/proto" + "github.com/aykhans/movier/server/pkg/storage/postgresql/repository" + "github.com/aykhans/movier/server/pkg/utils" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +type IMDbHandler struct { + imdbRepo repository.IMDbRepository + grpcRecommenderService 
*grpc.ClientConn + baseURL string +} + +func NewIMDbHandler(imdbRepo repository.IMDbRepository, grpcRecommenderService *grpc.ClientConn, baseURL string) *IMDbHandler { + return &IMDbHandler{ + imdbRepo: imdbRepo, + grpcRecommenderService: grpcRecommenderService, + baseURL: baseURL, + } +} + +func (h *IMDbHandler) HandlerGetRecommendations(w http.ResponseWriter, r *http.Request) { + query := r.URL.Query() + + tconstsQ := query["tconst"] + tconstsLen := len(tconstsQ) + if tconstsLen < 1 || tconstsLen > 5 { + RespondWithJSON(w, ErrorResponse{Error: "tconsts should be between 1 and 5"}, http.StatusBadRequest) + return + } + + uniqueTconsts := make(map[string]struct{}) + for _, str := range tconstsQ { + uniqueTconsts[str] = struct{}{} + } + + invalidTconsts := []string{} + tconsts := []string{} + for tconst := range uniqueTconsts { + tconstLength := len(tconst) + if 9 > tconstLength || tconstLength > 12 || !strings.HasPrefix(tconst, "tt") { + invalidTconsts = append(invalidTconsts, tconst) + } + tconsts = append(tconsts, tconst) + } + if len(invalidTconsts) > 0 { + RespondWithJSON( + w, + ErrorResponse{ + Error: fmt.Sprintf("Invalid tconsts: %s", strings.Join(invalidTconsts, ", ")), + }, + http.StatusBadRequest, + ) + return + } + + n := 5 + nQuery := query.Get("n") + if nQuery != "" { + nInt, err := strconv.Atoi(nQuery) + if err != nil { + RespondWithJSON(w, ErrorResponse{Error: "n should be an integer"}, http.StatusBadRequest) + return + } + if nInt < 1 || nInt > 20 { + RespondWithJSON(w, ErrorResponse{Error: "n should be greater than 0 and less than 21"}, http.StatusBadRequest) + return + } + n = nInt + } + + filter := &proto.Filter{} + minVotesQ := query.Get("min_votes") + if minVotesQ != "" { + minVotesInt, err := strconv.Atoi(minVotesQ) + if err != nil { + RespondWithJSON(w, ErrorResponse{Error: "min_votes should be an integer"}, http.StatusBadRequest) + return + } + if !utils.IsUint32(minVotesInt) { + RespondWithJSON(w, ErrorResponse{Error: "min_votes should be 
greater than or equal to 0 and less than or equal to 4294967295"}, http.StatusBadRequest) + return + } + filter.MinVotesOneof = &proto.Filter_MinVotes{MinVotes: uint32(minVotesInt)} + } + + maxVotesQ := query.Get("max_votes") + if maxVotesQ != "" { + maxVotesInt, err := strconv.Atoi(maxVotesQ) + if err != nil { + RespondWithJSON(w, ErrorResponse{Error: "max_votes should be an integer"}, http.StatusBadRequest) + return + } + if !utils.IsUint32(maxVotesInt) { + RespondWithJSON(w, ErrorResponse{Error: "max_votes should be greater than 0 or equal to and less than or equal to 4294967295"}, http.StatusBadRequest) + return + } + if uint32(maxVotesInt) < filter.GetMinVotes() { + RespondWithJSON(w, ErrorResponse{Error: "max_votes should be greater than min_votes"}, http.StatusBadRequest) + return + } + filter.MaxVotesOneof = &proto.Filter_MaxVotes{MaxVotes: uint32(maxVotesInt)} + } + + minRatingQ := query.Get("min_rating") + if minRatingQ != "" { + minRatingFloat, err := strconv.ParseFloat(minRatingQ, 32) + if err != nil { + RespondWithJSON(w, ErrorResponse{Error: "min_rating should be a float"}, http.StatusBadRequest) + return + } + if minRatingFloat < 0 || minRatingFloat > 10 { + RespondWithJSON(w, ErrorResponse{Error: "min_rating should be greater than or equal to 0.0 and less than equal to 10.0"}, http.StatusBadRequest) + return + } + filter.MinRatingOneof = &proto.Filter_MinRating{MinRating: float32(minRatingFloat)} + } + + maxRatingQ := query.Get("max_rating") + if maxRatingQ != "" { + maxRatingFloat, err := strconv.ParseFloat(maxRatingQ, 32) + if err != nil { + RespondWithJSON(w, ErrorResponse{Error: "max_rating should be a float"}, http.StatusBadRequest) + return + } + if maxRatingFloat < 0 || maxRatingFloat > 10 { + RespondWithJSON(w, ErrorResponse{Error: "max_rating should be greater than or equal to 0.0 and less than or equal to 10.0"}, http.StatusBadRequest) + return + } + if float32(maxRatingFloat) < filter.GetMinRating() { + RespondWithJSON(w, 
ErrorResponse{Error: "max_rating should be greater than min_rating"}, http.StatusBadRequest) + return + } + filter.MaxRatingOneof = &proto.Filter_MaxRating{MaxRating: float32(maxRatingFloat)} + } + + minYearQ := query.Get("min_year") + if minYearQ != "" { + minYearInt, err := strconv.Atoi(minYearQ) + if err != nil { + RespondWithJSON(w, ErrorResponse{Error: "min_year should be an integer"}, http.StatusBadRequest) + return + } + if !utils.IsUint32(minYearInt) { + RespondWithJSON(w, ErrorResponse{Error: "min_year should be greater than or equal to 0 and less than or equal to 4294967295"}, http.StatusBadRequest) + return + } + filter.MinYearOneof = &proto.Filter_MinYear{MinYear: uint32(minYearInt)} + } + + maxYearQ := query.Get("max_year") + if maxYearQ != "" { + maxYearInt, err := strconv.Atoi(maxYearQ) + if err != nil { + RespondWithJSON(w, ErrorResponse{Error: "max_year should be an integer"}, http.StatusBadRequest) + return + } + if !utils.IsUint32(maxYearInt) { + RespondWithJSON(w, ErrorResponse{Error: "max_year should be greater than or equal to 0 and less than or equal to 4294967295"}, http.StatusBadRequest) + return + } + if uint32(maxYearInt) < filter.GetMinYear() { + RespondWithJSON(w, ErrorResponse{Error: "max_year should be greater than min_year"}, http.StatusBadRequest) + return + } + filter.MaxYearOneof = &proto.Filter_MaxYear{MaxYear: uint32(maxYearInt)} + } + + yearWeightQ := query.Get("year_weight") + ratingWeightQ := query.Get("rating_weight") + genresWeightQ := query.Get("genres_weight") + nconstsWeightQ := query.Get("nconsts_weight") + + weight := &proto.Weight{} + + features := []string{} + totalSum := 0 + if yearWeightQ != "" { + yearWeight, err := strconv.Atoi(yearWeightQ) + if err != nil { + RespondWithJSON(w, ErrorResponse{Error: "year_weight should be an integer"}, http.StatusBadRequest) + return + } + if yearWeight < 0 || yearWeight > 400 { + RespondWithJSON(w, ErrorResponse{Error: "year_weight should be greater than or equal to 0 and less 
than or equal to 400"}, http.StatusBadRequest) + return + } + weight.Year = uint32(yearWeight) + totalSum += yearWeight + features = append(features, "year") + } + if ratingWeightQ != "" { + ratingWeight, err := strconv.Atoi(ratingWeightQ) + if err != nil { + RespondWithJSON(w, ErrorResponse{Error: "rating_weight should be an integer"}, http.StatusBadRequest) + return + } + if ratingWeight < 0 || ratingWeight > 400 { + RespondWithJSON(w, ErrorResponse{Error: "rating_weight should be greater than or equal to 0 and less than or equal to 400"}, http.StatusBadRequest) + return + } + weight.Rating = uint32(ratingWeight) + totalSum += ratingWeight + features = append(features, "rating") + } + if genresWeightQ != "" { + genresWeight, err := strconv.Atoi(genresWeightQ) + if err != nil { + RespondWithJSON(w, ErrorResponse{Error: "genres_weight should be an integer"}, http.StatusBadRequest) + return + } + if genresWeight < 0 || genresWeight > 400 { + RespondWithJSON(w, ErrorResponse{Error: "genres_weight should be greater than or equal to 0 and less than or equal to 400"}, http.StatusBadRequest) + return + } + weight.Genres = uint32(genresWeight) + totalSum += genresWeight + features = append(features, "genres") + } + if nconstsWeightQ != "" { + nconstsWeight, err := strconv.Atoi(nconstsWeightQ) + if err != nil { + RespondWithJSON(w, ErrorResponse{Error: "nconsts_weight should be an integer"}, http.StatusBadRequest) + return + } + if nconstsWeight < 0 || nconstsWeight > 400 { + RespondWithJSON(w, ErrorResponse{Error: "nconsts_weight should be greater than or equal to 0 and less than or equal to 400"}, http.StatusBadRequest) + return + } + weight.Nconsts = uint32(nconstsWeight) + totalSum += nconstsWeight + features = append(features, "nconsts") + } + + featuresLen := len(features) + if featuresLen < 1 { + RespondWithJSON(w, ErrorResponse{Error: "At least one feature should be selected"}, http.StatusBadRequest) + return + } + if featuresLen*100 != totalSum { + 
RespondWithJSON(w, ErrorResponse{Error: fmt.Sprintf("Sum of the %d features should be equal to %d", featuresLen, featuresLen*100)}, http.StatusBadRequest) + return + } + + client := proto.NewRecommenderClient(h.grpcRecommenderService) + response, err := client.GetRecommendations(r.Context(), &proto.Request{ + Tconsts: tconsts, + N: uint32(n), + Filter: filter, + Weight: weight, + }) + if err != nil { + if st, ok := status.FromError(err); ok { + switch st.Code() { + case codes.InvalidArgument: + RespondWithJSON(w, ErrorResponse{Error: st.Message()}, http.StatusBadRequest) + case codes.NotFound: + RespondWithJSON(w, ErrorResponse{Error: st.Message()}, http.StatusNotFound) + case codes.Internal: + RespondWithServerError(w) + default: + fmt.Println(err) + RespondWithServerError(w) + } + return + } + RespondWithServerError(w) + return + } + + RespondWithJSON(w, response.Movies, http.StatusOK) +} + +func (h *IMDbHandler) HandlerHome(w http.ResponseWriter, r *http.Request) { + minMax, err := h.imdbRepo.GetMinMax() + if err != nil { + log.Printf("error getting min max: %v", err) + RespondWithServerError(w) + return + } + + RespondWithHTML( + w, "index.html", + struct { + MinMax dto.MinMax + BaseURL string + }{*minMax, h.baseURL}, + http.StatusOK, + ) +} diff --git a/server/pkg/handlers/middlewares.go b/server/pkg/handlers/middlewares.go new file mode 100644 index 0000000..b551d71 --- /dev/null +++ b/server/pkg/handlers/middlewares.go @@ -0,0 +1,18 @@ +package handlers + +import "net/http" + +func CORSMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Access-Control-Allow-Origin", "*") + w.Header().Set("Access-Control-Allow-Methods", "GET, OPTIONS") + w.Header().Set("Access-Control-Allow-Headers", "Content-Type") + + if r.Method == http.MethodOptions { + w.WriteHeader(http.StatusOK) + return + } + + next.ServeHTTP(w, r) + }) +} diff --git a/server/pkg/handlers/responses.go 
b/server/pkg/handlers/responses.go new file mode 100644 index 0000000..8d1fa3b --- /dev/null +++ b/server/pkg/handlers/responses.go @@ -0,0 +1,64 @@ +package handlers + +import ( + "encoding/json" + "fmt" + "html/template" + "log" + "net/http" + "strings" + + "github.com/aykhans/movier/server/pkg/config" +) + +type ErrorResponse struct { + Error string `json:"error"` +} + +func RespondWithServerError(w http.ResponseWriter) { + http.Error(w, "Internal server error", http.StatusInternalServerError) +} + +func RespondWithJSON(w http.ResponseWriter, data any, statusCode int) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(statusCode) + if err := json.NewEncoder(w).Encode(data); err != nil { + log.Printf("error encoding response: %v", err) + RespondWithServerError(w) + } +} + +func formatNumber(n uint) string { + s := fmt.Sprintf("%d", n) + var result strings.Builder + length := len(s) + + for i, digit := range s { + if i > 0 && (length-i)%3 == 0 { + result.WriteString(",") + } + result.WriteRune(digit) + } + return result.String() +} + +func RespondWithHTML(w http.ResponseWriter, templateName string, data any, statusCode int) { + w.Header().Set("Content-Type", "text/html") + w.WriteHeader(statusCode) + + funcMap := template.FuncMap{ + "formatNumber": formatNumber, + } + + t, err := template.New(templateName).Funcs(funcMap).ParseFiles(config.GetTemplatePath() + "/" + templateName) + if err != nil { + log.Printf("error parsing template: %v", err) + RespondWithServerError(w) + return + } + err = t.Execute(w, data) + if err != nil { + log.Printf("error executing template: %v", err) + RespondWithServerError(w) + } +} diff --git a/server/pkg/proto/recommender.pb.go b/server/pkg/proto/recommender.pb.go new file mode 100644 index 0000000..402b423 --- /dev/null +++ b/server/pkg/proto/recommender.pb.go @@ -0,0 +1,587 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. 
+// versions: +// protoc-gen-go v1.35.1 +// protoc v5.28.3 +// source: recommender.proto + +package proto + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type Filter struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Types that are assignable to MinVotesOneof: + // + // *Filter_MinVotes + MinVotesOneof isFilter_MinVotesOneof `protobuf_oneof:"min_votes_oneof"` + // Types that are assignable to MaxVotesOneof: + // + // *Filter_MaxVotes + MaxVotesOneof isFilter_MaxVotesOneof `protobuf_oneof:"max_votes_oneof"` + // Types that are assignable to MinYearOneof: + // + // *Filter_MinYear + MinYearOneof isFilter_MinYearOneof `protobuf_oneof:"min_year_oneof"` + // Types that are assignable to MaxYearOneof: + // + // *Filter_MaxYear + MaxYearOneof isFilter_MaxYearOneof `protobuf_oneof:"max_year_oneof"` + // Types that are assignable to MinRatingOneof: + // + // *Filter_MinRating + MinRatingOneof isFilter_MinRatingOneof `protobuf_oneof:"min_rating_oneof"` + // Types that are assignable to MaxRatingOneof: + // + // *Filter_MaxRating + MaxRatingOneof isFilter_MaxRatingOneof `protobuf_oneof:"max_rating_oneof"` +} + +func (x *Filter) Reset() { + *x = Filter{} + mi := &file_recommender_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Filter) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Filter) ProtoMessage() {} + +func (x *Filter) ProtoReflect() protoreflect.Message { + mi := &file_recommender_proto_msgTypes[0] + if x != nil { + ms := 
protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Filter.ProtoReflect.Descriptor instead. +func (*Filter) Descriptor() ([]byte, []int) { + return file_recommender_proto_rawDescGZIP(), []int{0} +} + +func (m *Filter) GetMinVotesOneof() isFilter_MinVotesOneof { + if m != nil { + return m.MinVotesOneof + } + return nil +} + +func (x *Filter) GetMinVotes() uint32 { + if x, ok := x.GetMinVotesOneof().(*Filter_MinVotes); ok { + return x.MinVotes + } + return 0 +} + +func (m *Filter) GetMaxVotesOneof() isFilter_MaxVotesOneof { + if m != nil { + return m.MaxVotesOneof + } + return nil +} + +func (x *Filter) GetMaxVotes() uint32 { + if x, ok := x.GetMaxVotesOneof().(*Filter_MaxVotes); ok { + return x.MaxVotes + } + return 0 +} + +func (m *Filter) GetMinYearOneof() isFilter_MinYearOneof { + if m != nil { + return m.MinYearOneof + } + return nil +} + +func (x *Filter) GetMinYear() uint32 { + if x, ok := x.GetMinYearOneof().(*Filter_MinYear); ok { + return x.MinYear + } + return 0 +} + +func (m *Filter) GetMaxYearOneof() isFilter_MaxYearOneof { + if m != nil { + return m.MaxYearOneof + } + return nil +} + +func (x *Filter) GetMaxYear() uint32 { + if x, ok := x.GetMaxYearOneof().(*Filter_MaxYear); ok { + return x.MaxYear + } + return 0 +} + +func (m *Filter) GetMinRatingOneof() isFilter_MinRatingOneof { + if m != nil { + return m.MinRatingOneof + } + return nil +} + +func (x *Filter) GetMinRating() float32 { + if x, ok := x.GetMinRatingOneof().(*Filter_MinRating); ok { + return x.MinRating + } + return 0 +} + +func (m *Filter) GetMaxRatingOneof() isFilter_MaxRatingOneof { + if m != nil { + return m.MaxRatingOneof + } + return nil +} + +func (x *Filter) GetMaxRating() float32 { + if x, ok := x.GetMaxRatingOneof().(*Filter_MaxRating); ok { + return x.MaxRating + } + return 0 +} + +type isFilter_MinVotesOneof interface { + isFilter_MinVotesOneof() +} + 
+type Filter_MinVotes struct { + MinVotes uint32 `protobuf:"varint,1,opt,name=min_votes,json=minVotes,proto3,oneof"` +} + +func (*Filter_MinVotes) isFilter_MinVotesOneof() {} + +type isFilter_MaxVotesOneof interface { + isFilter_MaxVotesOneof() +} + +type Filter_MaxVotes struct { + MaxVotes uint32 `protobuf:"varint,2,opt,name=max_votes,json=maxVotes,proto3,oneof"` +} + +func (*Filter_MaxVotes) isFilter_MaxVotesOneof() {} + +type isFilter_MinYearOneof interface { + isFilter_MinYearOneof() +} + +type Filter_MinYear struct { + MinYear uint32 `protobuf:"varint,3,opt,name=min_year,json=minYear,proto3,oneof"` +} + +func (*Filter_MinYear) isFilter_MinYearOneof() {} + +type isFilter_MaxYearOneof interface { + isFilter_MaxYearOneof() +} + +type Filter_MaxYear struct { + MaxYear uint32 `protobuf:"varint,4,opt,name=max_year,json=maxYear,proto3,oneof"` +} + +func (*Filter_MaxYear) isFilter_MaxYearOneof() {} + +type isFilter_MinRatingOneof interface { + isFilter_MinRatingOneof() +} + +type Filter_MinRating struct { + MinRating float32 `protobuf:"fixed32,5,opt,name=min_rating,json=minRating,proto3,oneof"` +} + +func (*Filter_MinRating) isFilter_MinRatingOneof() {} + +type isFilter_MaxRatingOneof interface { + isFilter_MaxRatingOneof() +} + +type Filter_MaxRating struct { + MaxRating float32 `protobuf:"fixed32,6,opt,name=max_rating,json=maxRating,proto3,oneof"` +} + +func (*Filter_MaxRating) isFilter_MaxRatingOneof() {} + +type Weight struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Year uint32 `protobuf:"varint,1,opt,name=year,proto3" json:"year,omitempty"` + Rating uint32 `protobuf:"varint,2,opt,name=rating,proto3" json:"rating,omitempty"` + Genres uint32 `protobuf:"varint,3,opt,name=genres,proto3" json:"genres,omitempty"` + Nconsts uint32 `protobuf:"varint,4,opt,name=nconsts,proto3" json:"nconsts,omitempty"` +} + +func (x *Weight) Reset() { + *x = Weight{} + mi := &file_recommender_proto_msgTypes[1] + ms := 
protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Weight) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Weight) ProtoMessage() {} + +func (x *Weight) ProtoReflect() protoreflect.Message { + mi := &file_recommender_proto_msgTypes[1] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Weight.ProtoReflect.Descriptor instead. +func (*Weight) Descriptor() ([]byte, []int) { + return file_recommender_proto_rawDescGZIP(), []int{1} +} + +func (x *Weight) GetYear() uint32 { + if x != nil { + return x.Year + } + return 0 +} + +func (x *Weight) GetRating() uint32 { + if x != nil { + return x.Rating + } + return 0 +} + +func (x *Weight) GetGenres() uint32 { + if x != nil { + return x.Genres + } + return 0 +} + +func (x *Weight) GetNconsts() uint32 { + if x != nil { + return x.Nconsts + } + return 0 +} + +type Request struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Tconsts []string `protobuf:"bytes,1,rep,name=tconsts,proto3" json:"tconsts,omitempty"` + N uint32 `protobuf:"varint,2,opt,name=n,proto3" json:"n,omitempty"` + Filter *Filter `protobuf:"bytes,3,opt,name=filter,proto3" json:"filter,omitempty"` + Weight *Weight `protobuf:"bytes,4,opt,name=weight,proto3" json:"weight,omitempty"` +} + +func (x *Request) Reset() { + *x = Request{} + mi := &file_recommender_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Request) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Request) ProtoMessage() {} + +func (x *Request) ProtoReflect() protoreflect.Message { + mi := &file_recommender_proto_msgTypes[2] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + 
ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Request.ProtoReflect.Descriptor instead. +func (*Request) Descriptor() ([]byte, []int) { + return file_recommender_proto_rawDescGZIP(), []int{2} +} + +func (x *Request) GetTconsts() []string { + if x != nil { + return x.Tconsts + } + return nil +} + +func (x *Request) GetN() uint32 { + if x != nil { + return x.N + } + return 0 +} + +func (x *Request) GetFilter() *Filter { + if x != nil { + return x.Filter + } + return nil +} + +func (x *Request) GetWeight() *Weight { + if x != nil { + return x.Weight + } + return nil +} + +type Response struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Movies []*RecommendedMovie `protobuf:"bytes,1,rep,name=movies,proto3" json:"movies,omitempty"` +} + +func (x *Response) Reset() { + *x = Response{} + mi := &file_recommender_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Response) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Response) ProtoMessage() {} + +func (x *Response) ProtoReflect() protoreflect.Message { + mi := &file_recommender_proto_msgTypes[3] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Response.ProtoReflect.Descriptor instead. 
+func (*Response) Descriptor() ([]byte, []int) { + return file_recommender_proto_rawDescGZIP(), []int{3} +} + +func (x *Response) GetMovies() []*RecommendedMovie { + if x != nil { + return x.Movies + } + return nil +} + +type RecommendedMovie struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Tconst string `protobuf:"bytes,1,opt,name=tconst,proto3" json:"tconst,omitempty"` + Weights []string `protobuf:"bytes,2,rep,name=weights,proto3" json:"weights,omitempty"` +} + +func (x *RecommendedMovie) Reset() { + *x = RecommendedMovie{} + mi := &file_recommender_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *RecommendedMovie) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*RecommendedMovie) ProtoMessage() {} + +func (x *RecommendedMovie) ProtoReflect() protoreflect.Message { + mi := &file_recommender_proto_msgTypes[4] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use RecommendedMovie.ProtoReflect.Descriptor instead. 
+func (*RecommendedMovie) Descriptor() ([]byte, []int) { + return file_recommender_proto_rawDescGZIP(), []int{4} +} + +func (x *RecommendedMovie) GetTconst() string { + if x != nil { + return x.Tconst + } + return "" +} + +func (x *RecommendedMovie) GetWeights() []string { + if x != nil { + return x.Weights + } + return nil +} + +var File_recommender_proto protoreflect.FileDescriptor + +var file_recommender_proto_rawDesc = []byte{ + 0x0a, 0x11, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x12, 0x0b, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x64, 0x65, 0x72, + 0x22, 0xb4, 0x02, 0x0a, 0x06, 0x46, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x12, 0x1d, 0x0a, 0x09, 0x6d, + 0x69, 0x6e, 0x5f, 0x76, 0x6f, 0x74, 0x65, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x48, 0x00, + 0x52, 0x08, 0x6d, 0x69, 0x6e, 0x56, 0x6f, 0x74, 0x65, 0x73, 0x12, 0x1d, 0x0a, 0x09, 0x6d, 0x61, + 0x78, 0x5f, 0x76, 0x6f, 0x74, 0x65, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x48, 0x01, 0x52, + 0x08, 0x6d, 0x61, 0x78, 0x56, 0x6f, 0x74, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x08, 0x6d, 0x69, 0x6e, + 0x5f, 0x79, 0x65, 0x61, 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x48, 0x02, 0x52, 0x07, 0x6d, + 0x69, 0x6e, 0x59, 0x65, 0x61, 0x72, 0x12, 0x1b, 0x0a, 0x08, 0x6d, 0x61, 0x78, 0x5f, 0x79, 0x65, + 0x61, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x48, 0x03, 0x52, 0x07, 0x6d, 0x61, 0x78, 0x59, + 0x65, 0x61, 0x72, 0x12, 0x1f, 0x0a, 0x0a, 0x6d, 0x69, 0x6e, 0x5f, 0x72, 0x61, 0x74, 0x69, 0x6e, + 0x67, 0x18, 0x05, 0x20, 0x01, 0x28, 0x02, 0x48, 0x04, 0x52, 0x09, 0x6d, 0x69, 0x6e, 0x52, 0x61, + 0x74, 0x69, 0x6e, 0x67, 0x12, 0x1f, 0x0a, 0x0a, 0x6d, 0x61, 0x78, 0x5f, 0x72, 0x61, 0x74, 0x69, + 0x6e, 0x67, 0x18, 0x06, 0x20, 0x01, 0x28, 0x02, 0x48, 0x05, 0x52, 0x09, 0x6d, 0x61, 0x78, 0x52, + 0x61, 0x74, 0x69, 0x6e, 0x67, 0x42, 0x11, 0x0a, 0x0f, 0x6d, 0x69, 0x6e, 0x5f, 0x76, 0x6f, 0x74, + 0x65, 0x73, 0x5f, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x42, 0x11, 0x0a, 0x0f, 0x6d, 0x61, 0x78, 
0x5f, + 0x76, 0x6f, 0x74, 0x65, 0x73, 0x5f, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x42, 0x10, 0x0a, 0x0e, 0x6d, + 0x69, 0x6e, 0x5f, 0x79, 0x65, 0x61, 0x72, 0x5f, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x42, 0x10, 0x0a, + 0x0e, 0x6d, 0x61, 0x78, 0x5f, 0x79, 0x65, 0x61, 0x72, 0x5f, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x42, + 0x12, 0x0a, 0x10, 0x6d, 0x69, 0x6e, 0x5f, 0x72, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x5f, 0x6f, 0x6e, + 0x65, 0x6f, 0x66, 0x42, 0x12, 0x0a, 0x10, 0x6d, 0x61, 0x78, 0x5f, 0x72, 0x61, 0x74, 0x69, 0x6e, + 0x67, 0x5f, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x22, 0x66, 0x0a, 0x06, 0x57, 0x65, 0x69, 0x67, 0x68, + 0x74, 0x12, 0x12, 0x0a, 0x04, 0x79, 0x65, 0x61, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, + 0x04, 0x79, 0x65, 0x61, 0x72, 0x12, 0x16, 0x0a, 0x06, 0x72, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x72, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x12, 0x16, 0x0a, + 0x06, 0x67, 0x65, 0x6e, 0x72, 0x65, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x67, + 0x65, 0x6e, 0x72, 0x65, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x73, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x73, 0x22, + 0x8b, 0x01, 0x0a, 0x07, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x74, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x09, 0x52, 0x07, 0x74, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x73, 0x12, 0x0c, 0x0a, 0x01, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, + 0x52, 0x01, 0x6e, 0x12, 0x2b, 0x0a, 0x06, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x64, 0x65, + 0x72, 0x2e, 0x46, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x52, 0x06, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, + 0x12, 0x2b, 0x0a, 0x06, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x13, 0x2e, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x2e, 0x57, + 0x65, 0x69, 0x67, 0x68, 0x74, 
0x52, 0x06, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x22, 0x41, 0x0a, + 0x08, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x35, 0x0a, 0x06, 0x6d, 0x6f, 0x76, + 0x69, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x72, 0x65, 0x63, 0x6f, + 0x6d, 0x6d, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x2e, 0x52, 0x65, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, + 0x64, 0x65, 0x64, 0x4d, 0x6f, 0x76, 0x69, 0x65, 0x52, 0x06, 0x6d, 0x6f, 0x76, 0x69, 0x65, 0x73, + 0x22, 0x44, 0x0a, 0x10, 0x52, 0x65, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x64, 0x65, 0x64, 0x4d, + 0x6f, 0x76, 0x69, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x74, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x12, 0x18, 0x0a, 0x07, + 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x07, 0x77, + 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x32, 0x52, 0x0a, 0x0b, 0x52, 0x65, 0x63, 0x6f, 0x6d, 0x6d, + 0x65, 0x6e, 0x64, 0x65, 0x72, 0x12, 0x43, 0x0a, 0x12, 0x47, 0x65, 0x74, 0x52, 0x65, 0x63, 0x6f, + 0x6d, 0x6d, 0x65, 0x6e, 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x2e, 0x72, 0x65, + 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x2e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, + 0x74, 0x1a, 0x15, 0x2e, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x2e, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x2c, 0x5a, 0x2a, 0x67, 0x69, + 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x61, 0x79, 0x6b, 0x68, 0x61, 0x6e, 0x73, + 0x2f, 0x6d, 0x6f, 0x76, 0x69, 0x65, 0x72, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2f, 0x70, + 0x6b, 0x67, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_recommender_proto_rawDescOnce sync.Once + file_recommender_proto_rawDescData = file_recommender_proto_rawDesc +) + +func file_recommender_proto_rawDescGZIP() []byte { + file_recommender_proto_rawDescOnce.Do(func() { + 
file_recommender_proto_rawDescData = protoimpl.X.CompressGZIP(file_recommender_proto_rawDescData) + }) + return file_recommender_proto_rawDescData +} + +var file_recommender_proto_msgTypes = make([]protoimpl.MessageInfo, 5) +var file_recommender_proto_goTypes = []any{ + (*Filter)(nil), // 0: recommender.Filter + (*Weight)(nil), // 1: recommender.Weight + (*Request)(nil), // 2: recommender.Request + (*Response)(nil), // 3: recommender.Response + (*RecommendedMovie)(nil), // 4: recommender.RecommendedMovie +} +var file_recommender_proto_depIdxs = []int32{ + 0, // 0: recommender.Request.filter:type_name -> recommender.Filter + 1, // 1: recommender.Request.weight:type_name -> recommender.Weight + 4, // 2: recommender.Response.movies:type_name -> recommender.RecommendedMovie + 2, // 3: recommender.Recommender.GetRecommendations:input_type -> recommender.Request + 3, // 4: recommender.Recommender.GetRecommendations:output_type -> recommender.Response + 4, // [4:5] is the sub-list for method output_type + 3, // [3:4] is the sub-list for method input_type + 3, // [3:3] is the sub-list for extension type_name + 3, // [3:3] is the sub-list for extension extendee + 0, // [0:3] is the sub-list for field type_name +} + +func init() { file_recommender_proto_init() } +func file_recommender_proto_init() { + if File_recommender_proto != nil { + return + } + file_recommender_proto_msgTypes[0].OneofWrappers = []any{ + (*Filter_MinVotes)(nil), + (*Filter_MaxVotes)(nil), + (*Filter_MinYear)(nil), + (*Filter_MaxYear)(nil), + (*Filter_MinRating)(nil), + (*Filter_MaxRating)(nil), + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_recommender_proto_rawDesc, + NumEnums: 0, + NumMessages: 5, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_recommender_proto_goTypes, + DependencyIndexes: file_recommender_proto_depIdxs, + MessageInfos: file_recommender_proto_msgTypes, + }.Build() + 
File_recommender_proto = out.File + file_recommender_proto_rawDesc = nil + file_recommender_proto_goTypes = nil + file_recommender_proto_depIdxs = nil +} diff --git a/server/pkg/proto/recommender_grpc.pb.go b/server/pkg/proto/recommender_grpc.pb.go new file mode 100644 index 0000000..e7a74cb --- /dev/null +++ b/server/pkg/proto/recommender_grpc.pb.go @@ -0,0 +1,121 @@ +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. +// versions: +// - protoc-gen-go-grpc v1.5.1 +// - protoc v5.28.3 +// source: recommender.proto + +package proto + +import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.64.0 or later. +const _ = grpc.SupportPackageIsVersion9 + +const ( + Recommender_GetRecommendations_FullMethodName = "/recommender.Recommender/GetRecommendations" +) + +// RecommenderClient is the client API for Recommender service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type RecommenderClient interface { + GetRecommendations(ctx context.Context, in *Request, opts ...grpc.CallOption) (*Response, error) +} + +type recommenderClient struct { + cc grpc.ClientConnInterface +} + +func NewRecommenderClient(cc grpc.ClientConnInterface) RecommenderClient { + return &recommenderClient{cc} +} + +func (c *recommenderClient) GetRecommendations(ctx context.Context, in *Request, opts ...grpc.CallOption) (*Response, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(Response) + err := c.cc.Invoke(ctx, Recommender_GetRecommendations_FullMethodName, in, out, cOpts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +// RecommenderServer is the server API for Recommender service. +// All implementations must embed UnimplementedRecommenderServer +// for forward compatibility. +type RecommenderServer interface { + GetRecommendations(context.Context, *Request) (*Response, error) + mustEmbedUnimplementedRecommenderServer() +} + +// UnimplementedRecommenderServer must be embedded to have +// forward compatible implementations. +// +// NOTE: this should be embedded by value instead of pointer to avoid a nil +// pointer dereference when methods are called. +type UnimplementedRecommenderServer struct{} + +func (UnimplementedRecommenderServer) GetRecommendations(context.Context, *Request) (*Response, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetRecommendations not implemented") +} +func (UnimplementedRecommenderServer) mustEmbedUnimplementedRecommenderServer() {} +func (UnimplementedRecommenderServer) testEmbeddedByValue() {} + +// UnsafeRecommenderServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to RecommenderServer will +// result in compilation errors. +type UnsafeRecommenderServer interface { + mustEmbedUnimplementedRecommenderServer() +} + +func RegisterRecommenderServer(s grpc.ServiceRegistrar, srv RecommenderServer) { + // If the following call pancis, it indicates UnimplementedRecommenderServer was + // embedded by pointer and is nil. This will cause panics if an + // unimplemented method is ever invoked, so we test this at initialization + // time to prevent it from happening at runtime later due to I/O. 
+ if t, ok := srv.(interface{ testEmbeddedByValue() }); ok { + t.testEmbeddedByValue() + } + s.RegisterService(&Recommender_ServiceDesc, srv) +} + +func _Recommender_GetRecommendations_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(Request) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(RecommenderServer).GetRecommendations(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Recommender_GetRecommendations_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(RecommenderServer).GetRecommendations(ctx, req.(*Request)) + } + return interceptor(ctx, in, info, handler) +} + +// Recommender_ServiceDesc is the grpc.ServiceDesc for Recommender service. +// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var Recommender_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "recommender.Recommender", + HandlerType: (*RecommenderServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "GetRecommendations", + Handler: _Recommender_GetRecommendations_Handler, + }, + }, + Streams: []grpc.StreamDesc{}, + Metadata: "recommender.proto", +} diff --git a/server/pkg/storage/postgresql/db.go b/server/pkg/storage/postgresql/db.go new file mode 100644 index 0000000..c8bc05d --- /dev/null +++ b/server/pkg/storage/postgresql/db.go @@ -0,0 +1,16 @@ +package postgresql + +import ( + "context" + "fmt" + + "github.com/jackc/pgx/v5" +) + +func NewDB(dbURL string) (*pgx.Conn, error) { + conn, err := pgx.Connect(context.Background(), dbURL) + if err != nil { + return nil, fmt.Errorf("failed to connect to database: %w", err) + } + return conn, nil +} diff --git a/server/pkg/storage/postgresql/migrations/000001_imdb_table.down.sql 
b/server/pkg/storage/postgresql/migrations/000001_imdb_table.down.sql new file mode 100644 index 0000000..b81a12a --- /dev/null +++ b/server/pkg/storage/postgresql/migrations/000001_imdb_table.down.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS imdb; \ No newline at end of file diff --git a/server/pkg/storage/postgresql/migrations/000001_imdb_table.up.sql b/server/pkg/storage/postgresql/migrations/000001_imdb_table.up.sql new file mode 100644 index 0000000..f30abb6 --- /dev/null +++ b/server/pkg/storage/postgresql/migrations/000001_imdb_table.up.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS imdb ( + tconst VARCHAR(12) PRIMARY KEY NOT NULL, + year SMALLINT NOT NULL DEFAULT 0, + genres TEXT NOT NULL DEFAULT '', + nconsts TEXT NOT NULL DEFAULT '', + rating REAL NOT NULL DEFAULT 0.0, + votes INTEGER NOT NULL DEFAULT 0 +); diff --git a/server/pkg/storage/postgresql/repository/imdb.go b/server/pkg/storage/postgresql/repository/imdb.go new file mode 100644 index 0000000..9dea4af --- /dev/null +++ b/server/pkg/storage/postgresql/repository/imdb.go @@ -0,0 +1,107 @@ +package repository + +import ( + "context" + + "github.com/jackc/pgx/v5" + + "github.com/aykhans/movier/server/pkg/dto" +) + +type IMDbRepository struct { + db *pgx.Conn +} + +func NewIMDbRepository(db *pgx.Conn) *IMDbRepository { + return &IMDbRepository{ + db: db, + } +} + +func (repo *IMDbRepository) InsertMultipleBasics(basics []dto.Basic) error { + batch := &pgx.Batch{} + for _, basic := range basics { + batch.Queue( + `INSERT INTO imdb (tconst, year, genres) + VALUES ($1, $2, $3) + ON CONFLICT (tconst) DO UPDATE + SET year = EXCLUDED.year, genres = EXCLUDED.genres`, + basic.Tconst, basic.StartYear, basic.Genres, + ) + } + + results := repo.db.SendBatch(context.Background(), batch) + if err := results.Close(); err != nil { + return err + } + return nil +} + +func (repo *IMDbRepository) GetAllTconsts() ([]string, error) { + rows, err := repo.db.Query( + context.Background(), + "SELECT tconst FROM imdb", + ) + if 
err != nil { + return nil, err + } + defer rows.Close() + + var tconsts []string + for rows.Next() { + var tconst string + if err := rows.Scan(&tconst); err != nil { + return nil, err + } + tconsts = append(tconsts, tconst) + } + if err := rows.Err(); err != nil { + return nil, err + } + return tconsts, nil +} + +func (repo *IMDbRepository) UpdateMultiplePrincipals(principals []dto.Principal) error { + batch := &pgx.Batch{} + for _, principal := range principals { + batch.Queue( + `UPDATE imdb SET nconsts = $1 WHERE tconst = $2`, + principal.Nconsts, principal.Tconst, + ) + } + + results := repo.db.SendBatch(context.Background(), batch) + if err := results.Close(); err != nil { + return err + } + return nil +} + +func (repo *IMDbRepository) UpdateMultipleRatings(ratings []dto.Ratings) error { + batch := &pgx.Batch{} + for _, rating := range ratings { + batch.Queue( + `UPDATE imdb SET rating = $1, votes = $2 WHERE tconst = $3`, + rating.Rating, rating.Votes, rating.Tconst, + ) + } + + results := repo.db.SendBatch(context.Background(), batch) + if err := results.Close(); err != nil { + return err + } + return nil +} + +func (repo *IMDbRepository) GetMinMax() (*dto.MinMax, error) { + var minMax dto.MinMax + + err := repo.db.QueryRow( + context.Background(), + "SELECT MIN(votes), MAX(votes), MIN(year), MAX(year), MIN(rating), MAX(rating) FROM imdb LIMIT 1", + ).Scan(&minMax.MinVotes, &minMax.MaxVotes, &minMax.MinYear, &minMax.MaxYear, &minMax.MinRating, &minMax.MaxRating) + if err != nil { + return nil, err + } + return &minMax, nil +} diff --git a/server/pkg/templates/index.html b/server/pkg/templates/index.html new file mode 100644 index 0000000..9e9e292 --- /dev/null +++ b/server/pkg/templates/index.html @@ -0,0 +1,357 @@ + + + + + + + Movier + + + + + + + + +
+
+
+
+
+
+
+ +
+
+
+ +
+ +
+ +
+
+ + +
+
+ + +
+
+ +
+
+ + +
+
+ + +
+
+ +
+
+ + +
+
+ + +
+
+ +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+
+

+
+ +
+
+ + +
+
+ +
+

+ +
+ +
+
+
+
+
// File: server/pkg/utils/env.go

// GetEnv returns the value of the environment variable key. When the variable
// is unset or set to the empty string, default_ is returned instead.
func GetEnv(key, default_ string) string {
	if value := os.Getenv(key); value != "" {
		return value
	}
	return default_
}

// File: server/pkg/utils/file.go

// MakeDirIfNotExist creates path together with any missing parent
// directories, using os.ModePerm (before umask) for newly created ones.
func MakeDirIfNotExist(path string) error {
	return os.MkdirAll(path, os.ModePerm)
}

// IsDirExist reports whether path exists on disk.
//
// It returns (true, nil) when os.Stat succeeds, (false, nil) when the path
// does not exist, and (false, err) for any other Stat failure (for example a
// permission error).
//
// NOTE(review): despite the name, the file mode is not inspected, so an
// existing regular file also yields true. Confirm against callers before
// tightening this to info.IsDir().
func IsDirExist(path string) (bool, error) {
	_, err := os.Stat(path)
	switch {
	case err == nil:
		return true, nil
	case os.IsNotExist(err):
		return false, nil
	default:
		return false, err
	}
}

// File: server/pkg/utils/validation.go

// IsValidPath reports whether path is an absolute path.
func IsValidPath(path string) bool {
	return filepath.IsAbs(path)
}

// IsUint32 reports whether value fits in a uint32, i.e. lies in
// [0, math.MaxUint32].
//
// The upper bound is checked in uint64: comparing an int directly with the
// untyped constant math.MaxUint32 does not compile on platforms where int is
// 32 bits wide, because the constant overflows int.
func IsUint32(value int) bool {
	return value >= 0 && uint64(value) <= math.MaxUint32
}

// IsInt reports whether value parses as a base-10 int via strconv.Atoi.
func IsInt(value string) bool {
	_, err := strconv.Atoi(value)
	return err == nil
}