mirror of
https://github.com/aykhans/IMDBDataVisualization.git
synced 2025-04-08 12:14:01 +00:00

self.st added to Main class. Renamed class: App -> DataPreprocessing. Renamed file: app.py -> data_preprocessing.py. Changed the application class's function types to staticmethod.
75 lines
2.8 KiB
Plaintext
75 lines
2.8 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"class DataPreparation():\n",
|
|
" def __init__(self, page_title: str, page_icon: str, layout: str) -> None:\n",
|
|
" self.st = st\n",
|
|
" self.st.set_page_config(page_title=page_title, page_icon=page_icon, layout=layout)\n",
|
|
"\n",
|
|
" def getDataGenresCount(self, data: pd.DataFrame, genres_type: str) -> Counter:\n",
|
|
" if genres_type == 'Movie':\n",
|
|
" data = data.query(\"Title_Type == 'movie' or Title_Type == 'tvMovie'\")\n",
|
|
" elif genres_type == 'Series':\n",
|
|
" data = data.query(\"Title_Type == 'tvSeries' or Title_Type == 'tvMiniSeries'\")\n",
|
|
"\n",
|
|
" data_genres = pd.DataFrame({'title': data['Title'], 'genres': data['Genres']})\n",
|
|
"\n",
|
|
" genres = []\n",
|
|
" for i in data_genres['genres']:\n",
|
|
" genres += i.replace(' ', '').split(',')\n",
|
|
" return Counter(genres)\n",
|
|
"\n",
|
|
" def createPieChart(self, data: pd.DataFrame, genres_type: str, selected_genres: List[str]):\n",
|
|
" genres_count = self.getDataGenresCount(data, genres_type)\n",
|
|
" if len(selected_genres) < 2: return None\n",
|
|
" df = pd.DataFrame({'title': genres_count.keys(), 'count': genres_count.values()}).\\\n",
|
|
" query('title == @selected_genres')\n",
|
|
" return px.pie(df, names='title', values='count', title=f'{genres_type} genres')\n",
|
|
"\n",
|
|
" def createBarChart(self, data: pd.DataFrame, genres_type: str, selected_genres: List[str]):\n",
|
|
" genres_count = self.getDataGenresCount(data, genres_type)\n",
|
|
" if len(selected_genres) < 2: return None\n",
|
|
" genres_count_selected = {i: j for i, j in genres_count.items() if i in selected_genres}\n",
|
|
" return px.bar(x=genres_count_selected.keys(), y=genres_count_selected.values())\n",
|
|
"\n",
|
|
" def preprocessingData(self, upload_file: str) -> Optional[pd.DataFrame]:\n",
|
|
" if upload_file is not None:\n",
|
|
" data = pd.read_csv(upload_file)\n",
|
|
" data.rename(columns = {'Title Type': 'Title_Type'}, inplace = True)\n",
|
|
" return data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3.10.4 ('venv': venv)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"name": "python",
|
|
"version": "3.10.4"
|
|
},
|
|
"orig_nbformat": 4,
|
|
"vscode": {
|
|
"interpreter": {
|
|
"hash": "bf977f077e71610de8bc34395c7967902e3367702f38f6975908043ca1c810e0"
|
|
}
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|