IMDBDataVisualization/app/data_preprocessing.py
ayxan ad1ba1717d self.st removed from App class
self.st added to Main class
Renamed class: App -> DataPreprocessing
Renamed file: app.py -> data_preprocessing.py
Changed the application class's function types to staticmethod
2022-07-17 20:50:49 +04:00

43 lines
1.8 KiB
Python

import pandas as pd
import plotly.express as px
from collections import Counter
from typing import Optional, List
class DataPreprocessing:
    """Stateless helpers that load a title CSV and chart its genre counts.

    Every method is a ``staticmethod``; the class only serves as a namespace.
    The frames handled here are expected to carry a ``Title_Type`` column and
    a ``Genres`` column holding comma-separated genre names.
    """

    @staticmethod
    def getDataGenresCount(data: pd.DataFrame, genres_type: str) -> Counter:
        """Count how often each genre occurs among the selected title types.

        Args:
            data: Frame with a ``Genres`` column of comma-separated genre
                names, and a ``Title_Type`` column when filtering is requested.
            genres_type: ``'Movie'`` keeps ``movie``/``tvMovie`` rows,
                ``'Series'`` keeps ``tvSeries``/``tvMiniSeries`` rows; any
                other value leaves the rows unfiltered.

        Returns:
            A ``Counter`` mapping genre name to its number of occurrences.
        """
        if genres_type == 'Movie':
            data = data[data['Title_Type'].isin(['movie', 'tvMovie'])]
        elif genres_type == 'Series':
            data = data[data['Title_Type'].isin(['tvSeries', 'tvMiniSeries'])]

        genres_count = Counter()
        # Missing cells (NaN/None) have no ``str`` methods, so drop them first.
        for cell in data['Genres'].dropna():
            names = str(cell).replace(' ', '').split(',')
            # Skip empty fragments produced by blank cells or trailing commas.
            genres_count.update(name for name in names if name)
        return genres_count

    @staticmethod
    def _selectedGenreCounts(data: pd.DataFrame, genres_type: str,
                             selected_genres: List[str]) -> dict:
        """Return ``{genre: count}`` restricted to ``selected_genres``."""
        wanted = set(selected_genres)
        genres_count = DataPreprocessing.getDataGenresCount(data, genres_type)
        return {
            genre: count
            for genre, count in genres_count.items()
            if genre in wanted
        }

    @staticmethod
    def createPieChart(data: pd.DataFrame, genres_type: str, selected_genres: List[str]):
        """Build a pie chart of the selected genres' counts.

        Args:
            data: Frame passed on to ``getDataGenresCount``.
            genres_type: Title-type filter, also used in the chart title.
            selected_genres: Genres to include in the chart.

        Returns:
            The figure produced by ``px.pie``, or ``None`` when fewer than two
            genres are selected.
        """
        # Check the cheap guard before doing any counting work.
        if selected_genres is None or len(selected_genres) < 2:
            return None
        selected = DataPreprocessing._selectedGenreCounts(
            data, genres_type, selected_genres)
        df = pd.DataFrame({
            'title': list(selected.keys()),
            'count': list(selected.values()),
        })
        return px.pie(df, names='title', values='count', title=f'{genres_type} genres')

    @staticmethod
    def createBarChart(data: pd.DataFrame, genres_type: str, selected_genres: List[str]):
        """Build a bar chart of the selected genres' counts.

        Args:
            data: Frame passed on to ``getDataGenresCount``.
            genres_type: Title-type filter applied before counting.
            selected_genres: Genres to include in the chart.

        Returns:
            The figure produced by ``px.bar``, or ``None`` when fewer than two
            genres are selected.
        """
        if selected_genres is None or len(selected_genres) < 2:
            return None
        selected = DataPreprocessing._selectedGenreCounts(
            data, genres_type, selected_genres)
        # Materialise the dict views so the plotting layer receives plain lists.
        return px.bar(x=list(selected.keys()), y=list(selected.values()))

    @staticmethod
    def preprocessingData(upload_file: Optional[str]) -> Optional[pd.DataFrame]:
        """Load the uploaded CSV and normalise the title-type column name.

        Args:
            upload_file: Anything ``pd.read_csv`` accepts (a path or a
                file-like object), or ``None`` when nothing was uploaded.

        Returns:
            The loaded frame with ``'Title Type'`` renamed to ``'Title_Type'``,
            or ``None`` when ``upload_file`` is ``None``.
        """
        if upload_file is None:
            return None
        data = pd.read_csv(upload_file)
        # An underscore name lets the column be referenced as an identifier.
        return data.rename(columns={'Title Type': 'Title_Type'})