mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-04-23 10:43:06 +00:00
381 lines
11 KiB
Python
381 lines
11 KiB
Python
# Copyright 2018-2022 Streamlit Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""Data marshalling utilities for ArrowTable protobufs, which are used by
|
|
CustomComponent for dataframe serialization.
|
|
"""
|
|
|
|
import pandas as pd
|
|
import pyarrow as pa
|
|
|
|
from streamlit import type_util, util
|
|
|
|
|
|
def marshall(proto, data, default_uuid=None):
|
|
"""Marshall data into an ArrowTable proto.
|
|
|
|
Parameters
|
|
----------
|
|
proto : proto.ArrowTable
|
|
Output. The protobuf for a Streamlit ArrowTable proto.
|
|
|
|
data : pandas.DataFrame, pandas.Styler, numpy.ndarray, Iterable, dict, or None
|
|
Something that is or can be converted to a dataframe.
|
|
|
|
"""
|
|
if type_util.is_pandas_styler(data):
|
|
_marshall_styler(proto, data, default_uuid)
|
|
|
|
df = type_util.convert_anything_to_df(data)
|
|
_marshall_index(proto, df.index)
|
|
_marshall_columns(proto, df.columns)
|
|
_marshall_data(proto, df.to_numpy())
|
|
|
|
|
|
def _marshall_styler(proto, styler, default_uuid):
|
|
"""Marshall pandas.Styler styling data into an ArrowTable proto.
|
|
|
|
Parameters
|
|
----------
|
|
proto : proto.ArrowTable
|
|
Output. The protobuf for a Streamlit ArrowTable proto.
|
|
|
|
styler : pandas.Styler
|
|
Styler holding styling data for the dataframe.
|
|
|
|
default_uuid : str
|
|
If Styler custom uuid is not provided, this value will be used.
|
|
|
|
"""
|
|
# NB: UUID should be set before _compute is called.
|
|
_marshall_uuid(proto, styler, default_uuid)
|
|
|
|
# NB: We're using protected members of Styler to get styles,
|
|
# which is non-ideal and could break if Styler's interface changes.
|
|
styler._compute()
|
|
|
|
# In Pandas 1.3.0, styler._translate() signature was changed.
|
|
# 2 arguments were added: sparse_index and sparse_columns.
|
|
# The functionality that they provide is not yet supported.
|
|
if type_util.is_pandas_version_less_than("1.3.0"):
|
|
pandas_styles = styler._translate()
|
|
else:
|
|
pandas_styles = styler._translate(False, False)
|
|
|
|
_marshall_caption(proto, styler)
|
|
_marshall_styles(proto, styler, pandas_styles)
|
|
_marshall_display_values(proto, styler.data, pandas_styles)
|
|
|
|
|
|
def _marshall_uuid(proto, styler, default_uuid):
|
|
"""Marshall pandas.Styler UUID into an ArrowTable proto.
|
|
|
|
Parameters
|
|
----------
|
|
proto : proto.ArrowTable
|
|
Output. The protobuf for a Streamlit ArrowTable proto.
|
|
|
|
styler : pandas.Styler
|
|
Styler holding styling data for the dataframe.
|
|
|
|
default_uuid : str
|
|
If Styler custom uuid is not provided, this value will be used.
|
|
|
|
"""
|
|
if styler.uuid is None:
|
|
styler.set_uuid(default_uuid)
|
|
|
|
proto.styler.uuid = str(styler.uuid)
|
|
|
|
|
|
def _marshall_caption(proto, styler):
|
|
"""Marshall pandas.Styler caption into an ArrowTable proto.
|
|
|
|
Parameters
|
|
----------
|
|
proto : proto.ArrowTable
|
|
Output. The protobuf for a Streamlit ArrowTable proto.
|
|
|
|
styler : pandas.Styler
|
|
Styler holding styling data for the dataframe.
|
|
|
|
"""
|
|
if styler.caption is not None:
|
|
proto.styler.caption = styler.caption
|
|
|
|
|
|
def _marshall_styles(proto, styler, styles):
|
|
"""Marshall pandas.Styler styles into an ArrowTable proto.
|
|
|
|
Parameters
|
|
----------
|
|
proto : proto.ArrowTable
|
|
Output. The protobuf for a Streamlit ArrowTable proto.
|
|
|
|
styler : pandas.Styler
|
|
Styler holding styling data for the dataframe.
|
|
|
|
styles : dict
|
|
pandas.Styler translated styles.
|
|
|
|
"""
|
|
css_rules = []
|
|
|
|
if "table_styles" in styles:
|
|
table_styles = styles["table_styles"]
|
|
table_styles = _trim_pandas_styles(table_styles)
|
|
for style in table_styles:
|
|
# NB: styles in "table_styles" have a space
|
|
# between the UUID and the selector.
|
|
rule = _pandas_style_to_css(
|
|
"table_styles", style, styler.uuid, separator=" "
|
|
)
|
|
css_rules.append(rule)
|
|
|
|
if "cellstyle" in styles:
|
|
cellstyle = styles["cellstyle"]
|
|
cellstyle = _trim_pandas_styles(cellstyle)
|
|
for style in cellstyle:
|
|
rule = _pandas_style_to_css("cell_style", style, styler.uuid)
|
|
css_rules.append(rule)
|
|
|
|
if len(css_rules) > 0:
|
|
proto.styler.styles = "\n".join(css_rules)
|
|
|
|
|
|
def _trim_pandas_styles(styles):
|
|
"""Trim pandas styles dict.
|
|
|
|
Parameters
|
|
----------
|
|
styles : dict
|
|
pandas.Styler translated styles.
|
|
|
|
"""
|
|
# Filter out empty styles, as every cell will have a class
|
|
# but the list of props may just be [['', '']].
|
|
return [x for x in styles if any(any(y) for y in x["props"])]
|
|
|
|
|
|
def _pandas_style_to_css(style_type, style, uuid, separator=""):
|
|
"""Convert pandas.Styler translated styles entry to CSS.
|
|
|
|
Parameters
|
|
----------
|
|
style : dict
|
|
pandas.Styler translated styles entry.
|
|
|
|
uuid: str
|
|
pandas.Styler UUID.
|
|
|
|
separator: str
|
|
A string separator used between table and cell selectors.
|
|
|
|
"""
|
|
declarations = []
|
|
for css_property, css_value in style["props"]:
|
|
declaration = css_property.strip() + ": " + css_value.strip()
|
|
declarations.append(declaration)
|
|
|
|
table_selector = "#T_" + str(uuid)
|
|
|
|
# In pandas < 1.1.0
|
|
# translated_style["cellstyle"] has the following shape:
|
|
# [
|
|
# {
|
|
# "props": [["color", " black"], ["background-color", "orange"], ["", ""]],
|
|
# "selector": "row0_col0"
|
|
# }
|
|
# ...
|
|
# ]
|
|
#
|
|
# In pandas >= 1.1.0
|
|
# translated_style["cellstyle"] has the following shape:
|
|
# [
|
|
# {
|
|
# "props": [("color", " black"), ("background-color", "orange"), ("", "")],
|
|
# "selectors": ["row0_col0"]
|
|
# }
|
|
# ...
|
|
# ]
|
|
if style_type == "table_styles" or (
|
|
style_type == "cell_style" and type_util.is_pandas_version_less_than("1.1.0")
|
|
):
|
|
cell_selectors = [style["selector"]]
|
|
else:
|
|
cell_selectors = style["selectors"]
|
|
|
|
selectors = []
|
|
for cell_selector in cell_selectors:
|
|
selectors.append(table_selector + separator + cell_selector)
|
|
selector = ", ".join(selectors)
|
|
|
|
declaration_block = "; ".join(declarations)
|
|
rule_set = selector + " { " + declaration_block + " }"
|
|
|
|
return rule_set
|
|
|
|
|
|
def _marshall_display_values(proto, df, styles):
|
|
"""Marshall pandas.Styler display values into an ArrowTable proto.
|
|
|
|
Parameters
|
|
----------
|
|
proto : proto.ArrowTable
|
|
Output. The protobuf for a Streamlit ArrowTable proto.
|
|
|
|
df : pandas.DataFrame
|
|
A dataframe with original values.
|
|
|
|
styles : dict
|
|
pandas.Styler translated styles.
|
|
|
|
"""
|
|
new_df = _use_display_values(df, styles)
|
|
proto.styler.display_values = _dataframe_to_pybytes(new_df)
|
|
|
|
|
|
def _use_display_values(df, styles):
|
|
"""Create a new pandas.DataFrame where display values are used instead of original ones.
|
|
|
|
Parameters
|
|
----------
|
|
df : pandas.DataFrame
|
|
A dataframe with original values.
|
|
|
|
styles : dict
|
|
pandas.Styler translated styles.
|
|
|
|
"""
|
|
# (HK) TODO: Rewrite this method without using regex.
|
|
import re
|
|
|
|
# If values in a column are not of the same type, Arrow Table
|
|
# serialization would fail. Thus, we need to cast all values
|
|
# of the dataframe to strings before assigning them display values.
|
|
new_df = df.astype(str)
|
|
|
|
cell_selector_regex = re.compile(r"row(\d+)_col(\d+)")
|
|
if "body" in styles:
|
|
rows = styles["body"]
|
|
for row in rows:
|
|
for cell in row:
|
|
cell_id = cell["id"]
|
|
match = cell_selector_regex.match(cell_id)
|
|
if match:
|
|
r, c = map(int, match.groups())
|
|
new_df.iat[r, c] = str(cell["display_value"])
|
|
|
|
return new_df
|
|
|
|
|
|
def _dataframe_to_pybytes(df):
|
|
"""Convert pandas.DataFrame to pybytes.
|
|
|
|
Parameters
|
|
----------
|
|
df : pandas.DataFrame
|
|
A dataframe to convert.
|
|
|
|
"""
|
|
table = pa.Table.from_pandas(df)
|
|
sink = pa.BufferOutputStream()
|
|
writer = pa.RecordBatchStreamWriter(sink, table.schema)
|
|
writer.write_table(table)
|
|
writer.close()
|
|
return sink.getvalue().to_pybytes()
|
|
|
|
|
|
def _marshall_index(proto, index):
|
|
"""Marshall pandas.DataFrame index into an ArrowTable proto.
|
|
|
|
Parameters
|
|
----------
|
|
proto : proto.ArrowTable
|
|
Output. The protobuf for a Streamlit ArrowTable proto.
|
|
|
|
index : Index or array-like
|
|
Index to use for resulting frame.
|
|
Will default to RangeIndex (0, 1, 2, ..., n) if no index is provided.
|
|
|
|
"""
|
|
index = map(util._maybe_tuple_to_list, index.values)
|
|
index_df = pd.DataFrame(index)
|
|
proto.index = _dataframe_to_pybytes(index_df)
|
|
|
|
|
|
def _marshall_columns(proto, columns):
|
|
"""Marshall pandas.DataFrame columns into an ArrowTable proto.
|
|
|
|
Parameters
|
|
----------
|
|
proto : proto.ArrowTable
|
|
Output. The protobuf for a Streamlit ArrowTable proto.
|
|
|
|
columns : Index or array-like
|
|
Column labels to use for resulting frame.
|
|
Will default to RangeIndex (0, 1, 2, ..., n) if no column labels are provided.
|
|
|
|
"""
|
|
columns = map(util._maybe_tuple_to_list, columns.values)
|
|
columns_df = pd.DataFrame(columns)
|
|
proto.columns = _dataframe_to_pybytes(columns_df)
|
|
|
|
|
|
def _marshall_data(proto, data):
|
|
"""Marshall pandas.DataFrame data into an ArrowTable proto.
|
|
|
|
Parameters
|
|
----------
|
|
proto : proto.ArrowTable
|
|
Output. The protobuf for a Streamlit ArrowTable proto.
|
|
|
|
df : pandas.DataFrame
|
|
A dataframe to marshall.
|
|
|
|
"""
|
|
df = pd.DataFrame(data)
|
|
proto.data = _dataframe_to_pybytes(df)
|
|
|
|
|
|
def arrow_proto_to_dataframe(proto):
|
|
"""Convert ArrowTable proto to pandas.DataFrame.
|
|
|
|
Parameters
|
|
----------
|
|
proto : proto.ArrowTable
|
|
Output. pandas.DataFrame
|
|
|
|
"""
|
|
data = _pybytes_to_dataframe(proto.data)
|
|
index = _pybytes_to_dataframe(proto.index)
|
|
columns = _pybytes_to_dataframe(proto.columns)
|
|
|
|
return pd.DataFrame(
|
|
data.values, index=index.values.T.tolist(), columns=columns.values.T.tolist()
|
|
)
|
|
|
|
|
|
def _pybytes_to_dataframe(source):
|
|
"""Convert pybytes to pandas.DataFrame.
|
|
|
|
Parameters
|
|
----------
|
|
source : pybytes
|
|
Will default to RangeIndex (0, 1, 2, ..., n) if no `index` or `columns` are provided.
|
|
|
|
"""
|
|
reader = pa.RecordBatchStreamReader(source)
|
|
return reader.read_pandas()
|