Ayxan 9e90b4a150 Version 0.1
Added sidebar, Dashboard, Line Graph
2022-05-23 04:12:37 +04:00

319 lines
11 KiB
Python

# Copyright (c) 2010-2022 openpyxl
"""Read an xlsx file into Python"""
# Python stdlib imports
from zipfile import ZipFile, ZIP_DEFLATED, BadZipfile
from sys import exc_info
from io import BytesIO
import os.path
import warnings
from openpyxl.pivot.table import TableDefinition
# Allow blanket setting of KEEP_VBA for testing
try:
from ..tests import KEEP_VBA
except ImportError:
KEEP_VBA = False
# package imports
from openpyxl.utils.exceptions import InvalidFileException
from openpyxl.xml.constants import (
ARC_SHARED_STRINGS,
ARC_CORE,
ARC_CONTENT_TYPES,
ARC_WORKBOOK,
ARC_THEME,
COMMENTS_NS,
SHARED_STRINGS,
EXTERNAL_LINK,
XLTM,
XLTX,
XLSM,
XLSX,
)
from openpyxl.cell import MergedCell
from openpyxl.comments.comment_sheet import CommentSheet
from .strings import read_string_table
from .workbook import WorkbookParser
from openpyxl.styles.stylesheet import apply_stylesheet
from openpyxl.packaging.core import DocumentProperties
from openpyxl.packaging.manifest import Manifest, Override
from openpyxl.packaging.relationship import (
RelationshipList,
get_dependents,
get_rels_path,
)
from openpyxl.worksheet._read_only import ReadOnlyWorksheet
from openpyxl.worksheet._reader import WorksheetReader
from openpyxl.chartsheet import Chartsheet
from openpyxl.worksheet.table import Table
from openpyxl.drawing.spreadsheet_drawing import SpreadsheetDrawing
from openpyxl.xml.functions import fromstring
from .drawings import find_images
SUPPORTED_FORMATS = ('.xlsx', '.xlsm', '.xltx', '.xltm')
def _validate_archive(filename):
"""
Does a first check whether filename is a string or a file-like
object. If it is a string representing a filename, a check is done
for supported formats by checking the given file-extension. If the
file-extension is not in SUPPORTED_FORMATS an InvalidFileException
will raised. Otherwise the filename (resp. file-like object) will
forwarded to zipfile.ZipFile returning a ZipFile-Instance.
"""
is_file_like = hasattr(filename, 'read')
if not is_file_like:
file_format = os.path.splitext(filename)[-1].lower()
if file_format not in SUPPORTED_FORMATS:
if file_format == '.xls':
msg = ('openpyxl does not support the old .xls file format, '
'please use xlrd to read this file, or convert it to '
'the more recent .xlsx file format.')
elif file_format == '.xlsb':
msg = ('openpyxl does not support binary format .xlsb, '
'please convert this file to .xlsx format if you want '
'to open it with openpyxl')
else:
msg = ('openpyxl does not support %s file format, '
'please check you can open '
'it with Excel first. '
'Supported formats are: %s') % (file_format,
','.join(SUPPORTED_FORMATS))
raise InvalidFileException(msg)
archive = ZipFile(filename, 'r')
return archive
def _find_workbook_part(package):
workbook_types = [XLTM, XLTX, XLSM, XLSX]
for ct in workbook_types:
part = package.find(ct)
if part:
return part
# some applications reassign the default for application/xml
defaults = {p.ContentType for p in package.Default}
workbook_type = defaults & set(workbook_types)
if workbook_type:
return Override("/" + ARC_WORKBOOK, workbook_type.pop())
raise IOError("File contains no valid workbook part")
class ExcelReader:
"""
Read an Excel package and dispatch the contents to the relevant modules
"""
def __init__(self, fn, read_only=False, keep_vba=KEEP_VBA,
data_only=False, keep_links=True):
self.archive = _validate_archive(fn)
self.valid_files = self.archive.namelist()
self.read_only = read_only
self.keep_vba = keep_vba
self.data_only = data_only
self.keep_links = keep_links
self.shared_strings = []
def read_manifest(self):
src = self.archive.read(ARC_CONTENT_TYPES)
root = fromstring(src)
self.package = Manifest.from_tree(root)
def read_strings(self):
ct = self.package.find(SHARED_STRINGS)
if ct is not None:
strings_path = ct.PartName[1:]
with self.archive.open(strings_path,) as src:
self.shared_strings = read_string_table(src)
def read_workbook(self):
wb_part = _find_workbook_part(self.package)
self.parser = WorkbookParser(self.archive, wb_part.PartName[1:], keep_links=self.keep_links)
self.parser.parse()
wb = self.parser.wb
wb._sheets = []
wb._data_only = self.data_only
wb._read_only = self.read_only
wb.template = wb_part.ContentType in (XLTX, XLTM)
# If are going to preserve the vba then attach a copy of the archive to the
# workbook so that is available for the save.
if self.keep_vba:
wb.vba_archive = ZipFile(BytesIO(), 'a', ZIP_DEFLATED)
for name in self.valid_files:
wb.vba_archive.writestr(name, self.archive.read(name))
if self.read_only:
wb._archive = self.archive
self.wb = wb
def read_properties(self):
if ARC_CORE in self.valid_files:
src = fromstring(self.archive.read(ARC_CORE))
self.wb.properties = DocumentProperties.from_tree(src)
def read_theme(self):
if ARC_THEME in self.valid_files:
self.wb.loaded_theme = self.archive.read(ARC_THEME)
def read_chartsheet(self, sheet, rel):
sheet_path = rel.target
rels_path = get_rels_path(sheet_path)
rels = []
if rels_path in self.valid_files:
rels = get_dependents(self.archive, rels_path)
with self.archive.open(sheet_path, "r") as src:
xml = src.read()
node = fromstring(xml)
cs = Chartsheet.from_tree(node)
cs._parent = self.wb
cs.title = sheet.name
self.wb._add_sheet(cs)
drawings = rels.find(SpreadsheetDrawing._rel_type)
for rel in drawings:
charts, images = find_images(self.archive, rel.target)
for c in charts:
cs.add_chart(c)
def read_worksheets(self):
comment_warning = """Cell '{0}':{1} is part of a merged range but has a comment which will be removed because merged cells cannot contain any data."""
for sheet, rel in self.parser.find_sheets():
if rel.target not in self.valid_files:
continue
if "chartsheet" in rel.Type:
self.read_chartsheet(sheet, rel)
continue
rels_path = get_rels_path(rel.target)
rels = RelationshipList()
if rels_path in self.valid_files:
rels = get_dependents(self.archive, rels_path)
if self.read_only:
ws = ReadOnlyWorksheet(self.wb, sheet.name, rel.target, self.shared_strings)
ws.sheet_state = sheet.state
self.wb._sheets.append(ws)
continue
else:
fh = self.archive.open(rel.target)
ws = self.wb.create_sheet(sheet.name)
ws._rels = rels
ws_parser = WorksheetReader(ws, fh, self.shared_strings, self.data_only)
ws_parser.bind_all()
# assign any comments to cells
for r in rels.find(COMMENTS_NS):
src = self.archive.read(r.target)
comment_sheet = CommentSheet.from_tree(fromstring(src))
for ref, comment in comment_sheet.comments:
try:
ws[ref].comment = comment
except AttributeError:
c = ws[ref]
if isinstance(c, MergedCell):
warnings.warn(comment_warning.format(ws.title, c.coordinate))
continue
# preserve link to VML file if VBA
if self.wb.vba_archive and ws.legacy_drawing:
ws.legacy_drawing = rels[ws.legacy_drawing].target
else:
ws.legacy_drawing = None
for t in ws_parser.tables:
src = self.archive.read(t)
xml = fromstring(src)
table = Table.from_tree(xml)
ws.add_table(table)
drawings = rels.find(SpreadsheetDrawing._rel_type)
for rel in drawings:
charts, images = find_images(self.archive, rel.target)
for c in charts:
ws.add_chart(c, c.anchor)
for im in images:
ws.add_image(im, im.anchor)
pivot_rel = rels.find(TableDefinition.rel_type)
for r in pivot_rel:
pivot_path = r.Target
src = self.archive.read(pivot_path)
tree = fromstring(src)
pivot = TableDefinition.from_tree(tree)
pivot.cache = self.parser.pivot_caches[pivot.cacheId]
ws.add_pivot(pivot)
ws.sheet_state = sheet.state
def read(self):
self.read_manifest()
self.read_strings()
self.read_workbook()
self.read_properties()
self.read_theme()
apply_stylesheet(self.archive, self.wb)
self.read_worksheets()
self.parser.assign_names()
if not self.read_only:
self.archive.close()
def load_workbook(filename, read_only=False, keep_vba=KEEP_VBA,
data_only=False, keep_links=True):
"""Open the given filename and return the workbook
:param filename: the path to open or a file-like object
:type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`
:param read_only: optimised for reading, content cannot be edited
:type read_only: bool
:param keep_vba: preseve vba content (this does NOT mean you can use it)
:type keep_vba: bool
:param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
:type data_only: bool
:param keep_links: whether links to external workbooks should be preserved. The default is True
:type keep_links: bool
:rtype: :class:`openpyxl.workbook.Workbook`
.. note::
When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
and the returned workbook will be read-only.
"""
reader = ExcelReader(filename, read_only, keep_vba,
data_only, keep_links)
reader.read()
return reader.wb