""" Provide saving, loading and presenting gathered `ClassTracker` statistics. """ from typing import ( Any, Dict, IO, Iterable, List, Optional, Tuple, TYPE_CHECKING, Union ) import os import pickle import sys from copy import deepcopy from pympler.util.stringutils import trunc, pp, pp_timestamp from pympler.asizeof import Asized if TYPE_CHECKING: from .classtracker import TrackedObject, ClassTracker, Snapshot __all__ = ["Stats", "ConsoleStats", "HtmlStats"] def _ref2key(ref: Asized) -> str: return ref.name.split(':')[0] def _merge_asized(base: Asized, other: Asized, level: int = 0) -> None: """ Merge **Asized** instances `base` and `other` into `base`. """ base.size += other.size base.flat += other.flat if level > 0: base.name = _ref2key(base) # Add refs from other to base. Any new refs are appended. base.refs = list(base.refs) # we may need to append items refs = {} for ref in base.refs: refs[_ref2key(ref)] = ref for ref in other.refs: key = _ref2key(ref) if key in refs: _merge_asized(refs[key], ref, level=level + 1) else: # Don't modify existing Asized instances => deepcopy base.refs.append(deepcopy(ref)) base.refs[-1].name = key def _merge_objects(tref: float, merged: Asized, obj: 'TrackedObject') -> None: """ Merge the snapshot size information of multiple tracked objects. The tracked object `obj` is scanned for size information at time `tref`. The sizes are merged into **Asized** instance `merged`. """ size = None for (timestamp, tsize) in obj.snapshots: if timestamp == tref: size = tsize if size: _merge_asized(merged, size) def _format_trace(trace: List[Tuple]) -> str: """ Convert the (stripped) stack-trace to a nice readable format. The stack trace `trace` is a list of frame records as returned by **inspect.stack** but without the frame objects. Returns a string. """ lines = [] for fname, lineno, func, src, _ in trace: if src: for line in src: lines.append(' ' + line.strip() + '\n') lines.append(' %s:%4d in %s\n' % (fname, lineno, func)) return ''.join(lines) class Stats(object): """ Presents the memory statistics gathered by a `ClassTracker` based on user preferences. """ def __init__(self, tracker: 'Optional[ClassTracker]' = None, filename: Optional[str] = None, stream: Optional[IO] = None): """ Initialize the data log structures either from a `ClassTracker` instance (argument `tracker`) or a previously dumped file (argument `filename`). :param tracker: ClassTracker instance :param filename: filename of previously dumped statistics :param stream: where to print statistics, defaults to ``sys.stdout`` """ if stream: self.stream = stream else: self.stream = sys.stdout self.tracker = tracker self.index = {} # type: Dict[str, List[TrackedObject]] self.snapshots = [] # type: List[Snapshot] if tracker: self.index = tracker.index self.snapshots = tracker.snapshots self.history = tracker.history self.sorted = [] # type: List[TrackedObject] if filename: self.load_stats(filename) def load_stats(self, fdump: Union[str, IO[bytes]]) -> None: """ Load the data from a dump file. The argument `fdump` can be either a filename or an open file object that requires read access. """ if isinstance(fdump, str): fdump = open(fdump, 'rb') self.index = pickle.load(fdump) self.snapshots = pickle.load(fdump) self.sorted = [] def dump_stats(self, fdump: Union[str, IO[bytes]], close: bool = True ) -> None: """ Dump the logged data to a file. The argument `file` can be either a filename or an open file object that requires write access. `close` controls if the file is closed before leaving this method (the default behaviour). """ if self.tracker: self.tracker.stop_periodic_snapshots() if isinstance(fdump, str): fdump = open(fdump, 'wb') pickle.dump(self.index, fdump, protocol=pickle.HIGHEST_PROTOCOL) pickle.dump(self.snapshots, fdump, protocol=pickle.HIGHEST_PROTOCOL) if close: fdump.close() def _init_sort(self) -> None: """ Prepare the data to be sorted. If not yet sorted, import all tracked objects from the tracked index. Extend the tracking information by implicit information to make sorting easier (DSU pattern). """ if not self.sorted: # Identify the snapshot that tracked the largest amount of memory. tmax = None maxsize = 0 for snapshot in self.snapshots: if snapshot.tracked_total > maxsize: tmax = snapshot.timestamp for key in list(self.index.keys()): for tobj in self.index[key]: tobj.classname = key # type: ignore tobj.size = tobj.get_max_size() # type: ignore tobj.tsize = tobj.get_size_at_time(tmax) # type: ignore self.sorted.extend(self.index[key]) def sort_stats(self, *args: str) -> 'Stats': """ Sort the tracked objects according to the supplied criteria. The argument is a string identifying the basis of a sort (example: 'size' or 'classname'). When more than one key is provided, then additional keys are used as secondary criteria when there is equality in all keys selected before them. For example, ``sort_stats('name', 'size')`` will sort all the entries according to their class name, and resolve all ties (identical class names) by sorting by size. The criteria are fields in the tracked object instances. Results are stored in the ``self.sorted`` list which is used by ``Stats.print_stats()`` and other methods. The fields available for sorting are: 'classname' the name with which the class was registered 'name' the classname 'birth' creation timestamp 'death' destruction timestamp 'size' the maximum measured size of the object 'tsize' the measured size during the largest snapshot 'repr' string representation of the object Note that sorts on size are in descending order (placing most memory consuming items first), whereas name, repr, and creation time searches are in ascending order (alphabetical). The function returns self to allow calling functions on the result:: stats.sort_stats('size').reverse_order().print_stats() """ criteria = ('classname', 'tsize', 'birth', 'death', 'name', 'repr', 'size') if not set(criteria).issuperset(set(args)): raise ValueError("Invalid sort criteria") if not args: args = criteria def args_to_tuple(obj: 'TrackedObject') -> Tuple[str, ...]: keys: List[str] = [] for attr in args: attribute = getattr(obj, attr, '') if attr in ('tsize', 'size'): attribute = -int(attribute) keys.append(attribute) return tuple(keys) self._init_sort() self.sorted.sort(key=args_to_tuple) return self def reverse_order(self) -> 'Stats': """ Reverse the order of the tracked instance index `self.sorted`. """ self._init_sort() self.sorted.reverse() return self def annotate(self) -> None: """ Annotate all snapshots with class-based summaries. """ for snapshot in self.snapshots: self.annotate_snapshot(snapshot) def annotate_snapshot(self, snapshot: 'Snapshot' ) -> Dict[str, Dict[str, Any]]: """ Store additional statistical data in snapshot. """ if snapshot.classes is not None: return snapshot.classes snapshot.classes = {} for classname in list(self.index.keys()): total = 0 active = 0 merged = Asized(0, 0) for tobj in self.index[classname]: _merge_objects(snapshot.timestamp, merged, tobj) total += tobj.get_size_at_time(snapshot.timestamp) if (tobj.birth < snapshot.timestamp and (tobj.death is None or tobj.death > snapshot.timestamp)): active += 1 try: pct = total * 100.0 / snapshot.total except ZeroDivisionError: # pragma: no cover pct = 0 try: avg = total / active except ZeroDivisionError: avg = 0 snapshot.classes[classname] = dict(sum=total, avg=avg, pct=pct, active=active) snapshot.classes[classname]['merged'] = merged return snapshot.classes @property def tracked_classes(self) -> List[str]: """Return a list of all tracked classes occurring in any snapshot.""" return sorted(list(self.index.keys())) class ConsoleStats(Stats): """ Presentation layer for `Stats` to be used in text-based consoles. """ def _print_refs(self, refs: Iterable[Asized], total: int, prefix: str = ' ', level: int = 1, minsize: int = 0, minpct: float = 0.1) -> None: """ Print individual referents recursively. """ lrefs = list(refs) lrefs.sort(key=lambda x: x.size) lrefs.reverse() for ref in lrefs: if ref.size > minsize and (ref.size * 100.0 / total) > minpct: self.stream.write('%-50s %-14s %3d%% [%d]\n' % ( trunc(prefix + str(ref.name), 50), pp(ref.size), int(ref.size * 100.0 / total), level )) self._print_refs(ref.refs, total, prefix=prefix + ' ', level=level + 1) def print_object(self, tobj: 'TrackedObject') -> None: """ Print the gathered information of object `tobj` in human-readable format. """ if tobj.death: self.stream.write('%-32s ( free ) %-35s\n' % ( trunc(tobj.name, 32, left=True), trunc(tobj.repr, 35))) else: self.stream.write('%-32s 0x%08x %-35s\n' % ( trunc(tobj.name, 32, left=True), tobj.id, trunc(tobj.repr, 35) )) if tobj.trace: self.stream.write(_format_trace(tobj.trace)) for (timestamp, size) in tobj.snapshots: self.stream.write(' %-30s %s\n' % ( pp_timestamp(timestamp), pp(size.size) )) self._print_refs(size.refs, size.size) if tobj.death is not None: self.stream.write(' %-30s finalize\n' % ( pp_timestamp(tobj.death), )) def print_stats(self, clsname: Optional[str] = None, limit: float = 1.0 ) -> None: """ Write tracked objects to stdout. The output can be filtered and pruned. Only objects are printed whose classname contain the substring supplied by the `clsname` argument. The output can be pruned by passing a `limit` value. :param clsname: Only print objects whose classname contain the given substring. :param limit: If `limit` is a float smaller than one, only the supplied percentage of the total tracked data is printed. If `limit` is bigger than one, this number of tracked objects are printed. Tracked objects are first filtered, and then pruned (if specified). """ if self.tracker: self.tracker.stop_periodic_snapshots() if not self.sorted: self.sort_stats() _sorted = self.sorted if clsname: _sorted = [ to for to in _sorted if clsname in to.classname # type: ignore ] if limit < 1.0: limit = max(1, int(len(self.sorted) * limit)) _sorted = _sorted[:int(limit)] # Emit per-instance data for tobj in _sorted: self.print_object(tobj) def print_summary(self) -> None: """ Print per-class summary for each snapshot. """ # Emit class summaries for each snapshot classlist = self.tracked_classes fobj = self.stream fobj.write('---- SUMMARY ' + '-' * 66 + '\n') for snapshot in self.snapshots: classes = self.annotate_snapshot(snapshot) fobj.write('%-35s %11s %12s %12s %5s\n' % ( trunc(snapshot.desc, 35), 'active', pp(snapshot.asizeof_total), 'average', 'pct' )) for classname in classlist: info = classes[classname] fobj.write(' %-33s %11d %12s %12s %4d%%\n' % ( trunc(classname, 33), info['active'], pp(info['sum']), pp(info['avg']), info['pct'] )) fobj.write('-' * 79 + '\n') class HtmlStats(Stats): """ Output the `ClassTracker` statistics as HTML pages and graphs. """ style = """ """ nopylab_msg = """
%(cnt)d instances of %(cls)s were registered. The average size is %(avg)s, the minimal size is %(min)s, the maximum size is %(max)s.
\n""" class_snapshot = '''No per-referent sizes recorded.
\n') fobj.write("Instance | ' + '%s at 0x%08x |
Representation | " + "%s |
Lifetime | %s - %s |
Instantiation | %s |
%s | " % pp_timestamp(timestamp)) if not size.refs: fobj.write("%s | %s" % pp(size.size)) self._print_refs(fobj, size.refs, size.size) fobj.write(" | \n") fobj.write("
Total virtual memory assigned to the program at that time was %(sys)s, which includes %(overhead)s profiling overhead. The ClassTracker tracked %(tracked)s in total. The measurable objects including code objects but excluding overhead have a total size of %(asizeof)s.
\n""" def relative_path(self, filepath: str, basepath: Optional[str] = None ) -> str: """ Convert the filepath path to a relative path against basepath. By default basepath is self.basedir. """ if basepath is None: basepath = self.basedir if not basepath: return filepath if filepath.startswith(basepath): filepath = filepath[len(basepath):] if filepath and filepath[0] == os.sep: filepath = filepath[1:] return filepath def create_title_page(self, filename: str, title: str = '') -> None: """ Output the title page. """ fobj = open(filename, "w") fobj.write(self.header % (title, self.style)) fobj.write("\n')
fobj.write(' | \n') if snapshot.tracked_total: fobj.write(self.charts[snapshot]) fobj.write(' |