mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-04-22 02:23:48 +00:00
591 lines
22 KiB
Python
591 lines
22 KiB
Python
"""
|
|
The `ClassTracker` is a facility delivering insight into the memory
|
|
distribution of a Python program. It can introspect memory consumption of
|
|
certain classes and objects. Facilities are provided to track and size
|
|
individual objects or all instances of certain classes. Tracked objects are
|
|
sized recursively to provide an overview of memory distribution between the
|
|
different tracked objects.
|
|
"""
|
|
|
|
from typing import Any, Callable, Dict, IO, List, Optional, Tuple
|
|
|
|
from collections import defaultdict
|
|
from functools import partial
|
|
from inspect import stack, isclass
|
|
from threading import Thread, Lock
|
|
from time import sleep, time
|
|
from weakref import ref as weakref_ref
|
|
|
|
from pympler.classtracker_stats import ConsoleStats
|
|
from pympler.util.stringutils import safe_repr
|
|
|
|
import pympler.asizeof as asizeof
|
|
import pympler.process
|
|
|
|
|
|
# Names exported by a star-import of this module: only `ClassTracker`.
__all__ = ["ClassTracker"]
|
|
|
|
# Reference point for time stamps reported relative to program start.
|
|
# Evaluated once, when this module is imported; `_get_time()` subtracts it
# from the current time.
_local_start = time()
|
|
|
|
|
|
class _ClassObserver(object):
|
|
"""
|
|
Stores options for tracked classes.
|
|
The observer also keeps the original constructor of the observed class.
|
|
"""
|
|
__slots__ = ('init', 'name', 'detail', 'keep', 'trace')
|
|
|
|
def __init__(self, init: Callable, name: str, detail: int, keep: bool,
|
|
trace: bool):
|
|
self.init = init
|
|
self.name = name
|
|
self.detail = detail
|
|
self.keep = keep
|
|
self.trace = trace
|
|
|
|
def modify(self, name: str, detail: int, keep: bool, trace: bool) -> None:
|
|
self.name = name
|
|
self.detail = detail
|
|
self.keep = keep
|
|
self.trace = trace
|
|
|
|
|
|
def _get_time() -> float:
    """
    Return the number of seconds elapsed since `_local_start`.
    """
    now = time()
    return now - _local_start
|
|
|
|
|
|
class TrackedObject(object):
    """
    Size and lifetime record of one tracked object.

    The object itself is only held through a weak reference, so recording it
    here does not keep it alive.
    """
    __slots__ = ("ref", "id", "repr", "name", "birth", "death", "trace",
                 "snapshots", "_resolution_level", "__dict__")

    def __init__(self, instance: Any, name: str, resolution_level: int = 0,
                 trace: bool = False, on_delete: Optional[Callable] = None):
        """
        Attach a weak reference to 'instance' whose callback (`finalize`)
        reports the object's deletion. Sizes are collected in 'snapshots' as
        (timestamp, size) tuples; the first entry is taken here from the
        object's basic size.

        :param instance: the object to observe
        :param name: label under which the object is recorded
        :param resolution_level: detail level passed to the sizer
        :param trace: if true, store the instantiation stack trace
        :param on_delete: optional callable invoked from `finalize`
        """
        self.ref = weakref_ref(instance, self.finalize)
        self.id = id(instance)
        self.repr = ''
        self.name = name
        self.birth = _get_time()
        self.death = None  # type: Optional[float]
        self._resolution_level = resolution_level
        self.trace = None  # type: Optional[List[Tuple]]

        if trace:
            # Must be called directly from here: `_save_trace` drops a fixed
            # number of stack frames.
            self._save_trace()

        flat_size = asizeof.basicsize(instance) or 0
        first_sample = asizeof.Asized(flat_size, flat_size)
        self.snapshots = [(self.birth, first_sample)]
        # Not a slot; lands in the instance `__dict__`.
        self.on_delete = on_delete

    def __getstate__(self) -> Dict:
        """
        Build the picklable state used by dump_stats: every slot that is
        currently set, except 'ref' (references cannot be serialized) and
        '__dict__' (derived values, which can be reconstructed from the
        other data).
        """
        slots = getattr(TrackedObject, '__slots__', ())
        return {
            slot: getattr(self, slot)
            for slot in slots
            if hasattr(self, slot) and slot not in ['ref', '__dict__']
        }

    def __setstate__(self, state: Dict) -> None:
        """
        Restore pickled state attribute by attribute. Required because the
        class uses slots.
        """
        for attr, value in state.items():
            setattr(self, attr, value)

    def _save_trace(self) -> None:
        """
        Store the current stack trace in `self.trace`, outermost frame first.
        """
        frames = stack()
        try:
            # The first five entries are skipped to eliminate our own
            # overhead; the frame object itself (element 0) is dropped from
            # every entry.
            self.trace = [frame[1:] for frame in reversed(frames[5:])]
        finally:
            del frames

    def track_size(self, ts: float, sizer: asizeof.Asizer) -> None:
        """
        Record the current size under timestamp 'ts'.

        The 'sizer' is a stateful sizing facility that excludes other tracked
        objects. The stored representation is refreshed only while the object
        is still alive.
        """
        target = self.ref()
        sample = sizer.asized(target, detail=self._resolution_level)
        self.snapshots.append((ts, sample))
        if target is None:
            return
        self.repr = safe_repr(target, clip=128)

    def get_max_size(self) -> int:
        """
        Return the largest size among all recorded samples.
        """
        return max(sample.size for _, sample in self.snapshots)

    def get_size_at_time(self, timestamp: float) -> int:
        """
        Return the size recorded for the snapshot taken at 'timestamp'.

        If several samples carry that timestamp the most recently appended
        one wins; if none does, 0 is returned.
        """
        for stamp, sample in reversed(self.snapshots):
            if stamp == timestamp:
                return sample.size
        return 0

    def set_resolution_level(self, resolution_level: int) -> None:
        """
        Change the resolution level used by the next size estimation. Allows
        different levels for different instances of tracked classes.
        """
        self._resolution_level = resolution_level

    def finalize(self, ref: weakref_ref) -> None:
        """
        Weak reference callback: store the time of death and invoke the
        `on_delete` hook if one is set.

        The pre-destruction size cannot be measured here because the object
        is already gone when the callback runs. The callback still lets us
        learn about the death of tracked objects without destructors.

        If the object is destroyed at the end of the program execution, it's
        not possible to import modules anymore. Any exception is therefore
        swallowed and the callback does nothing (self.death stays None).
        """
        try:
            self.death = _get_time()
            hook = self.on_delete
            if hook:
                hook()
        except Exception:  # pragma: no cover
            pass
|
|
|
|
|
|
def track_object_creation(time_series: List[Tuple[float, int]]) -> None:
    """
    Append a (timestamp, count) sample with the instance count raised by one.

    An empty series is treated as starting from zero instances.
    """
    previous = 0
    if time_series:
        previous = time_series[-1][1]
    time_series.append((_get_time(), previous + 1))
|
|
|
|
|
|
def track_object_deletion(time_series: List[Tuple[float, int]]) -> None:
    """
    Append a (timestamp, count) sample with the instance count lowered by one.

    The series must already hold at least one sample; an empty series raises
    `IndexError`.
    """
    previous = time_series[-1][1]
    sample = (_get_time(), previous - 1)
    time_series.append(sample)
|
|
|
|
|
|
class PeriodicThread(Thread):
    """
    Thread object to take snapshots periodically.

    Setting the `stop` attribute to True makes the loop in `run` end once the
    current iteration (snapshot followed by sleep) is over.
    """

    def __init__(self, tracker: 'ClassTracker', interval: float, *args: Any,
                 **kwargs: Any):
        """
        Create thread with given interval and associated with the given
        tracker.

        :param tracker: object whose `create_snapshot()` is called each cycle
        :param interval: seconds to sleep between two snapshots
        :param args: forwarded to `Thread.__init__`
        :param kwargs: forwarded to `Thread.__init__`
        """
        self.interval = interval
        self.tracker = tracker
        self.stop = False
        super(PeriodicThread, self).__init__(*args, **kwargs)

    def run(self) -> None:
        """
        Loop until a stop signal is set.

        The stop flag is initialised in the constructor and is intentionally
        not reset here: a stop request posted after `start()` returned but
        before this method began executing would otherwise be overwritten,
        leaving the loop running forever and blocking any `join()`.
        """
        while not self.stop:
            self.tracker.create_snapshot()
            sleep(self.interval)
|
|
|
|
|
|
class Snapshot(object):
    """Sizes of tracked objects and of the process at one point in time."""

    def __init__(self, timestamp: float, description: str = '') -> None:
        """
        Start with zeroed totals and sample the process memory information.

        :param timestamp: time stamp the snapshot is recorded under
        :param description: optional text used by `label`
        """
        self.tracked_total = 0
        self.asizeof_total = 0
        self.overhead = 0
        self.timestamp = timestamp
        self.system_total = pympler.process.ProcessMemoryInfo()
        self.desc = description
        self.classes = None  # type: Optional[Dict[str, Dict[str, Any]]]

    @property
    def total(self) -> int:
        """
        Total (virtual) size of the process in bytes.

        When process information is not available, the best remaining number
        is returned, even if it is a poor approximation of reality:
        `asizeof_total` when it is non-zero, `tracked_total` otherwise.
        """
        if self.system_total.available:
            return self.system_total.vsz
        return self.asizeof_total or self.tracked_total  # pragma: no cover

    @property
    def label(self) -> str:
        """Description followed by the time stamp, or the time stamp alone."""
        if self.desc:
            return "%s (%.3fs)" % (self.desc, self.timestamp)
        return "%.3fs" % self.timestamp
|
|
|
|
|
|
class ClassTracker(object):
    """
    Track size and lifetime of selected objects and of all instances of
    selected classes, and collect the measurements in snapshots.
    """

    def __init__(self, stream: Optional[IO] = None):
        """
        Creates a new `ClassTracker` object.

        :param stream: Output stream to use when printing statistics via
            ``stats``.
        """
        # Dictionaries of TrackedObject objects associated with the actual
        # objects that are tracked. 'index' uses the class name as the key and
        # associates a list of tracked objects. It contains all TrackedObject
        # instances, including those of dead objects.
        self.index = defaultdict(list)  # type: Dict[str, List[TrackedObject]]

        # 'objects' uses the id (address) as the key and associates the tracked
        # object with it. TrackedObject's referring to dead objects are
        # replaced lazily, i.e. when the id is recycled by another tracked
        # object.
        self.objects = {}  # type: Dict[int, Any]

        # List of `Snapshot` objects.
        self.snapshots = []  # type: List[Snapshot]

        # Time series of instance count for each tracked class.
        self.history = defaultdict(list) \
            # type: Dict[str, List[Tuple[float, int]]]

        # Keep objects alive by holding a strong reference.
        self._keepalive = []  # type: List[Any]

        # Class observers, keyed by the tracked class object itself.
        self._observers = {}  # type: Dict[type, _ClassObserver]

        # Thread object responsible for background monitoring
        self._periodic_thread = None  # type: Optional[PeriodicThread]

        self._stream = stream

    @property
    def stats(self) -> ConsoleStats:
        """
        Return a ``ConsoleStats`` instance initialized with the current state
        of the class tracker.
        """
        return ConsoleStats(tracker=self, stream=self._stream)

    def _tracker(self, _observer_: _ClassObserver, _self_: Any, *args: Any,
                 **kwds: Any) -> None:
        """
        Injected constructor for tracked classes.

        Call the actual constructor of the object and track the object. Attach
        to the object before calling the constructor to track the object with
        the parameters of the most specialized class.
        """
        # `track_object` is called directly (no intermediate helper) because
        # stack traces saved for tracked objects skip a fixed number of frames.
        self.track_object(_self_,
                          name=_observer_.name,
                          resolution_level=_observer_.detail,
                          keep=_observer_.keep,
                          trace=_observer_.trace)
        _observer_.init(_self_, *args, **kwds)

    def _inject_constructor(self, cls: type, func: Callable, name: str,
                            resolution_level: int, keep: bool, trace: bool,
                            ) -> None:
        """
        Modifying Methods in Place - after the recipe 15.7 in the Python
        Cookbook by Ken Seehof. The original constructors may be restored
        later.

        :param cls: class whose ``__init__`` is replaced
        :param func: callable invoked as ``func(observer, *args, **kwargs)``
            in place of the original constructor
        :param name: name the instances are recorded under
        :param resolution_level: detail level stored in the observer
        :param keep: whether instances are to be kept alive
        :param trace: whether instantiation traces are to be saved
        """
        try:
            constructor = cls.__init__  # type: ignore
        except AttributeError:
            def constructor(self: Any, *_args: Any, **_kwargs: Any) -> None:
                pass

        # Possible name clash between keyword arguments of the tracked class'
        # constructor and the curried arguments of the injected constructor.
        # Therefore, the additional argument has a 'magic' name to make it less
        # likely that an argument name clash occurs.
        observer = _ClassObserver(constructor,
                                  name,
                                  resolution_level,
                                  keep,
                                  trace)
        self._observers[cls] = observer

        def new_constructor(*args: Any, **kwargs: Any) -> None:
            return func(observer, *args, **kwargs)

        cls.__init__ = new_constructor  # type: ignore

    def _is_tracked(self, cls: type) -> bool:
        """
        Determine if the class is tracked.
        """
        return cls in self._observers

    def _track_modify(self, cls: type, name: str, detail: int, keep: bool,
                      trace: bool) -> None:
        """
        Modify settings of a tracked class. Raises `KeyError` if `cls` has no
        observer.
        """
        self._observers[cls].modify(name, detail, keep, trace)

    def _restore_constructor(self, cls: type) -> None:
        """
        Restore the original constructor, lose track of class. Raises
        `KeyError` if `cls` has no observer.
        """
        cls.__init__ = self._observers[cls].init  # type: ignore
        del self._observers[cls]

    def track_change(self, instance: Any, resolution_level: int = 0) -> None:
        """
        Change tracking options for the already tracked object 'instance'.
        If instance is not tracked, a KeyError will be raised.
        """
        tobj = self.objects[id(instance)]
        tobj.set_resolution_level(resolution_level)

    def track_object(self, instance: Any, name: Optional[str] = None,
                     resolution_level: int = 0, keep: bool = False,
                     trace: bool = False) -> None:
        """
        Track object 'instance' and sample size and lifetime information. Not
        all objects can be tracked; trackable objects are class instances and
        other objects that can be weakly referenced. When an object cannot be
        tracked, a `TypeError` is raised.

        :param name: name the object is recorded under; defaults to the name
            of the instance's class
        :param resolution_level: The recursion depth up to which referents are
            sized individually. Resolution level 0 (default) treats the object
            as an opaque entity, 1 sizes all direct referents individually, 2
            also sizes the referents of the referents and so forth.
        :param keep: Prevent the object's deletion by keeping a (strong)
            reference to the object.
        :param trace: Save the instantiation stack trace
        """
        # Check if object is already tracked. This happens if track_object is
        # called multiple times for the same object or if an object inherits
        # from multiple tracked classes. In the latter case, the most
        # specialized class wins. To detect id recycling, the weak reference
        # is checked. If it is 'None' a tracked object is dead and another one
        # takes the same 'id'.
        key = id(instance)
        if key in self.objects and self.objects[key].ref() is not None:
            return

        name = name or instance.__class__.__name__

        series = self.history[name]
        track_object_creation(series)
        on_delete = partial(track_object_deletion, series)

        tobj = TrackedObject(instance,
                             name,
                             resolution_level=resolution_level,
                             trace=trace,
                             on_delete=on_delete)

        self.index[name].append(tobj)
        self.objects[key] = tobj

        if keep:
            self._keepalive.append(instance)

    def track_class(self, cls: type, name: Optional[str] = None,
                    resolution_level: int = 0, keep: bool = False,
                    trace: bool = False) -> None:
        """
        Track all objects of the class `cls`. Objects of that type that already
        exist are *not* tracked. If `track_class` is called for a class already
        tracked, the tracking parameters are modified. Instantiation traces can
        be generated by setting `trace` to True.
        A constructor is injected to begin instance tracking on creation
        of the object. The constructor calls `track_object` internally.

        :param cls: class to be tracked, may be an old-style or a new-style
            class
        :param name: reference the class by a name, default is the
            concatenation of module and class name
        :param resolution_level: The recursion depth up to which referents are
            sized individually. Resolution level 0 (default) treats the object
            as an opaque entity, 1 sizes all direct referents individually, 2
            also sizes the referents of the referents and so forth.
        :param keep: Prevent the object's deletion by keeping a (strong)
            reference to the object.
        :param trace: Save instantiation stack trace for each instance
        :raises TypeError: if `cls` is not a class
        """
        if not isclass(cls):
            raise TypeError("only class objects can be tracked")
        if name is None:
            name = cls.__module__ + '.' + cls.__name__
        if self._is_tracked(cls):
            self._track_modify(cls, name, resolution_level, keep, trace)
        else:
            self._inject_constructor(cls, self._tracker, name,
                                     resolution_level, keep, trace)

    def detach_class(self, cls: type) -> None:
        """
        Stop tracking class 'cls'. Any new objects of that type are not
        tracked anymore. Existing objects are still tracked.
        """
        self._restore_constructor(cls)

    def detach_all_classes(self) -> None:
        """
        Detach from all tracked classes.
        """
        # Iterate over a copy: detaching removes entries from `_observers`.
        for cls in list(self._observers):
            self.detach_class(cls)

    def detach_all(self) -> None:
        """
        Detach from all tracked classes and objects.
        Restore the original constructors and cleanse the tracking lists.
        """
        self.detach_all_classes()
        self.objects.clear()
        self.index.clear()
        self._keepalive[:] = []

    def clear(self) -> None:
        """
        Clear all gathered data and detach from all tracked objects/classes.
        """
        self.detach_all()
        self.snapshots[:] = []

    def close(self) -> None:
        """
        Detach from tracked classes by removing injected constructors. Makes it
        possible to use ClassTracker in `contextlib.closing` to safely remove
        profiling hooks when the tracker goes out of scope::

            import contextlib
            with contextlib.closing(ClassTracker()) as tracker:
                tracker.track_class(Foo)

        """
        self.detach_all_classes()

    #
    # Background Monitoring
    #

    def start_periodic_snapshots(self, interval: float = 1.0) -> None:
        """
        Start a thread which takes snapshots periodically. The `interval`
        specifies the time in seconds the thread waits between taking
        snapshots. The thread is started as a daemon allowing the program to
        exit. If periodic snapshots are already active, the interval is
        updated. A monitor thread that is no longer alive (for example
        because taking a snapshot raised) is replaced by a fresh one.
        """
        thread = self._periodic_thread
        if thread is not None and thread.is_alive():
            thread.interval = interval
            return

        thread = PeriodicThread(self, interval, name='BackgroundMonitor')
        # `Thread.setDaemon()` is deprecated; the attribute is the supported
        # spelling.
        thread.daemon = True
        self._periodic_thread = thread
        thread.start()

    def stop_periodic_snapshots(self) -> None:
        """
        Post a stop signal to the thread that takes the periodic snapshots. The
        function waits for the thread to terminate which can take some time
        depending on the configured interval. The reference to the thread is
        dropped even if it had already terminated on its own.
        """
        thread = self._periodic_thread
        if thread is None:
            return
        if thread.is_alive():
            thread.stop = True
            thread.join()
        self._periodic_thread = None

    #
    # Snapshots
    #

    # Class attribute: one lock object shared by all `ClassTracker` instances.
    snapshot_lock = Lock()

    def create_snapshot(self, description: str = '',
                        compute_total: bool = False) -> None:
        """
        Collect current per instance statistics and saves total amount of
        memory associated with the Python process.

        If `compute_total` is `True`, the total consumption of all objects
        known to *asizeof* is computed. The latter might be very slow if many
        objects are mapped into memory at the time the snapshot is taken.
        Therefore, `compute_total` is set to `False` by default.

        The overhead of the `ClassTracker` structure is also computed.

        Snapshots can be taken asynchronously. The function is protected with a
        lock to prevent race conditions.

        :param description: text stored with the snapshot (converted with
            `str`)
        :param compute_total: also compute the size of all objects known to
            *asizeof*
        """
        # TODO: It is not clear what happens when memory is allocated or
        # released while this function is executed but it will likely lead
        # to inconsistencies. Either pause all other threads or don't size
        # individual objects in asynchronous mode.
        with self.snapshot_lock:
            timestamp = _get_time()

            sizer = asizeof.Asizer()
            # Kept in a local so the strong references obtained from the weak
            # references stay around until the snapshot is complete.
            objs = [tobj.ref() for tobj in list(self.objects.values())]
            sizer.exclude_refs(*objs)

            # The objects need to be sized in a deterministic order. Sort the
            # objects by its creation date which should at least work for
            # non-parallel execution. The "proper" fix would be to handle
            # shared data separately.
            tracked_objects = list(self.objects.values())
            tracked_objects.sort(key=lambda x: x.birth)
            for tobj in tracked_objects:
                tobj.track_size(timestamp, sizer)

            snapshot = Snapshot(timestamp, str(description))
            snapshot.tracked_total = sizer.total
            if compute_total:
                snapshot.asizeof_total = asizeof.asizeof(all=True, code=True)

            # Compute overhead of all structures, use sizer to exclude tracked
            # objects(!)
            snapshot.overhead = 0
            if snapshot.tracked_total:
                snapshot.overhead = sizer.asizeof(self)
                if snapshot.asizeof_total:
                    snapshot.asizeof_total -= snapshot.overhead

            self.snapshots.append(snapshot)
|