from typing import Any, Callable, Dict, List, Optional, Set, Tuple import gc from pympler import summary from pympler.util import compat from inspect import isframe, stack from sys import getsizeof from pympler.asizeof import _Py_TPFLAGS_HAVE_GC def ignore_object(obj: Any) -> bool: try: return isframe(obj) except ReferenceError: return True def get_objects(remove_dups: bool = True, include_frames: bool = False ) -> List[Any]: """Return a list of all known objects excluding frame objects. If (outer) frame objects shall be included, pass `include_frames=True`. In order to prevent building reference cycles, the current frame object (of the caller of get_objects) is ignored. This will not prevent creating reference cycles if the object list is passed up the call-stack. Therefore, frame objects are not included by default. Keyword arguments: remove_dups -- if True, all duplicate objects will be removed. include_frames -- if True, includes frame objects. """ gc.collect() # Do not initialize local variables before calling gc.get_objects or those # will be included in the list. Furthermore, ignore frame objects to # prevent reference cycles. tmp = gc.get_objects() tmp = [o for o in tmp if not ignore_object(o)] res = [] for o in tmp: # gc.get_objects returns only container objects, but we also want # the objects referenced by them refs = get_referents(o) for ref in refs: if not gc.is_tracked(ref): # we already got the container objects, now we only add # non-container objects res.append(ref) res.extend(tmp) if remove_dups: res = _remove_duplicates(res) if include_frames: for sf in stack()[2:]: res.append(sf[0]) return res def get_size(objects: List[Any]) -> int: """Compute the total size of all elements in objects.""" res = 0 for o in objects: try: res += getsizeof(o) except AttributeError: print("IGNORING: type=%s; o=%s" % (str(type(o)), str(o))) return res def get_diff(left: List[Any], right: List[Any]) -> Dict[str, List[Any]]: """Get the difference of both lists. The result will be a dict with this form {'+': [], '-': []}. Items listed in '+' exist only in the right list, items listed in '-' exist only in the left list. """ res = {'+': [], '-': []} # type: Dict[str, List[Any]] def partition(objects: List[Any]) -> Dict[type, List[Any]]: """Partition the passed object list.""" res = {} # type: Dict[type, List[Any]] for o in objects: t = type(o) if type(o) not in res: res[t] = [] res[t].append(o) return res def get_not_included(foo: List[Any], bar: Dict[type, List[Any]] ) -> List[Any]: """Compare objects from foo with objects defined in the values of bar (set of partitions). Returns a list of all objects included in list, but not dict values. """ res = [] # type: List[Any] for o in foo: if not compat.object_in_list(type(o), bar): res.append(o) elif not compat.object_in_list(o, bar[type(o)]): res.append(o) return res # Create partitions of both lists. This will reduce the time required for # the comparison left_objects = partition(left) right_objects = partition(right) # and then do the diff res['+'] = get_not_included(right, left_objects) res['-'] = get_not_included(left, right_objects) return res def sort(objects: List[Any]) -> List[Any]: """Sort objects by size in bytes.""" objects = sorted(objects, key=getsizeof) return objects def filter(objects: List[Any], Type: Optional[type] = None, min: int = -1, max: int = -1) -> List[Any]: """Filter objects. The filter can be by type, minimum size, and/or maximum size. Keyword arguments: Type -- object type to filter by min -- minimum object size max -- maximum object size """ res = [] # type: List[Any] if min > max and max > -1: raise ValueError("minimum must be smaller than maximum") if Type is not None: objects = [o for o in objects if isinstance(o, Type)] if min > -1: objects = [o for o in objects if getsizeof(o) > min] if max > -1: objects = [o for o in objects if getsizeof(o) < max] return objects def get_referents(object: Any, level: int = 1) -> List[Any]: """Get all referents of an object up to a certain level. The referents will not be returned in a specific order and will not contain duplicate objects. Duplicate objects will be removed. Keyword arguments: level -- level of indirection to which referents considered. This function is recursive. """ res = gc.get_referents(object) level -= 1 if level > 0: for o in res: res.extend(get_referents(o, level)) res = _remove_duplicates(res) return res def _get_usage(function: Callable, *args: Any) -> Optional[List]: """Test if more memory is used after the function has been called. The function will be invoked twice and only the second measurement will be considered. Thus, memory used in initialisation (e.g. loading modules) will not be included in the result. The goal is to identify memory leaks caused by functions which use more and more memory. Any arguments next to the function will be passed on to the function on invocation. Note that this function is currently experimental, because it is not tested thoroughly and performs poorly. """ # The usage of a function is calculated by creating one summary of all # objects before the function is invoked and afterwards. These summaries # are compared and the diff is returned. # This function works in a 2-steps process. Before the actual function is # invoked an empty dummy function is measurement to identify the overhead # involved in the measuring process. This overhead then is subtracted from # the measurement performed on the passed function. The result reflects the # actual usage of a function call. # Also, a measurement is performed twice, allowing the adjustment to # initializing things, e.g. modules res = None def _get_summaries(function: Callable, *args: Any) -> Tuple: """Get a 2-tuple containing one summary from before, and one summary from after the function has been invoked. """ s_before = summary.summarize(get_objects()) function(*args) s_after = summary.summarize(get_objects()) return (s_before, s_after) def _get_usage(function: Callable, *args: Any) -> List: """Get the usage of a function call. This function is to be used only internally. The 'real' get_usage function is a wrapper around _get_usage, but the workload is done here. """ # init before calling (s_before, s_after) = _get_summaries(function, *args) # ignore all objects used for the measurement ignore = [] if s_before != s_after: ignore.append(s_before) for row in s_before: # ignore refs from summary and frame (loop) if len(gc.get_referrers(row)) == 2: ignore.append(row) for item in row: # ignore refs from summary and frame (loop) if len(gc.get_referrers(item)) == 2: ignore.append(item) for o in ignore: s_after = summary._subtract(s_after, o) res = summary.get_diff(s_before, s_after) return summary._sweep(res) # calibrate; twice for initialization def noop() -> None: pass offset = _get_usage(noop) offset = _get_usage(noop) # perform operation twice to handle objects possibly used in # initialisation tmp = _get_usage(function, *args) tmp = _get_usage(function, *args) tmp = summary.get_diff(offset, tmp) tmp = summary._sweep(tmp) if len(tmp) != 0: res = tmp return res def _is_containerobject(o: Any) -> bool: """Is the passed object a container object.""" return bool(getattr(type(o), '__flags__', 0) & _Py_TPFLAGS_HAVE_GC) def _remove_duplicates(objects: List[Any]) -> List[Any]: """Remove duplicate objects. Inspired by http://www.peterbe.com/plog/uniqifiers-benchmark """ seen = set() # type: Set[int] result = [] for item in objects: marker = id(item) if marker in seen: continue seen.add(marker) result.append(item) return result def print_summary() -> None: """Print a summary of all known objects.""" summary.print_(summary.summarize(get_objects()))