mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-04-22 02:23:48 +00:00
322 lines
10 KiB
Python
322 lines
10 KiB
Python
"""A collection of functions to summarize object information.
|
|
|
|
This module provides several functions which will help you to analyze object
information which was gathered. Often it is sufficient to work with aggregated
data instead of handling the entire set of existing objects. For example, a
memory leak can be identified simply based on the number and size of existing
objects.
|
|
|
|
A summary contains information about objects in a table-like manner.
|
|
Technically, it is a list of lists. Each of these lists represents a row,
|
|
whereas the first column reflects the object type, the second column the number
|
|
of objects, and the third column the size of all these objects. This allows a
|
|
simple table-like output like the following:
|
|
|
|
    ============= ============ =============
           types    # objects    total size
    ============= ============ =============
    <type 'dict'>            2           560
     <type 'str'>            3           126
     <type 'int'>            4            96
    <type 'long'>            2            66
    <type 'list'>            1            40
    ============= ============ =============
|
|
|
|
Another advantage of summaries is that they influence the system you analyze
only to a minimum. Working with references to existing objects will keep these
objects alive. Most of the time this is not desired behavior (as it will have
an impact on the observations). Using summaries reduces this effect greatly.
|
|
|
|
output representation
|
|
---------------------
|
|
|
|
The output representation of types is defined in summary.representations.
Every type defined in this dictionary will be represented as specified. Each
definition has a list of different representations. The later a representation
appears in this list, the higher its verbosity level. For types which are not
defined in summary.representations the default str() representation will be
used.
|
|
|
|
Per default, summaries will use the verbosity level 1 for any encountered type.
The reason is that several computations are done with summaries and rows have
to remain comparable. Therefore information which reflects an object's state,
e.g. the current line number of a frame, should not be included. You may add
more detailed information at higher verbosity levels than 1.
|
|
"""
|
|
|
|
import re
|
|
import sys
|
|
import types
|
|
|
|
from pympler.util import stringutils
|
|
from sys import getsizeof
|
|
|
|
# Maps a type to a list of callables; each callable takes an object of that
# type and returns its string representation. _repr picks the callable at
# index (verbosity - 1), so later entries are the more verbose ones.
representations = {}
|
|
|
|
|
|
def _init_representations():
    """Fill the module-level ``representations`` table.

    Each registered type maps to a list of one-argument callables that
    return a string for an object of that type. The position of a callable
    in its list corresponds to the verbosity level it serves (first entry
    is the least verbose).
    """
    # Only taken when sys.hexversion is below 0x2040000 (i.e. before
    # Python 2.4).
    # NOTE(review): the extent of this branch was reconstructed from a copy
    # whose indentation was lost. ClassType, InstanceType and the
    # im_func/im_class attributes exist on Python 2 only, so all three
    # registrations are assumed to belong to it -- confirm against upstream.
    if sys.hexversion < 0x2040000:
        representations[types.ClassType] = [
            lambda cls: "classobj(%s)" % repr(cls),
        ]
        representations[types.InstanceType] = [
            lambda inst: "instance(%s)" % repr(inst.__class__),
        ]
        representations[types.MethodType] = [
            lambda meth: "instancemethod (%s)" % (repr(meth.im_func)),
            lambda meth: "instancemethod (%s, %s)" % (repr(meth.im_class),
                                                      repr(meth.im_func)),
        ]

    # Frames: the code name only, then with the first line number of the
    # code object, then additionally with the file name.
    representations[types.FrameType] = [
        lambda frm: "frame (codename: %s)" % (frm.f_code.co_name),
        lambda frm: "frame (codename: %s, codeline: %s)" %
        (frm.f_code.co_name, frm.f_code.co_firstlineno),
        lambda frm: "frame (codename: %s, filename: %s, codeline: %s)" %
        (frm.f_code.co_name, frm.f_code.co_filename,
         frm.f_code.co_firstlineno),
    ]

    representations[dict] = [
        lambda mapping: str(type(mapping)),
        lambda mapping: "dict, len=%s" % len(mapping),
    ]

    representations[types.FunctionType] = [
        lambda func: "function (%s)" % func.__name__,
        lambda func: "function (%s.%s)" % (func.__module__, func.__name__),
    ]

    representations[list] = [
        lambda seq: str(type(seq)),
        lambda seq: "list, len=%s" % len(seq),
    ]

    # Modules are labelled by __name__, falling back to __file__ and finally
    # to the module's id().
    representations[types.ModuleType] = [
        lambda mod: "module(%s)" % getattr(
            mod, '__name__',
            getattr(mod, '__file__', 'nameless, id: %d' % id(mod))
        ),
    ]

    representations[set] = [
        lambda items: str(type(items)),
        lambda items: "set, len=%s" % len(items),
    ]
|
|
|
|
|
|
# Populate `representations` once, when the module is imported.
_init_representations()
|
|
|
|
|
|
def summarize(objects):
    """Build a summary for an iterable of objects.

    Objects are grouped by the string returned from ``_repr``. The result
    is a list of rows, one per group, each of the form::

        [representation, number of objects, summed getsizeof() of the objects]

    No guarantee regarding the order of the rows is given.

    """
    # One accumulator row per representation, updated in place.
    grouped = {}
    for obj in objects:
        label = _repr(obj)
        entry = grouped.setdefault(label, [label, 0, 0])
        entry[1] += 1
        entry[2] += getsizeof(obj)
    return list(grouped.values())
|
|
|
|
|
|
def get_diff(left, right):
    """Get the difference of two summaries.

    The values of the left summary are subtracted from the values of the
    right summary, i.e. each resulting row holds ``right - left`` for the
    number of objects and for the total size.

    Rows are matched on their first column:

    - If a row appears on both sides, it is included with the differences
      (0 and 0 if nothing changed).
    - If a row appears only in ``right``, a copy of it is included.
    - If a row appears only in ``left``, it is included with negated values.

    If the number of elements of a row of the diff is 0, but the total size
    is not, it means that objects likely have changed, but not their number,
    thus resulting in a changed size.

    The returned rows are always new lists, so modifying the diff never
    alters ``left`` or ``right``. Rows keyed in ``left`` come first, followed
    by rows found only in ``right`` (each in dictionary iteration order).

    """
    # If a key occurs more than once within one summary, the last row wins.
    left_by_key = dict((row[0], row) for row in left)
    right_by_key = dict((row[0], row) for row in right)

    res = []
    for key, l_row in left_by_key.items():
        r_row = right_by_key.get(key)
        if r_row is None:
            res.append([key, -l_row[1], -l_row[2]])
        else:
            res.append([key, r_row[1] - l_row[1], r_row[2] - l_row[2]])

    for key, r_row in right_by_key.items():
        if key not in left_by_key:
            # Copy the row: handing out the caller's own list would let
            # later edits of the diff corrupt the right summary.
            res.append(list(r_row))
    return res
|
|
|
|
|
|
def format_(rows, limit=15, sort='size', order='descending'):
    """Format the rows as a summary.

    The input rows are copied, so neither the row order nor the size column
    of the caller's summary is modified.

    Keyword arguments:
    limit -- the maximum number of elements to be listed
    sort  -- sort elements by 'size', 'type', or '#'
    order -- sort 'ascending' or 'descending'

    Returns the iterable produced by ``_format_table``, whose first row is
    the header ``types / # objects / total size``.

    Raises ValueError if ``sort`` or ``order`` is not one of the values
    listed above.
    """
    localrows = [list(row) for row in rows]

    # input validation
    sortby = ['type', '#', 'size']
    if sort not in sortby:
        raise ValueError("invalid sort, should be one of " + str(sortby))
    orders = ['ascending', 'descending']
    if order not in orders:
        raise ValueError("invalid order, should be one of " + str(orders))

    # sort rows; the position of `sort` in `sortby` is also the column index
    column = sortby.index(sort)
    if column == 0:
        def sort_key(row):
            # The first column normally already holds the representation
            # string. Feeding that string through _repr again would reduce
            # every key to the representation of the str type and make the
            # sort a no-op, so _repr is only used for non-string entries.
            label = row[0]
            return label if isinstance(label, str) else _repr(label)
    else:
        def sort_key(row):
            return row[column]
    localrows.sort(key=sort_key, reverse=(order == "descending"))

    # limit rows
    localrows = localrows[0:limit]
    # sizes are pretty-printed only after sorting, which needs raw numbers
    for row in localrows:
        row[2] = stringutils.pp(row[2])

    # prepend the header and hand over to the table formatter
    localrows.insert(0, ["types", "# objects", "total size"])
    return _format_table(localrows)
|
|
|
|
|
|
def _format_table(rows, header=True):
|
|
"""Format a list of lists as a pretty table.
|
|
|
|
Keyword arguments:
|
|
header -- if True the first row is treated as a table header
|
|
|
|
inspired by http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/267662
|
|
"""
|
|
border = "="
|
|
# vertical delimiter
|
|
vdelim = " | "
|
|
# padding nr. of spaces are left around the longest element in the
|
|
# column
|
|
padding = 1
|
|
# may be left,center,right
|
|
justify = 'right'
|
|
justify = {'left': str.ljust,
|
|
'center': str.center,
|
|
'right': str.rjust}[justify.lower()]
|
|
# calculate column widths (longest item in each col
|
|
# plus "padding" nr of spaces on both sides)
|
|
cols = zip(*rows)
|
|
colWidths = [max([len(str(item)) + 2 * padding for item in col])
|
|
for col in cols]
|
|
borderline = vdelim.join([w * border for w in colWidths])
|
|
for row in rows:
|
|
yield vdelim.join([justify(str(item), width)
|
|
for (item, width) in zip(row, colWidths)])
|
|
if header:
|
|
yield borderline
|
|
header = False
|
|
|
|
|
|
def print_(rows, limit=15, sort='size', order='descending'):
    """Print the rows as a summary.

    The arguments are passed on to ``format_`` unchanged and every line it
    produces is printed.

    Keyword arguments:
    limit -- the maximum number of elements to be listed
    sort  -- sort elements by 'size', 'type', or '#'
    order -- sort 'ascending' or 'descending'

    """
    table_lines = format_(rows, limit=limit, sort=sort, order=order)
    for text in table_lines:
        print(text)
|
|
|
|
|
|
# regular expressions used by _repr to replace default type representations

# Matches a complete "<type 'x'>" / "<class 'x'>" string; group 2 is the bare
# type name that _repr substitutes for the whole match.
type_repr = re.compile(r"^<(type|class) '(\S+)'>$")
# Matches the " at 0x..." memory address suffix of default reprs so it can be
# removed. Both lower- and uppercase hex digits are accepted because the
# letter case of a formatted pointer is platform-dependent.
address = re.compile(r' at 0x[0-9a-fA-F]+')
|
|
|
|
|
|
def _repr(o, verbosity=1):
    """Get a meaningful object representation.

    This function should be used when the simple str(o) output would result
    in too general data. E.g. "<type 'instance'>" is less meaningful than
    "instance: Foo".

    Keyword arguments:
    verbosity -- selects which of the callables registered for the object's
                 type in ``representations`` is used (level 1 is the first
                 entry). Levels beyond the last registered entry fall back
                 to the last one. With level 0, or for types without an
                 entry, str() of the type is used instead.

    Memory addresses are removed from the result, and a result of the form
    "<type 'x'>" / "<class 'x'>" is shortened to the bare name.

    """
    obj_type = type(o)
    if verbosity != 0 and obj_type in representations:
        candidates = representations[obj_type]
        # level n maps to index n - 1, capped at the most verbose entry
        index = min(verbosity - 1, len(candidates) - 1)
        text = candidates[index](o)
    else:
        text = str(obj_type)

    without_address = address.sub('', text)
    return type_repr.sub(r'\2', without_address)
|
|
|
|
|
|
def _traverse(summary, function, *args):
|
|
"""Traverse all objects of a summary and call function with each as a
|
|
parameter.
|
|
|
|
Using this function, the following objects will be traversed:
|
|
- the summary
|
|
- each row
|
|
- each item of a row
|
|
"""
|
|
function(summary, *args)
|
|
for row in summary:
|
|
function(row, *args)
|
|
for item in row:
|
|
function(item, *args)
|
|
|
|
|
|
def _subtract(summary, o):
    """Remove object o from the summary by subtracting its count and size.

    Every row whose first column equals ``_repr(o)`` has its object count
    reduced by one and its total size reduced by ``getsizeof(o)``. If no
    row matches, a new row with the negated values is appended. The summary
    is modified in place and also returned.
    """
    label = _repr(o)
    size = getsizeof(o)
    matched = False
    for entry in summary:
        if entry[0] != label:
            continue
        # compute both new values before storing either of them
        entry[1], entry[2] = entry[1] - 1, entry[2] - size
        matched = True
    if not matched:
        summary.append([label, -1, -size])
    return summary
|
|
|
|
|
|
def _sweep(summary):
|
|
"""Remove all rows in which the total size and the total number of
|
|
objects is zero.
|
|
|
|
"""
|
|
return [row for row in summary if ((row[2] != 0) or (row[1] != 0))]
|