mirror of
				https://github.com/aykhans/AzSuicideDataVisualization.git
				synced 2025-10-31 10:09:59 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			268 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			268 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """The tracker module allows you to track changes in the memory usage over
 | |
| time.
 | |
| 
 | |
| Using the SummaryTracker, you can create summaries and compare them
 | |
| with each other. Stored summaries can be ignored during comparison,
 | |
| avoiding the observer effect.
 | |
| 
 | |
| The ObjectTracker lets you monitor object creation. You snapshot the objects
 | |
| existing at one time and compare them with the objects from an earlier time.
 | |
| 
 | |
| """
 | |
| import gc
 | |
| import inspect
 | |
| 
 | |
| from pympler import muppy, summary
 | |
| from pympler.util import compat
 | |
| 
 | |
| 
 | |
class SummaryTracker(object):
    """Helper class to track changes between two summaries taken.

    Detailed information on single objects will be lost, e.g. object size or
    object id. But often summaries are sufficient to monitor the memory usage
    over the lifetime of an application.

    On initialisation, a first summary is taken. Every time `diff` is called
    without a second summary, a new summary is created. Thus, a diff between
    the new and the last summary can be extracted.

    Be aware that filtering out previous summaries is time-intensive. You
    should therefore restrict yourself to the number of summaries you really
    need.

    Attributes set by this class:
    s0 -- the baseline summary used by the next argument-less `diff`
    s1 -- the most recent summary created by `diff` (only exists after the
          first call of `diff` that creates a summary)
    summaries -- dict of summaries stored via `store_summary`, by key
    ignore_self -- whether stored summaries are excluded from new summaries

    """
    def __init__(self, ignore_self=True):
        """Take the initial summary and set up the summary storage.

        The number of summaries managed by the tracker has a performance
        impact on new summaries, iff you decide to exclude them from further
        summaries. Therefore it is suggested to use them economically.

        Keyword arguments:
        ignore_self -- summaries managed by this object will be ignored.
        """
        # Baseline: summary of everything muppy reports right now.
        self.s0 = summary.summarize(muppy.get_objects())
        self.summaries = {}
        self.ignore_self = ignore_self

    def create_summary(self):
        """Return a summary of the objects reported by muppy.get_objects().

        If `ignore_self` is false, the summary is returned unmodified.
        Otherwise the objects used to store the summaries in
        `self.summaries` are subtracted from it again, unless they are also
        referenced from somewhere else.

        See also the notes on ignore_self in the class as well as the
        initializer documentation.

        """
        if not self.ignore_self:
            res = summary.summarize(muppy.get_objects())
        else:
            # If the user requested the data required to store summaries to be
            # ignored in the summaries, we need to identify all objects which
            # are related to each summary stored.
            # Thus we build a list of all objects used for summary storage as
            # well as a dictionary which tells us how often an object is
            # referenced by the summaries.
            # During this identification process, more objects are referenced,
            # namely int objects identifying referenced objects as well as the
            # corresponding count.
            # For all these objects it will be checked whether they are
            # referenced from outside the monitor's scope. If not, they will be
            # subtracted from the snapshot summary, otherwise they are
            # included (as this indicates that they are relevant to the
            # application).
            #
            # NOTE: the referrer counts compared below depend on exactly which
            # locals of this frame hold references. Do not introduce extra
            # references to these objects or reorder the statements.

            all_of_them = []  # every single object, one entry per occurrence
            ref_counter = {}  # maps id(o) -> how often o was recorded

            def store_info(o):
                # Record one occurrence of `o` in both bookkeeping structures.
                all_of_them.append(o)
                if id(o) in ref_counter:
                    ref_counter[id(o)] += 1
                else:
                    ref_counter[id(o)] = 1

            # store infos on every single object related to the summaries
            store_info(self.summaries)
            for k, v in self.summaries.items():
                store_info(k)
                # presumably calls store_info for v and the objects nested in
                # it -- confirm against summary._traverse
                summary._traverse(v, store_info)

            # do the summary
            res = summary.summarize(muppy.get_objects())

            # remove ids stored in the ref_counter
            for _id in ref_counter:
                # referenced in frame, ref_counter, ref_counter.keys()
                if len(gc.get_referrers(_id)) == (3):
                    summary._subtract(res, _id)
            for o in all_of_them:
                # referenced in frame, summary, all_of_them
                # (plus once per occurrence counted in ref_counter)
                if len(gc.get_referrers(o)) == (ref_counter[id(o)] + 2):
                    summary._subtract(res, o)

        return res

    def diff(self, summary1=None, summary2=None):
        """Compute diff between two summaries.

        If no summary is provided, the diff from the last to the current
        summary is used. If summary1 is provided the diff from summary1
        to the current summary is used. If summary1 and summary2 are
        provided, the diff between these two is used.

        Whenever summary2 is omitted, a new summary is created, stored as
        `self.s1`, and becomes the new baseline `self.s0`. If both summaries
        are given, no summary is created and the baseline is left untouched.

        Returns the result of summary.get_diff after it was passed through
        summary._sweep.

        Raises ValueError if summary2 is provided without summary1.

        """
        res = None
        if summary2 is None:
            self.s1 = self.create_summary()
            if summary1 is None:
                res = summary.get_diff(self.s0, self.s1)
            else:
                res = summary.get_diff(summary1, self.s1)
            # the fresh summary is the reference point for the next diff
            self.s0 = self.s1
        else:
            if summary1 is not None:
                res = summary.get_diff(summary1, summary2)
            else:
                raise ValueError(
                    "You cannot provide summary2 without summary1.")
        # presumably drops rows without changes -- confirm in summary._sweep
        return summary._sweep(res)

    def print_diff(self, summary1=None, summary2=None):
        """Compute diff between two summaries and print it.

        If no summary is provided, the diff from the last to the current
        summary is used. If summary1 is provided the diff from summary1
        to the current summary is used. If summary1 and summary2 are
        provided, the diff between these two is used.

        The arguments are passed on to `diff`, so the same side effects and
        the same ValueError apply.
        """
        summary.print_(self.diff(summary1=summary1, summary2=summary2))

    def format_diff(self, summary1=None, summary2=None):
        """Compute diff between two summaries and return the result of
        summary.format_ for it (formatted lines).

        If no summary is provided, the diff from the last to the current
        summary is used. If summary1 is provided the diff from summary1
        to the current summary is used. If summary1 and summary2 are
        provided, the diff between these two is used.

        The arguments are passed on to `diff`, so the same side effects and
        the same ValueError apply.
        """
        return summary.format_(self.diff(summary1=summary1, summary2=summary2))

    def store_summary(self, key):
        """Create a summary now and store it in self.summaries under `key`.

        An existing entry for `key` is overwritten.
        """
        self.summaries[key] = self.create_summary()
 | |
| 
 | |
| 
 | |
class ObjectTracker(object):
    """
    Helper class to track changes in the set of existing objects.

    Each time you invoke a diff with this tracker, the objects which existed
    during the last invocation are compared with the objects which exist during
    the current invocation.

    Please note that in order to do so, strong references to all objects will
    be stored. This means that none of these objects can be garbage collected.
    A use case for the ObjectTracker is the monitoring of a state which should
    be stable, but you see new objects being created nevertheless. With the
    ObjectTracker you can identify these new objects.

    Attributes set by this class:
    o0 -- list of objects from the previous measurement (the baseline)
    o1 -- list of objects from the latest `get_diff` (only exists after the
          first call of `get_diff`)

    """

    # Some precaution needs to be taken when handling frame objects (see the
    # warning about keeping references to frames in the documentation of the
    # inspect module). Frames are only ever put into short-lived tuples which
    # are never stored on the instance, so no frame object remains referenced
    # once a method returns.

    def __init__(self):
        """On initialisation, the current state of objects is stored.

        Note that all objects which exist at this point in time will not be
        released until you destroy this ObjectTracker instance.
        """
        self.o0 = self._get_objects(ignore=(inspect.currentframe(),))

    def _get_objects(self, ignore=()):
        """Get all currently existing objects.

        XXX - ToDo: This method is a copy&paste from muppy.get_objects, but
        some modifications are applied. Specifically, it allows to ignore
        objects (which includes the current frame).

        Besides the objects in `ignore`, the frame of this method, the
        tracker itself, the ignore container, the filter helper and the
        stored object lists (`o0`, `o1`) are left out of the result.

        keyword arguments
        ignore -- iterable of objects to ignore (tuple, list, ...) or None

        Returns a list with the objects reported by gc.get_objects() plus
        the objects they refer to which are not tracked by the garbage
        collector.
        """
        def remove_ignore(objects, ignore=()):
            # keep only the objects which are not listed in `ignore`
            return [o for o in objects
                    if not compat.object_in_list(o, ignore)]

        # Work on a tuple of our own: the concatenation below must neither
        # fail for lists nor extend a caller's list in place (which would
        # leave frame references behind in it). Tuples pass through as-is.
        ignore = tuple(ignore or ())
        tmp = gc.get_objects()
        ignore += (inspect.currentframe(), self, ignore, remove_ignore)
        if hasattr(self, 'o0'):
            ignore += (self.o0,)
        if hasattr(self, 'o1'):
            ignore += (self.o1,)
        # Filter before collecting referents: this implies that objects which
        # are only referenced by ignored containers are ignored as well.
        tmp = remove_ignore(tmp, ignore)
        res = []
        for o in tmp:
            # gc.get_objects returns only container objects, but we also want
            # the objects referenced by them
            for ref in muppy.get_referents(o):
                if not gc.is_tracked(ref):
                    # we already got the container objects, now we only add
                    # non-container objects
                    res.append(ref)
        res.extend(tmp)
        res = muppy._remove_duplicates(res)
        # repeat to filter out ignored objects which were pulled in again as
        # referents of other objects
        res = remove_ignore(res, ignore)
        # Manual cleanup: `ignore` holds this very frame, dropping the local
        # breaks the frame -> tuple -> frame reference cycle right away.
        del ignore
        return res

    def get_diff(self, ignore=()):
        """Get the diff to the last time the state of objects was measured.

        The new measurement is stored as `o1` and becomes the baseline `o0`
        for the next call.

        keyword arguments
        ignore -- iterable of objects to ignore (tuple, list, ...) or None

        Returns the result of muppy.get_diff for the previous and the new
        measurement; `format_diff` reads its '+' and '-' entries.
        """
        # ignore this and the caller frame
        self.o1 = self._get_objects(
            tuple(ignore or ()) + (inspect.currentframe(),))
        diff = muppy.get_diff(self.o0, self.o1)
        self.o0 = self.o1
        # The tuple holding the frames was a temporary, so there is nothing
        # left to clean up here.
        return diff

    def print_diff(self, ignore=()):
        """Print the diff to the last time the state of objects was measured.

        keyword arguments
        ignore -- iterable of objects to ignore (tuple, list, ...) or None
        """
        # ignore this and the caller frame
        for line in self.format_diff(
                tuple(ignore or ()) + (inspect.currentframe(),)):
            print(line)

    def format_diff(self, ignore=()):
        """Format the diff to the last time the state of objects was measured.

        keyword arguments
        ignore -- iterable of objects to ignore (tuple, list, ...) or None

        Returns a list of lines: the heading "Added objects:" followed by
        the formatted summary of the added objects, then "Removed objects:"
        followed by the formatted summary of the removed objects.
        """
        # ignore this and the caller frame
        diff = self.get_diff(tuple(ignore or ()) + (inspect.currentframe(),))
        lines = ["Added objects:"]
        lines.extend(summary.format_(summary.summarize(diff['+'])))
        lines.append("Removed objects:")
        lines.extend(summary.format_(summary.summarize(diff['-'])))
        return lines
 | 
