mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-04-22 10:28:02 +00:00
134 lines
4.2 KiB
Python
134 lines
4.2 KiB
Python
from toolz.itertoolz import getter, cons, pluck
|
|
from itertools import tee, starmap
|
|
|
|
|
|
# See #166: https://github.com/pytoolz/toolz/issues/166
|
|
# See #173: https://github.com/pytoolz/toolz/pull/173
|
|
class EqualityHashKey(object):
    """ Wrap an item so it can live in a hash container, compared by equality.

    Wrapped items are considered equal exactly when the underlying items
    compare equal, which makes it possible to build hash keys for types
    that are otherwise unhashable:

    >>> from toolz import curry
    >>> EqualityHashDefault = curry(EqualityHashKey, None)
    >>> set(map(EqualityHashDefault, [[], (), [1], [1]]))  # doctest: +SKIP
    {=[]=, =()=, =[1]=}

    **Caution:** adding N ``EqualityHashKey`` items to a hash container
    may require O(N**2) operations, not O(N) as for typical hashable types.
    Therefore, a suitable key function such as ``tuple`` or ``frozenset``
    is usually preferred over using ``EqualityHashKey`` if possible.

    The ``key`` argument to ``EqualityHashKey`` should be a function or
    index that returns a hashable object that effectively distinguishes
    unequal items.  This helps avoid the poor scaling that occurs when
    using the default key (``None``), for which every wrapper hashes to the
    same value.  For example, the above example can be improved by using a
    key function that distinguishes items by length or type:

    >>> EqualityHashLen = curry(EqualityHashKey, len)
    >>> EqualityHashType = curry(EqualityHashKey, type)  # this works too
    >>> set(map(EqualityHashLen, [[], (), [1], [1]]))  # doctest: +SKIP
    {=[]=, =()=, =[1]=}

    ``EqualityHashKey`` is convenient to use when a suitable key function
    is complicated or unavailable.  For example, the following returns all
    unique values based on equality:

    >>> from toolz import unique
    >>> vals = [[], [], (), [1], [1], [2], {}, {}, {}]
    >>> list(unique(vals, key=EqualityHashDefault))
    [[], (), [1], [2], {}]

    **Warning:** don't change the equality value of an item already in a hash
    container.  Unhashable types are unhashable for a reason.  For example:

    >>> L1 = [1] ; L2 = [2]
    >>> s = set(map(EqualityHashDefault, [L1, L2]))
    >>> s  # doctest: +SKIP
    {=[1]=, =[2]=}

    >>> L1[0] = 2  # Don't do this!  ``s`` now has duplicate items!
    >>> s  # doctest: +SKIP
    {=[2]=, =[2]=}

    Although this may appear problematic, using immutable data types is a
    common idiom in functional programming, and ``EqualityHashKey`` easily
    allows the same idiom to be used by convention rather than strict
    requirement.

    See Also:
        identity
    """
    __slots__ = ['item', 'key']
    # Sentinel stored in ``key`` when no key function was supplied; it is
    # also the value hashed in that case and the marker ``__eq__`` looks for.
    _default_hashkey = '__default__hashkey__'

    def __init__(self, key, item):
        """ Store ``item`` together with the normalized ``key``.

        ``None`` selects the default sentinel, a callable is kept as given,
        and anything else is wrapped with ``getter`` so it acts as an index.
        """
        self.item = item
        if key is None:
            self.key = self._default_hashkey
        elif callable(key):
            self.key = key
        else:
            self.key = getter(key)

    def __hash__(self):
        """ Hash the sentinel, or whatever ``key`` yields for the item. """
        if self.key == self._default_hashkey:
            # No key function: every wrapper shares this one hash value.
            return hash(self.key)
        return hash(self.key(self.item))

    def __eq__(self, other):
        """ Return whether ``other`` is a wrapper around an equal item.

        Objects lacking ``_default_hashkey`` or ``item`` compare unequal.
        """
        try:
            same_marker = self._default_hashkey == other._default_hashkey
            return same_marker and self.item == other.item
        except AttributeError:
            return False

    def __ne__(self, other):
        """ Return the negation of ``__eq__``. """
        equal = self.__eq__(other)
        return not equal

    def __str__(self):
        """ Return ``str`` of the item, wrapped in ``=`` signs. """
        text = str(self.item)
        return '=%s=' % (text,)

    def __repr__(self):
        """ Return ``repr`` of the item, wrapped in ``=`` signs. """
        text = repr(self.item)
        return '=%s=' % (text,)
|
|
|
|
|
|
# See issue #293: https://github.com/pytoolz/toolz/issues/239
|
|
def unzip(seq):
    """Inverse of ``zip``: split an iterable of tuples into per-position
    iterators.

    >>> a, b = unzip([('a', 1), ('b', 2)])
    >>> list(a)
    ['a', 'b']
    >>> list(b)
    [1, 2]

    An empty ``seq`` yields an empty tuple.

    Unlike the naive implementation ``def unzip(seq): zip(*seq)`` this
    implementation can handle an infinite sequence ``seq``.

    Caveats:

    * The implementation uses ``tee``, and so can use a significant amount
      of auxiliary storage if the resulting iterators are consumed at
      different times.

    * The inner sequence cannot be infinite.  In Python 3 ``zip(*seq)`` can
      be used if ``seq`` is a finite sequence of infinite sequences.

    """
    stream = iter(seq)

    # Peek at the first element: its length fixes how many output
    # iterators have to be produced.
    try:
        head = tuple(next(stream))
    except StopIteration:
        return ()

    # Put the peeked element back in front, then give every position its
    # own independent copy of the stream.
    copies = tee(cons(head, stream), len(head))

    # The i-th result lazily picks item ``i`` out of each element.
    return tuple(pluck(index, copy) for index, copy in enumerate(copies))
|