Mirror of https://github.com/aykhans/AzSuicideDataVisualization.git (synced 2025-07-02 14:27:31 +00:00)

first commit
.venv/Lib/site-packages/gitdb/db/__init__.py (new file, 11 lines)
@@ -0,0 +1,11 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php

from gitdb.db.base import *
from gitdb.db.loose import *
from gitdb.db.mem import *
from gitdb.db.pack import *
from gitdb.db.git import *
from gitdb.db.ref import *
.venv/Lib/site-packages/gitdb/db/base.py (new file, 278 lines)
@@ -0,0 +1,278 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Contains implementations of databases for retrieving objects"""
from gitdb.util import (
    join,
    LazyMixin,
    hex_to_bin
)

from gitdb.utils.encoding import force_text
from gitdb.exc import (
    BadObject,
    AmbiguousObjectName
)

from itertools import chain
from functools import reduce


__all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB')


class ObjectDBR(object):

    """Defines an interface for object database lookup.
    Objects are identified by their 20 byte bin sha"""

    def __contains__(self, sha):
        return self.has_object(sha)

    #{ Query Interface
    def has_object(self, sha):
        """
        Whether the object identified by the given 20 bytes
        binary sha is contained in the database

        :return: True if the object identified by the given 20 bytes
            binary sha is contained in the database"""
        raise NotImplementedError("To be implemented in subclass")

    def info(self, sha):
        """ :return: OInfo instance
        :param sha: bytes binary sha
        :raise BadObject:"""
        raise NotImplementedError("To be implemented in subclass")

    def stream(self, sha):
        """:return: OStream instance
        :param sha: 20 bytes binary sha
        :raise BadObject:"""
        raise NotImplementedError("To be implemented in subclass")

    def size(self):
        """:return: amount of objects in this database"""
        raise NotImplementedError()

    def sha_iter(self):
        """Return iterator yielding 20 byte shas for all objects in this data base"""
        raise NotImplementedError()

    #} END query interface


class ObjectDBW(object):

    """Defines an interface to create objects in the database"""

    def __init__(self, *args, **kwargs):
        self._ostream = None

    #{ Edit Interface
    def set_ostream(self, stream):
        """
        Adjusts the stream to which all data should be sent when storing new objects

        :param stream: if not None, the stream to use, if None the default stream
            will be used.
        :return: previously installed stream, or None if there was no override
        :raise TypeError: if the stream doesn't have the supported functionality"""
        cstream = self._ostream
        self._ostream = stream
        return cstream

    def ostream(self):
        """
        Return the output stream

        :return: overridden output stream this instance will write to, or None
            if it will write to the default stream"""
        return self._ostream

    def store(self, istream):
        """
        Create a new object in the database
        :return: the input istream object with its sha set to its corresponding value

        :param istream: IStream compatible instance. If its sha is already set
            to a value, the object will just be stored in our database format,
            in which case the input stream is expected to be in object format ( header + contents ).
        :raise IOError: if data could not be written"""
        raise NotImplementedError("To be implemented in subclass")

    #} END edit interface


class FileDBBase(object):

    """Provides basic facilities to retrieve files of interest, including
    caching facilities to help mapping hexsha's to objects"""

    def __init__(self, root_path):
        """Initialize this instance to look for its files at the given root path
        All subsequent operations will be relative to this path
        :raise InvalidDBRoot:
        **Note:** The base will not perform any accessibility checking as the base
            might not yet be accessible, but become accessible before the first
            access."""
        super(FileDBBase, self).__init__()
        self._root_path = root_path

    #{ Interface
    def root_path(self):
        """:return: path at which this db operates"""
        return self._root_path

    def db_path(self, rela_path):
        """
        :return: the given relative path relative to our database root, allowing
            to potentially access datafiles"""
        return join(self._root_path, force_text(rela_path))
    #} END interface


class CachingDB(object):

    """A database which uses caches to speed-up access"""

    #{ Interface
    def update_cache(self, force=False):
        """
        Call this method if the underlying data changed to trigger an update
        of the internal caching structures.

        :param force: if True, the update must be performed. Otherwise the implementation
            may decide not to perform an update if it thinks nothing has changed.
        :return: True if an update was performed because something indeed changed"""

    # END interface


def _databases_recursive(database, output):
    """Fill output list with databases from db, in order. Deals with Loose, Packed
    and compound databases."""
    if isinstance(database, CompoundDB):
        dbs = database.databases()
        output.extend(db for db in dbs if not isinstance(db, CompoundDB))
        for cdb in (db for db in dbs if isinstance(db, CompoundDB)):
            _databases_recursive(cdb, output)
    else:
        output.append(database)
    # END handle database type


class CompoundDB(ObjectDBR, LazyMixin, CachingDB):

    """A database which delegates calls to sub-databases.

    Databases are stored in the lazy-loaded _dbs attribute.
    Define _set_cache_ to update it with your databases"""

    def _set_cache_(self, attr):
        if attr == '_dbs':
            self._dbs = list()
        elif attr == '_db_cache':
            self._db_cache = dict()
        else:
            super(CompoundDB, self)._set_cache_(attr)

    def _db_query(self, sha):
        """:return: database containing the given 20 byte sha
        :raise BadObject:"""
        # most databases use binary representations, prevent converting
        # it every time a database is being queried
        try:
            return self._db_cache[sha]
        except KeyError:
            pass
        # END first level cache

        for db in self._dbs:
            if db.has_object(sha):
                self._db_cache[sha] = db
                return db
        # END for each database
        raise BadObject(sha)

    #{ ObjectDBR interface

    def has_object(self, sha):
        try:
            self._db_query(sha)
            return True
        except BadObject:
            return False
        # END handle exceptions

    def info(self, sha):
        return self._db_query(sha).info(sha)

    def stream(self, sha):
        return self._db_query(sha).stream(sha)

    def size(self):
        """:return: total size of all contained databases"""
        return reduce(lambda x, y: x + y, (db.size() for db in self._dbs), 0)

    def sha_iter(self):
        return chain(*(db.sha_iter() for db in self._dbs))

    #} END object DBR Interface

    #{ Interface

    def databases(self):
        """:return: tuple of database instances we use for lookups"""
        return tuple(self._dbs)

    def update_cache(self, force=False):
        # something might have changed, clear everything
        self._db_cache.clear()
        stat = False
        for db in self._dbs:
            if isinstance(db, CachingDB):
                stat |= db.update_cache(force)
            # END if is caching db
        # END for each database to update
        return stat

    def partial_to_complete_sha_hex(self, partial_hexsha):
        """
        :return: 20 byte binary sha1 from the given less-than-40 byte hexsha (bytes or str)
        :param partial_hexsha: hexsha with less than 40 bytes
        :raise AmbiguousObjectName: """
        databases = list()
        _databases_recursive(self, databases)
        partial_hexsha = force_text(partial_hexsha)
        len_partial_hexsha = len(partial_hexsha)
        if len_partial_hexsha % 2 != 0:
            partial_binsha = hex_to_bin(partial_hexsha + "0")
        else:
            partial_binsha = hex_to_bin(partial_hexsha)
        # END assure successful binary conversion

        candidate = None
        for db in databases:
            full_bin_sha = None
            try:
                if hasattr(db, 'partial_to_complete_sha_hex'):
                    full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
                else:
                    full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
                # END handle database type
            except BadObject:
                continue
            # END ignore bad objects
            if full_bin_sha:
                if candidate and candidate != full_bin_sha:
                    raise AmbiguousObjectName(partial_hexsha)
                candidate = full_bin_sha
            # END handle candidate
        # END for each db
        if not candidate:
            raise BadObject(partial_binsha)
        return candidate

    #} END interface
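A minimal sketch of how the read interface above is meant to be filled in, assuming the vendored gitdb package is importable. DictDB is a hypothetical example class, not part of gitdb; it backs the ObjectDBR query interface with a plain dict mapping binary shas to precomputed info values.

from gitdb.db.base import ObjectDBR
from gitdb.exc import BadObject


class DictDB(ObjectDBR):

    def __init__(self, infos):
        # maps 20 byte binary shas to OInfo instances (or any stand-in)
        self._infos = dict(infos)

    def has_object(self, sha):
        return sha in self._infos

    def info(self, sha):
        try:
            return self._infos[sha]
        except KeyError as e:
            raise BadObject(sha) from e

    def size(self):
        return len(self._infos)

    def sha_iter(self):
        return iter(self._infos)


db = DictDB({})
assert db.size() == 0
assert (b"\x00" * 20 in db) is False    # __contains__ delegates to has_object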
.venv/Lib/site-packages/gitdb/db/git.py (new file, 85 lines)
@@ -0,0 +1,85 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
from gitdb.db.base import (
    CompoundDB,
    ObjectDBW,
    FileDBBase
)

from gitdb.db.loose import LooseObjectDB
from gitdb.db.pack import PackedDB
from gitdb.db.ref import ReferenceDB

from gitdb.exc import InvalidDBRoot

import os

__all__ = ('GitDB', )


class GitDB(FileDBBase, ObjectDBW, CompoundDB):

    """A git-style object database, which contains all objects in the 'objects'
    subdirectory

    ``IMPORTANT``: The usage of this implementation is highly discouraged as it fails to release file-handles.
    This can be a problem with long-running processes and/or big repositories.
    """
    # Configuration
    PackDBCls = PackedDB
    LooseDBCls = LooseObjectDB
    ReferenceDBCls = ReferenceDB

    # Directories
    packs_dir = 'pack'
    loose_dir = ''
    alternates_dir = os.path.join('info', 'alternates')

    def __init__(self, root_path):
        """Initialize ourselves on a git objects directory"""
        super(GitDB, self).__init__(root_path)

    def _set_cache_(self, attr):
        if attr == '_dbs' or attr == '_loose_db':
            self._dbs = list()
            loose_db = None
            for subpath, dbcls in ((self.packs_dir, self.PackDBCls),
                                   (self.loose_dir, self.LooseDBCls),
                                   (self.alternates_dir, self.ReferenceDBCls)):
                path = self.db_path(subpath)
                if os.path.exists(path):
                    self._dbs.append(dbcls(path))
                    if dbcls is self.LooseDBCls:
                        loose_db = self._dbs[-1]
                    # END remember loose db
                # END check path exists
            # END for each db type

            # should have at least one subdb
            if not self._dbs:
                raise InvalidDBRoot(self.root_path())
            # END handle error

            # the first one should have the store method
            assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality"

            # finally set the value
            self._loose_db = loose_db
        else:
            super(GitDB, self)._set_cache_(attr)
        # END handle attrs

    #{ ObjectDBW interface

    def store(self, istream):
        return self._loose_db.store(istream)

    def ostream(self):
        return self._loose_db.ostream()

    def set_ostream(self, ostream):
        return self._loose_db.set_ostream(ostream)

    #} END objectdbw interface
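Illustrative usage sketch only, assuming the vendored gitdb package is importable; the repository path and object sha below are hypothetical. GitDB composes a PackedDB, a LooseObjectDB and a ReferenceDB over a .git/objects directory and answers read queries through the CompoundDB machinery while writes go to the loose database.

from gitdb.db import GitDB
from gitdb.util import hex_to_bin

gdb = GitDB("/path/to/repo/.git/objects")                           # hypothetical path
binsha = hex_to_bin("c1397c0d9a9a5a4f2a2e8b2e3b1f3d2c4e5f6a7b")     # hypothetical sha
if gdb.has_object(binsha):
    info = gdb.info(binsha)      # OInfo with binsha, type and size
    print(info.type, info.size)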
.venv/Lib/site-packages/gitdb/db/loose.py (new file, 258 lines)
@@ -0,0 +1,258 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
from gitdb.db.base import (
    FileDBBase,
    ObjectDBR,
    ObjectDBW
)


from gitdb.exc import (
    BadObject,
    AmbiguousObjectName
)

from gitdb.stream import (
    DecompressMemMapReader,
    FDCompressedSha1Writer,
    FDStream,
    Sha1Writer
)

from gitdb.base import (
    OStream,
    OInfo
)

from gitdb.util import (
    file_contents_ro_filepath,
    ENOENT,
    hex_to_bin,
    bin_to_hex,
    exists,
    chmod,
    isdir,
    isfile,
    remove,
    mkdir,
    rename,
    dirname,
    basename,
    join
)

from gitdb.fun import (
    chunk_size,
    loose_object_header_info,
    write_object,
    stream_copy
)

from gitdb.utils.encoding import force_bytes

import tempfile
import os
import sys


__all__ = ('LooseObjectDB', )


class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):

    """A database which operates on loose object files"""

    # CONFIGURATION
    # chunks in which data will be copied between streams
    stream_chunk_size = chunk_size

    # On windows we need to keep it writable, otherwise it cannot be removed
    # either
    new_objects_mode = int("444", 8)
    if os.name == 'nt':
        new_objects_mode = int("644", 8)

    def __init__(self, root_path):
        super(LooseObjectDB, self).__init__(root_path)
        self._hexsha_to_file = dict()
        # Additional Flags - might be set to 0 after the first failure
        # Depending on the root, this might work for some mounts, for others not, which
        # is why it is per instance
        self._fd_open_flags = getattr(os, 'O_NOATIME', 0)

    #{ Interface
    def object_path(self, hexsha):
        """
        :return: path at which the object with the given hexsha would be stored,
            relative to the database root"""
        return join(hexsha[:2], hexsha[2:])

    def readable_db_object_path(self, hexsha):
        """
        :return: readable object path to the object identified by hexsha
        :raise BadObject: If the object file does not exist"""
        try:
            return self._hexsha_to_file[hexsha]
        except KeyError:
            pass
        # END ignore cache misses

        # try filesystem
        path = self.db_path(self.object_path(hexsha))
        if exists(path):
            self._hexsha_to_file[hexsha] = path
            return path
        # END handle cache
        raise BadObject(hexsha)

    def partial_to_complete_sha_hex(self, partial_hexsha):
        """:return: 20 byte binary sha1 string which matches the given name uniquely
        :param name: hexadecimal partial name (bytes or ascii string)
        :raise AmbiguousObjectName:
        :raise BadObject: """
        candidate = None
        for binsha in self.sha_iter():
            if bin_to_hex(binsha).startswith(force_bytes(partial_hexsha)):
                # it can't ever find the same object twice
                if candidate is not None:
                    raise AmbiguousObjectName(partial_hexsha)
                candidate = binsha
        # END for each object
        if candidate is None:
            raise BadObject(partial_hexsha)
        return candidate

    #} END interface

    def _map_loose_object(self, sha):
        """
        :return: memory map of that file to allow random read access
        :raise BadObject: if object could not be located"""
        db_path = self.db_path(self.object_path(bin_to_hex(sha)))
        try:
            return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
        except OSError as e:
            if e.errno != ENOENT:
                # try again without noatime
                try:
                    return file_contents_ro_filepath(db_path)
                except OSError as new_e:
                    raise BadObject(sha) from new_e
                # didn't work because of our flag, don't try it again
                self._fd_open_flags = 0
            else:
                raise BadObject(sha) from e
            # END handle error
        # END exception handling

    def set_ostream(self, stream):
        """:raise TypeError: if the stream does not support the Sha1Writer interface"""
        if stream is not None and not isinstance(stream, Sha1Writer):
            raise TypeError("Output stream must support the %s interface" % Sha1Writer.__name__)
        return super(LooseObjectDB, self).set_ostream(stream)

    def info(self, sha):
        m = self._map_loose_object(sha)
        try:
            typ, size = loose_object_header_info(m)
            return OInfo(sha, typ, size)
        finally:
            if hasattr(m, 'close'):
                m.close()
        # END assure release of system resources

    def stream(self, sha):
        m = self._map_loose_object(sha)
        type, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True)
        return OStream(sha, type, size, stream)

    def has_object(self, sha):
        try:
            self.readable_db_object_path(bin_to_hex(sha))
            return True
        except BadObject:
            return False
        # END check existence

    def store(self, istream):
        """note: The sha we produce will be hex by nature"""
        tmp_path = None
        writer = self.ostream()
        if writer is None:
            # open a tmp file to write the data to
            fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)

            if istream.binsha is None:
                writer = FDCompressedSha1Writer(fd)
            else:
                writer = FDStream(fd)
            # END handle direct stream copies
        # END handle custom writer

        try:
            try:
                if istream.binsha is not None:
                    # copy as much as possible, the actual uncompressed item size might
                    # be smaller than the compressed version
                    stream_copy(istream.read, writer.write, sys.maxsize, self.stream_chunk_size)
                else:
                    # write object with header, we have to make a new one
                    write_object(istream.type, istream.size, istream.read, writer.write,
                                 chunk_size=self.stream_chunk_size)
                # END handle direct stream copies
            finally:
                if tmp_path:
                    writer.close()
            # END assure target stream is closed
        except:
            if tmp_path:
                os.remove(tmp_path)
            raise
        # END assure tmpfile removal on error

        hexsha = None
        if istream.binsha:
            hexsha = istream.hexsha
        else:
            hexsha = writer.sha(as_hex=True)
        # END handle sha

        if tmp_path:
            obj_path = self.db_path(self.object_path(hexsha))
            obj_dir = dirname(obj_path)
            if not isdir(obj_dir):
                mkdir(obj_dir)
            # END handle destination directory
            # rename onto existing doesn't work on NTFS
            if isfile(obj_path):
                remove(tmp_path)
            else:
                rename(tmp_path, obj_path)
            # end rename only if needed

            # make sure it's readable for all! It started out as a rw-- tmp file
            # but needs to be rwrr
            chmod(obj_path, self.new_objects_mode)
        # END handle dry_run

        istream.binsha = hex_to_bin(hexsha)
        return istream

    def sha_iter(self):
        # find all files which look like an object, extract sha from there
        for root, dirs, files in os.walk(self.root_path()):
            root_base = basename(root)
            if len(root_base) != 2:
                continue

            for f in files:
                if len(f) != 38:
                    continue
                yield hex_to_bin(root_base + f)
            # END for each file
        # END for each walk iteration

    def size(self):
        return len(tuple(self.sha_iter()))
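A small round-trip sketch, assuming the vendored gitdb package is importable and a writable temporary directory is available. store() computes the sha while compressing the object into a temp file and then renames it into the <sha[:2]>/<sha[2:]> layout; stream() decompresses it back.

import tempfile
from io import BytesIO

from gitdb.base import IStream
from gitdb.db.loose import LooseObjectDB

data = b"hello gitdb"
ldb = LooseObjectDB(tempfile.mkdtemp(prefix="loose-odb-"))

# store a blob; the returned istream has its binsha filled in
istream = ldb.store(IStream(b"blob", len(data), BytesIO(data)))
assert ldb.has_object(istream.binsha)

# read it back as a decompressed stream
ostream = ldb.stream(istream.binsha)
assert ostream.read() == data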
.venv/Lib/site-packages/gitdb/db/mem.py (new file, 110 lines)
@@ -0,0 +1,110 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Contains the MemoryDatabase implementation"""
from gitdb.db.loose import LooseObjectDB
from gitdb.db.base import (
    ObjectDBR,
    ObjectDBW
)

from gitdb.base import (
    OStream,
    IStream,
)

from gitdb.exc import (
    BadObject,
    UnsupportedOperation
)

from gitdb.stream import (
    ZippedStoreShaWriter,
    DecompressMemMapReader,
)

from io import BytesIO

__all__ = ("MemoryDB", )


class MemoryDB(ObjectDBR, ObjectDBW):

    """A memory database stores everything to memory, providing fast IO and object
    retrieval. It should be used to buffer results and obtain SHAs before writing
    it to the actual physical storage, as it allows to query whether object already
    exists in the target storage before introducing actual IO"""

    def __init__(self):
        super(MemoryDB, self).__init__()
        self._db = LooseObjectDB("path/doesnt/matter")

        # maps 20 byte shas to their OStream objects
        self._cache = dict()

    def set_ostream(self, stream):
        raise UnsupportedOperation("MemoryDB's always stream into memory")

    def store(self, istream):
        zstream = ZippedStoreShaWriter()
        self._db.set_ostream(zstream)

        istream = self._db.store(istream)
        zstream.close()  # close to flush
        zstream.seek(0)

        # don't provide a size, the stream is written in object format, hence the
        # header needs decompression
        decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False)
        self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream)

        return istream

    def has_object(self, sha):
        return sha in self._cache

    def info(self, sha):
        # we always return streams, which are infos as well
        return self.stream(sha)

    def stream(self, sha):
        try:
            ostream = self._cache[sha]
            # rewind stream for the next one to read
            ostream.stream.seek(0)
            return ostream
        except KeyError as e:
            raise BadObject(sha) from e
        # END exception handling

    def size(self):
        return len(self._cache)

    def sha_iter(self):
        return self._cache.keys()

    #{ Interface
    def stream_copy(self, sha_iter, odb):
        """Copy the streams as identified by sha's yielded by sha_iter into the given odb
        The streams will be copied directly
        **Note:** the object will only be written if it did not exist in the target db

        :return: amount of streams actually copied into odb. If smaller than the amount
            of input shas, one or more objects did already exist in odb"""
        count = 0
        for sha in sha_iter:
            if odb.has_object(sha):
                continue
            # END check object existence

            ostream = self.stream(sha)
            # compressed data including header
            sio = BytesIO(ostream.stream.data())
            istream = IStream(ostream.type, ostream.size, sio, sha)

            odb.store(istream)
            count += 1
        # END for each sha
        return count
    #} END interface
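A sketch of the buffering pattern the MemoryDB docstring describes, assuming the vendored gitdb package is importable; the target directory is a throwaway temp path. Objects are staged in memory first, and stream_copy() then writes only the ones the target database does not already have.

import tempfile
from io import BytesIO

from gitdb.base import IStream
from gitdb.db.loose import LooseObjectDB
from gitdb.db.mem import MemoryDB

mdb = MemoryDB()
istream = mdb.store(IStream(b"blob", 5, BytesIO(b"hello")))   # staged in memory only

target = LooseObjectDB(tempfile.mkdtemp(prefix="target-odb-"))
copied = mdb.stream_copy(mdb.sha_iter(), target)   # skips shas the target already has
assert copied == 1 and target.has_object(istream.binsha)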
.venv/Lib/site-packages/gitdb/db/pack.py (new file, 206 lines)
@@ -0,0 +1,206 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Module containing a database to deal with packs"""
from gitdb.db.base import (
    FileDBBase,
    ObjectDBR,
    CachingDB
)

from gitdb.util import LazyMixin

from gitdb.exc import (
    BadObject,
    UnsupportedOperation,
    AmbiguousObjectName
)

from gitdb.pack import PackEntity

from functools import reduce

import os
import glob

__all__ = ('PackedDB', )

#{ Utilities


class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):

    """A database operating on a set of object packs"""

    # sort the priority list every N queries
    # Higher values are better, performance tests don't show this has
    # any effect, but it should have one
    _sort_interval = 500

    def __init__(self, root_path):
        super(PackedDB, self).__init__(root_path)
        # list of lists with three items:
        # * hits - number of times the pack was hit with a request
        # * entity - Pack entity instance
        # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
        # self._entities = list() # lazy loaded list
        self._hit_count = 0     # amount of hits
        self._st_mtime = 0      # last modification data of our root path

    def _set_cache_(self, attr):
        if attr == '_entities':
            self._entities = list()
            self.update_cache(force=True)
        # END handle entities initialization

    def _sort_entities(self):
        self._entities.sort(key=lambda l: l[0], reverse=True)

    def _pack_info(self, sha):
        """:return: tuple(entity, index) for an item at the given sha
        :param sha: 20 or 40 byte sha
        :raise BadObject:
        **Note:** This method is not thread-safe, but may be hit in multi-threaded
            operation. The worst thing that can happen though is a counter that
            was not incremented, or the list being in wrong order. So we save
            the time for locking here, lets see how that goes"""
        # presort ?
        if self._hit_count % self._sort_interval == 0:
            self._sort_entities()
        # END update sorting

        for item in self._entities:
            index = item[2](sha)
            if index is not None:
                item[0] += 1            # one hit for you
                self._hit_count += 1    # general hit count
                return (item[1], index)
            # END index found in pack
        # END for each item

        # no hit, see whether we have to update packs
        # NOTE: considering packs don't change very often, we save this call
        # and leave it to the super-caller to trigger that
        raise BadObject(sha)

    #{ Object DB Read

    def has_object(self, sha):
        try:
            self._pack_info(sha)
            return True
        except BadObject:
            return False
        # END exception handling

    def info(self, sha):
        entity, index = self._pack_info(sha)
        return entity.info_at_index(index)

    def stream(self, sha):
        entity, index = self._pack_info(sha)
        return entity.stream_at_index(index)

    def sha_iter(self):
        for entity in self.entities():
            index = entity.index()
            sha_by_index = index.sha
            for index in range(index.size()):
                yield sha_by_index(index)
            # END for each index
        # END for each entity

    def size(self):
        sizes = [item[1].index().size() for item in self._entities]
        return reduce(lambda x, y: x + y, sizes, 0)

    #} END object db read

    #{ object db write

    def store(self, istream):
        """Storing individual objects is not feasible as a pack is designed to
        hold multiple objects. Writing or rewriting packs for single objects is
        inefficient"""
        raise UnsupportedOperation()

    #} END object db write

    #{ Interface

    def update_cache(self, force=False):
        """
        Update our cache with the actually existing packs on disk. Add new ones,
        and remove deleted ones. We keep the unchanged ones

        :param force: If True, the cache will be updated even though the directory
            does not appear to have changed according to its modification timestamp.
        :return: True if the packs have been updated so there is new information,
            False if there was no change to the pack database"""
        stat = os.stat(self.root_path())
        if not force and stat.st_mtime <= self._st_mtime:
            return False
        # END abort early on no change
        self._st_mtime = stat.st_mtime

        # packs are supposed to be prefixed with pack- by git-convention
        # get all pack files, figure out what changed
        pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
        our_pack_files = {item[1].pack().path() for item in self._entities}

        # new packs
        for pack_file in (pack_files - our_pack_files):
            # init the hit-counter/priority with the size, a good measure for hit-
            # probability. It's implemented so that only 12 bytes will be read
            entity = PackEntity(pack_file)
            self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
        # END for each new packfile

        # removed packs
        for pack_file in (our_pack_files - pack_files):
            del_index = -1
            for i, item in enumerate(self._entities):
                if item[1].pack().path() == pack_file:
                    del_index = i
                    break
                # END found index
            # END for each entity
            assert del_index != -1
            del(self._entities[del_index])
        # END for each removed pack

        # reinitialize priorities
        self._sort_entities()
        return True

    def entities(self):
        """:return: list of pack entities operated upon by this database"""
        return [item[1] for item in self._entities]

    def partial_to_complete_sha(self, partial_binsha, canonical_length):
        """:return: 20 byte sha as inferred by the given partial binary sha
        :param partial_binsha: binary sha with less than 20 bytes
        :param canonical_length: length of the corresponding canonical representation.
            It is required as binary sha's cannot display whether the original hex sha
            had an odd or even number of characters
        :raise AmbiguousObjectName:
        :raise BadObject: """
        candidate = None
        for item in self._entities:
            item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length)
            if item_index is not None:
                sha = item[1].index().sha(item_index)
                if candidate and candidate != sha:
                    raise AmbiguousObjectName(partial_binsha)
                candidate = sha
            # END handle full sha could be found
        # END for each entity

        if candidate:
            return candidate

        # still not found ?
        raise BadObject(partial_binsha)

    #} END interface
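Hypothetical usage sketch, assuming the vendored gitdb package is importable and the path points at an existing .git/objects/pack directory. PackedDB keeps one [hits, entity, sha_to_index] record per pack and re-sorts those records by hit count every _sort_interval queries, so frequently hit packs are probed first.

from gitdb.db.pack import PackedDB
from gitdb.util import bin_to_hex

pdb = PackedDB("/path/to/repo/.git/objects/pack")   # hypothetical path
pdb.update_cache(force=True)                        # scan pack-*.pack files on disk
print(pdb.size(), "packed objects in", len(pdb.entities()), "packs")

for binsha in pdb.sha_iter():
    # print the first packed object's sha in hex form
    print(bin_to_hex(binsha).decode("ascii"))
    break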
.venv/Lib/site-packages/gitdb/db/ref.py (new file, 82 lines)
@@ -0,0 +1,82 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
import codecs
from gitdb.db.base import (
    CompoundDB,
)

__all__ = ('ReferenceDB', )


class ReferenceDB(CompoundDB):

    """A database consisting of databases referred to in a file"""

    # Configuration
    # Specifies the object database to use for the paths found in the alternates
    # file. If None, it defaults to the GitDB
    ObjectDBCls = None

    def __init__(self, ref_file):
        super(ReferenceDB, self).__init__()
        self._ref_file = ref_file

    def _set_cache_(self, attr):
        if attr == '_dbs':
            self._dbs = list()
            self._update_dbs_from_ref_file()
        else:
            super(ReferenceDB, self)._set_cache_(attr)
        # END handle attrs

    def _update_dbs_from_ref_file(self):
        dbcls = self.ObjectDBCls
        if dbcls is None:
            # late import
            from gitdb.db.git import GitDB
            dbcls = GitDB
        # END get db type

        # try to get as many as possible, don't fail if some are unavailable
        ref_paths = list()
        try:
            with codecs.open(self._ref_file, 'r', encoding="utf-8") as f:
                ref_paths = [l.strip() for l in f]
        except (OSError, IOError):
            pass
        # END handle alternates

        ref_paths_set = set(ref_paths)
        cur_ref_paths_set = {db.root_path() for db in self._dbs}

        # remove existing
        for path in (cur_ref_paths_set - ref_paths_set):
            for i, db in enumerate(self._dbs[:]):
                if db.root_path() == path:
                    del(self._dbs[i])
                    continue
                # END del matching db
            # END for each path to remove

        # add new
        # sort them to maintain order
        added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p))
        for path in added_paths:
            try:
                db = dbcls(path)
                # force an update to verify path
                if isinstance(db, CompoundDB):
                    db.databases()
                # END verification
                self._dbs.append(db)
            except Exception:
                # ignore invalid paths or issues
                pass
        # END for each path to add

    def update_cache(self, force=False):
        # re-read alternates and update databases
        self._update_dbs_from_ref_file()
        return super(ReferenceDB, self).update_cache(force)
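A final hypothetical sketch, assuming the vendored gitdb package is importable; the alternates path below is made up. ReferenceDB reads one object-database path per line from the referenced file and mounts a GitDB (by default) for each path that can be opened, skipping the ones that cannot.

from gitdb.db.ref import ReferenceDB

rdb = ReferenceDB("/path/to/repo/.git/objects/info/alternates")   # hypothetical path
rdb.update_cache()   # re-read the alternates file and refresh the sub-databases
print([db.root_path() for db in rdb.databases()])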