first commit

Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

View File

@@ -0,0 +1,11 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
from gitdb.db.base import *
from gitdb.db.loose import *
from gitdb.db.mem import *
from gitdb.db.pack import *
from gitdb.db.git import *
from gitdb.db.ref import *

View File

@@ -0,0 +1,278 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Contains implementations of database retrieveing objects"""
from gitdb.util import (
join,
LazyMixin,
hex_to_bin
)
from gitdb.utils.encoding import force_text
from gitdb.exc import (
BadObject,
AmbiguousObjectName
)
from itertools import chain
from functools import reduce
__all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB')
class ObjectDBR(object):
"""Defines an interface for object database lookup.
Objects are identified either by their 20 byte bin sha"""
def __contains__(self, sha):
return self.has_obj
#{ Query Interface
def has_object(self, sha):
"""
Whether the object identified by the given 20 bytes
binary sha is contained in the database
:return: True if the object identified by the given 20 bytes
binary sha is contained in the database"""
raise NotImplementedError("To be implemented in subclass")
def info(self, sha):
""" :return: OInfo instance
:param sha: bytes binary sha
:raise BadObject:"""
raise NotImplementedError("To be implemented in subclass")
def stream(self, sha):
""":return: OStream instance
:param sha: 20 bytes binary sha
:raise BadObject:"""
raise NotImplementedError("To be implemented in subclass")
def size(self):
""":return: amount of objects in this database"""
raise NotImplementedError()
def sha_iter(self):
"""Return iterator yielding 20 byte shas for all objects in this data base"""
raise NotImplementedError()
#} END query interface
class ObjectDBW(object):
"""Defines an interface to create objects in the database"""
def __init__(self, *args, **kwargs):
self._ostream = None
#{ Edit Interface
def set_ostream(self, stream):
"""
Adjusts the stream to which all data should be sent when storing new objects
:param stream: if not None, the stream to use, if None the default stream
will be used.
:return: previously installed stream, or None if there was no override
:raise TypeError: if the stream doesn't have the supported functionality"""
cstream = self._ostream
self._ostream = stream
return cstream
def ostream(self):
"""
Return the output stream
:return: overridden output stream this instance will write to, or None
if it will write to the default stream"""
return self._ostream
def store(self, istream):
"""
Create a new object in the database
:return: the input istream object with its sha set to its corresponding value
:param istream: IStream compatible instance. If its sha is already set
to a value, the object will just be stored in the our database format,
in which case the input stream is expected to be in object format ( header + contents ).
:raise IOError: if data could not be written"""
raise NotImplementedError("To be implemented in subclass")
#} END edit interface
class FileDBBase(object):
"""Provides basic facilities to retrieve files of interest, including
caching facilities to help mapping hexsha's to objects"""
def __init__(self, root_path):
"""Initialize this instance to look for its files at the given root path
All subsequent operations will be relative to this path
:raise InvalidDBRoot:
**Note:** The base will not perform any accessablity checking as the base
might not yet be accessible, but become accessible before the first
access."""
super(FileDBBase, self).__init__()
self._root_path = root_path
#{ Interface
def root_path(self):
""":return: path at which this db operates"""
return self._root_path
def db_path(self, rela_path):
"""
:return: the given relative path relative to our database root, allowing
to pontentially access datafiles"""
return join(self._root_path, force_text(rela_path))
#} END interface
class CachingDB(object):
"""A database which uses caches to speed-up access"""
#{ Interface
def update_cache(self, force=False):
"""
Call this method if the underlying data changed to trigger an update
of the internal caching structures.
:param force: if True, the update must be performed. Otherwise the implementation
may decide not to perform an update if it thinks nothing has changed.
:return: True if an update was performed as something change indeed"""
# END interface
def _databases_recursive(database, output):
"""Fill output list with database from db, in order. Deals with Loose, Packed
and compound databases."""
if isinstance(database, CompoundDB):
dbs = database.databases()
output.extend(db for db in dbs if not isinstance(db, CompoundDB))
for cdb in (db for db in dbs if isinstance(db, CompoundDB)):
_databases_recursive(cdb, output)
else:
output.append(database)
# END handle database type
class CompoundDB(ObjectDBR, LazyMixin, CachingDB):
"""A database which delegates calls to sub-databases.
Databases are stored in the lazy-loaded _dbs attribute.
Define _set_cache_ to update it with your databases"""
def _set_cache_(self, attr):
if attr == '_dbs':
self._dbs = list()
elif attr == '_db_cache':
self._db_cache = dict()
else:
super(CompoundDB, self)._set_cache_(attr)
def _db_query(self, sha):
""":return: database containing the given 20 byte sha
:raise BadObject:"""
# most databases use binary representations, prevent converting
# it every time a database is being queried
try:
return self._db_cache[sha]
except KeyError:
pass
# END first level cache
for db in self._dbs:
if db.has_object(sha):
self._db_cache[sha] = db
return db
# END for each database
raise BadObject(sha)
#{ ObjectDBR interface
def has_object(self, sha):
try:
self._db_query(sha)
return True
except BadObject:
return False
# END handle exceptions
def info(self, sha):
return self._db_query(sha).info(sha)
def stream(self, sha):
return self._db_query(sha).stream(sha)
def size(self):
""":return: total size of all contained databases"""
return reduce(lambda x, y: x + y, (db.size() for db in self._dbs), 0)
def sha_iter(self):
return chain(*(db.sha_iter() for db in self._dbs))
#} END object DBR Interface
#{ Interface
def databases(self):
""":return: tuple of database instances we use for lookups"""
return tuple(self._dbs)
def update_cache(self, force=False):
# something might have changed, clear everything
self._db_cache.clear()
stat = False
for db in self._dbs:
if isinstance(db, CachingDB):
stat |= db.update_cache(force)
# END if is caching db
# END for each database to update
return stat
def partial_to_complete_sha_hex(self, partial_hexsha):
"""
:return: 20 byte binary sha1 from the given less-than-40 byte hexsha (bytes or str)
:param partial_hexsha: hexsha with less than 40 byte
:raise AmbiguousObjectName: """
databases = list()
_databases_recursive(self, databases)
partial_hexsha = force_text(partial_hexsha)
len_partial_hexsha = len(partial_hexsha)
if len_partial_hexsha % 2 != 0:
partial_binsha = hex_to_bin(partial_hexsha + "0")
else:
partial_binsha = hex_to_bin(partial_hexsha)
# END assure successful binary conversion
candidate = None
for db in databases:
full_bin_sha = None
try:
if hasattr(db, 'partial_to_complete_sha_hex'):
full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
else:
full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
# END handle database type
except BadObject:
continue
# END ignore bad objects
if full_bin_sha:
if candidate and candidate != full_bin_sha:
raise AmbiguousObjectName(partial_hexsha)
candidate = full_bin_sha
# END handle candidate
# END for each db
if not candidate:
raise BadObject(partial_binsha)
return candidate
#} END interface
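
A minimal sketch of how the compound lookup above is typically driven, assuming a repository at a placeholder path and a made-up sha prefix; GitDB (defined in a later file of this commit) is itself a CompoundDB:

from gitdb.db.git import GitDB
from gitdb.exc import AmbiguousObjectName, BadObject
from gitdb.util import bin_to_hex

db = GitDB("/path/to/repo/.git/objects")  # placeholder path
try:
    # resolve an abbreviated hex sha to the full 20 byte binary sha
    binsha = db.partial_to_complete_sha_hex("ab12cd")  # placeholder prefix
    print(bin_to_hex(binsha))  # full 40-character hex sha as bytes
except (BadObject, AmbiguousObjectName) as exc:
    print("could not resolve prefix:", exc)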

View File

@@ -0,0 +1,85 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
from gitdb.db.base import (
    CompoundDB,
    ObjectDBW,
    FileDBBase
)

from gitdb.db.loose import LooseObjectDB
from gitdb.db.pack import PackedDB
from gitdb.db.ref import ReferenceDB

from gitdb.exc import InvalidDBRoot

import os

__all__ = ('GitDB', )


class GitDB(FileDBBase, ObjectDBW, CompoundDB):

    """A git-style object database, which contains all objects in the 'objects'
    subdirectory

    ``IMPORTANT``: The usage of this implementation is highly discouraged as it fails to release file-handles.
    This can be a problem with long-running processes and/or big repositories.
    """
    # Configuration
    PackDBCls = PackedDB
    LooseDBCls = LooseObjectDB
    ReferenceDBCls = ReferenceDB

    # Directories
    packs_dir = 'pack'
    loose_dir = ''
    alternates_dir = os.path.join('info', 'alternates')

    def __init__(self, root_path):
        """Initialize ourselves on a git objects directory"""
        super(GitDB, self).__init__(root_path)

    def _set_cache_(self, attr):
        if attr == '_dbs' or attr == '_loose_db':
            self._dbs = list()
            loose_db = None
            for subpath, dbcls in ((self.packs_dir, self.PackDBCls),
                                   (self.loose_dir, self.LooseDBCls),
                                   (self.alternates_dir, self.ReferenceDBCls)):
                path = self.db_path(subpath)
                if os.path.exists(path):
                    self._dbs.append(dbcls(path))
                    if dbcls is self.LooseDBCls:
                        loose_db = self._dbs[-1]
                    # END remember loose db
                # END check path exists
            # END for each db type

            # should have at least one subdb
            if not self._dbs:
                raise InvalidDBRoot(self.root_path())
            # END handle error

            # the loose database is the only writable backend, hence it must exist
            assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality"

            # finally set the value
            self._loose_db = loose_db
        else:
            super(GitDB, self)._set_cache_(attr)
        # END handle attrs

    #{ ObjectDBW interface

    def store(self, istream):
        return self._loose_db.store(istream)

    def ostream(self):
        return self._loose_db.ostream()

    def set_ostream(self, ostream):
        return self._loose_db.set_ostream(ostream)

    #} END objectdbw interface
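
A minimal sketch of reading an object through GitDB, assuming a placeholder repository path and a placeholder hex sha:

from gitdb.db.git import GitDB
from gitdb.util import hex_to_bin

db = GitDB("/path/to/repo/.git/objects")  # placeholder path
binsha = hex_to_bin("95d09f2b10159347eece71399a7e2e907ea3df4f")  # placeholder sha
if db.has_object(binsha):
    info = db.info(binsha)           # OInfo carries type and size only
    print(info.type, info.size)
    data = db.stream(binsha).read()  # OStream yields the decompressed payload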

View File

@@ -0,0 +1,258 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
from gitdb.db.base import (
    FileDBBase,
    ObjectDBR,
    ObjectDBW
)

from gitdb.exc import (
    BadObject,
    AmbiguousObjectName
)

from gitdb.stream import (
    DecompressMemMapReader,
    FDCompressedSha1Writer,
    FDStream,
    Sha1Writer
)

from gitdb.base import (
    OStream,
    OInfo
)

from gitdb.util import (
    file_contents_ro_filepath,
    ENOENT,
    hex_to_bin,
    bin_to_hex,
    exists,
    chmod,
    isdir,
    isfile,
    remove,
    mkdir,
    rename,
    dirname,
    basename,
    join
)

from gitdb.fun import (
    chunk_size,
    loose_object_header_info,
    write_object,
    stream_copy
)

from gitdb.utils.encoding import force_bytes

import tempfile
import os
import sys

__all__ = ('LooseObjectDB', )


class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):

    """A database which operates on loose object files"""

    # CONFIGURATION
    # chunks in which data will be copied between streams
    stream_chunk_size = chunk_size

    # On windows we need to keep it writable, otherwise it cannot be removed
    # either
    new_objects_mode = int("444", 8)
    if os.name == 'nt':
        new_objects_mode = int("644", 8)

    def __init__(self, root_path):
        super(LooseObjectDB, self).__init__(root_path)
        self._hexsha_to_file = dict()
        # Additional Flags - might be set to 0 after the first failure
        # Depending on the root, this might work for some mounts, for others not, which
        # is why it is per instance
        self._fd_open_flags = getattr(os, 'O_NOATIME', 0)

    #{ Interface
    def object_path(self, hexsha):
        """
        :return: path at which the object with the given hexsha would be stored,
            relative to the database root"""
        return join(hexsha[:2], hexsha[2:])

    def readable_db_object_path(self, hexsha):
        """
        :return: readable object path to the object identified by hexsha
        :raise BadObject: If the object file does not exist"""
        try:
            return self._hexsha_to_file[hexsha]
        except KeyError:
            pass
        # END ignore cache misses

        # try filesystem
        path = self.db_path(self.object_path(hexsha))
        if exists(path):
            self._hexsha_to_file[hexsha] = path
            return path
        # END handle cache
        raise BadObject(hexsha)

    def partial_to_complete_sha_hex(self, partial_hexsha):
        """:return: 20 byte binary sha1 string which matches the given name uniquely
        :param partial_hexsha: hexadecimal partial name (bytes or ascii string)
        :raise AmbiguousObjectName:
        :raise BadObject: """
        candidate = None
        for binsha in self.sha_iter():
            if bin_to_hex(binsha).startswith(force_bytes(partial_hexsha)):
                # it can't ever find the same object twice
                if candidate is not None:
                    raise AmbiguousObjectName(partial_hexsha)
                candidate = binsha
        # END for each object
        if candidate is None:
            raise BadObject(partial_hexsha)
        return candidate

    #} END interface

    def _map_loose_object(self, sha):
        """
        :return: memory map of that file to allow random read access
        :raise BadObject: if object could not be located"""
        db_path = self.db_path(self.object_path(bin_to_hex(sha)))
        try:
            return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
        except OSError as e:
            if e.errno != ENOENT:
                # try again without noatime
                try:
                    return file_contents_ro_filepath(db_path)
                except OSError as new_e:
                    raise BadObject(sha) from new_e
                # didn't work because of our flag, don't try it again
                self._fd_open_flags = 0
            else:
                raise BadObject(sha) from e
            # END handle error
        # END exception handling

    def set_ostream(self, stream):
        """:raise TypeError: if the stream does not support the Sha1Writer interface"""
        if stream is not None and not isinstance(stream, Sha1Writer):
            raise TypeError("Output stream must support the %s interface" % Sha1Writer.__name__)
        return super(LooseObjectDB, self).set_ostream(stream)

    def info(self, sha):
        m = self._map_loose_object(sha)
        try:
            typ, size = loose_object_header_info(m)
            return OInfo(sha, typ, size)
        finally:
            if hasattr(m, 'close'):
                m.close()
        # END assure release of system resources

    def stream(self, sha):
        m = self._map_loose_object(sha)
        type, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True)
        return OStream(sha, type, size, stream)

    def has_object(self, sha):
        try:
            self.readable_db_object_path(bin_to_hex(sha))
            return True
        except BadObject:
            return False
        # END check existence

    def store(self, istream):
        """note: The sha we produce will be hex by nature"""
        tmp_path = None
        writer = self.ostream()
        if writer is None:
            # open a tmp file to write the data to
            fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)

            if istream.binsha is None:
                writer = FDCompressedSha1Writer(fd)
            else:
                writer = FDStream(fd)
            # END handle direct stream copies
        # END handle custom writer

        try:
            try:
                if istream.binsha is not None:
                    # copy as much as possible, the actual uncompressed item size might
                    # be smaller than the compressed version
                    stream_copy(istream.read, writer.write, sys.maxsize, self.stream_chunk_size)
                else:
                    # write object with header, we have to make a new one
                    write_object(istream.type, istream.size, istream.read, writer.write,
                                 chunk_size=self.stream_chunk_size)
                # END handle direct stream copies
            finally:
                if tmp_path:
                    writer.close()
            # END assure target stream is closed
        except:
            if tmp_path:
                os.remove(tmp_path)
            raise
        # END assure tmpfile removal on error

        hexsha = None
        if istream.binsha:
            hexsha = istream.hexsha
        else:
            hexsha = writer.sha(as_hex=True)
        # END handle sha

        if tmp_path:
            obj_path = self.db_path(self.object_path(hexsha))
            obj_dir = dirname(obj_path)
            if not isdir(obj_dir):
                mkdir(obj_dir)
            # END handle destination directory
            # rename onto existing doesn't work on NTFS
            if isfile(obj_path):
                remove(tmp_path)
            else:
                rename(tmp_path, obj_path)
            # end rename only if needed

            # make sure it's readable for all! It started out as an rw------- tmp file
            # but needs to be world-readable
            chmod(obj_path, self.new_objects_mode)
        # END handle dry_run

        istream.binsha = hex_to_bin(hexsha)
        return istream

    def sha_iter(self):
        # find all files which look like an object, extract sha from there
        for root, dirs, files in os.walk(self.root_path()):
            root_base = basename(root)
            if len(root_base) != 2:
                continue

            for f in files:
                if len(f) != 38:
                    continue
                yield hex_to_bin(root_base + f)
            # END for each file
        # END for each walk iteration

    def size(self):
        return len(tuple(self.sha_iter()))
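
A short sketch of the loose-object layout implied by object_path, with a placeholder root path and sha:

from gitdb.db.loose import LooseObjectDB

ldb = LooseObjectDB("/path/to/repo/.git/objects")  # placeholder path
hexsha = "95d09f2b10159347eece71399a7e2e907ea3df4f"  # placeholder sha
# the first two hex characters name the directory, the remaining 38 the file
print(ldb.object_path(hexsha))               # e.g. '95/d09f2b1015...' relative to the db root
print(ldb.db_path(ldb.object_path(hexsha)))  # absolute path of the loose object file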

View File

@@ -0,0 +1,110 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Contains the MemoryDatabase implementation"""
from gitdb.db.loose import LooseObjectDB
from gitdb.db.base import (
ObjectDBR,
ObjectDBW
)
from gitdb.base import (
OStream,
IStream,
)
from gitdb.exc import (
BadObject,
UnsupportedOperation
)
from gitdb.stream import (
ZippedStoreShaWriter,
DecompressMemMapReader,
)
from io import BytesIO
__all__ = ("MemoryDB", )
class MemoryDB(ObjectDBR, ObjectDBW):
"""A memory database stores everything to memory, providing fast IO and object
retrieval. It should be used to buffer results and obtain SHAs before writing
it to the actual physical storage, as it allows to query whether object already
exists in the target storage before introducing actual IO"""
def __init__(self):
super(MemoryDB, self).__init__()
self._db = LooseObjectDB("path/doesnt/matter")
# maps 20 byte shas to their OStream objects
self._cache = dict()
def set_ostream(self, stream):
raise UnsupportedOperation("MemoryDB's always stream into memory")
def store(self, istream):
zstream = ZippedStoreShaWriter()
self._db.set_ostream(zstream)
istream = self._db.store(istream)
zstream.close() # close to flush
zstream.seek(0)
# don't provide a size, the stream is written in object format, hence the
# header needs decompression
decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False)
self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream)
return istream
def has_object(self, sha):
return sha in self._cache
def info(self, sha):
# we always return streams, which are infos as well
return self.stream(sha)
def stream(self, sha):
try:
ostream = self._cache[sha]
# rewind stream for the next one to read
ostream.stream.seek(0)
return ostream
except KeyError as e:
raise BadObject(sha) from e
# END exception handling
def size(self):
return len(self._cache)
def sha_iter(self):
return self._cache.keys()
#{ Interface
def stream_copy(self, sha_iter, odb):
"""Copy the streams as identified by sha's yielded by sha_iter into the given odb
The streams will be copied directly
**Note:** the object will only be written if it did not exist in the target db
:return: amount of streams actually copied into odb. If smaller than the amount
of input shas, one or more objects did already exist in odb"""
count = 0
for sha in sha_iter:
if odb.has_object(sha):
continue
# END check object existence
ostream = self.stream(sha)
# compressed data including header
sio = BytesIO(ostream.stream.data())
istream = IStream(ostream.type, ostream.size, sio, sha)
odb.store(istream)
count += 1
# END for each sha
return count
#} END interface
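
A short sketch of the buffer-then-copy workflow the MemoryDB docstring describes; the paths are placeholders and the object type is assumed to be the bytes literal b"blob":

from io import BytesIO

from gitdb.base import IStream
from gitdb.db.git import GitDB
from gitdb.db.mem import MemoryDB

data = b"hello world\n"
mdb = MemoryDB()
# buffer the blob in memory; the sha is computed without touching the disk
istream = mdb.store(IStream(b"blob", len(data), BytesIO(data)))
print(istream.hexsha)

odb = GitDB("/path/to/repo/.git/objects")  # placeholder target database
# copy to the on-disk database only if the object is not already present there
copied = mdb.stream_copy([istream.binsha], odb)
print("objects copied:", copied)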

View File

@@ -0,0 +1,206 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Module containing a database to deal with packs"""
from gitdb.db.base import (
FileDBBase,
ObjectDBR,
CachingDB
)
from gitdb.util import LazyMixin
from gitdb.exc import (
BadObject,
UnsupportedOperation,
AmbiguousObjectName
)
from gitdb.pack import PackEntity
from functools import reduce
import os
import glob
__all__ = ('PackedDB', )
#{ Utilities
class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
"""A database operating on a set of object packs"""
# sort the priority list every N queries
# Higher values are better, performance tests don't show this has
# any effect, but it should have one
_sort_interval = 500
def __init__(self, root_path):
super(PackedDB, self).__init__(root_path)
# list of lists with three items:
# * hits - number of times the pack was hit with a request
# * entity - Pack entity instance
# * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
# self._entities = list() # lazy loaded list
self._hit_count = 0 # amount of hits
self._st_mtime = 0 # last modification data of our root path
def _set_cache_(self, attr):
if attr == '_entities':
self._entities = list()
self.update_cache(force=True)
# END handle entities initialization
def _sort_entities(self):
self._entities.sort(key=lambda l: l[0], reverse=True)
def _pack_info(self, sha):
""":return: tuple(entity, index) for an item at the given sha
:param sha: 20 or 40 byte sha
:raise BadObject:
**Note:** This method is not thread-safe, but may be hit in multi-threaded
operation. The worst thing that can happen though is a counter that
was not incremented, or the list being in wrong order. So we safe
the time for locking here, lets see how that goes"""
# presort ?
if self._hit_count % self._sort_interval == 0:
self._sort_entities()
# END update sorting
for item in self._entities:
index = item[2](sha)
if index is not None:
item[0] += 1 # one hit for you
self._hit_count += 1 # general hit count
return (item[1], index)
# END index found in pack
# END for each item
# no hit, see whether we have to update packs
# NOTE: considering packs don't change very often, we safe this call
# and leave it to the super-caller to trigger that
raise BadObject(sha)
#{ Object DB Read
def has_object(self, sha):
try:
self._pack_info(sha)
return True
except BadObject:
return False
# END exception handling
def info(self, sha):
entity, index = self._pack_info(sha)
return entity.info_at_index(index)
def stream(self, sha):
entity, index = self._pack_info(sha)
return entity.stream_at_index(index)
def sha_iter(self):
for entity in self.entities():
index = entity.index()
sha_by_index = index.sha
for index in range(index.size()):
yield sha_by_index(index)
# END for each index
# END for each entity
def size(self):
sizes = [item[1].index().size() for item in self._entities]
return reduce(lambda x, y: x + y, sizes, 0)
#} END object db read
#{ object db write
def store(self, istream):
"""Storing individual objects is not feasible as a pack is designed to
hold multiple objects. Writing or rewriting packs for single objects is
inefficient"""
raise UnsupportedOperation()
#} END object db write
#{ Interface
def update_cache(self, force=False):
"""
Update our cache with the acutally existing packs on disk. Add new ones,
and remove deleted ones. We keep the unchanged ones
:param force: If True, the cache will be updated even though the directory
does not appear to have changed according to its modification timestamp.
:return: True if the packs have been updated so there is new information,
False if there was no change to the pack database"""
stat = os.stat(self.root_path())
if not force and stat.st_mtime <= self._st_mtime:
return False
# END abort early on no change
self._st_mtime = stat.st_mtime
# packs are supposed to be prefixed with pack- by git-convention
# get all pack files, figure out what changed
pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
our_pack_files = {item[1].pack().path() for item in self._entities}
# new packs
for pack_file in (pack_files - our_pack_files):
# init the hit-counter/priority with the size, a good measure for hit-
# probability. Its implemented so that only 12 bytes will be read
entity = PackEntity(pack_file)
self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
# END for each new packfile
# removed packs
for pack_file in (our_pack_files - pack_files):
del_index = -1
for i, item in enumerate(self._entities):
if item[1].pack().path() == pack_file:
del_index = i
break
# END found index
# END for each entity
assert del_index != -1
del(self._entities[del_index])
# END for each removed pack
# reinitialize prioritiess
self._sort_entities()
return True
def entities(self):
""":return: list of pack entities operated upon by this database"""
return [item[1] for item in self._entities]
def partial_to_complete_sha(self, partial_binsha, canonical_length):
""":return: 20 byte sha as inferred by the given partial binary sha
:param partial_binsha: binary sha with less than 20 bytes
:param canonical_length: length of the corresponding canonical representation.
It is required as binary sha's cannot display whether the original hex sha
had an odd or even number of characters
:raise AmbiguousObjectName:
:raise BadObject: """
candidate = None
for item in self._entities:
item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length)
if item_index is not None:
sha = item[1].index().sha(item_index)
if candidate and candidate != sha:
raise AmbiguousObjectName(partial_binsha)
candidate = sha
# END handle full sha could be found
# END for each entity
if candidate:
return candidate
# still not found ?
raise BadObject(partial_binsha)
#} END interface
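
A short sketch of driving PackedDB directly, assuming a placeholder pack directory:

from gitdb.db.pack import PackedDB

pdb = PackedDB("/path/to/repo/.git/objects/pack")  # placeholder path
pdb.update_cache(force=True)   # scan the directory for pack-*.pack files
print(len(pdb.entities()), "packs,", pdb.size(), "objects")
for binsha in pdb.sha_iter():  # every object contained in any pack
    pass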

View File

@@ -0,0 +1,82 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
import codecs

from gitdb.db.base import (
    CompoundDB,
)

__all__ = ('ReferenceDB', )


class ReferenceDB(CompoundDB):

    """A database consisting of databases referred to in a file"""

    # Configuration
    # Specifies the object database to use for the paths found in the alternates
    # file. If None, it defaults to the GitDB
    ObjectDBCls = None

    def __init__(self, ref_file):
        super(ReferenceDB, self).__init__()
        self._ref_file = ref_file

    def _set_cache_(self, attr):
        if attr == '_dbs':
            self._dbs = list()
            self._update_dbs_from_ref_file()
        else:
            super(ReferenceDB, self)._set_cache_(attr)
        # END handle attrs

    def _update_dbs_from_ref_file(self):
        dbcls = self.ObjectDBCls
        if dbcls is None:
            # late import
            from gitdb.db.git import GitDB
            dbcls = GitDB
        # END get db type

        # try to get as many as possible, don't fail if some are unavailable
        ref_paths = list()
        try:
            with codecs.open(self._ref_file, 'r', encoding="utf-8") as f:
                ref_paths = [l.strip() for l in f]
        except (OSError, IOError):
            pass
        # END handle alternates

        ref_paths_set = set(ref_paths)
        cur_ref_paths_set = {db.root_path() for db in self._dbs}

        # remove existing
        for path in (cur_ref_paths_set - ref_paths_set):
            for i, db in enumerate(self._dbs[:]):
                if db.root_path() == path:
                    del(self._dbs[i])
                    continue
                # END del matching db
        # END for each path to remove

        # add new
        # sort them to maintain order
        added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p))
        for path in added_paths:
            try:
                db = dbcls(path)
                # force an update to verify path
                if isinstance(db, CompoundDB):
                    db.databases()
                # END verification
                self._dbs.append(db)
            except Exception:
                # ignore invalid paths or issues
                pass
        # END for each path to add

    def update_cache(self, force=False):
        # re-read alternates and update databases
        self._update_dbs_from_ref_file()
        return super(ReferenceDB, self).update_cache(force)
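
A short sketch of a ReferenceDB built from an alternates file, with a placeholder path:

from gitdb.db.ref import ReferenceDB

rdb = ReferenceDB("/path/to/repo/.git/objects/info/alternates")  # placeholder path
for db in rdb.databases():  # one database per readable path listed in the file
    print(db.root_path())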