first commit

commit d660f2a4ca
Author: Ayxan
Date:   2022-05-23 00:16:32 +04:00

24786 changed files with 4428337 additions and 0 deletions


@@ -0,0 +1,4 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php


@@ -0,0 +1,207 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Utilities used in ODB testing"""
from gitdb import OStream
import sys
import random
from array import array
from io import BytesIO
import glob
import unittest
import tempfile
import shutil
import os
import gc
import logging
from functools import wraps
#{ Bases
class TestBase(unittest.TestCase):
"""Base class for all tests
TestCase providing access to readonly repositories using the following member variables.
* gitrepopath
* read-only base path of the git source repository, i.e. .../git/.git
"""
#{ Invvariants
k_env_git_repo = "GITDB_TEST_GIT_REPO_BASE"
#} END invariants
    @classmethod
    def setUpClass(cls):
        try:
            super(TestBase, cls).setUpClass()
        except AttributeError:
            pass

        cls.gitrepopath = os.environ.get(cls.k_env_git_repo)
        if not cls.gitrepopath:
            logging.info(
                "You can set the %s environment variable to a .git repository of your choice - defaulting to the gitdb repository", cls.k_env_git_repo)
            ospd = os.path.dirname
            cls.gitrepopath = os.path.join(ospd(ospd(ospd(__file__))), '.git')
        # end assure gitrepo is set
        assert cls.gitrepopath.endswith('.git')

#} END bases
#{ Decorators

def skip_on_travis_ci(func):
    """All tests decorated with this one will raise SkipTest when run on travis ci.
    Use it to work around difficult-to-solve issues.
    NOTE: copied from bcore (https://github.com/Byron/bcore)"""
    @wraps(func)
    def wrapper(self, *args, **kwargs):
        if 'TRAVIS' in os.environ:
            import nose
            raise nose.SkipTest("Cannot run on travis-ci")
        # end check for travis ci
        return func(self, *args, **kwargs)
    # end wrapper
    return wrapper


def with_rw_directory(func):
    """Create a temporary directory which can be written to, remove it if the
    test succeeds, but leave it otherwise to aid additional debugging"""

    def wrapper(self):
        path = tempfile.mktemp(prefix=func.__name__)
        os.mkdir(path)
        keep = False
        try:
            try:
                return func(self, path)
            except Exception:
                sys.stderr.write("Test {}.{} failed, output is at {!r}\n".format(type(self).__name__, func.__name__, path))
                keep = True
                raise
        finally:
            # Need to collect here to be sure all handles have been closed. It appears
            # to be a windows-only issue. In fact things should be deleted, as well as
            # memory maps closed, once objects go out of scope. For some reason
            # though this is not the case here unless we collect explicitly.
            if not keep:
                gc.collect()
                shutil.rmtree(path)
        # END handle exception
    # END wrapper

    wrapper.__name__ = func.__name__
    return wrapper


def with_packs_rw(func):
    """Function that provides a path into which the packs for testing should be
    copied. Will pass on the path to the actual function afterwards"""

    def wrapper(self, path):
        src_pack_glob = fixture_path('packs/*')
        copy_files_globbed(src_pack_glob, path, hard_link_ok=True)
        return func(self, path)
    # END wrapper

    wrapper.__name__ = func.__name__
    return wrapper

#} END decorators
#{ Routines

def fixture_path(relapath=''):
    """:return: absolute path into the fixture directory
    :param relapath: relative path into the fixtures directory, or ''
        to obtain the fixture directory itself"""
    return os.path.join(os.path.dirname(__file__), 'fixtures', relapath)


def copy_files_globbed(source_glob, target_dir, hard_link_ok=False):
    """Copy all files found according to the given source glob into the target directory
    :param hard_link_ok: if True, hard links will be created if possible. Otherwise
        the files will be copied"""
    for src_file in glob.glob(source_glob):
        if hard_link_ok and hasattr(os, 'link'):
            target = os.path.join(target_dir, os.path.basename(src_file))
            try:
                os.link(src_file, target)
            except OSError:
                shutil.copy(src_file, target_dir)
            # END handle cross device links ( and resulting failure )
        else:
            shutil.copy(src_file, target_dir)
        # END try hard link
    # END for each file to copy


def make_bytes(size_in_bytes, randomize=False):
    """:return: string with given size in bytes
    :param randomize: try to produce a very random stream"""
    actual_size = size_in_bytes // 4
    producer = range(actual_size)
    if randomize:
        producer = list(producer)
        random.shuffle(producer)
    # END randomize
    a = array('i', producer)
    return a.tobytes()


def make_object(type, data):
    """:return: bytes resembling an uncompressed object"""
    odata = "blob %i\0" % len(data)
    return odata.encode("ascii") + data


def make_memory_file(size_in_bytes, randomize=False):
    """:return: tuple(size_of_stream, stream)
    :param randomize: try to produce a very random stream"""
    d = make_bytes(size_in_bytes, randomize)
    return len(d), BytesIO(d)

#} END routines
#{ Stream Utilities

class DummyStream(object):

    def __init__(self):
        self.was_read = False
        self.bytes = 0
        self.closed = False

    def read(self, size):
        self.was_read = True
        self.bytes = size

    def close(self):
        self.closed = True

    def _assert(self):
        assert self.was_read


class DeriveTest(OStream):

    def __init__(self, sha, type, size, stream, *args, **kwargs):
        self.myarg = kwargs.pop('myarg')
        self.args = args

    def _assert(self):
        assert self.args
        assert self.myarg

#} END stream utilities
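
Taken together, these helpers are meant to be stacked on test methods. Below is a minimal sketch of the intended usage, assuming the module is importable as gitdb.test.lib (as the other test files in this commit import it); the test class, test name and assertions are illustrative only and not part of this commit.

import os

from gitdb.test.lib import TestBase, with_rw_directory, with_packs_rw, fixture_path


class ExampleHelperUsage(TestBase):

    @with_rw_directory          # creates a scratch directory, passes its path, removes it only on success
    @with_packs_rw              # copies (or hard-links) the pack fixtures into that directory first
    def test_packs_are_copied(self, path):
        # the pack fixtures should now sit inside the scratch directory
        assert any(name.startswith('pack-') for name in os.listdir(path))
        # fixture_path('') resolves to the fixture directory itself
        assert os.path.isdir(fixture_path(''))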


@@ -0,0 +1,105 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Test for object db"""
from gitdb.test.lib import (
TestBase,
DummyStream,
DeriveTest,
)
from gitdb import (
OInfo,
OPackInfo,
ODeltaPackInfo,
OStream,
OPackStream,
ODeltaPackStream,
IStream
)
from gitdb.util import (
NULL_BIN_SHA
)
from gitdb.typ import (
str_blob_type
)
class TestBaseTypes(TestBase):

    def test_streams(self):
        # test info
        sha = NULL_BIN_SHA
        s = 20
        blob_id = 3

        info = OInfo(sha, str_blob_type, s)
        assert info.binsha == sha
        assert info.type == str_blob_type
        assert info.type_id == blob_id
        assert info.size == s

        # test pack info
        # provides type_id
        pinfo = OPackInfo(0, blob_id, s)
        assert pinfo.type == str_blob_type
        assert pinfo.type_id == blob_id
        assert pinfo.pack_offset == 0

        dpinfo = ODeltaPackInfo(0, blob_id, s, sha)
        assert dpinfo.type == str_blob_type
        assert dpinfo.type_id == blob_id
        assert dpinfo.delta_info == sha
        assert dpinfo.pack_offset == 0

        # test ostream
        stream = DummyStream()
        ostream = OStream(*(info + (stream, )))
        assert ostream.stream is stream
        ostream.read(15)
        stream._assert()
        assert stream.bytes == 15
        ostream.read(20)
        assert stream.bytes == 20

        # test packstream
        postream = OPackStream(*(pinfo + (stream, )))
        assert postream.stream is stream
        postream.read(10)
        stream._assert()
        assert stream.bytes == 10

        # test deltapackstream
        dpostream = ODeltaPackStream(*(dpinfo + (stream, )))
        assert dpostream.stream is stream
        dpostream.read(5)
        stream._assert()
        assert stream.bytes == 5

        # derive with own args
        DeriveTest(sha, str_blob_type, s, stream, 'mine', myarg=3)._assert()

        # test istream
        istream = IStream(str_blob_type, s, stream)
        assert istream.binsha is None
        istream.binsha = sha
        assert istream.binsha == sha

        assert len(istream.binsha) == 20
        assert len(istream.hexsha) == 40

        assert istream.size == s
        istream.size = s * 2
        assert istream.size == s * 2
        assert istream.type == str_blob_type
        istream.type = "something"
        assert istream.type == "something"
        assert istream.stream is stream
        istream.stream = None
        assert istream.stream is None

        assert istream.error is None
        istream.error = Exception()
        assert isinstance(istream.error, Exception)
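
The magic number blob_id = 3 used above is git's on-disk type id for blobs. For reference, a small sketch of that mapping; the dictionary name is mine and not part of gitdb:

# git object type ids as used by the type_id attribute above
GIT_TYPE_IDS = {
    "commit": 1,
    "tree": 2,
    "blob": 3,
    "tag": 4,
    # 6 (ofs-delta) and 7 (ref-delta) are the delta types resolved via gitdb.fun.delta_types
}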


@@ -0,0 +1,43 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Module with examples from the tutorial section of the docs"""
import os
from gitdb.test.lib import TestBase
from gitdb import IStream
from gitdb.db import LooseObjectDB
from io import BytesIO
class TestExamples(TestBase):
def test_base(self):
ldb = LooseObjectDB(os.path.join(self.gitrepopath, 'objects'))
for sha1 in ldb.sha_iter():
oinfo = ldb.info(sha1)
ostream = ldb.stream(sha1)
assert oinfo[:3] == ostream[:3]
assert len(ostream.read()) == ostream.size
assert ldb.has_object(oinfo.binsha)
# END for each sha in database
# assure we close all files
try:
del(ostream)
del(oinfo)
except UnboundLocalError:
pass
# END ignore exception if there are no loose objects
data = "my data".encode("ascii")
istream = IStream("blob", len(data), BytesIO(data))
# the object does not yet have a sha
assert istream.binsha is None
ldb.store(istream)
# now the sha is set
assert len(istream.binsha) == 20
assert ldb.has_object(istream.binsha)
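
The tutorial example stops right after the store. A natural continuation, sketched here with only calls that already appear in this commit, is to read the freshly stored blob back out of the loose object database:

# continuing the example above: read the stored blob back by its binary sha
ostream = ldb.stream(istream.binsha)
assert ostream.size == len(data)
assert ostream.read() == data                     # round trip: the stored bytes come back unchanged
assert ldb.info(istream.binsha).binsha == istream.binsha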


@@ -0,0 +1,249 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Test everything about packs reading and writing"""
from gitdb.test.lib import (
TestBase,
with_rw_directory,
fixture_path
)
from gitdb.stream import DeltaApplyReader
from gitdb.pack import (
PackEntity,
PackIndexFile,
PackFile
)
from gitdb.base import (
OInfo,
OStream,
)
from gitdb.fun import delta_types
from gitdb.exc import UnsupportedOperation
from gitdb.util import to_bin_sha
from nose import SkipTest
import os
import tempfile
#{ Utilities
def bin_sha_from_filename(filename):
return to_bin_sha(os.path.splitext(os.path.basename(filename))[0][5:])
#} END utilities
class TestPack(TestBase):

    packindexfile_v1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx'), 1, 67)
    packindexfile_v2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx'), 2, 30)
    packindexfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx'), 2, 42)
    packfile_v2_1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack'), 2, packindexfile_v1[2])
    packfile_v2_2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack'), 2, packindexfile_v2[2])
    packfile_v2_3_ascii = (
        fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack'), 2, packindexfile_v2_3_ascii[2])

    def _assert_index_file(self, index, version, size):
        assert index.packfile_checksum() != index.indexfile_checksum()
        assert len(index.packfile_checksum()) == 20
        assert len(index.indexfile_checksum()) == 20
        assert index.version() == version
        assert index.size() == size
        assert len(index.offsets()) == size

        # get all data of all objects
        for oidx in range(index.size()):
            sha = index.sha(oidx)
            assert oidx == index.sha_to_index(sha)

            entry = index.entry(oidx)
            assert len(entry) == 3

            assert entry[0] == index.offset(oidx)
            assert entry[1] == sha
            assert entry[2] == index.crc(oidx)

            # verify partial sha
            for l in (4, 8, 11, 17, 20):
                assert index.partial_sha_to_index(sha[:l], l * 2) == oidx
        # END for each object index in indexfile
        self.assertRaises(ValueError, index.partial_sha_to_index, "\0", 2)
    def _assert_pack_file(self, pack, version, size):
        assert pack.version() == version
        assert pack.size() == size
        assert len(pack.checksum()) == 20

        num_obj = 0
        for obj in pack.stream_iter():
            num_obj += 1
            info = pack.info(obj.pack_offset)
            stream = pack.stream(obj.pack_offset)

            assert info.pack_offset == stream.pack_offset
            assert info.type_id == stream.type_id
            assert hasattr(stream, 'read')

            # it should be possible to read from both streams
            assert obj.read() == stream.read()

            streams = pack.collect_streams(obj.pack_offset)
            assert streams

            # read the stream
            try:
                dstream = DeltaApplyReader.new(streams)
            except ValueError:
                # ignore these, old git versions use only ref deltas,
                # which we haven't resolved ( as we are without an index )
                # Also ignore non-delta streams
                continue
            # END get deltastream

            # read all
            data = dstream.read()
            assert len(data) == dstream.size

            # test seek
            dstream.seek(0)
            assert dstream.read() == data

            # read chunks
            # NOTE: the current implementation is safe, it basically transfers
            # all calls to the underlying memory map
        # END for each object
        assert num_obj == size
    def test_pack_index(self):
        # check version 1 and 2
        for indexfile, version, size in (self.packindexfile_v1, self.packindexfile_v2):
            index = PackIndexFile(indexfile)
            self._assert_index_file(index, version, size)
        # END run tests

    def test_pack(self):
        # there is this special version 3, but apparently its like 2 ...
        for packfile, version, size in (self.packfile_v2_3_ascii, self.packfile_v2_1, self.packfile_v2_2):
            pack = PackFile(packfile)
            self._assert_pack_file(pack, version, size)
        # END for each pack to test

    @with_rw_directory
    def test_pack_entity(self, rw_dir):
        pack_objs = list()
        for packinfo, indexinfo in ((self.packfile_v2_1, self.packindexfile_v1),
                                    (self.packfile_v2_2, self.packindexfile_v2),
                                    (self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)):
            packfile, version, size = packinfo
            indexfile, version, size = indexinfo
            entity = PackEntity(packfile)
            assert entity.pack().path() == packfile
            assert entity.index().path() == indexfile
            pack_objs.extend(entity.stream_iter())

            count = 0
            for info, stream in zip(entity.info_iter(), entity.stream_iter()):
                count += 1
                assert info.binsha == stream.binsha
                assert len(info.binsha) == 20
                assert info.type_id == stream.type_id
                assert info.size == stream.size

                # we return fully resolved items, which is implied by the sha centric access
                assert not info.type_id in delta_types

                # try all calls
                assert len(entity.collect_streams(info.binsha))
                oinfo = entity.info(info.binsha)
                assert isinstance(oinfo, OInfo)
                assert oinfo.binsha is not None
                ostream = entity.stream(info.binsha)
                assert isinstance(ostream, OStream)
                assert ostream.binsha is not None

                # verify the stream
                try:
                    assert entity.is_valid_stream(info.binsha, use_crc=True)
                except UnsupportedOperation:
                    pass
                # END ignore version issues
                assert entity.is_valid_stream(info.binsha, use_crc=False)
            # END for each info, stream tuple
            assert count == size
        # END for each entity

        # pack writing - write all packs into one
        # index path can be None
        pack_path1 = tempfile.mktemp('', "pack1", rw_dir)
        pack_path2 = tempfile.mktemp('', "pack2", rw_dir)
        index_path = tempfile.mktemp('', 'index', rw_dir)
        iteration = 0

        def rewind_streams():
            for obj in pack_objs:
                obj.stream.seek(0)
        # END utility

        for ppath, ipath, num_obj in zip((pack_path1, pack_path2),
                                         (index_path, None),
                                         (len(pack_objs), None)):
            iwrite = None
            if ipath:
                ifile = open(ipath, 'wb')
                iwrite = ifile.write
            # END handle ip

            # make sure we rewind the streams ... we work on the same objects over and over again
            if iteration > 0:
                rewind_streams()
            # END rewind streams
            iteration += 1

            with open(ppath, 'wb') as pfile:
                pack_sha, index_sha = PackEntity.write_pack(pack_objs, pfile.write, iwrite, object_count=num_obj)
            assert os.path.getsize(ppath) > 100

            # verify pack
            pf = PackFile(ppath)
            assert pf.size() == len(pack_objs)
            assert pf.version() == PackFile.pack_version_default
            assert pf.checksum() == pack_sha
            pf.close()

            # verify index
            if ipath is not None:
                ifile.close()
                assert os.path.getsize(ipath) > 100
                idx = PackIndexFile(ipath)
                assert idx.version() == PackIndexFile.index_version_default
                assert idx.packfile_checksum() == pack_sha
                assert idx.indexfile_checksum() == index_sha
                assert idx.size() == len(pack_objs)
                idx.close()
            # END verify files exist
        # END for each packpath, indexpath pair

        # verify the packs thoroughly
        rewind_streams()
        entity = PackEntity.create(pack_objs, rw_dir)
        count = 0
        for info in entity.info_iter():
            count += 1
            for use_crc in range(2):
                assert entity.is_valid_stream(info.binsha, use_crc)
            # END for each crc mode
        # END for each info
        assert count == len(pack_objs)
        entity.close()
    def test_pack_64(self):
        # TODO: hex-edit a pack helping us to verify that we can handle 64 bit offsets
        # of course without really needing such a huge pack
        raise SkipTest()
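
To make the read path in these tests easier to follow in isolation, here is a minimal sketch of resolving a single object from one of the fixture packs. It assumes the imports at the top of this file and reuses only calls exercised above; the variable names are mine:

# look up the first object of the v1 fixture index, then read it through the entity
index = PackIndexFile(TestPack.packindexfile_v1[0])
binsha = index.sha(0)                            # binary sha of the first indexed object

entity = PackEntity(TestPack.packfile_v2_1[0])   # the matching index path is derived from the pack path
oinfo = entity.info(binsha)                      # OInfo: binsha, type, size, with deltas already resolved
ostream = entity.stream(binsha)                  # OStream: same header plus a readable stream
assert len(ostream.read()) == oinfo.size
assert entity.is_valid_stream(binsha, use_crc=False)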


@@ -0,0 +1,164 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Test for object db"""
from gitdb.test.lib import (
TestBase,
DummyStream,
make_bytes,
make_object,
fixture_path
)
from gitdb import (
DecompressMemMapReader,
FDCompressedSha1Writer,
LooseObjectDB,
Sha1Writer,
MemoryDB,
IStream,
)
from gitdb.util import hex_to_bin
import zlib
from gitdb.typ import (
str_blob_type
)
import tempfile
import os
from io import BytesIO
class TestStream(TestBase):

    """Test stream classes"""

    data_sizes = (15, 10000, 1000 * 1024 + 512)

    def _assert_stream_reader(self, stream, cdata, rewind_stream=lambda s: None):
        """Make stream tests - the orig_stream is seekable, allowing it to be
        rewound and reused
        :param cdata: the data we expect to read from stream, the contents
        :param rewind_stream: function called to rewind the stream to make it ready
            for reuse"""
        ns = 10
        assert len(cdata) > ns - 1, "Data must be larger than %i, was %i" % (ns, len(cdata))

        # read in small steps
        ss = len(cdata) // ns
        for i in range(ns):
            data = stream.read(ss)
            chunk = cdata[i * ss:(i + 1) * ss]
            assert data == chunk
        # END for each step
        rest = stream.read()
        if rest:
            assert rest == cdata[-len(rest):]
        # END handle rest

        if isinstance(stream, DecompressMemMapReader):
            assert len(stream.data()) == stream.compressed_bytes_read()
        # END handle special type

        rewind_stream(stream)

        # read everything
        rdata = stream.read()
        assert rdata == cdata

        if isinstance(stream, DecompressMemMapReader):
            assert len(stream.data()) == stream.compressed_bytes_read()
        # END handle special type
    def test_decompress_reader(self):
        for close_on_deletion in range(2):
            for with_size in range(2):
                for ds in self.data_sizes:
                    cdata = make_bytes(ds, randomize=False)

                    # zdata = zipped actual data
                    # cdata = original content data

                    # create reader
                    if with_size:
                        # need object data
                        zdata = zlib.compress(make_object(str_blob_type, cdata))
                        typ, size, reader = DecompressMemMapReader.new(zdata, close_on_deletion)
                        assert size == len(cdata)
                        assert typ == str_blob_type

                        # even if we don't set the size, it will be set automatically on first read
                        test_reader = DecompressMemMapReader(zdata, close_on_deletion=False)
                        assert test_reader._s == len(cdata)
                    else:
                        # here we need content data
                        zdata = zlib.compress(cdata)
                        reader = DecompressMemMapReader(zdata, close_on_deletion, len(cdata))
                        assert reader._s == len(cdata)
                    # END get reader

                    self._assert_stream_reader(reader, cdata, lambda r: r.seek(0))

                    # put in a dummy stream for closing
                    dummy = DummyStream()
                    reader._m = dummy

                    assert not dummy.closed
                    del(reader)
                    assert dummy.closed == close_on_deletion
                # END for each datasize
            # END whether size should be used
        # END whether stream should be closed when deleted
    def test_sha_writer(self):
        writer = Sha1Writer()
        assert 2 == writer.write("hi".encode("ascii"))
        assert len(writer.sha(as_hex=1)) == 40
        assert len(writer.sha(as_hex=0)) == 20

        # make sure it does something ;)
        prev_sha = writer.sha()
        writer.write("hi again".encode("ascii"))
        assert writer.sha() != prev_sha

    def test_compressed_writer(self):
        for ds in self.data_sizes:
            fd, path = tempfile.mkstemp()
            ostream = FDCompressedSha1Writer(fd)
            data = make_bytes(ds, randomize=False)

            # for now, just a single write, code doesn't care about chunking
            assert len(data) == ostream.write(data)
            ostream.close()

            # it's closed already
            self.assertRaises(OSError, os.close, fd)

            # read everything back, compare to data we zip
            fd = os.open(path, os.O_RDONLY | getattr(os, 'O_BINARY', 0))
            written_data = os.read(fd, os.path.getsize(path))
            assert len(written_data) == os.path.getsize(path)
            os.close(fd)
            assert written_data == zlib.compress(data, 1)  # best speed

            os.remove(path)
        # END for each data size
    def test_decompress_reader_special_case(self):
        odb = LooseObjectDB(fixture_path('objects'))
        mdb = MemoryDB()
        for sha in (b'888401851f15db0eed60eb1bc29dec5ddcace911',
                    b'7bb839852ed5e3a069966281bb08d50012fb309b',):
            ostream = odb.stream(hex_to_bin(sha))

            # if there is a bug, we will be missing one byte exactly !
            data = ostream.read()
            assert len(data) == ostream.size

            # Putting it back in should yield nothing new - after all, we have
            # the same data
            dump = mdb.store(IStream(ostream.type, ostream.size, BytesIO(data)))
            assert dump.hexsha == sha
        # end for each loose object sha to test
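
As a note on what Sha1Writer computes in test_sha_writer above: a git object id is the SHA-1 of the header "<type> <size>\0" followed by the raw content, which is exactly the layout make_object assembles. A minimal sketch, assuming the imports at the top of this file; hashlib is used only as an independent cross-check:

import hashlib

payload = b"hello gitdb"
obj = make_object(str_blob_type, payload)        # header b"blob 11\x00" followed by the payload

writer = Sha1Writer()
writer.write(obj)

# the incremental writer and a one-shot digest agree on the binary object id
assert writer.sha(as_hex=0) == hashlib.sha1(obj).digest()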


@@ -0,0 +1,100 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Test for object db"""
import tempfile
import os
from gitdb.test.lib import TestBase
from gitdb.util import (
to_hex_sha,
to_bin_sha,
NULL_HEX_SHA,
LockedFD
)
class TestUtils(TestBase):
def test_basics(self):
assert to_hex_sha(NULL_HEX_SHA) == NULL_HEX_SHA
assert len(to_bin_sha(NULL_HEX_SHA)) == 20
assert to_hex_sha(to_bin_sha(NULL_HEX_SHA)) == NULL_HEX_SHA.encode("ascii")
def _cmp_contents(self, file_path, data):
# raise if data from file at file_path
# does not match data string
with open(file_path, "rb") as fp:
assert fp.read() == data.encode("ascii")
    def test_lockedfd(self):
        my_file = tempfile.mktemp()
        orig_data = "hello"
        new_data = "world"
        with open(my_file, "wb") as my_file_fp:
            my_file_fp.write(orig_data.encode("ascii"))

        try:
            lfd = LockedFD(my_file)
            lockfilepath = lfd._lockfilepath()

            # cannot end before it was started
            self.assertRaises(AssertionError, lfd.rollback)
            self.assertRaises(AssertionError, lfd.commit)

            # open for writing
            assert not os.path.isfile(lockfilepath)
            wfd = lfd.open(write=True)
            assert lfd._fd is wfd
            assert os.path.isfile(lockfilepath)

            # write data and fail
            os.write(wfd, new_data.encode("ascii"))
            lfd.rollback()
            assert lfd._fd is None
            self._cmp_contents(my_file, orig_data)
            assert not os.path.isfile(lockfilepath)

            # additional call doesn't fail
            lfd.commit()
            lfd.rollback()

            # test reading
            lfd = LockedFD(my_file)
            rfd = lfd.open(write=False)
            assert os.read(rfd, len(orig_data)) == orig_data.encode("ascii")
            assert os.path.isfile(lockfilepath)

            # deletion rolls back
            del(lfd)
            assert not os.path.isfile(lockfilepath)

            # write data - concurrently
            lfd = LockedFD(my_file)
            olfd = LockedFD(my_file)
            assert not os.path.isfile(lockfilepath)
            wfdstream = lfd.open(write=True, stream=True)       # this time as stream
            assert os.path.isfile(lockfilepath)

            # another one fails
            self.assertRaises(IOError, olfd.open)

            wfdstream.write(new_data.encode("ascii"))
            lfd.commit()
            assert not os.path.isfile(lockfilepath)
            self._cmp_contents(my_file, new_data)

            # could test automatic _end_writing on destruction
        finally:
            os.remove(my_file)
        # END final cleanup

        # try non-existing file for reading
        lfd = LockedFD(tempfile.mktemp())
        try:
            lfd.open(write=False)
        except OSError:
            assert not os.path.exists(lfd._lockfilepath())
        else:
            self.fail("expected OSError")
        # END handle exceptions
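
The test above walks the whole LockedFD protocol step by step; condensed into its everyday shape, an atomic file update looks roughly like the following minimal sketch. Only calls exercised in the test are used, and the target path is hypothetical:

import os

from gitdb.util import LockedFD

lfd = LockedFD("/tmp/some_config_file")     # hypothetical target path
fd = lfd.open(write=True)                   # creates the lock file, returns an os-level fd
try:
    os.write(fd, b"new contents")
except Exception:
    lfd.rollback()                          # discard the written data, leave the target untouched
    raise
else:
    lfd.commit()                            # make the written data replace the target, remove the lock file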