Skip to content

Commit 0c7a3ec

Browse files
committed
Fixed the _perf module, which built but didn't link dynamically. I think it never successfully imported all this time, but it's hard to believe this slipped by.
Added a performance test for pack-writing, which isn't really showing what I want, as it currently reads data from a densely compressed pack, which takes most of the time in the nearly pure Python implementation. Compared to C++, all the measured performance is just below anything I'd want to use. But we shouldn't forget this is just a test implementation; writing packs is actually quite simple if you leave out the delta compression part and the delta logic.
1 parent 184a776 commit 0c7a3ec

File tree

3 files changed

+55
-13
lines changed

3 files changed

+55
-13
lines changed

gitdb/_delta_apply.c

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#include "_delta_apply.h"
1+
#include <_delta_apply.h>
22
#include <stdint.h>
33
#include <assert.h>
44
#include <stdio.h>
@@ -463,7 +463,7 @@ void DIV_reset(DeltaInfoVector* vec)
463463

464464
// Append one chunk to the end of the list, and return a pointer to it
465465
// It will not have been initialized !
466-
static inline
466+
inline
467467
DeltaInfo* DIV_append(DeltaInfoVector* vec)
468468
{
469469
if (vec->size + 1 > vec->reserved_size){
@@ -703,7 +703,7 @@ typedef struct {
703703
} DeltaChunkList;
704704

705705

706-
static
706+
707707
int DCL_init(DeltaChunkList*self, PyObject *args, PyObject *kwds)
708708
{
709709
if(args && PySequence_Size(args) > 0){
@@ -715,20 +715,20 @@ int DCL_init(DeltaChunkList*self, PyObject *args, PyObject *kwds)
715715
return 0;
716716
}
717717

718-
static
718+
719719
void DCL_dealloc(DeltaChunkList* self)
720720
{
721721
TSI_destroy(&(self->istream));
722722
}
723723

724-
static
724+
725725
PyObject* DCL_py_rbound(DeltaChunkList* self)
726726
{
727727
return PyLong_FromUnsignedLongLong(self->istream.target_size);
728728
}
729729

730730
// Write using a write function, taking remaining bytes from a base buffer
731-
static
731+
732732
PyObject* DCL_apply(DeltaChunkList* self, PyObject* args)
733733
{
734734
PyObject* pybuf = 0;
@@ -769,13 +769,13 @@ PyObject* DCL_apply(DeltaChunkList* self, PyObject* args)
769769
Py_RETURN_NONE;
770770
}
771771

772-
static PyMethodDef DCL_methods[] = {
772+
PyMethodDef DCL_methods[] = {
773773
{"apply", (PyCFunction)DCL_apply, METH_VARARGS, "Apply the given iterable of delta streams" },
774774
{"rbound", (PyCFunction)DCL_py_rbound, METH_NOARGS, NULL},
775775
{NULL} /* Sentinel */
776776
};
777777

778-
static PyTypeObject DeltaChunkListType = {
778+
PyTypeObject DeltaChunkListType = {
779779
PyObject_HEAD_INIT(NULL)
780780
0, /*ob_size*/
781781
"DeltaChunkList", /*tp_name*/
@@ -897,7 +897,7 @@ uint compute_chunk_count(const uchar* data, const uchar* dend, bool read_header)
897897
return num_chunks;
898898
}
899899

900-
static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
900+
PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
901901
{
902902
// obtain iterator
903903
PyObject* stream_iter = 0;
@@ -1088,7 +1088,6 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
10881088

10891089
// Write using a write function, taking remaining bytes from a base buffer
10901090
// replaces the corresponding method in python
1091-
static
10921091
PyObject* apply_delta(PyObject* self, PyObject* args)
10931092
{
10941093
PyObject* pybbuf = 0;

gitdb/_delta_apply.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#include <Python.h>
22

3-
static PyObject* connect_deltas(PyObject *self, PyObject *dstreams);
4-
static PyObject* apply_delta(PyObject* self, PyObject* args);
3+
extern PyObject* connect_deltas(PyObject *self, PyObject *dstreams);
4+
extern PyObject* apply_delta(PyObject* self, PyObject* args);
55

6-
static PyTypeObject DeltaChunkListType;
6+
extern PyTypeObject DeltaChunkListType;

gitdb/test/performance/test_pack_streaming.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,57 @@
88
)
99

1010
from gitdb.db.pack import PackedDB
11+
from gitdb.stream import NullStream
12+
from gitdb.pack import PackEntity
1113

1214
import os
1315
import sys
1416
from time import time
17+
from nose import SkipTest
18+
19+
class CountedNullStream(NullStream):
20+
__slots__ = '_bw'
21+
def __init__(self):
22+
self._bw = 0
23+
24+
def bytes_written(self):
25+
return self._bw
26+
27+
def write(self, d):
28+
self._bw += NullStream.write(self, d)
29+
1530

1631
class TestPackStreamingPerformance(TestBigRepoR):
1732

33+
def test_pack_writing(self):
34+
# see how fast we can write a pack from object streams.
35+
# This will not be fast, as we take time for decompressing the streams as well
36+
ostream = CountedNullStream()
37+
pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
38+
39+
ni = 5000
40+
count = 0
41+
total_size = 0
42+
st = time()
43+
objs = list()
44+
for sha in pdb.sha_iter():
45+
count += 1
46+
objs.append(pdb.stream(sha))
47+
if count == ni:
48+
break
49+
#END gather objects for pack-writing
50+
elapsed = time() - st
51+
print >> sys.stderr, "PDB Streaming: Got %i streams by sha in in %f s ( %f streams/s )" % (ni, elapsed, ni / elapsed)
52+
53+
st = time()
54+
PackEntity.write_pack(objs, ostream.write)
55+
elapsed = time() - st
56+
total_kb = ostream.bytes_written() / 1000
57+
print >> sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb/elapsed)
58+
59+
1860
def test_stream_reading(self):
61+
raise SkipTest()
1962
pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
2063

2164
# streaming only, meant for --with-profile runs

0 commit comments

Comments
 (0)