Skip to content

Commit

Permalink
Change the way methods that take a path argument behave.
Browse files Browse the repository at this point in the history
For a number of user-facing methods, make it possible to pass paths in as
either a unicode string or bytes. These include:

* Repo.__init__
* Repo.init
* Repo.init_bare
* index.build_index_from_tree
* index.get_unstaged_changes

The repo.Repo.path attribute will remain unchanged. The Repo._controldir
attribute and a new Repo._path_bytes attribute are ensured to be bytes.

A number of internal methods now require a bytes path rather than
a unicode string. These include:

* objects.hex_to_filename
* objects.filename_to_hex
* _GitFile.__init__
* ShellHook.__init__ (and subclasses of ShellHook)
* DiskObjectStore.__init__
* DiskRefsContainer.__init__
* Repo._put_named_file
* Repo.get_named_file
* pack.write_pack
* Pack.__init__
* etc...
  • Loading branch information
garyvdm committed Apr 23, 2015
1 parent fd2f4f4 commit 7a02943
Show file tree
Hide file tree
Showing 22 changed files with 266 additions and 188 deletions.
2 changes: 1 addition & 1 deletion dulwich/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class _GitFile(object):
'truncate', 'write', 'writelines')
def __init__(self, filename, mode, bufsize):
self._filename = filename
self._lockfilename = '%s.lock' % self._filename
self._lockfilename = self._filename + b'.lock'
fd = os.open(self._lockfilename,
os.O_RDWR | os.O_CREAT | os.O_EXCL | getattr(os, "O_BINARY", 0))
self._file = os.fdopen(fd, mode, bufsize)
Expand Down
6 changes: 3 additions & 3 deletions dulwich/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ class PreCommitShellHook(ShellHook):
"""pre-commit shell hook"""

def __init__(self, controldir):
filepath = os.path.join(controldir, 'hooks', 'pre-commit')
filepath = os.path.join(controldir, b'hooks', b'pre-commit')

ShellHook.__init__(self, 'pre-commit', filepath, 0)

Expand All @@ -109,7 +109,7 @@ class PostCommitShellHook(ShellHook):
"""post-commit shell hook"""

def __init__(self, controldir):
filepath = os.path.join(controldir, 'hooks', 'post-commit')
filepath = os.path.join(controldir, b'hooks', b'post-commit')

ShellHook.__init__(self, 'post-commit', filepath, 0)

Expand All @@ -122,7 +122,7 @@ class CommitMsgShellHook(ShellHook):
"""

def __init__(self, controldir):
filepath = os.path.join(controldir, 'hooks', 'commit-msg')
filepath = os.path.join(controldir, b'hooks', b'commit-msg')

def prepare_msg(*args):
(fd, path) = tempfile.mkstemp()
Expand Down
16 changes: 13 additions & 3 deletions dulwich/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,12 +476,15 @@ def build_index_from_tree(prefix, index_path, object_store, tree_id,
in a working dir. Suitable only for fresh clones.
"""

if not isinstance(prefix, bytes):
prefix = prefix.encode(sys.getfilesystemencoding())

index = Index(index_path)

for entry in object_store.iter_tree_contents(tree_id):
if not validate_path(entry.path):
continue
full_path = os.path.join(prefix, entry.path.decode(sys.getfilesystemencoding()))
full_path = os.path.join(prefix, entry.path)

if not os.path.exists(os.path.dirname(full_path)):
os.makedirs(os.path.dirname(full_path))
Expand Down Expand Up @@ -509,7 +512,11 @@ def blob_from_path_and_stat(path, st):
with open(path, 'rb') as f:
blob.data = f.read()
else:
blob.data = os.readlink(path).encode(sys.getfilesystemencoding())
if not isinstance(path, bytes):
blob.data = os.readlink(path.encode(sys.getfilesystemencoding()))
else:
blob.data = os.readlink(path)

return blob


Expand All @@ -521,8 +528,11 @@ def get_unstaged_changes(index, path):
:return: iterator over paths with unstaged changes
"""
# For each entry in the index check the sha1 & ensure not staged
if not isinstance(path, bytes):
path = path.encode(sys.getfilesystemencoding())

for name, entry in index.iteritems():
fp = os.path.join(path, name.decode(sys.getfilesystemencoding()))
fp = os.path.join(path, name)
blob = blob_from_path_and_stat(fp, os.lstat(fp))
if blob.id != entry.sha:
yield name
45 changes: 24 additions & 21 deletions dulwich/object_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@
PackStreamCopier,
)

INFODIR = 'info'
PACKDIR = 'pack'
INFODIR = b'info'
PACKDIR = b'pack'


class BaseObjectStore(object):
Expand Down Expand Up @@ -425,7 +425,7 @@ def alternates(self):

def _read_alternate_paths(self):
try:
f = GitFile(os.path.join(self.path, "info", "alternates"),
f = GitFile(os.path.join(self.path, INFODIR, b'alternates'),
'rb')
except (OSError, IOError) as e:
if e.errno == errno.ENOENT:
Expand All @@ -437,19 +437,19 @@ def _read_alternate_paths(self):
if l[0] == b"#":
continue
if os.path.isabs(l):
yield l.decode(sys.getfilesystemencoding())
yield l
else:
yield os.path.join(self.path, l).decode(sys.getfilesystemencoding())
yield os.path.join(self.path, l)

def add_alternate_path(self, path):
"""Add an alternate path to this object store.
"""
try:
os.mkdir(os.path.join(self.path, "info"))
os.mkdir(os.path.join(self.path, INFODIR))
except OSError as e:
if e.errno != errno.EEXIST:
raise
alternates_path = os.path.join(self.path, "info/alternates")
alternates_path = os.path.join(self.path, INFODIR, b'alternates')
with GitFile(alternates_path, 'wb') as f:
try:
orig_f = open(alternates_path, 'rb')
Expand All @@ -459,7 +459,7 @@ def add_alternate_path(self, path):
else:
with orig_f:
f.write(orig_f.read())
f.write(path.encode(sys.getfilesystemencoding()) + b"\n")
f.write(path + b"\n")

if not os.path.isabs(path):
path = os.path.join(self.path, path)
Expand All @@ -477,10 +477,10 @@ def _update_pack_cache(self):
self._pack_cache_time = os.stat(self.pack_dir).st_mtime
pack_files = set()
for name in pack_dir_contents:
assert type(name) is str
assert type(name) is bytes
# TODO: verify that idx exists first
if name.startswith("pack-") and name.endswith(".pack"):
pack_files.add(name[:-len(".pack")])
if name.startswith(b'pack-') and name.endswith(b'.pack'):
pack_files.add(name[:-len(b'.pack')])

# Open newly appeared pack files
for f in pack_files:
Expand All @@ -507,7 +507,7 @@ def _iter_loose_objects(self):
if len(base) != 2:
continue
for rest in os.listdir(os.path.join(self.path, base)):
yield (base+rest).encode(sys.getfilesystemencoding())
yield (base+rest)

def _get_loose_object(self, sha):
path = self._get_shafile_path(sha)
Expand All @@ -524,8 +524,7 @@ def _remove_loose_object(self, sha):
def _get_pack_basepath(self, entries):
suffix = iter_sha1(entry[0] for entry in entries)
# TODO: Handle self.pack_dir being bytes
suffix = suffix.decode('ascii')
return os.path.join(self.pack_dir, "pack-" + suffix)
return os.path.join(self.pack_dir, b"pack-" + suffix)

def _complete_thin_pack(self, f, path, copier, indexer):
"""Move a specific file containing a pack into the pack directory.
Expand Down Expand Up @@ -567,10 +566,10 @@ def _complete_thin_pack(self, f, path, copier, indexer):
# Move the pack in.
entries.sort()
pack_base_name = self._get_pack_basepath(entries)
os.rename(path, pack_base_name + '.pack')
os.rename(path, pack_base_name + b'.pack')

# Write the index.
index_file = GitFile(pack_base_name + '.idx', 'wb')
index_file = GitFile(pack_base_name + b'.idx', 'wb')
try:
write_pack_index_v2(index_file, entries, pack_sha)
index_file.close()
Expand All @@ -597,7 +596,10 @@ def add_thin_pack(self, read_all, read_some):
:return: A Pack object pointing at the now-completed thin pack in the
objects/pack directory.
"""
fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
fd, path = tempfile.mkstemp(
dir=self.path.decode(sys.getfilesystemencoding()),
prefix='tmp_pack_')
path = path.encode(sys.getfilesystemencoding())
with os.fdopen(fd, 'w+b') as f:
indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
copier = PackStreamCopier(read_all, read_some, f,
Expand All @@ -616,9 +618,9 @@ def move_in_pack(self, path):
with PackData(path) as p:
entries = p.sorted_entries()
basename = self._get_pack_basepath(entries)
with GitFile(basename+".idx", "wb") as f:
with GitFile(basename+b'.idx', "wb") as f:
write_pack_index_v2(f, entries, p.get_stored_checksum())
os.rename(path, basename + ".pack")
os.rename(path, basename + b'.pack')
final_pack = Pack(basename)
self._add_known_pack(basename, final_pack)
return final_pack
Expand All @@ -630,7 +632,8 @@ def add_pack(self):
call when the pack is finished and an abort
function.
"""
fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
pack_dir_str = self.pack_dir.decode(sys.getfilesystemencoding())
fd, path = tempfile.mkstemp(dir=pack_dir_str, suffix='.pack')
f = os.fdopen(fd, 'wb')
def commit():
os.fsync(fd)
Expand Down Expand Up @@ -669,7 +672,7 @@ def init(cls, path):
except OSError as e:
if e.errno != errno.EEXIST:
raise
os.mkdir(os.path.join(path, "info"))
os.mkdir(os.path.join(path, INFODIR))
os.mkdir(os.path.join(path, PACKDIR))
return cls(path)

Expand Down
6 changes: 2 additions & 4 deletions dulwich/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,10 @@ def hex_to_filename(path, hex):
# os.path.join accepts bytes or unicode, but all args must be of the same
# type. Make sure that hex which is expected to be bytes, is the same type
# as path.
if getattr(path, 'encode', None) is not None:
hex = hex.decode('ascii')
dir = hex[:2]
directory = hex[:2]
file = hex[2:]
# Check from object dir
return os.path.join(path, dir, file)
return os.path.join(path, directory, file)


def filename_to_hex(filename):
Expand Down
14 changes: 7 additions & 7 deletions dulwich/pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -1474,12 +1474,12 @@ def write_pack(filename, objects, deltify=None, delta_window_size=None):
:param deltify: Whether to deltify pack objects
:return: Tuple with checksum of pack file and index file
"""
with GitFile(filename + '.pack', 'wb') as f:
with GitFile(filename + b'.pack', 'wb') as f:
entries, data_sum = write_pack_objects(f, objects,
delta_window_size=delta_window_size, deltify=deltify)
entries = [(k, v[0], v[1]) for (k, v) in entries.items()]
entries.sort()
with GitFile(filename + '.idx', 'wb') as f:
with GitFile(filename + b'.idx', 'wb') as f:
return data_sum, write_pack_index_v2(f, entries, data_sum)


Expand Down Expand Up @@ -1785,8 +1785,8 @@ def __init__(self, basename, resolve_ext_ref=None):
self._basename = basename
self._data = None
self._idx = None
self._idx_path = self._basename + '.idx'
self._data_path = self._basename + '.pack'
self._idx_path = self._basename + b'.idx'
self._data_path = self._basename + b'.pack'
self._data_load = lambda: PackData(self._data_path)
self._idx_load = lambda: load_pack_index(self._idx_path)
self.resolve_ext_ref = resolve_ext_ref
Expand All @@ -1795,15 +1795,15 @@ def __init__(self, basename, resolve_ext_ref=None):
def from_lazy_objects(self, data_fn, idx_fn):
"""Create a new pack object from callables to load pack data and
index objects."""
ret = Pack('')
ret = Pack(b'')
ret._data_load = data_fn
ret._idx_load = idx_fn
return ret

@classmethod
def from_objects(self, data, idx):
"""Create a new pack object from pack data and index objects."""
ret = Pack('')
ret = Pack(b'')
ret._data_load = lambda: data
ret._idx_load = lambda: idx
return ret
Expand Down Expand Up @@ -1930,7 +1930,7 @@ def keep(self, msg=None):
determine whether or not a .keep file is obsolete.
:return: The path of the .keep file, as a string.
"""
keepfile_name = '%s.keep' % self._basename
keepfile_name = self._basename + b'.keep'
with GitFile(keepfile_name, 'wb') as keepfile:
if msg:
keepfile.write(msg)
Expand Down
8 changes: 6 additions & 2 deletions dulwich/porcelain.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,12 +515,16 @@ def update_refs(refs):
return refs

err_encoding = getattr(errstream, 'encoding', 'utf-8')
if not isinstance(remote_location, bytes):
remote_location_bytes = remote_location.encode(err_encoding)
else:
remote_location_bytes = remote_location
try:
client.send_pack(path, update_refs,
r.object_store.generate_pack_contents, progress=errstream.write)
errstream.write(b"Push to " + remote_location.encode(err_encoding) + b" successful.\n")
errstream.write(b"Push to " + remote_location_bytes + b" successful.\n")
except (UpdateRefsError, SendPackError) as e:
errstream.write(b"Push to " + remote_location.encode(err_encoding) + b" failed -> " + e.message.encode(err_encoding) + b"\n")
errstream.write(b"Push to " + remote_location_bytes + b" failed -> " + e.message.encode(err_encoding) + b"\n")


def pull(repo, remote_location, refs_path,
Expand Down
21 changes: 11 additions & 10 deletions dulwich/refs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"""
import errno
import os
import sys

from dulwich.errors import (
PackedRefsException,
Expand All @@ -43,6 +44,8 @@
LOCAL_BRANCH_PREFIX = b'refs/heads/'
BAD_REF_CHARS = set(b'\177 ~^:?*[')

path_sep_bytes = os.path.sep.encode(sys.getfilesystemencoding())


def check_ref_format(refname):
"""Check if a refname is correctly formatted.
Expand Down Expand Up @@ -395,10 +398,9 @@ def subkeys(self, base):
subkeys = set()
path = self.refpath(base)
for root, dirs, files in os.walk(path):
dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
dir = root[len(path):].strip(path_sep_bytes).replace(path_sep_bytes, b'/')
for filename in files:
refname = (("%s/%s" % (dir, filename))
.strip("/").encode('ascii'))
refname = (dir + b'/' + filename).strip(b'/')
# check_ref_format requires at least one /, so we prepend the
# base before calling it.
if check_ref_format(base + b'/' + refname):
Expand All @@ -414,9 +416,9 @@ def allkeys(self):
allkeys.add(b'HEAD')
path = self.refpath(b'')
for root, dirs, files in os.walk(self.refpath(b'refs')):
dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
dir = root[len(path):].strip(path_sep_bytes).replace(path_sep_bytes, b'/')
for filename in files:
refname = ("%s/%s" % (dir, filename)).strip("/").encode('ascii')
refname = (dir + b'/' + filename).strip(b'/')
if check_ref_format(refname):
allkeys.add(refname)
allkeys.update(self.get_packed_refs())
Expand All @@ -426,9 +428,8 @@ def refpath(self, name):
"""Return the disk path of a ref.
"""
name = name.decode('ascii')
if os.path.sep != "/":
name = name.replace("/", os.path.sep)
if path_sep_bytes != b'/':
name = name.replace(b'/', path_sep_bytes)
return os.path.join(self.path, name)

def get_packed_refs(self):
Expand All @@ -445,7 +446,7 @@ def get_packed_refs(self):
# None if and only if _packed_refs is also None.
self._packed_refs = {}
self._peeled_refs = {}
path = os.path.join(self.path, 'packed-refs')
path = os.path.join(self.path, b'packed-refs')
try:
f = GitFile(path, 'rb')
except IOError as e:
Expand Down Expand Up @@ -513,7 +514,7 @@ def read_loose_ref(self, name):
def _remove_packed_ref(self, name):
if self._packed_refs is None:
return
filename = os.path.join(self.path, 'packed-refs')
filename = os.path.join(self.path, b'packed-refs')
# reread cached refs from disk, while holding the lock
f = GitFile(filename, 'wb')
try:
Expand Down
Loading

0 comments on commit 7a02943

Please sign in to comment.