Skip to content

Commit 4aff646

Browse files
rscharfe authored and gitster committed
archive-zip: mark text files in archives
Set the text flag for ZIP archive entries that look like text files so that unzip -a can be used to perform end-of-line conversions. Info-ZIP zip does the same.

Detect binary files the same way as git diff and git grep do, namely by checking for the attribute "diff" and its negation "-diff", and if none is found by falling back to checking for the presence of NUL bytes in the first few bytes of the file contents.

7-Zip, Windows' built-in ZIP functionality and Info-ZIP unzip without the switch -a are not affected by the change and still extract text files without doing any end-of-line conversions.

NB: The actual end-of-line style used in the archive entries doesn't matter to unzip -a, as it converts any CR, CRLF and LF to the line end characters appropriate for the platform it is running on.

Suggested-by: Ulrike Fischer <[email protected]>
Signed-off-by: Rene Scharfe <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 282616c commit 4aff646

File tree

2 files changed

+70
-2
lines changed

2 files changed

+70
-2
lines changed

archive-zip.c

+24-1
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@
55
#include "archive.h"
66
#include "streaming.h"
77
#include "utf8.h"
8+
#include "userdiff.h"
9+
#include "xdiff-interface.h"
810

911
static int zip_date;
1012
static int zip_time;
@@ -189,6 +191,16 @@ static int has_only_ascii(const char *s)
189191
}
190192
}
191193

194+
static int entry_is_binary(const char *path, const void *buffer, size_t size)
195+
{
196+
struct userdiff_driver *driver = userdiff_find_by_path(path);
197+
if (!driver)
198+
driver = userdiff_find_by_name("default");
199+
if (driver->binary != -1)
200+
return driver->binary;
201+
return buffer_is_binary(buffer, size);
202+
}
203+
192204
#define STREAM_BUFFER_SIZE (1024 * 16)
193205

194206
static int write_zip_entry(struct archiver_args *args,
@@ -210,6 +222,8 @@ static int write_zip_entry(struct archiver_args *args,
210222
struct git_istream *stream = NULL;
211223
unsigned long flags = 0;
212224
unsigned long size;
225+
int is_binary = -1;
226+
const char *path_without_prefix = path + args->baselen;
213227

214228
crc = crc32(0, NULL, 0);
215229

@@ -256,6 +270,8 @@ static int write_zip_entry(struct archiver_args *args,
256270
return error("cannot read %s",
257271
sha1_to_hex(sha1));
258272
crc = crc32(crc, buffer, size);
273+
is_binary = entry_is_binary(path_without_prefix,
274+
buffer, size);
259275
out = buffer;
260276
}
261277
compressed_size = (method == 0) ? size : 0;
@@ -300,7 +316,6 @@ static int write_zip_entry(struct archiver_args *args,
300316
copy_le16(dirent.extra_length, ZIP_EXTRA_MTIME_SIZE);
301317
copy_le16(dirent.comment_length, 0);
302318
copy_le16(dirent.disk, 0);
303-
copy_le16(dirent.attr1, 0);
304319
copy_le32(dirent.attr2, attr2);
305320
copy_le32(dirent.offset, zip_offset);
306321

@@ -328,6 +343,9 @@ static int write_zip_entry(struct archiver_args *args,
328343
if (readlen <= 0)
329344
break;
330345
crc = crc32(crc, buf, readlen);
346+
if (is_binary == -1)
347+
is_binary = entry_is_binary(path_without_prefix,
348+
buf, readlen);
331349
write_or_die(1, buf, readlen);
332350
}
333351
close_istream(stream);
@@ -361,6 +379,9 @@ static int write_zip_entry(struct archiver_args *args,
361379
if (readlen <= 0)
362380
break;
363381
crc = crc32(crc, buf, readlen);
382+
if (is_binary == -1)
383+
is_binary = entry_is_binary(path_without_prefix,
384+
buf, readlen);
364385

365386
zstream.next_in = buf;
366387
zstream.avail_in = readlen;
@@ -405,6 +426,8 @@ static int write_zip_entry(struct archiver_args *args,
405426
free(deflated);
406427
free(buffer);
407428

429+
copy_le16(dirent.attr1, !is_binary);
430+
408431
memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE);
409432
zip_dir_offset += ZIP_DIR_HEADER_SIZE;
410433
memcpy(zip_dir + zip_dir_offset, path, pathlen);

t/t5003-archive-zip.sh

+46-1
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,37 @@ check_zip() {
3333
test_expect_success UNZIP " validate file contents" "
3434
diff -r a ${dir_with_prefix}a
3535
"
36+
37+
dir=eol_$1
38+
dir_with_prefix=$dir/$2
39+
extracted=${dir_with_prefix}a
40+
original=a
41+
42+
test_expect_success UNZIP " extract ZIP archive with EOL conversion" '
43+
(mkdir $dir && cd $dir && "$GIT_UNZIP" -a ../$zipfile)
44+
'
45+
46+
test_expect_success UNZIP " validate that text files are converted" "
47+
test_cmp_bin $extracted/text.cr $extracted/text.crlf &&
48+
test_cmp_bin $extracted/text.cr $extracted/text.lf
49+
"
50+
51+
test_expect_success UNZIP " validate that binary files are unchanged" "
52+
test_cmp_bin $original/binary.cr $extracted/binary.cr &&
53+
test_cmp_bin $original/binary.crlf $extracted/binary.crlf &&
54+
test_cmp_bin $original/binary.lf $extracted/binary.lf
55+
"
56+
57+
test_expect_success UNZIP " validate that diff files are converted" "
58+
test_cmp_bin $extracted/diff.cr $extracted/diff.crlf &&
59+
test_cmp_bin $extracted/diff.cr $extracted/diff.lf
60+
"
61+
62+
test_expect_success UNZIP " validate that -diff files are unchanged" "
63+
test_cmp_bin $original/nodiff.cr $extracted/nodiff.cr &&
64+
test_cmp_bin $original/nodiff.crlf $extracted/nodiff.crlf &&
65+
test_cmp_bin $original/nodiff.lf $extracted/nodiff.lf
66+
"
3667
}
3768

3869
test_expect_success \
@@ -41,6 +72,18 @@ test_expect_success \
4172
echo simple textfile >a/a &&
4273
mkdir a/bin &&
4374
cp /bin/sh a/bin &&
75+
printf "text\r" >a/text.cr &&
76+
printf "text\r\n" >a/text.crlf &&
77+
printf "text\n" >a/text.lf &&
78+
printf "text\r" >a/nodiff.cr &&
79+
printf "text\r\n" >a/nodiff.crlf &&
80+
printf "text\n" >a/nodiff.lf &&
81+
printf "\0\r" >a/binary.cr &&
82+
printf "\0\r\n" >a/binary.crlf &&
83+
printf "\0\n" >a/binary.lf &&
84+
printf "\0\r" >a/diff.cr &&
85+
printf "\0\r\n" >a/diff.crlf &&
86+
printf "\0\n" >a/diff.lf &&
4487
printf "A\$Format:%s\$O" "$SUBSTFORMAT" >a/substfile1 &&
4588
printf "A not substituted O" >a/substfile2 &&
4689
(p=long_path_to_a_file && cd a &&
@@ -70,7 +113,9 @@ test_expect_success \
70113
git update-ref HEAD $(TZ=GMT GIT_COMMITTER_DATE="2005-05-27 22:00:00" \
71114
git commit-tree $treeid </dev/null)'
72115

73-
test_expect_success 'setup export-subst' '
116+
test_expect_success 'setup export-subst and diff attributes' '
117+
echo "a/nodiff.* -diff" >>.git/info/attributes &&
118+
echo "a/diff.* diff" >>.git/info/attributes &&
74119
echo "substfile?" export-subst >>.git/info/attributes &&
75120
git log --max-count=1 "--pretty=format:A${SUBSTFORMAT}O" HEAD \
76121
>a/substfile1

0 commit comments

Comments
 (0)