diff --git a/src/pack-objects.c b/src/pack-objects.c
index 8475f64f2..8b65ac26a 100644
--- a/src/pack-objects.c
+++ b/src/pack-objects.c
@@ -291,7 +291,6 @@ static int write_object(
 	void *delta_data = NULL;
 	void *data;
 	size_t hdr_len, zbuf_len = COMPRESS_BUFLEN, data_len;
-	ssize_t written;
 	int error;
 
 	if (po->delta) {
@@ -337,19 +336,15 @@ static int write_object(
 		GITERR_CHECK_ALLOC(zbuf);
 
 		git_zstream_reset(&pb->zstream);
+		git_zstream_set_input(&pb->zstream, data, data_len);
 
-		while ((written = git_zstream_deflate(zbuf, zbuf_len, &pb->zstream, data, data_len)) > 0) {
-			if ((error = write_cb(zbuf, written, cb_data)) < 0 ||
-				(error = git_hash_update(&pb->ctx, zbuf, written)) < 0)
+		while (!git_zstream_done(&pb->zstream)) {
+			if ((error = git_zstream_get_output(zbuf, &zbuf_len, &pb->zstream)) < 0 ||
+				(error = write_cb(zbuf, zbuf_len, cb_data)) < 0 ||
+				(error = git_hash_update(&pb->ctx, zbuf, zbuf_len)) < 0)
 				goto done;
 
-			data = (char *)data + written;
-			data_len -= written;
-		}
-
-		if (written < 0) {
-			error = (int)written;
-			goto done;
+			zbuf_len = COMPRESS_BUFLEN; /* reuse buffer */
 		}
 
 		if (po->delta)
diff --git a/src/zstream.c b/src/zstream.c
index 0bca72ff3..c83602297 100644
--- a/src/zstream.c
+++ b/src/zstream.c
@@ -10,14 +10,18 @@
 #include "zstream.h"
 #include "buffer.h"
 
-#define BUFFER_SIZE (1024 * 1024)
+#define ZSTREAM_BUFFER_SIZE (1024 * 1024)
+#define ZSTREAM_BUFFER_MIN_EXTRA 8
 
-static int zstream_seterr(int zerr, git_zstream *zstream)
+static int zstream_seterr(git_zstream *zs)
 {
-	if (zerr == Z_MEM_ERROR)
+	if (zs->zerr == Z_OK || zs->zerr == Z_STREAM_END)
+		return 0;
+
+	if (zs->zerr == Z_MEM_ERROR)
 		giterr_set_oom();
-	else if (zstream->msg)
-		giterr_set(GITERR_ZLIB, zstream->msg);
+	else if (zs->z.msg)
+		giterr_set(GITERR_ZLIB, zs->z.msg);
 	else
 		giterr_set(GITERR_ZLIB, "Unknown compression error");
 
@@ -26,69 +30,128 @@ static int zstream_seterr(int zerr, git_zstream *zstream)
 
 int git_zstream_init(git_zstream *zstream)
 {
-	int zerr;
-
-	if ((zerr = deflateInit(zstream, Z_DEFAULT_COMPRESSION)) != Z_OK)
-		return zstream_seterr(zerr, zstream);
-
-	return 0;
-}
-
-ssize_t git_zstream_deflate(void *out, size_t out_len, git_zstream *zstream, const void *in, size_t in_len)
-{
-	int zerr;
-
-	if ((ssize_t)out_len < 0)
-		out_len = INT_MAX;
-
-	zstream->next_in = (Bytef *)in;
-	zstream->avail_in = in_len;
-	zstream->next_out = out;
-	zstream->avail_out = out_len;
-
-	if ((zerr = deflate(zstream, Z_FINISH)) == Z_STREAM_ERROR)
-		return zstream_seterr(zerr, zstream);
-
-	return (out_len - zstream->avail_out);
-}
-
-void git_zstream_reset(git_zstream *zstream)
-{
-	deflateReset(zstream);
+	zstream->zerr = deflateInit(&zstream->z, Z_DEFAULT_COMPRESSION);
+	return zstream_seterr(zstream);
 }
 
 void git_zstream_free(git_zstream *zstream)
 {
-	deflateEnd(zstream);
+	deflateEnd(&zstream->z);
+}
+
+void git_zstream_reset(git_zstream *zstream)
+{
+	deflateReset(&zstream->z);
+	zstream->in = NULL;
+	zstream->in_len = 0;
+	zstream->zerr = Z_STREAM_END;
+}
+
+int git_zstream_set_input(git_zstream *zstream, const void *in, size_t in_len)
+{
+	zstream->in = in;
+	zstream->in_len = in_len;
+	zstream->zerr = Z_OK;
+	return 0;
+}
+
+bool git_zstream_done(git_zstream *zstream)
+{
+	return (!zstream->in_len && zstream->zerr == Z_STREAM_END);
+}
+
+size_t git_zstream_suggest_output_len(git_zstream *zstream)
+{
+	if (zstream->in_len > ZSTREAM_BUFFER_SIZE)
+		return ZSTREAM_BUFFER_SIZE;
+	else if (zstream->in_len > ZSTREAM_BUFFER_MIN_EXTRA)
+		return zstream->in_len;
+	else
+		return ZSTREAM_BUFFER_MIN_EXTRA;
+}
+
+/*
+ * Deflate pending input into `out`. On entry `*out_len` is the capacity
+ * of `out`; on return it is the number of bytes actually written.
+ */
+int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream)
+{
+	int zflush = Z_FINISH;
+	size_t out_remain = *out_len;
+
+	while (out_remain > 0 && zstream->zerr != Z_STREAM_END) {
+		size_t out_queued, in_queued, out_used, in_used;
+
+		/* set up in data */
+		zstream->z.next_in = (Bytef *)zstream->in;
+		zstream->z.avail_in = (uInt)zstream->in_len;
+		if ((size_t)zstream->z.avail_in != zstream->in_len) {
+			zstream->z.avail_in = INT_MAX;
+			zflush = Z_NO_FLUSH;
+		} else {
+			zflush = Z_FINISH;
+		}
+		in_queued = (size_t)zstream->z.avail_in;
+
+		/* set up out data */
+		zstream->z.next_out = out;
+		zstream->z.avail_out = (uInt)out_remain;
+		if ((size_t)zstream->z.avail_out != out_remain)
+			zstream->z.avail_out = INT_MAX;
+		out_queued = (size_t)zstream->z.avail_out;
+
+		/* compress next chunk */
+		zstream->zerr = deflate(&zstream->z, zflush);
+		if (zstream->zerr == Z_STREAM_ERROR)
+			return zstream_seterr(zstream);
+
+		out_used = (out_queued - zstream->z.avail_out);
+		out_remain -= out_used;
+		out = ((char *)out) + out_used;
+
+		in_used = (in_queued - zstream->z.avail_in);
+		zstream->in_len -= in_used;
+		zstream->in += in_used;
+	}
+
+	/* either we finished the input or we did not flush the data */
+	assert(zstream->in_len > 0 || zflush == Z_FINISH);
+
+	/* set out_size to number of bytes actually written to output */
+	*out_len = *out_len - out_remain;
+
+	return 0;
 }
 
 int git_zstream_deflatebuf(git_buf *out, const void *in, size_t in_len)
 {
-	git_zstream zstream = GIT_ZSTREAM_INIT;
-	size_t out_len;
-	ssize_t written;
+	git_zstream zs = GIT_ZSTREAM_INIT;
 	int error = 0;
 
-	if ((error = git_zstream_init(&zstream)) < 0)
+	if ((error = git_zstream_init(&zs)) < 0)
 		return error;
 
-	do {
-		if (out->asize - out->size < BUFFER_SIZE)
-			git_buf_grow(out, out->asize + BUFFER_SIZE);
+	if ((error = git_zstream_set_input(&zs, in, in_len)) < 0)
+		goto done;
 
-		out_len = out->asize - out->size;
+	while (!git_zstream_done(&zs)) {
+		size_t step = git_zstream_suggest_output_len(&zs), written;
 
-		if ((written = git_zstream_deflate(out->ptr + out->size, out_len, &zstream, in, in_len)) <= 0)
-			break;
+		if ((error = git_buf_grow(out, out->asize + step)) < 0)
+			goto done;
+
+		/* hold back one byte so the NUL terminator below always fits */
+		written = out->asize - out->size - 1;
+
+		if ((error = git_zstream_get_output(
+				out->ptr + out->size, &written, &zs)) < 0)
+			goto done;
 
-		in = (char *)in + written;
-		in_len -= written;
 		out->size += written;
-	} while (written > 0);
+		out->ptr[out->size] = '\0';
+	}
 
-	if (written < 0)
-		error = written;
-
-	git_zstream_free(&zstream);
+done:
+	git_zstream_free(&zs);
 	return error;
 }
diff --git a/src/zstream.h b/src/zstream.h
index 9672903c0..9b5bf6ace 100644
--- a/src/zstream.h
+++ b/src/zstream.h
@@ -12,15 +12,28 @@
 #include "common.h"
 #include "buffer.h"
 
-#define git_zstream z_stream
+typedef struct {
+	z_stream z;
+	const char *in;
+	size_t in_len;
+	int zerr;
+} git_zstream;
 
-#define GIT_ZSTREAM_INIT {0}
+#define GIT_ZSTREAM_INIT {{0}}
 
 int git_zstream_init(git_zstream *zstream);
-ssize_t git_zstream_deflate(void *out, size_t out_len, git_zstream *zstream, const void *in, size_t in_len);
-void git_zstream_reset(git_zstream *zstream);
 void git_zstream_free(git_zstream *zstream);
 
+int git_zstream_set_input(git_zstream *zstream, const void *in, size_t in_len);
+
+size_t git_zstream_suggest_output_len(git_zstream *zstream);
+
+int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream);
+
+bool git_zstream_done(git_zstream *zstream);
+
+void git_zstream_reset(git_zstream *zstream);
+
 int git_zstream_deflatebuf(git_buf *out, const void *in, size_t in_len);
 
 #endif /* INCLUDE_zstream_h__ */
diff --git a/tests/core/zstream.c b/tests/core/zstream.c
new file mode 100644
index 000000000..63ff8c93a
--- /dev/null
+++ b/tests/core/zstream.c
@@ -0,0 +1,98 @@
+#include "clar_libgit2.h"
+#include "buffer.h"
+#include "zstream.h"
+
+static const char *data = "This is a test test test of This is a test";
+
+#define INFLATE_EXTRA 2
+
+static void assert_zlib_equal_(
+	const void *expected, size_t e_len,
+	const void *compressed, size_t c_len,
+	const char *msg, const char *file, int line)
+{
+	z_stream stream;
+	char *expanded = git__calloc(1, e_len + INFLATE_EXTRA);
+	cl_assert(expanded);
+
+	memset(&stream, 0, sizeof(stream));
+	stream.next_out = (Bytef *)expanded;
+	stream.avail_out = (uInt)(e_len + INFLATE_EXTRA);
+	stream.next_in = (Bytef *)compressed;
+	stream.avail_in = (uInt)c_len;
+
+	cl_assert(inflateInit(&stream) == Z_OK);
+	cl_assert(inflate(&stream, Z_FINISH) == Z_STREAM_END);
+	inflateEnd(&stream);
+
+	clar__assert_equal(
+		file, line, msg, 1,
+		"%d", (int)stream.total_out, (int)e_len);
+	clar__assert_equal(
+		file, line, "Buffer len was not exact match", 1,
+		"%d", (int)stream.avail_out, (int)INFLATE_EXTRA);
+
+	clar__assert(
+		memcmp(expanded, expected, e_len) == 0,
+		file, line, "uncompressed data did not match", NULL, 1);
+
+	git__free(expanded);
+}
+
+#define assert_zlib_equal(E,EL,C,CL) \
+	assert_zlib_equal_(E, EL, C, CL, #EL " != " #CL, __FILE__, (int)__LINE__)
+
+void test_core_zstream__basic(void)
+{
+	git_zstream z = GIT_ZSTREAM_INIT;
+	char out[128];
+	size_t outlen = sizeof(out);
+
+	cl_git_pass(git_zstream_init(&z));
+	cl_git_pass(git_zstream_set_input(&z, data, strlen(data) + 1));
+	cl_git_pass(git_zstream_get_output(out, &outlen, &z));
+	cl_assert(git_zstream_done(&z));
+	cl_assert(outlen > 0);
+	git_zstream_free(&z);
+
+	assert_zlib_equal(data, strlen(data) + 1, out, outlen);
+}
+
+void test_core_zstream__buffer(void)
+{
+	git_buf out = GIT_BUF_INIT;
+	cl_git_pass(git_zstream_deflatebuf(&out, data, strlen(data) + 1));
+	assert_zlib_equal(data, strlen(data) + 1, out.ptr, out.size);
+	git_buf_free(&out);
+}
+
+#define BIG_STRING_PART "Big Data IS Big - Long Data IS Long - We need a buffer larger than 1024 x 1024 to make sure we trigger chunked compression - Big Big Data IS Bigger than Big - Long Long Data IS Longer than Long"
+
+void test_core_zstream__big_data(void)
+{
+	git_buf in = GIT_BUF_INIT;
+	git_buf out = GIT_BUF_INIT;
+	size_t scan;
+
+	/* make a big string that's easy to compress */
+	while (in.size < 1024 * 1024)
+		cl_git_pass(git_buf_put(&in, BIG_STRING_PART, strlen(BIG_STRING_PART)));
+
+	cl_git_pass(git_zstream_deflatebuf(&out, in.ptr, in.size));
+	assert_zlib_equal(in.ptr, in.size, out.ptr, out.size);
+
+	git_buf_free(&out);
+
+	/* make a big string that's hard to compress */
+
+	srand(0xabad1dea);
+	for (scan = 0; scan < in.size; ++scan)
+		in.ptr[scan] = (char)rand();
+
+	cl_git_pass(git_zstream_deflatebuf(&out, in.ptr, in.size));
+	assert_zlib_equal(in.ptr, in.size, out.ptr, out.size);
+
+	git_buf_free(&out);
+
+	git_buf_free(&in);
+}