From b542d293b4502ea03f0b847ca41d85379aadf387 Mon Sep 17 00:00:00 2001 From: Jukka Ojanen Date: Thu, 2 Sep 2021 13:49:28 +0300 Subject: [PATCH 1/5] Refactor Windows file reading/writing --- src/filehandling.c | 123 +++++++++++++++++++++++---------------------- 1 file changed, 62 insertions(+), 61 deletions(-) diff --git a/src/filehandling.c b/src/filehandling.c index 11d56af16..f8a8452a3 100644 --- a/src/filehandling.c +++ b/src/filehandling.c @@ -25,6 +25,10 @@ _Static_assert(sizeof (size_t) == sizeof (SizeT), "Check why sizeof(size_t) != s #define HCFILE_BUFFER_SIZE 256 * 1024 #endif +#ifndef HCFILE_CHUNK_SIZE +#define HCFILE_CHUNK_SIZE 4 * 1024 * 1024 +#endif + static bool xz_initialized = false; static const ISzAlloc xz_alloc = { hc_lzma_alloc, hc_lzma_free }; @@ -354,49 +358,48 @@ size_t hc_fread (void *ptr, size_t size, size_t nmemb, HCFILE *fp) { size_t n = -1; - if (fp == NULL) return n; + if (ptr == NULL || fp == NULL) return n; - if (ptr == NULL || size == 0 || nmemb == 0) return 0; + if (size == 0 || nmemb == 0) return 0; if (fp->pfp) { - #if defined (_WIN) + #ifdef _WIN + u64 len = (u64) size * nmemb; - // 4 GB fread () limit for windows systems ? - // see: https://social.msdn.microsoft.com/Forums/vstudio/en-US/7c913001-227e-439b-bf07-54369ba07994/fwrite-issues-with-large-data-write - - #define GIGABYTE (1024u * 1024u * 1024u) + #ifndef _WIN64 + /* check 2 GB limit with 32 bit build */ + if (len >= INT32_MAX) + { + return n; + } + #endif - if (((size * nmemb) / GIGABYTE) < 4) + if (len <= HCFILE_CHUNK_SIZE) { n = fread (ptr, size, nmemb, fp->pfp); } else { - if ((size / GIGABYTE) > 3) return -1; - - size_t elements_max = (3u * GIGABYTE) / size; - size_t elements_left = nmemb; - - size_t off = 0; + size_t left = (size_t) len; + size_t pos = 0; - n = 0; + /* assume success */ + n = nmemb; - while (elements_left > 0) + do { - size_t elements_cur = elements_max; - - if (elements_left < elements_max) elements_cur = elements_left; - - size_t ret = fread (ptr + off, size, elements_cur, fp->pfp); - - if (ret != elements_cur) return -1; - - n += ret; - off += ret * size; - - elements_left -= ret; - } + size_t chunk = (left > HCFILE_CHUNK_SIZE) ? HCFILE_CHUNK_SIZE : left; + size_t bytes = fread ((unsigned char *) ptr + pos, 1, chunk, fp->pfp); + pos += bytes; + left -= bytes; + if (chunk != bytes) + { + /* partial read */ + n = pos / size; + break; + } + } while (left); } #else n = fread (ptr, size, nmemb, fp->pfp); @@ -408,9 +411,11 @@ size_t hc_fread (void *ptr, size_t size, size_t nmemb, HCFILE *fp) } else if (fp->ufp) { - unsigned s = size * nmemb; - - n = unzReadCurrentFile (fp->ufp, ptr, s); + u64 len = (u64) size * nmemb; + if (len == (unsigned) len) + { + n = unzReadCurrentFile (fp->ufp, ptr, (unsigned) len); + } } else if (fp->xfp) { @@ -454,47 +459,43 @@ size_t hc_fwrite (const void *ptr, size_t size, size_t nmemb, HCFILE *fp) { size_t n = -1; - if (fp == NULL) return n; + if (ptr == NULL || fp == NULL) return n; + + if (size == 0 || nmemb == 0) return 0; if (fp->pfp) { - #if defined (_WIN) + #ifdef _WIN + u64 len = (u64) size * nmemb; - // 4 GB fwrite () limit for windows systems ? - // see: https://social.msdn.microsoft.com/Forums/vstudio/en-US/7c913001-227e-439b-bf07-54369ba07994/fwrite-issues-with-large-data-write - - #define GIGABYTE (1024u * 1024u * 1024u) + #ifndef _WIN64 + /* check 2 GB limit with 32 bit build */ + if (len >= INT32_MAX) + { + return n; + } + #endif - if (((size * nmemb) / GIGABYTE) < 4) + if (len <= HCFILE_CHUNK_SIZE) { n = fwrite (ptr, size, nmemb, fp->pfp); } else { - if ((size / GIGABYTE) > 3) return -1; - - size_t elements_max = (3u * GIGABYTE) / size; - size_t elements_left = nmemb; + size_t left = (size_t) len; + size_t pos = 0; - size_t off = 0; + /* assume success */ + n = nmemb; - n = 0; - - while (elements_left > 0) + do { - size_t elements_cur = elements_max; - - if (elements_left < elements_max) elements_cur = elements_left; - - size_t ret = fwrite (ptr + off, size, elements_cur, fp->pfp); - - if (ret != elements_cur) return -1; - - n += ret; - off += ret * size; - - elements_left -= ret; - } + size_t chunk = (left > HCFILE_CHUNK_SIZE) ? HCFILE_CHUNK_SIZE : left; + size_t bytes = fwrite ((unsigned char *) ptr + pos, 1, chunk, fp->pfp); + pos += bytes; + left -= bytes; + if (chunk != bytes) return -1; + } while (left); } #else n = fwrite (ptr, size, nmemb, fp->pfp); @@ -623,7 +624,7 @@ int hc_fstat (HCFILE *fp, struct stat *buf) { /* check that the uncompressed size is known */ const xzfile_t *xfp = fp->xfp; - if (xfp->outSize != (UInt64)((Int64)-1)) + if (xfp->outSize != (UInt64) ((Int64) -1)) { buf->st_size = (off_t) xfp->outSize; } From 63cc905bbca0f40680af2cb3794e7b519804fce7 Mon Sep 17 00:00:00 2001 From: Jukka Ojanen Date: Thu, 2 Sep 2021 14:59:38 +0300 Subject: [PATCH 2/5] Refactor zip file reading --- src/filehandling.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/filehandling.c b/src/filehandling.c index f8a8452a3..999009ed9 100644 --- a/src/filehandling.c +++ b/src/filehandling.c @@ -412,10 +412,25 @@ size_t hc_fread (void *ptr, size_t size, size_t nmemb, HCFILE *fp) else if (fp->ufp) { u64 len = (u64) size * nmemb; - if (len == (unsigned) len) + u64 pos = 0; + + /* assume success */ + n = nmemb; + + do { - n = unzReadCurrentFile (fp->ufp, ptr, (unsigned) len); - } + unsigned chunk = (len > HCFILE_CHUNK_SIZE) ? HCFILE_CHUNK_SIZE : (unsigned) len; + int result = unzReadCurrentFile (fp->ufp, (unsigned char *) ptr + pos, chunk); + if (result < 0) return -1; + pos += (u64) result; + len -= (u64) result; + if (chunk != (unsigned) result) + { + /* partial read */ + n = pos / size; + break; + } + } while (len); } else if (fp->xfp) { From e6c89040fef0372940918e95a71ad95b3e1be280 Mon Sep 17 00:00:00 2001 From: Jukka Ojanen Date: Thu, 2 Sep 2021 15:16:19 +0300 Subject: [PATCH 3/5] Refactor xz file reading --- src/filehandling.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/filehandling.c b/src/filehandling.c index 999009ed9..2b6c09897 100644 --- a/src/filehandling.c +++ b/src/filehandling.c @@ -356,7 +356,7 @@ bool hc_fopen_raw (HCFILE *fp, const char *path, const char *mode) size_t hc_fread (void *ptr, size_t size, size_t nmemb, HCFILE *fp) { - size_t n = -1; + size_t n = (size_t) -1; if (ptr == NULL || fp == NULL) return n; @@ -421,7 +421,7 @@ size_t hc_fread (void *ptr, size_t size, size_t nmemb, HCFILE *fp) { unsigned chunk = (len > HCFILE_CHUNK_SIZE) ? HCFILE_CHUNK_SIZE : (unsigned) len; int result = unzReadCurrentFile (fp->ufp, (unsigned char *) ptr + pos, chunk); - if (result < 0) return -1; + if (result < 0) return (size_t) -1; pos += (u64) result; len -= (u64) result; if (chunk != (unsigned) result) @@ -440,6 +440,9 @@ size_t hc_fread (void *ptr, size_t size, size_t nmemb, HCFILE *fp) SRes res = SZ_OK; xzfile_t *xfp = fp->xfp; + /* assume success */ + n = nmemb; + do { /* fill buffer if needed */ @@ -458,13 +461,16 @@ size_t hc_fread (void *ptr, size_t size, size_t nmemb, HCFILE *fp) res = XzUnpacker_Code (&xfp->state, outBuf + outPos, &outLeft, xfp->inBuf + xfp->inPos, &inLeft, inLeft == 0, CODER_FINISH_ANY, &status); xfp->inPos += inLeft; xfp->inProcessed += inLeft; - if (res != SZ_OK) return -1; - if (inLeft == 0 && outLeft == 0) break; + if (res != SZ_OK) return (size_t) -1; + if (inLeft == 0 && outLeft == 0) + { + /* partial read */ + n = (size_t) (outPos / size); + break; + } outPos += outLeft; xfp->outProcessed += outLeft; } while (outPos < outLen); - - n = outPos; } return n; From 592b28f65f0873f75d91fd800acd03a7b79b1a02 Mon Sep 17 00:00:00 2001 From: Jukka Ojanen Date: Thu, 2 Sep 2021 15:57:22 +0300 Subject: [PATCH 4/5] Add tests for 32 bit build --- src/filehandling.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/filehandling.c b/src/filehandling.c index 2b6c09897..c890129c3 100644 --- a/src/filehandling.c +++ b/src/filehandling.c @@ -369,10 +369,7 @@ size_t hc_fread (void *ptr, size_t size, size_t nmemb, HCFILE *fp) #ifndef _WIN64 /* check 2 GB limit with 32 bit build */ - if (len >= INT32_MAX) - { - return n; - } + if (len >= INT32_MAX) return n; #endif if (len <= HCFILE_CHUNK_SIZE) @@ -414,6 +411,11 @@ size_t hc_fread (void *ptr, size_t size, size_t nmemb, HCFILE *fp) u64 len = (u64) size * nmemb; u64 pos = 0; + #if defined(_WIN) && !defined(_WIN64) + /* check 2 GB limit with 32 bit build */ + if (len >= INT32_MAX) return n; + #endif + /* assume success */ n = nmemb; @@ -440,6 +442,11 @@ size_t hc_fread (void *ptr, size_t size, size_t nmemb, HCFILE *fp) SRes res = SZ_OK; xzfile_t *xfp = fp->xfp; + #if defined(_WIN) && !defined(_WIN64) + /* check 2 GB limit with 32 bit build */ + if (outLen >= INT32_MAX) return n; + #endif + /* assume success */ n = nmemb; From ac9d2241c239b42c2876d6f095aaa8838838a33c Mon Sep 17 00:00:00 2001 From: Jukka Ojanen Date: Thu, 2 Sep 2021 16:03:25 +0300 Subject: [PATCH 5/5] Increase unzReadCurrentFile() chunk size to INT_MAX --- src/filehandling.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/filehandling.c b/src/filehandling.c index c890129c3..7c9fffdd1 100644 --- a/src/filehandling.c +++ b/src/filehandling.c @@ -421,7 +421,7 @@ size_t hc_fread (void *ptr, size_t size, size_t nmemb, HCFILE *fp) do { - unsigned chunk = (len > HCFILE_CHUNK_SIZE) ? HCFILE_CHUNK_SIZE : (unsigned) len; + unsigned chunk = (len > INT_MAX) ? INT_MAX : (unsigned) len; int result = unzReadCurrentFile (fp->ufp, (unsigned char *) ptr + pos, chunk); if (result < 0) return (size_t) -1; pos += (u64) result;