From cee021c358afc2ba98e412993bf2c52bf7bed715 Mon Sep 17 00:00:00 2001 From: DL6ER Date: Wed, 29 Jan 2025 16:58:23 +0100 Subject: [PATCH 1/6] Use HTTP ETags in addition to the If-Modified-Since header if supported by curl to avoid downloading from servers not using the latter method (raw.githubusercontent.com) Signed-off-by: DL6ER --- gravity.sh | 52 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/gravity.sh b/gravity.sh index 68840b51..f34cec40 100755 --- a/gravity.sh +++ b/gravity.sh @@ -44,6 +44,7 @@ gravityDBcopy="${piholeGitDir}/advanced/Templates/gravity_copy.sql" domainsExtension="domains" curl_connect_timeout=10 +etag_support=false # Check gravity temp directory if [ ! -d "${GRAVITY_TMPDIR}" ] || [ ! -w "${GRAVITY_TMPDIR}" ]; then @@ -502,6 +503,20 @@ gravity_DownloadBlocklists() { compression="" echo -e " ${INFO} Libz compression not available\n" fi + + # Check if etag is supported by the locally available version of curl by + # comparing the version string being >= 7.68.0 (released Jan 2020) + # https://github.com/curl/curl/pull/4543 followed by + # https://github.com/curl/curl/pull/4678 + if curl -V | grep -q "curl 7\.[6-9][8-9]"; then + etag_support=true + else + # Check if the version is >= 8 + if curl -V | grep -q "curl 8"; then + etag_support=true + fi + fi + # Loop through $sources and download each one for ((i = 0; i < "${#sources[@]}"; i++)); do url="${sources[$i]}" @@ -595,12 +610,37 @@ gravity_DownloadBlocklistFromUrl() { mv "${listCurlBuffer}" "${listCurlBuffer%.*}.phgpb" listCurlBuffer="${listCurlBuffer%.*}.phgpb" - # Determine if $saveLocation has read permission - if [[ -r "${saveLocation}" && $url != "file"* ]]; then - # Have curl determine if a remote file has been modified since last retrieval - # Uses "Last-Modified" header, which certain web servers do not provide (e.g: raw github urls) - # Note: Don't do this for local files, always download them - heisenbergCompensator="-z 
${saveLocation}" + # For all remote files, we try to determine if the file has changed to skip + # downloading them whenever possible. + if [[ $url != "file"* ]]; then + # Use the HTTP ETag header to determine if the file has changed if supported + # by curl. Using ETags is supported by raw.githubusercontent.com URLs. + if [[ "${etag_support}" == true ]]; then + # Save HTTP ETag to the specified file. An ETag is a caching related header, + # usually returned in a response. If no ETag is sent by the server, an empty + # file is created and can later be used consistently. + heisenbergCompensator="--etag-save ${saveLocation}.etag" + + if [[ -f "${saveLocation}.etag" ]]; then + # This option makes a conditional HTTP request for the specific ETag read + # from the given file by sending a custom If-None-Match header using the + # stored ETag. This way, the server will only send the file if it has + # changed since the last request. + heisenbergCompensator="${heisenbergCompensator} --etag-compare ${saveLocation}.etag" + fi + fi + + # Add If-Modified-Since header to the request if we did already download the + # file once + if [[ -f "${saveLocation}" ]]; then + # Request a file that has been modified later than the given time and + # date. We provide a file here which makes curl use the modification + # timestamp (mtime) of this file. + # Interstingly, this option is not supported by raw.githubusercontent.com + # URLs, however, it is still supported by many older web servers which may + # not support the HTTP ETag method so we keep it as a fallback. 
+ heisenbergCompensator="${heisenbergCompensator} -z ${saveLocation}" + fi fi str="Status:" From 4322299c39e6657d9973e4c19b1fbf26865ecfba Mon Sep 17 00:00:00 2001 From: DL6ER Date: Wed, 29 Jan 2025 16:58:54 +0100 Subject: [PATCH 2/6] Drive-by fixing of errors reported by shellcheck for gravity.sh Signed-off-by: DL6ER --- gravity.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gravity.sh b/gravity.sh index f34cec40..c39a0f26 100755 --- a/gravity.sh +++ b/gravity.sh @@ -534,7 +534,7 @@ gravity_DownloadBlocklists() { # Save the file as list.#.domain saveLocation="${piholeDir}/list.${id}.${domain}.${domainsExtension}" - activeDomains[$i]="${saveLocation}" + activeDomains[i]="${saveLocation}" # Check if we can write to the save location file without actually creating # it (in case it doesn't exist) @@ -1055,7 +1055,7 @@ timeit(){ elapsed_time=$((end_time - start_time)) # Display the elapsed time - printf " %b--> took %d.%03d seconds%b\n" ${COL_BLUE} $((elapsed_time / 1000)) $((elapsed_time % 1000)) ${COL_NC} + printf " %b--> took %d.%03d seconds%b\n" "${COL_BLUE}" $((elapsed_time / 1000)) $((elapsed_time % 1000)) "${COL_NC}" return $ret } From 6e0b16ed1acbca2512e0f22ec02ddffef6ef619b Mon Sep 17 00:00:00 2001 From: DL6ER Date: Wed, 29 Jan 2025 19:17:04 +0100 Subject: [PATCH 3/6] Rename heisenbergCompensator -> modifiedOptions Signed-off-by: DL6ER --- gravity.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gravity.sh b/gravity.sh index c39a0f26..e9da1a97 100755 --- a/gravity.sh +++ b/gravity.sh @@ -601,7 +601,7 @@ compareLists() { # Download specified URL and perform checks on HTTP status and file content gravity_DownloadBlocklistFromUrl() { local url="${1}" adlistID="${2}" saveLocation="${3}" target="${4}" compression="${5}" gravity_type="${6}" domain="${7}" - local heisenbergCompensator="" listCurlBuffer str httpCode success="" ip cmd_ext + local modifiedOptions="" listCurlBuffer str httpCode success="" ip cmd_ext 
local file_path permissions ip_addr port blocked=false download=true # Create temp file to store content on disk instead of RAM @@ -619,14 +619,14 @@ gravity_DownloadBlocklistFromUrl() { # Save HTTP ETag to the specified file. An ETag is a caching related header, # usually returned in a response. If no ETag is sent by the server, an empty # file is created and can later be used consistently. - heisenbergCompensator="--etag-save ${saveLocation}.etag" + modifiedOptions="--etag-save ${saveLocation}.etag" if [[ -f "${saveLocation}.etag" ]]; then # This option makes a conditional HTTP request for the specific ETag read # from the given file by sending a custom If-None-Match header using the # stored ETag. This way, the server will only send the file if it has # changed since the last request. - heisenbergCompensator="${heisenbergCompensator} --etag-compare ${saveLocation}.etag" + modifiedOptions="${modifiedOptions} --etag-compare ${saveLocation}.etag" fi fi @@ -639,7 +639,7 @@ gravity_DownloadBlocklistFromUrl() { # Interstingly, this option is not supported by raw.githubusercontent.com # URLs, however, it is still supported by many older web servers which may # not support the HTTP ETag method so we keep it as a fallback. 
- heisenbergCompensator="${heisenbergCompensator} -z ${saveLocation}" + modifiedOptions="${modifiedOptions} -z ${saveLocation}" fi fi @@ -770,7 +770,7 @@ gravity_DownloadBlocklistFromUrl() { if [[ "${download}" == true ]]; then # shellcheck disable=SC2086 - httpCode=$(curl --connect-timeout ${curl_connect_timeout} -s -L ${compression} ${cmd_ext} ${heisenbergCompensator} -w "%{http_code}" "${url}" -o "${listCurlBuffer}" 2>/dev/null) + httpCode=$(curl --connect-timeout ${curl_connect_timeout} -s -L ${compression} ${cmd_ext} ${modifiedOptions} -w "%{http_code}" "${url}" -o "${listCurlBuffer}" 2>/dev/null) fi case $url in From f50c6962531db57d7621d8a140eb6efb0ed0ef54 Mon Sep 17 00:00:00 2001 From: DL6ER Date: Wed, 29 Jan 2025 19:19:01 +0100 Subject: [PATCH 4/6] Use temp variable for curl -V output Signed-off-by: DL6ER --- gravity.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/gravity.sh b/gravity.sh index e9da1a97..bc11867f 100755 --- a/gravity.sh +++ b/gravity.sh @@ -414,7 +414,7 @@ gravity_DownloadBlocklists() { echo -e " ${INFO} Storing gravity database in ${COL_BOLD}${gravityDBfile}${COL_NC}" fi - local url domain str target compression adlist_type directory + local url domain str target compression adlist_type directory curlVersion echo "" # Prepare new gravity database @@ -496,7 +496,8 @@ gravity_DownloadBlocklists() { # Use compression to reduce the amount of data that is transferred # between the Pi-hole and the ad list provider. 
Use this feature # only if it is supported by the locally available version of curl - if curl -V | grep -q "Features:.* libz"; then + curlVersion=$(curl -V) + if echo "${curlVersion}" | grep -q "Features:.* libz"; then compression="--compressed" echo -e " ${INFO} Using libz compression\n" else @@ -508,11 +509,11 @@ gravity_DownloadBlocklists() { # comparing the version string being >= 7.68.0 (released Jan 2020) # https://github.com/curl/curl/pull/4543 followed by # https://github.com/curl/curl/pull/4678 - if curl -V | grep -q "curl 7\.[6-9][8-9]"; then + if echo "${curlVersion}" | grep -q "curl 7\.[6-9][8-9]"; then etag_support=true else # Check if the version is >= 8 - if curl -V | grep -q "curl 8"; then + if echo "${curlVersion}" | grep -q "curl 8"; then etag_support=true fi fi From 37718c65e9efd348824948aec3dc29483b49ec55 Mon Sep 17 00:00:00 2001 From: DL6ER Date: Sat, 1 Feb 2025 19:36:25 +0100 Subject: [PATCH 5/6] Simplify cURL etag feature detection following discussion in #5873 Signed-off-by: DL6ER --- gravity.sh | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/gravity.sh b/gravity.sh index 276e59c2..f6d09ac5 100755 --- a/gravity.sh +++ b/gravity.sh @@ -507,17 +507,12 @@ gravity_DownloadBlocklists() { echo -e " ${INFO} Libz compression not available\n" fi - # Check if etag is supported by the locally available version of curl by - # comparing the version string being >= 7.68.0 (released Jan 2020) - # https://github.com/curl/curl/pull/4543 followed by + # Check if etag is supported by the locally available version of curl + # (available as of curl 7.68.0, released Jan 2020) + # https://github.com/curl/curl/pull/4543 + # https://github.com/curl/curl/pull/4678 - if echo "${curlVersion}" | grep -q "curl 7\.[6-9][8-9]"; then + if curl --help all | grep -q "etag-save"; then etag_support=true - else - # Check if the version is >= 8 - if echo "${curlVersion}" | grep -q "curl 8"; then - etag_support=true - fi fi # Loop through $sources 
and download each one From 7162be18e2585dfd14db65f2275d2bd6a1379af6 Mon Sep 17 00:00:00 2001 From: DL6ER Date: Sun, 2 Feb 2025 20:57:13 +0100 Subject: [PATCH 6/6] Remove curl -V temp variable Signed-off-by: DL6ER --- gravity.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gravity.sh b/gravity.sh index f6d09ac5..c01b2823 100755 --- a/gravity.sh +++ b/gravity.sh @@ -414,7 +414,7 @@ gravity_DownloadBlocklists() { echo -e " ${INFO} Storing gravity database in ${COL_BOLD}${gravityDBfile}${COL_NC}" fi - local url domain str target compression adlist_type directory curlVersion success + local url domain str target compression adlist_type directory success echo "" # Prepare new gravity database @@ -498,8 +498,7 @@ gravity_DownloadBlocklists() { # Use compression to reduce the amount of data that is transferred # between the Pi-hole and the ad list provider. Use this feature # only if it is supported by the locally available version of curl - curlVersion=$(curl -V) - if echo "${curlVersion}" | grep -q "Features:.* libz"; then + if curl -V | grep -q "Features:.* libz"; then compression="--compressed" echo -e " ${INFO} Using libz compression\n" else