From ca74152d1d5a5fc179314458a0a96a1843156f0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 16 Feb 2023 23:11:00 +0100 Subject: [PATCH 1/9] Allow adding ABP style blocklists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adam Warner Signed-off-by: Christian König --- gravity.sh | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/gravity.sh b/gravity.sh index 28c28a9e..d3bb4a07 100755 --- a/gravity.sh +++ b/gravity.sh @@ -519,25 +519,31 @@ gravity_DownloadBlocklists() { gravity_Blackbody=true } -# num_total_imported_domains increases for each list processed -num_total_imported_domains=0 -num_domains=0 -num_non_domains=0 parseList() { - local adlistID="${1}" src="${2}" target="${3}" non_domains sample_non_domains - # This sed does the following things: - # 1. Remove all lines containing no domains - # 2. Remove all domains containing invalid characters. Valid are: a-z, A-Z, 0-9, dot (.), minus (-), underscore (_) - # 3. Append ,adlistID to every line - # 4. Remove trailing period (see https://github.com/pi-hole/pi-hole/issues/4701) - # 5. Ensures there is a newline on the last line - sed -r "/([^\.]+\.)+[^\.]{2,}/!d;/[^a-zA-Z0-9.\_-]/d;s/\.$//;s/$/,${adlistID}/;/.$/a\\" "${src}" >> "${target}" + local adlistID="${1}" src="${2}" target="${3}" temp_file non_domains sample_non_domains + + # Create a temporary file for the sed magic instead of using "${target}" directly + # this allows to split the sed commands to improve readability + temp_file="$(mktemp -p "/tmp" --suffix=".gravity")" + + # 1. Add all valid domains (adapted from https://stackoverflow.com/a/30007882) + # no need to include uppercase letters, as we convert to lowercase in gravity_ParseFileIntoDomains() already + sed -r "/^([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]$/!d" "${src}" > "${temp_file}" + # 2. Add all supported ABP style lines (||subdomain.domain.tlp^) + sed -r "/^\|\|([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]\^$/!d" "${src}" >> "${temp_file}" - # Find lines containing no domains or with invalid characters (see above) + # Find lines containing no domains or with invalid characters (not matching regex above) + # This is simply everything that is not in $temp_file compared to $src # Remove duplicates from the list - mapfile -t non_domains <<< "$(sed -r "/([^\.]+\.)+[^\.]{2,}/d" < "${src}")" - mapfile -t -O "${#non_domains[@]}" non_domains <<< "$(sed -r "/[^a-zA-Z0-9.\_-]/!d" < "${src}")" - IFS=" " read -r -a non_domains <<< "$(tr ' ' '\n' <<< "${non_domains[@]}" | sort -u | tr '\n' ' ')" + mapfile -t non_domains < <(grep -Fvf "${temp_file}" "${src}" | sort -u ) + + # 3. Remove trailing period (see https://github.com/pi-hole/pi-hole/issues/4701) + # 4. Append ,adlistID to every line + # 5. Ensures there is a newline on the last line + sed -i "s/\.$//;s/$/,${adlistID}/;/.$/a\\" "${temp_file}" + + # concatenate the temporary file to the target file + cat "${temp_file}" >> "${target}" # A list of items of common local hostnames not to report as unusable # Some lists (i.e StevenBlack's) contain these as they are supposed to be used as HOST files @@ -553,13 +559,8 @@ parseList() { # Get a sample of non-domain entries, limited to 5 (the list should already have been de-duplicated) IFS=" " read -r -a sample_non_domains <<< "$(tr ' ' '\n' <<< "${non_domains[@]}" | head -n 5 | tr '\n' ' ')" - local tmp_new_imported_total - # Get the new number of domains in destination file - tmp_new_imported_total="$(grep -c "^" "${target}")" - # Number of imported lines for this file is the difference between the new total and the old total. (Or, the number of domains we just added.) - num_domains="$(( tmp_new_imported_total-num_total_imported_domains ))" - # Replace the running total with the new total. - num_total_imported_domains="$tmp_new_imported_total" + # Get the number of domains added + num_domains="$(grep -c "^" "${temp_file}")" # Get the number of non_domains (this is the number of entries left after stripping the source of comments/duplicates/false positives/domains) num_non_domains="${#non_domains[@]}" From 1f3f8491065949f38faac75879764f7edf9ce94e Mon Sep 17 00:00:00 2001 From: Adam Warner Date: Sun, 19 Feb 2023 17:47:10 +0000 Subject: [PATCH 2/9] Remove lines starting with ! or [ to account for ABP style comments and header Also splits the piped "one-liner" in ParseFileIntoDomains into individually commented commands (makes for easier reading and debugging) Signed-off-by: Adam Warner --- gravity.sh | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/gravity.sh b/gravity.sh index d3bb4a07..fb7f42f1 100755 --- a/gravity.sh +++ b/gravity.sh @@ -745,18 +745,30 @@ gravity_ParseFileIntoDomains() { # Most of the lists downloaded are already in hosts file format but the spacing/formatting is not contiguous # This helps with that and makes it easier to read # It also helps with debugging so each stage of the script can be researched more in depth - # 1) Remove carriage returns - # 2) Convert all characters to lowercase - # 3) Remove comments (text starting with "#", include possible spaces before the hash sign) + # 1) Convert all characters to lowercase + tr '[:upper:]' '[:lower:]' < "${src}" > "${destination}" + + # 2) Remove carriage returns + sed -i 's/\r$//' "${destination}" + + # 3a) Remove comments (text starting with "#", include possible spaces before the hash sign) + sed -i 's/\s*#.*//g' "${destination}" + + # 3b) Remove lines starting with ! (ABP Comments) + sed -i 's/\s*!.*//g' "${destination}" + + # 3c) Remove lines starting with [ (ABP Header) + sed -i 's/\s*\[.*//g' "${destination}" + # 4) Remove lines containing "/" - # 5) Remove leading tabs, spaces, etc. + sed -i -r '/(\/).*$/d' "${destination}" + + # 5) Remove leading tabs, spaces, etc. (Also removes leading IP addresses) + sed -i -r 's/^.*\s+//g' "${destination}" + # 6) Remove empty lines - < "${src}" tr -d '\r' | \ - tr '[:upper:]' '[:lower:]' | \ - sed 's/\s*#.*//g' | \ - sed -r '/(\/).*$/d' | \ - sed -r 's/^.*\s+//g' | \ - sed '/^$/d'> "${destination}" + sed -i '/^$/d' "${destination}" + chmod 644 "${destination}" } From c5faf3d1744f100bbbfc800702c70ffc79eda02e Mon Sep 17 00:00:00 2001 From: Adam Warner Date: Sun, 19 Feb 2023 18:12:03 +0000 Subject: [PATCH 3/9] Use ',' as the separator char in query rather than the default '|' as we now expect some valid results to contain '|' Signed-off-by: Adam Warner --- advanced/Scripts/query.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/advanced/Scripts/query.sh b/advanced/Scripts/query.sh index d48e9363..8717d328 100755 --- a/advanced/Scripts/query.sh +++ b/advanced/Scripts/query.sh @@ -116,7 +116,7 @@ scanDatabaseTable() { fi # Send prepared query to gravity database - result="$(pihole-FTL sqlite3 "${gravityDBfile}" "${querystr}")" 2> /dev/null + result="$(pihole-FTL sqlite3 -separator ',' "${gravityDBfile}" "${querystr}")" 2> /dev/null if [[ -z "${result}" ]]; then # Return early when there are no matches in this table return @@ -136,8 +136,8 @@ scanDatabaseTable() { # Loop over results and print them mapfile -t results <<< "${result}" for result in "${results[@]}"; do - domain="${result/|*}" - if [[ "${result#*|}" == "0" ]]; then + domain="${result/,*}" + if [[ "${result#*,}" == "0" ]]; then extra=" (disabled)" else extra="" @@ -212,10 +212,10 @@ if [[ -n "${exact}" ]]; then fi for result in "${results[@]}"; do - match="${result/|*/}" - extra="${result#*|}" - adlistAddress="${extra/|*/}" - extra="${extra#*|}" + match="${result/,*/}" + extra="${result#*,}" + adlistAddress="${extra/,*/}" + extra="${extra#*,}" if [[ "${extra}" == "0" ]]; then extra=" (disabled)" else From eedd93d7823ace35e2915e83a6dcc86b13abfd68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 22 Feb 2023 21:14:27 +0100 Subject: [PATCH 4/9] Remove temporary files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König --- gravity.sh | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/gravity.sh b/gravity.sh index fb7f42f1..57959b25 100755 --- a/gravity.sh +++ b/gravity.sh @@ -524,7 +524,13 @@ parseList() { # Create a temporary file for the sed magic instead of using "${target}" directly # this allows to split the sed commands to improve readability + # we use a file handle here and remove the temporary file immediately so the content will be deleted in any case + # when the script stops temp_file="$(mktemp -p "/tmp" --suffix=".gravity")" + exec 3>"$temp_file" + rm "${temp_file}" + temp_file="/proc/$$/fd/3" + # 1. Add all valid domains (adapted from https://stackoverflow.com/a/30007882) # no need to include uppercase letters, as we convert to lowercase in gravity_ParseFileIntoDomains() already @@ -540,10 +546,8 @@ parseList() { # 3. Remove trailing period (see https://github.com/pi-hole/pi-hole/issues/4701) # 4. Append ,adlistID to every line # 5. Ensures there is a newline on the last line - sed -i "s/\.$//;s/$/,${adlistID}/;/.$/a\\" "${temp_file}" - - # concatenate the temporary file to the target file - cat "${temp_file}" >> "${target}" + # and write everything to the target file + sed "s/\.$//;s/$/,${adlistID}/;/.$/a\\" "${temp_file}" >> "${target}" # A list of items of common local hostnames not to report as unusable # Some lists (i.e StevenBlack's) contain these as they are supposed to be used as HOST files @@ -575,6 +579,9 @@ parseList() { else echo " ${INFO} Imported ${num_domains} domains" fi + + # close file handle + exec 3<&- } compareLists() { From 821c7dc190cdabc28f024e4688bff3670f028731 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 24 Feb 2023 22:18:41 +0100 Subject: [PATCH 5/9] Add info when list cotains ABP style domains MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König --- gravity.sh | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/gravity.sh b/gravity.sh index 57959b25..451a40fc 100755 --- a/gravity.sh +++ b/gravity.sh @@ -520,23 +520,29 @@ gravity_DownloadBlocklists() { } parseList() { - local adlistID="${1}" src="${2}" target="${3}" temp_file non_domains sample_non_domains + local adlistID="${1}" src="${2}" target="${3}" temp_file temp_file_base non_domains sample_non_domains # Create a temporary file for the sed magic instead of using "${target}" directly # this allows to split the sed commands to improve readability # we use a file handle here and remove the temporary file immediately so the content will be deleted in any case # when the script stops - temp_file="$(mktemp -p "/tmp" --suffix=".gravity")" - exec 3>"$temp_file" - rm "${temp_file}" + temp_file_base="$(mktemp -p "/tmp" --suffix=".gravity")" + exec 3>"$temp_file_base" + rm "${temp_file_base}" temp_file="/proc/$$/fd/3" # 1. Add all valid domains (adapted from https://stackoverflow.com/a/30007882) # no need to include uppercase letters, as we convert to lowercase in gravity_ParseFileIntoDomains() already sed -r "/^([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]$/!d" "${src}" > "${temp_file}" - # 2. Add all supported ABP style lines (||subdomain.domain.tlp^) - sed -r "/^\|\|([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]\^$/!d" "${src}" >> "${temp_file}" + + # if there is at least one ABP style domains + if grep -E "^\|\|([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]" -m 1 -q "${src}"; then + echo " ${INFO} List contained AdBlock Plus style domains" + # 2. Add all supported ABP style lines (||subdomain.domain.tlp^) + sed -r "/^\|\|([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]\^$/!d" "${src}" >> "${temp_file}" + fi + # Find lines containing no domains or with invalid characters (not matching regex above) # This is simply everything that is not in $temp_file compared to $src From 6cb0be82caca4ca7b642a292f3d5a16ab1b47bcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sun, 26 Feb 2023 10:34:17 +0100 Subject: [PATCH 6/9] Add flag abp_domains into info table to signal if abp domains have been found MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König --- gravity.sh | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/gravity.sh b/gravity.sh index 451a40fc..39ac2ded 100755 --- a/gravity.sh +++ b/gravity.sh @@ -137,6 +137,18 @@ update_gravity_timestamp() { return 0 } +# Update timestamp when the gravity table was last updated successfully +set_abp_info() { + pihole-FTL sqlite3 "${gravityDBfile}" "INSERT OR REPLACE INTO info (property,value) VALUES ('abp_domains',${abp_domains});" + status="$?" + + if [[ "${status}" -ne 0 ]]; then + echo -e "\\n ${CROSS} Unable to update ABP domain status in database ${gravityDBfile}\\n ${output}" + return 1 + fi + return 0 +} + # Import domains from file and store them in the specified database table database_table_from_file() { # Define locals @@ -519,6 +531,10 @@ gravity_DownloadBlocklists() { gravity_Blackbody=true } + +# global variable to indicate if we found ABP style domains during the gravity run +# is saved in gravtiy's info table to signal FTL if such domains are available +abp_domains=0 parseList() { local adlistID="${1}" src="${2}" target="${3}" temp_file temp_file_base non_domains sample_non_domains @@ -536,9 +552,10 @@ parseList() { # no need to include uppercase letters, as we convert to lowercase in gravity_ParseFileIntoDomains() already sed -r "/^([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]$/!d" "${src}" > "${temp_file}" - # if there is at least one ABP style domains + # if there is at least one ABP style domain if grep -E "^\|\|([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]" -m 1 -q "${src}"; then echo " ${INFO} List contained AdBlock Plus style domains" + abp_domains=1 # 2. Add all supported ABP style lines (||subdomain.domain.tlp^) sed -r "/^\|\|([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]\^$/!d" "${src}" >> "${temp_file}" fi @@ -1014,6 +1031,9 @@ fi # Update gravity timestamp update_gravity_timestamp +# Set abp_domain info field +set_abp_info + # Ensure proper permissions are set for the database chown pihole:pihole "${gravityDBfile}" chmod g+w "${piholeDir}" "${gravityDBfile}" From 16385af3ef473e4f148224bfc735e2582e666a3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sun, 26 Feb 2023 21:16:45 +0100 Subject: [PATCH 7/9] Use dedicated pattern variable to make RegEx reusable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König --- gravity.sh | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/gravity.sh b/gravity.sh index 39ac2ded..8db728a4 100755 --- a/gravity.sh +++ b/gravity.sh @@ -536,7 +536,7 @@ gravity_DownloadBlocklists() { # is saved in gravtiy's info table to signal FTL if such domains are available abp_domains=0 parseList() { - local adlistID="${1}" src="${2}" target="${3}" temp_file temp_file_base non_domains sample_non_domains + local adlistID="${1}" src="${2}" target="${3}" temp_file temp_file_base non_domains sample_non_domains valid_domain_pattern abp_domain_pattern # Create a temporary file for the sed magic instead of using "${target}" directly # this allows to split the sed commands to improve readability @@ -547,17 +547,23 @@ parseList() { rm "${temp_file_base}" temp_file="/proc/$$/fd/3" - - # 1. Add all valid domains (adapted from https://stackoverflow.com/a/30007882) + # define valid domain patterns # no need to include uppercase letters, as we convert to lowercase in gravity_ParseFileIntoDomains() already - sed -r "/^([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]$/!d" "${src}" > "${temp_file}" + # adapted from https://stackoverflow.com/a/30007882 + # supported ABP style: ||subdomain.domain.tlp^ + + valid_domain_pattern="([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]" + abp_domain_pattern="\|\|${valid_domain_pattern}\^" + + + # 1. Add all valid domains + sed -r "/^${valid_domain_pattern}$/!d" "${src}" > "${temp_file}" - # if there is at least one ABP style domain - if grep -E "^\|\|([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]" -m 1 -q "${src}"; then + # 2. Add valid ABP style domains if there is at least one such domain + if grep -E "^${abp_domain_pattern}$" -m 1 -q "${src}"; then echo " ${INFO} List contained AdBlock Plus style domains" abp_domains=1 - # 2. Add all supported ABP style lines (||subdomain.domain.tlp^) - sed -r "/^\|\|([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]\^$/!d" "${src}" >> "${temp_file}" + sed -r "/^${abp_domain_pattern}$/!d" "${src}" >> "${temp_file}" fi From 0b5da9f0dabccaf902aada60527c3a4b5855b925 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sat, 4 Mar 2023 10:36:07 +0100 Subject: [PATCH 8/9] Allow final dot (root zone) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König --- gravity.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gravity.sh b/gravity.sh index 8db728a4..281d5d54 100755 --- a/gravity.sh +++ b/gravity.sh @@ -552,7 +552,7 @@ parseList() { # adapted from https://stackoverflow.com/a/30007882 # supported ABP style: ||subdomain.domain.tlp^ - valid_domain_pattern="([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]" + valid_domain_pattern="([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]\.?" abp_domain_pattern="\|\|${valid_domain_pattern}\^" From 71e262c37f5582d54e5c7d9f05b4f61533ca81a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 14 Mar 2023 19:42:05 +0100 Subject: [PATCH 9/9] Revert "Allow final dot (root zone)" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 0b5da9f0dabccaf902aada60527c3a4b5855b925. Signed-off-by: Christian König --- advanced/Scripts/piholeDebug.sh | 2 +- gravity.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/advanced/Scripts/piholeDebug.sh b/advanced/Scripts/piholeDebug.sh index ad25e866..fa1cebbb 100755 --- a/advanced/Scripts/piholeDebug.sh +++ b/advanced/Scripts/piholeDebug.sh @@ -230,7 +230,7 @@ initialize_debug() { # This is a function for visually displaying the current test that is being run. # Accepts one variable: the name of what is being diagnosed -# Colors do not show in the dasboard, but the icons do: [i], [✓], and [✗] +# Colors do not show in the dashboard, but the icons do: [i], [✓], and [✗] echo_current_diagnostic() { # Colors are used for visually distinguishing each test in the output # These colors do not show in the GUI, but the formatting will diff --git a/gravity.sh b/gravity.sh index 281d5d54..8db728a4 100755 --- a/gravity.sh +++ b/gravity.sh @@ -552,7 +552,7 @@ parseList() { # adapted from https://stackoverflow.com/a/30007882 # supported ABP style: ||subdomain.domain.tlp^ - valid_domain_pattern="([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]\.?" + valid_domain_pattern="([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]" abp_domain_pattern="\|\|${valid_domain_pattern}\^"