diff --git a/gravity.sh b/gravity.sh index ca859e38..a926bacd 100755 --- a/gravity.sh +++ b/gravity.sh @@ -544,7 +544,10 @@ gravity_DownloadBlocklists() { # is saved in gravtiy's info table to signal FTL if such domains are available abp_domains=0 parseList() { - local adlistID="${1}" src="${2}" target="${3}" temp_file temp_file_base non_domains sample_non_domains valid_domain_pattern abp_domain_pattern + local adlistID="${1}" src="${2}" target="${3}" temp_file temp_file_base num_domains num_abp_domains valid_domain_pattern abp_domain_pattern + + # Print status message + echo -ne " ${INFO} Processing list..." # Create a temporary file for the sed magic instead of using "${target}" directly # this allows to split the sed commands to improve readability @@ -559,62 +562,35 @@ parseList() { # no need to include uppercase letters, as we convert to lowercase in gravity_ParseFileIntoDomains() already # adapted from https://stackoverflow.com/a/30007882 # supported ABP style: ||subdomain.domain.tlp^ - valid_domain_pattern="([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]" abp_domain_pattern="\|\|${valid_domain_pattern}\^" - - # 1. Add all valid domains - sed -r "/^${valid_domain_pattern}$/!d" "${src}" > "${temp_file}" + # 1. Add all valid domains to the temporary file and count them + sed -r "/^${valid_domain_pattern}$/!d" "${src}" > "${temp_file}" + num_domains=$(grep -c ^ "${temp_file}") # 2. Add valid ABP style domains if there is at least one such domain + num_abp_domains=0 if grep -E "^${abp_domain_pattern}$" -m 1 -q "${src}"; then echo " ${INFO} List contained AdBlock Plus style domains" abp_domains=1 sed -r "/^${abp_domain_pattern}$/!d" "${src}" >> "${temp_file}" + # count the number of ABP style domains + num_abp_domains=$(grep -c ^ "${temp_file}") + num_abp_domains=$((num_abp_domains-num_domains)) fi - - # Find lines containing no domains or with invalid characters (not matching regex above) - # This is simply everything that is not in $temp_file compared to $src - # Remove duplicates from the list - mapfile -t non_domains < <(grep -Fvf "${temp_file}" "${src}" | sort -u ) - # 3. Remove trailing period (see https://github.com/pi-hole/pi-hole/issues/4701) # 4. Append ,adlistID to every line # 5. Ensures there is a newline on the last line # and write everything to the target file sed "s/\.$//;s/$/,${adlistID}/;/.$/a\\" "${temp_file}" >> "${target}" - # A list of items of common local hostnames not to report as unusable - # Some lists (i.e StevenBlack's) contain these as they are supposed to be used as HOST files - # but flagging them as unusable causes more confusion than it's worth - so we suppress them from the output - false_positives="localhost|localhost.localdomain|local|broadcasthost|localhost|ip6-localhost|ip6-loopback|lo0 localhost|ip6-localnet|ip6-mcastprefix|ip6-allnodes|ip6-allrouters|ip6-allhosts" - - # if there are any non-domains, filter the array for false-positives - # Credit: https://stackoverflow.com/a/40264051 - if [[ "${#non_domains[@]}" -gt 0 ]]; then - mapfile -d $'\0' -t non_domains < <(printf '%s\0' "${non_domains[@]}" | grep -Ezv "^${false_positives}") - fi - - # Get a sample of non-domain entries, limited to 5 (the list should already have been de-duplicated) - IFS=" " read -r -a sample_non_domains <<< "$(tr ' ' '\n' <<< "${non_domains[@]}" | head -n 5 | tr '\n' ' ')" - - # Get the number of domains added - num_domains="$(grep -c "^" "${temp_file}")" - # Get the number of non_domains (this is the number of entries left after stripping the source of comments/duplicates/false positives/domains) - num_non_domains="${#non_domains[@]}" - # If there are unusable lines, we display some information about them. This is not error or major cause for concern. - if [[ "${num_non_domains}" -ne 0 ]]; then - echo " ${INFO} Imported ${num_domains} domains, ignoring ${num_non_domains} non-domain entries" - echo " Sample of non-domain entries:" - for each in "${sample_non_domains[@]}" - do - echo " - ${each}" - done + if [[ "${num_abp_domains}" -ne 0 ]]; then + echo -e "${OVER} ${INFO} Imported ${num_domains} exact and ${num_abp_domains} ABP-style domains" else - echo " ${INFO} Imported ${num_domains} domains" + echo -e "${OVER} ${INFO} Imported ${num_domains} exact domains" fi # close file handle @@ -761,6 +737,9 @@ gravity_DownloadBlocklistFromUrl() { fi fi + # Remove temporary file + rm -f "${patternBuffer}" + # Do we need to fall back to a cached list (if available)? if [[ "${done}" != "true" ]]; then # Determine if cached list has read permission @@ -868,7 +847,6 @@ gravity_Cleanup() { # Delete tmp content generated by Gravity rm ${piholeDir}/pihole.*.txt 2> /dev/null rm ${piholeDir}/*.tmp 2> /dev/null - rm "${GRAVITY_TMPDIR}"/*.phgpb 2> /dev/null # Ensure this function only runs when gravity_SetDownloadOptions() has completed if [[ "${gravity_Blackbody:-}" == true ]]; then