@ -244,7 +244,7 @@ database_adlist_number() {
return ;
fi
output = $( { printf ".timeout 30000\\nUPDATE adlist SET number = %i, invalid_domains = %i WHERE id = %i;\\n" " ${ num_ source_lines} " " ${ num_unusable } " " ${ 1 } " | pihole-FTL sqlite3 " ${ gravityDBfile } " ; } 2>& 1 )
output = $( { printf ".timeout 30000\\nUPDATE adlist SET number = %i, invalid_domains = %i WHERE id = %i;\\n" " ${ num_ domains} " " ${ num_non_domains } " " ${ 1 } " | pihole-FTL sqlite3 " ${ gravityDBfile } " ; } 2>& 1 )
status = " $? "
if [ [ " ${ status } " -ne 0 ] ] ; then
@ -519,12 +519,12 @@ gravity_DownloadBlocklists() {
gravity_Blackbody = true
}
# num_target_lines increases for every correctly added domain in parseList()
num_t arget_line s= 0
num_ source_line s= 0
num_ unusable = 0
# num_total_imported_domains increases for each list processed
num_t otal_imported_domain s= 0
num_ domain s= 0
num_ non_domains = 0
parseList( ) {
local adlistID = " ${ 1 } " src = " ${ 2 } " target = " ${ 3 } " unusable_lines sample_unusable_lines tmp_unusuable_line s_str false_positive
local adlistID = " ${ 1 } " src = " ${ 2 } " target = " ${ 3 } " non_domains sample_non_domains tmp_non_domain s_str false_positive
# This sed does the following things:
# 1. Remove all lines containing no domains
# 2. Remove all domains containing invalid characters. Valid are: a-z, A-Z, 0-9, dot (.), minus (-), underscore (_)
@ -535,9 +535,9 @@ parseList() {
# Find lines containing no domains or with invalid characters (see above)
# Remove duplicates from the list
mapfile -t unusable_line s <<< " $( sed -r "/([^\.]+\.)+[^\.]{2,}/d" < " ${ src } " ) "
mapfile -t -O " ${# unusable_lines[@] } " unusable_line s <<< " $( sed -r "/[^a-zA-Z0-9.\_-]/!d" < " ${ src } " ) "
IFS = " " read -r -a unusable_line s <<< " $( tr ' ' '\n' <<< " ${ unusable_line s[@] } " | sort -u | tr '\n' ' ' ) "
mapfile -t non_domain s <<< " $( sed -r "/([^\.]+\.)+[^\.]{2,}/d" < " ${ src } " ) "
mapfile -t -O " ${# non_domains[@] } " non_domain s <<< " $( sed -r "/[^a-zA-Z0-9.\_-]/!d" < " ${ src } " ) "
IFS = " " read -r -a non_domain s <<< " $( tr ' ' '\n' <<< " ${ non_domain s[@] } " | sort -u | tr '\n' ' ' ) "
# A list of items of common local hostnames not to report as unusable
# Some lists (e.g. StevenBlack's) contain these as they are supposed to be used as HOST files
@ -559,37 +559,37 @@ parseList() {
)
# Read the unusable lines into a string
tmp_ unusuable_lines_str= " ${ unusable_line s[*] } "
tmp_ non_domains_str= " ${ non_domain s[*] } "
for false_positive in " ${ false_positives [@] } " ; do
# Remove false positives from tmp_ unusuable_line s_str
tmp_ unusuable_lines_str= " ${ tmp_unusuable_line s_str/ ${ false_positive } / } "
# Remove false positives from tmp_ non_domain s_str
tmp_ non_domains_str= " ${ tmp_non_domain s_str/ ${ false_positive } / } "
done
# Read the string back into an array
IFS = " " read -r -a unusable_lines <<< " ${ tmp_unusuable_line s_str} "
# Get a sample of the incorrect lines, limited to 5 (the list should already have been de-duplicated)
IFS = " " read -r -a sample_unusable_line s <<< " $( tr ' ' '\n' <<< " ${ unusable_line s[@] } " | head -n 5 | tr '\n' ' ' ) "
local num_target_lines_new num_correct_lines
# Get number of lines in source file
num_source_lines= " $( grep -c "^" " ${ src } " ) "
# Get the new number of lines in destination file
num_ target_lines_new= " $( grep -c "^" " ${ target } " )"
# Number of new correctly added lines
num_ correct_lines= " $(( num_target_lines_new-num_target_lines )) "
# Update number of lines in target file
num_ target_lines= " $num_target_lines_new "
num_unusable = " ${# unusable_lines [@] } "
if [ [ " ${ num_ unusable } " -ne 0 ] ] ; then
echo " ${ INFO } Imported ${ num_ correct_lines} domains, ignoring ${ num_unusable } non-domain entries "
IFS = " " read -r -a non_domains <<< " ${ tmp_non_domain s_str} "
# Get a sample of non-domain entries, limited to 5 (the list should already have been de-duplicated)
IFS = " " read -r -a sample_non_domain s <<< " $( tr ' ' '\n' <<< " ${ non_domain s[@] } " | head -n 5 | tr '\n' ' ' ) "
local tmp_new_imported_total
# Get the new number of domains in destination file
tmp_new_imported_total= " $( grep -c "^" " ${ target } " ) "
# Number of imported lines for this file is the difference between the new total and the old total. (Or, the number of domains we just added.)
num_ domains= " $(( tmp_new_imported_total-num_total_imported_domains ) )"
# Replace the running total with the new total.
num_ total_imported_domains= " $tmp_new_imported_total "
# Get the number of non_domains (this is the number of entries left after stripping the source of comments/duplicates/false positives/domains)
num_ non_domains= " ${# non_domains [@] } "
# If there are unusable lines, we display some information about them. This is not an error or a major cause for concern.
if [ [ " ${ num_ non_domains } " -ne 0 ] ] ; then
echo " ${ INFO } Imported ${ num_ domains} domains, ignoring ${ num_non_domains } non-domain entries "
echo " Sample of non-domain entries:"
for each in " ${ sample_ unusable_line s[@] } "
for each in " ${ sample_ non_domain s[@] } "
do
echo " - ${ each } "
done
else
echo " ${ INFO } Imported ${ num_ correct_line s} domains "
echo " ${ INFO } Imported ${ num_ domain s} domains "
fi
}
@ -745,8 +745,8 @@ gravity_DownloadBlocklistFromUrl() {
else
echo -e " ${ CROSS } List download failed: ${ COL_LIGHT_RED } no cached list available ${ COL_NC } "
# Manually reset these two numbers because we do not call parseList here
num_ source_line s= 0
num_ unusable = 0
num_ domain s= 0
num_ non_domains = 0
database_adlist_number " ${ adlistID } "
database_adlist_status " ${ adlistID } " "4"
fi