mirror of
https://github.com/pi-hole/pi-hole
synced 2025-01-03 04:30:55 +00:00
Improve detection of incorrect lines during gravity (#5009)
This commit is contained in:
commit
955e36a955
34
gravity.sh
34
gravity.sh
@@ -524,15 +524,20 @@ num_target_lines=0
|
|||||||
num_source_lines=0
|
num_source_lines=0
|
||||||
num_invalid=0
|
num_invalid=0
|
||||||
parseList() {
|
parseList() {
|
||||||
local adlistID="${1}" src="${2}" target="${3}" incorrect_lines
|
local adlistID="${1}" src="${2}" target="${3}" incorrect_lines sample_incorrect_lines
|
||||||
# This sed does the following things:
|
# This sed does the following things:
|
||||||
# 1. Remove all domains containing invalid characters. Valid are: a-z, A-Z, 0-9, dot (.), minus (-), underscore (_)
|
# 1. Remove all lines containing no domains
|
||||||
# 2. Append ,adlistID to every line
|
# 2. Remove all domains containing invalid characters. Valid are: a-z, A-Z, 0-9, dot (.), minus (-), underscore (_)
|
||||||
# 3. Remove trailing period (see https://github.com/pi-hole/pi-hole/issues/4701)
|
# 3. Append ,adlistID to every line
|
||||||
# 4. Ensures there is a newline on the last line
|
# 4. Remove trailing period (see https://github.com/pi-hole/pi-hole/issues/4701)
|
||||||
sed -e "/[^a-zA-Z0-9.\_-]/d;s/\.$//;s/$/,${adlistID}/;/.$/a\\" "${src}" >> "${target}"
|
# 5. Ensures there is a newline on the last line
|
||||||
# Find (up to) five domains containing invalid characters (see above)
|
sed -r "/([^\.]+\.)+[^\.]{2,}/!d;/[^a-zA-Z0-9.\_-]/d;s/\.$//;s/$/,${adlistID}/;/.$/a\\" "${src}" >> "${target}"
|
||||||
incorrect_lines="$(sed -e "/[^a-zA-Z0-9.\_-]/!d" "${src}" | head -n 5)"
|
|
||||||
|
# Find lines containing no domains or with invalid characters (see above)
|
||||||
|
# Remove duplicates and limit to 5 domains
|
||||||
|
mapfile -t incorrect_lines <<< "$(sed -r "/([^\.]+\.)+[^\.]{2,}/d" < "${src}")"
|
||||||
|
mapfile -t -O "${#incorrect_lines[@]}" incorrect_lines <<< "$(sed -r "/[^a-zA-Z0-9.\_-]/!d" < "${src}")"
|
||||||
|
IFS=" " read -r -a sample_incorrect_lines <<< "$(tr ' ' '\n' <<< "${incorrect_lines[@]}" | sort -u | head -n 5| tr '\n' ' ')"
|
||||||
|
|
||||||
local num_target_lines_new num_correct_lines
|
local num_target_lines_new num_correct_lines
|
||||||
# Get number of lines in source file
|
# Get number of lines in source file
|
||||||
@@ -551,11 +556,12 @@ parseList() {
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# Display sample of invalid lines if we found some
|
# Display sample of invalid lines if we found some
|
||||||
if [[ -n "${incorrect_lines}" ]]; then
|
if [ ${#sample_incorrect_lines[@]} -ne 0 ]; then
|
||||||
echo " Sample of invalid domains:"
|
echo " Sample of invalid domains:"
|
||||||
while IFS= read -r line; do
|
for each in "${sample_incorrect_lines[@]}"
|
||||||
echo " - ${line}"
|
do
|
||||||
done <<< "${incorrect_lines}"
|
echo " - ${each}"
|
||||||
|
done
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
compareLists() {
|
compareLists() {
|
||||||
@@ -731,13 +737,13 @@ gravity_ParseFileIntoDomains() {
|
|||||||
# 3) Remove comments (text starting with "#", include possible spaces before the hash sign)
|
# 3) Remove comments (text starting with "#", include possible spaces before the hash sign)
|
||||||
# 4) Remove lines containing "/"
|
# 4) Remove lines containing "/"
|
||||||
# 5) Remove leading tabs, spaces, etc.
|
# 5) Remove leading tabs, spaces, etc.
|
||||||
# 6) Delete lines not matching domain names
|
# 6) Remove empty lines
|
||||||
< "${src}" tr -d '\r' | \
|
< "${src}" tr -d '\r' | \
|
||||||
tr '[:upper:]' '[:lower:]' | \
|
tr '[:upper:]' '[:lower:]' | \
|
||||||
sed 's/\s*#.*//g' | \
|
sed 's/\s*#.*//g' | \
|
||||||
sed -r '/(\/).*$/d' | \
|
sed -r '/(\/).*$/d' | \
|
||||||
sed -r 's/^.*\s+//g' | \
|
sed -r 's/^.*\s+//g' | \
|
||||||
sed -r '/([^\.]+\.)+[^\.]{2,}/!d' > "${destination}"
|
sed '/^$/d'> "${destination}"
|
||||||
chmod 644 "${destination}"
|
chmod 644 "${destination}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user