mirror of
https://github.com/pi-hole/pi-hole
synced 2024-12-22 14:58:08 +00:00
Improve non-standard list parsing
* Add 504 status (Gateway connection timed out)
* Add text for non-standard list parsing
* Improve adblock parsing
* Ensure adblock exception rules are removed from file
* Ensure "www." is not treated as a URL-format list
* Corrected typo
* Ensure script does not fail if "-f" is used when there are no blocklists generated

Signed off by WaLLy3K <wally3k@pi-hole.net>
This commit is contained in:
parent
c957124fad
commit
d02bf258af
53
gravity.sh
53
gravity.sh
@@ -217,6 +217,7 @@ gravity_Pull() {
|
|||||||
"408") echo -e "${OVER} ${CROSS} ${str} Time-out";;
|
"408") echo -e "${OVER} ${CROSS} ${str} Time-out";;
|
||||||
"451") echo -e "${OVER} ${CROSS} ${str} Unavailable For Legal Reasons";;
|
"451") echo -e "${OVER} ${CROSS} ${str} Unavailable For Legal Reasons";;
|
||||||
"500") echo -e "${OVER} ${CROSS} ${str} Internal Server Error";;
|
"500") echo -e "${OVER} ${CROSS} ${str} Internal Server Error";;
|
||||||
|
"504") echo -e "${OVER} ${CROSS} ${str} Connection Timed Out (Gateway)";;
|
||||||
"521") echo -e "${OVER} ${CROSS} ${str} Web Server Is Down (Cloudflare)";;
|
"521") echo -e "${OVER} ${CROSS} ${str} Web Server Is Down (Cloudflare)";;
|
||||||
"522") echo -e "${OVER} ${CROSS} ${str} Connection Timed Out (Cloudflare)";;
|
"522") echo -e "${OVER} ${CROSS} ${str} Connection Timed Out (Cloudflare)";;
|
||||||
* ) echo -e "${OVER} ${CROSS} ${str} ${httpCode}";;
|
* ) echo -e "${OVER} ${CROSS} ${str} ${httpCode}";;
|
||||||
@@ -286,6 +287,7 @@ gravity_ParseFileIntoDomains() {
|
|||||||
# Determine how to parse individual source file formats
|
# Determine how to parse individual source file formats
|
||||||
if [[ "${firstLine,,}" =~ (adblock|ublock|^!) ]]; then
|
if [[ "${firstLine,,}" =~ (adblock|ublock|^!) ]]; then
|
||||||
# Compare $firstLine against lower case words found in Adblock lists
|
# Compare $firstLine against lower case words found in Adblock lists
|
||||||
|
echo -ne " ${INFO} Format: Adblock"
|
||||||
|
|
||||||
# Define symbols used as comments: [!
|
# Define symbols used as comments: [!
|
||||||
# "||.*^" includes the "Example 2" domains we can extract
|
# "||.*^" includes the "Example 2" domains we can extract
|
||||||
@@ -296,19 +298,42 @@ gravity_ParseFileIntoDomains() {
|
|||||||
# Logic: Ignore lines which do not include comments or domain name anchor
|
# Logic: Ignore lines which do not include comments or domain name anchor
|
||||||
awk ''"${abpFilter}"' {
|
awk ''"${abpFilter}"' {
|
||||||
# Remove valid adblock type options
|
# Remove valid adblock type options
|
||||||
gsub(/~?(important|third-party|popup|subdocument|websocket),?/, "", $0)
|
gsub(/\$?~?(important|third-party|popup|subdocument|websocket),?/, "", $0)
|
||||||
# Remove starting domain name anchor "||" and ending separator "^$" ($ optional)
|
# Remove starting domain name anchor "||" and ending separator "^"
|
||||||
gsub(/(\|\||\^\$?$)/, "", $0)
|
gsub(/^(\|\|)|(\^)/, "", $0)
|
||||||
# Remove lines which are only IPv4 addresses or contain "^/*"
|
# Remove invalid characters (*/,=$)
|
||||||
if($0 ~ /(^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$|[\\^\/\*])/) { $0="" }
|
if($0 ~ /[*\/,=\$]/) { $0="" }
|
||||||
|
# Remove lines which are only IPv4 addresses
|
||||||
|
if($0 ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/) { $0="" }
|
||||||
if($0) { print $0 }
|
if($0) { print $0 }
|
||||||
}' "${source}" 2> /dev/null > "${destination}"
|
}' "${source}" > "${destination}"
|
||||||
|
|
||||||
|
# Determine if there are Adblock exception rules
|
||||||
|
# https://adblockplus.org/filters
|
||||||
|
if grep -q "^@@||" "${source}" &> /dev/null; then
|
||||||
|
# Parse Adblock lists by extracting exception rules
|
||||||
|
# Logic: Ignore lines which do not include exception format "@@||example.com^"
|
||||||
|
awk -F "[|^]" '/^@@\|\|.*\^/ {
|
||||||
|
# Remove valid adblock type options
|
||||||
|
gsub(/\$?~?(third-party)/, "", $0)
|
||||||
|
# Remove invalid characters (*/,=$)
|
||||||
|
if($0 ~ /[*\/,=\$]/) { $0="" }
|
||||||
|
if($3) { print $3 }
|
||||||
|
}' "${source}" > "${destination}.exceptionsFile.tmp"
|
||||||
|
|
||||||
|
# Remove exceptions
|
||||||
|
grep -F -x -v -f "${destination}.exceptionsFile.tmp" "${destination}" > "${source}"
|
||||||
|
mv "${source}" "${destination}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo -e "${OVER} ${TICK} Format: Adblock"
|
||||||
elif grep -q "^address=/" "${source}" &> /dev/null; then
|
elif grep -q "^address=/" "${source}" &> /dev/null; then
|
||||||
# Parse Dnsmasq format lists
|
# Parse Dnsmasq format lists
|
||||||
echo -e " ${CROSS} ${COL_BOLD}dnsmasq${COL_NC} format lists are not supported"
|
echo -e " ${CROSS} Format: Dnsmasq (list type not supported)"
|
||||||
elif grep -q -E "^(https?://|www\\.)" "${source}" &> /dev/null; then
|
elif grep -q -E "^https?://" "${source}" &> /dev/null; then
|
||||||
# Parse URL list if source file contains "http://" or "www."
|
# Parse URL list if source file contains "http://" or "https://"
|
||||||
# Scanning for "^IPv4$" is too slow with large (1M) lists on low-end hardware
|
# Scanning for "^IPv4$" is too slow with large (1M) lists on low-end hardware
|
||||||
|
echo -ne " ${INFO} Format: URL"
|
||||||
|
|
||||||
awk '{
|
awk '{
|
||||||
# Remove URL protocol, optional "username:password@", and ":?/;"
|
# Remove URL protocol, optional "username:password@", and ":?/;"
|
||||||
@@ -317,6 +342,8 @@ gravity_ParseFileIntoDomains() {
|
|||||||
if ($0 ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/) { $0="" }
|
if ($0 ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/) { $0="" }
|
||||||
if ($0) { print $0 }
|
if ($0) { print $0 }
|
||||||
}' "${source}" 2> /dev/null > "${destination}"
|
}' "${source}" 2> /dev/null > "${destination}"
|
||||||
|
|
||||||
|
echo -e "${OVER} ${TICK} Format: URL"
|
||||||
else
|
else
|
||||||
# Default: Keep hosts/domains file in same format as it was downloaded
|
# Default: Keep hosts/domains file in same format as it was downloaded
|
||||||
output=$( { mv "${source}" "${destination}"; } 2>&1 )
|
output=$( { mv "${source}" "${destination}"; } 2>&1 )
|
||||||
@@ -580,15 +607,11 @@ done
|
|||||||
gravity_Trap
|
gravity_Trap
|
||||||
|
|
||||||
if [[ "${forceDelete:-}" == true ]]; then
|
if [[ "${forceDelete:-}" == true ]]; then
|
||||||
str="Deleting exising list cache"
|
str="Deleting existing list cache"
|
||||||
echo -ne "${INFO} ${str}..."
|
echo -ne "${INFO} ${str}..."
|
||||||
|
|
||||||
if rm /etc/pihole/list.* 2> /dev/null; then
|
rm /etc/pihole/list.* 2> /dev/null || true
|
||||||
echo -e "${OVER} ${TICK} ${str}"
|
echo -e "${OVER} ${TICK} ${str}"
|
||||||
else
|
|
||||||
echo -e "${OVER} ${CROSS} ${str}"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Determine which functions to run
|
# Determine which functions to run
|
||||||
|
Loading…
Reference in New Issue
Block a user