1
0
mirror of https://github.com/pi-hole/pi-hole synced 2025-01-22 22:10:56 +00:00

Merge pull request #2027 from ravron/ravron-urls-in-query-strings

Prevent inadvertent blocking of good domains appearing in query strings
This commit is contained in:
Dan Schaper 2018-04-16 17:31:51 -07:00 committed by GitHub
commit c9f3c02cb7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -345,13 +345,18 @@ gravity_ParseFileIntoDomains() {
# Scanning for "^IPv4$" is too slow with large (1M) lists on low-end hardware # Scanning for "^IPv4$" is too slow with large (1M) lists on low-end hardware
echo -ne " ${INFO} Format: URL" echo -ne " ${INFO} Format: URL"
awk '{ awk '
# Remove URL protocol, optional "username:password@", and ":?/;" # Remove URL scheme, optional "username:password@", and ":?/;"
if ($0 ~ /[:?\/;]/) { gsub(/(^.*:\/\/(.*:.*@)?|[:?\/;].*)/, "", $0) } # The scheme must be matched carefully to avoid blocking the wrong URL
# Remove lines which are only IPv4 addresses # in cases like:
if ($0 ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/) { $0="" } # http://www.evil.com?http://www.good.com
if ($0) { print $0 } # See RFC 3986 section 3.1 for details.
}' "${source}" 2> /dev/null > "${destination}" /[:?\/;]/ { gsub(/(^[a-zA-Z][a-zA-Z0-9+.-]*:\/\/(.*:.*@)?|[:?\/;].*)/, "", $0) }
# Skip lines which are only IPv4 addresses
/^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ { next }
# Print if nonempty
length { print }
' "${source}" 2> /dev/null > "${destination}"
echo -e "${OVER} ${TICK} Format: URL" echo -e "${OVER} ${TICK} Format: URL"
else else