From f390671018b46c9e60ca99bb6810ea43dfb6a9fc Mon Sep 17 00:00:00 2001
From: Riley Avron
Date: Tue, 6 Mar 2018 21:48:12 -0800
Subject: [PATCH] Improve scheme matching to avoid inadvertent blocking

Signed-off-by: Riley Avron
---
 gravity.sh | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/gravity.sh b/gravity.sh
index f1af8d9e..011d2f8c 100755
--- a/gravity.sh
+++ b/gravity.sh
@@ -346,12 +346,16 @@ gravity_ParseFileIntoDomains() {
     echo -ne " ${INFO} Format: URL"
 
     awk '
-      # Remove URL protocol, optional "username:password@", and ":?/;"
-      /[:?\/;]/ { gsub(/(^.*:\/\/(.*:.*@)?|[:?\/;].*)/, "", $0) }
+      # Remove URL scheme, optional "userinfo@", and everything from ":?/;"
+      # Scheme and userinfo are matched strictly (userinfo may not contain
+      # "/", "?" or "#") to avoid blocking the wrong domain in cases like:
+      #   http://www.evil.com?http://www.good.com
+      # See RFC 3986 sections 3.1 (scheme) and 3.2.1 (userinfo) for details.
+      /[:?\/;]/ { gsub(/(^[a-zA-Z][a-zA-Z0-9+.-]*:\/\/([^\/?#]*@)?|[:?\/;].*)/, "", $0) }
       # Skip lines which are only IPv4 addresses
       /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ { next }
       # Print if nonempty
-      length { print $0 }
+      length { print }
     ' "${source}" 2> /dev/null > "${destination}"
 
     echo -e "${OVER} ${TICK} Format: URL"