From 512ec7fb8709aabb818a27d2d1390518c346eb6c Mon Sep 17 00:00:00 2001 From: Riley Avron Date: Tue, 6 Mar 2018 21:44:29 -0800 Subject: [PATCH 1/2] Remove unneeded ifs in awk Signed-off-by: Riley Avron --- gravity.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/gravity.sh b/gravity.sh index 395ea548..f1af8d9e 100755 --- a/gravity.sh +++ b/gravity.sh @@ -345,13 +345,14 @@ gravity_ParseFileIntoDomains() { # Scanning for "^IPv4$" is too slow with large (1M) lists on low-end hardware echo -ne " ${INFO} Format: URL" - awk '{ + awk ' # Remove URL protocol, optional "username:password@", and ":?/;" - if ($0 ~ /[:?\/;]/) { gsub(/(^.*:\/\/(.*:.*@)?|[:?\/;].*)/, "", $0) } - # Remove lines which are only IPv4 addresses - if ($0 ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/) { $0="" } - if ($0) { print $0 } - }' "${source}" 2> /dev/null > "${destination}" + /[:?\/;]/ { gsub(/(^.*:\/\/(.*:.*@)?|[:?\/;].*)/, "", $0) } + # Skip lines which are only IPv4 addresses + /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ { next } + # Print if nonempty + length { print $0 } + ' "${source}" 2> /dev/null > "${destination}" echo -e "${OVER} ${TICK} Format: URL" else From f390671018b46c9e60ca99bb6810ea43dfb6a9fc Mon Sep 17 00:00:00 2001 From: Riley Avron Date: Tue, 6 Mar 2018 21:48:12 -0800 Subject: [PATCH 2/2] Improve scheme matching to avoid inadvertent blocking Signed-off-by: Riley Avron --- gravity.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/gravity.sh b/gravity.sh index f1af8d9e..011d2f8c 100755 --- a/gravity.sh +++ b/gravity.sh @@ -346,12 +346,16 @@ gravity_ParseFileIntoDomains() { echo -ne " ${INFO} Format: URL" awk ' - # Remove URL protocol, optional "username:password@", and ":?/;" - /[:?\/;]/ { gsub(/(^.*:\/\/(.*:.*@)?|[:?\/;].*)/, "", $0) } + # Remove URL scheme, optional "username:password@", and ":?/;" + # The scheme must be matched carefully to avoid blocking the wrong URL + # in cases like: + # http://www.evil.com?http://www.good.com + # See RFC 3986 section 3.1 for details. + /[:?\/;]/ { gsub(/(^[a-zA-Z][a-zA-Z0-9+.-]*:\/\/(.*:.*@)?|[:?\/;].*)/, "", $0) } # Skip lines which are only IPv4 addresses /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ { next } # Print if nonempty - length { print $0 } + length { print } ' "${source}" 2> /dev/null > "${destination}" echo -e "${OVER} ${TICK} Format: URL"