diff --git a/gravity.sh b/gravity.sh index 75a51608..015764ac 100755 --- a/gravity.sh +++ b/gravity.sh @@ -335,14 +335,16 @@ gravity_ParseFileIntoDomains() { # Most of the lists downloaded are already in hosts file format but the spacing/formating is not contigious # This helps with that and makes it easier to read # It also helps with debugging so each stage of the script can be researched more in depth - # Awk -F splits on given IFS, we grab the right hand side (chops trailing #coments and /'s to grab the domain only. - # Last awk command takes non-commented lines and if they have 2 fields, take the right field (the domain) and leave - # the left (IP address), otherwise grab the single field. - - < ${source} awk -F '#' '{print $1}' | \ - awk -F '/' '{print $1}' | \ - awk '($1 !~ /^#/) { if (NF>1) {print $2} else {print $1}}' | \ - sed -nr -e 's/\.{2,}/./g' -e '/\./p' > ${destination} + # 1) Remove carriage returns + # 2) Convert all characters to lowercase + # 3) Remove lines containing "#" or "/" + # 4) Remove leading tabs, spaces, etc. + # 5) Delete lines not matching domain names + < "${source}" tr -d '\r' | \ + tr '[:upper:]' '[:lower:]' | \ + sed -r '/(\/|#).*$/d' | \ + sed -r 's/^.*\s+//g' | \ + sed -r '/([^\.]+\.)+[^\.]{2,}/!d' > "${destination}" return 0 fi