Revert the parsing method to that of 3.1.4. The new method lets invalid list entries through, which causes issues such as www.google.com being blocked. The reverted method simply throws away invalid list entries.

Signed-off-by: Adam Warner <adamw@rner.email>
2025-07-24 16:08:10 +00:00 · 2017-12-12 12:36:09 +00:00 · 2017-12-12 12:36:09 +00:00 · cc3035267f
commit cc3035267f
parent 47ed0b7627
1 changed files with 13 additions and 27 deletions
--- a/gravity.sh
+++ b/gravity.sh
@@ -270,36 +270,22 @@ gravity_Pull() {

 # Parse source files into domains format
 gravity_ParseFileIntoDomains() {
-  local source="${1}" destination="${2}" commentPattern firstLine abpFilter
+  local source="${1}" destination="${2}" firstLine abpFilter

  # Determine if we are parsing a consolidated list
  if [[ "${source}" == "${piholeDir}/${matterAndLight}" ]]; then
-    # Define symbols used as comments: #;@![/
-    commentPattern="[#;@![\\/]"
-
-    # Parse Domains/Hosts files by removing comments & host IPs
-    # Logic: Ignore lines which begin with comments
-    awk '!/^'"${commentPattern}"'/ {
-      # Determine if there are multiple words seperated by a space
-      if(NF>1) {
-        # Remove comments (including prefixed spaces/tabs)
-        if($0 ~ /'"${commentPattern}"'/) { gsub("( |\t)'"${commentPattern}"'.*", "", $0) }
-        # Determine if there are aliased domains
-        if($3) {
-          # Remove IP address
-          $1=""
-          # Remove space which is left in $0 when removing $1
-          gsub("^ ", "", $0)
-          print $0
-        } else if($2) {
-          # Print single domain without IP
-          print $2
-        }
-      # If there are no words seperated by space
-      } else if($1) {
-        print $1
-      }
-    }' "${source}" 2> /dev/null > "${destination}"
+    # Remove comments and print only the domain name
+    # Most of the lists downloaded are already in hosts file format but the spacing/formatting is not contiguous
+    # This helps with that and makes it easier to read
+    # It also helps with debugging so each stage of the script can be researched more in depth
+    #Each awk -F splits on the given field separator and keeps the left hand side (chopping trailing #comments and anything after a /).
+    #The last awk command takes non-commented lines and, if they have more than one field, takes the second field (the domain) and drops
+    #+ the first (IP address); otherwise it grabs the single field.
+    cat ${source} | \
+    awk -F '#' '{print $1}' | \
+    awk -F '/' '{print $1}' | \
+    awk '($1 !~ /^#/) { if (NF>1) {print $2} else {print $1}}' | \
+    sed -nr -e 's/\.{2,}/./g' -e '/\./p' >  ${destination}
    return 0
  fi