Merge pull request #2597 from xCh12i5/RegEx-Fix

Solves RegEx issue.
2025-01-03 12:40:56 +00:00 · 2019-04-25 15:27:35 +02:00 · 2019-04-25 15:27:35 +02:00 · 6a01a1257f
commit 6a01a1257f
parent 42c7653e99 54ae9d83b0
1 changed files with 10 additions and 8 deletions
--- a/gravity.sh
+++ b/gravity.sh
@@ -335,14 +335,16 @@ gravity_ParseFileIntoDomains() {
    # Most of the lists downloaded are already in hosts file format but the spacing/formatting is not contiguous
    # This helps with that and makes it easier to read
    # It also helps with debugging so each stage of the script can be researched more in depth
-    # Awk -F splits on given IFS, we grab the right hand side (chops trailing #comments and /'s to grab the domain only).
-    # Last awk command takes non-commented lines and if they have 2 fields, take the right field (the domain) and leave
-    # the left (IP address), otherwise grab the single field.
-
-    < ${source} awk -F '#' '{print $1}' | \
-    awk -F '/' '{print $1}' | \
-    awk '($1 !~ /^#/) { if (NF>1) {print $2} else {print $1}}' | \
-    sed -nr -e 's/\.{2,}/./g' -e '/\./p' >  ${destination}
+    # 1) Remove carriage returns
+    # 2) Convert all characters to lowercase
+    # 3) Remove lines containing "#" or "/"
+    # 4) Remove everything up to and including the last whitespace (leading tabs, spaces, IP address field, etc.)
+    # 5) Delete lines not matching domain names
+    < "${source}" tr -d '\r' | \
+    tr '[:upper:]' '[:lower:]' | \
+    sed -r '/(\/|#).*$/d' | \
+    sed -r 's/^.*\s+//g' | \
+    sed -r '/([^\.]+\.)+[^\.]{2,}/!d' >  "${destination}"
    return 0
  fi