From 3c09cd4a3ae296a6ed2d061f30cb433aa044bf45 Mon Sep 17 00:00:00 2001 From: DL6ER Date: Fri, 21 Feb 2020 21:41:28 +0100 Subject: [PATCH 1/6] Experimental output of matching line from shown warnings. Signed-off-by: DL6ER --- gravity.sh | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/gravity.sh b/gravity.sh index b9abd83d..eac0acfe 100755 --- a/gravity.sh +++ b/gravity.sh @@ -391,7 +391,29 @@ gravity_DownloadBlocklists() { fi if [[ "${status}" -eq 0 && -n "${output}" ]]; then - echo -e " Encountered non-critical SQL warnings. Please check the suitability of the list you're using!\\nSQL warnings:\\n${output}\\n" + echo -e " Encountered non-critical SQL warnings. Please check the suitability of the lists you're using!\\n\\n SQL warnings:" + local warning file line lineno + while IFS= read -r line; do + echo " - ${line}" + warning="$(grep -oh "^[^:]*:[0-9]*" <<< "${line}")" + file="${warning%:*}" + lineno="${warning#*:}" + if [[ -n "${file}" && -n "${lineno}" ]]; then + echo -n " Line contains: " + awk "NR==${lineno}" < ${file} + fi + done <<< "${output}" + echo "" + local file line + while IFS= read -r line; do + warning="$(grep -oh "^[^:]*:[0-9]*" <<< "${line}")" + file="${warning%:*}" + lineno="${warning#*:}" + if [[ -n "${file}" && -n "${lineno}" ]]; then + echo -n "Line contains: " + awk "NR==${lineno}" < ${file} + fi + done <<< "${output}" fi rm "${target}" > /dev/null 2>&1 || \ From 050e2963c754cab03262bcc53948b620ca92cd01 Mon Sep 17 00:00:00 2001 From: DL6ER Date: Fri, 21 Feb 2020 22:28:53 +0100 Subject: [PATCH 2/6] Remove redundant code. 
Signed-off-by: DL6ER --- gravity.sh | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/gravity.sh b/gravity.sh index eac0acfe..4860339f 100755 --- a/gravity.sh +++ b/gravity.sh @@ -404,16 +404,6 @@ gravity_DownloadBlocklists() { fi done <<< "${output}" echo "" - local file line - while IFS= read -r line; do - warning="$(grep -oh "^[^:]*:[0-9]*" <<< "${line}")" - file="${warning%:*}" - lineno="${warning#*:}" - if [[ -n "${file}" && -n "${lineno}" ]]; then - echo -n "Line contains: " - awk "NR==${lineno}" < ${file} - fi - done <<< "${output}" fi rm "${target}" > /dev/null 2>&1 || \ From 81d4531e10b07e96b863b23920d32de189b7616d Mon Sep 17 00:00:00 2001 From: DL6ER Date: Sat, 22 Feb 2020 13:00:38 +0100 Subject: [PATCH 3/6] Implement performant list checking routine. Signed-off-by: DL6ER --- gravity.sh | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/gravity.sh b/gravity.sh index 4860339f..30e43135 100755 --- a/gravity.sh +++ b/gravity.sh @@ -412,11 +412,36 @@ gravity_DownloadBlocklists() { gravity_Blackbody=true } +total_num=0 parseList() { - local adlistID="${1}" src="${2}" target="${3}" + local adlistID="${1}" src="${2}" target="${3}" incorrect_lines #Append ,${arg} to every line and then remove blank lines before import # /.$/a\\ ensures there is a newline on the last line - sed -e "s/$/,${adlistID}/;/^$/d;/.$/a\\" "${src}" >> "${target}" + sed -e "/[^a-zA-Z0-9.\_-]/d;s/$/,${adlistID}/;/^$/d;/.$/a\\" "${src}" >> "${target}" + incorrect_lines="$(sed -e "/[^a-zA-Z0-9.\_-]/!d" "${src}" | head -n 5)" + + local num_lines num_target_lines num_correct_lines percentage percentage_fraction + num_lines="$(grep -c "^" "${src}")" + #num_correct_lines="$(grep -c "^[a-zA-Z0-9.-]*$" "${src}")" + num_target_lines="$(grep -c "^" "${target}")" + num_correct_lines="$(( num_target_lines-total_num ))" + total_num="$num_target_lines" + percentage=100 + percentage_fraction=0 + if [[ "${num_lines}" -gt 0 ]]; then + percentage="$(( 
1000*num_correct_lines/num_lines ))" + percentage_fraction="$(( percentage%10 ))" + percentage="$(( percentage/10 ))" + fi + echo " ${INFO} List quality: ${num_correct_lines} of ${num_lines} lines importable (${percentage}.${percentage_fraction}%)" + + if [[ -n "${incorrect_lines}" ]]; then + echo " Example for invalid domains (showing only the first five):" + while IFS= read -r line; do + echo " - ${line}" + done <<< "${incorrect_lines}" + fi + } # Download specified URL and perform checks on HTTP status and file content From 8131b5961cdaf0df890408d1400f11e67b2d56e9 Mon Sep 17 00:00:00 2001 From: DL6ER Date: Sat, 22 Feb 2020 15:17:24 +0100 Subject: [PATCH 4/6] Add comments to the code describing the changes. Signed-off-by: DL6ER --- gravity.sh | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/gravity.sh b/gravity.sh index 30e43135..b3a70f74 100755 --- a/gravity.sh +++ b/gravity.sh @@ -400,7 +400,7 @@ gravity_DownloadBlocklists() { lineno="${warning#*:}" if [[ -n "${file}" && -n "${lineno}" ]]; then echo -n " Line contains: " - awk "NR==${lineno}" < ${file} + awk "NR==${lineno}" < "${file}" fi done <<< "${output}" echo "" @@ -415,17 +415,22 @@ gravity_DownloadBlocklists() { total_num=0 parseList() { local adlistID="${1}" src="${2}" target="${3}" incorrect_lines - #Append ,${arg} to every line and then remove blank lines before import - # /.$/a\\ ensures there is a newline on the last line - sed -e "/[^a-zA-Z0-9.\_-]/d;s/$/,${adlistID}/;/^$/d;/.$/a\\" "${src}" >> "${target}" + # This sed does the following things: + # 1. Remove all domains containing invalid characters. Valid are: a-z, A-Z, 0-9, dot (.), minus (-), underscore (_) + # 2. Append ,adlistID to every line + # 3. 
Ensures there is a newline on the last line + sed -e "/[^a-zA-Z0-9.\_-]/d;s/$/,${adlistID}/;/.$/a\\" "${src}" >> "${target}" + # Find (up to) five domains containing invalid characters (see above) incorrect_lines="$(sed -e "/[^a-zA-Z0-9.\_-]/!d" "${src}" | head -n 5)" local num_lines num_target_lines num_correct_lines percentage percentage_fraction + # Get number of lines in source file num_lines="$(grep -c "^" "${src}")" - #num_correct_lines="$(grep -c "^[a-zA-Z0-9.-]*$" "${src}")" + # Get number of lines in destination file num_target_lines="$(grep -c "^" "${target}")" num_correct_lines="$(( num_target_lines-total_num ))" total_num="$num_target_lines" + # Compute percentage of valid lines percentage=100 percentage_fraction=0 if [[ "${num_lines}" -gt 0 ]]; then @@ -433,15 +438,15 @@ parseList() { percentage_fraction="$(( percentage%10 ))" percentage="$(( percentage/10 ))" fi - echo " ${INFO} List quality: ${num_correct_lines} of ${num_lines} lines importable (${percentage}.${percentage_fraction}%)" + echo " ${INFO} ${num_correct_lines} of ${num_lines} domains imported (${percentage}.${percentage_fraction}%)" + # Display sample of invalid lines if we found some if [[ -n "${incorrect_lines}" ]]; then - echo " Example for invalid domains (showing only the first five):" + echo " Sample of invalid domains (showing up to five):" while IFS= read -r line; do echo " - ${line}" done <<< "${incorrect_lines}" fi - } # Download specified URL and perform checks on HTTP status and file content From 1e8bfd33f5375bf4e2d2967be08bd544dad86d71 Mon Sep 17 00:00:00 2001 From: DL6ER Date: Sun, 23 Feb 2020 22:50:06 +0100 Subject: [PATCH 5/6] Improve output Signed-off-by: DL6ER --- gravity.sh | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/gravity.sh b/gravity.sh index b3a70f74..d09211c8 100755 --- a/gravity.sh +++ b/gravity.sh @@ -423,26 +423,19 @@ parseList() { # Find (up to) five domains containing invalid characters (see above) incorrect_lines="$(sed 
-e "/[^a-zA-Z0-9.\_-]/!d" "${src}" | head -n 5)" - local num_lines num_target_lines num_correct_lines percentage percentage_fraction + local num_lines num_target_lines num_correct_lines num_invalid # Get number of lines in source file num_lines="$(grep -c "^" "${src}")" # Get number of lines in destination file num_target_lines="$(grep -c "^" "${target}")" num_correct_lines="$(( num_target_lines-total_num ))" total_num="$num_target_lines" - # Compute percentage of valid lines - percentage=100 - percentage_fraction=0 - if [[ "${num_lines}" -gt 0 ]]; then - percentage="$(( 1000*num_correct_lines/num_lines ))" - percentage_fraction="$(( percentage%10 ))" - percentage="$(( percentage/10 ))" - fi - echo " ${INFO} ${num_correct_lines} of ${num_lines} domains imported (${percentage}.${percentage_fraction}%)" + num_invalid="$(( num_lines-num_correct_lines ))" + echo " ${INFO} Imported ${num_correct_lines} of ${num_lines} domains, ${num_invalid} domains invalid" # Display sample of invalid lines if we found some if [[ -n "${incorrect_lines}" ]]; then - echo " Sample of invalid domains (showing up to five):" + echo " Sample of invalid domains:" while IFS= read -r line; do echo " - ${line}" done <<< "${incorrect_lines}" From 3dd05606ca4714023124b579c648b991fc57af77 Mon Sep 17 00:00:00 2001 From: DL6ER Date: Mon, 24 Feb 2020 07:06:15 +0100 Subject: [PATCH 6/6] Call it the received number of domains instead of the imported number, as importing only happens a bit later. Only show the number of invalid domains if there are invalid domains. 
Signed-off-by: DL6ER --- gravity.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gravity.sh b/gravity.sh index d09211c8..c421e832 100755 --- a/gravity.sh +++ b/gravity.sh @@ -431,7 +431,11 @@ parseList() { num_correct_lines="$(( num_target_lines-total_num ))" total_num="$num_target_lines" num_invalid="$(( num_lines-num_correct_lines ))" - echo " ${INFO} Imported ${num_correct_lines} of ${num_lines} domains, ${num_invalid} domains invalid" + if [[ "${num_invalid}" -eq 0 ]]; then + echo " ${INFO} Received ${num_lines} domains" + else + echo " ${INFO} Received ${num_lines} domains, ${num_invalid} domains invalid!" + fi # Display sample of invalid lines if we found some if [[ -n "${incorrect_lines}" ]]; then