From 6b33b8b4c09d3f33524a1b89df5dc8e01d12efbe Mon Sep 17 00:00:00 2001 From: DL6ER Date: Sun, 27 Dec 2020 19:14:52 +0100 Subject: [PATCH 1/5] Store status of downloaded list (downloaded, using cache, some error, ...) and number of (in-)valid domains on this list in the gravity database. This updates the gravity database to version 14. Signed-off-by: DL6ER --- .../Scripts/database_migration/gravity-db.sh | 8 +- .../database_migration/gravity/13_to_14.sql | 13 ++++ advanced/Templates/gravity.db.sql | 7 +- gravity.sh | 74 +++++++++++++++++-- 4 files changed, 92 insertions(+), 10 deletions(-) create mode 100644 advanced/Scripts/database_migration/gravity/13_to_14.sql diff --git a/advanced/Scripts/database_migration/gravity-db.sh b/advanced/Scripts/database_migration/gravity-db.sh index 282ea07b..22f241dd 100644 --- a/advanced/Scripts/database_migration/gravity-db.sh +++ b/advanced/Scripts/database_migration/gravity-db.sh @@ -111,9 +111,15 @@ upgrade_gravityDB(){ version=12 fi if [[ "$version" == "12" ]]; then - # Add column date_updated to alist table + # Add column date_updated to adlist table echo -e " ${INFO} Upgrading gravity database from version 12 to 13" sqlite3 "${database}" < "${scriptPath}/12_to_13.sql" version=13 fi + if [[ "$version" == "13" ]]; then + # Add columns number and status to adlist table + echo -e " ${INFO} Upgrading gravity database from version 13 to 14" + sqlite3 "${database}" < "${scriptPath}/13_to_14.sql" + version=14 + fi } diff --git a/advanced/Scripts/database_migration/gravity/13_to_14.sql b/advanced/Scripts/database_migration/gravity/13_to_14.sql new file mode 100644 index 00000000..fa230865 --- /dev/null +++ b/advanced/Scripts/database_migration/gravity/13_to_14.sql @@ -0,0 +1,13 @@ +.timeout 30000 + +PRAGMA FOREIGN_KEYS=OFF; + +BEGIN TRANSACTION; + +ALTER TABLE adlist ADD COLUMN number INTEGER NOT NULL DEFAULT 0; +ALTER TABLE adlist ADD COLUMN invalid_domains INTEGER NOT NULL DEFAULT 0; +ALTER TABLE adlist ADD COLUMN status INTEGER 
NOT NULL DEFAULT 0; + +UPDATE info SET value = 14 WHERE property = 'version'; + +COMMIT; \ No newline at end of file diff --git a/advanced/Templates/gravity.db.sql b/advanced/Templates/gravity.db.sql index 26b030c8..5d7bafa9 100644 --- a/advanced/Templates/gravity.db.sql +++ b/advanced/Templates/gravity.db.sql @@ -32,7 +32,10 @@ CREATE TABLE adlist date_added INTEGER NOT NULL DEFAULT (cast(strftime('%s', 'now') as int)), date_modified INTEGER NOT NULL DEFAULT (cast(strftime('%s', 'now') as int)), comment TEXT, - date_updated INTEGER + date_updated INTEGER, + number INTEGER NOT NULL DEFAULT 0, + invalid_domains INTEGER NOT NULL DEFAULT 0, + status INTEGER NOT NULL DEFAULT 0 ); CREATE TABLE adlist_by_group @@ -54,7 +57,7 @@ CREATE TABLE info value TEXT NOT NULL ); -INSERT INTO "info" VALUES('version','13'); +INSERT INTO "info" VALUES('version','14'); CREATE TABLE domain_audit ( diff --git a/gravity.sh b/gravity.sh index 5a831dae..b238275f 100755 --- a/gravity.sh +++ b/gravity.sh @@ -217,6 +217,48 @@ database_adlist_updated() { fi } +# Check if a column with name ${2} exists in gravity table with name ${1} +gravity_column_exists() { + output=$( { printf ".timeout 30000\\nSELECT EXISTS(SELECT * FROM pragma_table_info('%s') WHERE name='%s');\\n" "${1}" "${2}" | sqlite3 "${gravityDBfile}"; } 2>&1 ) + if [[ "${output}" == "1" ]]; then + return 0 # Bash 0 is success + fi + + return 1 # Bash non-0 is failure +} + +# Update number of domain on this list. We store this in the "old" database as all values in the new database will later be overwritten +database_adlist_number() { + # Only try to set number of domains when this field exists in the gravity database + if ! gravity_column_exists "adlist" "number"; then + return; + fi + + output=$( { printf ".timeout 30000\\nUPDATE adlist SET number = %i, invalid_domains = %i WHERE id = %i;\\n" "${total_num}" "${invalid_num}" "${1}" | sqlite3 "${gravityDBfile}"; } 2>&1 ) + status="$?" 
+ + if [[ "${status}" -ne 0 ]]; then + echo -e "\\n ${CROSS} Unable to update number of domains in adlist with ID ${1} in database ${gravityDBfile}\\n ${output}" + gravity_Cleanup "error" + fi +} + +# Update status of this list. We store this in the "old" database as all values in the new database will later be overwritten +database_adlist_status() { + # Only try to set the status when this field exists in the gravity database + if ! gravity_column_exists "adlist" "status"; then + return; + fi + + output=$( { printf ".timeout 30000\\nUPDATE adlist SET status = %i WHERE id = %i;\\n" "${2}" "${1}" | sqlite3 "${gravityDBfile}"; } 2>&1 ) + status="$?" + + if [[ "${status}" -ne 0 ]]; then + echo -e "\\n ${CROSS} Unable to update status of adlist with ID ${1} in database ${gravityDBfile}\\n ${output}" + gravity_Cleanup "error" + fi +} + # Migrate pre-v5.0 list files to database-based Pi-hole versions migrate_to_database() { # Create database file only if not present @@ -439,6 +481,7 @@ gravity_DownloadBlocklists() { } total_num=0 +invalid_num=0 parseList() { local adlistID="${1}" src="${2}" target="${3}" incorrect_lines # This sed does the following things: @@ -456,11 +499,11 @@ parseList() { num_target_lines="$(grep -c "^" "${target}")" num_correct_lines="$(( num_target_lines-total_num ))" total_num="$num_target_lines" - num_invalid="$(( num_lines-num_correct_lines ))" - if [[ "${num_invalid}" -eq 0 ]]; then - echo " ${INFO} Received ${num_lines} domains" + invalid_num="$(( num_lines-num_correct_lines ))" + if [[ "${invalid_num}" -eq 0 ]]; then + echo " ${INFO} Analyzed ${num_lines} domains" else - echo " ${INFO} Received ${num_lines} domains, ${num_invalid} domains invalid!" + echo " ${INFO} Analyzed ${num_lines} domains, ${invalid_num} domains invalid!" 
fi # Display sample of invalid lines if we found some @@ -554,31 +597,48 @@ gravity_DownloadBlocklistFromUrl() { esac;; esac + local done="false" # Determine if the blocklist was downloaded and saved correctly if [[ "${success}" == true ]]; then if [[ "${httpCode}" == "304" ]]; then # Add domains to database table file parseList "${adlistID}" "${saveLocation}" "${target}" + database_adlist_status "${adlistID}" "2" + database_adlist_number "${adlistID}" + done="true" # Check if $patternbuffer is a non-zero length file elif [[ -s "${patternBuffer}" ]]; then # Determine if blocklist is non-standard and parse as appropriate gravity_ParseFileIntoDomains "${patternBuffer}" "${saveLocation}" # Add domains to database table file parseList "${adlistID}" "${saveLocation}" "${target}" - # Update date_updated field in gravity database table + # Update gravity database table + database_adlist_status "${adlistID}" "1" database_adlist_updated "${adlistID}" + database_adlist_number "${adlistID}" + done="true" else # Fall back to previously cached list if $patternBuffer is empty - echo -e " ${INFO} Received empty file: ${COL_LIGHT_GREEN}using previously cached list${COL_NC}" + echo -e " ${INFO} Received empty file" fi - else + fi + + # Do we need to fall back to a cached list (if available)? 
+ if [[ "${done}" != "true" ]]; then # Determine if cached list has read permission if [[ -r "${saveLocation}" ]]; then echo -e " ${CROSS} List download failed: ${COL_LIGHT_GREEN}using previously cached list${COL_NC}" # Add domains to database table file parseList "${adlistID}" "${saveLocation}" "${target}" + database_adlist_number "${adlistID}" + database_adlist_status "${adlistID}" "3" else echo -e " ${CROSS} List download failed: ${COL_LIGHT_RED}no cached list available${COL_NC}" + # Total number == -1 means there was no cached list that could have been used + total_num=-1 + invalid_num=0 + database_adlist_number "${adlistID}" + database_adlist_status "${adlistID}" "4" fi fi } From 8c56f54a1e4a0ad659914471f8aeb8d92b259956 Mon Sep 17 00:00:00 2001 From: DL6ER Date: Tue, 29 Dec 2020 09:54:25 +0100 Subject: [PATCH 2/5] Compare checksum of downloaded list against older checksums to see if the list content changed since the last download Signed-off-by: DL6ER --- gravity.sh | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/gravity.sh b/gravity.sh index b238275f..1defa06f 100755 --- a/gravity.sh +++ b/gravity.sh @@ -514,6 +514,27 @@ parseList() { done <<< "${incorrect_lines}" fi } +compareLists() { + local adlistID="${1}" target="${2}" result + + # Verify checksum when an older checksum exists + if [[ -s "${target}.sha1" ]]; then + if ! 
sha1sum --check --status --strict "${target}.sha1"; then + # The list changed upstream, we need to update the checksum + sha1sum "${target}" > "${target}.sha1" + echo " ${INFO} List has been updated" + database_adlist_status "${adlistID}" "1" + else + echo " ${INFO} List stayed unchanged" + database_adlist_status "${adlistID}" "2" + fi + else + # No checksum available, create one for comparing on the next run + sha1sum "${target}" > "${target}.sha1" + # We assume here it was changed upstream + database_adlist_status "${adlistID}" "1" + fi +} # Download specified URL and perform checks on HTTP status and file content gravity_DownloadBlocklistFromUrl() { @@ -612,8 +633,9 @@ gravity_DownloadBlocklistFromUrl() { gravity_ParseFileIntoDomains "${patternBuffer}" "${saveLocation}" # Add domains to database table file parseList "${adlistID}" "${saveLocation}" "${target}" - # Update gravity database table - database_adlist_status "${adlistID}" "1" + # Compare lists, are they identical? + compareLists "${adlistID}" "${saveLocation}" + # Update gravity database table (status is set in compareLists) database_adlist_updated "${adlistID}" database_adlist_number "${adlistID}" done="true" From a216848c1db65dc1be5a1928e3f225c7f10be694 Mon Sep 17 00:00:00 2001 From: DL6ER Date: Tue, 29 Dec 2020 20:28:09 +0100 Subject: [PATCH 3/5] Only update time of last list change when we see a list for the first time or when it really changed content Signed-off-by: DL6ER --- gravity.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/gravity.sh b/gravity.sh index 1defa06f..0119b710 100755 --- a/gravity.sh +++ b/gravity.sh @@ -524,6 +524,7 @@ compareLists() { sha1sum "${target}" > "${target}.sha1" echo " ${INFO} List has been updated" database_adlist_status "${adlistID}" "1" + database_adlist_updated "${adlistID}" else echo " ${INFO} List stayed unchanged" database_adlist_status "${adlistID}" "2" @@ -531,8 +532,10 @@ compareLists() { else # No checksum available, create one 
for comparing on the next run sha1sum "${target}" > "${target}.sha1" + echo " ${INFO} This list is new" # We assume here it was changed upstream database_adlist_status "${adlistID}" "1" + database_adlist_updated "${adlistID}" fi } @@ -635,8 +638,8 @@ gravity_DownloadBlocklistFromUrl() { parseList "${adlistID}" "${saveLocation}" "${target}" # Compare lists, are they identical? compareLists "${adlistID}" "${saveLocation}" - # Update gravity database table (status is set in compareLists) - database_adlist_updated "${adlistID}" + # Update gravity database table (status and updated timestamp are set in + # compareLists) database_adlist_number "${adlistID}" done="true" else From 0944807491ca40009eabe47c5198907b518100c4 Mon Sep 17 00:00:00 2001 From: DL6ER Date: Tue, 29 Dec 2020 20:35:48 +0100 Subject: [PATCH 4/5] Actually store correct number of domains for the individual lists (and not the sum of the so far collected number of domains) Signed-off-by: DL6ER --- gravity.sh | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/gravity.sh b/gravity.sh index 0119b710..684d64b3 100755 --- a/gravity.sh +++ b/gravity.sh @@ -234,7 +234,7 @@ database_adlist_number() { return; fi - output=$( { printf ".timeout 30000\\nUPDATE adlist SET number = %i, invalid_domains = %i WHERE id = %i;\\n" "${total_num}" "${invalid_num}" "${1}" | sqlite3 "${gravityDBfile}"; } 2>&1 ) + output=$( { printf ".timeout 30000\\nUPDATE adlist SET number = %i, invalid_domains = %i WHERE id = %i;\\n" "${num_lines}" "${num_invalid}" "${1}" | sqlite3 "${gravityDBfile}"; } 2>&1 ) status="$?" 
if [[ "${status}" -ne 0 ]]; then @@ -481,7 +481,8 @@ gravity_DownloadBlocklists() { } total_num=0 -invalid_num=0 +num_lines=0 +num_invalid=0 parseList() { local adlistID="${1}" src="${2}" target="${3}" incorrect_lines # This sed does the following things: @@ -492,18 +493,18 @@ parseList() { # Find (up to) five domains containing invalid characters (see above) incorrect_lines="$(sed -e "/[^a-zA-Z0-9.\_-]/!d" "${src}" | head -n 5)" - local num_lines num_target_lines num_correct_lines num_invalid + local num_target_lines num_correct_lines num_invalid # Get number of lines in source file num_lines="$(grep -c "^" "${src}")" # Get number of lines in destination file num_target_lines="$(grep -c "^" "${target}")" num_correct_lines="$(( num_target_lines-total_num ))" total_num="$num_target_lines" - invalid_num="$(( num_lines-num_correct_lines ))" - if [[ "${invalid_num}" -eq 0 ]]; then + num_invalid="$(( num_lines-num_correct_lines ))" + if [[ "${num_invalid}" -eq 0 ]]; then echo " ${INFO} Analyzed ${num_lines} domains" else - echo " ${INFO} Analyzed ${num_lines} domains, ${invalid_num} domains invalid!" + echo " ${INFO} Analyzed ${num_lines} domains, ${num_invalid} domains invalid!" 
fi # Display sample of invalid lines if we found some @@ -515,7 +516,7 @@ parseList() { fi } compareLists() { - local adlistID="${1}" target="${2}" result + local adlistID="${1}" target="${2}" # Verify checksum when an older checksum exists if [[ -s "${target}.sha1" ]]; then @@ -659,9 +660,9 @@ gravity_DownloadBlocklistFromUrl() { database_adlist_status "${adlistID}" "3" else echo -e " ${CROSS} List download failed: ${COL_LIGHT_RED}no cached list available${COL_NC}" - # Total number == -1 means there was no cached list that could have been used - total_num=-1 - invalid_num=0 + # Manually reset these two numbers because we do not call parseList here + num_lines=0 + num_invalid=0 database_adlist_number "${adlistID}" database_adlist_status "${adlistID}" "4" fi From a2625df5e2a7e406cadcd430ea3902c47234769c Mon Sep 17 00:00:00 2001 From: DL6ER Date: Wed, 30 Dec 2020 11:27:34 +0100 Subject: [PATCH 5/5] Remove "The list is new" comment because it is superfluous Signed-off-by: DL6ER --- gravity.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/gravity.sh b/gravity.sh index 684d64b3..0dd65bfb 100755 --- a/gravity.sh +++ b/gravity.sh @@ -533,7 +533,6 @@ compareLists() { else # No checksum available, create one for comparing on the next run sha1sum "${target}" > "${target}.sha1" - echo " ${INFO} This list is new" # We assume here it was changed upstream database_adlist_status "${adlistID}" "1" database_adlist_updated "${adlistID}"