#!/usr/bin/env bash
# Pi-hole: A black hole for Internet advertisements
# (c) 2018 Pi-hole, LLC (https://pi-hole.net)
# Network-wide ad blocking via your own hardware.
#
# Query Domain Lists
#
# This file is copyright under the latest version of the EUPL.
# Please see LICENSE file for your rights under this license.
#
# Provenance: reconstructed from the patch "Split queryFunc() into query.sh"
# (Rob Gill, 2018-06-13), which creates advanced/query.sh.
#
# Usage: query.sh [-adlist] [-exact] [-all] [-bp] <domain>
#   Searches whitelist.txt, blacklist.txt, the wildcard list and every
#   *.domains file below $piholeDir for <domain> and prints where it matched.
#   Options may be given before or after the domain.
#   -bp forces exact matching and prints terse "π <file>" / "<num> <url>"
#   lines instead of the human-readable report.
# Exit status: 0 on a completed query (with or without matches) and for
#   --help; 1 when adlists.list is missing or the arguments are invalid.

# Globals
piholeDir="/etc/pihole"
adListsList="$piholeDir/adlists.list"
# NOTE(review): the original script tested "${wildcardlist}" without ever
# defining it, so the wildcard scan could never run. The path below is assumed
# to be the dnsmasq wildcard config written by Pi-hole — confirm before merge.
wildcardlist="/etc/dnsmasq.d/03-pihole-wildcard.conf"
options="$*"
adlist=""
adlists=()
all=""
exact=""
blockpage=""
matchType="match"

colfile="/opt/pihole/COL_TABLE"
# shellcheck disable=SC1090
source "${colfile}"

# Print the queried domain followed by each of its parent domains, one per
# line, ending with the TLD.
# e.g: foo.bar.baz.com -> foo.bar.baz.com, bar.baz.com, baz.com, com
# Arguments: $1 - domain to expand
# Outputs:   one candidate per line on stdout
processWildcards() {
    local remaining="${1}"

    while [[ -n "${remaining}" ]]; do
        printf '%s\n' "${remaining}"
        # Stop once there is no further label to strip
        [[ "${remaining}" == *.* ]] || break
        remaining="${remaining#*.}"
    done
}

# Scan a list of files for matching strings
# Arguments: $1 - domain to look for (full stops are escaped, other regex
#                 metacharacters are passed through to grep)
#            $2 - whitespace separated file names (relative to $piholeDir
#                 or absolute)
#            $3 - "exact" (print names of files with a whole-entry match),
#                 "wc" (first "/domain/" hit per file), or empty (substring)
# Outputs:   grep output on stdout
# Note:      changes directory and exports LC_CTYPE; intended to be called
#            inside a command substitution so neither leaks to the caller.
scanList(){
    # Escape full stops
    local domain="${1//./\\.}" lists="${2}" type="${3:-}"

    # Prevent grep from printing file path
    cd "$piholeDir" || exit 1

    # Prevent grep -i matching slowly: http://bit.ly/2xFXtUX
    export LC_CTYPE=C

    # /dev/null forces filename to be printed when only one list has been generated
    # -e / -- keep a pattern or file name starting with "-" from being read as an option
    # shellcheck disable=SC2086
    case "${type}" in
        "exact" ) grep -i -E -l -e "(^|\\s)${domain}(\$|\\s|#)" -- ${lists} /dev/null;;
        "wc"    ) grep -i -o -m 1 -e "/${domain}/" -- ${lists};;
        *       ) grep -i -e "${domain}" -- ${lists} /dev/null;;
    esac
}

if [[ "${options}" == "-h" ]] || [[ "${options}" == "--help" ]]; then
    echo "Usage: pihole -q [option] <domain>
Example: 'pihole -q -exact domain.com'
Query the adlists for a specified domain

Options:
  -adlist             Print the name of the block list URL
  -exact              Search the block lists for exact domain matches
  -all                Return all query matches within a block list
  -h, --help          Show this help dialog"
    exit 0
fi

if [[ ! -e "$adListsList" ]]; then
    echo -e "${COL_LIGHT_RED}The file '/etc/pihole/adlists.list' was not found${COL_NC}"
    exit 1
fi

# Handle valid options
# Options are matched as whole words so that a domain which merely contains
# an option name (e.g. "my-all.example.com") is neither treated as an option
# nor has that part cut out of it.
read -r -a optionTokens <<< "${options}"
remainingTokens=()
for token in "${optionTokens[@]}"; do
    case "${token}" in
        "-bp"                 ) blockpage=true;;
        "-adlist"|"-adlists"  ) adlist=true;;
        "-all"                ) all=true;;
        "-exact"              ) exact="exact";;
        *                     ) remainingTokens+=("${token}");;
    esac
done

if [[ -n "${blockpage}" ]]; then
    # -bp implies exact matching and overrides every other option
    exact="exact"; adlist=""; all=""
elif [[ -n "${exact}" ]]; then
    matchType="exact ${matchType}"
fi

# Valid options are now stripped, leaving only the domain and invalid options
# This allows users to place the options before or after the domain
options="${remainingTokens[*]}"

# Handle remaining options
# If $options contain non ASCII characters, convert to punycode
case "${options}" in
    ""             ) str="No domain specified";;
    *" "*          ) str="Unknown query option specified";;
    *[![:ascii:]]* ) domainQuery=$(idn2 "${options}");;
    *              ) domainQuery="${options}";;
esac

if [[ -n "${str:-}" ]]; then
    echo -e "${str}${COL_NC}\\nTry 'pihole -q --help' for more information."
    exit 1
fi

# Scan Whitelist and Blacklist
lists="whitelist.txt blacklist.txt"
mapfile -t results <<< "$(scanList "${domainQuery}" "${lists}" "${exact}")"
if [[ -n "${results[*]}" ]]; then
    wbMatch=true
    # Loop through each result in order to print unique file title once
    for result in "${results[@]}"; do
        # "whitelist.txt[:line]" -> "whitelist"
        fileName="${result%%.*}"
        if [[ -n "${blockpage}" ]]; then
            echo "π ${result}"
            exit 0
        elif [[ -n "${exact}" ]]; then
            echo " ${matchType^} found in ${COL_BOLD}${fileName^}${COL_NC}"
        else
            # Only print filename title once per file
            if [[ ! "${fileName}" == "${fileName_prev:-}" ]]; then
                echo " ${matchType^} found in ${COL_BOLD}${fileName^}${COL_NC}"
                fileName_prev="${fileName}"
            fi
            echo " ${result#*:}"
        fi
    done
fi

# Scan Wildcards
if [[ -e "${wildcardlist}" ]]; then
    # Determine all subdomains, domain and TLDs
    mapfile -t wildcards <<< "$(processWildcards "${domainQuery}")"
    for match in "${wildcards[@]}"; do
        # Search wildcard list for matches
        mapfile -t results <<< "$(scanList "${match}" "${wildcardlist}" "wc")"
        if [[ -n "${results[*]}" ]]; then
            if [[ -z "${wcMatch:-}" ]] && [[ -z "${blockpage}" ]]; then
                wcMatch=true
                echo " ${matchType^} found in ${COL_BOLD}Wildcards${COL_NC}:"
            fi
            case "${blockpage}" in
                true ) echo "π ${wildcardlist##*/}"; exit 0;;
                *    ) echo " *.${match}";;
            esac
        fi
    done
fi

# Get version sorted *.domains filenames (without dir path)
# nullglob: with no *.domains files the list is empty instead of the literal
# pattern, so grep is not asked to open a file called "*.domains"
mapfile -t lists < <(
    cd "${piholeDir}" || exit 0
    shopt -s nullglob
    printf '%s\n' *.domains | sort -V
)

# Query blocklists for occurrences of domain
mapfile -t results <<< "$(scanList "${domainQuery}" "${lists[*]}" "${exact}")"

# Handle notices
if [[ -z "${wbMatch:-}" ]] && [[ -z "${wcMatch:-}" ]] && [[ -z "${results[*]}" ]]; then
    # ${exact/t/t } turns "exact" into "exact " and leaves an empty value empty
    echo -e " ${INFO} No ${exact/t/t }results found for ${COL_BOLD}${domainQuery}${COL_NC} within the block lists"
    exit 0
elif [[ -z "${results[*]}" ]]; then
    # Result found in WL/BL/Wildcards
    exit 0
elif [[ -z "${all}" ]] && [[ "${#results[*]}" -ge 100 ]]; then
    echo -e " ${INFO} Over 100 ${exact/t/t }results found for ${COL_BOLD}${domainQuery}${COL_NC}
        This can be overridden using the -all option"
    exit 0
fi

# Remove unwanted content from non-exact $results
if [[ -z "${exact}" ]]; then
    # Delete lines starting with #
    # Remove comments after domain
    # Remove hosts format IP address
    mapfile -t results <<< "$(IFS=$'\n'; sed \
        -e "/:#/d" \
        -e "s/[ \\t]#.*//g" \
        -e "s/:.*[ \\t]/:/g" \
        <<< "${results[*]}")"
    # Exit if result was in a comment
    [[ -z "${results[*]}" ]] && exit 0
fi

# Get adlist file content as array
# Every whitespace separated word that looks like a URL becomes one entry;
# the entry's index is what the number in "list.<num>.*.domains" refers to.
if [[ -n "${adlist}" ]] || [[ -n "${blockpage}" ]]; then
    while read -r -a adlistWords || [[ "${#adlistWords[@]}" -gt 0 ]]; do
        for adlistUrl in "${adlistWords[@]}"; do
            if [[ "${adlistUrl:0:4}" =~ (http|www.) ]]; then
                adlists+=("${adlistUrl}")
            fi
        done
    done < "${adListsList}"
fi

# Print "Exact matches for" title
if [[ -n "${exact}" ]] && [[ -z "${blockpage}" ]]; then
    plural=""; [[ "${#results[*]}" -gt 1 ]] && plural="es"
    echo " ${matchType^}${plural} for ${COL_BOLD}${domainQuery}${COL_NC} found in:"
fi

# Maximum number of lines reported per file unless -all was given
max_count="50"
count=0

for result in "${results[@]}"; do
    fileName="${result/:*/}"

    # Determine *.domains URL using filename's number
    if [[ -n "${adlist}" ]] || [[ -n "${blockpage}" ]]; then
        fileNum="${fileName/list./}"; fileNum="${fileNum%%.*}"
        if [[ "${fileNum}" =~ ^[0-9]+$ ]]; then
            # 10# stops a leading zero from being read as octal
            fileName="${adlists[10#${fileNum}]:-}"
        else
            fileName=""
        fi

        # Discrepancy occurs when adlists has been modified, but Gravity has not been run
        if [[ -z "${fileName}" ]]; then
            fileName="${COL_LIGHT_RED}(no associated adlists URL found)${COL_NC}"
        fi
    fi

    if [[ -n "${blockpage}" ]]; then
        echo "${fileNum} ${fileName}"
    elif [[ -n "${exact}" ]]; then
        echo " ${fileName}"
    else
        if [[ ! "${fileName}" == "${fileName_prev:-}" ]]; then
            count=0
            echo " ${matchType^} found in ${COL_BOLD}${fileName}${COL_NC}:"
            fileName_prev="${fileName}"
        fi
        count=$((count + 1))

        # Print matching domain if $max_count has not been reached
        if [[ -z "${all}" ]] && [[ "${count}" -ge "${max_count}" ]]; then
            # Announce the cut-off once, then stay silent for the rest of the file
            [[ "${count}" -gt "${max_count}" ]] && continue
            echo " ${COL_GRAY}Over ${count} results found, skipping rest of file${COL_NC}"
        else
            echo " ${result#*:}"
        fi
    fi
done

exit 0