Slow query fix & queryFunc optimisation

* Validate modified code using Shellcheck & Strict Bash
* Add and/or elaborate on comments

* scanList() should exit if /etc/pihole is not available
* Add `export LC_CTYPE=C` to prevent extreme grep slowdown
* Consider "domain.com#comment" an exact match
* Add a specialised grep for wildcard searches

* Optimise and simplify queryFunc() for readability
* Replace IDN `python` parsing with `idn2`, as `python` is not guaranteed to be available
* Use ${COL_BOLD} when printing filenames
pull/1694/head
WaLLy3K 7 years ago committed by GitHub
parent c458e4a93b
commit c2d3e99ddd

299
pihole

@ -84,21 +84,27 @@ updateGravityFunc() {
exit 0 exit 0
} }
# Scan an array of files for matching strings
scanList(){ scanList(){
domain="${1}" local domain="${1}" lists="${2}" type="${3:-}"
list="${2}"
method="${3}"
# Switch folder, preventing grep from printing file path # Prevent grep from printing file path
cd "/etc/pihole" || return 1 cd "/etc/pihole" || exit 1
if [[ -n "${method}" ]]; then # Prevent grep -i matching slowly: http://bit.ly/2xFXtUX
grep -i -E -l "(^|\s|\/)${domain}($|\s|\/)" ${list} /dev/null 2> /dev/null export LC_CTYPE=C
else
grep -i "${domain}" ${list} /dev/null 2> /dev/null # /dev/null forces filename to be printed when only one list has been generated
fi # shellcheck disable=SC2086
case "${type}" in
"exact" ) grep -i -E -l "(^|\\s)${domain}($|\\s|#)" ${lists} /dev/null;;
"wc" ) grep -i -o -m 1 "/${domain}/" ${lists};;
* ) grep -i "${domain}" ${lists} /dev/null;;
esac
} }
# Print each subdomain
# e.g: foo.bar.baz.com = "foo.bar.baz.com bar.baz.com baz.com com"
processWildcards() { processWildcards() {
IFS="." read -r -a array <<< "${1}" IFS="." read -r -a array <<< "${1}"
for (( i=${#array[@]}-1; i>=0; i-- )); do for (( i=${#array[@]}-1; i>=0; i-- )); do
@ -115,8 +121,8 @@ processWildcards() {
} }
queryFunc() { queryFunc() {
options="$*" shift
options="${options/-q /}" local options="$*" adlist="" all="" exact="" blockpage="" matchType="match"
if [[ "${options}" == "-h" ]] || [[ "${options}" == "--help" ]]; then if [[ "${options}" == "-h" ]] || [[ "${options}" == "--help" ]]; then
echo "Usage: pihole -q [option] <domain> echo "Usage: pihole -q [option] <domain>
@ -131,200 +137,175 @@ Options:
exit 0 exit 0
fi fi
if [[ "${options}" == *"-exact"* ]]; then if [[ ! -e "/etc/pihole/adlists.list" ]]; then
method="exact" echo -e "${COL_LIGHT_RED}The file '/etc/pihole/adlists.list' was not found${COL_NC}"
exact=true exit 1
fi
if [[ "${options}" == *"-adlist"* ]]; then
adlist=true
fi fi
# Handle valid options
if [[ "${options}" == *"-bp"* ]]; then if [[ "${options}" == *"-bp"* ]]; then
method="exact" exact="exact"; blockpage=true
blockpage=true else
fi [[ "${options}" == *"-adlist"* ]] && adlist=true
[[ "${options}" == *"-all"* ]] && all=true
if [[ "${options}" == *"-all"* ]]; then if [[ "${options}" == *"-exact"* ]]; then
all=true exact="exact"; matchType="exact ${matchType}"
fi
fi fi
# Strip valid options, leaving only the domain and invalid options # Strip valid options, leaving only the domain and invalid options
options=$(sed 's/ \?-\(exact\|adlist\(s\)\?\|bp\|all\) \?//g' <<< "$options") # This allows users to place the options before or after the domain
options=$(sed -E 's/ ?-(bp|adlists?|all|exact)//g' <<< "${options}")
# Handle errors
if [[ "${options}" == *" "* ]]; then # Handle remaining options
error=true # If $options contain non ASCII characters, convert to punycode
str="Unknown option specified" case "${options}" in
elif [[ "${options}" == "-q" ]]; then "" ) str="No domain specified";;
error=true *$'\n'* ) str="Unknown query option specified";;
str="No domain specified" *[![:ascii:]]* ) domainQuery=$(idn2 "${options}");;
fi * ) domainQuery="${options}";;
esac
if [[ -n "${error}" ]]; then
echo -e " ${COL_LIGHT_RED}${str}${COL_NC} if [[ -n "${str:-}" ]]; then
Try 'pihole -q --help' for more information." echo -e "${str}${COL_NC}\\nTry 'pihole -q --help' for more information."
exit 1 exit 1
fi fi
# If domain contains non ASCII characters, convert domain to punycode if python is available
# Cr: https://serverfault.com/a/335079
if [[ "$options" = *[![:ascii:]]* ]]; then
if command -v python &> /dev/null; then
query=$(python -c 'import sys;print sys.argv[1].decode("utf-8").encode("idna")' "${options}")
fi
else
query="${options}"
fi
# Scan Whitelist and Blacklist # Scan Whitelist and Blacklist
lists="whitelist.txt blacklist.txt" lists="whitelist.txt blacklist.txt"
results=($(scanList "${query}" "${lists}" "${method}")) mapfile -t results <<< "$(scanList "${domainQuery}" "${lists}" "${exact}")"
if [[ -n "${results[*]}" ]]; then if [[ -n "${results[*]}" ]]; then
blResult=true wbMatch=true
# Loop through each scanList line to print appropriate title
# Loop through each result in order to print unique file title once
for result in "${results[@]}"; do for result in "${results[@]}"; do
filename="${result/:*/}" fileName="${result%%.*}"
if [[ -n "$exact" ]]; then
printf " Exact result in %s\n" "${filename}" if [[ -n "${blockpage}" ]]; then
elif [[ -n "$blockpage" ]]; then echo "π ${fileName}"
printf "π %s\n" "${filename}" exit 0
elif [[ -n "${exact}" ]]; then
echo " ${matchType^} found in ${COL_BOLD}${fileName^}${COL_NC}"
else else
domain="${result/*:/}" # Only print filename title once per file
if [[ ! "${filename}" == "${filename_prev:-}" ]]; then if [[ ! "${fileName}" == "${fileName_prev:-}" ]]; then
printf " Result from %s\n" "${filename}" echo " ${matchType^} found in ${COL_BOLD}${fileName^}${COL_NC}"
fileName_prev="${fileName}"
fi fi
printf " %s\n" "${domain}" echo " ${result#*:}"
filename_prev="${filename}"
fi fi
done done
fi fi
# Scan Wildcards # Scan Wildcards
if [[ -e "${wildcardlist}" ]]; then if [[ -e "${wildcardlist}" ]]; then
wildcards=($(processWildcards "${query}")) # Determine all subdomains, domain and TLDs
mapfile -t wildcards <<< "$(processWildcards "${domainQuery}")"
for match in "${wildcards[@]}"; do for match in "${wildcards[@]}"; do
results=($(scanList "\/${match}\/" ${wildcardlist})) # Search wildcard list for matches
mapfile -t results <<< "$(scanList "${match}" "${wildcardlist}" "wc")"
if [[ -n "${results[*]}" ]]; then if [[ -n "${results[*]}" ]]; then
# Remove empty lines before counting number of results if [[ -z "${wcMatch:-}" ]] && [[ -z "${blockpage}" ]]; then
count=$(sed '/^\s*$/d' <<< "${results[@]}" | wc -l) wcMatch=true
if [[ "${count}" -ge 0 ]]; then echo " ${matchType^} found in ${COL_BOLD}Wildcards${COL_NC}:"
blResult=true
if [[ -z "${blockpage}" ]]; then
printf " Wildcard result in %s\n" "${wildcardlist/*dnsmasq.d\/}"
fi
if [[ -n "${blockpage}" ]]; then
echo "π ${wildcardlist/*\/}"
else
echo " *.${match}"
fi
fi fi
case "${blockpage}" in
true ) echo "π ${wildcardlist##*/}"; exit 0;;
* ) echo " *.${match}";;
esac
fi fi
done done
[[ -n "${blResult}" ]] && [[ -n "${blockpage}" ]] && exit 0
fi fi
# Glob *.domains file names, remove file paths and sort by list number # Get version sorted *.domains filenames (without dir path)
lists_raw=(/etc/pihole/*.domains) lists=("$(cd "/etc/pihole" || exit 0; printf "%s\\n" -- *.domains | sort -V)")
IFS_OLD=$IFS
IFS=$'\n'
lists=$(sort -t . -k 2 -g <<< "${lists_raw[*]//\/etc\/pihole\//}")
# Scan Domains files # Query blocklists for occurrences of domain
results=($(scanList "${query}" "${lists}" "${method}")) mapfile -t results <<< "$(scanList "${domainQuery}" "${lists[*]}" "${exact}")"
# Handle notices # Handle notices
if [[ -z "${blResult}" ]] && [[ -z "${results[*]}" ]]; then if [[ -z "${wbMatch:-}" ]] && [[ -z "${wcMatch:-}" ]] && [[ -z "${results[*]}" ]]; then
notice=true echo -e " ${INFO} No ${exact/t/t }results found for ${COL_BOLD}${domainQuery}${COL_NC} found within block lists"
str="No ${method/t/t }results found for ${query} found within block lists" exit 0
elif [[ -z "${all}" ]] && [[ "${#results[*]}" -ge 16000 ]]; then elif [[ -z "${results[*]}" ]]; then
# 16000 chars is 15 chars X 1000 lines worth of results # Result found in WL/BL/Wildcards
notice=true exit 0
str="Hundreds of ${method/t/t }results found for ${query} elif [[ -z "${all}" ]] && [[ "${#results[*]}" -ge 100 ]]; then
This can be overriden using the -all option" echo -e " ${INFO} Over 100 ${exact/t/t }results found for ${COL_BOLD}${domainQuery}${COL_NC}
This can be overridden using the -all option"
exit 0
fi fi
if [[ -n "${notice}" ]]; then # Remove unwanted content from non-exact $results
echo -e " ${INFO} ${str}" if [[ -z "${exact}" ]]; then
exit # Delete lines starting with #
# Remove comments after domain
# Remove hosts format IP address
mapfile -t results <<< "$(IFS=$'\n'; sed \
-e "/:#/d" \
-e "s/[ \\t]#.*//g" \
-e "s/:.*[ \\t]/:/g" \
<<< "${results[*]}")"
# Exit if result was in a comment
[[ -z "${results[*]}" ]] && exit 0
fi fi
# Remove unwanted content from results # Get adlist file content as array
if [[ -z "${method}" ]]; then if [[ -n "${adlist}" ]] || [[ -n "${blockpage}" ]]; then
results=($(sed "/:#/d" <<< "${results[*]}")) # Lines starting with comments for adlistUrl in $(< "/etc/pihole/adlists.list"); do
results=($(sed "s/[ \t]#.*//g" <<< "${results[*]}")) # Comments after domain if [[ "${adlistUrl:0:4}" =~ (http|www.) ]]; then
results=($(sed "s/:.*[ \t]/:/g" <<< "${results[*]}")) # IP address adlists+=("${adlistUrl}")
fi
done
fi fi
IFS=$IFS_OLD
# Get adlist content as array # Print "Exact matches for" title
if [[ -n "${adlist}" ]] || [[ -n "${blockpage}" ]]; then if [[ -n "${exact}" ]] && [[ -z "${blockpage}" ]]; then
if [[ -f "/etc/pihole/adlists.list" ]]; then plural=""; [[ "${#results[*]}" -gt 1 ]] && plural="es"
for url in $(< /etc/pihole/adlists.list); do echo " ${matchType^}${plural} for ${COL_BOLD}${domainQuery}${COL_NC} found in:"
if [[ "${url:0:4}" == "http" ]] || [[ "${url:0:3}" == "www" ]]; then
adlists+=("$url")
fi
done
else
echo -e " ${COL_LIGHT_RED}The file '/etc/pihole/adlists.list' was not found${COL_NC}"
exit 1
fi
fi fi
if [[ -n "${results[*]}" ]]; then for result in "${results[@]}"; do
if [[ -n "${exact}" ]]; then fileName="${result/:*/}"
echo " Exact result(s) for ${query} found in:"
fi
for result in "${results[@]}"; do # Determine *.domains URL using filename's number
filename="${result/:*/}" if [[ -n "${adlist}" ]] || [[ -n "${blockpage}" ]]; then
fileNum="${fileName/list./}"; fileNum="${fileNum%%.*}"
# Convert file name to URL name for -adlist or -bp options fileName="${adlists[$fileNum]}"
if [[ -n "${adlist}" ]] || [[ -n "${blockpage}" ]]; then
filenum=("${filename/list./}")
filenum=("${filenum/.*/}")
filename="${adlists[$filenum]}"
# If gravity has generated associated .domains files
# but adlists.list has been modified since
if [[ -z "${filename}" ]]; then
filename="${COL_LIGHT_RED}Error: no associated adlists URL found${COL_NC}"
fi
fi
if [[ -n "${exact}" ]]; then # Discrepancy occurs when adlists has been modified, but Gravity has not been run
printf " %s\n" "${filename}" if [[ -z "${fileName}" ]]; then
elif [[ -n "${blockpage}" ]]; then fileName="${COL_LIGHT_RED}(no associated adlists URL found)${COL_NC}"
printf "%s %s\n" "${filenum}" "${filename}" fi
else # Standard query output fi
# Print filename heading once per file, not for every match if [[ -n "${blockpage}" ]]; then
if [[ ! "${filename}" == "${filename_prev:-}" ]]; then echo "${fileNum} ${fileName}"
unset count elif [[ -n "${exact}" ]]; then
printf " Result from %s\n" "${filename}" echo " ${fileName}"
else else
let count++ if [[ ! "${fileName}" == "${fileName_prev:-}" ]]; then
fi count=""
echo " ${matchType^} found in ${COL_BOLD}${fileName}${COL_NC}:"
fileName_prev="${fileName}"
fi
: $((count++))
# Print matching domain if $max_count has not been reached # Print matching domain if $max_count has not been reached
[[ -z "${all}" ]] && max_count="20" [[ -z "${all}" ]] && max_count="50"
if [[ -z "${all}" ]] && [[ "${count}" -eq "${max_count}" ]]; then if [[ -z "${all}" ]] && [[ "${count}" -ge "${max_count}" ]]; then
echo " Over $count results found, skipping rest of file" [[ "${count}" -gt "${max_count}" ]] && continue
elif [[ -z "${all}" ]] && [[ "${count}" -gt "${max_count}" ]]; then echo " ${COL_GRAY}Over ${count} results found, skipping rest of file${COL_NC}"
continue else
else echo " ${result#*:}"
domain="${result/*:/}"
printf " %s\n" "${domain}"
fi
filename_prev="${filename}"
fi fi
done fi
fi done
exit 0 exit 0
} }

Loading…
Cancel
Save