#!/usr/bin/env bash
# shellcheck disable=SC1090
# Pi-hole: A black hole for Internet advertisements
# (c) 2018 Pi-hole, LLC (https://pi-hole.net)
# Network-wide ad blocking via your own hardware.
#
# Query Domain Lists
#
# This file is copyright under the latest version of the EUPL.
# Please see LICENSE file for your rights under this license.

# Globals
piholeDir="/etc/pihole"
adListsList="$piholeDir/adlists.list"
wildcardlist="/etc/dnsmasq.d/03-pihole-wildcard.conf"
options="$*"
adlist=""
all=""
exact=""
blockpage=""
matchType="match"

colfile="/opt/pihole/COL_TABLE"
source "${colfile}"

# Print each subdomain
# e.g: foo.bar.baz.com = "foo.bar.baz.com bar.baz.com baz.com com"
processWildcards() {
    IFS="." read -r -a array <<< "${1}"
    for (( i=${#array[@]}-1; i>=0; i-- )); do
        ar=""
        for (( j=${#array[@]}-1; j>${#array[@]}-i-2; j-- )); do
            if [[ $j == $((${#array[@]}-1)) ]]; then
                ar="${array[$j]}"
            else
                ar="${array[$j]}.${ar}"
            fi
        done
        echo "${ar}"
    done
}

# Scan an array of files for matching strings
scanList(){
    # Escape full stops
    local domain="${1//./\\.}" lists="${2}" type="${3:-}"

    # Prevent grep from printing file path
    cd "$piholeDir" || exit 1

    # Prevent grep -i matching slowly: http://bit.ly/2xFXtUX
    export LC_CTYPE=C

    # /dev/null forces filename to be printed when only one list has been generated
    # shellcheck disable=SC2086
    case "${type}" in
        "exact" ) grep -i -E -l "(^|\\s)${domain}($|\\s|#)" ${lists} /dev/null 2>/dev/null;;
        "wc"    ) grep -i -o -m 1 "/${domain}/" ${lists} 2>/dev/null;;
        *       ) grep -i "${domain}" ${lists} /dev/null 2>/dev/null;;
    esac
}

if [[ "${options}" == "-h" ]] || [[ "${options}" == "--help" ]]; then
    echo "Usage: pihole -q [option] <domain>
Example: 'pihole -q -exact domain.com'
Query the adlists for a specified domain

Options:
  -adlist             Print the name of the block list URL
  -exact              Search the block lists for exact domain matches
  -all                Return all query matches within a block list
  -h, --help          Show this help dialog"
  exit 0
fi

if [[ ! -e "$adListsList" ]]; then
    echo -e "${COL_LIGHT_RED}The file $adListsList was not found${COL_NC}"
    exit 1
fi

# Handle valid options
if [[ "${options}" == *"-bp"* ]]; then
    exact="exact"; blockpage=true
else
    [[ "${options}" == *"-adlist"* ]] && adlist=true
    [[ "${options}" == *"-all"* ]] && all=true
    if [[ "${options}" == *"-exact"* ]]; then
        exact="exact"; matchType="exact ${matchType}"
    fi
fi

# Strip valid options, leaving only the domain and invalid options
# This allows users to place the options before or after the domain
options=$(sed -E 's/ ?-(bp|adlists?|all|exact) ?//g' <<< "${options}")

# Handle remaining options
# If $options contain non ASCII characters, convert to punycode
case "${options}" in
    ""             ) str="No domain specified";;
    *" "*          ) str="Unknown query option specified";;
    *[![:ascii:]]* ) domainQuery=$(idn2 "${options}");;
    *              ) domainQuery="${options}";;
esac

if [[ -n "${str:-}" ]]; then
    echo -e "${str}${COL_NC}\\nTry 'pihole -q --help' for more information."
    exit 1
fi

# Scan Whitelist and Blacklist
lists="whitelist.txt blacklist.txt"
mapfile -t results <<< "$(scanList "${domainQuery}" "${lists}" "${exact}")"
if [[ -n "${results[*]}" ]]; then
    wbMatch=true
    # Loop through each result in order to print unique file title once
    for result in "${results[@]}"; do
        fileName="${result%%.*}"
        if [[ -n "${blockpage}" ]]; then
            echo "π ${result}"
            exit 0
        elif [[ -n "${exact}" ]]; then
            echo " ${matchType^} found in ${COL_BOLD}${fileName^}${COL_NC}"
        else
            # Only print filename title once per file
            if [[ ! "${fileName}" == "${fileName_prev:-}" ]]; then
                echo " ${matchType^} found in ${COL_BOLD}${fileName^}${COL_NC}"
                fileName_prev="${fileName}"
            fi
        echo "   ${result#*:}"
        fi
    done
fi

# Scan Wildcards
if [[ -e "${wildcardlist}" ]]; then
    # Determine all subdomains, domain and TLDs
    mapfile -t wildcards <<< "$(processWildcards "${domainQuery}")"
    for match in "${wildcards[@]}"; do
        # Search wildcard list for matches
        mapfile -t results <<< "$(scanList "${match}" "${wildcardlist}" "wc")"
        if [[ -n "${results[*]}" ]]; then
            if [[ -z "${wcMatch:-}" ]] && [[ -z "${blockpage}" ]]; then
                wcMatch=true
                echo " ${matchType^} found in ${COL_BOLD}Wildcards${COL_NC}:"
            fi
            case "${blockpage}" in
                true ) echo "π ${wildcardlist##*/}"; exit 0;;
                *    ) echo "   *.${match}";;
            esac
        fi
    done
fi

# Get version sorted *.domains filenames (without dir path)
lists=("$(cd "$piholeDir" || exit 0; printf "%s\\n" -- *.domains | sort -V)")

# Query blocklists for occurences of domain
mapfile -t results <<< "$(scanList "${domainQuery}" "${lists[*]}" "${exact}")"

# Handle notices
if [[ -z "${wbMatch:-}" ]] && [[ -z "${wcMatch:-}" ]] && [[ -z "${results[*]}" ]]; then
    echo -e "  ${INFO} No ${exact/t/t }results found for ${COL_BOLD}${domainQuery}${COL_NC} within the block lists"
    exit 0
elif [[ -z "${results[*]}" ]]; then
    # Result found in WL/BL/Wildcards
    exit 0
elif [[ -z "${all}" ]] && [[ "${#results[*]}" -ge 100 ]]; then
    echo -e "  ${INFO} Over 100 ${exact/t/t }results found for ${COL_BOLD}${domainQuery}${COL_NC}
        This can be overridden using the -all option"
    exit 0
fi

# Remove unwanted content from non-exact $results
if [[ -z "${exact}" ]]; then
    # Delete lines starting with #
    # Remove comments after domain
    # Remove hosts format IP address
    mapfile -t results <<< "$(IFS=$'\n'; sed \
        -e "/:#/d" \
        -e "s/[ \\t]#.*//g" \
        -e "s/:.*[ \\t]/:/g" \
        <<< "${results[*]}")"
    # Exit if result was in a comment
    [[ -z "${results[*]}" ]] && exit 0
fi

# Get adlist file content as array
if [[ -n "${adlist}" ]] || [[ -n "${blockpage}" ]]; then
    for adlistUrl in $(< "${adListsList}"); do
        if [[ "${adlistUrl:0:4}" =~ (http|www.) ]]; then
            adlists+=("${adlistUrl}")
        fi
    done
fi

# Print "Exact matches for" title
if [[ -n "${exact}" ]] && [[ -z "${blockpage}" ]]; then
    plural=""; [[ "${#results[*]}" -gt 1 ]] && plural="es"
    echo " ${matchType^}${plural} for ${COL_BOLD}${domainQuery}${COL_NC} found in:"
fi

for result in "${results[@]}"; do
    fileName="${result/:*/}"

    # Determine *.domains URL using filename's number
    if [[ -n "${adlist}" ]] || [[ -n "${blockpage}" ]]; then
        fileNum="${fileName/list./}"; fileNum="${fileNum%%.*}"
        fileName="${adlists[$fileNum]}"

        # Discrepency occurs when adlists has been modified, but Gravity has not been run
        if [[ -z "${fileName}" ]]; then
            fileName="${COL_LIGHT_RED}(no associated adlists URL found)${COL_NC}"
        fi
    fi

    if [[ -n "${blockpage}" ]]; then
        echo "${fileNum} ${fileName}"
    elif [[ -n "${exact}" ]]; then
        echo "   ${fileName}"
    else
        if [[ ! "${fileName}" == "${fileName_prev:-}" ]]; then
            count=""
            echo " ${matchType^} found in ${COL_BOLD}${fileName}${COL_NC}:"
            fileName_prev="${fileName}"
        fi
        : $((count++))

        # Print matching domain if $max_count has not been reached
        [[ -z "${all}" ]] && max_count="50"
        if [[ -z "${all}" ]] && [[ "${count}" -ge "${max_count}" ]]; then
            [[ "${count}" -gt "${max_count}" ]] && continue
            echo "   ${COL_GRAY}Over ${count} results found, skipping rest of file${COL_NC}"
        else
            echo "   ${result#*:}"
        fi
    fi
done

exit 0