aboutsummaryrefslogtreecommitdiffstats
path: root/tools/search_wikipedia.sh
blob: 6c7010dd7d712c87c4b0ebc2b303a75e4d72fc98 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/usr/bin/env bash
# Abort as soon as any simple command exits with a non-zero status.
set -e

# @describe Search Wikipedia for a query.
# Use it to get detailed information about a public figure, an interpretation of a complex scientific concept, or the in-depth connections of a significant historical event.

# @option --query! The query to search for.

# @env LLM_OUTPUT=/dev/stdout The output path

# Look up the top Wikipedia search hit for the query and append the plain-text
# intro extract of that page to the output file.
#
# Globals:
#   argc_query  (read)  text to search for
#   LLM_OUTPUT  (read)  path the extract is appended to
# Outputs:
#   The extract is appended to "$LLM_OUTPUT"; when the search yields no
#   title/pageid an error message is written to stderr.
# Exits:
#   1 when the search returns no result.
main() {
    local base_url="https://en.wikipedia.org/w/api.php"
    local encoded_query encoded_title url json title pageid

    # Percent-encode the query so it is safe inside the URL.
    encoded_query="$(jq -nr --arg q "$argc_query" '$q|@uri')"
    url="$base_url?action=query&list=search&srprop=&srlimit=1&limit=1&srsearch=$encoded_query&srinfo=suggestion&format=json"
    json="$(curl -fsSL "$url")"

    title="$(jq -r '.query.search[0].title // empty' <<<"$json")"
    pageid="$(jq -r '.query.search[0].pageid // empty' <<<"$json")"
    if [[ -z "$title" || -z "$pageid" ]]; then
        echo "error: no results for '$argc_query'" >&2
        exit 1
    fi

    # Titles may contain URL-reserved characters ('&', '?', '+', ...), so the
    # title is percent-encoded as well; spaces become underscores as before.
    encoded_title="$(jq -nr --arg t "${title// /_}" '$t|@uri')"
    url="$base_url?action=query&prop=extracts&explaintext=&titles=$encoded_title&exintro=&format=json"

    # Fetch into a variable instead of piping curl straight into jq, so that
    # a failed request is not hidden behind jq's exit status.
    json="$(curl -fsSL "$url")"

    # The page id is handed to jq as data (--arg), not spliced into the filter.
    jq -r --arg id "$pageid" '.query.pages[$id].extract' <<<"$json" >> "$LLM_OUTPUT"
}

# Pass this script's path and the command-line arguments to argc and evaluate
# the shell code it prints. Presumably that code sets the argc_* variables from
# the comment tags above and then invokes main; confirm against the argc docs.
eval "$(argc --argc-eval "$0" "$@")"