blob: 9dfc1652693d51d97a98515a52a7fdfd0ba5d067 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
#!/usr/bin/env bash
set -uo pipefail # no -e: errors are handled explicitly by this script
# @describe Perform a web search using Tavily API.
# NOTE: This tool NEVER exits non-zero. It reports errors in output.
# @option --query The query to search for.
# @env TAVILY_API_KEY The api key
# @env LLM_OUTPUT=/dev/stdout The output path
OUT="${LLM_OUTPUT:-/dev/stdout}" # Destination that all output (results and ERROR lines) is appended to.
# Maximum length, in characters, that sanitize_query truncates the query to.
# Override with LLM_WEB_QUERY_MAX_CHARS.
MAX_CHARS="${LLM_WEB_QUERY_MAX_CHARS:-380}"
# Value sent as max_results in the request payload (passed to jq as JSON).
# Override with LLM_WEB_MAX_RESULTS.
MAX_RESULTS="${LLM_WEB_MAX_RESULTS:-5}"
# Report an error and stop the script.
# Globals:   OUT (read) - path the message is appended to
# Arguments: any number of words; they are joined via "$*" into one message
# Outputs:   appends "ERROR: <message>" plus a newline to $OUT
# Exits:     always with status 0 (the tool reports errors in its output only)
err() {
  printf '%s\n' "ERROR: $*" >>"$OUT"
  exit 0
}
# Normalize a raw query string into a single trimmed line.
# Globals:   MAX_CHARS (read) - maximum number of characters kept
# Arguments: $1 - raw query text
# Outputs:   writes the cleaned query to stdout without a trailing newline
sanitize_query() {
  local cleaned

  # CR/LF/TAB become spaces, both ends are trimmed, and whitespace runs
  # collapse to a single space.
  cleaned="$(
    printf "%s" "$1" \
      | tr '\r\n\t' ' ' \
      | sed -E -e 's/^[[:space:]]+|[[:space:]]+$//g' -e 's/[[:space:]]+/ /g'
  )"

  # Drop one leading colon (if present) and trim again, since removing the
  # colon can expose whitespace at the start.
  cleaned="$(printf "%s" "${cleaned#:}" | sed -E 's/^[[:space:]]+|[[:space:]]+$//g')"

  # Keep at most MAX_CHARS characters.
  printf "%s" "${cleaned:0:$MAX_CHARS}"
}
# Append a labelled excerpt of a response body to $OUT.
# Globals:   OUT (read)
# Arguments: $1 - label line written before the excerpt
#            $2 - body text; only its first 2000 bytes are written
_emit_body_snippet() {
  printf '%s\n' "$1" >>"$OUT"
  printf '%s' "$2" | head -c 2000 >>"$OUT"
  echo >>"$OUT"
}

# Run one Tavily search and append a compact text report to $OUT.
# Globals:   argc_query (read)     - the --query value
#            TAVILY_API_KEY (read) - API key placed in the request payload
#            MAX_RESULTS (read)    - requested and displayed result count
#            OUT (read)            - path all output is appended to
# Outputs:   on success "QUERY:", "ANSWER:" and "RESULTS:" lines; on any
#            failure an "ERROR: ..." line, optionally followed by a body
#            snippet
# Exits:     always with status 0; failures are reported in the output only
main() {
  local query="${argc_query:-}"
  local api_key="${TAVILY_API_KEY:-}"

  if [[ -z "$query" ]]; then err "missing --query"; fi
  if [[ -z "$api_key" ]]; then err "missing TAVILY_API_KEY"; fi

  query="$(sanitize_query "$query")"
  if [[ -z "$query" ]]; then err "query empty after sanitization"; fi

  # One private scratch file collects stderr of jq and curl. It replaces the
  # fixed /tmp path (predictable name, clashes between concurrent runs) and is
  # removed on every exit path by the EXIT trap.
  local err_file
  err_file="$(mktemp)" || err "mktemp failed"
  # The path is expanded now (shell-quoted via %q) so the trap does not depend
  # on this function's local variable still being visible at exit time.
  # shellcheck disable=SC2064
  trap "rm -f -- $(printf '%q' "$err_file")" EXIT

  # Build the JSON request body with jq so every value is escaped correctly.
  local payload
  payload="$(jq -n \
    --arg api_key "$api_key" \
    --arg query "$query" \
    --argjson max_results "$MAX_RESULTS" \
    '{
      api_key: $api_key,
      query: $query,
      include_answer: true,
      max_results: $max_results,
      search_depth: "basic"
    }' 2>"$err_file")" || {
    err "jq failed building payload: $(tr '\n' ' ' <"$err_file")"
  }

  # The -w format appends a marker line carrying the HTTP status so body and
  # status can both be recovered from a single captured stdout.
  local resp curl_rc http_code
  resp="$(
    curl -sS -L --connect-timeout 10 --max-time 25 \
      --retry 2 --retry-all-errors --retry-delay 0 \
      -X POST "https://api.tavily.com/search" \
      -H "content-type: application/json" \
      --data-binary "$payload" \
      -w "\n__HTTP_CODE__:%{http_code}\n" \
      2>"$err_file"
  )"
  curl_rc=$?
  if [[ $curl_rc -ne 0 ]]; then
    err "curl failed (exit_code=$curl_rc): $(tr '\n' ' ' <"$err_file")"
  fi

  # Split the marker line off the body.
  http_code="$(printf "%s" "$resp" | sed -n 's/^__HTTP_CODE__:\([0-9]\{3\}\)$/\1/p' | tail -n 1)"
  resp="$(printf "%s" "$resp" | sed '/^__HTTP_CODE__:/d')"
  if [[ -z "$http_code" ]]; then err "missing http_code (unexpected curl output)"; fi

  if [[ "$http_code" != "200" ]]; then
    echo "ERROR: http_status=$http_code" >>"$OUT"
    _emit_body_snippet "BODY_SNIPPET:" "$resp"
    exit 0
  fi

  # Render the response as plain text. The number of listed results follows
  # MAX_RESULTS (the same value sent in the request) instead of a fixed 5, so
  # raising LLM_WEB_MAX_RESULTS is no longer silently truncated.
  local jq_out jq_rc
  jq_out="$(printf "%s" "$resp" | jq -r --argjson max_results "$MAX_RESULTS" '
    def clip(n): tostring | gsub("[\r\n\t]+";" ") | gsub(" +";" ") | .[0:n];
    . as $r |
    ($r.answer // "") as $a |
    ($r.results // []) as $rs |
    "QUERY: " + ($r.query // ""),
    (if ($a|length)>0 then "ANSWER: " + ($a|clip(1200)) else "ANSWER: " + "" end),
    "RESULTS:",
    ($rs[0:$max_results] | to_entries[] | "- " + (.value.title // "" | clip(200)) + " | " + (.value.url // "") + (if (.value.content // "") != "" then " | " + (.value.content|clip(240)) else "" end))
  ' 2>&1)"
  jq_rc=$?
  if [[ $jq_rc -ne 0 ]]; then
    echo "ERROR: jq parse failed (exit_code=$jq_rc): $jq_out" >>"$OUT"
    _emit_body_snippet "BODY_SNIPPET:" "$resp"
    exit 0
  fi

  # A report with a blank ANSWER line and no "- " result lines carries no
  # information; report that as an error together with the raw body.
  if grep -q '^ANSWER: *$' <<<"$jq_out" && ! grep -q '^- ' <<<"$jq_out"; then
    echo "ERROR: empty answer and no results (try different query)" >>"$OUT"
    _emit_body_snippet "DEBUG_BODY_SNIPPET:" "$resp"
    exit 0
  fi

  printf "%s\n" "$jq_out" >>"$OUT"
  exit 0
}
# argc bootstrap: fetch the generated code first, then eval it; the generated
# code is what calls main().
# argc's stderr goes to a private mktemp file rather than a fixed /tmp name
# (predictable path, clashes between concurrent runs); the file is removed on
# both the failure and the success path.
argc_err_file="$(mktemp)" || err "mktemp failed"
argc_eval="$(argc --argc-eval "$0" "$@" 2>"$argc_err_file")" || {
  # Read the message before deleting the file; err exits the script.
  argc_err_msg="$(tr '\n' ' ' <"$argc_err_file")"
  rm -f -- "$argc_err_file"
  err "argc parse failed: $argc_err_msg"
}
rm -f -- "$argc_err_file"
eval "$argc_eval"
|