diff options
| author | Leonard Kugis <leonard@kug.is> | 2026-02-21 14:24:26 +0100 |
|---|---|---|
| committer | Leonard Kugis <leonard@kug.is> | 2026-02-21 14:24:26 +0100 |
| commit | 99d82aa5b73bb4f5a09f347860aa408dd2b5cc9e (patch) | |
| tree | 59a286e8d23157d34c7fff0fdaea06d4e393106a | |
| parent | 5bb4a3038a0a638359d366f2934a7bb6a8d5bd6b (diff) | |
| download | llm-functions-docker-99d82aa5b73bb4f5a09f347860aa408dd2b5cc9e.tar.gz | |
| -rwxr-xr-x | tools/web_search_tavily.sh | 148 |
1 files changed, 82 insertions, 66 deletions
diff --git a/tools/web_search_tavily.sh b/tools/web_search_tavily.sh index a3d8997..9dfc165 100755 --- a/tools/web_search_tavily.sh +++ b/tools/web_search_tavily.sh @@ -1,19 +1,30 @@ #!/usr/bin/env bash -set -u # kein -e, damit wir Fehler selbst behandeln können +set -uo pipefail # kein -e: wir behandeln Fehler selbst -# @describe Perform a web search using Tavily API to get up-to-date information or additional context. -# NOTE: This tool NEVER exits with non-zero (even on missing args or curl/jq errors). It reports errors in output. +# @describe Perform a web search using Tavily API. +# NOTE: This tool NEVER exits non-zero. It reports errors in output. # @option --query The query to search for. # @env TAVILY_API_KEY The api key # @env LLM_OUTPUT=/dev/stdout The output path OUT="${LLM_OUTPUT:-/dev/stdout}" +MAX_CHARS="${LLM_WEB_QUERY_MAX_CHARS:-380}" +MAX_RESULTS="${LLM_WEB_MAX_RESULTS:-5}" err() { echo "ERROR: $*" >> "$OUT" exit 0 } +sanitize_query() { + local q="$1" + q="$(printf "%s" "$q" | tr '\r\n\t' ' ' | sed -E 's/^[[:space:]]+|[[:space:]]+$//g; s/[[:space:]]+/ /g')" + q="${q#:}" + q="$(printf "%s" "$q" | sed -E 's/^[[:space:]]+|[[:space:]]+$//g')" + q="${q:0:$MAX_CHARS}" + printf "%s" "$q" +} + main() { local query="${argc_query:-}" local api_key="${TAVILY_API_KEY:-}" @@ -21,84 +32,89 @@ main() { [[ -z "$query" ]] && err "missing --query" [[ -z "$api_key" ]] && err "missing TAVILY_API_KEY" - # Request (capture body + rc) - local body rc - body="$(curl -sS -L -X POST "https://api.tavily.com/search" \ - -H "content-type: application/json" \ - --data-binary "$(cat <<JSON -{ - "api_key": "$api_key", - "query": "$query", - "include_answer": true -} -JSON -)" 2>&1)" - rc=$? - - if [[ $rc -ne 0 ]]; then - echo "ERROR: curl failed (exit_code=$rc): $body" >> "$OUT" - exit 0 + query="$(sanitize_query "$query")" + [[ -z "$query" ]] && err "query empty after sanitization" + + local payload + payload="$(jq -n \ + --arg api_key "$api_key" \ + --arg query "$query" \ + --argjson max_results "$MAX_RESULTS" \ + '{ + api_key: $api_key, + query: $query, + include_answer: true, + max_results: $max_results, + search_depth: "basic" + }' 2>/tmp/web_payload.err)" || { + err "jq failed building payload: $(tr '\n' ' ' < /tmp/web_payload.err)" + } + + local curl_err resp curl_rc http_code + curl_err="$(mktemp)" + resp="$( + curl -sS -L --connect-timeout 10 --max-time 25 \ + --retry 2 --retry-all-errors --retry-delay 0 \ + -X POST "https://api.tavily.com/search" \ + -H "content-type: application/json" \ + --data-binary "$payload" \ + -w "\n__HTTP_CODE__:%{http_code}\n" \ + 2>"$curl_err" + )" + curl_rc=$? + + if [[ $curl_rc -ne 0 ]]; then + err "curl failed (exit_code=$curl_rc): $(tr '\n' ' ' < "$curl_err")" fi - # Parse answer (capture jq errors too) - local answer jq_err - answer="$(printf "%s" "$body" | jq -r '.answer' 2> >(jq_err="$(cat)"; typeset -p jq_err >/dev/null) )" || true - - # The process substitution above is messy in some shells; use a simpler portable way: -} - -# ---- argc parse (never fail) ---- -eval "$(argc --argc-eval "$0" "$@" 2>/tmp/argc_web_search_tavily.err || true)" -if [[ -s /tmp/argc_web_search_tavily.err ]]; then - echo "ERROR: argc parse failed: $(cat /tmp/argc_web_search_tavily.err)" >> "$OUT" - rm -f /tmp/argc_web_search_tavily.err - exit 0 -fi -rm -f /tmp/argc_web_search_tavily.err + http_code="$(printf "%s" "$resp" | sed -n 's/^__HTTP_CODE__:\([0-9]\{3\}\)$/\1/p' | tail -n 1)" + resp="$(printf "%s" "$resp" | sed '/^__HTTP_CODE__:/d')" -# ---- main with portable jq handling ---- -main() { - local query="${argc_query:-}" - local api_key="${TAVILY_API_KEY:-}" - [[ -z "$query" ]] && err "missing --query" - [[ -z "$api_key" ]] && err "missing TAVILY_API_KEY" - - local resp rc - resp="$(curl -sS -L -X POST "https://api.tavily.com/search" \ - -H "content-type: application/json" \ - --data-binary "$(cat <<JSON -{ - "api_key": "$api_key", - "query": "$query", - "include_answer": true -} -JSON -)" 2>&1)" - rc=$? - - if [[ $rc -ne 0 ]]; then - echo "ERROR: curl failed (exit_code=$rc): $resp" >> "$OUT" + [[ -z "$http_code" ]] && err "missing http_code (unexpected curl output)" + if [[ "$http_code" != "200" ]]; then + echo "ERROR: http_status=$http_code" >> "$OUT" + echo "BODY_SNIPPET:" >> "$OUT" + printf "%s" "$resp" | head -c 2000 >> "$OUT" + echo >> "$OUT" exit 0 fi - # jq parse with error capture - local answer jq_out jq_rc - jq_out="$(printf "%s" "$resp" | jq -r '.answer' 2>&1)" + local jq_out jq_rc + jq_out="$(printf "%s" "$resp" | jq -r ' + def clip(n): tostring | gsub("[\r\n\t]+";" ") | gsub(" +";" ") | .[0:n]; + . as $r | + ($r.answer // "") as $a | + ($r.results // []) as $rs | + "QUERY: " + ($r.query // ""), + (if ($a|length)>0 then "ANSWER: " + ($a|clip(1200)) else "ANSWER: " + "" end), + "RESULTS:", + ($rs[0:5] | to_entries[] | "- " + (.value.title // "" | clip(200)) + " | " + (.value.url // "") + (if (.value.content // "") != "" then " | " + (.value.content|clip(240)) else "" end)) + ' 2>&1)" jq_rc=$? if [[ $jq_rc -ne 0 ]]; then - echo "ERROR: jq failed (exit_code=$jq_rc): $jq_out" >> "$OUT" - # optional: dump response for debugging (capped) - echo "RESPONSE:" >> "$OUT" - printf "%s\n" "$resp" | head -c 20000 >> "$OUT" + echo "ERROR: jq parse failed (exit_code=$jq_rc): $jq_out" >> "$OUT" + echo "BODY_SNIPPET:" >> "$OUT" + printf "%s" "$resp" | head -c 2000 >> "$OUT" + echo >> "$OUT" + exit 0 + fi + + if grep -q '^ANSWER: *$' <<<"$jq_out" && ! grep -q '^- ' <<<"$jq_out"; then + echo "ERROR: empty answer and no results (try different query)" >> "$OUT" + echo "DEBUG_BODY_SNIPPET:" >> "$OUT" + printf "%s" "$resp" | head -c 2000 >> "$OUT" echo >> "$OUT" exit 0 fi - # jq succeeded; write answer printf "%s\n" "$jq_out" >> "$OUT" exit 0 } -main +# argc parse: erst Code holen, dann eval -> argc ruft main() auf +argc_eval="$(argc --argc-eval "$0" "$@" 2>/tmp/argc_web_search.err)" || { + err "argc parse failed: $(tr '\n' ' ' < /tmp/argc_web_search.err)" +} +eval "$argc_eval" |
