From a7342a1cbf0ae558db9005870ddb20a539daac68 Mon Sep 17 00:00:00 2001 From: sigoden Date: Mon, 29 Jul 2024 07:32:26 +0800 Subject: feat: add web_search_* tools, rm some search_* tools (#78) --- .github/workflows/ci.yaml | 2 +- Argcfile.sh | 14 +++++++++++--- tools/search_bing.sh | 20 -------------------- tools/search_brave.sh | 21 --------------------- tools/search_duckduckgo.sh | 35 ----------------------------------- tools/search_exa.sh | 30 ------------------------------ tools/search_google.sh | 20 -------------------- tools/search_jina.sh | 22 ---------------------- tools/search_searxng.sh | 20 -------------------- tools/search_tavily.sh | 25 ------------------------- tools/web_search_perplexity.sh | 30 ++++++++++++++++++++++++++++++ tools/web_search_tavily.sh | 22 ++++++++++++++++++++++ 12 files changed, 64 insertions(+), 197 deletions(-) delete mode 100755 tools/search_bing.sh delete mode 100755 tools/search_brave.sh delete mode 100755 tools/search_duckduckgo.sh delete mode 100755 tools/search_exa.sh delete mode 100755 tools/search_google.sh delete mode 100755 tools/search_jina.sh delete mode 100755 tools/search_searxng.sh delete mode 100755 tools/search_tavily.sh create mode 100755 tools/web_search_perplexity.sh create mode 100755 tools/web_search_tavily.sh diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 88ea049..91170e6 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -42,7 +42,7 @@ jobs: - name: Link web-search and code-interpreter run: | - argc link-web-search search_tavily.sh + argc link-web-search web_search_perplexity.sh argc link-code-interpreter execute_py_code.py - name: Run Test diff --git a/Argcfile.sh b/Argcfile.sh index 7e1e73b..19903d7 100644 --- a/Argcfile.sh +++ b/Argcfile.sh @@ -453,8 +453,8 @@ clean@agent() { # @cmd Link a tool as web_search tool # # Example: -# argc link-web-search search_bing.sh -# @arg tool![`_choice_tool`] The tool work as web_search +# argc link-web-search 
web_search_perplexity.sh +# @arg tool![`_choice_web_search`] The tool work as web_search link-web-search() { _link_tool $1 web_search } @@ -463,7 +463,7 @@ link-web-search() { # # Example: # argc link-code-interpreter execute_py_code.py -# @arg tool![`_choice_tool`] The tool work as code_interpreter +# @arg tool![`_choice_code_interpreter`] The tool work as code_interpreter link-code-interpreter() { _link_tool $1 code_interpreter } @@ -608,6 +608,14 @@ _choice_tool() { done } +_choice_web_search() { + _choice_tool | grep '^web_search_' +} + +_choice_code_interpreter() { + _choice_tool | grep '^execute_.*_code' +} + _choice_agent() { ls -1 agents } diff --git a/tools/search_bing.sh b/tools/search_bing.sh deleted file mode 100755 index dd60e0c..0000000 --- a/tools/search_bing.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env bash -set -e - -# @describe Perform a web search using Bing Web Search API to get up-to-date information or additional context. -# Use this when you need current information or feel a search could provide a better answer. - -# @env BING_API_KEY! The api key -# @env SEARCH_MAX_RESULTS=5 The max results to return. -# @option --query! The query to search for. - -main() { - encoded_query="$(jq -nr --arg q "$argc_query" '$q|@uri')" - url="https://api.bing.microsoft.com/v7.0/search?q=$encoded_query&mkt=en-us&textdecorations=true&textformat=raw&count=$SEARCH_MAX_RESULTS&offset=0" - curl -fsSL "$url" \ - -H "Ocp-Apim-Subscription-Key: $BING_API_KEY" | \ - jq '[.webPages.value[] | {link: .url, title: .name, snippet: .snippet}]' \ - >> "$LLM_OUTPUT" -} - -eval "$(argc --argc-eval "$0" "$@")" diff --git a/tools/search_brave.sh b/tools/search_brave.sh deleted file mode 100755 index f76e9ca..0000000 --- a/tools/search_brave.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env bash -set -e - -# @describe Perform a web search using Brave Search API to get up-to-date information or additional context. 
-# Use this when you need current information or feel a search could provide a better answer. - -# @env BRAVE_API_KEY! The api key -# @env SEARCH_MAX_RESULTS=5 The max results to return. -# @option --query! The query to search for. - -main() { - encoded_query="$(jq -nr --arg q "$argc_query" '$q|@uri')" - url="https://api.search.brave.com/res/v1/web/search?q=$encoded_query&count=$SEARCH_MAX_RESULTS" - curl -fsSL "$url" \ - -H "Accept: application/json" \ - -H "X-Subscription-Token: $BRAVE_API_KEY" | \ - jq '[.web.results[] | {link: .url, title: .title, snippet: .description}]' \ - >> "$LLM_OUTPUT" -} - -eval "$(argc --argc-eval "$0" "$@")" diff --git a/tools/search_duckduckgo.sh b/tools/search_duckduckgo.sh deleted file mode 100755 index 062bfb5..0000000 --- a/tools/search_duckduckgo.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env bash -set -e - -# @describe Perform a web search using DuckDuckGo API to get up-to-date information or additional context. -# Use this when you need current information or feel a search could provide a better answer. - -# @env SEARCH_MAX_RESULTS=5 The max results to return. -# @option --query! The query to search for. 
- -main() { - encoded_query="$(jq -nr --arg q "$argc_query" '$q|@uri')" - vqd="$(curl -fsSL -X POST https://duckduckgo.com -d "q=$encoded_query" | sed -En 's/.*vqd=([0-9-]+)&.*/\1/p')" - url="https://links.duckduckgo.com/d.js?q=$encoded_query&kl=wt-wt&l=wt-wt&p=&s=0&df=&vqd=$vqd&bing_market=wt-WT&ex=-1" - data="$(curl -fsSL "$url" | sed -En 's/.*DDG.pageLayout.load\(\x27d\x27,\[(.*)\]\);DDG.duckbar.load\(.*/\1/p')" - echo "[$data]" | jq ' -def strip_tags: - gsub("<[^>]*>"; ""); - -def unescape_html_entities: - gsub("&amp;"; "&") | - gsub("&lt;"; "<") | - gsub("&gt;"; ">") | - gsub("&quot;"; "\"") | - gsub("&apos;"; "'\''") | - gsub("&#39;"; "'\''") | - gsub("&nbsp;"; " "); - -def normalize: strip_tags | unescape_html_entities; - -[.[:'"$SEARCH_MAX_RESULTS"'] | .[] | select(has("u")) | {link: .u, title: (.t | normalize), snippet: (.a | normalize)}] -' >> "$LLM_OUTPUT" - -} - -eval "$(argc --argc-eval "$0" "$@")" diff --git a/tools/search_exa.sh b/tools/search_exa.sh deleted file mode 100755 index f14d342..0000000 --- a/tools/search_exa.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -set -e - -# @describe Perform a web search using Exa API to get up-to-date information or additional context. -# Use this when you need current information or feel a search could provide a better answer. - -# @env EXA_API_KEY! The api key -# @env SEARCH_MAX_RESULTS=5 The max results to return. -# @option --query! The query to search for. 
- -main() { - curl -fsSL -X POST https://api.exa.ai/search \ - -H "content-type: application/json" \ - -H "x-api-key: $EXA_API_KEY" \ - -d ' -{ - "query": "'"$argc_query"'", - "numResults": '"$SEARCH_MAX_RESULTS"', - "type": "keyword", - "contents": { - "text": { - "maxCharacters": 200 - } - } -}' | \ - jq '[.results[] | {link: .url, title: .title, snippet: .text}]' \ - >> "$LLM_OUTPUT" -} - -eval "$(argc --argc-eval "$0" "$@")" diff --git a/tools/search_google.sh b/tools/search_google.sh deleted file mode 100755 index 8e82f89..0000000 --- a/tools/search_google.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env bash -set -e - -# @describe Perform a web search using Google Search API to get up-to-date information or additional context. -# Use this when you need current information or feel a search could provide a better answer. - -# @env GOOGLE_API_KEY! The api key -# @env GOOGLE_CSE_ID! The id of google search engine -# @env SEARCH_MAX_RESULTS=5 The max results to return. -# @option --query! The query to search for. - -main() { - encoded_query="$(jq -nr --arg q "$argc_query" '$q|@uri')" - url="https://www.googleapis.com/customsearch/v1?key=$GOOGLE_API_KEY&cx=$GOOGLE_CSE_ID&q=$encoded_query" - curl -fsSL "$url" | \ - jq '[.items[:'"$SEARCH_MAX_RESULTS"'] | .[] | {link: .link, title: .title, snippet: .snippet}]' \ - >> "$LLM_OUTPUT" -} - -eval "$(argc --argc-eval "$0" "$@")" diff --git a/tools/search_jina.sh b/tools/search_jina.sh deleted file mode 100755 index ce51a55..0000000 --- a/tools/search_jina.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash -set -e - -# @describe Perform a web search using Jina API to get up-to-date information or additional context. -# Use this when you need current information or feel a search could provide a better answer. - -# @env JINA_API_KEY The api key -# @env SEARCH_MAX_RESULTS=5 The max results to return. -# @option --query! The query to search for. 
- -main() { - curl_args=("-H" "Accept: application/json") - if [[ -n "$JINA_API_KEY" ]]; then - curl_args+=("-H" "Authorization: Bearer $JINA_API_KEY") - fi - encoded_query="$(jq -nr --arg q "$argc_query" '$q|@uri')" - curl -fsSL "${curl_args[@]}" "https://s.jina.ai/$encoded_query" | \ - jq '[.data[:'"$SEARCH_MAX_RESULTS"'] | .[] | {link: .url, title: .title, snippet: .description}]' \ - >> "$LLM_OUTPUT" -} - -eval "$(argc --argc-eval "$0" "$@")" diff --git a/tools/search_searxng.sh b/tools/search_searxng.sh deleted file mode 100755 index 7272182..0000000 --- a/tools/search_searxng.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env bash -set -e - -# @describe Perform a web search using SearXNG API to get up-to-date information or additional context. -# Use this when you need current information or feel a search could provide a better answer. - -# @env SEARXNG_API_BASE! The api url -# @env SEARCH_MAX_RESULTS=5 The max results to return. -# @option --query! The query to search for. - -main() { - encoded_query="$(jq -nr --arg q "$argc_query" '$q|@uri')" - url="$SEARXNG_API_BASE/search?q=$encoded_query&categories=general&language=en-US&format=json" - curl -fsSL "$url" | \ - jq '[.results[:'"$SEARCH_MAX_RESULTS"'] | .[] | {link: .url, title: .title, snippet: .content}]' \ - >> "$LLM_OUTPUT" - -} - -eval "$(argc --argc-eval "$0" "$@")" diff --git a/tools/search_tavily.sh b/tools/search_tavily.sh deleted file mode 100755 index fb1e367..0000000 --- a/tools/search_tavily.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash -set -e - -# @describe Perform a web search using EXA API to get up-to-date information or additional context. -# Use this when you need current information or feel a search could provide a better answer. - -# @env TAVILY_API_KEY! The api key -# @env SEARCH_MAX_RESULTS=5 The max results to return. -# @option --query! The query to search for. 
- -main() { - curl -fsSL -X POST https://api.tavily.com/search \ - -H "content-type: application/json" \ - -d ' -{ - "api_key": "'"$TAVILY_API_KEY"'", - "query": "'"$argc_query"'", - "search_depth": "advanced", - "max_results": "'"$SEARCH_MAX_RESULTS"'" -}' | \ - jq '[.results[] | {link: .url, title: .title, snippet: .content}]' \ - >> "$LLM_OUTPUT" -} - -eval "$(argc --argc-eval "$0" "$@")" diff --git a/tools/web_search_perplexity.sh b/tools/web_search_perplexity.sh new file mode 100755 index 0000000..95b3d27 --- /dev/null +++ b/tools/web_search_perplexity.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -e + +# @describe Perform a web search using Perplexity API to get up-to-date information or additional context. +# Use this when you need current information or feel a search could provide a better answer. + +# @env PERPLEXITY_API_KEY! The api key +# @option --query! The query to search for. + +main() { + curl -fsS -X POST https://api.perplexity.ai/chat/completions \ + -H "authorization: Bearer $PERPLEXITY_API_KEY" \ + -H "accept: application/json" \ + -H "content-type: application/json" \ + --data ' +{ + "model": "llama-3-sonar-small-32k-online", + "messages": [ + { + "role": "user", + "content": "'"$argc_query"'" + } + ] +} +' | \ + jq -r '.choices[0].message.content' \ + >> "$LLM_OUTPUT" +} + +eval "$(argc --argc-eval "$0" "$@")" diff --git a/tools/web_search_tavily.sh b/tools/web_search_tavily.sh new file mode 100755 index 0000000..d80e5dd --- /dev/null +++ b/tools/web_search_tavily.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +set -e + +# @describe Perform a web search using EXA API to get up-to-date information or additional context. +# Use this when you need current information or feel a search could provide a better answer. + +# @env TAVILY_API_KEY! The api key +# @option --query! The query to search for. 
+ +main() { + curl -fsSL -X POST https://api.tavily.com/search \ + -H "content-type: application/json" \ + -d ' +{ + "api_key": "'"$TAVILY_API_KEY"'", + "query": "'"$argc_query"'", + "include_answer": true +}' | \ + jq -r '.answer' >> "$LLM_OUTPUT" +} + +eval "$(argc --argc-eval "$0" "$@")" -- cgit v1.2.3