aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author sigoden <sigoden@gmail.com> 2024-07-29 07:32:26 +0800
committer GitHub <noreply@github.com> 2024-07-29 07:32:26 +0800
commit a7342a1cbf0ae558db9005870ddb20a539daac68 (patch)
tree a5b4f3ddaf2421f12967291f6ce4a89243b03360
parent 4d18d6346d4aea16cc9dac1bcfc3ccc4c15a7bd0 (diff)
download llm-functions-docker-a7342a1cbf0ae558db9005870ddb20a539daac68.tar.gz
feat: add web_search_* tools, rm some search_* tools (#78)
-rw-r--r-- .github/workflows/ci.yaml 2
-rw-r--r-- Argcfile.sh 14
-rwxr-xr-x tools/search_bing.sh 20
-rwxr-xr-x tools/search_brave.sh 21
-rwxr-xr-x tools/search_duckduckgo.sh 35
-rwxr-xr-x tools/search_exa.sh 30
-rwxr-xr-x tools/search_google.sh 20
-rwxr-xr-x tools/search_jina.sh 22
-rwxr-xr-x tools/search_searxng.sh 20
-rwxr-xr-x tools/web_search_perplexity.sh 30
-rwxr-xr-x tools/web_search_tavily.sh (renamed from tools/search_tavily.sh) 7
11 files changed, 44 insertions, 177 deletions
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 88ea049..91170e6 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -42,7 +42,7 @@ jobs:
- name: Link web-search and code-interpreter
run: |
- argc link-web-search search_tavily.sh
+ argc link-web-search web_search_perplexity.sh
argc link-code-interpreter execute_py_code.py
- name: Run Test
diff --git a/Argcfile.sh b/Argcfile.sh
index 7e1e73b..19903d7 100644
--- a/Argcfile.sh
+++ b/Argcfile.sh
@@ -453,8 +453,8 @@ clean@agent() {
# @cmd Link a tool as web_search tool
#
# Example:
-# argc link-web-search search_bing.sh
-# @arg tool![`_choice_tool`] The tool work as web_search
+# argc link-web-search web_search_perplexity.sh
+# @arg tool![`_choice_web_search`] The tool work as web_search
link-web-search() {
_link_tool $1 web_search
}
@@ -463,7 +463,7 @@ link-web-search() {
#
# Example:
# argc link-code-interpreter execute_py_code.py
-# @arg tool![`_choice_tool`] The tool work as code_interpreter
+# @arg tool![`_choice_code_interpreter`] The tool work as code_interpreter
link-code-interpreter() {
_link_tool $1 code_interpreter
}
@@ -608,6 +608,14 @@ _choice_tool() {
done
}
+_choice_web_search() {
+ _choice_tool | grep '^web_search_' # choices for link-web-search's `tool` arg: only _choice_tool lines that start with "web_search_"
+}
+
+_choice_code_interpreter() {
+ _choice_tool | grep '^execute_.*_code' # choices for link-code-interpreter's `tool` arg: only _choice_tool lines matching "execute_<anything>_code"
+}
+
_choice_agent() {
ls -1 agents
}
diff --git a/tools/search_bing.sh b/tools/search_bing.sh
deleted file mode 100755
index dd60e0c..0000000
--- a/tools/search_bing.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env bash
-set -e
-
-# @describe Perform a web search using Bing Web Search API to get up-to-date information or additional context.
-# Use this when you need current information or feel a search could provide a better answer.
-
-# @env BING_API_KEY! The api key
-# @env SEARCH_MAX_RESULTS=5 The max results to return.
-# @option --query! The query to search for.
-
-main() {
- encoded_query="$(jq -nr --arg q "$argc_query" '$q|@uri')"
- url="https://api.bing.microsoft.com/v7.0/search?q=$encoded_query&mkt=en-us&textdecorations=true&textformat=raw&count=$SEARCH_MAX_RESULTS&offset=0"
- curl -fsSL "$url" \
- -H "Ocp-Apim-Subscription-Key: $BING_API_KEY" | \
- jq '[.webPages.value[] | {link: .url, title: .name, snippet: .snippet}]' \
- >> "$LLM_OUTPUT"
-}
-
-eval "$(argc --argc-eval "$0" "$@")"
diff --git a/tools/search_brave.sh b/tools/search_brave.sh
deleted file mode 100755
index f76e9ca..0000000
--- a/tools/search_brave.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env bash
-set -e
-
-# @describe Perform a web search using Brave Search API to get up-to-date information or additional context.
-# Use this when you need current information or feel a search could provide a better answer.
-
-# @env BRAVE_API_KEY! The api key
-# @env SEARCH_MAX_RESULTS=5 The max results to return.
-# @option --query! The query to search for.
-
-main() {
- encoded_query="$(jq -nr --arg q "$argc_query" '$q|@uri')"
- url="https://api.search.brave.com/res/v1/web/search?q=$encoded_query&count=$SEARCH_MAX_RESULTS"
- curl -fsSL "$url" \
- -H "Accept: application/json" \
- -H "X-Subscription-Token: $BRAVE_API_KEY" | \
- jq '[.web.results[] | {link: .url, title: .title, snippet: .description}]' \
- >> "$LLM_OUTPUT"
-}
-
-eval "$(argc --argc-eval "$0" "$@")"
diff --git a/tools/search_duckduckgo.sh b/tools/search_duckduckgo.sh
deleted file mode 100755
index 062bfb5..0000000
--- a/tools/search_duckduckgo.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/env bash
-set -e
-
-# @describe Perform a web search using DuckDuckGo API to get up-to-date information or additional context.
-# Use this when you need current information or feel a search could provide a better answer.
-
-# @env SEARCH_MAX_RESULTS=5 The max results to return.
-# @option --query! The query to search for.
-
-main() {
- encoded_query="$(jq -nr --arg q "$argc_query" '$q|@uri')"
- vqd="$(curl -fsSL -X POST https://duckduckgo.com -d "q=$encoded_query" | sed -En 's/.*vqd=([0-9-]+)&.*/\1/p')"
- url="https://links.duckduckgo.com/d.js?q=$encoded_query&kl=wt-wt&l=wt-wt&p=&s=0&df=&vqd=$vqd&bing_market=wt-WT&ex=-1"
- data="$(curl -fsSL "$url" | sed -En 's/.*DDG.pageLayout.load\(\x27d\x27,\[(.*)\]\);DDG.duckbar.load\(.*/\1/p')"
- echo "[$data]" | jq '
-def strip_tags:
- gsub("<[^>]*>"; "");
-
-def unescape_html_entities:
- gsub("&amp;"; "&") |
- gsub("&lt;"; "<") |
- gsub("&gt;"; ">") |
- gsub("&quot;"; "\"") |
- gsub("&apos;"; "'\''") |
- gsub("&#x27;"; "'\''") |
- gsub("&nbsp;"; " ");
-
-def normalize: strip_tags | unescape_html_entities;
-
-[.[:'"$SEARCH_MAX_RESULTS"'] | .[] | select(has("u")) | {link: .u, title: (.t | normalize), snippet: (.a | normalize)}]
-' >> "$LLM_OUTPUT"
-
-}
-
-eval "$(argc --argc-eval "$0" "$@")"
diff --git a/tools/search_exa.sh b/tools/search_exa.sh
deleted file mode 100755
index f14d342..0000000
--- a/tools/search_exa.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/env bash
-set -e
-
-# @describe Perform a web search using Exa API to get up-to-date information or additional context.
-# Use this when you need current information or feel a search could provide a better answer.
-
-# @env EXA_API_KEY! The api key
-# @env SEARCH_MAX_RESULTS=5 The max results to return.
-# @option --query! The query to search for.
-
-main() {
- curl -fsSL -X POST https://api.exa.ai/search \
- -H "content-type: application/json" \
- -H "x-api-key: $EXA_API_KEY" \
- -d '
-{
- "query": "'"$argc_query"'",
- "numResults": '"$SEARCH_MAX_RESULTS"',
- "type": "keyword",
- "contents": {
- "text": {
- "maxCharacters": 200
- }
- }
-}' | \
- jq '[.results[] | {link: .url, title: .title, snippet: .text}]' \
- >> "$LLM_OUTPUT"
-}
-
-eval "$(argc --argc-eval "$0" "$@")"
diff --git a/tools/search_google.sh b/tools/search_google.sh
deleted file mode 100755
index 8e82f89..0000000
--- a/tools/search_google.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env bash
-set -e
-
-# @describe Perform a web search using Google Search API to get up-to-date information or additional context.
-# Use this when you need current information or feel a search could provide a better answer.
-
-# @env GOOGLE_API_KEY! The api key
-# @env GOOGLE_CSE_ID! The id of google search engine
-# @env SEARCH_MAX_RESULTS=5 The max results to return.
-# @option --query! The query to search for.
-
-main() {
- encoded_query="$(jq -nr --arg q "$argc_query" '$q|@uri')"
- url="https://www.googleapis.com/customsearch/v1?key=$GOOGLE_API_KEY&cx=$GOOGLE_CSE_ID&q=$encoded_query"
- curl -fsSL "$url" | \
- jq '[.items[:'"$SEARCH_MAX_RESULTS"'] | .[] | {link: .link, title: .title, snippet: .snippet}]' \
- >> "$LLM_OUTPUT"
-}
-
-eval "$(argc --argc-eval "$0" "$@")"
diff --git a/tools/search_jina.sh b/tools/search_jina.sh
deleted file mode 100755
index ce51a55..0000000
--- a/tools/search_jina.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env bash
-set -e
-
-# @describe Perform a web search using Jina API to get up-to-date information or additional context.
-# Use this when you need current information or feel a search could provide a better answer.
-
-# @env JINA_API_KEY The api key
-# @env SEARCH_MAX_RESULTS=5 The max results to return.
-# @option --query! The query to search for.
-
-main() {
- curl_args=("-H" "Accept: application/json")
- if [[ -n "$JINA_API_KEY" ]]; then
- curl_args+=("-H" "Authorization: Bearer $JINA_API_KEY")
- fi
- encoded_query="$(jq -nr --arg q "$argc_query" '$q|@uri')"
- curl -fsSL "${curl_args[@]}" "https://s.jina.ai/$encoded_query" | \
- jq '[.data[:'"$SEARCH_MAX_RESULTS"'] | .[] | {link: .url, title: .title, snippet: .description}]' \
- >> "$LLM_OUTPUT"
-}
-
-eval "$(argc --argc-eval "$0" "$@")"
diff --git a/tools/search_searxng.sh b/tools/search_searxng.sh
deleted file mode 100755
index 7272182..0000000
--- a/tools/search_searxng.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env bash
-set -e
-
-# @describe Perform a web search using SearXNG API to get up-to-date information or additional context.
-# Use this when you need current information or feel a search could provide a better answer.
-
-# @env SEARXNG_API_BASE! The api url
-# @env SEARCH_MAX_RESULTS=5 The max results to return.
-# @option --query! The query to search for.
-
-main() {
- encoded_query="$(jq -nr --arg q "$argc_query" '$q|@uri')"
- url="$SEARXNG_API_BASE/search?q=$encoded_query&categories=general&language=en-US&format=json"
- curl -fsSL "$url" | \
- jq '[.results[:'"$SEARCH_MAX_RESULTS"'] | .[] | {link: .url, title: .title, snippet: .content}]' \
- >> "$LLM_OUTPUT"
-
-}
-
-eval "$(argc --argc-eval "$0" "$@")"
diff --git a/tools/web_search_perplexity.sh b/tools/web_search_perplexity.sh
new file mode 100755
index 0000000..95b3d27
--- /dev/null
+++ b/tools/web_search_perplexity.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+set -e
+
+# @describe Perform a web search using Perplexity API to get up-to-date information or additional context.
+# Use this when you need current information or feel a search could provide a better answer.
+
+# @env PERPLEXITY_API_KEY! The api key
+# @option --query! The query to search for.
+
+main() {
+    # Ask Perplexity's chat-completions endpoint to answer $argc_query and
+    # append the reply text (.choices[0].message.content) to $LLM_OUTPUT.
+    # The body is built by jq so quotes, backslashes and newlines in the
+    # query are JSON-escaped instead of being spliced raw into the payload;
+    # `--data @-` makes curl read that body from stdin.
+    jq -n --arg query "$argc_query" '
+{
+  "model": "llama-3-sonar-small-32k-online",
+  "messages": [{"role": "user", "content": $query}]
+}' | \
+    curl -fsS -X POST https://api.perplexity.ai/chat/completions \
+        -H "authorization: Bearer $PERPLEXITY_API_KEY" \
+        -H "accept: application/json" \
+        -H "content-type: application/json" \
+        --data @- | \
+    jq -r '.choices[0].message.content' \
+        >> "$LLM_OUTPUT"
+}
+
+eval "$(argc --argc-eval "$0" "$@")"
diff --git a/tools/search_tavily.sh b/tools/web_search_tavily.sh
index fb1e367..d80e5dd 100755
--- a/tools/search_tavily.sh
+++ b/tools/web_search_tavily.sh
@@ -5,7 +5,6 @@ set -e
# Use this when you need current information or feel a search could provide a better answer.
# @env TAVILY_API_KEY! The api key
-# @env SEARCH_MAX_RESULTS=5 The max results to return.
# @option --query! The query to search for.
main() {
@@ -15,11 +14,9 @@ main() {
{
"api_key": "'"$TAVILY_API_KEY"'",
"query": "'"$argc_query"'",
- "search_depth": "advanced",
- "max_results": "'"$SEARCH_MAX_RESULTS"'"
+ "include_answer": true
}' | \
- jq '[.results[] | {link: .url, title: .title, snippet: .content}]' \
- >> "$LLM_OUTPUT"
+ jq -r '.answer' >> "$LLM_OUTPUT"
}
eval "$(argc --argc-eval "$0" "$@")"