From 6a657e02359bad0b4787125dd365df7f6401bb8d Mon Sep 17 00:00:00 2001 From: seungbeom Date: Tue, 3 Dec 2024 09:37:02 +0900 Subject: [PATCH 1/4] =?UTF-8?q?[feature]=20langgraph=20tools=20(websearch,?= =?UTF-8?q?=20vectorstore=20search)=20=20=EC=83=9D=EC=84=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/langgraph/tools.py | 42 ++++++++++++++++++++++++++++++++++++++++++ poetry.lock | 42 +++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + 3 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 app/langgraph/tools.py diff --git a/app/langgraph/tools.py b/app/langgraph/tools.py new file mode 100644 index 0000000..ed84afb --- /dev/null +++ b/app/langgraph/tools.py @@ -0,0 +1,42 @@ +from langchain_core.tools import tool +from langchain_community.tools import DuckDuckGoSearchRun +from vectorstore.opensearch_hybrid import OpenSearchHybridSearch + +# tool에 필요한 class 초기화 +duckduckgo_search = DuckDuckGoSearchRun() +opensearch = OpenSearchHybridSearch() + +@tool(parse_docstring=True) +def web_search(query: str): + """A tool to use when websearch is needed. Use this tool when there isn't enough information in OpenSearch vector store Database + + Args: + query : a query to websearch + + Returns: + list[str]: websearch result in list of strings + """ + res = duckduckgo_search.invoke({"query": query}) + return [res] + +@tool(parse_docstring=True) +def vectorstore_search(query:str, search_type:str="hybrid"): + """search OpenSearch vector databsee for documents that are relevant to a given query + + Args: + query: a query to search vector database + search_type (str, optional): one of three similarity search method: hybrid, BM25, and Cosine similarity search Defaults to "hybrid". + + Returns: + list[pd.DataFrmae]: list of langchain document + """ + if search_type == "hybrid": + result = opensearch.hybrid_search(query) + elif search_type == "bm25": + result = opensearch.bm25_search(query) + elif search_type == "cosine": + result = opensearch.cosine_similarity_search(query) + + result['text'].tolist() + + return result \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index 3e8ba89..beff29f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -817,6 +817,25 @@ files = [ {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, ] +[[package]] +name = "duckduckgo-search" +version = "6.3.7" +description = "Search for words, documents, images, news, maps and text translation using the DuckDuckGo.com search engine." +optional = false +python-versions = ">=3.8" +files = [ + {file = "duckduckgo_search-6.3.7-py3-none-any.whl", hash = "sha256:6a831a27977751e8928222f04c99a5d069ff80e2a7c78b699c9b9ac6cb48c41b"}, + {file = "duckduckgo_search-6.3.7.tar.gz", hash = "sha256:53d84966429a6377647e2a1ea7224b657575c7a4d506729bdb837e4ee12915ed"}, +] + +[package.dependencies] +click = ">=8.1.7" +primp = ">=0.8.1" + +[package.extras] +dev = ["mypy (>=1.11.1)", "pytest (>=8.3.1)", "pytest-asyncio (>=0.23.8)", "ruff (>=0.6.1)"] +lxml = ["lxml (>=5.2.2)"] + [[package]] name = "durationpy" version = "0.9" @@ -3259,6 +3278,27 @@ dev = ["packaging", "prawcore[lint]", "prawcore[test]"] lint = ["pre-commit", "ruff (>=0.0.291)"] test = ["betamax (>=0.8,<0.9)", "pytest (>=2.7.3)", "urllib3 (==1.26.*)"] +[[package]] +name = "primp" +version = "0.8.1" +description = "HTTP client that can impersonate web browsers, mimicking their headers and `TLS/JA3/JA4/HTTP2` fingerprints" +optional = false +python-versions = ">=3.8" +files = [ + {file = "primp-0.8.1-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:8294db817701ad76b6a186c16e22cc49d36fac5986647a83657ad4a58ddeee42"}, + {file = "primp-0.8.1-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:e8117531dcdb0dbcf9855fdbac73febdde5967ca0332a2c05b5961d2fbcfe749"}, + {file = "primp-0.8.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:993cc4284e8c5c858254748f078e872ba250c9339d64398dc000a8f9cffadda3"}, + {file = "primp-0.8.1-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4a27ac642be5c616fc5f139a5ad391dcd0c5964ace56fe6cf31cbffb972a7480"}, + {file = "primp-0.8.1-cp38-abi3-manylinux_2_34_armv7l.whl", hash = "sha256:e8483b8d9eec9fc43d77bb448555466030f29cdd99d9375eb75155e9f832e5bd"}, + {file = "primp-0.8.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:92f5f8267216252cfb27f2149811e14682bb64f0c5d37f00d218d1592e02f0b9"}, + {file = "primp-0.8.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:98f7f3a9481c55c56e7eff9024f29e16379a87d5b0a1b683e145dd8fcbdcc46b"}, + {file = "primp-0.8.1-cp38-abi3-win_amd64.whl", hash = "sha256:6f0018a26be787431504e32548b296a278abbe85da43bcbaf2d4982ac3dcd332"}, + {file = "primp-0.8.1.tar.gz", hash = "sha256:ddf05754a7b70d59df8a014a8585e418f9c04e0b69065bab6633f4a9b92bad93"}, +] + +[package.extras] +dev = ["certifi", "pytest (>=8.1.1)"] + [[package]] name = "prompt-toolkit" version = "3.0.48" @@ -5686,4 +5726,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "ab924349f342b586764b84a73b1b84e4d29f3b71292c3c3c63e8e1f64cbdd7f7" +content-hash = "ab7b9aaabcd5a53f16e9cd97bf388a34794d5c2bf73642935bc8cfec45ceb007" diff --git a/pyproject.toml b/pyproject.toml index 9be6dc2..0bd949b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ langchain-huggingface = "^0.1.0" faiss-cpu = "^1.9.0" langchain-ollama = "^0.2.0" tavily-python = "^0.5.0" +duckduckgo-search = "^6.3.7" [tool.poetry.group.dev.dependencies] From 4b323e3ecc82c648f3670704629bea945137f766 Mon Sep 17 00:00:00 2001 From: tmdqja75 Date: Tue, 3 Dec 2024 23:30:42 +0900 Subject: [PATCH 2/4] =?UTF-8?q?[fix]=20custom=20vectorstore=20=ED=8C=A8?= =?UTF-8?q?=ED=82=A4=EC=A7=80=20import=20=EB=AC=B8=EC=A0=9C=20=ED=95=B4?= =?UTF-8?q?=EA=B2=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/langgraph/tools.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/app/langgraph/tools.py b/app/langgraph/tools.py index ed84afb..e71bc33 100644 --- a/app/langgraph/tools.py +++ b/app/langgraph/tools.py @@ -1,10 +1,22 @@ +import getpass +import os +import sys + from langchain_core.tools import tool from langchain_community.tools import DuckDuckGoSearchRun + +# custom 패키지 import 위해 sys.path에 Parent directory 추가 +parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +sys.path.append(parent_dir) + from vectorstore.opensearch_hybrid import OpenSearchHybridSearch # tool에 필요한 class 초기화 duckduckgo_search = DuckDuckGoSearchRun() -opensearch = OpenSearchHybridSearch() + +opensearch_password = getpass.getpass("Enter your Opensearch password: ") +opensearch = OpenSearchHybridSearch(user="admin", pw=opensearch_password) + @tool(parse_docstring=True) def web_search(query: str): @@ -19,8 +31,9 @@ def web_search(query: str): res = duckduckgo_search.invoke({"query": query}) return [res] + @tool(parse_docstring=True) -def vectorstore_search(query:str, search_type:str="hybrid"): +def vectorstore_search(query: str, search_type: str = "hybrid"): """search OpenSearch vector databsee for documents that are relevant to a given query Args: @@ -28,7 +41,7 @@ def vectorstore_search(query:str, search_type:str="hybrid"): search_type (str, optional): one of three similarity search method: hybrid, BM25, and Cosine similarity search Defaults to "hybrid". Returns: - list[pd.DataFrmae]: list of langchain document + list[pd.DataFrmae]: list of langchain document """ if search_type == "hybrid": result = opensearch.hybrid_search(query) @@ -37,6 +50,6 @@ def vectorstore_search(query:str, search_type:str="hybrid"): elif search_type == "cosine": result = opensearch.cosine_similarity_search(query) - result['text'].tolist() + result["text"].tolist() - return result \ No newline at end of file + return result From 6700f65bb123d608fd3931cc96dbe73d590c3da8 Mon Sep 17 00:00:00 2001 From: seungbeom Date: Fri, 6 Dec 2024 18:01:52 +0900 Subject: [PATCH 3/4] =?UTF-8?q?[fix]vectorstore=20=ED=8C=A8=ED=82=A4?= =?UTF-8?q?=EC=A7=80=20import=20=EB=B2=84=EA=B7=B8=20=ED=95=B4=EA=B2=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/__init__.py | 0 app/langgraph/tools.py | 12 ++++-------- pyproject.toml | 1 + 3 files changed, 5 insertions(+), 8 deletions(-) create mode 100644 app/__init__.py diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/langgraph/tools.py b/app/langgraph/tools.py index e71bc33..d5897f6 100644 --- a/app/langgraph/tools.py +++ b/app/langgraph/tools.py @@ -2,14 +2,9 @@ import os import sys -from langchain_core.tools import tool from langchain_community.tools import DuckDuckGoSearchRun - -# custom 패키지 import 위해 sys.path에 Parent directory 추가 -parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) -sys.path.append(parent_dir) - -from vectorstore.opensearch_hybrid import OpenSearchHybridSearch +from langchain_core.tools import tool +from app.vectorstore.opensearch_hybrid import OpenSearchHybridSearch # tool에 필요한 class 초기화 duckduckgo_search = DuckDuckGoSearchRun() @@ -17,7 +12,6 @@ opensearch_password = getpass.getpass("Enter your Opensearch password: ") opensearch = OpenSearchHybridSearch(user="admin", pw=opensearch_password) - @tool(parse_docstring=True) def web_search(query: str): """A tool to use when websearch is needed. Use this tool when there isn't enough information in OpenSearch vector store Database @@ -53,3 +47,5 @@ def vectorstore_search(query: str, search_type: str = "hybrid"): result["text"].tolist() return result + + diff --git a/pyproject.toml b/pyproject.toml index 0bd949b..6b13b5e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,6 +4,7 @@ version = "0.0.0" description = "ML/DL 관련 지식과 최신 정보관련 대화할 수 있는 챗봇" authors = ["tmdqja75 "] readme = "README.md" +packages = [{ include = "app" }] [tool.poetry.dependencies] python = "^3.10" From 165f27dc6a00e64fbb3f3bed2905f6b556a38fb5 Mon Sep 17 00:00:00 2001 From: tmdqja75 Date: Sun, 8 Dec 2024 15:58:12 +0900 Subject: [PATCH 4/4] =?UTF-8?q?[fix]=20unused=20import=20=EC=A0=9C?= =?UTF-8?q?=EA=B1=B0=20+=20=EC=98=A4=ED=83=80=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/langgraph/tools.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/app/langgraph/tools.py b/app/langgraph/tools.py index d5897f6..f80e958 100644 --- a/app/langgraph/tools.py +++ b/app/langgraph/tools.py @@ -1,10 +1,9 @@ import getpass -import os -import sys + +from app.vectorstore.opensearch_hybrid import OpenSearchHybridSearch from langchain_community.tools import DuckDuckGoSearchRun from langchain_core.tools import tool -from app.vectorstore.opensearch_hybrid import OpenSearchHybridSearch # tool에 필요한 class 초기화 duckduckgo_search = DuckDuckGoSearchRun()