From c9f6962268685a6f387edcadd53964b33ca3d611 Mon Sep 17 00:00:00 2001 From: Roman Porozhnetov Date: Tue, 23 May 2023 14:20:24 +0200 Subject: [PATCH 01/27] bump version to 1.12.0-rc.1 --- .ci/version.txt | 4 ++-- api/dotnet/src/Indigo.Net.csproj | 2 +- api/http/requirements.txt | 2 +- api/http/setup.py | 2 +- api/indigo-version.cmake | 2 +- api/java/pom.xml | 2 +- api/python/indigo/__init__.py | 2 +- api/python/setup.py | 2 +- api/r/DESCRIPTION | 2 +- bingo/bingo-elastic/java/pom.xml | 2 +- bingo/bingo-elastic/python/bingo_elastic/__init__.py | 2 +- bingo/bingo-elastic/python/setup.py | 4 ++-- utils/indigo-ml/setup.py | 2 +- 13 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.ci/version.txt b/.ci/version.txt index 42b54bd619..d4814881ec 100644 --- a/.ci/version.txt +++ b/.ci/version.txt @@ -1,5 +1,5 @@ 1.12.0 -dev +rc 1 -a7a7aef1e0ff9dc8a304b501605b8c7d4e72406f80732a575008e17c3736c162 \ No newline at end of file +f4cafd678446f6ceccbc1cd8a80f48e385ffa73a319797f5d6fbdea8d28023c7 \ No newline at end of file diff --git a/api/dotnet/src/Indigo.Net.csproj b/api/dotnet/src/Indigo.Net.csproj index cfab910d0a..5b6b94221e 100644 --- a/api/dotnet/src/Indigo.Net.csproj +++ b/api/dotnet/src/Indigo.Net.csproj @@ -2,7 +2,7 @@ netstandard2.0 - 1.12.0-dev.1 + 1.12.0-rc.1 EPAM Systems Life Science Department EPAM Systems Universal cheminformatics toolkit diff --git a/api/http/requirements.txt b/api/http/requirements.txt index a5730a0e82..a47affd084 100644 --- a/api/http/requirements.txt +++ b/api/http/requirements.txt @@ -1,7 +1,7 @@ anyio==3.3.4 asgiref==3.4.1 click==8.0.3 -epam.indigo==1.12.0.dev1 +epam.indigo==1.12.0.rc1 fastapi==0.70.0 h11==0.12.0 idna==3.3 diff --git a/api/http/setup.py b/api/http/setup.py index 26ea9c3b53..e5bf6eafde 100644 --- a/api/http/setup.py +++ b/api/http/setup.py @@ -19,7 +19,7 @@ setup_kwargs = { "name": "indigo-service", - "version": "1.12.0.dev1", + "version": "1.12.0.rc1", "description": "", "long_description": None, "author": "Ruslan Khyurri", diff --git a/api/indigo-version.cmake b/api/indigo-version.cmake index 0bc8936680..d660dc15fb 100644 --- a/api/indigo-version.cmake +++ b/api/indigo-version.cmake @@ -1,4 +1,4 @@ -set(INDIGO_DEFAULT_VERSION "1.12.0-dev.1") +set(INDIGO_DEFAULT_VERSION "1.12.0-rc.1") set(INDIGO_MAX_REVISION 1000) find_package(Git) if(GIT_EXECUTABLE) diff --git a/api/java/pom.xml b/api/java/pom.xml index 9a7c023c81..5156f4e384 100644 --- a/api/java/pom.xml +++ b/api/java/pom.xml @@ -17,7 +17,7 @@ UTF-8 1.8 ${maven.compiler.source} - 1.12.0-dev.1 + 1.12.0-rc.1 diff --git a/api/python/indigo/__init__.py b/api/python/indigo/__init__.py index e07740263a..985378a401 100644 --- a/api/python/indigo/__init__.py +++ b/api/python/indigo/__init__.py @@ -3,4 +3,4 @@ from .indigo.indigo_exception import IndigoException # noqa from .indigo.indigo_object import IndigoObject # noqa -__version__ = "1.12.0.dev1" +__version__ = "1.12.0.rc1" diff --git a/api/python/setup.py b/api/python/setup.py index 27c0390ccd..a26c027c94 100644 --- a/api/python/setup.py +++ b/api/python/setup.py @@ -98,7 +98,7 @@ setup( name="epam.indigo", - version="1.12.0.dev1", + version="1.12.0.rc1", description="Indigo universal cheminformatics toolkit", author="EPAM Systems Life Science Department", author_email="lifescience.opensource@epam.com", diff --git a/api/r/DESCRIPTION b/api/r/DESCRIPTION index 6f1419f744..aed023e54a 100644 --- a/api/r/DESCRIPTION +++ b/api/r/DESCRIPTION @@ -1,5 +1,5 @@ Package: rindigo -Version: 1.12.0-dev.1 +Version: 1.12.0-rc.1 Date: 2020-12-10 Title: R Package for Indigo Toolkit Authors@R: c(person(given = "Mikhail", family = "Kviatkovskii", role = c("cre"), diff --git a/bingo/bingo-elastic/java/pom.xml b/bingo/bingo-elastic/java/pom.xml index a92c8d979f..cd0c264b9a 100644 --- a/bingo/bingo-elastic/java/pom.xml +++ b/bingo/bingo-elastic/java/pom.xml @@ -2,7 +2,7 @@ 4.0.0 com.epam.indigo bingo-elastic - 1.12.0-dev.1 + 1.12.0-rc.1 jar Bingo Elastic diff --git a/bingo/bingo-elastic/python/bingo_elastic/__init__.py b/bingo/bingo-elastic/python/bingo_elastic/__init__.py index 059c5c7050..1ecff31e3f 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/__init__.py +++ b/bingo/bingo-elastic/python/bingo_elastic/__init__.py @@ -1 +1 @@ -__version__ = "1.12.0.dev1" +__version__ = "1.12.0.rc1" diff --git a/bingo/bingo-elastic/python/setup.py b/bingo/bingo-elastic/python/setup.py index acb48d10c6..ea74b8edab 100644 --- a/bingo/bingo-elastic/python/setup.py +++ b/bingo/bingo-elastic/python/setup.py @@ -21,7 +21,7 @@ setup( name="bingo_elastic", - version="1.12.0.dev1", + version="1.12.0.rc1", description="Cartridge that provides fast, scalable, and efficient storage and searching solution for chemical information using Elasticsearch", author="Ruslan Khyurri", author_email="ruslan_khyurri@epam.com", @@ -35,7 +35,7 @@ download_url="https://pypi.org/project/bingo_elastic", python_requires=">=3.7", packages=["bingo_elastic", "bingo_elastic.model"], - install_requires=["epam.indigo==1.12.0.dev1", "elasticsearch==7.16.2"], + install_requires=["epam.indigo==1.12.0.rc1", "elasticsearch==7.16.2"], extras_require={ "async": ["elasticsearch[async]==7.16.2"], "dev": [ diff --git a/utils/indigo-ml/setup.py b/utils/indigo-ml/setup.py index 41a330b8f0..340bb1ecf4 100644 --- a/utils/indigo-ml/setup.py +++ b/utils/indigo-ml/setup.py @@ -95,7 +95,7 @@ setup( name="epam.indigo", - version="1.12.0.dev1", + version="1.12.0.rc1", description="Indigo universal cheminformatics toolkit", author="EPAM Systems Life Science Department", author_email="lifescience.opensource@epam.com", From 25f4e6956177d53c7caf9be175dc06214c30212d Mon Sep 17 00:00:00 2001 From: even1024 Date: Wed, 31 May 2023 08:17:14 +0200 Subject: [PATCH 02/27] #1144 Unable to save the structure after clicking 'Save', an error appears (#1147) --- .../ref/formats/ket_bad_stereo.py.out | 2 + .../tests/formats/ket_bad_stereo.py | 32 +++++ .../tests/formats/reactions/bad_stereo.ket | 125 ++++++++++++++++++ core/indigo-core/CMakeLists.txt | 4 +- core/indigo-core/common/base_cpp/output.h | 2 + core/indigo-core/molecule/metadata_storage.h | 2 + .../reaction/src/reaction_json_loader.cpp | 10 +- 7 files changed, 170 insertions(+), 7 deletions(-) create mode 100644 api/tests/integration/ref/formats/ket_bad_stereo.py.out create mode 100644 api/tests/integration/tests/formats/ket_bad_stereo.py create mode 100644 api/tests/integration/tests/formats/reactions/bad_stereo.ket diff --git a/api/tests/integration/ref/formats/ket_bad_stereo.py.out b/api/tests/integration/ref/formats/ket_bad_stereo.py.out new file mode 100644 index 0000000000..c7640fce94 --- /dev/null +++ b/api/tests/integration/ref/formats/ket_bad_stereo.py.out @@ -0,0 +1,2 @@ +molecule json loader: stereo type specified for atom #0, but the bond directions does not say that it is a stereocenter +C(C)CCC>>C diff --git a/api/tests/integration/tests/formats/ket_bad_stereo.py b/api/tests/integration/tests/formats/ket_bad_stereo.py new file mode 100644 index 0000000000..b4951a3282 --- /dev/null +++ b/api/tests/integration/tests/formats/ket_bad_stereo.py @@ -0,0 +1,32 @@ +import difflib +import os +import sys + + +def find_diff(a, b): + return "\n".join(difflib.unified_diff(a.splitlines(), b.splitlines())) + + +sys.path.append( + os.path.normpath( + os.path.join(os.path.abspath(__file__), "..", "..", "..", "common") + ) +) +from env_indigo import * # noqa + +indigo = Indigo() + +root_rea = joinPathPy("reactions/", __file__) + +try: + reaction = indigo.loadReactionFromFile( + os.path.join(root_rea, "bad_stereo.ket") + ) +except IndigoException as e: + print(getIndigoExceptionText(e)) + +indigo.setOption("ignore-stereochemistry-errors", "1") +reaction = indigo.loadReactionFromFile( + os.path.join(root_rea, "bad_stereo.ket") +) +print(reaction.smiles()) diff --git a/api/tests/integration/tests/formats/reactions/bad_stereo.ket b/api/tests/integration/tests/formats/reactions/bad_stereo.ket new file mode 100644 index 0000000000..7b14f16a85 --- /dev/null +++ b/api/tests/integration/tests/formats/reactions/bad_stereo.ket @@ -0,0 +1,125 @@ +{ + "root": { + "nodes": [ + { + "$ref": "mol0" + }, + { + "$ref": "mol1" + }, + { + "type": "arrow", + "data": { + "mode": "open-angle", + "pos": [ + { + "x": 7.050000000000001, + "y": -3.85, + "z": 0 + }, + { + "x": 9.9, + "y": -3.825, + "z": 0 + } + ] + } + } + ] + }, + "mol0": { + "type": "molecule", + "atoms": [ + { + "label": "C", + "location": [ + 2.441025403784439, + -4.25, + 0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 3.307050807568878, + -3.75, + 0 + ] + }, + { + "label": "C", + "location": [ + 4.173076211353316, + -4.25, + 0 + ] + }, + { + "label": "C", + "location": [ + 5.039101615137755, + -3.75, + 0 + ] + }, + { + "label": "C", + "location": [ + 1.5750000000000002, + -3.75, + 0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 0 + ] + } + ] + }, + "mol1": { + "type": "molecule", + "atoms": [ + { + "label": "C", + "location": [ + 10.864101615137756, + -3.85, + 0 + ] + } + ], + "stereoFlagPosition": { + "x": 10.864101615137756, + "y": 2.85, + "z": 0 + } + } +} \ No newline at end of file diff --git a/core/indigo-core/CMakeLists.txt b/core/indigo-core/CMakeLists.txt index db8b657102..b811d829f7 100644 --- a/core/indigo-core/CMakeLists.txt +++ b/core/indigo-core/CMakeLists.txt @@ -13,9 +13,9 @@ file(GLOB ${PROJECT_NAME}_SOURCES CONFIUGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/layout/src/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/reaction/src/*.cpp) if (MSVC OR MINGW) - list(FILTER ${PROJECT_NAME}_SOURCES EXCLUDE REGEX .*posix\.cp*$) + list(FILTER ${PROJECT_NAME}_SOURCES EXCLUDE REGEX .*posix\\.cp*$) else() - list(FILTER ${PROJECT_NAME}_SOURCES EXCLUDE REGEX .*win32\.cp*$) + list(FILTER ${PROJECT_NAME}_SOURCES EXCLUDE REGEX .*win32\\.cp*$) endif() add_library(${PROJECT_NAME} STATIC ${${PROJECT_NAME}_SOURCES}) diff --git a/core/indigo-core/common/base_cpp/output.h b/core/indigo-core/common/base_cpp/output.h index 65a3f0c233..2ae6679a0a 100644 --- a/core/indigo-core/common/base_cpp/output.h +++ b/core/indigo-core/common/base_cpp/output.h @@ -18,6 +18,8 @@ #pragma once +#include + #include "base_c/defs.h" #include "base_cpp/array.h" #include "base_cpp/exception.h" diff --git a/core/indigo-core/molecule/metadata_storage.h b/core/indigo-core/molecule/metadata_storage.h index e1470e1e6a..4046d42aba 100644 --- a/core/indigo-core/molecule/metadata_storage.h +++ b/core/indigo-core/molecule/metadata_storage.h @@ -18,6 +18,8 @@ #ifndef __metadata_storage__ #define __metadata_storage__ +#include + #include "base_cpp/ptr_array.h" namespace indigo diff --git a/core/indigo-core/reaction/src/reaction_json_loader.cpp b/core/indigo-core/reaction/src/reaction_json_loader.cpp index a0fc0c9979..ad132fe59c 100644 --- a/core/indigo-core/reaction/src/reaction_json_loader.cpp +++ b/core/indigo-core/reaction/src/reaction_json_loader.cpp @@ -37,11 +37,6 @@ ReactionJsonLoader::ReactionJsonLoader(Document& ket) : _loader(ket), _molecule(kArrayType), _prxn(nullptr), _pqrxn(nullptr), ignore_noncritical_query_features(false) { ignore_bad_valence = false; - - _loader.stereochemistry_options = stereochemistry_options; - _loader.ignore_noncritical_query_features = ignore_noncritical_query_features; - _loader.treat_x_as_pseudoatom = treat_x_as_pseudoatom; - _loader.ignore_no_chiral_flag = ignore_no_chiral_flag; } ReactionJsonLoader::~ReactionJsonLoader() @@ -50,6 +45,11 @@ ReactionJsonLoader::~ReactionJsonLoader() void ReactionJsonLoader::loadReaction(BaseReaction& rxn) { + _loader.stereochemistry_options = stereochemistry_options; + _loader.ignore_noncritical_query_features = ignore_noncritical_query_features; + _loader.treat_x_as_pseudoatom = treat_x_as_pseudoatom; + _loader.ignore_no_chiral_flag = ignore_no_chiral_flag; + if (rxn.isQueryReaction()) _pqrxn = &rxn.asQueryReaction(); else From 836a09af5d27fdd065946b4a65317e46d60ee4e3 Mon Sep 17 00:00:00 2001 From: Roman Porozhnetov Date: Fri, 2 Jun 2023 05:40:55 +0200 Subject: [PATCH 03/27] bump version to 1.12.0-rc.2 --- .ci/version.txt | 4 ++-- api/dotnet/src/Indigo.Net.csproj | 2 +- api/http/requirements.txt | 2 +- api/http/setup.py | 2 +- api/indigo-version.cmake | 2 +- api/java/pom.xml | 2 +- api/python/indigo/__init__.py | 2 +- api/python/setup.py | 2 +- api/r/DESCRIPTION | 2 +- api/wasm/indigo-ketcher/package.json | 2 +- bingo/bingo-elastic/java/pom.xml | 2 +- bingo/bingo-elastic/python/bingo_elastic/__init__.py | 2 +- bingo/bingo-elastic/python/setup.py | 4 ++-- utils/indigo-ml/setup.py | 2 +- 14 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.ci/version.txt b/.ci/version.txt index d4814881ec..0d8217da3d 100644 --- a/.ci/version.txt +++ b/.ci/version.txt @@ -1,5 +1,5 @@ 1.12.0 rc -1 +2 -f4cafd678446f6ceccbc1cd8a80f48e385ffa73a319797f5d6fbdea8d28023c7 \ No newline at end of file +f6dc659d9b1e2632dd1cff3f19d00ca7d1d41ea6751c471e980d7b3dfaa90a1c \ No newline at end of file diff --git a/api/dotnet/src/Indigo.Net.csproj b/api/dotnet/src/Indigo.Net.csproj index 5b6b94221e..b63ca645cf 100644 --- a/api/dotnet/src/Indigo.Net.csproj +++ b/api/dotnet/src/Indigo.Net.csproj @@ -2,7 +2,7 @@ netstandard2.0 - 1.12.0-rc.1 + 1.12.0-rc.2 EPAM Systems Life Science Department EPAM Systems Universal cheminformatics toolkit diff --git a/api/http/requirements.txt b/api/http/requirements.txt index a47affd084..9bf6071805 100644 --- a/api/http/requirements.txt +++ b/api/http/requirements.txt @@ -1,7 +1,7 @@ anyio==3.3.4 asgiref==3.4.1 click==8.0.3 -epam.indigo==1.12.0.rc1 +epam.indigo==1.12.0.rc2 fastapi==0.70.0 h11==0.12.0 idna==3.3 diff --git a/api/http/setup.py b/api/http/setup.py index e5bf6eafde..d3cf4c39b3 100644 --- a/api/http/setup.py +++ b/api/http/setup.py @@ -19,7 +19,7 @@ setup_kwargs = { "name": "indigo-service", - "version": "1.12.0.rc1", + "version": "1.12.0.rc2", "description": "", "long_description": None, "author": "Ruslan Khyurri", diff --git a/api/indigo-version.cmake b/api/indigo-version.cmake index d660dc15fb..f958bde4fc 100644 --- a/api/indigo-version.cmake +++ b/api/indigo-version.cmake @@ -1,4 +1,4 @@ -set(INDIGO_DEFAULT_VERSION "1.12.0-rc.1") +set(INDIGO_DEFAULT_VERSION "1.12.0-rc.2") set(INDIGO_MAX_REVISION 1000) find_package(Git) if(GIT_EXECUTABLE) diff --git a/api/java/pom.xml b/api/java/pom.xml index 5156f4e384..28a5c1daeb 100644 --- a/api/java/pom.xml +++ b/api/java/pom.xml @@ -17,7 +17,7 @@ UTF-8 1.8 ${maven.compiler.source} - 1.12.0-rc.1 + 1.12.0-rc.2 diff --git a/api/python/indigo/__init__.py b/api/python/indigo/__init__.py index 985378a401..0c391dead0 100644 --- a/api/python/indigo/__init__.py +++ b/api/python/indigo/__init__.py @@ -3,4 +3,4 @@ from .indigo.indigo_exception import IndigoException # noqa from .indigo.indigo_object import IndigoObject # noqa -__version__ = "1.12.0.rc1" +__version__ = "1.12.0.rc2" diff --git a/api/python/setup.py b/api/python/setup.py index a26c027c94..81a3b6c8ac 100644 --- a/api/python/setup.py +++ b/api/python/setup.py @@ -98,7 +98,7 @@ setup( name="epam.indigo", - version="1.12.0.rc1", + version="1.12.0.rc2", description="Indigo universal cheminformatics toolkit", author="EPAM Systems Life Science Department", author_email="lifescience.opensource@epam.com", diff --git a/api/r/DESCRIPTION b/api/r/DESCRIPTION index aed023e54a..aa5915feb1 100644 --- a/api/r/DESCRIPTION +++ b/api/r/DESCRIPTION @@ -1,5 +1,5 @@ Package: rindigo -Version: 1.12.0-rc.1 +Version: 1.12.0-rc.2 Date: 2020-12-10 Title: R Package for Indigo Toolkit Authors@R: c(person(given = "Mikhail", family = "Kviatkovskii", role = c("cre"), diff --git a/api/wasm/indigo-ketcher/package.json b/api/wasm/indigo-ketcher/package.json index e0a0a1d2b9..c9cafa2e57 100644 --- a/api/wasm/indigo-ketcher/package.json +++ b/api/wasm/indigo-ketcher/package.json @@ -1,6 +1,6 @@ { "name": "indigo-ketcher", - "version": "1.9.0-rc.1-29", + "version": "1.12.0-rc.2", "description": "WASM implementation of Indigo organic chemistry toolkit web service for Ketcher", "main": "indigo-ketcher.js", "files": [ diff --git a/bingo/bingo-elastic/java/pom.xml b/bingo/bingo-elastic/java/pom.xml index cd0c264b9a..96000fa6cf 100644 --- a/bingo/bingo-elastic/java/pom.xml +++ b/bingo/bingo-elastic/java/pom.xml @@ -2,7 +2,7 @@ 4.0.0 com.epam.indigo bingo-elastic - 1.12.0-rc.1 + 1.12.0-rc.2 jar Bingo Elastic diff --git a/bingo/bingo-elastic/python/bingo_elastic/__init__.py b/bingo/bingo-elastic/python/bingo_elastic/__init__.py index 1ecff31e3f..18d4424547 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/__init__.py +++ b/bingo/bingo-elastic/python/bingo_elastic/__init__.py @@ -1 +1 @@ -__version__ = "1.12.0.rc1" +__version__ = "1.12.0.rc2" diff --git a/bingo/bingo-elastic/python/setup.py b/bingo/bingo-elastic/python/setup.py index ea74b8edab..1b8bb9b1cf 100644 --- a/bingo/bingo-elastic/python/setup.py +++ b/bingo/bingo-elastic/python/setup.py @@ -21,7 +21,7 @@ setup( name="bingo_elastic", - version="1.12.0.rc1", + version="1.12.0.rc2", description="Cartridge that provides fast, scalable, and efficient storage and searching solution for chemical information using Elasticsearch", author="Ruslan Khyurri", author_email="ruslan_khyurri@epam.com", @@ -35,7 +35,7 @@ download_url="https://pypi.org/project/bingo_elastic", python_requires=">=3.7", packages=["bingo_elastic", "bingo_elastic.model"], - install_requires=["epam.indigo==1.12.0.rc1", "elasticsearch==7.16.2"], + install_requires=["epam.indigo==1.12.0.rc2", "elasticsearch==7.16.2"], extras_require={ "async": ["elasticsearch[async]==7.16.2"], "dev": [ diff --git a/utils/indigo-ml/setup.py b/utils/indigo-ml/setup.py index 340bb1ecf4..febb6bfe60 100644 --- a/utils/indigo-ml/setup.py +++ b/utils/indigo-ml/setup.py @@ -95,7 +95,7 @@ setup( name="epam.indigo", - version="1.12.0.rc1", + version="1.12.0.rc2", description="Indigo universal cheminformatics toolkit", author="EPAM Systems Life Science Department", author_email="lifescience.opensource@epam.com", From a4d536cb0e6e09a9184353223afc60eac9fb5dd2 Mon Sep 17 00:00:00 2001 From: Roman Porozhnetov Date: Sun, 9 Jul 2023 22:30:02 +0200 Subject: [PATCH 04/27] bump to Indigo 1.12.0 --- .ci/version.txt | 6 +++--- api/dotnet/src/Indigo.Net.csproj | 2 +- api/http/requirements.txt | 2 +- api/http/setup.py | 2 +- api/indigo-version.cmake | 2 +- api/java/pom.xml | 2 +- api/python/indigo/__init__.py | 2 +- api/python/setup.py | 2 +- api/r/DESCRIPTION | 2 +- api/wasm/indigo-ketcher/package.json | 2 +- bingo/bingo-elastic/java/pom.xml | 2 +- bingo/bingo-elastic/python/bingo_elastic/__init__.py | 2 +- bingo/bingo-elastic/python/setup.py | 4 ++-- utils/indigo-ml/setup.py | 2 +- 14 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.ci/version.txt b/.ci/version.txt index 0d8217da3d..18df25124e 100644 --- a/.ci/version.txt +++ b/.ci/version.txt @@ -1,5 +1,5 @@ 1.12.0 -rc -2 -f6dc659d9b1e2632dd1cff3f19d00ca7d1d41ea6751c471e980d7b3dfaa90a1c \ No newline at end of file + + +c92df6bdde330be219d62f48ec6490b01889436f2f59027aab200e74de619168 \ No newline at end of file diff --git a/api/dotnet/src/Indigo.Net.csproj b/api/dotnet/src/Indigo.Net.csproj index b63ca645cf..8e7b56970d 100644 --- a/api/dotnet/src/Indigo.Net.csproj +++ b/api/dotnet/src/Indigo.Net.csproj @@ -2,7 +2,7 @@ netstandard2.0 - 1.12.0-rc.2 + 1.12.0 EPAM Systems Life Science Department EPAM Systems Universal cheminformatics toolkit diff --git a/api/http/requirements.txt b/api/http/requirements.txt index 9bf6071805..a509b70331 100644 --- a/api/http/requirements.txt +++ b/api/http/requirements.txt @@ -1,7 +1,7 @@ anyio==3.3.4 asgiref==3.4.1 click==8.0.3 -epam.indigo==1.12.0.rc2 +epam.indigo==1.12.0 fastapi==0.70.0 h11==0.12.0 idna==3.3 diff --git a/api/http/setup.py b/api/http/setup.py index d3cf4c39b3..2058604a27 100644 --- a/api/http/setup.py +++ b/api/http/setup.py @@ -19,7 +19,7 @@ setup_kwargs = { "name": "indigo-service", - "version": "1.12.0.rc2", + "version": "1.12.0", "description": "", "long_description": None, "author": "Ruslan Khyurri", diff --git a/api/indigo-version.cmake b/api/indigo-version.cmake index f958bde4fc..592debe008 100644 --- a/api/indigo-version.cmake +++ b/api/indigo-version.cmake @@ -1,4 +1,4 @@ -set(INDIGO_DEFAULT_VERSION "1.12.0-rc.2") +set(INDIGO_DEFAULT_VERSION "1.12.0") set(INDIGO_MAX_REVISION 1000) find_package(Git) if(GIT_EXECUTABLE) diff --git a/api/java/pom.xml b/api/java/pom.xml index 28a5c1daeb..4e87556f76 100644 --- a/api/java/pom.xml +++ b/api/java/pom.xml @@ -17,7 +17,7 @@ UTF-8 1.8 ${maven.compiler.source} - 1.12.0-rc.2 + 1.12.0 diff --git a/api/python/indigo/__init__.py b/api/python/indigo/__init__.py index 0c391dead0..14490264dc 100644 --- a/api/python/indigo/__init__.py +++ b/api/python/indigo/__init__.py @@ -3,4 +3,4 @@ from .indigo.indigo_exception import IndigoException # noqa from .indigo.indigo_object import IndigoObject # noqa -__version__ = "1.12.0.rc2" +__version__ = "1.12.0" diff --git a/api/python/setup.py b/api/python/setup.py index 81a3b6c8ac..d54bc2b403 100644 --- a/api/python/setup.py +++ b/api/python/setup.py @@ -98,7 +98,7 @@ setup( name="epam.indigo", - version="1.12.0.rc2", + version="1.12.0", description="Indigo universal cheminformatics toolkit", author="EPAM Systems Life Science Department", author_email="lifescience.opensource@epam.com", diff --git a/api/r/DESCRIPTION b/api/r/DESCRIPTION index aa5915feb1..5f7cb81421 100644 --- a/api/r/DESCRIPTION +++ b/api/r/DESCRIPTION @@ -1,5 +1,5 @@ Package: rindigo -Version: 1.12.0-rc.2 +Version: 1.12.0 Date: 2020-12-10 Title: R Package for Indigo Toolkit Authors@R: c(person(given = "Mikhail", family = "Kviatkovskii", role = c("cre"), diff --git a/api/wasm/indigo-ketcher/package.json b/api/wasm/indigo-ketcher/package.json index c9cafa2e57..27b0007fc3 100644 --- a/api/wasm/indigo-ketcher/package.json +++ b/api/wasm/indigo-ketcher/package.json @@ -1,6 +1,6 @@ { "name": "indigo-ketcher", - "version": "1.12.0-rc.2", + "version": "1.12.0", "description": "WASM implementation of Indigo organic chemistry toolkit web service for Ketcher", "main": "indigo-ketcher.js", "files": [ diff --git a/bingo/bingo-elastic/java/pom.xml b/bingo/bingo-elastic/java/pom.xml index 96000fa6cf..153e69d437 100644 --- a/bingo/bingo-elastic/java/pom.xml +++ b/bingo/bingo-elastic/java/pom.xml @@ -2,7 +2,7 @@ 4.0.0 com.epam.indigo bingo-elastic - 1.12.0-rc.2 + 1.12.0 jar Bingo Elastic diff --git a/bingo/bingo-elastic/python/bingo_elastic/__init__.py b/bingo/bingo-elastic/python/bingo_elastic/__init__.py index 18d4424547..b518f6eed0 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/__init__.py +++ b/bingo/bingo-elastic/python/bingo_elastic/__init__.py @@ -1 +1 @@ -__version__ = "1.12.0.rc2" +__version__ = "1.12.0" diff --git a/bingo/bingo-elastic/python/setup.py b/bingo/bingo-elastic/python/setup.py index 1b8bb9b1cf..2957b2a24d 100644 --- a/bingo/bingo-elastic/python/setup.py +++ b/bingo/bingo-elastic/python/setup.py @@ -21,7 +21,7 @@ setup( name="bingo_elastic", - version="1.12.0.rc2", + version="1.12.0", description="Cartridge that provides fast, scalable, and efficient storage and searching solution for chemical information using Elasticsearch", author="Ruslan Khyurri", author_email="ruslan_khyurri@epam.com", @@ -35,7 +35,7 @@ download_url="https://pypi.org/project/bingo_elastic", python_requires=">=3.7", packages=["bingo_elastic", "bingo_elastic.model"], - install_requires=["epam.indigo==1.12.0.rc2", "elasticsearch==7.16.2"], + install_requires=["epam.indigo==1.12.0", "elasticsearch==7.16.2"], extras_require={ "async": ["elasticsearch[async]==7.16.2"], "dev": [ diff --git a/utils/indigo-ml/setup.py b/utils/indigo-ml/setup.py index febb6bfe60..7c851d7278 100644 --- a/utils/indigo-ml/setup.py +++ b/utils/indigo-ml/setup.py @@ -95,7 +95,7 @@ setup( name="epam.indigo", - version="1.12.0.rc2", + version="1.12.0", description="Indigo universal cheminformatics toolkit", author="EPAM Systems Life Science Department", author_email="lifescience.opensource@epam.com", From 4634054d204819ad7276aeaf7acde9707ac8ee5a Mon Sep 17 00:00:00 2001 From: Roman Porozhnetov Date: Sun, 9 Jul 2023 23:26:40 +0200 Subject: [PATCH 05/27] CI & changelog fix --- .github/workflows/indigo-ci.yaml | 6 ++-- CHANGELOG.md | 56 ++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/.github/workflows/indigo-ci.yaml b/.github/workflows/indigo-ci.yaml index 1a079d1f1e..76d06eda52 100644 --- a/.github/workflows/indigo-ci.yaml +++ b/.github/workflows/indigo-ci.yaml @@ -267,7 +267,7 @@ jobs: name: indigo-python path: dist/ - name: Install Pillow - run: python3 -m pip install Pillow + run: python3 -m pip install Pillow==9.5.0 - name: Install wrappers Linux if: ${{ matrix.os == 'ubuntu' }} run: python3 -m pip install dist/*manylinux1_x86_64*.whl @@ -322,7 +322,7 @@ jobs: run: | apt update apt install -y --no-install-recommends python3 python3-pip - python3 -m pip install Pillow + python3 -m pip install Pillow==9.5.0 python3 -m pip install dist/*manylinux2014_aarch64*.whl python3 api/tests/integration/test.py -p basic/basic.py -t 1 -j junit_report.xml - name: Publish Test Report @@ -364,7 +364,7 @@ jobs: python-version: 3.7 architecture: x86 - name: Install Pillow - run: python -m pip install Pillow + run: python -m pip install Pillow==9.5.0 - name: Install wrappers Windows if: ${{ matrix.os == 'windows' }} run: Get-ChildItem dist -Filter *win32*.whl -Recurse | % { python -m pip install $_.FullName } diff --git a/CHANGELOG.md b/CHANGELOG.md index b72ea58beb..c2b9c5e42e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,59 @@ +# Indigo 1.12.0 + +## What's Changed + +## Bugfixes +* #965 MDL Molfile v3000: when opening files containing 'Salts an Solvents', names are truncated and abbreviation is expanded +* #1036 SMILES import: general chiral specification labels (TH, AL, SP, TB, OH ) don't work +* #1051 Opening file with a superatom label saved in RXN v3000 format only the first part of the label is displayed +* #1114 Atoms of Benzene ring become Monoradicals when opened from file saved in Daylight SMARTS +* #1132 SMILES loader uninitialized heap fix +* #1102 When pasting Extended SMILES structure with stereochemistry there are two &1 centers instead of an ABS and an &1 +* #1135 C library macro - va_end() is missing before return statement. +* #1126 Segfault when iterating CDX file from USPTO downloads +* #1144 Unable to save the structure after clicking 'Save', an error appears + +## Improvements +* #1098 api: add method for copying RGroups + +**Full Changelog**: https://github.com/epam/Indigo/compare/indigo-1.11.0...indigo-1.12.0 + + +# Indigo 1.11.0 +Released 2023-06-07 + +## What's Changed + +## Features +* #1053 Split publish job in "Indigo CI" GitHub Action +* #310 Support stereo CIP calculation in Ket format +* #957 Support of Korean, Chinese and Japanese characters in Standalone. +* #995 Automated memory leaks testing + +## Bugfixes +* #1044 SVG/PNG: Reaction arrows are not visible without structures at preview and in saved files +* #932 Reagents: When opening Daylight SMILES and Extended SMILES files with reagent the original structure is distorted +* #1084 Can't open mol v3000 files with 'S-Group Properties Type = Generic' and 'S-Group Properties Type = Multiple' +* #1083 Indigo Service: enable of using Indigo Options +* #910 MDL Molfile v3000 encoding: Automatic selection of MDL Molfile v3000 encoding doesn't work if the number of atoms (or bonds) exceeds 999 +* #956 Copy Image: When inversion type is chosen in the atom's properties, it is not saved +* #955 Copy Image: Saved bonds does not have Reacting Center marks +* #1052 Set "Indigo Docker images preparation" GItHub Action to start manually only add version tag to Docker images +* #1064 Keep implicit hydrogens information in KET-format +* #1048 Memory leak in 3rd party library +* #1056 RXN2000/3000 should not serialize INDIGO_DESC fields for s-groups +* #1050 Memory leak in StringPool code +* #1031 Calculate CIP: Hovering over the label R/S displays Indigo system information +* #1049 Memory leak in the SMILES loader code +* #973 Daylight SMARTS: Error when save file in SMART format with reaction arrow and reagent +* #1017 imagoVersions is undefined +* #899 Add restrictions on size to be less than 1000 +* #1015 Cannot test CDX export with certain files +* #944 CDX import: Greek letters, Celsius and Fahrenheit signs are replaced with question marks +* #1093 python binding memory leak from 1.8.0 (and still present in 1.10.0) + +**Full Changelog**: https://github.com/epam/Indigo/compare/indigo-1.10.0...indigo-1.11.0 + # Indigo 1.10.0 Released 2023-03-22 From 206cd57898fc68857181ab6373f6846460ad32f2 Mon Sep 17 00:00:00 2001 From: even1024 Date: Mon, 10 Jul 2023 01:28:31 +0400 Subject: [PATCH 06/27] Update CHANGELOG.md --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c2b9c5e42e..faa07fd1a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ # Indigo 1.12.0 +Released 2023-07-09 ## What's Changed @@ -99,7 +100,7 @@ Released 2023-01-31 * Improve ssl bingo elastic by @MysterionRise in #901 * bingo: postgres: add support for Postgres 15, drop support for Postgres 10 by @mkviatkovskii in #903 * #521: core: replace MultiMap in MoleculeRGroupsComposition class by @loimu in #917 -* #521: core: replace MultiMap in MolfileLoader class by @loimu in #911  +* #521: core: replace MultiMap in MolfileLoader class by @loimu in #911  * #929: fix auto-saving to CTAB v3000 by @mkviatkovskii in #931 **Full Changelog**: https://github.com/epam/Indigo/compare/indigo-1.8.0...indigo-1.9.0 From 91d2d19e1d6c06d1be6d41ebed3372f90003dbb3 Mon Sep 17 00:00:00 2001 From: Roman Porozhnetov Date: Mon, 10 Jul 2023 00:24:47 +0200 Subject: [PATCH 07/27] tzdata --- utils/indigo-service/backend/service/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/indigo-service/backend/service/requirements.txt b/utils/indigo-service/backend/service/requirements.txt index 73fc0bd917..694d00dcf2 100644 --- a/utils/indigo-service/backend/service/requirements.txt +++ b/utils/indigo-service/backend/service/requirements.txt @@ -9,3 +9,4 @@ pyparsing==3.0 psycopg2-binary redis==4.5.4 SQLAlchemy==2.0 +tzdata \ No newline at end of file From 75902f3d2a5726cf63315e770ecd7caaa6327165 Mon Sep 17 00:00:00 2001 From: Yechen Qiao Date: Mon, 24 Jul 2023 18:13:17 -0400 Subject: [PATCH 08/27] change username/password type. add timeout parameter. --- .../python/bingo_elastic/elastic.py | 134 +++++++++--------- 1 file changed, 70 insertions(+), 64 deletions(-) diff --git a/bingo/bingo-elastic/python/bingo_elastic/elastic.py b/bingo/bingo-elastic/python/bingo_elastic/elastic.py index 7c0a04213b..3565c2d968 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/elastic.py +++ b/bingo/bingo-elastic/python/bingo_elastic/elastic.py @@ -51,7 +51,7 @@ def get_index_name(record: IndigoRecord) -> IndexName: def get_record_by_index( - response: Dict, index: str + response: Dict, index: str ) -> Union[IndigoRecordMolecule, IndigoRecordReaction]: if index == IndexName.BINGO_MOLECULE.value: return IndigoRecordMolecule(elastic_response=response) @@ -69,21 +69,23 @@ def elastic_repository_reaction(*args, **kwargs): def get_client( - *, - client_type: Type[ElasticRepositoryT], - host: Union[str, List[str]] = "localhost", - port: int = 9200, - scheme: str = "", - http_auth: Optional[Tuple[str]] = None, - ssl_context: Any = None, - request_timeout: int = 60, - retry_on_timeout: bool = True, + *, + client_type: Type[ElasticRepositoryT], + host: Union[str, List[str]] = "localhost", + port: int = 9200, + scheme: str = "", + http_auth: Optional[List[str]] = None, + ssl_context: Any = None, + request_timeout: int = 60, + timeout: int = 60, + retry_on_timeout: bool = True, ) -> ElasticRepositoryT: arguments = { "port": port, "scheme": "https" if scheme == "https" else "http", "request_timeout": request_timeout, "retry_on_timeout": retry_on_timeout, + "timeout": timeout } if isinstance(host, str): arguments["host"] = host @@ -119,8 +121,8 @@ def check_index_exception(err_: RequestError) -> None: raise err_ cause = err_.info.get("error", {}).get("root_cause", []) if ( - len(cause) == 1 - and cause[0].get("type") == "resource_already_exists_exception" + len(cause) == 1 + and cause[0].get("type") == "resource_already_exists_exception" ): return raise err_ @@ -134,7 +136,7 @@ def create_index(index_name: str, el_client: Elasticsearch) -> None: async def a_create_index( - index_name: str, el_client: "AsyncElasticsearch" + index_name: str, el_client: "AsyncElasticsearch" ) -> None: try: await el_client.indices.create(index=index_name, body=index_body) @@ -143,7 +145,7 @@ async def a_create_index( def prepare( - index_name: str, records: Generator[IndigoRecord, None, None] + index_name: str, records: Generator[IndigoRecord, None, None] ) -> Generator[Dict, None, None]: for record in records: if get_index_name(record).value != index_name: @@ -155,11 +157,11 @@ def prepare( def response_to_records( - res: dict, - index_name: str, - postprocess_actions: PostprocessType = None, - indigo_session: Indigo = None, - options: str = "", + res: dict, + index_name: str, + postprocess_actions: PostprocessType = None, + indigo_session: Indigo = None, + options: str = "", ) -> Generator[IndigoRecord, None, None]: for el_response in res.get("hits", {}).get("hits", []): record = get_record_by_index(el_response, index_name) @@ -172,16 +174,17 @@ def response_to_records( class AsyncElasticRepository: def __init__( - self, - index_name: IndexName, - *, - host: Union[str, List[str]] = "localhost", - port: int = 9200, - scheme: str = "", - http_auth: Optional[Tuple[str]] = None, - ssl_context: Any = None, - request_timeout: int = 60, - retry_on_timeout: bool = True, + self, + index_name: IndexName, + *, + host: Union[str, List[str]] = "localhost", + port: int = 9200, + scheme: str = "", + http_auth: Optional[Tuple[str]] = None, + ssl_context: Any = None, + request_timeout: int = 60, + timeout: int = 60, + retry_on_timeout: bool = True, ) -> None: """ :param index_name: use function get_index_name for setting this argument @@ -203,6 +206,7 @@ def __init__( http_auth=http_auth, ssl_context=ssl_context, request_timeout=request_timeout, + timeout=timeout, retry_on_timeout=retry_on_timeout, ) @@ -216,20 +220,20 @@ async def index_records(self, records: Generator, chunk_size: int = 500): await a_create_index(self.index_name, self.el_client) # pylint: disable=unused-variable async for is_ok, action in async_streaming_bulk( - self.el_client, - prepare(self.index_name, records), - index=self.index_name, - chunk_size=chunk_size, + self.el_client, + prepare(self.index_name, records), + index=self.index_name, + chunk_size=chunk_size, ): pass async def filter( - self, - query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - indigo_session: Indigo = None, - limit: int = 10, - options: str = "", - **kwargs, + self, + query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, + indigo_session: Indigo = None, + limit: int = 10, + options: str = "", + **kwargs, ) -> AsyncGenerator[IndigoRecord, None]: if limit > MAX_ALLOWED_SIZE: @@ -247,7 +251,7 @@ async def filter( ) res = await self.el_client.search(index=self.index_name, body=query) for record in response_to_records( - res, self.index_name, postprocess_actions, indigo_session, options + res, self.index_name, postprocess_actions, indigo_session, options ): yield record @@ -263,16 +267,17 @@ async def __aexit__(self, *args, **kwargs) -> None: class ElasticRepository: def __init__( - self, - index_name: IndexName, - *, - host: Union[str, List[str]] = "localhost", - port: int = 9200, - scheme: str = "", - http_auth: Optional[Tuple[str]] = None, - ssl_context: Any = None, - request_timeout: int = 60, - retry_on_timeout: bool = True, + self, + index_name: IndexName, + *, + host: Union[str, List[str]] = "localhost", + port: int = 9200, + scheme: str = "", + http_auth: Optional[Tuple[str]] = None, + ssl_context: Any = None, + request_timeout: int = 60, + timeout: int = 60, + retry_on_timeout: bool = True, ) -> None: """ :param index_name: use function get_index_name for setting this argument @@ -294,6 +299,7 @@ def __init__( http_auth=http_auth, ssl_context=ssl_context, request_timeout=request_timeout, + timeout=timeout, retry_on_timeout=retry_on_timeout, ) @@ -307,10 +313,10 @@ def index_records(self, records: Generator, chunk_size: int = 500): create_index(self.index_name, self.el_client) # pylint: disable=unused-variable for is_ok, action in streaming_bulk( - self.el_client, - prepare(self.index_name, records), - index=self.index_name, - chunk_size=chunk_size, + self.el_client, + prepare(self.index_name, records), + index=self.index_name, + chunk_size=chunk_size, ): pass @@ -321,12 +327,12 @@ def delete_all_records(self): pass def filter( - self, - query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - indigo_session: Indigo = None, - limit: int = 10, - options: str = "", - **kwargs, + self, + query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, + indigo_session: Indigo = None, + limit: int = 10, + options: str = "", + **kwargs, ) -> Generator[IndigoRecord, None, None]: if limit > MAX_ALLOWED_SIZE: @@ -348,10 +354,10 @@ def filter( def compile_query( - query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - limit: int = 10, - postprocess_actions: PostprocessType = None, - **kwargs, + query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, + limit: int = 10, + postprocess_actions: PostprocessType = None, + **kwargs, ) -> Dict: query = { "size": limit, From a9e4d5a22b579b6c9677195aaf6be1092b557eab Mon Sep 17 00:00:00 2001 From: Yechen Qiao Date: Tue, 25 Jul 2023 15:48:08 -0400 Subject: [PATCH 09/27] index type becoming a enum that can take in a suffix for various index names. --- .../python/bingo_elastic/elastic.py | 60 ++++++++++--------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/bingo/bingo-elastic/python/bingo_elastic/elastic.py b/bingo/bingo-elastic/python/bingo_elastic/elastic.py index 3565c2d968..a362ac25ff 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/elastic.py +++ b/bingo/bingo-elastic/python/bingo_elastic/elastic.py @@ -37,35 +37,35 @@ MAX_ALLOWED_SIZE = 1000 -class IndexName(Enum): +class IndexType(Enum): BINGO_MOLECULE = "bingo-molecules" BINGO_REACTION = "bingo-reactions" -def get_index_name(record: IndigoRecord) -> IndexName: +def get_index_type(record: IndigoRecord) -> IndexType: if isinstance(record, IndigoRecordMolecule): - return IndexName.BINGO_MOLECULE + return IndexType.BINGO_MOLECULE if isinstance(record, IndigoRecordReaction): - return IndexName.BINGO_REACTION + return IndexType.BINGO_REACTION raise AttributeError(f"Unknown IndigoRecord type {record}") def get_record_by_index( - response: Dict, index: str + response: Dict, index_type: IndexType ) -> Union[IndigoRecordMolecule, IndigoRecordReaction]: - if index == IndexName.BINGO_MOLECULE.value: + if index_type == IndexType.BINGO_MOLECULE: return IndigoRecordMolecule(elastic_response=response) - if index == IndexName.BINGO_REACTION.value: + if index_type == IndexType.BINGO_REACTION: return IndigoRecordReaction(elastic_response=response) - raise AttributeError(f"Unknown index {index}") + raise AttributeError(f"Unknown index {str(index_type)}") -def elastic_repository_molecule(*args, **kwargs): - return ElasticRepository(IndexName.BINGO_MOLECULE, *args, **kwargs) +def elastic_repository_molecule(index_name: str, *args, **kwargs): + return ElasticRepository(IndexType.BINGO_MOLECULE, index_name, *args, **kwargs) -def elastic_repository_reaction(*args, **kwargs): - return ElasticRepository(IndexName.BINGO_REACTION, *args, **kwargs) +def elastic_repository_reaction(index_name: str, *args, **kwargs): + return ElasticRepository(IndexType.BINGO_REACTION, index_name, *args, **kwargs) def get_client( @@ -145,12 +145,12 @@ async def a_create_index( def prepare( - index_name: str, records: Generator[IndigoRecord, None, None] + index_type: IndexType, records: Generator[IndigoRecord, None, None] ) -> Generator[Dict, None, None]: for record in records: - if get_index_name(record).value != index_name: + if index_type == get_index_type(record): raise ValueError( - f"Index {index_name} doesn't support store value " + f"Index {str(index_type)} doesn't support store value " f"of type {type(record)}" ) yield record.as_dict() @@ -158,13 +158,13 @@ def prepare( def response_to_records( res: dict, - index_name: str, + index_type: IndexType, postprocess_actions: PostprocessType = None, indigo_session: Indigo = None, options: str = "", ) -> Generator[IndigoRecord, None, None]: for el_response in res.get("hits", {}).get("hits", []): - record = get_record_by_index(el_response, index_name) + record = get_record_by_index(el_response, index_type) for action_fn in postprocess_actions: # type: ignore record = action_fn(record, indigo_session, options) # type: ignore if not record: @@ -175,7 +175,8 @@ def response_to_records( class AsyncElasticRepository: def __init__( self, - index_name: IndexName, + index_type: IndexType, + index_name: str, *, host: Union[str, List[str]] = "localhost", port: int = 9200, @@ -187,7 +188,8 @@ def __init__( retry_on_timeout: bool = True, ) -> None: """ - :param index_name: use function get_index_name for setting this argument + :param index_type: use function get_index_name for setting this argument + :param index_name: the name of the index :param host: host or list of hosts :param port: :param scheme: http or https @@ -196,7 +198,8 @@ def __init__( :param timeout: :param retry_on_timeout: """ - self.index_name = index_name.value + self.index_type = index_type + self.index_name = index_type.value + index_name self.el_client = get_client( client_type=AsyncElasticsearch, @@ -221,7 +224,7 @@ async def index_records(self, records: Generator, chunk_size: int = 500): # pylint: disable=unused-variable async for is_ok, action in async_streaming_bulk( self.el_client, - prepare(self.index_name, records), + prepare(self.index_type, records), index=self.index_name, chunk_size=chunk_size, ): @@ -251,7 +254,7 @@ async def filter( ) res = await self.el_client.search(index=self.index_name, body=query) for record in response_to_records( - res, self.index_name, postprocess_actions, indigo_session, options + res, self.index_type, postprocess_actions, indigo_session, options ): yield record @@ -268,7 +271,8 @@ async def __aexit__(self, *args, **kwargs) -> None: class ElasticRepository: def __init__( self, - index_name: IndexName, + index_type: IndexType, + index_name: str, *, host: Union[str, List[str]] = "localhost", port: int = 9200, @@ -280,7 +284,8 @@ def __init__( retry_on_timeout: bool = True, ) -> None: """ - :param index_name: use function get_index_name for setting this argument + :param index_type: use function get_index_name for setting this argument + :param index_name: the name of this index after index type. :param host: host or list of hosts :param port: :param scheme: http or https @@ -289,7 +294,8 @@ def __init__( :param timeout: :param retry_on_timeout: """ - self.index_name = index_name.value + self.index_type = index_type + self.index_name = index_type.value + index_name self.el_client = get_client( client_type=Elasticsearch, @@ -314,7 +320,7 @@ def index_records(self, records: Generator, chunk_size: int = 500): # pylint: disable=unused-variable for is_ok, action in streaming_bulk( self.el_client, - prepare(self.index_name, records), + prepare(self.index_type, records), index=self.index_name, chunk_size=chunk_size, ): @@ -349,7 +355,7 @@ def filter( ) res = self.el_client.search(index=self.index_name, body=query) yield from response_to_records( - res, self.index_name, postprocess_actions, indigo_session, options + res, self.index_type, postprocess_actions, indigo_session, options ) From f59b8389f6c98800280581d9aec0bce05e1f6879 Mon Sep 17 00:00:00 2001 From: Yechen Qiao Date: Tue, 25 Jul 2023 16:19:10 -0400 Subject: [PATCH 10/27] index type becoming a enum that can take in a suffix for various index names. --- bingo/bingo-elastic/python/bingo_elastic/elastic.py | 6 +++--- bingo/bingo-elastic/python/setup.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bingo/bingo-elastic/python/bingo_elastic/elastic.py b/bingo/bingo-elastic/python/bingo_elastic/elastic.py index a362ac25ff..99ebbfcaf0 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/elastic.py +++ b/bingo/bingo-elastic/python/bingo_elastic/elastic.py @@ -148,7 +148,7 @@ def prepare( index_type: IndexType, records: Generator[IndigoRecord, None, None] ) -> Generator[Dict, None, None]: for record in records: - if index_type == get_index_type(record): + if index_type != get_index_type(record): raise ValueError( f"Index {str(index_type)} doesn't support store value " f"of type {type(record)}" @@ -181,7 +181,7 @@ def __init__( host: Union[str, List[str]] = "localhost", port: int = 9200, scheme: str = "", - http_auth: Optional[Tuple[str]] = None, + http_auth: Optional[List[str]] = None, ssl_context: Any = None, request_timeout: int = 60, timeout: int = 60, @@ -277,7 +277,7 @@ def __init__( host: Union[str, List[str]] = "localhost", port: int = 9200, scheme: str = "", - http_auth: Optional[Tuple[str]] = None, + http_auth: Optional[List[str]] = None, ssl_context: Any = None, request_timeout: int = 60, timeout: int = 60, diff --git a/bingo/bingo-elastic/python/setup.py b/bingo/bingo-elastic/python/setup.py index 2957b2a24d..a8653678a4 100644 --- a/bingo/bingo-elastic/python/setup.py +++ b/bingo/bingo-elastic/python/setup.py @@ -21,7 +21,7 @@ setup( name="bingo_elastic", - version="1.12.0", + version="1.12.1", description="Cartridge that provides fast, scalable, and efficient storage and searching solution for chemical information using Elasticsearch", author="Ruslan Khyurri", author_email="ruslan_khyurri@epam.com", From f02b0861bcb99ebbbe6c51ea49a5dbb99e059504 Mon Sep 17 00:00:00 2001 From: Yechen Qiao Date: Wed, 26 Jul 2023 11:07:57 -0400 Subject: [PATCH 11/27] Added TermQuery --- .../python/bingo_elastic/queries.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/bingo/bingo-elastic/python/bingo_elastic/queries.py b/bingo/bingo-elastic/python/bingo_elastic/queries.py index ce6eb0308f..6bfa7b44aa 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/queries.py +++ b/bingo/bingo-elastic/python/bingo_elastic/queries.py @@ -62,6 +62,28 @@ def compile( default_script_score(query) +class TermQuery(CompilableQuery): + """ + Result must match at least one result in the querying field. + """ + def __init__(self, value_list: List[str]): + self._value = value_list + + def compile( + self, query: Dict, postprocess_actions: PostprocessType = None + ) -> None: + bool_head: Dict = head_by_path( + query, ("query", "script_score", "query", "bool") + ) + field_name = str(self.field) + ".keyword" + if not bool_head.get("must"): + bool_head["must"] = [] + bool_head["must"].append({ + "terms": {field_name: self._value} + }) + default_script_score(query) + + class SubstructureQuery(CompilableQuery): def __init__(self, key: str, value: IndigoObject) -> None: self._key = key From 48c49d8f6a1da34c7e02fd13d7054083e3ccb60d Mon Sep 17 00:00:00 2001 From: Yechen Qiao Date: Wed, 26 Jul 2023 14:07:58 -0400 Subject: [PATCH 12/27] Add delete by query support. --- .../python/bingo_elastic/elastic.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/bingo/bingo-elastic/python/bingo_elastic/elastic.py b/bingo/bingo-elastic/python/bingo_elastic/elastic.py index 99ebbfcaf0..da83703172 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/elastic.py +++ b/bingo/bingo-elastic/python/bingo_elastic/elastic.py @@ -258,6 +258,18 @@ async def filter( ): yield record + async def delete(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, + limit: int = 10, **kwargs,) -> None: + """ + Delete documents in index by a query filter. + """ + query = compile_query( + query_subject=query_subject, + limit=limit, + **kwargs, + ) + await self.el_client.delete_by_query(index=self.index_name, body=query) + async def close(self) -> None: await self.el_client.close() @@ -358,6 +370,22 @@ def filter( res, self.index_type, postprocess_actions, indigo_session, options ) + def delete(self, + query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, + limit: int = 10, + **kwargs,) -> None: + """ + Delete documents in index by a query filter. + """ + query = compile_query( + query_subject=query_subject, + limit=limit, + **kwargs, + ) + self.el_client.delete_by_query(index=self.index_name, body=query) + + + def compile_query( query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, From fef5816a0a9a020569a7aecc0d33c2ebb1ba7884 Mon Sep 17 00:00:00 2001 From: Yechen Qiao Date: Wed, 26 Jul 2023 14:17:10 -0400 Subject: [PATCH 13/27] Add delete by query support. --- bingo/bingo-elastic/python/bingo_elastic/elastic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bingo/bingo-elastic/python/bingo_elastic/elastic.py b/bingo/bingo-elastic/python/bingo_elastic/elastic.py index da83703172..6768b87616 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/elastic.py +++ b/bingo/bingo-elastic/python/bingo_elastic/elastic.py @@ -259,7 +259,7 @@ async def filter( yield record async def delete(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - limit: int = 10, **kwargs,) -> None: + limit: int = 10, **kwargs,): """ Delete documents in index by a query filter. """ @@ -373,7 +373,7 @@ def filter( def delete(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, limit: int = 10, - **kwargs,) -> None: + **kwargs,) -> Dict[str, Any]: """ Delete documents in index by a query filter. """ @@ -382,7 +382,7 @@ def delete(self, limit=limit, **kwargs, ) - self.el_client.delete_by_query(index=self.index_name, body=query) + return self.el_client.delete_by_query(index=self.index_name, body=query) From 9b6afa76bd9884c3b937e0dcb363c1b1bca4d55f Mon Sep 17 00:00:00 2001 From: Yechen Qiao Date: Fri, 28 Jul 2023 12:12:10 -0400 Subject: [PATCH 14/27] Add a "-" separating type and name of index when deleting first check if index exists and ignore if it doesn't. --- bingo/bingo-elastic/python/bingo_elastic/elastic.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bingo/bingo-elastic/python/bingo_elastic/elastic.py b/bingo/bingo-elastic/python/bingo_elastic/elastic.py index 6768b87616..37b86665fd 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/elastic.py +++ b/bingo/bingo-elastic/python/bingo_elastic/elastic.py @@ -199,7 +199,7 @@ def __init__( :param retry_on_timeout: """ self.index_type = index_type - self.index_name = index_type.value + index_name + self.index_name = index_type.value + "-" + index_name self.el_client = get_client( client_type=AsyncElasticsearch, @@ -263,6 +263,8 @@ async def delete(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecor """ Delete documents in index by a query filter. """ + if not self.el_client.indices.exists(index=self.index_name): + return dict() query = compile_query( query_subject=query_subject, limit=limit, @@ -307,7 +309,7 @@ def __init__( :param retry_on_timeout: """ self.index_type = index_type - self.index_name = index_type.value + index_name + self.index_name = index_type.value + "-" + index_name self.el_client = get_client( client_type=Elasticsearch, @@ -352,7 +354,6 @@ def filter( options: str = "", **kwargs, ) -> Generator[IndigoRecord, None, None]: - if limit > MAX_ALLOWED_SIZE: raise ValueError( f"limit should less or equal to {MAX_ALLOWED_SIZE}" @@ -377,6 +378,8 @@ def delete(self, """ Delete documents in index by a query filter. """ + if not self.el_client.indices.exists(index=self.index_name): + return dict() query = compile_query( query_subject=query_subject, limit=limit, From 7f4d5429850f7ff5e278308c4e8ee36e36ba50a8 Mon Sep 17 00:00:00 2001 From: Yechen Qiao Date: Fri, 28 Jul 2023 14:58:48 -0400 Subject: [PATCH 15/27] Allow index name to be empty for backward compatibility Fixed KeywordQuery and TermQuery so that they work for substructure search. Note query must be constructed with is_substructure=True. --- .../python/bingo_elastic/elastic.py | 8 ++- .../python/bingo_elastic/queries.py | 51 ++++++++++++------- 2 files changed, 39 insertions(+), 20 deletions(-) diff --git a/bingo/bingo-elastic/python/bingo_elastic/elastic.py b/bingo/bingo-elastic/python/bingo_elastic/elastic.py index 37b86665fd..7b30605882 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/elastic.py +++ b/bingo/bingo-elastic/python/bingo_elastic/elastic.py @@ -199,7 +199,9 @@ def __init__( :param retry_on_timeout: """ self.index_type = index_type - self.index_name = index_type.value + "-" + index_name + self.index_name = index_type.value + if index_name: + self.index_name += "-" + index_name self.el_client = get_client( client_type=AsyncElasticsearch, @@ -309,7 +311,9 @@ def __init__( :param retry_on_timeout: """ self.index_type = index_type - self.index_name = index_type.value + "-" + index_name + self.index_name = index_type.value + if index_name: + self.index_name += "-" + index_name self.el_client = get_client( client_type=Elasticsearch, diff --git a/bingo/bingo-elastic/python/bingo_elastic/queries.py b/bingo/bingo-elastic/python/bingo_elastic/queries.py index 6bfa7b44aa..890b785278 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/queries.py +++ b/bingo/bingo-elastic/python/bingo_elastic/queries.py @@ -45,43 +45,58 @@ def compile( class KeywordQuery(CompilableQuery): - def __init__(self, value: str): + is_substructure: bool + + def __init__(self, value: str, is_substructure: bool = False): self._value = value + self.is_substructure = is_substructure def compile( self, query: Dict, postprocess_actions: PostprocessType = None ) -> None: - bool_head = head_by_path( - query, ("query", "script_score", "query", "bool") - ) - if not bool_head.get("must"): - bool_head["must"] = [] - bool_head["must"].append( + if self.is_substructure: + bool_head = head_by_path(query, ("query", "bool")) + else: + bool_head = head_by_path(query, ("query", "script_score", "query", "bool")) + parent_term = "must" + if not bool_head.get(parent_term): + bool_head[parent_term] = [] + bool_head[parent_term].append( {"match": {self.field: {"query": self._value, "boost": 0}}} ) - default_script_score(query) + if not self.is_substructure: + default_script_score(query) class TermQuery(CompilableQuery): """ Result must match at least one result in the querying field. """ - def __init__(self, value_list: List[str]): + def __init__(self, value_list: List[str], is_substructure: bool = False): self._value = value_list + self.is_substructure = is_substructure def compile( self, query: Dict, postprocess_actions: PostprocessType = None ) -> None: - bool_head: Dict = head_by_path( - query, ("query", "script_score", "query", "bool") - ) + if self.is_substructure: + bool_head = head_by_path(query, ("query", "bool")) + else: + bool_head = head_by_path(query, ("query", "script_score", "query", "bool")) + parent_term = "filter" field_name = str(self.field) + ".keyword" - if not bool_head.get("must"): - bool_head["must"] = [] - bool_head["must"].append({ - "terms": {field_name: self._value} - }) - default_script_score(query) + if not bool_head.get(parent_term): + bool_head[parent_term] = [] + if len(self._value) > 1: + bool_head[parent_term].append({ + "terms": {field_name: self._value} + }) + else: + bool_head[parent_term].append({ + "term": {field_name: self._value[0]} + }) + if not self.is_substructure: + default_script_score(query) class SubstructureQuery(CompilableQuery): From b0494b0a553da9bb327ca880a158a488be39741a Mon Sep 17 00:00:00 2001 From: Yechen Qiao Date: Fri, 28 Jul 2023 15:01:37 -0400 Subject: [PATCH 16/27] declare version as alpha --- bingo/bingo-elastic/python/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bingo/bingo-elastic/python/setup.py b/bingo/bingo-elastic/python/setup.py index a8653678a4..18f8725b49 100644 --- a/bingo/bingo-elastic/python/setup.py +++ b/bingo/bingo-elastic/python/setup.py @@ -21,7 +21,7 @@ setup( name="bingo_elastic", - version="1.12.1", + version="1.12.1alpha", description="Cartridge that provides fast, scalable, and efficient storage and searching solution for chemical information using Elasticsearch", author="Ruslan Khyurri", author_email="ruslan_khyurri@epam.com", From ad44bf5c956bb691646e227edef40ed384b41e67 Mon Sep 17 00:00:00 2001 From: Yechen Qiao Date: Thu, 10 Aug 2023 16:52:46 -0400 Subject: [PATCH 17/27] when doing SSS make sure we aromtize on query and post-processing per https://github.com/epam/Indigo/issues/774 --- bingo/bingo-elastic/python/bingo_elastic/elastic.py | 1 + bingo/bingo-elastic/python/bingo_elastic/queries.py | 1 + 2 files changed, 2 insertions(+) diff --git a/bingo/bingo-elastic/python/bingo_elastic/elastic.py b/bingo/bingo-elastic/python/bingo_elastic/elastic.py index 7b30605882..593e02d043 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/elastic.py +++ b/bingo/bingo-elastic/python/bingo_elastic/elastic.py @@ -420,6 +420,7 @@ def compile_query( query, postprocess_actions ) elif isinstance(query_subject, IndigoObject): + query_subject.aromatize() query_factory("substructure", query_subject).compile( query, postprocess_actions ) diff --git a/bingo/bingo-elastic/python/bingo_elastic/queries.py b/bingo/bingo-elastic/python/bingo_elastic/queries.py index 890b785278..c1b2fa6139 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/queries.py +++ b/bingo/bingo-elastic/python/bingo_elastic/queries.py @@ -112,6 +112,7 @@ def postprocess( return None mol = record.as_indigo_object(indigo) + mol.aromatize() matcher = indigo.substructureMatcher(mol, options) if matcher.match(self._value): From 0502b72a8970d11b9ba592e4abd60cec7f560117 Mon Sep 17 00:00:00 2001 From: Yechen Qiao Date: Tue, 15 Aug 2023 15:11:54 -0400 Subject: [PATCH 18/27] For reaction search we need to do automap() before testing when the file is rxn from chemaxon it may need sanitized with automap(). --- .../python/bingo_elastic/elastic.py | 34 ++++++++++++------- .../python/bingo_elastic/queries.py | 4 ++- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/bingo/bingo-elastic/python/bingo_elastic/elastic.py b/bingo/bingo-elastic/python/bingo_elastic/elastic.py index 593e02d043..12ac410bc6 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/elastic.py +++ b/bingo/bingo-elastic/python/bingo_elastic/elastic.py @@ -248,7 +248,7 @@ async def filter( # actions needed to be called on elastic_search result postprocess_actions: PostprocessType = [] - query = compile_query( + query = self.compile_query( query_subject=query_subject, limit=limit, postprocess_actions=postprocess_actions, @@ -261,13 +261,13 @@ async def filter( yield record async def delete(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - limit: int = 10, **kwargs,): + limit: int = 10, **kwargs, ): """ Delete documents in index by a query filter. """ if not self.el_client.indices.exists(index=self.index_name): return dict() - query = compile_query( + query = self.compile_query( query_subject=query_subject, limit=limit, **kwargs, @@ -283,6 +283,10 @@ async def __aenter__(self, *args, **kwargs) -> "AsyncElasticRepository": async def __aexit__(self, *args, **kwargs) -> None: await self.close() + def compile_query(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, limit: int = 10, + postprocess_actions: PostprocessType = None, **kwargs, ) -> Dict: + return _compile_query(self.index_type, query_subject, limit, postprocess_actions, **kwargs) + class ElasticRepository: def __init__( @@ -364,7 +368,7 @@ def filter( ) # actions needed to be called on elastic_search result postprocess_actions: PostprocessType = [] - query = compile_query( + query = self.compile_query( query_subject=query_subject, limit=limit, postprocess_actions=postprocess_actions, @@ -378,28 +382,30 @@ def filter( def delete(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, limit: int = 10, - **kwargs,) -> Dict[str, Any]: + **kwargs, ) -> Dict[str, Any]: """ Delete documents in index by a query filter. """ if not self.el_client.indices.exists(index=self.index_name): return dict() - query = compile_query( + query = self.compile_query( query_subject=query_subject, limit=limit, **kwargs, ) return self.el_client.delete_by_query(index=self.index_name, body=query) + def compile_query(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, limit: int = 10, + postprocess_actions: PostprocessType = None, **kwargs, ) -> Dict: + return _compile_query(self.index_type, query_subject, limit, postprocess_actions, **kwargs) - -def compile_query( - query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - limit: int = 10, - postprocess_actions: PostprocessType = None, - **kwargs, -) -> Dict: +def _compile_query(index_type: IndexType, + query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, + limit: int = 10, + postprocess_actions: PostprocessType = None, + **kwargs, + ) -> Dict: query = { "size": limit, "_source": { @@ -421,6 +427,8 @@ def compile_query( ) elif isinstance(query_subject, IndigoObject): query_subject.aromatize() + if index_type == IndexType.BINGO_REACTION: + query_subject.automap() query_factory("substructure", query_subject).compile( query, postprocess_actions ) diff --git a/bingo/bingo-elastic/python/bingo_elastic/queries.py b/bingo/bingo-elastic/python/bingo_elastic/queries.py index c1b2fa6139..bec7087c36 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/queries.py +++ b/bingo/bingo-elastic/python/bingo_elastic/queries.py @@ -5,7 +5,7 @@ from indigo import Indigo, IndigoObject # type: ignore -from bingo_elastic.model.record import IndigoRecord, IndigoRecordMolecule +from bingo_elastic.model.record import IndigoRecord, IndigoRecordMolecule, IndigoRecordReaction from bingo_elastic.utils import PostprocessType, head_by_path @@ -113,6 +113,8 @@ def postprocess( mol = record.as_indigo_object(indigo) mol.aromatize() + if isinstance(record, IndigoRecordReaction): + mol.automap() matcher = indigo.substructureMatcher(mol, options) if matcher.match(self._value): From f1cba52e86cb7a80a2bfc48d1361f9d9442cfa7a Mon Sep 17 00:00:00 2001 From: Yechen Qiao Date: Tue, 22 Aug 2023 09:28:43 -0400 Subject: [PATCH 19/27] remove reaction automap during registration and query. --- bingo/bingo-elastic/python/bingo_elastic/elastic.py | 2 -- bingo/bingo-elastic/python/bingo_elastic/queries.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/bingo/bingo-elastic/python/bingo_elastic/elastic.py b/bingo/bingo-elastic/python/bingo_elastic/elastic.py index 12ac410bc6..14fae513f6 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/elastic.py +++ b/bingo/bingo-elastic/python/bingo_elastic/elastic.py @@ -427,8 +427,6 @@ def _compile_query(index_type: IndexType, ) elif isinstance(query_subject, IndigoObject): query_subject.aromatize() - if index_type == IndexType.BINGO_REACTION: - query_subject.automap() query_factory("substructure", query_subject).compile( query, postprocess_actions ) diff --git a/bingo/bingo-elastic/python/bingo_elastic/queries.py b/bingo/bingo-elastic/python/bingo_elastic/queries.py index bec7087c36..537233a98a 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/queries.py +++ b/bingo/bingo-elastic/python/bingo_elastic/queries.py @@ -113,8 +113,6 @@ def postprocess( mol = record.as_indigo_object(indigo) mol.aromatize() - if isinstance(record, IndigoRecordReaction): - mol.automap() matcher = indigo.substructureMatcher(mol, options) if matcher.match(self._value): From 047a48500a97b641dab90aa523d5cf1c05dd4003 Mon Sep 17 00:00:00 2001 From: Yechen Qiao Date: Thu, 24 Aug 2023 15:51:05 -0400 Subject: [PATCH 20/27] remove reaction automap during registration and query. --- .../python/bingo_elastic/elastic.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/bingo/bingo-elastic/python/bingo_elastic/elastic.py b/bingo/bingo-elastic/python/bingo_elastic/elastic.py index 14fae513f6..44b2e85029 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/elastic.py +++ b/bingo/bingo-elastic/python/bingo_elastic/elastic.py @@ -236,7 +236,8 @@ async def filter( self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, indigo_session: Indigo = None, - limit: int = 10, + offset: int = 0, + limit: int = MAX_ALLOWED_SIZE, options: str = "", **kwargs, ) -> AsyncGenerator[IndigoRecord, None]: @@ -250,6 +251,7 @@ async def filter( query = self.compile_query( query_subject=query_subject, + offset=offset, limit=limit, postprocess_actions=postprocess_actions, **kwargs, @@ -283,9 +285,10 @@ async def __aenter__(self, *args, **kwargs) -> "AsyncElasticRepository": async def __aexit__(self, *args, **kwargs) -> None: await self.close() - def compile_query(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, limit: int = 10, + def compile_query(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, + offset: int = 0, limit: int = 10, postprocess_actions: PostprocessType = None, **kwargs, ) -> Dict: - return _compile_query(self.index_type, query_subject, limit, postprocess_actions, **kwargs) + return _compile_query(self.index_type, query_subject, offset, limit, postprocess_actions, **kwargs) class ElasticRepository: @@ -358,6 +361,7 @@ def filter( self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, indigo_session: Indigo = None, + offset: int = 0, limit: int = 10, options: str = "", **kwargs, @@ -370,6 +374,7 @@ def filter( postprocess_actions: PostprocessType = [] query = self.compile_query( query_subject=query_subject, + offset=offset, limit=limit, postprocess_actions=postprocess_actions, **kwargs, @@ -395,14 +400,15 @@ def delete(self, ) return self.el_client.delete_by_query(index=self.index_name, body=query) - def compile_query(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, limit: int = 10, + def compile_query(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, + offset: int = 0, limit: int = 10, postprocess_actions: PostprocessType = None, **kwargs, ) -> Dict: - return _compile_query(self.index_type, query_subject, limit, postprocess_actions, **kwargs) + return _compile_query(self.index_type, query_subject, offset, limit, postprocess_actions, **kwargs) def _compile_query(index_type: IndexType, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - limit: int = 10, + offset: int = 0, limit: int = 10, postprocess_actions: PostprocessType = None, **kwargs, ) -> Dict: @@ -418,6 +424,8 @@ def _compile_query(index_type: IndexType, ], }, } + if offset > 0: + query["from"] = offset if isinstance(query_subject, BaseMatch): query_subject.compile(query, postprocess_actions) From d4e55147c378f57e18badb990c4443175ec36bbb Mon Sep 17 00:00:00 2001 From: Yechen Qiao Date: Tue, 29 Aug 2023 12:45:05 -0400 Subject: [PATCH 21/27] now using point-in-time query support with paging. --- .../python/bingo_elastic/elastic.py | 411 ++++++++++++++---- .../python/bingo_elastic/model/record.py | 7 + .../python/bingo_elastic/queries.py | 2 +- .../python/bingo_elastic/utils.py | 2 +- 4 files changed, 336 insertions(+), 86 deletions(-) diff --git a/bingo/bingo-elastic/python/bingo_elastic/elastic.py b/bingo/bingo-elastic/python/bingo_elastic/elastic.py index 44b2e85029..61d81139da 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/elastic.py +++ b/bingo/bingo-elastic/python/bingo_elastic/elastic.py @@ -1,7 +1,8 @@ +from __future__ import annotations + from enum import Enum from typing import ( Any, - AsyncGenerator, Dict, Generator, List, @@ -9,7 +10,7 @@ Tuple, Type, TypeVar, - Union, + Union, Awaitable, Iterable, Iterator, ) from elasticsearch import Elasticsearch @@ -34,7 +35,214 @@ ElasticRepositoryT = TypeVar("ElasticRepositoryT") -MAX_ALLOWED_SIZE = 1000 + +class BingoElasticPageCriteria: + """ + Captures the criteria to make a paged query in Bingo + """ + _pit_id: Optional[str] + _page_size: int + _pit_stay_alive_minutes: int + _sort: List[Dict[str, str]] + _search_after: List[Any] + _query: Optional[Dict[str, Any]] + _next_page_search_after: List[Any] + + def to_json(self) -> Dict[str, Any]: + """ + Provide ability to serialize this page criteria into a JSON for REST API clients. + """ + return { + "pit_id": self._pit_id, + "page_size": self._page_size, + "stay_alive_minutes": self._pit_stay_alive_minutes, + "sort": self._sort, + "search_after": self._search_after, + "query": self._query + } + + @staticmethod + def from_json(json_dct: Dict[str, Any]) -> BingoElasticPageCriteria: + """ + Provide deserialization ability from page criteria to a JSON REST API client. + """ + _pit_id: Optional[str] = json_dct.get("pit_id") + _page_size: int = json_dct.get("page_size") + _pit_stay_alive_minutes: int = json_dct.get("stay_alive_minutes") + _sort: List[Dict[str, str]] = json_dct.get("sort") + _search_after: List[Any] = json_dct.get("search_after") + _query: Dict[str, Any] = json_dct.get("query") + return BingoElasticPageCriteria(page_size=_page_size, pit_id=_pit_id, + sort=_sort, pit_stay_alive_minutes=_pit_stay_alive_minutes, + search_after=_search_after, query=_query) + + @property + def query(self) -> Optional[Dict[str, Any]]: + return self._query + + @property + def pit_id(self) -> Optional[str]: + return self._pit_id + + @property + def page_size(self) -> int: + return self._page_size + + @property + def pit_stay_alive_minutes(self) -> int: + return self._pit_stay_alive_minutes + + @property + def sort_criteria(self) -> List[Dict[str, str]]: + return self._sort + + @property + def search_after(self) -> List[Any]: + return self._search_after + + def __init__(self, page_size: int = 10, + pit_id: Optional[str] = None, sort: Optional[List[Dict[str, str]]] = None, + pit_stay_alive_minutes: int = 30, + search_after: List[Any] = None, + query: Optional[Dict[str, Any]] = None): + """ + Create custom page criteria to query any particular page with particular number of records to skip. + Note: in order to continue the query, the sort order must not be changed. + https://www.elastic.co/guide/en/elasticsearch/reference/current/paginate-search-results.html + :param page_size: The page size must not exceed the elastic page limit. The elastic page limit is adjustable + by admin configuration and is usually default to 999 (with canary). + Note: there will be one extra hit to be used for canary so this should be 1 less than the limit. + :param pit_id: The PIT identifier for the current query. A fresh query with have this as None. + :param sort: The optional sort order of the paged query. If none provided, this will be sort by relevance score. + :param pit_stay_alive_minutes: the number of minutes the PIT (point in time) query will stay alive for continued + browsing. + :param search_after To continue querying, obtain the last record's sort result and append it in this parameter. + """ + if not sort: + sort = [ + {"_score": "desc"} + ] + # shard_doc in sort is implicit + self._page_size = page_size + self._pit_id = pit_id + self._sort = sort + self._pit_stay_alive_minutes = pit_stay_alive_minutes + self._search_after = search_after + self._query = query + + +class BingoElasticPageResult(Awaitable, Iterable): + """ + Result of a single page query in Bingo elastic. + """ + + _records_of_page: List[Optional[IndigoRecord]] + _current_page_criteria: BingoElasticPageCriteria + _num_hits_in_elastic: int + _num_actual_hits: int + _last_hit_sort_object: List[Any] + _gen: Generator[IndigoRecord, None, None] + _completed_processing: bool + _tracking_iterator: Optional[Iterator[Optional[IndigoRecord]]] = None + + def get_records(self, filter_false_positives: bool = True) -> Tuple[Optional[IndigoRecord], ...]: + """ + Get records in this page. + :param filter_false_positives: If true, the hits in elastic that are filtered out by post-processor will + not be returned. If false, null object will be returned in position where false positive had occurred. + """ + if not filter_false_positives: + return tuple(self._records_of_page) + return tuple([x for x in self._records_of_page if x is not None]) + + @property + def current_page(self) -> BingoElasticPageCriteria: + """ + Get the current page criteria for the query. + """ + return self._current_page_criteria + + @property + def num_hits_in_elastic(self) -> int: + """ + Get number of hits in this page that's in elastic but may be false-positives. + """ + return self._num_hits_in_elastic + + @property + def num_actual_hits(self) -> int: + """ + Get the number of actual hits after post-processing filter. + Note: this can be size 1 greater than original page size if the canary for next page is hit. + """ + return self._num_actual_hits + + @property + def has_next_page(self) -> bool: + """ + Use the canary to decide whether the next page is available or not. + """ + # If there isn't any hit in ELASTIC page at all for some reason (i.e. first page no result) then no next page. + if not self._completed_processing: + raise AssertionError("Cannot test next page availability using async I/O " + "without fully retrieving current page result first..") + if not self._last_hit_sort_object: + return False + return self._num_hits_in_elastic >= self._current_page_criteria.page_size + 1 + + @property + def next_page_criteria(self) -> Optional[BingoElasticPageCriteria]: + if not self.has_next_page: + return None + cur = self.current_page + return BingoElasticPageCriteria(page_size=cur.page_size, pit_id=cur.pit_id, + sort=cur.sort_criteria, pit_stay_alive_minutes=cur.pit_stay_alive_minutes, + query=cur.query, + search_after=self._last_hit_sort_object) + + def synchronized(self) -> None: + """ + Synchronize the object so we finish the processing completely + """ + if self._completed_processing: + return + for record in self.__await__(): + pass + + def __init__(self, gen: Generator[IndigoRecord, None, None], + current_page_criteria: BingoElasticPageCriteria): + self._current_page_criteria = current_page_criteria + self._gen = gen + self._records_of_page = list() + self._num_hits_in_elastic = 0 + self._num_actual_hits = 0 + self._completed_processing = False + + def __iter__(self) -> Iterator[Optional[IndigoRecord]]: + """ + Backward compatibility method to obtain iterator of indigo records. + We track the iterator, so it never goes back like before, to mimic its behavior. + """ + self.synchronized() + if self._tracking_iterator is None: + self._tracking_iterator = self.get_records(filter_false_positives=False).__iter__() + return self._tracking_iterator + + def __await__(self) -> Generator[IndigoRecord, None, None]: + for record in self._gen: + self._num_hits_in_elastic += 1 + # Avoid returning the canary in the page. + if self._num_hits_in_elastic > self._current_page_criteria.page_size: + break + # make sure we get canary of the last hit of actual last page instead of canary (must be after break) + # noinspection PyProtectedMember + self._last_hit_sort_object = self._current_page_criteria._next_page_search_after + self._records_of_page.append(record) + # If post-processing filtered it out then it's not an actual hit. + if record is not None: + self._num_actual_hits += 1 + yield record + self._completed_processing = True class IndexType(Enum): @@ -156,20 +364,24 @@ def prepare( yield record.as_dict() -def response_to_records( +def get_page_result( res: dict, index_type: IndexType, + page_criteria: BingoElasticPageCriteria, postprocess_actions: PostprocessType = None, indigo_session: Indigo = None, options: str = "", -) -> Generator[IndigoRecord, None, None]: - for el_response in res.get("hits", {}).get("hits", []): - record = get_record_by_index(el_response, index_type) - for action_fn in postprocess_actions: # type: ignore - record = action_fn(record, indigo_session, options) # type: ignore - if not record: - continue - yield record +) -> BingoElasticPageResult: + def page_result_gen() -> Generator[IndigoRecord, None, None]: + for el_response in res.get("hits", {}).get("hits", []): + record = get_record_by_index(el_response, index_type) + for action_fn in postprocess_actions: # type: ignore + record = action_fn(record, indigo_session, options) # type: ignore + if not record: + continue + yield record + + return BingoElasticPageResult(gen=page_result_gen(), current_page_criteria=page_criteria) class AsyncElasticRepository: @@ -236,45 +448,45 @@ async def filter( self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, indigo_session: Indigo = None, - offset: int = 0, - limit: int = MAX_ALLOWED_SIZE, + page_criteria: Optional[BingoElasticPageCriteria] = None, options: str = "", **kwargs, - ) -> AsyncGenerator[IndigoRecord, None]: + ) -> BingoElasticPageResult: + """ + Return async page result without waiting for page's full post-processing to complete. + The client is expected to consume the Awaitable object returned by using "await object" syntax + or consume its generator directly (depending on the type of parallelism desired). + """ - if limit > MAX_ALLOWED_SIZE: - raise ValueError( - f"limit should less or equal to {MAX_ALLOWED_SIZE}" - ) # actions needed to be called on elastic_search result postprocess_actions: PostprocessType = [] - query = self.compile_query( + page_criteria = self.compile_query( query_subject=query_subject, - offset=offset, - limit=limit, + page_criteria=page_criteria, postprocess_actions=postprocess_actions, **kwargs, ) - res = await self.el_client.search(index=self.index_name, body=query) - for record in response_to_records( - res, self.index_type, postprocess_actions, indigo_session, options - ): - yield record + # We must NOT specify an index name as this is inherited by PIT. + res = await self.el_client.search(body=page_criteria.query) + ret = get_page_result( + res, self.index_type, page_criteria, postprocess_actions, indigo_session, options + ) + return ret async def delete(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - limit: int = 10, **kwargs, ): + limit: int = 10, **kwargs, ) -> Dict[str, Any]: """ Delete documents in index by a query filter. """ if not self.el_client.indices.exists(index=self.index_name): return dict() - query = self.compile_query( + page_criteria = self.compile_query( query_subject=query_subject, limit=limit, **kwargs, ) - await self.el_client.delete_by_query(index=self.index_name, body=query) + return await self.el_client.delete_by_query(index=self.index_name, body=page_criteria.query, slices="auto") async def close(self) -> None: await self.el_client.close() @@ -286,9 +498,10 @@ async def __aexit__(self, *args, **kwargs) -> None: await self.close() def compile_query(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - offset: int = 0, limit: int = 10, - postprocess_actions: PostprocessType = None, **kwargs, ) -> Dict: - return _compile_query(self.index_type, query_subject, offset, limit, postprocess_actions, **kwargs) + page_criteria: Optional[BingoElasticPageCriteria] = None, + postprocess_actions: PostprocessType = None, **kwargs, ) -> BingoElasticPageCriteria: + return _compile_query(self.index_name, self.el_client, + query_subject, page_criteria, postprocess_actions, **kwargs) class ElasticRepository: @@ -361,85 +574,115 @@ def filter( self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, indigo_session: Indigo = None, - offset: int = 0, - limit: int = 10, + page_criteria: Optional[BingoElasticPageCriteria] = None, options: str = "", **kwargs, - ) -> Generator[IndigoRecord, None, None]: - if limit > MAX_ALLOWED_SIZE: - raise ValueError( - f"limit should less or equal to {MAX_ALLOWED_SIZE}" - ) + ) -> BingoElasticPageResult: # actions needed to be called on elastic_search result postprocess_actions: PostprocessType = [] - query = self.compile_query( + page_criteria = self.compile_query( query_subject=query_subject, - offset=offset, - limit=limit, + page_criteria=page_criteria, postprocess_actions=postprocess_actions, **kwargs, ) - res = self.el_client.search(index=self.index_name, body=query) - yield from response_to_records( - res, self.index_type, postprocess_actions, indigo_session, options + # We must NOT specify an index name as this is inherited by PIT. + res = self.el_client.search(body=page_criteria.query) + ret = get_page_result( + res, self.index_type, page_criteria, postprocess_actions, indigo_session, options ) + ret.synchronized() + return ret def delete(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - limit: int = 10, + limit: int = 999, **kwargs, ) -> Dict[str, Any]: """ Delete documents in index by a query filter. """ if not self.el_client.indices.exists(index=self.index_name): return dict() - query = self.compile_query( + page_criteria = self.compile_query( query_subject=query_subject, limit=limit, **kwargs, ) - return self.el_client.delete_by_query(index=self.index_name, body=query) + return self.el_client.delete_by_query(index=self.index_name, body=page_criteria.query, + slices="auto") def compile_query(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - offset: int = 0, limit: int = 10, - postprocess_actions: PostprocessType = None, **kwargs, ) -> Dict: - return _compile_query(self.index_type, query_subject, offset, limit, postprocess_actions, **kwargs) + page_criteria: Optional[BingoElasticPageCriteria] = None, + postprocess_actions: PostprocessType = None, **kwargs, ) -> BingoElasticPageCriteria: + return _compile_query(self.index_name, self.el_client, + query_subject, page_criteria, postprocess_actions, **kwargs) -def _compile_query(index_type: IndexType, +def _compile_query(index_name: str, el_client: ElasticRepositoryT, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - offset: int = 0, limit: int = 10, + page_criteria: Optional[BingoElasticPageCriteria] = None, postprocess_actions: PostprocessType = None, **kwargs, - ) -> Dict: - query = { - "size": limit, - "_source": { - "includes": ["*"], - "excludes": [ - "sim_fingerprint", - "sim_fingerprint_len", - "sub_fingerprint_len", - "sub_fingerprint", - ], - }, - } - if offset > 0: - query["from"] = offset - - if isinstance(query_subject, BaseMatch): - query_subject.compile(query, postprocess_actions) - elif isinstance(query_subject, IndigoRecord): - query_factory("exact", query_subject).compile( - query, postprocess_actions - ) - elif isinstance(query_subject, IndigoObject): - query_subject.aromatize() - query_factory("substructure", query_subject).compile( - query, postprocess_actions - ) + ) -> BingoElasticPageCriteria: + # record last elastic hit's sort object, regardless of its post-process filtering status. + if postprocess_actions is None: + postprocess_actions = [] + if page_criteria is None: + page_criteria = BingoElasticPageCriteria() + if not page_criteria.pit_id: + pit_result = el_client.open_point_in_time(index=index_name, + keep_alive=str(page_criteria.pit_stay_alive_minutes) + "m") + pit_id: str = pit_result["id"] + page_criteria._pit_id = pit_id + + def page_processing_routine(record: IndigoRecord, indigo: Indigo, options: str) -> Optional[IndigoRecord]: + # This is the first post-processing action always, so it shouldn't return None + assert record is not None + page_criteria._next_page_search_after = record.sort + return record + + postprocess_actions.insert(0, page_processing_routine) + + query: Dict[str, Any] + if not page_criteria.query: + query = { + "size": page_criteria.page_size + 1, + "_source": { + "includes": ["*"], + "excludes": [ + "sim_fingerprint", + "sim_fingerprint_len", + "sub_fingerprint_len", + "sub_fingerprint", + ], + }, + "pit": { + "id": page_criteria.pit_id, + "keep_alive": str(page_criteria.pit_stay_alive_minutes) + "m" + }, + # Sort is necessary for paging. + "sort": page_criteria.sort_criteria + } - for key, value in kwargs.items(): - query_factory(key, value).compile(query) + if isinstance(query_subject, BaseMatch): + query_subject.compile(query, postprocess_actions) + elif isinstance(query_subject, IndigoRecord): + query_factory("exact", query_subject).compile( + query, postprocess_actions + ) + elif isinstance(query_subject, IndigoObject): + query_subject.aromatize() + query_factory("substructure", query_subject).compile( + query, postprocess_actions + ) - return query + for key, value in kwargs.items(): + query_factory(key, value).compile(query) + else: + # We only bother to compile the query if this is the first page. Otherwise, we use the same query as before. + query = page_criteria.query + # But regardless of which page, we will overwrite search_after criteria if specified. + if page_criteria.search_after: + query["search_after"] = page_criteria.search_after + page_criteria._query = query + return page_criteria diff --git a/bingo/bingo-elastic/python/bingo_elastic/model/record.py b/bingo/bingo-elastic/python/bingo_elastic/model/record.py index f720b204aa..11396f5d28 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/model/record.py +++ b/bingo/bingo-elastic/python/bingo_elastic/model/record.py @@ -28,6 +28,7 @@ def __set__(self, instance: IndigoRecord, value: Dict): el_src = value["_source"] for arg, val in el_src.items(): setattr(instance, arg, val) + setattr(instance, "_sort", value.get("sort")) class WithIndigoObject: @@ -108,6 +109,12 @@ class IndigoRecord: elastic_response = WithElasticResponse() record_id: Optional[str] = None error_handler: Optional[Callable[[object, BaseException], None]] = None + # Sort for page cursor + _sort: list + + @property + def sort(self) -> list: + return self._sort def __init__(self, **kwargs) -> None: """ diff --git a/bingo/bingo-elastic/python/bingo_elastic/queries.py b/bingo/bingo-elastic/python/bingo_elastic/queries.py index 537233a98a..c1b2fa6139 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/queries.py +++ b/bingo/bingo-elastic/python/bingo_elastic/queries.py @@ -5,7 +5,7 @@ from indigo import Indigo, IndigoObject # type: ignore -from bingo_elastic.model.record import IndigoRecord, IndigoRecordMolecule, IndigoRecordReaction +from bingo_elastic.model.record import IndigoRecord, IndigoRecordMolecule from bingo_elastic.utils import PostprocessType, head_by_path diff --git a/bingo/bingo-elastic/python/bingo_elastic/utils.py b/bingo/bingo-elastic/python/bingo_elastic/utils.py index 6e77bb002f..22ab2f67b0 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/utils.py +++ b/bingo/bingo-elastic/python/bingo_elastic/utils.py @@ -5,7 +5,7 @@ from bingo_elastic.model.record import IndigoRecord PostprocessType = List[ - Callable[[IndigoRecord, Indigo], Optional[IndigoRecord]] + Callable[[IndigoRecord, Indigo, str], Optional[IndigoRecord]] ] From 5be722bce2c4fde54e32aaac4045a375170a99fa Mon Sep 17 00:00:00 2001 From: Yechen Qiao Date: Wed, 30 Aug 2023 09:50:24 -0400 Subject: [PATCH 22/27] now using point-in-time query support with paging. --- bingo/bingo-elastic/python/bingo_elastic/elastic.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/bingo/bingo-elastic/python/bingo_elastic/elastic.py b/bingo/bingo-elastic/python/bingo_elastic/elastic.py index 61d81139da..079cad8acb 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/elastic.py +++ b/bingo/bingo-elastic/python/bingo_elastic/elastic.py @@ -42,7 +42,7 @@ class BingoElasticPageCriteria: """ _pit_id: Optional[str] _page_size: int - _pit_stay_alive_minutes: int + _pit_stay_alive_minutes: float _sort: List[Dict[str, str]] _search_after: List[Any] _query: Optional[Dict[str, Any]] @@ -143,7 +143,6 @@ class BingoElasticPageResult(Awaitable, Iterable): _last_hit_sort_object: List[Any] _gen: Generator[IndigoRecord, None, None] _completed_processing: bool - _tracking_iterator: Optional[Iterator[Optional[IndigoRecord]]] = None def get_records(self, filter_false_positives: bool = True) -> Tuple[Optional[IndigoRecord], ...]: """ @@ -224,9 +223,7 @@ def __iter__(self) -> Iterator[Optional[IndigoRecord]]: We track the iterator, so it never goes back like before, to mimic its behavior. """ self.synchronized() - if self._tracking_iterator is None: - self._tracking_iterator = self.get_records(filter_false_positives=False).__iter__() - return self._tracking_iterator + return self.get_records(filter_false_positives=False).__iter__() def __await__(self) -> Generator[IndigoRecord, None, None]: for record in self._gen: From 5c3d07c0680aecef9659a9b3b36bf8b2694a5c01 Mon Sep 17 00:00:00 2001 From: Yechen Qiao Date: Thu, 31 Aug 2023 09:21:55 -0400 Subject: [PATCH 23/27] now using point-in-time query support with paging. --- .../python/bingo_elastic/elastic.py | 37 +++++++++++++++---- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/bingo/bingo-elastic/python/bingo_elastic/elastic.py b/bingo/bingo-elastic/python/bingo_elastic/elastic.py index 079cad8acb..541b539237 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/elastic.py +++ b/bingo/bingo-elastic/python/bingo_elastic/elastic.py @@ -42,9 +42,9 @@ class BingoElasticPageCriteria: """ _pit_id: Optional[str] _page_size: int - _pit_stay_alive_minutes: float - _sort: List[Dict[str, str]] - _search_after: List[Any] + _pit_stay_alive_minutes: int + _sort: Optional[List[Dict[str, str]]] + _search_after: Optional[List[Any]] _query: Optional[Dict[str, Any]] _next_page_search_after: List[Any] @@ -78,32 +78,55 @@ def from_json(json_dct: Dict[str, Any]) -> BingoElasticPageCriteria: @property def query(self) -> Optional[Dict[str, Any]]: + """ + Get the precompiled query, which will be stored in the following pages of first page for performance. + """ return self._query @property def pit_id(self) -> Optional[str]: + """ + Get the Point In Time (PIT) query identifier. The identifier must either be blank, or must be non-expired. + """ return self._pit_id @property def page_size(self) -> int: + """ + The page size of the query total. + Cannot exceed maximum 999 (1 extra is canary for testing next page availability). + """ return self._page_size @property def pit_stay_alive_minutes(self) -> int: + """ + Get the Point In Time query stay alive minutes, which will be refreshed if there is another paged query again. + Note the elasticsearch does not support floating point values. + """ return self._pit_stay_alive_minutes @property - def sort_criteria(self) -> List[Dict[str, str]]: + def sort_criteria(self) -> Optional[List[Dict[str, str]]]: + """ + By default, the query will be sorted by score followed by PIT shard ID as tie-breaker implicitly. + If an alternative sort order is desired, enter it here. + """ return self._sort @property - def search_after(self) -> List[Any]: + def search_after(self) -> Optional[List[Any]]: + """ + The cursor of the page we are retrieving of the previous record of the first record of this page. + If this is the first page. This will be None. + """ return self._search_after def __init__(self, page_size: int = 10, - pit_id: Optional[str] = None, sort: Optional[List[Dict[str, str]]] = None, + pit_id: Optional[str] = None, + sort: Optional[List[Dict[str, str]]] = None, pit_stay_alive_minutes: int = 30, - search_after: List[Any] = None, + search_after: Optional[List[Any]] = None, query: Optional[Dict[str, Any]] = None): """ Create custom page criteria to query any particular page with particular number of records to skip. From 992273e705fca0c86c9c0782d256af5e1b216ca6 Mon Sep 17 00:00:00 2001 From: Yechen Qiao Date: Thu, 31 Aug 2023 11:34:54 -0400 Subject: [PATCH 24/27] now using point-in-time query support with paging. --- bingo/bingo-elastic/python/bingo_elastic/elastic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bingo/bingo-elastic/python/bingo_elastic/elastic.py b/bingo/bingo-elastic/python/bingo_elastic/elastic.py index 541b539237..11199ff60b 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/elastic.py +++ b/bingo/bingo-elastic/python/bingo_elastic/elastic.py @@ -163,7 +163,7 @@ class BingoElasticPageResult(Awaitable, Iterable): _current_page_criteria: BingoElasticPageCriteria _num_hits_in_elastic: int _num_actual_hits: int - _last_hit_sort_object: List[Any] + _last_hit_sort_object: Optional[List[Any]] _gen: Generator[IndigoRecord, None, None] _completed_processing: bool @@ -239,6 +239,7 @@ def __init__(self, gen: Generator[IndigoRecord, None, None], self._num_hits_in_elastic = 0 self._num_actual_hits = 0 self._completed_processing = False + self._last_hit_sort_object = None def __iter__(self) -> Iterator[Optional[IndigoRecord]]: """ From 318040a66384f009889b5e1264010f856c497ca3 Mon Sep 17 00:00:00 2001 From: Yechen Qiao Date: Fri, 1 Sep 2023 11:19:50 -0400 Subject: [PATCH 25/27] Fixed the case of deletion. --- .../python/bingo_elastic/elastic.py | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/bingo/bingo-elastic/python/bingo_elastic/elastic.py b/bingo/bingo-elastic/python/bingo_elastic/elastic.py index 11199ff60b..2a332b7f93 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/elastic.py +++ b/bingo/bingo-elastic/python/bingo_elastic/elastic.py @@ -496,7 +496,7 @@ async def filter( return ret async def delete(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - limit: int = 10, **kwargs, ) -> Dict[str, Any]: + limit: int = 1000, **kwargs, ) -> Dict[str, Any]: """ Delete documents in index by a query filter. """ @@ -504,7 +504,8 @@ async def delete(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecor return dict() page_criteria = self.compile_query( query_subject=query_subject, - limit=limit, + page_criteria=BingoElasticPageCriteria(page_size=limit-1), + is_delete_query=True, **kwargs, ) return await self.el_client.delete_by_query(index=self.index_name, body=page_criteria.query, slices="auto") @@ -520,9 +521,10 @@ async def __aexit__(self, *args, **kwargs) -> None: def compile_query(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, page_criteria: Optional[BingoElasticPageCriteria] = None, - postprocess_actions: PostprocessType = None, **kwargs, ) -> BingoElasticPageCriteria: + postprocess_actions: PostprocessType = None, + is_delete_query: bool = True, **kwargs, ) -> BingoElasticPageCriteria: return _compile_query(self.index_name, self.el_client, - query_subject, page_criteria, postprocess_actions, **kwargs) + query_subject, page_criteria, postprocess_actions, is_delete_query, **kwargs) class ElasticRepository: @@ -617,7 +619,7 @@ def filter( def delete(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - limit: int = 999, + limit: int = 1000, **kwargs, ) -> Dict[str, Any]: """ Delete documents in index by a query filter. @@ -626,7 +628,8 @@ def delete(self, return dict() page_criteria = self.compile_query( query_subject=query_subject, - limit=limit, + page_criteria=BingoElasticPageCriteria(page_size=limit-1), + is_delete_query=True, **kwargs, ) return self.el_client.delete_by_query(index=self.index_name, body=page_criteria.query, @@ -634,15 +637,17 @@ def delete(self, def compile_query(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, page_criteria: Optional[BingoElasticPageCriteria] = None, - postprocess_actions: PostprocessType = None, **kwargs, ) -> BingoElasticPageCriteria: + postprocess_actions: PostprocessType = None, + is_delete_query: bool = False, **kwargs, ) -> BingoElasticPageCriteria: return _compile_query(self.index_name, self.el_client, - query_subject, page_criteria, postprocess_actions, **kwargs) + query_subject, page_criteria, postprocess_actions, is_delete_query, **kwargs) def _compile_query(index_name: str, el_client: ElasticRepositoryT, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, page_criteria: Optional[BingoElasticPageCriteria] = None, postprocess_actions: PostprocessType = None, + is_delete_query: bool = False, **kwargs, ) -> BingoElasticPageCriteria: # record last elastic hit's sort object, regardless of its post-process filtering status. @@ -650,7 +655,7 @@ def _compile_query(index_name: str, el_client: ElasticRepositoryT, postprocess_actions = [] if page_criteria is None: page_criteria = BingoElasticPageCriteria() - if not page_criteria.pit_id: + if not page_criteria.pit_id and not is_delete_query: pit_result = el_client.open_point_in_time(index=index_name, keep_alive=str(page_criteria.pit_stay_alive_minutes) + "m") pit_id: str = pit_result["id"] @@ -662,7 +667,8 @@ def page_processing_routine(record: IndigoRecord, indigo: Indigo, options: str) page_criteria._next_page_search_after = record.sort return record - postprocess_actions.insert(0, page_processing_routine) + if not is_delete_query: + postprocess_actions.insert(0, page_processing_routine) query: Dict[str, Any] if not page_criteria.query: @@ -677,13 +683,14 @@ def page_processing_routine(record: IndigoRecord, indigo: Indigo, options: str) "sub_fingerprint", ], }, - "pit": { - "id": page_criteria.pit_id, - "keep_alive": str(page_criteria.pit_stay_alive_minutes) + "m" - }, # Sort is necessary for paging. "sort": page_criteria.sort_criteria } + if not is_delete_query: + query["pit"] = { + "id": page_criteria.pit_id, + "keep_alive": str(page_criteria.pit_stay_alive_minutes) + "m" + } if isinstance(query_subject, BaseMatch): query_subject.compile(query, postprocess_actions) From 3d9bcd2b1711c6aab587c0cc73bdf83cd44788ed Mon Sep 17 00:00:00 2001 From: Aleksandr Savelev Date: Thu, 15 Feb 2024 23:53:01 +0300 Subject: [PATCH 26/27] reset changelog --- CHANGELOG.md | 59 +--------------------------------------------------- 1 file changed, 1 insertion(+), 58 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index faa07fd1a2..b72ea58beb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,60 +1,3 @@ -# Indigo 1.12.0 -Released 2023-07-09 - -## What's Changed - -## Bugfixes -* #965 MDL Molfile v3000: when opening files containing 'Salts an Solvents', names are truncated and abbreviation is expanded -* #1036 SMILES import: general chiral specification labels (TH, AL, SP, TB, OH ) don't work -* #1051 Opening file with a superatom label saved in RXN v3000 format only the first part of the label is displayed -* #1114 Atoms of Benzene ring become Monoradicals when opened from file saved in Daylight SMARTS -* #1132 SMILES loader uninitialized heap fix -* #1102 When pasting Extended SMILES structure with stereochemistry there are two &1 centers instead of an ABS and an &1 -* #1135 C library macro - va_end() is missing before return statement. -* #1126 Segfault when iterating CDX file from USPTO downloads -* #1144 Unable to save the structure after clicking 'Save', an error appears - -## Improvements -* #1098 api: add method for copying RGroups - -**Full Changelog**: https://github.com/epam/Indigo/compare/indigo-1.11.0...indigo-1.12.0 - - -# Indigo 1.11.0 -Released 2023-06-07 - -## What's Changed - -## Features -* #1053 Split publish job in "Indigo CI" GitHub Action -* #310 Support stereo CIP calculation in Ket format -* #957 Support of Korean, Chinese and Japanese characters in Standalone. -* #995 Automated memory leaks testing - -## Bugfixes -* #1044 SVG/PNG: Reaction arrows are not visible without structures at preview and in saved files -* #932 Reagents: When opening Daylight SMILES and Extended SMILES files with reagent the original structure is distorted -* #1084 Can't open mol v3000 files with 'S-Group Properties Type = Generic' and 'S-Group Properties Type = Multiple' -* #1083 Indigo Service: enable of using Indigo Options -* #910 MDL Molfile v3000 encoding: Automatic selection of MDL Molfile v3000 encoding doesn't work if the number of atoms (or bonds) exceeds 999 -* #956 Copy Image: When inversion type is chosen in the atom's properties, it is not saved -* #955 Copy Image: Saved bonds does not have Reacting Center marks -* #1052 Set "Indigo Docker images preparation" GItHub Action to start manually only add version tag to Docker images -* #1064 Keep implicit hydrogens information in KET-format -* #1048 Memory leak in 3rd party library -* #1056 RXN2000/3000 should not serialize INDIGO_DESC fields for s-groups -* #1050 Memory leak in StringPool code -* #1031 Calculate CIP: Hovering over the label R/S displays Indigo system information -* #1049 Memory leak in the SMILES loader code -* #973 Daylight SMARTS: Error when save file in SMART format with reaction arrow and reagent -* #1017 imagoVersions is undefined -* #899 Add restrictions on size to be less than 1000 -* #1015 Cannot test CDX export with certain files -* #944 CDX import: Greek letters, Celsius and Fahrenheit signs are replaced with question marks -* #1093 python binding memory leak from 1.8.0 (and still present in 1.10.0) - -**Full Changelog**: https://github.com/epam/Indigo/compare/indigo-1.10.0...indigo-1.11.0 - # Indigo 1.10.0 Released 2023-03-22 @@ -100,7 +43,7 @@ Released 2023-01-31 * Improve ssl bingo elastic by @MysterionRise in #901 * bingo: postgres: add support for Postgres 15, drop support for Postgres 10 by @mkviatkovskii in #903 * #521: core: replace MultiMap in MoleculeRGroupsComposition class by @loimu in #917 -* #521: core: replace MultiMap in MolfileLoader class by @loimu in #911  +* #521: core: replace MultiMap in MolfileLoader class by @loimu in #911  * #929: fix auto-saving to CTAB v3000 by @mkviatkovskii in #931 **Full Changelog**: https://github.com/epam/Indigo/compare/indigo-1.8.0...indigo-1.9.0 From 86f269fcc68fbfa5dec7c1edb3d9f2cc0a7befa3 Mon Sep 17 00:00:00 2001 From: Aleksandr Savelev Date: Fri, 16 Feb 2024 11:38:31 +0300 Subject: [PATCH 27/27] fix static analysis issues for new classes --- .../python/bingo_elastic/elastic.py | 356 +++++++++++------- 1 file changed, 220 insertions(+), 136 deletions(-) diff --git a/bingo/bingo-elastic/python/bingo_elastic/elastic.py b/bingo/bingo-elastic/python/bingo_elastic/elastic.py index 2a332b7f93..0cfa7c82b7 100644 --- a/bingo/bingo-elastic/python/bingo_elastic/elastic.py +++ b/bingo/bingo-elastic/python/bingo_elastic/elastic.py @@ -10,7 +10,10 @@ Tuple, Type, TypeVar, - Union, Awaitable, Iterable, Iterator, + Union, + Awaitable, + Iterable, + Iterator, ) from elasticsearch import Elasticsearch @@ -40,6 +43,7 @@ class BingoElasticPageCriteria: """ Captures the criteria to make a paged query in Bingo """ + _pit_id: Optional[str] _page_size: int _pit_stay_alive_minutes: int @@ -58,7 +62,7 @@ def to_json(self) -> Dict[str, Any]: "stay_alive_minutes": self._pit_stay_alive_minutes, "sort": self._sort, "search_after": self._search_after, - "query": self._query + "query": self._query, } @staticmethod @@ -72,9 +76,14 @@ def from_json(json_dct: Dict[str, Any]) -> BingoElasticPageCriteria: _sort: List[Dict[str, str]] = json_dct.get("sort") _search_after: List[Any] = json_dct.get("search_after") _query: Dict[str, Any] = json_dct.get("query") - return BingoElasticPageCriteria(page_size=_page_size, pit_id=_pit_id, - sort=_sort, pit_stay_alive_minutes=_pit_stay_alive_minutes, - search_after=_search_after, query=_query) + return BingoElasticPageCriteria( + page_size=_page_size, + pit_id=_pit_id, + sort=_sort, + pit_stay_alive_minutes=_pit_stay_alive_minutes, + search_after=_search_after, + query=_query, + ) @property def query(self) -> Optional[Dict[str, Any]]: @@ -122,12 +131,15 @@ def search_after(self) -> Optional[List[Any]]: """ return self._search_after - def __init__(self, page_size: int = 10, - pit_id: Optional[str] = None, - sort: Optional[List[Dict[str, str]]] = None, - pit_stay_alive_minutes: int = 30, - search_after: Optional[List[Any]] = None, - query: Optional[Dict[str, Any]] = None): + def __init__( + self, + page_size: int = 10, + pit_id: Optional[str] = None, + sort: Optional[List[Dict[str, str]]] = None, + pit_stay_alive_minutes: int = 30, + search_after: Optional[List[Any]] = None, + query: Optional[Dict[str, Any]] = None, + ): """ Create custom page criteria to query any particular page with particular number of records to skip. Note: in order to continue the query, the sort order must not be changed. @@ -142,9 +154,7 @@ def __init__(self, page_size: int = 10, :param search_after To continue querying, obtain the last record's sort result and append it in this parameter. """ if not sort: - sort = [ - {"_score": "desc"} - ] + sort = [{"_score": "desc"}] # shard_doc in sort is implicit self._page_size = page_size self._pit_id = pit_id @@ -167,7 +177,9 @@ class BingoElasticPageResult(Awaitable, Iterable): _gen: Generator[IndigoRecord, None, None] _completed_processing: bool - def get_records(self, filter_false_positives: bool = True) -> Tuple[Optional[IndigoRecord], ...]: + def get_records( + self, filter_false_positives: bool = True + ) -> Tuple[Optional[IndigoRecord], ...]: """ Get records in this page. :param filter_false_positives: If true, the hits in elastic that are filtered out by post-processor will @@ -206,21 +218,30 @@ def has_next_page(self) -> bool: """ # If there isn't any hit in ELASTIC page at all for some reason (i.e. first page no result) then no next page. if not self._completed_processing: - raise AssertionError("Cannot test next page availability using async I/O " - "without fully retrieving current page result first..") + raise AssertionError( + "Cannot test next page availability using async I/O " + "without fully retrieving current page result first.." + ) if not self._last_hit_sort_object: return False - return self._num_hits_in_elastic >= self._current_page_criteria.page_size + 1 + return ( + self._num_hits_in_elastic + >= self._current_page_criteria.page_size + 1 + ) @property def next_page_criteria(self) -> Optional[BingoElasticPageCriteria]: if not self.has_next_page: return None cur = self.current_page - return BingoElasticPageCriteria(page_size=cur.page_size, pit_id=cur.pit_id, - sort=cur.sort_criteria, pit_stay_alive_minutes=cur.pit_stay_alive_minutes, - query=cur.query, - search_after=self._last_hit_sort_object) + return BingoElasticPageCriteria( + page_size=cur.page_size, + pit_id=cur.pit_id, + sort=cur.sort_criteria, + pit_stay_alive_minutes=cur.pit_stay_alive_minutes, + query=cur.query, + search_after=self._last_hit_sort_object, + ) def synchronized(self) -> None: """ @@ -231,8 +252,11 @@ def synchronized(self) -> None: for record in self.__await__(): pass - def __init__(self, gen: Generator[IndigoRecord, None, None], - current_page_criteria: BingoElasticPageCriteria): + def __init__( + self, + gen: Generator[IndigoRecord, None, None], + current_page_criteria: BingoElasticPageCriteria, + ): self._current_page_criteria = current_page_criteria self._gen = gen self._records_of_page = list() @@ -253,11 +277,16 @@ def __await__(self) -> Generator[IndigoRecord, None, None]: for record in self._gen: self._num_hits_in_elastic += 1 # Avoid returning the canary in the page. - if self._num_hits_in_elastic > self._current_page_criteria.page_size: + if ( + self._num_hits_in_elastic + > self._current_page_criteria.page_size + ): break # make sure we get canary of the last hit of actual last page instead of canary (must be after break) # noinspection PyProtectedMember - self._last_hit_sort_object = self._current_page_criteria._next_page_search_after + self._last_hit_sort_object = ( + self._current_page_criteria._next_page_search_after + ) self._records_of_page.append(record) # If post-processing filtered it out then it's not an actual hit. if record is not None: @@ -280,7 +309,7 @@ def get_index_type(record: IndigoRecord) -> IndexType: def get_record_by_index( - response: Dict, index_type: IndexType + response: Dict, index_type: IndexType ) -> Union[IndigoRecordMolecule, IndigoRecordReaction]: if index_type == IndexType.BINGO_MOLECULE: return IndigoRecordMolecule(elastic_response=response) @@ -290,31 +319,35 @@ def get_record_by_index( def elastic_repository_molecule(index_name: str, *args, **kwargs): - return ElasticRepository(IndexType.BINGO_MOLECULE, index_name, *args, **kwargs) + return ElasticRepository( + IndexType.BINGO_MOLECULE, index_name, *args, **kwargs + ) def elastic_repository_reaction(index_name: str, *args, **kwargs): - return ElasticRepository(IndexType.BINGO_REACTION, index_name, *args, **kwargs) + return ElasticRepository( + IndexType.BINGO_REACTION, index_name, *args, **kwargs + ) def get_client( - *, - client_type: Type[ElasticRepositoryT], - host: Union[str, List[str]] = "localhost", - port: int = 9200, - scheme: str = "", - http_auth: Optional[List[str]] = None, - ssl_context: Any = None, - request_timeout: int = 60, - timeout: int = 60, - retry_on_timeout: bool = True, + *, + client_type: Type[ElasticRepositoryT], + host: Union[str, List[str]] = "localhost", + port: int = 9200, + scheme: str = "", + http_auth: Optional[List[str]] = None, + ssl_context: Any = None, + request_timeout: int = 60, + timeout: int = 60, + retry_on_timeout: bool = True, ) -> ElasticRepositoryT: arguments = { "port": port, "scheme": "https" if scheme == "https" else "http", "request_timeout": request_timeout, "retry_on_timeout": retry_on_timeout, - "timeout": timeout + "timeout": timeout, } if isinstance(host, str): arguments["host"] = host @@ -350,8 +383,8 @@ def check_index_exception(err_: RequestError) -> None: raise err_ cause = err_.info.get("error", {}).get("root_cause", []) if ( - len(cause) == 1 - and cause[0].get("type") == "resource_already_exists_exception" + len(cause) == 1 + and cause[0].get("type") == "resource_already_exists_exception" ): return raise err_ @@ -365,7 +398,7 @@ def create_index(index_name: str, el_client: Elasticsearch) -> None: async def a_create_index( - index_name: str, el_client: "AsyncElasticsearch" + index_name: str, el_client: "AsyncElasticsearch" ) -> None: try: await el_client.indices.create(index=index_name, body=index_body) @@ -374,7 +407,7 @@ async def a_create_index( def prepare( - index_type: IndexType, records: Generator[IndigoRecord, None, None] + index_type: IndexType, records: Generator[IndigoRecord, None, None] ) -> Generator[Dict, None, None]: for record in records: if index_type != get_index_type(record): @@ -386,39 +419,43 @@ def prepare( def get_page_result( - res: dict, - index_type: IndexType, - page_criteria: BingoElasticPageCriteria, - postprocess_actions: PostprocessType = None, - indigo_session: Indigo = None, - options: str = "", + res: dict, + index_type: IndexType, + page_criteria: BingoElasticPageCriteria, + postprocess_actions: PostprocessType = None, + indigo_session: Indigo = None, + options: str = "", ) -> BingoElasticPageResult: def page_result_gen() -> Generator[IndigoRecord, None, None]: for el_response in res.get("hits", {}).get("hits", []): record = get_record_by_index(el_response, index_type) for action_fn in postprocess_actions: # type: ignore - record = action_fn(record, indigo_session, options) # type: ignore + record = action_fn( + record, indigo_session, options + ) # type: ignore if not record: continue yield record - return BingoElasticPageResult(gen=page_result_gen(), current_page_criteria=page_criteria) + return BingoElasticPageResult( + gen=page_result_gen(), current_page_criteria=page_criteria + ) class AsyncElasticRepository: def __init__( - self, - index_type: IndexType, - index_name: str, - *, - host: Union[str, List[str]] = "localhost", - port: int = 9200, - scheme: str = "", - http_auth: Optional[List[str]] = None, - ssl_context: Any = None, - request_timeout: int = 60, - timeout: int = 60, - retry_on_timeout: bool = True, + self, + index_type: IndexType, + index_name: str, + *, + host: Union[str, List[str]] = "localhost", + port: int = 9200, + scheme: str = "", + http_auth: Optional[List[str]] = None, + ssl_context: Any = None, + request_timeout: int = 60, + timeout: int = 60, + retry_on_timeout: bool = True, ) -> None: """ :param index_type: use function get_index_name for setting this argument @@ -458,20 +495,20 @@ async def index_records(self, records: Generator, chunk_size: int = 500): await a_create_index(self.index_name, self.el_client) # pylint: disable=unused-variable async for is_ok, action in async_streaming_bulk( - self.el_client, - prepare(self.index_type, records), - index=self.index_name, - chunk_size=chunk_size, + self.el_client, + prepare(self.index_type, records), + index=self.index_name, + chunk_size=chunk_size, ): pass async def filter( - self, - query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - indigo_session: Indigo = None, - page_criteria: Optional[BingoElasticPageCriteria] = None, - options: str = "", - **kwargs, + self, + query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, + indigo_session: Indigo = None, + page_criteria: Optional[BingoElasticPageCriteria] = None, + options: str = "", + **kwargs, ) -> BingoElasticPageResult: """ Return async page result without waiting for page's full post-processing to complete. @@ -491,12 +528,21 @@ async def filter( # We must NOT specify an index name as this is inherited by PIT. res = await self.el_client.search(body=page_criteria.query) ret = get_page_result( - res, self.index_type, page_criteria, postprocess_actions, indigo_session, options + res, + self.index_type, + page_criteria, + postprocess_actions, + indigo_session, + options, ) return ret - async def delete(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - limit: int = 1000, **kwargs, ) -> Dict[str, Any]: + async def delete( + self, + query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, + limit: int = 1000, + **kwargs, + ) -> Dict[str, Any]: """ Delete documents in index by a query filter. """ @@ -504,11 +550,13 @@ async def delete(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecor return dict() page_criteria = self.compile_query( query_subject=query_subject, - page_criteria=BingoElasticPageCriteria(page_size=limit-1), + page_criteria=BingoElasticPageCriteria(page_size=limit - 1), is_delete_query=True, **kwargs, ) - return await self.el_client.delete_by_query(index=self.index_name, body=page_criteria.query, slices="auto") + return await self.el_client.delete_by_query( + index=self.index_name, body=page_criteria.query, slices="auto" + ) async def close(self) -> None: await self.el_client.close() @@ -519,28 +567,39 @@ async def __aenter__(self, *args, **kwargs) -> "AsyncElasticRepository": async def __aexit__(self, *args, **kwargs) -> None: await self.close() - def compile_query(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - page_criteria: Optional[BingoElasticPageCriteria] = None, - postprocess_actions: PostprocessType = None, - is_delete_query: bool = True, **kwargs, ) -> BingoElasticPageCriteria: - return _compile_query(self.index_name, self.el_client, - query_subject, page_criteria, postprocess_actions, is_delete_query, **kwargs) + def compile_query( + self, + query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, + page_criteria: Optional[BingoElasticPageCriteria] = None, + postprocess_actions: PostprocessType = None, + is_delete_query: bool = True, + **kwargs, + ) -> BingoElasticPageCriteria: + return _compile_query( + self.index_name, + self.el_client, + query_subject, + page_criteria, + postprocess_actions, + is_delete_query, + **kwargs, + ) class ElasticRepository: def __init__( - self, - index_type: IndexType, - index_name: str, - *, - host: Union[str, List[str]] = "localhost", - port: int = 9200, - scheme: str = "", - http_auth: Optional[List[str]] = None, - ssl_context: Any = None, - request_timeout: int = 60, - timeout: int = 60, - retry_on_timeout: bool = True, + self, + index_type: IndexType, + index_name: str, + *, + host: Union[str, List[str]] = "localhost", + port: int = 9200, + scheme: str = "", + http_auth: Optional[List[str]] = None, + ssl_context: Any = None, + request_timeout: int = 60, + timeout: int = 60, + retry_on_timeout: bool = True, ) -> None: """ :param index_type: use function get_index_name for setting this argument @@ -580,10 +639,10 @@ def index_records(self, records: Generator, chunk_size: int = 500): create_index(self.index_name, self.el_client) # pylint: disable=unused-variable for is_ok, action in streaming_bulk( - self.el_client, - prepare(self.index_type, records), - index=self.index_name, - chunk_size=chunk_size, + self.el_client, + prepare(self.index_type, records), + index=self.index_name, + chunk_size=chunk_size, ): pass @@ -594,12 +653,12 @@ def delete_all_records(self): pass def filter( - self, - query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - indigo_session: Indigo = None, - page_criteria: Optional[BingoElasticPageCriteria] = None, - options: str = "", - **kwargs, + self, + query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, + indigo_session: Indigo = None, + page_criteria: Optional[BingoElasticPageCriteria] = None, + options: str = "", + **kwargs, ) -> BingoElasticPageResult: # actions needed to be called on elastic_search result postprocess_actions: PostprocessType = [] @@ -612,15 +671,22 @@ def filter( # We must NOT specify an index name as this is inherited by PIT. res = self.el_client.search(body=page_criteria.query) ret = get_page_result( - res, self.index_type, page_criteria, postprocess_actions, indigo_session, options + res, + self.index_type, + page_criteria, + postprocess_actions, + indigo_session, + options, ) ret.synchronized() return ret - def delete(self, - query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - limit: int = 1000, - **kwargs, ) -> Dict[str, Any]: + def delete( + self, + query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, + limit: int = 1000, + **kwargs, + ) -> Dict[str, Any]: """ Delete documents in index by a query filter. """ @@ -628,40 +694,58 @@ def delete(self, return dict() page_criteria = self.compile_query( query_subject=query_subject, - page_criteria=BingoElasticPageCriteria(page_size=limit-1), + page_criteria=BingoElasticPageCriteria(page_size=limit - 1), is_delete_query=True, **kwargs, ) - return self.el_client.delete_by_query(index=self.index_name, body=page_criteria.query, - slices="auto") - - def compile_query(self, query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - page_criteria: Optional[BingoElasticPageCriteria] = None, - postprocess_actions: PostprocessType = None, - is_delete_query: bool = False, **kwargs, ) -> BingoElasticPageCriteria: - return _compile_query(self.index_name, self.el_client, - query_subject, page_criteria, postprocess_actions, is_delete_query, **kwargs) - - -def _compile_query(index_name: str, el_client: ElasticRepositoryT, - query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, - page_criteria: Optional[BingoElasticPageCriteria] = None, - postprocess_actions: PostprocessType = None, - is_delete_query: bool = False, - **kwargs, - ) -> BingoElasticPageCriteria: + return self.el_client.delete_by_query( + index=self.index_name, body=page_criteria.query, slices="auto" + ) + + def compile_query( + self, + query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, + page_criteria: Optional[BingoElasticPageCriteria] = None, + postprocess_actions: PostprocessType = None, + is_delete_query: bool = False, + **kwargs, + ) -> BingoElasticPageCriteria: + return _compile_query( + self.index_name, + self.el_client, + query_subject, + page_criteria, + postprocess_actions, + is_delete_query, + **kwargs, + ) + + +def _compile_query( + index_name: str, + el_client: ElasticRepositoryT, + query_subject: Union[BaseMatch, IndigoObject, IndigoRecord] = None, + page_criteria: Optional[BingoElasticPageCriteria] = None, + postprocess_actions: PostprocessType = None, + is_delete_query: bool = False, + **kwargs, +) -> BingoElasticPageCriteria: # record last elastic hit's sort object, regardless of its post-process filtering status. if postprocess_actions is None: postprocess_actions = [] if page_criteria is None: page_criteria = BingoElasticPageCriteria() if not page_criteria.pit_id and not is_delete_query: - pit_result = el_client.open_point_in_time(index=index_name, - keep_alive=str(page_criteria.pit_stay_alive_minutes) + "m") + pit_result = el_client.open_point_in_time( + index=index_name, + keep_alive=str(page_criteria.pit_stay_alive_minutes) + "m", + ) pit_id: str = pit_result["id"] page_criteria._pit_id = pit_id - def page_processing_routine(record: IndigoRecord, indigo: Indigo, options: str) -> Optional[IndigoRecord]: + def page_processing_routine( + record: IndigoRecord, indigo: Indigo, options: str + ) -> Optional[IndigoRecord]: # This is the first post-processing action always, so it shouldn't return None assert record is not None page_criteria._next_page_search_after = record.sort @@ -684,12 +768,12 @@ def page_processing_routine(record: IndigoRecord, indigo: Indigo, options: str) ], }, # Sort is necessary for paging. - "sort": page_criteria.sort_criteria + "sort": page_criteria.sort_criteria, } if not is_delete_query: query["pit"] = { "id": page_criteria.pit_id, - "keep_alive": str(page_criteria.pit_stay_alive_minutes) + "m" + "keep_alive": str(page_criteria.pit_stay_alive_minutes) + "m", } if isinstance(query_subject, BaseMatch):