From d74117896cb62c6b2d39204e9d87e81ccfdc28fd Mon Sep 17 00:00:00 2001 From: kenneth Date: Fri, 19 Jul 2024 10:27:00 +0200 Subject: [PATCH 1/5] Revert "[FSTORE-1453] Move client, decorators, variable_api and constants to hopsworks_common (#229)" This reverts commit 85deafd4da93c99151a0e996ffbcfb66a2893185. --- python/hopsworks/client/__init__.py | 83 ++-- python/hopsworks/client/auth.py | 33 +- python/hopsworks/client/base.py | 175 +++++++- python/hopsworks/client/exceptions.py | 104 +++-- python/hopsworks/client/external.py | 157 ++++++- python/hopsworks/client/hopsworks.py | 227 +++++++++- .../client/online_store_rest_client.py | 28 -- python/hopsworks/connection.py | 7 + python/hopsworks/core/variable_api.py | 106 ++++- python/hopsworks/decorators.py | 59 ++- python/hopsworks_common/client/__init__.py | 72 ---- python/hopsworks_common/client/auth.py | 52 --- python/hopsworks_common/client/base.py | 293 ------------- python/hopsworks_common/client/exceptions.py | 143 ------ python/hopsworks_common/client/external.py | 407 ------------------ python/hopsworks_common/client/hopsworks.py | 236 ---------- .../client/online_store_rest_client.py | 385 ----------------- python/hopsworks_common/core/constants.py | 51 --- python/hopsworks_common/core/variable_api.py | 117 ----- python/hopsworks_common/decorators.py | 86 ---- python/hsfs/client/__init__.py | 80 ++-- python/hsfs/client/auth.py | 45 +- python/hsfs/client/base.py | 278 +++++++++++- python/hsfs/client/exceptions.py | 130 ++++-- python/hsfs/client/external.py | 372 +++++++++++++++- python/hsfs/client/hopsworks.py | 175 +++++++- .../hsfs/client/online_store_rest_client.py | 376 +++++++++++++++- python/hsfs/core/constants.py | 67 ++- python/hsfs/core/variable_api.py | 68 ++- python/hsfs/decorators.py | 83 +++- python/hsml/decorators.py | 59 ++- .../core/test_online_store_rest_client.py | 20 +- 32 files changed, 2394 insertions(+), 2180 deletions(-) delete mode 100644 python/hopsworks/client/online_store_rest_client.py delete mode 100644 python/hopsworks_common/client/__init__.py delete mode 100644 python/hopsworks_common/client/auth.py delete mode 100644 python/hopsworks_common/client/base.py delete mode 100644 python/hopsworks_common/client/exceptions.py delete mode 100644 python/hopsworks_common/client/external.py delete mode 100644 python/hopsworks_common/client/hopsworks.py delete mode 100644 python/hopsworks_common/client/online_store_rest_client.py delete mode 100644 python/hopsworks_common/core/constants.py delete mode 100644 python/hopsworks_common/core/variable_api.py delete mode 100644 python/hopsworks_common/decorators.py diff --git a/python/hopsworks/client/__init__.py b/python/hopsworks/client/__init__.py index 19e0feb8d..004e49c8b 100644 --- a/python/hopsworks/client/__init__.py +++ b/python/hopsworks/client/__init__.py @@ -1,5 +1,5 @@ # -# Copyright 2024 Hopsworks AB +# Copyright 2022 Logical Clocks AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,27 +14,60 @@ # limitations under the License. 
# -from hopsworks_common.client import ( - auth, - base, - exceptions, - external, - get_instance, - hopsworks, - init, - online_store_rest_client, - stop, -) - - -__all__ = [ - auth, - base, - exceptions, - external, - get_instance, - hopsworks, - init, - online_store_rest_client, - stop, -] +from hopsworks.client import external, hopsworks + + +_client = None +_python_version = None + + +def init( + client_type, + host=None, + port=None, + project=None, + hostname_verification=None, + trust_store_path=None, + cert_folder=None, + api_key_file=None, + api_key_value=None, +): + global _client + if not _client: + if client_type == "hopsworks": + _client = hopsworks.Client() + elif client_type == "external": + _client = external.Client( + host, + port, + project, + hostname_verification, + trust_store_path, + cert_folder, + api_key_file, + api_key_value, + ) + + +def get_instance(): + global _client + if _client: + return _client + raise Exception("Couldn't find client. Try reconnecting to Hopsworks.") + + +def get_python_version(): + global _python_version + return _python_version + + +def set_python_version(python_version): + global _python_version + _python_version = python_version + + +def stop(): + global _client + if _client: + _client._close() + _client = None diff --git a/python/hopsworks/client/auth.py b/python/hopsworks/client/auth.py index e912b1daf..8bbd4ae53 100644 --- a/python/hopsworks/client/auth.py +++ b/python/hopsworks/client/auth.py @@ -1,5 +1,5 @@ # -# Copyright 2024 Hopsworks AB +# Copyright 2022 Logical Clocks AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,15 +14,26 @@ # limitations under the License. # -from hopsworks_common.client.auth import ( - ApiKeyAuth, - BearerAuth, - OnlineStoreKeyAuth, -) +import requests -__all__ = [ - ApiKeyAuth, - BearerAuth, - OnlineStoreKeyAuth, -] +class BearerAuth(requests.auth.AuthBase): + """Class to encapsulate a Bearer token.""" + + def __init__(self, token): + self._token = token + + def __call__(self, r): + r.headers["Authorization"] = "Bearer " + self._token.strip() + return r + + +class ApiKeyAuth(requests.auth.AuthBase): + """Class to encapsulate an API key.""" + + def __init__(self, token): + self._token = token + + def __call__(self, r): + r.headers["Authorization"] = "ApiKey " + self._token.strip() + return r diff --git a/python/hopsworks/client/base.py b/python/hopsworks/client/base.py index 3ff35d800..852259639 100644 --- a/python/hopsworks/client/base.py +++ b/python/hopsworks/client/base.py @@ -1,5 +1,5 @@ # -# Copyright 2024 Hopsworks AB +# Copyright 2022 Logical Clocks AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,11 +14,172 @@ # limitations under the License. 
# -from hopsworks_common.client.base import ( - Client, -) +import os +from abc import ABC, abstractmethod +import furl +import requests +import urllib3 +from hopsworks.client import auth, exceptions +from hopsworks.decorators import connected -__all__ = [ - Client, -] + +urllib3.disable_warnings(urllib3.exceptions.SecurityWarning) +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + + +class Client(ABC): + TOKEN_FILE = "token.jwt" + APIKEY_FILE = "api.key" + REST_ENDPOINT = "REST_ENDPOINT" + HOPSWORKS_PUBLIC_HOST = "HOPSWORKS_PUBLIC_HOST" + + @abstractmethod + def __init__(self): + """To be implemented by clients.""" + pass + + def _get_verify(self, verify, trust_store_path): + """Get verification method for sending HTTP requests to Hopsworks. + + Credit to https://gist.github.com/gdamjan/55a8b9eec6cf7b771f92021d93b87b2c + + :param verify: perform hostname verification, 'true' or 'false' + :type verify: str + :param trust_store_path: path of the truststore locally if it was uploaded manually to + the external environment such as AWS Sagemaker + :type trust_store_path: str + :return: if verify is true and the truststore is provided, then return the trust store location + if verify is true but the truststore wasn't provided, then return true + if verify is false, then return false + :rtype: str or boolean + """ + if verify == "true": + if trust_store_path is not None: + return trust_store_path + else: + return True + + return False + + def _get_host_port_pair(self): + """ + Removes "http or https" from the rest endpoint and returns a list + [endpoint, port], where endpoint is on the format /path.. without http:// + + :return: a list [endpoint, port] + :rtype: list + """ + endpoint = self._base_url + if "http" in endpoint: + last_index = endpoint.rfind("/") + endpoint = endpoint[last_index + 1 :] + host, port = endpoint.split(":") + return host, port + + def _read_jwt(self): + """Retrieve jwt from local container.""" + return self._read_file(self.TOKEN_FILE) + + def _read_apikey(self): + """Retrieve apikey from local container.""" + return self._read_file(self.APIKEY_FILE) + + def _read_file(self, secret_file): + """Retrieve secret from local container.""" + with open(os.path.join(self._secrets_dir, secret_file), "r") as secret: + return secret.read() + + def _get_credentials(self, project_id): + """Makes a REST call to hopsworks for getting the project user certificates needed to connect to services such as Hive + + :param project_id: id of the project + :type project_id: int + :return: JSON response with credentials + :rtype: dict + """ + return self._send_request("GET", ["project", project_id, "credentials"]) + + def _write_pem_file(self, content: str, path: str) -> None: + with open(path, "w") as f: + f.write(content) + + @connected + def _send_request( + self, + method, + path_params, + query_params=None, + headers=None, + data=None, + stream=False, + files=None, + with_base_path_params=True, + ): + """Send REST request to Hopsworks. + + Uses the client it is executed from. Path parameters are url encoded automatically. + + :param method: 'GET', 'PUT' or 'POST' + :type method: str + :param path_params: a list of path params to build the query url from starting after + the api resource, for example `["project", 119, "featurestores", 67]`. 
+ :type path_params: list + :param query_params: A dictionary of key/value pairs to be added as query parameters, + defaults to None + :type query_params: dict, optional + :param headers: Additional header information, defaults to None + :type headers: dict, optional + :param data: The payload as a python dictionary to be sent as json, defaults to None + :type data: dict, optional + :param stream: Set if response should be a stream, defaults to False + :type stream: boolean, optional + :param files: dictionary for multipart encoding upload + :type files: dict, optional + :raises RestAPIError: Raised when request wasn't correctly received, understood or accepted + :return: Response json + :rtype: dict + """ + f_url = furl.furl(self._base_url) + if with_base_path_params: + base_path_params = ["hopsworks-api", "api"] + f_url.path.segments = base_path_params + path_params + else: + f_url.path.segments = path_params + url = str(f_url) + + request = requests.Request( + method, + url=url, + headers=headers, + data=data, + params=query_params, + auth=self._auth, + files=files, + ) + + prepped = self._session.prepare_request(request) + response = self._session.send(prepped, verify=self._verify, stream=stream) + + if response.status_code == 401 and self.REST_ENDPOINT in os.environ: + # refresh token and retry request - only on hopsworks + self._auth = auth.BearerAuth(self._read_jwt()) + # Update request with the new token + request.auth = self._auth + prepped = self._session.prepare_request(request) + response = self._session.send(prepped, verify=self._verify, stream=stream) + + if response.status_code // 100 != 2: + raise exceptions.RestAPIError(url, response) + + if stream: + return response + else: + # handle different success response codes + if len(response.content) == 0: + return None + return response.json() + + def _close(self): + """Closes a client. Can be implemented for clean up purposes, not mandatory.""" + self._connected = False diff --git a/python/hopsworks/client/exceptions.py b/python/hopsworks/client/exceptions.py index b34ef198f..637146492 100644 --- a/python/hopsworks/client/exceptions.py +++ b/python/hopsworks/client/exceptions.py @@ -1,5 +1,5 @@ # -# Copyright 2024 Hopsworks AB +# Copyright 2022 Logical Clocks AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,37 +14,71 @@ # limitations under the License. # -from hopsworks_common.client.exceptions import ( - DatasetException, - DataValidationException, - EnvironmentException, - ExternalClientError, - FeatureStoreException, - GitException, - JobException, - JobExecutionException, - KafkaException, - OpenSearchException, - ProjectException, - RestAPIError, - UnknownSecretStorageError, - VectorDatabaseException, -) - - -__all__ = [ - DatasetException, - DataValidationException, - EnvironmentException, - ExternalClientError, - FeatureStoreException, - GitException, - JobException, - JobExecutionException, - KafkaException, - OpenSearchException, - ProjectException, - RestAPIError, - UnknownSecretStorageError, - VectorDatabaseException, -] + +class RestAPIError(Exception): + """REST Exception encapsulating the response object and url.""" + + def __init__(self, url, response): + try: + error_object = response.json() + except Exception: + error_object = {} + message = ( + "Metadata operation error: (url: {}). 
Server response: \n"
+            "HTTP code: {}, HTTP reason: {}, body: {}, error code: {}, error msg: {}, user "
+            "msg: {}".format(
+                url,
+                response.status_code,
+                response.reason,
+                response.content,
+                error_object.get("errorCode", ""),
+                error_object.get("errorMsg", ""),
+                error_object.get("usrMsg", ""),
+            )
+        )
+        super().__init__(message)
+        self.url = url
+        self.response = response
+
+
+class UnknownSecretStorageError(Exception):
+    """This exception will be raised if an unknown secrets storage is passed as a parameter."""
+
+
+class GitException(Exception):
+    """Generic git exception"""
+
+
+class JobException(Exception):
+    """Generic job exception"""
+
+
+class EnvironmentException(Exception):
+    """Generic python environment exception"""
+
+
+class KafkaException(Exception):
+    """Generic kafka exception"""
+
+
+class DatasetException(Exception):
+    """Generic dataset exception"""
+
+
+class ProjectException(Exception):
+    """Generic project exception"""
+
+
+class OpenSearchException(Exception):
+    """Generic opensearch exception"""
+
+
+class JobExecutionException(Exception):
+    """Generic job execution exception"""
+
+
+class ExternalClientError(TypeError):
+    """Raised when external client cannot be initialized due to missing arguments."""
+
+    def __init__(self, message):
+        super().__init__(message)
diff --git a/python/hopsworks/client/external.py b/python/hopsworks/client/external.py
index 1384b1c20..d0a277e71 100644
--- a/python/hopsworks/client/external.py
+++ b/python/hopsworks/client/external.py
@@ -1,5 +1,5 @@
 #
-# Copyright 2024 Hopsworks AB
+# Copyright 2022 Logical Clocks AB
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,11 +14,154 @@
 # limitations under the License.
 #
 
-from hopsworks_common.client.external import (
-    Client,
-)
+import base64
+import os
 
+import requests
+from hopsworks.client import auth, base, exceptions
 
-__all__ = [
-    Client,
-]
+
+class Client(base.Client):
+    def __init__(
+        self,
+        host,
+        port,
+        project,
+        hostname_verification,
+        trust_store_path,
+        cert_folder,
+        api_key_file,
+        api_key_value,
+    ):
+        """Initializes a client in an external environment such as AWS Sagemaker."""
+        if not host:
+            raise exceptions.ExternalClientError("host")
+
+        self._host = host
+        self._port = port
+        self._base_url = "https://" + self._host + ":" + str(self._port)
+        self._project_name = project
+        if project is not None:
+            project_info = self._get_project_info(project)
+            self._project_id = str(project_info["projectId"])
+        else:
+            self._project_id = None
+
+        if api_key_value is not None:
+            api_key = api_key_value
+        elif api_key_file is not None:
+            if os.path.exists(api_key_file):
+                with open(api_key_file, mode="r") as file:
+                    api_key = file.read()
+            else:
+                raise IOError(
+                    "Could not find api key file on path: {}".format(api_key_file)
+                )
+        else:
+            raise exceptions.ExternalClientError(
+                "Either api_key_file or api_key_value must be set when connecting to"
+                " hopsworks from an external environment."
+ ) + + self._auth = auth.ApiKeyAuth(api_key) + + self._session = requests.session() + self._connected = True + self._verify = self._get_verify(self._host, trust_store_path) + + self._cert_folder_base = os.path.join(cert_folder, host) + + def download_certs(self, project_name): + project_info = self._get_project_info(project_name) + project_id = str(project_info["projectId"]) + + project_cert_folder = os.path.join(self._cert_folder_base, project_name) + + trust_store_path = os.path.join(project_cert_folder, "trustStore.jks") + key_store_path = os.path.join(project_cert_folder, "keyStore.jks") + + os.makedirs(project_cert_folder, exist_ok=True) + credentials = self._get_credentials(project_id) + self._write_b64_cert_to_bytes( + str(credentials["kStore"]), + path=key_store_path, + ) + self._write_b64_cert_to_bytes( + str(credentials["tStore"]), + path=trust_store_path, + ) + + self._write_pem_file( + credentials["caChain"], self._get_ca_chain_path(project_name) + ) + self._write_pem_file( + credentials["clientCert"], self._get_client_cert_path(project_name) + ) + self._write_pem_file( + credentials["clientKey"], self._get_client_key_path(project_name) + ) + + with open(os.path.join(project_cert_folder, "material_passwd"), "w") as f: + f.write(str(credentials["password"])) + + def _close(self): + """Closes a client and deletes certificates.""" + # TODO: Implement certificate cleanup. Currently do not remove certificates as it may break users using hsfs python ingestion. + self._connected = False + + def _get_jks_trust_store_path(self): + return self._trust_store_path + + def _get_jks_key_store_path(self): + return self._key_store_path + + def _get_ca_chain_path(self, project_name) -> str: + return os.path.join(self._cert_folder_base, project_name, "ca_chain.pem") + + def _get_client_cert_path(self, project_name) -> str: + return os.path.join(self._cert_folder_base, project_name, "client_cert.pem") + + def _get_client_key_path(self, project_name) -> str: + return os.path.join(self._cert_folder_base, project_name, "client_key.pem") + + def _get_project_info(self, project_name): + """Makes a REST call to hopsworks to get all metadata of a project for the provided project. + + :param project_name: the name of the project + :type project_name: str + :return: JSON response with project info + :rtype: dict + """ + return self._send_request("GET", ["project", "getProjectInfo", project_name]) + + def _write_b64_cert_to_bytes(self, b64_string, path): + """Converts b64 encoded certificate to bytes file . + + :param b64_string: b64 encoded string of certificate + :type b64_string: str + :param path: path where file is saved, including file name. e.g. 
/path/key-store.jks + :type path: str + """ + + with open(path, "wb") as f: + cert_b64 = base64.b64decode(b64_string) + f.write(cert_b64) + + def _cleanup_file(self, file_path): + """Removes local files with `file_path`.""" + try: + os.remove(file_path) + except OSError: + pass + + def replace_public_host(self, url): + """no need to replace as we are already in external client""" + return url + + @property + def host(self): + return self._host diff --git a/python/hopsworks/client/hopsworks.py b/python/hopsworks/client/hopsworks.py index c360b8cb9..514e3fe48 100644 --- a/python/hopsworks/client/hopsworks.py +++ b/python/hopsworks/client/hopsworks.py @@ -1,5 +1,5 @@ # -# Copyright 2024 Hopsworks AB +# Copyright 2022 Logical Clocks AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,11 +14,224 @@ # limitations under the License. # -from hopsworks_common.client.hopsworks import ( - Client, -) +import base64 +import os +import textwrap +from pathlib import Path +import requests +from hopsworks.client import auth, base -__all__ = [ - Client, -] + +try: + import jks +except ImportError: + pass + + +class Client(base.Client): + REQUESTS_VERIFY = "REQUESTS_VERIFY" + DOMAIN_CA_TRUSTSTORE_PEM = "DOMAIN_CA_TRUSTSTORE_PEM" + PROJECT_ID = "HOPSWORKS_PROJECT_ID" + PROJECT_NAME = "HOPSWORKS_PROJECT_NAME" + HADOOP_USER_NAME = "HADOOP_USER_NAME" + MATERIAL_DIRECTORY = "MATERIAL_DIRECTORY" + HDFS_USER = "HDFS_USER" + T_CERTIFICATE = "t_certificate" + K_CERTIFICATE = "k_certificate" + TRUSTSTORE_SUFFIX = "__tstore.jks" + KEYSTORE_SUFFIX = "__kstore.jks" + PEM_CA_CHAIN = "ca_chain.pem" + CERT_KEY_SUFFIX = "__cert.key" + MATERIAL_PWD = "material_passwd" + SECRETS_DIR = "SECRETS_DIR" + + def __init__(self): + """Initializes a client being run from a job/notebook directly on Hopsworks.""" + self._base_url = self._get_hopsworks_rest_endpoint() + self._host, self._port = self._get_host_port_pair() + self._secrets_dir = ( + os.environ[self.SECRETS_DIR] if self.SECRETS_DIR in os.environ else "" + ) + self._cert_key = self._get_cert_pw() + trust_store_path = self._get_trust_store_path() + hostname_verification = ( + os.environ[self.REQUESTS_VERIFY] + if self.REQUESTS_VERIFY in os.environ + else "true" + ) + self._project_id = os.environ[self.PROJECT_ID] + self._project_name = self._project_name() + try: + self._auth = auth.BearerAuth(self._read_jwt()) + except FileNotFoundError: + self._auth = auth.ApiKeyAuth(self._read_apikey()) + self._verify = self._get_verify(hostname_verification, trust_store_path) + self._session = requests.session() + + self._connected = True + + credentials = self._get_credentials(self._project_id) + + self._write_pem_file( + credentials["caChain"], self._get_ca_chain_path(self._project_name) + ) + self._write_pem_file( + credentials["clientCert"], self._get_client_cert_path(self._project_name) + ) + self._write_pem_file( + credentials["clientKey"], self._get_client_key_path(self._project_name) + ) + + def _get_hopsworks_rest_endpoint(self): + """Get the hopsworks REST endpoint for making requests to the REST API.""" + return os.environ[self.REST_ENDPOINT] + + def _get_trust_store_path(self): + """Convert truststore from jks to pem and return the location""" + ca_chain_path = Path(self.PEM_CA_CHAIN) + if not ca_chain_path.exists(): + self._write_ca_chain(ca_chain_path) + return str(ca_chain_path) + + def _get_ca_chain_path(self, project_name) -> str: + return os.path.join("/tmp", "ca_chain.pem") + + 
def _get_client_cert_path(self, project_name) -> str:
+        return os.path.join("/tmp", "client_cert.pem")
+
+    def _get_client_key_path(self, project_name) -> str:
+        return os.path.join("/tmp", "client_key.pem")
+
+    def _write_ca_chain(self, ca_chain_path):
+        """
+        Converts a JKS truststore file into PEM to be compatible with Python libraries
+        """
+        keystore_pw = self._cert_key
+        keystore_ca_cert = self._convert_jks_to_pem(
+            self._get_jks_key_store_path(), keystore_pw
+        )
+        truststore_ca_cert = self._convert_jks_to_pem(
+            self._get_jks_trust_store_path(), keystore_pw
+        )
+
+        with ca_chain_path.open("w") as f:
+            f.write(keystore_ca_cert + truststore_ca_cert)
+
+    def _convert_jks_to_pem(self, jks_path, keystore_pw):
+        """
+        Converts a JKS keystore that contains the client private key,
+        client certificate and the CA certificate that was used to
+        sign the certificate to PEM format and returns the CA certificate.
+        Args:
+        :jks_path: path to the JKS file
+        :keystore_pw: password for decrypting the JKS file
+        Returns:
+             strings: (ca_cert)
+        """
+        # load the keystore and decrypt it with password
+        ks = jks.KeyStore.load(jks_path, keystore_pw, try_decrypt_keys=True)
+        ca_certs = ""
+
+        # Convert CA Certificates into PEM format and append to string
+        for _alias, c in ks.certs.items():
+            ca_certs = ca_certs + self._bytes_to_pem_str(c.cert, "CERTIFICATE")
+        return ca_certs
+
+    def _bytes_to_pem_str(self, der_bytes, pem_type):
+        """
+        Utility function for creating PEM files
+
+        Args:
+            der_bytes: DER encoded bytes
+            pem_type: type of PEM, e.g. Certificate, Private key, or RSA private key
+
+        Returns:
+            PEM String for a DER-encoded certificate or private key
+        """
+        pem_str = ""
+        pem_str = pem_str + "-----BEGIN {}-----".format(pem_type) + "\n"
+        pem_str = (
+            pem_str
+            + "\r\n".join(
+                textwrap.wrap(base64.b64encode(der_bytes).decode("ascii"), 64)
+            )
+            + "\n"
+        )
+        pem_str = pem_str + "-----END {}-----".format(pem_type) + "\n"
+        return pem_str
+
+    def _get_jks_trust_store_path(self):
+        """
+        Get truststore location
+
+        Returns:
+             truststore location
+        """
+        t_certificate = Path(self.T_CERTIFICATE)
+        if t_certificate.exists():
+            return str(t_certificate)
+        else:
+            username = os.environ[self.HADOOP_USER_NAME]
+            material_directory = Path(os.environ[self.MATERIAL_DIRECTORY])
+            return str(material_directory.joinpath(username + self.TRUSTSTORE_SUFFIX))
+
+    def _get_jks_key_store_path(self):
+        """
+        Get keystore location
+
+        Returns:
+             keystore location
+        """
+        k_certificate = Path(self.K_CERTIFICATE)
+        if k_certificate.exists():
+            return str(k_certificate)
+        else:
+            username = os.environ[self.HADOOP_USER_NAME]
+            material_directory = Path(os.environ[self.MATERIAL_DIRECTORY])
+            return str(material_directory.joinpath(username + self.KEYSTORE_SUFFIX))
+
+    def _project_name(self):
+        try:
+            return os.environ[self.PROJECT_NAME]
+        except KeyError:
+            pass
+
+        hops_user = self._project_user()
+        hops_user_split = hops_user.split(
+            "__"
+        )  # project users have username project__user
+        project = hops_user_split[0]
+        return project
+
+    def _project_user(self):
+        try:
+            hops_user = os.environ[self.HADOOP_USER_NAME]
+        except KeyError:
+            hops_user = os.environ[self.HDFS_USER]
+        return hops_user
+
+    def _get_cert_pw(self):
+        """
+        Get keystore password from local container
+
+        Returns:
+            Certificate password
+        """
+        pwd_path = Path(self.MATERIAL_PWD)
+        if not pwd_path.exists():
+            username = os.environ[self.HADOOP_USER_NAME]
+            material_directory = Path(os.environ[self.MATERIAL_DIRECTORY])
+            pwd_path = material_directory.joinpath(username +
self.CERT_KEY_SUFFIX)
+
+        with pwd_path.open() as f:
+            return f.read()
+
+    def replace_public_host(self, url):
+        """replace hostname with the public hostname set in HOPSWORKS_PUBLIC_HOST"""
+        ui_url = url._replace(netloc=os.environ[self.HOPSWORKS_PUBLIC_HOST])
+        return ui_url
+
+    @property
+    def host(self):
+        return self._host
diff --git a/python/hopsworks/client/online_store_rest_client.py b/python/hopsworks/client/online_store_rest_client.py
deleted file mode 100644
index c75be81b7..000000000
--- a/python/hopsworks/client/online_store_rest_client.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#
-# Copyright 2024 Hopsworks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from hopsworks_common.client.online_store_rest_client import (
-    OnlineStoreRestClientSingleton,
-    get_instance,
-    init_or_reset_online_store_rest_client,
-)
-
-
-__all__ = [
-    OnlineStoreRestClientSingleton,
-    get_instance,
-    init_or_reset_online_store_rest_client,
-]
diff --git a/python/hopsworks/connection.py b/python/hopsworks/connection.py
index c43cfeeb9..61f2e3d6a 100644
--- a/python/hopsworks/connection.py
+++ b/python/hopsworks/connection.py
@@ -215,6 +215,12 @@ def _check_compatibility(self):
             )
             sys.stderr.flush()
 
+    def _set_client_variables(self):
+        python_version = self._variable_api.get_variable(
+            "docker_base_image_python_version"
+        )
+        client.set_python_version(python_version)
+
     @not_connected
     def connect(self):
         """Instantiate the connection.
@@ -265,6 +271,7 @@ def connect(self):
             )
 
             self._check_compatibility()
+            self._set_client_variables()
 
     def close(self):
         """Close a connection gracefully.
diff --git a/python/hopsworks/core/variable_api.py b/python/hopsworks/core/variable_api.py
index 9d6e9765f..d4e8d188c 100644
--- a/python/hopsworks/core/variable_api.py
+++ b/python/hopsworks/core/variable_api.py
@@ -1,5 +1,5 @@
 #
-# Copyright 2024 Hopsworks AB
+# Copyright 2022 Hopsworks AB
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,12 +13,104 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+from __future__ import annotations
 
-from hopsworks_common.core.variable_api import (
-    VariableApi,
-)
+import re
+from typing import Optional, Tuple
 
+from hopsworks import client
+from hopsworks.client.exceptions import RestAPIError
 
-__all__ = [
-    VariableApi,
-]
+
+class VariableApi:
+    def __init__(self):
+        pass
+
+    def get_variable(self, variable: str):
+        """Get the configured value of a variable.
+
+        # Arguments
+            variable: Name of the variable.
+        # Returns
+            The variable's value.
+        # Raises
+            `RestAPIError`: If unable to get the variable
+        """
+
+        _client = client.get_instance()
+
+        path_params = ["variables", variable]
+        domain = _client._send_request("GET", path_params)
+
+        return domain["successMessage"]
+
+    def get_version(self, software: str) -> Optional[str]:
+        """Get version of a software component.
+
+        # Arguments
+            software: Name of the software.
+        # Returns
+            The software's version, if the software is available, otherwise `None`.
+        # Raises
+            `RestAPIError`: If unable to get the version
+        """
+
+        _client = client.get_instance()
+
+        path_params = ["variables", "versions"]
+        resp = _client._send_request("GET", path_params)
+
+        for entry in resp:
+            if entry["software"] == software:
+                return entry["version"]
+        return None
+
+    def parse_major_and_minor(
+        self, backend_version: str
+    ) -> Tuple[Optional[str], Optional[str]]:
+        """Extract major and minor version from full version.
+
+        # Arguments
+            backend_version: The full version.
+        # Returns
+            (major, minor): The pair of major and minor parts of the version, or (None, None) if the version format is incorrect.
+        """
+
+        version_pattern = r"(\d+)\.(\d+)"
+        matches = re.match(version_pattern, backend_version)
+
+        if matches is None:
+            return (None, None)
+        return matches.group(1), matches.group(2)
+
+    def get_flyingduck_enabled(self) -> bool:
+        """Check if Flying Duck is enabled on the backend.
+
+        # Returns
+            `True`: If Flying Duck is available, `False` otherwise.
+        # Raises
+            `RestAPIError`: If unable to obtain the flag's value.
+        """
+        return self.get_variable("enable_flyingduck") == "true"
+
+    def get_loadbalancer_external_domain(self) -> str:
+        """Get domain of external loadbalancer.
+
+        # Returns
+            `str`: The domain of the external loadbalancer, if it is set up, otherwise empty string `""`.
+        """
+        try:
+            return self.get_variable("loadbalancer_external_domain")
+        except RestAPIError:
+            return ""
+
+    def get_service_discovery_domain(self) -> str:
+        """Get domain of service discovery server.
+
+        # Returns
+            `str`: The domain of the service discovery server, if it is set up, otherwise empty string `""`.
+        """
+        try:
+            return self.get_variable("service_discovery_domain")
+        except RestAPIError:
+            return ""
diff --git a/python/hopsworks/decorators.py b/python/hopsworks/decorators.py
index 1165a2daa..51b7d635a 100644
--- a/python/hopsworks/decorators.py
+++ b/python/hopsworks/decorators.py
@@ -1,5 +1,5 @@
 #
-# Copyright 2024 Hopsworks AB
+# Copyright 2022 Logical Clocks AB
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,21 +14,42 @@
 # limitations under the License.
 #
 
-from hopsworks_common.decorators import (
-    HopsworksConnectionError,
-    NoHopsworksConnectionError,
-    connected,
-    not_connected,
-    typechecked,
-    uses_great_expectations,
-)
-
-
-__all__ = [
-    HopsworksConnectionError,
-    NoHopsworksConnectionError,
-    connected,
-    not_connected,
-    typechecked,
-    uses_great_expectations,
-]
+import functools
+
+
+def not_connected(fn):
+    @functools.wraps(fn)
+    def if_not_connected(inst, *args, **kwargs):
+        if inst._connected:
+            raise HopsworksConnectionError
+        return fn(inst, *args, **kwargs)
+
+    return if_not_connected
+
+
+def connected(fn):
+    @functools.wraps(fn)
+    def if_connected(inst, *args, **kwargs):
+        if not inst._connected:
+            raise NoHopsworksConnectionError
+        return fn(inst, *args, **kwargs)
+
+    return if_connected
+
+
+class HopsworksConnectionError(Exception):
+    """Thrown when attempting to change connection attributes while connected."""
+
+    def __init__(self):
+        super().__init__(
+            "Connection is currently in use. Needs to be closed for modification."
+        )
+
+
+class NoHopsworksConnectionError(Exception):
+    """Thrown when attempting to perform an operation on the connection while not connected."""
+
+    def __init__(self):
+        super().__init__(
+            "Connection is not active. 
Needs to be connected for hopsworks operations." + ) diff --git a/python/hopsworks_common/client/__init__.py b/python/hopsworks_common/client/__init__.py deleted file mode 100644 index 2cd86bb83..000000000 --- a/python/hopsworks_common/client/__init__.py +++ /dev/null @@ -1,72 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from __future__ import annotations - -from typing import Literal, Optional, Union - -from hopsworks_common.client import external, hopsworks - - -_client: Union[hopsworks.Client, external.Client, None] = None - - -def init( - client_type: Union[Literal["hopsworks"], Literal["external"]], - host: Optional[str] = None, - port: Optional[int] = None, - project: Optional[str] = None, - engine: Optional[str] = None, - region_name: Optional[str] = None, - secrets_store=None, - hostname_verification: Optional[bool] = None, - trust_store_path: Optional[str] = None, - cert_folder: Optional[str] = None, - api_key_file: Optional[str] = None, - api_key_value: Optional[str] = None, -) -> None: - global _client - if not _client: - if client_type == "hopsworks": - _client = hopsworks.Client() - elif client_type == "external": - _client = external.Client( - host, - port, - project, - engine, - region_name, - secrets_store, - hostname_verification, - trust_store_path, - cert_folder, - api_key_file, - api_key_value, - ) - - -def get_instance() -> Union[hopsworks.Client, external.Client]: - global _client - if _client: - return _client - raise Exception("Couldn't find client. Try reconnecting to Hopsworks.") - - -def stop() -> None: - global _client - if _client: - _client._close() - _client = None diff --git a/python/hopsworks_common/client/auth.py b/python/hopsworks_common/client/auth.py deleted file mode 100644 index f90b06cf4..000000000 --- a/python/hopsworks_common/client/auth.py +++ /dev/null @@ -1,52 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from __future__ import annotations - -import requests - - -class BearerAuth(requests.auth.AuthBase): - """Class to encapsulate a Bearer token.""" - - def __init__(self, token: str) -> None: - self._token = token.strip() - - def __call__(self, r: requests.Request) -> requests.Request: - r.headers["Authorization"] = "Bearer " + self._token - return r - - -class ApiKeyAuth(requests.auth.AuthBase): - """Class to encapsulate an API key.""" - - def __init__(self, token: str) -> None: - self._token = token.strip() - - def __call__(self, r: requests.Request) -> requests.Request: - r.headers["Authorization"] = "ApiKey " + self._token - return r - - -class OnlineStoreKeyAuth(requests.auth.AuthBase): - """Class to encapsulate an API key.""" - - def __init__(self, token): - self._token = token.strip() - - def __call__(self, r): - r.headers["X-API-KEY"] = self._token - return r diff --git a/python/hopsworks_common/client/base.py b/python/hopsworks_common/client/base.py deleted file mode 100644 index 7c7b4e602..000000000 --- a/python/hopsworks_common/client/base.py +++ /dev/null @@ -1,293 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from __future__ import annotations - -import base64 -import os -import textwrap -import time -from pathlib import Path - -import furl -import requests -import urllib3 -from hopsworks_common.client import auth, exceptions -from hopsworks_common.decorators import connected - - -try: - import jks -except ImportError: - pass - - -urllib3.disable_warnings(urllib3.exceptions.SecurityWarning) -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - - -class Client: - TOKEN_FILE = "token.jwt" - TOKEN_EXPIRED_RETRY_INTERVAL = 0.6 - TOKEN_EXPIRED_MAX_RETRIES = 10 - - APIKEY_FILE = "api.key" - REST_ENDPOINT = "REST_ENDPOINT" - DEFAULT_DATABRICKS_ROOT_VIRTUALENV_ENV = "DEFAULT_DATABRICKS_ROOT_VIRTUALENV_ENV" - HOPSWORKS_PUBLIC_HOST = "HOPSWORKS_PUBLIC_HOST" - - def _get_verify(self, verify, trust_store_path): - """Get verification method for sending HTTP requests to Hopsworks. - - Credit to https://gist.github.com/gdamjan/55a8b9eec6cf7b771f92021d93b87b2c - - :param verify: perform hostname verification, 'true' or 'false' - :type verify: str - :param trust_store_path: path of the truststore locally if it was uploaded manually to - the external environment such as AWS Sagemaker - :type trust_store_path: str - :return: if verify is true and the truststore is provided, then return the trust store location - if verify is true but the truststore wasn't provided, then return true - if verify is false, then return false - :rtype: str or boolean - """ - if verify == "true": - if trust_store_path is not None: - return trust_store_path - else: - return True - - return False - - def _get_host_port_pair(self): - """ - Removes "http or https" from the rest endpoint and returns a list - [endpoint, port], where endpoint is on the format /path.. 
without http:// - - :return: a list [endpoint, port] - :rtype: list - """ - endpoint = self._base_url - if "http" in endpoint: - last_index = endpoint.rfind("/") - endpoint = endpoint[last_index + 1 :] - host, port = endpoint.split(":") - return host, port - - def _read_jwt(self): - """Retrieve jwt from local container.""" - return self._read_file(self.TOKEN_FILE) - - def _read_apikey(self): - """Retrieve apikey from local container.""" - return self._read_file(self.APIKEY_FILE) - - def _read_file(self, secret_file): - """Retrieve secret from local container.""" - with open(os.path.join(self._secrets_dir, secret_file), "r") as secret: - return secret.read() - - def _get_credentials(self, project_id): - """Makes a REST call to hopsworks for getting the project user certificates needed to connect to services such as Hive - - :param project_id: id of the project - :type project_id: int - :return: JSON response with credentials - :rtype: dict - """ - return self._send_request("GET", ["project", project_id, "credentials"]) - - def _write_pem_file(self, content: str, path: str) -> None: - with open(path, "w") as f: - f.write(content) - - @connected - def _send_request( - self, - method, - path_params, - query_params=None, - headers=None, - data=None, - stream=False, - files=None, - with_base_path_params=True, - ): - """Send REST request to Hopsworks. - - Uses the client it is executed from. Path parameters are url encoded automatically. - - :param method: 'GET', 'PUT' or 'POST' - :type method: str - :param path_params: a list of path params to build the query url from starting after - the api resource, for example `["project", 119, "featurestores", 67]`. - :type path_params: list - :param query_params: A dictionary of key/value pairs to be added as query parameters, - defaults to None - :type query_params: dict, optional - :param headers: Additional header information, defaults to None - :type headers: dict, optional - :param data: The payload as a python dictionary to be sent as json, defaults to None - :type data: dict, optional - :param stream: Set if response should be a stream, defaults to False - :type stream: boolean, optional - :param files: dictionary for multipart encoding upload - :type files: dict, optional - :raises RestAPIError: Raised when request wasn't correctly received, understood or accepted - :return: Response json - :rtype: dict - """ - f_url = furl.furl(self._base_url) - if with_base_path_params: - base_path_params = ["hopsworks-api", "api"] - f_url.path.segments = base_path_params + path_params - else: - f_url.path.segments = path_params - url = str(f_url) - - request = requests.Request( - method, - url=url, - headers=headers, - data=data, - params=query_params, - auth=self._auth, - files=files, - ) - - prepped = self._session.prepare_request(request) - response = self._session.send(prepped, verify=self._verify, stream=stream) - - if response.status_code == 401 and self.REST_ENDPOINT in os.environ: - # refresh token and retry request - only on hopsworks - response = self._retry_token_expired( - request, stream, self.TOKEN_EXPIRED_RETRY_INTERVAL, 1 - ) - - if response.status_code // 100 != 2: - raise exceptions.RestAPIError(url, response) - - if stream: - return response - else: - # handle different success response codes - if len(response.content) == 0: - return None - return response.json() - - def _retry_token_expired(self, request, stream, wait, retries): - """Refresh the JWT token and retry the request. Only on Hopsworks. 
- As the token might take a while to get refreshed. Keep trying - """ - # Sleep the waited time before re-issuing the request - time.sleep(wait) - - self._auth = auth.BearerAuth(self._read_jwt()) - # Update request with the new token - request.auth = self._auth - prepped = self._session.prepare_request(request) - response = self._session.send(prepped, verify=self._verify, stream=stream) - - if response.status_code == 401 and retries < self.TOKEN_EXPIRED_MAX_RETRIES: - # Try again. - return self._retry_token_expired(request, stream, wait * 2, retries + 1) - else: - # If the number of retries have expired, the _send_request method - # will throw an exception to the user as part of the status_code validation. - return response - - def _close(self): - """Closes a client. Can be implemented for clean up purposes, not mandatory.""" - self._connected = False - - def _write_pem( - self, keystore_path, keystore_pw, truststore_path, truststore_pw, prefix - ): - ks = jks.KeyStore.load(Path(keystore_path), keystore_pw, try_decrypt_keys=True) - ts = jks.KeyStore.load( - Path(truststore_path), truststore_pw, try_decrypt_keys=True - ) - - ca_chain_path = os.path.join("/tmp", f"{prefix}_ca_chain.pem") - self._write_ca_chain(ks, ts, ca_chain_path) - - client_cert_path = os.path.join("/tmp", f"{prefix}_client_cert.pem") - self._write_client_cert(ks, client_cert_path) - - client_key_path = os.path.join("/tmp", f"{prefix}_client_key.pem") - self._write_client_key(ks, client_key_path) - - return ca_chain_path, client_cert_path, client_key_path - - def _write_ca_chain(self, ks, ts, ca_chain_path): - """ - Converts JKS keystore and truststore file into ca chain PEM to be compatible with Python libraries - """ - ca_chain = "" - for store in [ks, ts]: - for _, c in store.certs.items(): - ca_chain = ca_chain + self._bytes_to_pem_str(c.cert, "CERTIFICATE") - - with Path(ca_chain_path).open("w") as f: - f.write(ca_chain) - - def _write_client_cert(self, ks, client_cert_path): - """ - Converts JKS keystore file into client cert PEM to be compatible with Python libraries - """ - client_cert = "" - for _, pk in ks.private_keys.items(): - for c in pk.cert_chain: - client_cert = client_cert + self._bytes_to_pem_str(c[1], "CERTIFICATE") - - with Path(client_cert_path).open("w") as f: - f.write(client_cert) - - def _write_client_key(self, ks, client_key_path): - """ - Converts JKS keystore file into client key PEM to be compatible with Python libraries - """ - client_key = "" - for _, pk in ks.private_keys.items(): - client_key = client_key + self._bytes_to_pem_str( - pk.pkey_pkcs8, "PRIVATE KEY" - ) - - with Path(client_key_path).open("w") as f: - f.write(client_key) - - def _bytes_to_pem_str(self, der_bytes, pem_type): - """ - Utility function for creating PEM files - - Args: - der_bytes: DER encoded bytes - pem_type: type of PEM, e.g Certificate, Private key, or RSA private key - - Returns: - PEM String for a DER-encoded certificate or private key - """ - pem_str = "" - pem_str = pem_str + "-----BEGIN {}-----".format(pem_type) + "\n" - pem_str = ( - pem_str - + "\r\n".join( - textwrap.wrap(base64.b64encode(der_bytes).decode("ascii"), 64) - ) - + "\n" - ) - pem_str = pem_str + "-----END {}-----".format(pem_type) + "\n" - return pem_str diff --git a/python/hopsworks_common/client/exceptions.py b/python/hopsworks_common/client/exceptions.py deleted file mode 100644 index 4e8ba9b08..000000000 --- a/python/hopsworks_common/client/exceptions.py +++ /dev/null @@ -1,143 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# 
Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from __future__ import annotations - -from enum import Enum -from typing import Any, Union - -import requests - - -class RestAPIError(Exception): - """REST Exception encapsulating the response object and url.""" - - class FeatureStoreErrorCode(int, Enum): - FEATURE_GROUP_COMMIT_NOT_FOUND = 270227 - STATISTICS_NOT_FOUND = 270228 - - def __eq__(self, other: Union[int, Any]) -> bool: - if isinstance(other, int): - return self.value == other - if isinstance(other, self.__class__): - return self is other - return False - - def __init__(self, url: str, response: requests.Response) -> None: - try: - error_object = response.json() - if isinstance(error_object, str): - error_object = {"errorMsg": error_object} - except Exception: - error_object = {} - message = ( - "Metadata operation error: (url: {}). Server response: \n" - "HTTP code: {}, HTTP reason: {}, body: {}, error code: {}, error msg: {}, user " - "msg: {}".format( - url, - response.status_code, - response.reason, - response.content, - error_object.get("errorCode", ""), - error_object.get("errorMsg", ""), - error_object.get("usrMsg", ""), - ) - ) - super().__init__(message) - self.url = url - self.response = response - - -class UnknownSecretStorageError(Exception): - """This exception will be raised if an unused secrets storage is passed as a parameter.""" - - -class FeatureStoreException(Exception): - """Generic feature store exception""" - - -class VectorDatabaseException(Exception): - # reason - REQUESTED_K_TOO_LARGE = "REQUESTED_K_TOO_LARGE" - REQUESTED_NUM_RESULT_TOO_LARGE = "REQUESTED_NUM_RESULT_TOO_LARGE" - OTHERS = "OTHERS" - - # info - REQUESTED_K_TOO_LARGE_INFO_K = "k" - REQUESTED_NUM_RESULT_TOO_LARGE_INFO_N = "n" - - def __init__(self, reason: str, message: str, info: str) -> None: - super().__init__(message) - self._info = info - self._reason = reason - - @property - def reason(self) -> str: - return self._reason - - @property - def info(self) -> str: - return self._info - - -class DataValidationException(FeatureStoreException): - """Raised when data validation fails only when using "STRICT" validation ingestion policy.""" - - def __init__(self, message: str) -> None: - super().__init__(message) - - -class ExternalClientError(TypeError): - """Raised when external client cannot be initialized due to missing arguments.""" - - def __init__(self, missing_argument: str) -> None: - message = ( - "{0} cannot be of type NoneType, {0} is a non-optional " - "argument to connect to hopsworks from an external environment." 
- ).format(missing_argument) - super().__init__(message) - - -class GitException(Exception): - """Generic git exception""" - - -class JobException(Exception): - """Generic job exception""" - - -class EnvironmentException(Exception): - """Generic python environment exception""" - - -class KafkaException(Exception): - """Generic kafka exception""" - - -class DatasetException(Exception): - """Generic dataset exception""" - - -class ProjectException(Exception): - """Generic project exception""" - - -class OpenSearchException(Exception): - """Generic opensearch exception""" - - -class JobExecutionException(Exception): - """Generic job executions exception""" diff --git a/python/hopsworks_common/client/external.py b/python/hopsworks_common/client/external.py deleted file mode 100644 index c01045af8..000000000 --- a/python/hopsworks_common/client/external.py +++ /dev/null @@ -1,407 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from __future__ import annotations - -import base64 -import json -import logging -import os - -import boto3 -import requests -from hopsworks_common.client import auth, base, exceptions -from hopsworks_common.client.exceptions import FeatureStoreException - - -try: - from pyspark.sql import SparkSession -except ImportError: - pass - - -_logger = logging.getLogger(__name__) - - -class Client(base.Client): - DEFAULT_REGION = "default" - SECRETS_MANAGER = "secretsmanager" - PARAMETER_STORE = "parameterstore" - LOCAL_STORE = "local" - - def __init__( - self, - host, - port, - project, - engine, - region_name, - secrets_store, - hostname_verification, - trust_store_path, - cert_folder, - api_key_file, - api_key_value, - ): - """Initializes a client in an external environment such as AWS Sagemaker.""" - _logger.info("Initializing external client") - if not host: - raise exceptions.ExternalClientError("host") - - self._host = host - self._port = port - self._base_url = "https://" + self._host + ":" + str(self._port) - _logger.info("Base URL: %s", self._base_url) - self._project_name = project - if project is not None: - project_info = self._get_project_info(project) - self._project_id = str(project_info["projectId"]) - _logger.debug("Setting Project ID: %s", self._project_id) - else: - self._project_id = None - _logger.debug("Project name: %s", self._project_name) - self._region_name = region_name or self.DEFAULT_REGION - _logger.debug("Region name: %s", self._region_name) - - if api_key_value is not None: - _logger.debug("Using provided API key value") - api_key = api_key_value - else: - _logger.debug("Querying secrets store for API key") - if secrets_store is None: - secrets_store = self.LOCAL_STORE - api_key = self._get_secret(secrets_store, "api-key", api_key_file) - - _logger.debug("Using api key to setup header authentification") - self._auth = auth.ApiKeyAuth(api_key) - - _logger.debug("Setting up requests session") - self._session = requests.session() - self._connected = True - - self._verify = self._get_verify(self._host, 
trust_store_path) - _logger.debug("Verify: %s", self._verify) - - self._cert_key = None - self._cert_folder_base = cert_folder - self._cert_folder = None - - if project is None: - return - - if engine == "python": - self.download_certs(project) - - elif engine == "spark": - # When using the Spark engine with metastore connection, the certificates - # are needed when the application starts (before user code is run) - # So in this case, we can't materialize the certificates on the fly. - _logger.debug("Running in Spark environment, initializing Spark session") - _spark_session = SparkSession.builder.enableHiveSupport().getOrCreate() - - self._validate_spark_configuration(_spark_session) - with open( - _spark_session.conf.get("spark.hadoop.hops.ssl.keystores.passwd.name"), - "r", - ) as f: - self._cert_key = f.read() - - self._trust_store_path = _spark_session.conf.get( - "spark.hadoop.hops.ssl.trustore.name" - ) - self._key_store_path = _spark_session.conf.get( - "spark.hadoop.hops.ssl.keystore.name" - ) - - elif engine == "spark-no-metastore": - _logger.debug( - "Running in Spark environment with no metastore, initializing Spark session" - ) - _spark_session = SparkSession.builder.getOrCreate() - self.download_certs(project) - - # Set credentials location in the Spark configuration - # Set other options in the Spark configuration - configuration_dict = { - "hops.ssl.trustore.name": self._trust_store_path, - "hops.ssl.keystore.name": self._key_store_path, - "hops.ssl.keystores.passwd.name": self._cert_key_path, - "fs.permissions.umask-mode": "0002", - "fs.hopsfs.impl": "io.hops.hopsfs.client.HopsFileSystem", - "hops.rpc.socket.factory.class.default": "io.hops.hadoop.shaded.org.apache.hadoop.net.HopsSSLSocketFactory", - "client.rpc.ssl.enabled.protocol": "TLSv1.2", - "hops.ssl.hostname.verifier": "ALLOW_ALL", - "hops.ipc.server.ssl.enabled": "true", - } - - for conf_key, conf_value in configuration_dict.items(): - _spark_session._jsc.hadoopConfiguration().set(conf_key, conf_value) - - def download_certs(self, project): - res = self._materialize_certs(self, project) - self._write_pem_file(res["caChain"], self._get_ca_chain_path()) - self._write_pem_file(res["clientCert"], self._get_client_cert_path()) - self._write_pem_file(res["clientKey"], self._get_client_key_path()) - return res - - def _materialize_certs(self, project): - if project != self._project_name: - self._project_name = project - _logger.debug("Project name: %s", self._project_name) - project_info = self._get_project_info(project) - self._project_id = str(project_info["projectId"]) - _logger.debug("Setting Project ID: %s", self._project_id) - - self._cert_folder = os.path.join(self._cert_folder_base, self._host, project) - self._trust_store_path = os.path.join(self._cert_folder, "trustStore.jks") - self._key_store_path = os.path.join(self._cert_folder, "keyStore.jks") - - if os.path.exists(self._cert_folder): - _logger.debug( - f"Running in Python environment, reading certificates from certificates folder {self._cert_folder_base}" - ) - _logger.debug("Found certificates: %s", os.listdir(self._cert_folder_base)) - else: - _logger.debug( - f"Running in Python environment, creating certificates folder {self._cert_folder_base}" - ) - os.makedirs(self._cert_folder, exist_ok=True) - - credentials = self._get_credentials(self._project_id) - self._write_b64_cert_to_bytes( - str(credentials["kStore"]), - path=self._get_jks_key_store_path(), - ) - self._write_b64_cert_to_bytes( - str(credentials["tStore"]), - 
path=self._get_jks_trust_store_path(), - ) - - self._cert_key = str(credentials["password"]) - self._cert_key_path = os.path.join(self._cert_folder, "material_passwd") - with open(self._cert_key_path, "w") as f: - f.write(str(credentials["password"])) - - # Return the credentials object for the Python engine to materialize the pem files. - return credentials - - def _validate_spark_configuration(self, _spark_session): - exception_text = "Spark is misconfigured for communication with Hopsworks, missing or invalid property: " - - configuration_dict = { - "spark.hadoop.hops.ssl.trustore.name": None, - "spark.hadoop.hops.rpc.socket.factory.class.default": "io.hops.hadoop.shaded.org.apache.hadoop.net.HopsSSLSocketFactory", - "spark.serializer": "org.apache.spark.serializer.KryoSerializer", - "spark.hadoop.hops.ssl.hostname.verifier": "ALLOW_ALL", - "spark.hadoop.hops.ssl.keystore.name": None, - "spark.hadoop.fs.hopsfs.impl": "io.hops.hopsfs.client.HopsFileSystem", - "spark.hadoop.hops.ssl.keystores.passwd.name": None, - "spark.hadoop.hops.ipc.server.ssl.enabled": "true", - "spark.hadoop.client.rpc.ssl.enabled.protocol": "TLSv1.2", - "spark.hadoop.hive.metastore.uris": None, - "spark.sql.hive.metastore.jars": None, - } - _logger.debug("Configuration dict: %s", configuration_dict) - - for key, value in configuration_dict.items(): - _logger.debug("Validating key: %s", key) - if not ( - _spark_session.conf.get(key, "not_found") != "not_found" - and (value is None or _spark_session.conf.get(key, None) == value) - ): - raise FeatureStoreException(exception_text + key) - - def _close(self): - """Closes a client and deletes certificates.""" - _logger.info("Closing external client and cleaning up certificates.") - self._connected = False - if self._cert_folder is None: - _logger.debug("No certificates to clean up.") - # On external Spark clients (Databricks, Spark Cluster), - # certificates need to be provided before the Spark application starts. - return - - # Clean up only on AWS - _logger.debug("Cleaning up certificates. 
AWS only.") - self._cleanup_file(self._get_jks_key_store_path()) - self._cleanup_file(self._get_jks_trust_store_path()) - self._cleanup_file(os.path.join(self._cert_folder, "material_passwd")) - self._cleanup_file(self._get_ca_chain_path()) - self._cleanup_file(self._get_client_cert_path()) - self._cleanup_file(self._get_client_key_path()) - - try: - # delete project level - os.rmdir(self._cert_folder) - # delete host level - os.rmdir(os.path.dirname(self._cert_folder)) - # on AWS base dir will be empty, and can be deleted otherwise raises OSError - os.rmdir(self._cert_folder_base) - except OSError: - pass - - self._cert_folder = None - - def _get_jks_trust_store_path(self): - _logger.debug("Getting trust store path: %s", self._trust_store_path) - return self._trust_store_path - - def _get_jks_key_store_path(self): - _logger.debug("Getting key store path: %s", self._key_store_path) - return self._key_store_path - - def _get_ca_chain_path(self, project_name=None) -> str: - if project_name is None: - project_name = self._project_name - path = os.path.join( - self._cert_folder_base, self._host, project_name, "ca_chain.pem" - ) - _logger.debug(f"Getting ca chain path {path}") - return path - - def _get_client_cert_path(self, project_name=None) -> str: - if project_name is None: - project_name = self._project_name - path = os.path.join( - self._cert_folder_base, self._host, project_name, "client_cert.pem" - ) - _logger.debug(f"Getting client cert path {path}") - return path - - def _get_client_key_path(self, project_name=None) -> str: - if project_name is None: - project_name = self._project_name - path = os.path.join( - self._cert_folder_base, self._host, project_name, "client_key.pem" - ) - _logger.debug(f"Getting client key path {path}") - return path - - def _get_secret(self, secrets_store, secret_key=None, api_key_file=None): - """Returns secret value from the AWS Secrets Manager or Parameter Store. - - :param secrets_store: the underlying secrets storage to be used, e.g. `secretsmanager` or `parameterstore` - :type secrets_store: str - :param secret_key: key for the secret value, e.g. `api-key`, `cert-key`, `trust-store`, `key-store`, defaults to None - :type secret_key: str, optional - :param api_key_file: path to a file containing an api key, defaults to None - :type api_key_file: str optional - :raises hsfs.client.exceptions.ExternalClientError: `api_key_file` needs to be set for local mode - :raises hsfs.client.exceptions.UnknownSecretStorageError: Provided secrets storage not supported - :return: secret - :rtype: str - """ - _logger.debug(f"Querying secrets store {secrets_store} for secret {secret_key}") - if secrets_store == self.SECRETS_MANAGER: - return self._query_secrets_manager(secret_key) - elif secrets_store == self.PARAMETER_STORE: - return self._query_parameter_store(secret_key) - elif secrets_store == self.LOCAL_STORE: - if not api_key_file: - raise exceptions.ExternalClientError( - "api_key_file needs to be set for local mode" - ) - _logger.debug(f"Reading api key from {api_key_file}") - with open(api_key_file) as f: - return f.readline().strip() - else: - raise exceptions.UnknownSecretStorageError( - "Secrets storage " + secrets_store + " is not supported." 
- ) - - def _query_secrets_manager(self, secret_key): - _logger.debug("Querying secrets manager for secret key: %s", secret_key) - secret_name = "hopsworks/role/" + self._assumed_role() - args = {"service_name": "secretsmanager"} - region_name = self._get_region() - if region_name: - args["region_name"] = region_name - client = boto3.client(**args) - get_secret_value_response = client.get_secret_value(SecretId=secret_name) - return json.loads(get_secret_value_response["SecretString"])[secret_key] - - def _assumed_role(self): - _logger.debug("Getting assumed role") - client = boto3.client("sts") - response = client.get_caller_identity() - # arns for assumed roles in SageMaker follow the following schema - # arn:aws:sts::123456789012:assumed-role/my-role-name/my-role-session-name - local_identifier = response["Arn"].split(":")[-1].split("/") - if len(local_identifier) != 3 or local_identifier[0] != "assumed-role": - raise Exception( - "Failed to extract assumed role from arn: " + response["Arn"] - ) - return local_identifier[1] - - def _get_region(self): - if self._region_name != self.DEFAULT_REGION: - _logger.debug(f"Region name is not default, returning {self._region_name}") - return self._region_name - else: - _logger.debug("Region name is default, returning None") - return None - - def _query_parameter_store(self, secret_key): - _logger.debug("Querying parameter store for secret key: %s", secret_key) - args = {"service_name": "ssm"} - region_name = self._get_region() - if region_name: - args["region_name"] = region_name - client = boto3.client(**args) - name = "/hopsworks/role/" + self._assumed_role() + "/type/" + secret_key - return client.get_parameter(Name=name, WithDecryption=True)["Parameter"][ - "Value" - ] - - def _get_project_info(self, project_name): - """Makes a REST call to hopsworks to get all metadata of a project for the provided project. - - :param project_name: the name of the project - :type project_name: str - :return: JSON response with project info - :rtype: dict - """ - _logger.debug("Getting project info for project: %s", project_name) - return self._send_request("GET", ["project", "getProjectInfo", project_name]) - - def _write_b64_cert_to_bytes(self, b64_string, path): - """Converts b64 encoded certificate to bytes file . - - :param b64_string: b64 encoded string of certificate - :type b64_string: str - :param path: path where file is saved, including file name. e.g. /path/key-store.jks - :type path: str - """ - _logger.debug(f"Writing b64 encoded certificate to {path}") - with open(path, "wb") as f: - cert_b64 = base64.b64decode(b64_string) - f.write(cert_b64) - - def _cleanup_file(self, file_path): - """Removes local files with `file_path`.""" - _logger.debug(f"Cleaning up file {file_path}") - try: - os.remove(file_path) - except OSError: - pass - - def replace_public_host(self, url): - """no need to replace as we are already in external client""" - return url - - @property - def host(self): - return self._host diff --git a/python/hopsworks_common/client/hopsworks.py b/python/hopsworks_common/client/hopsworks.py deleted file mode 100644 index ddc81fc20..000000000 --- a/python/hopsworks_common/client/hopsworks.py +++ /dev/null @@ -1,236 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
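For reference, the AWS secrets lookup performed by the external client above can be reproduced with plain boto3 calls. A minimal sketch, assuming the secret layout used by `_query_secrets_manager` ("hopsworks/role/<assumed-role-name>" with an "api-key" entry) and standard boto3 credential resolution; the function name is illustrative:

import json
from typing import Optional

import boto3


def fetch_hopsworks_api_key(role_name: str, region_name: Optional[str] = None) -> str:
    # Illustrative helper mirroring Client._query_secrets_manager: the API key
    # is stored under "hopsworks/role/<assumed-role-name>" as a JSON blob with
    # an "api-key" entry.
    args = {"service_name": "secretsmanager"}
    if region_name:
        args["region_name"] = region_name
    sm_client = boto3.client(**args)
    response = sm_client.get_secret_value(SecretId="hopsworks/role/" + role_name)
    return json.loads(response["SecretString"])["api-key"]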
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import base64
-import os
-import textwrap
-from pathlib import Path
-
-import requests
-from hopsworks_common.client import auth, base
-
-
-try:
-    import jks
-except ImportError:
-    pass
-
-
-class Client(base.Client):
-    REQUESTS_VERIFY = "REQUESTS_VERIFY"
-    DOMAIN_CA_TRUSTSTORE_PEM = "DOMAIN_CA_TRUSTSTORE_PEM"
-    PROJECT_ID = "HOPSWORKS_PROJECT_ID"
-    PROJECT_NAME = "HOPSWORKS_PROJECT_NAME"
-    HADOOP_USER_NAME = "HADOOP_USER_NAME"
-    MATERIAL_DIRECTORY = "MATERIAL_DIRECTORY"
-    HDFS_USER = "HDFS_USER"
-    T_CERTIFICATE = "t_certificate"
-    K_CERTIFICATE = "k_certificate"
-    TRUSTSTORE_SUFFIX = "__tstore.jks"
-    KEYSTORE_SUFFIX = "__kstore.jks"
-    PEM_CA_CHAIN = "ca_chain.pem"
-    CERT_KEY_SUFFIX = "__cert.key"
-    MATERIAL_PWD = "material_passwd"
-    SECRETS_DIR = "SECRETS_DIR"
-
-    def __init__(self):
-        """Initializes a client being run from a job/notebook directly on Hopsworks."""
-        self._base_url = self._get_hopsworks_rest_endpoint()
-        self._host, self._port = self._get_host_port_pair()
-        self._secrets_dir = (
-            os.environ[self.SECRETS_DIR] if self.SECRETS_DIR in os.environ else ""
-        )
-        self._cert_key = self._get_cert_pw()
-        trust_store_path = self._get_trust_store_path()
-        hostname_verification = (
-            os.environ[self.REQUESTS_VERIFY]
-            if self.REQUESTS_VERIFY in os.environ
-            else "true"
-        )
-        self._project_id = os.environ[self.PROJECT_ID]
-        self._project_name = self._project_name()
-        try:
-            self._auth = auth.BearerAuth(self._read_jwt())
-        except FileNotFoundError:
-            self._auth = auth.ApiKeyAuth(self._read_apikey())
-        self._verify = self._get_verify(hostname_verification, trust_store_path)
-        self._session = requests.session()
-
-        self._connected = True
-
-        credentials = self._get_credentials(self._project_id)
-
-        self._write_pem_file(credentials["caChain"], self._get_ca_chain_path())
-        self._write_pem_file(credentials["clientCert"], self._get_client_cert_path())
-        self._write_pem_file(credentials["clientKey"], self._get_client_key_path())
-
-    def _get_hopsworks_rest_endpoint(self):
-        """Get the hopsworks REST endpoint for making requests to the REST API."""
-        return os.environ[self.REST_ENDPOINT]
-
-    def _get_trust_store_path(self):
-        """Convert truststore from jks to pem and return the location"""
-        ca_chain_path = Path(self.PEM_CA_CHAIN)
-        if not ca_chain_path.exists():
-            ks = jks.KeyStore.load(
-                self._get_jks_key_store_path(), self._cert_key, try_decrypt_keys=True
-            )
-            ts = jks.KeyStore.load(
-                self._get_jks_trust_store_path(), self._cert_key, try_decrypt_keys=True
-            )
-            self._write_ca_chain(ks, ts, ca_chain_path)
-        return str(ca_chain_path)
-
-    def _get_ca_chain_path(self, project_name=None) -> str:
-        return os.path.join("/tmp", "ca_chain.pem")
-
-    def _get_client_cert_path(self, project_name=None) -> str:
-        return os.path.join("/tmp", "client_cert.pem")
-
-    def _get_client_key_path(self, project_name=None) -> str:
-        return os.path.join("/tmp", "client_key.pem")
-
-    def _write_ca_chain(self, ks, ts, ca_chain_path):
-        """
-        Converts JKS truststore file into PEM to be compatible with Python libraries
-        """
-        keystore_pw = self._cert_key
-        keystore_ca_cert = self._convert_jks_to_pem(
self._get_jks_key_store_path(), keystore_pw - ) - truststore_ca_cert = self._convert_jks_to_pem( - self._get_jks_trust_store_path(), keystore_pw - ) - - with ca_chain_path.open("w") as f: - f.write(keystore_ca_cert + truststore_ca_cert) - - def _convert_jks_to_pem(self, jks_path, keystore_pw): - """ - Converts a keystore JKS that contains client private key, - client certificate and CA certificate that was used to - sign the certificate to PEM format and returns the CA certificate. - Args: - :jks_path: path to the JKS file - :pw: password for decrypting the JKS file - Returns: - strings: (ca_cert) - """ - # load the keystore and decrypt it with password - ks = jks.KeyStore.load(jks_path, keystore_pw, try_decrypt_keys=True) - ca_certs = "" - - # Convert CA Certificates into PEM format and append to string - for _alias, c in ks.certs.items(): - ca_certs = ca_certs + self._bytes_to_pem_str(c.cert, "CERTIFICATE") - return ca_certs - - def _bytes_to_pem_str(self, der_bytes, pem_type): - """ - Utility function for creating PEM files - - Args: - der_bytes: DER encoded bytes - pem_type: type of PEM, e.g Certificate, Private key, or RSA private key - - Returns: - PEM String for a DER-encoded certificate or private key - """ - pem_str = "" - pem_str = pem_str + "-----BEGIN {}-----".format(pem_type) + "\n" - pem_str = ( - pem_str - + "\r\n".join( - textwrap.wrap(base64.b64encode(der_bytes).decode("ascii"), 64) - ) - + "\n" - ) - pem_str = pem_str + "-----END {}-----".format(pem_type) + "\n" - return pem_str - - def _get_jks_trust_store_path(self): - """ - Get truststore location - - Returns: - truststore location - """ - t_certificate = Path(self.T_CERTIFICATE) - if t_certificate.exists(): - return str(t_certificate) - else: - username = os.environ[self.HADOOP_USER_NAME] - material_directory = Path(os.environ[self.MATERIAL_DIRECTORY]) - return str(material_directory.joinpath(username + self.TRUSTSTORE_SUFFIX)) - - def _get_jks_key_store_path(self): - """ - Get keystore location - - Returns: - keystore location - """ - k_certificate = Path(self.K_CERTIFICATE) - if k_certificate.exists(): - return str(k_certificate) - else: - username = os.environ[self.HADOOP_USER_NAME] - material_directory = Path(os.environ[self.MATERIAL_DIRECTORY]) - return str(material_directory.joinpath(username + self.KEYSTORE_SUFFIX)) - - def _project_name(self): - try: - return os.environ[self.PROJECT_NAME] - except KeyError: - pass - - hops_user = self._project_user() - # project users have username project__user: - hops_user_split = hops_user.split("__") - project = hops_user_split[0] - return project - - def _project_user(self): - try: - hops_user = os.environ[self.HADOOP_USER_NAME] - except KeyError: - hops_user = os.environ[self.HDFS_USER] - return hops_user - - def _get_cert_pw(self): - """ - Get keystore password from local container - - Returns: - Certificate password - """ - pwd_path = Path(self.MATERIAL_PWD) - if not pwd_path.exists(): - username = os.environ[self.HADOOP_USER_NAME] - material_directory = Path(os.environ[self.MATERIAL_DIRECTORY]) - pwd_path = material_directory.joinpath(username + self.CERT_KEY_SUFFIX) - - with pwd_path.open() as f: - return f.read() - - def replace_public_host(self, url): - """replace hostname to public hostname set in HOPSWORKS_PUBLIC_HOST""" - ui_url = url._replace(netloc=os.environ[self.HOPSWORKS_PUBLIC_HOST]) - return ui_url - - @property - def host(self): - return self._host diff --git a/python/hopsworks_common/client/online_store_rest_client.py 
b/python/hopsworks_common/client/online_store_rest_client.py
deleted file mode 100644
index 03d77471c..000000000
--- a/python/hopsworks_common/client/online_store_rest_client.py
+++ /dev/null
@@ -1,385 +0,0 @@
-#
-# Copyright 2024 Hopsworks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import annotations
-
-import logging
-from typing import Any, Dict, List, Optional, Union
-from warnings import warn
-
-import requests
-import requests.adapters
-from furl import furl
-from hopsworks_common import client
-from hopsworks_common.client.exceptions import FeatureStoreException
-from hopsworks_common.core import variable_api
-
-
-_logger = logging.getLogger(__name__)
-
-_online_store_rest_client = None
-
-
-def init_or_reset_online_store_rest_client(
-    transport: Optional[
-        Union[requests.adapters.HTTPAdapter, requests.adapters.BaseAdapter]
-    ] = None,
-    optional_config: Optional[Dict[str, Any]] = None,
-    reset_client: bool = False,
-):
-    global _online_store_rest_client
-    if not _online_store_rest_client:
-        _online_store_rest_client = OnlineStoreRestClientSingleton(
-            transport=transport, optional_config=optional_config
-        )
-    elif reset_client:
-        _online_store_rest_client.reset_client(
-            transport=transport, optional_config=optional_config
-        )
-    else:
-        _logger.warning(
-            "Online Store Rest Client is already initialised. To reset the connection and/or override the configuration, "
-            + "use the reset_online_store_rest_client flag.",
-            stacklevel=2,
-        )
-
-
-def get_instance() -> OnlineStoreRestClientSingleton:
-    global _online_store_rest_client
-    if _online_store_rest_client is None:
-        _logger.warning(
-            "Online Store Rest Client is not initialised. Initialising with default configuration."
-        )
-        _online_store_rest_client = OnlineStoreRestClientSingleton()
-    _logger.debug("Accessing global Online Store Rest Client instance.")
-    return _online_store_rest_client
-
-
-class OnlineStoreRestClientSingleton:
-    HOST = "host"
-    PORT = "port"
-    VERIFY_CERTS = "verify_certs"
-    USE_SSL = "use_ssl"
-    CA_CERTS = "ca_certs"
-    HTTP_AUTHORIZATION = "http_authorization"
-    TIMEOUT = "timeout"
-    SERVER_API_VERSION = "server_api_version"
-    API_KEY = "api_key"
-    _DEFAULT_ONLINE_STORE_REST_CLIENT_PORT = 4406
-    _DEFAULT_ONLINE_STORE_REST_CLIENT_TIMEOUT_SECOND = 2
-    _DEFAULT_ONLINE_STORE_REST_CLIENT_VERIFY_CERTS = True
-    _DEFAULT_ONLINE_STORE_REST_CLIENT_USE_SSL = True
-    _DEFAULT_ONLINE_STORE_REST_CLIENT_SERVER_API_VERSION = "0.1.0"
-    _DEFAULT_ONLINE_STORE_REST_CLIENT_HTTP_AUTHORIZATION = "X-API-KEY"
-
-    def __init__(
-        self,
-        transport: Optional[
-            Union[requests.adapters.HTTPAdapter, requests.adapters.BaseAdapter]
-        ] = None,
-        optional_config: Optional[Dict[str, Any]] = None,
-    ):
-        _logger.debug(
-            f"Initialising Online Store Rest Client {'with optional configuration' if optional_config else ''}."
-        )
-        if optional_config:
-            _logger.debug(f"Optional Config: {optional_config!r}")
-        self._check_hopsworks_connection()
-        self.variable_api = variable_api.VariableApi()
-        self._auth: client.auth.OnlineStoreKeyAuth
-        self._session: requests.Session
-        self._current_config: Dict[str, Any]
-        self._base_url: furl
-        self._setup_rest_client(
-            transport=transport,
-            optional_config=optional_config,
-            use_current_config=False,
-        )
-        self.is_connected()
-
-    def reset_client(
-        self,
-        transport: Optional[
-            Union[requests.adapters.HTTPAdapter, requests.adapters.BaseAdapter]
-        ] = None,
-        optional_config: Optional[Dict[str, Any]] = None,
-    ):
-        _logger.debug(
-            f"Resetting Online Store Rest Client {'with optional configuration' if optional_config else ''}."
-        )
-        if optional_config:
-            _logger.debug(f"Optional Config: {optional_config}")
-        self._check_hopsworks_connection()
-        if hasattr(self, "_session") and self._session:
-            _logger.debug("Closing existing session.")
-            self._session.close()
-            delattr(self, "_session")
-        self._setup_rest_client(
-            transport=transport,
-            optional_config=optional_config,
-            use_current_config=False if optional_config else True,
-        )
-
-    def _setup_rest_client(
-        self,
-        transport: Optional[
-            Union[requests.adapters.HTTPAdapter, requests.adapters.BaseAdapter]
-        ] = None,
-        optional_config: Optional[Dict[str, Any]] = None,
-        use_current_config: bool = True,
-    ):
-        _logger.debug("Setting up Online Store Rest Client.")
-        if optional_config and not isinstance(optional_config, dict):
-            raise ValueError(
-                "optional_config must be a dictionary. See documentation for allowed keys and values."
-            )
-        _logger.debug("Optional Config: %s", optional_config)
-        if not use_current_config:
-            _logger.debug(
-                "Retrieving default configuration for Online Store REST Client."
-            )
-            self._current_config = self._get_default_client_config()
-            if optional_config:
-                _logger.debug(
-                    "Updating default configuration with provided optional configuration."
-                )
-                self._current_config.update(optional_config)
-
-        self._set_auth(optional_config)
-        if not hasattr(self, "_session") or not self._session:
-            _logger.debug("Initialising new requests session.")
-            self._session = requests.Session()
-        else:
-            raise ValueError(
-                "Use the init_or_reset_online_store_connection method with reset_connection flag set "
-                + "to True to reset the online_store_client_connection"
-            )
-        if transport is not None:
-            _logger.debug("Setting custom transport adapter.")
-            self._session.mount("https://", transport)
-            self._session.mount("http://", transport)
-
-        if not self._current_config[self.VERIFY_CERTS]:
-            _logger.warning(
-                "Disabling SSL certificate verification. This is not recommended for production environments."
-            )
-            self._session.verify = False
-        else:
-            _logger.debug(
-                f"Setting SSL certificate verification using CA Certs path: {self._current_config[self.CA_CERTS]}"
-            )
-            self._session.verify = self._current_config[self.CA_CERTS]
-
-        # Set base_url
-        scheme = "https" if self._current_config[self.USE_SSL] else "http"
-        self._base_url = furl(
-            f"{scheme}://{self._current_config[self.HOST]}:{self._current_config[self.PORT]}/{self._current_config[self.SERVER_API_VERSION]}"
-        )
-
-        assert (
-            self._session is not None
-        ), "Online Store REST Client failed to initialise."
-        assert (
-            self._auth is not None
-        ), "Online Store REST Client Authentication failed to initialise. Check API Key."
-        assert (
-            self._base_url is not None
-        ), "Online Store REST Client Base URL failed to initialise.
Check host and port parameters." - assert ( - self._current_config is not None - ), "Online Store REST Client Configuration failed to initialise." - - def _get_default_client_config(self) -> Dict[str, Any]: - _logger.debug("Retrieving default configuration for Online Store REST Client.") - default_config = self._get_default_static_parameters_config() - default_config.update(self._get_default_dynamic_parameters_config()) - return default_config - - def _get_default_static_parameters_config(self) -> Dict[str, Any]: - _logger.debug( - "Retrieving default static configuration for Online Store REST Client." - ) - return { - self.TIMEOUT: self._DEFAULT_ONLINE_STORE_REST_CLIENT_TIMEOUT_SECOND, - self.VERIFY_CERTS: self._DEFAULT_ONLINE_STORE_REST_CLIENT_VERIFY_CERTS, - self.USE_SSL: self._DEFAULT_ONLINE_STORE_REST_CLIENT_USE_SSL, - self.SERVER_API_VERSION: self._DEFAULT_ONLINE_STORE_REST_CLIENT_SERVER_API_VERSION, - self.HTTP_AUTHORIZATION: self._DEFAULT_ONLINE_STORE_REST_CLIENT_HTTP_AUTHORIZATION, - } - - def _get_default_dynamic_parameters_config( - self, - ) -> Dict[str, Any]: - _logger.debug( - "Retrieving default dynamic configuration for Online Store REST Client." - ) - url = furl(self._get_rondb_rest_server_endpoint()) - _logger.debug(f"Default RonDB Rest Server host and port: {url.host}:{url.port}") - _logger.debug( - f"Using CA Certs from Hopsworks Client: {client.get_instance()._get_ca_chain_path()}" - ) - return { - self.HOST: url.host, - self.PORT: url.port, - self.CA_CERTS: client.get_instance()._get_ca_chain_path(), - } - - def _get_rondb_rest_server_endpoint(self) -> str: - """Retrieve RonDB Rest Server endpoint based on whether the client is running internally or externally. - - If the client is running externally, the endpoint is retrieved via the loadbalancer. - If the client is running internally, the endpoint is retrieved via (consul) service discovery. - The default port for the RonDB Rest Server is 4406 and always used unless specifying a different port - in the configuration. - - Returns: - str: RonDB Rest Server endpoint with default port. - """ - if client.get_instance()._is_external(): - _logger.debug( - "External Online Store REST Client : Retrieving RonDB Rest Server endpoint via loadbalancer." - ) - external_domain = self.variable_api.get_loadbalancer_external_domain() - if external_domain == "": - _logger.debug( - "External Online Store REST Client : Loadbalancer external domain is not set. Using client host as endpoint." - ) - external_domain = client.get_instance().host - default_url = f"https://{external_domain}:{self._DEFAULT_ONLINE_STORE_REST_CLIENT_PORT}" - _logger.debug( - f"External Online Store REST Client : Default RonDB Rest Server endpoint: {default_url}" - ) - return default_url - else: - _logger.debug( - "Internal Online Store REST Client : Retrieving RonDB Rest Server endpoint via service discovery." 
- ) - service_discovery_domain = self.variable_api.get_service_discovery_domain() - if service_discovery_domain == "": - raise FeatureStoreException("Service discovery domain is not set.") - default_url = f"https://rdrs.service.{service_discovery_domain}:{self._DEFAULT_ONLINE_STORE_REST_CLIENT_PORT}" - _logger.debug( - f"Internal Online Store REST Client : Default RonDB Rest Server endpoint: {default_url}" - ) - return default_url - - def send_request( - self, - method: str, - path_params: List[str], - headers: Optional[Dict[str, Any]] = None, - data: Optional[str] = None, - ) -> requests.Response: - url = self._base_url.copy() - url.path.segments.extend(path_params) - _logger.debug(f"Sending {method} request to {url.url}.") - _logger.debug(f"Provided Data: {data}") - _logger.debug(f"Provided Headers: {headers}") - prepped_request = self._session.prepare_request( - requests.Request( - method, url=url.url, headers=headers, data=data, auth=self.auth - ) - ) - timeout = self._current_config[self.TIMEOUT] - return self._session.send( - prepped_request, - # compatibility with 3.7 - timeout=timeout if timeout < 500 else timeout / 1000, - ) - - def _check_hopsworks_connection(self) -> None: - _logger.debug("Checking Hopsworks connection.") - assert ( - client.get_instance() is not None and client.get_instance()._connected - ), """Hopsworks Client is not connected. Please connect to Hopsworks cluster - via hopsworks.login or hsfs.connection before initialising the Online Store REST Client. - """ - _logger.debug("Hopsworks connection is active.") - - def _set_auth(self, optional_config: Optional[Dict[str, Any]] = None) -> None: - """Set authentication object for the Online Store REST Client. - - RonDB Rest Server uses Hopsworks Api Key to authenticate requests via the X-API-KEY header by default. - The api key determines the permissions of the user making the request for access to a given Feature Store. - """ - _logger.debug("Setting authentication for Online Store REST Client.") - if client.get_instance()._is_external(): - assert hasattr( - client.get_instance()._auth, "_token" - ), "External client must use API Key authentication. Contact your system administrator." - _logger.debug( - "External Online Store REST Client : Setting authentication using Hopsworks Client API Key." - ) - self._auth = client.auth.OnlineStoreKeyAuth( - client.get_instance()._auth._token - ) - elif isinstance(optional_config, dict) and optional_config.get( - self.API_KEY, False - ): - _logger.debug( - "Setting authentication using provided API Key from optional configuration." - ) - self._auth = client.auth.OnlineStoreKeyAuth(optional_config[self.API_KEY]) - elif hasattr(self, "_auth") and self._auth is not None: - _logger.debug( - "Authentication for Online Store REST Client is already set. Using existing authentication api key." - ) - else: - raise FeatureStoreException( - "RonDB Rest Server uses Hopsworks Api Key to authenticate request." - + f"Provide a configuration with the {self.API_KEY} key." - ) - - def is_connected(self): - """If Online Store Rest Client is initialised, ping RonDB Rest Server to ensure connection is active.""" - if self._session is None: - _logger.debug( - "Checking Online Store REST Client is connected. Session is not initialised." - ) - raise FeatureStoreException("Online Store REST Client is not initialised.") - - _logger.debug( - "Checking Online Store REST Client is connected. Pinging RonDB Rest Server." 
-        )
-        if not self.send_request("GET", ["ping"]):
-            warn("Ping failed, RonDB Rest Server is not reachable.", stacklevel=2)
-            return False
-        return True
-
-    @property
-    def session(self) -> requests.Session:
-        """Requests session object used to send requests to the Online Store REST API."""
-        return self._session
-
-    @property
-    def base_url(self) -> furl:
-        """Base URL for the Online Store REST API.
-
-        This is the URL of the RonDB REST Server and should not be confused with the OpenSearch Vector DB, which also serves as an Online Store for features belonging to Feature Groups containing embeddings."""
-        return self._base_url
-
-    @property
-    def current_config(self) -> Dict[str, Any]:
-        """Current configuration of the Online Store REST Client."""
-        return self._current_config
-
-    @property
-    def auth(self) -> "client.auth.OnlineStoreKeyAuth":
-        """Authentication object used to authenticate requests to the Online Store REST API.
-
-        Extends the requests.auth.AuthBase class.
-        """
-        return self._auth
diff --git a/python/hopsworks_common/core/constants.py b/python/hopsworks_common/core/constants.py
deleted file mode 100644
index 4e522de6a..000000000
--- a/python/hopsworks_common/core/constants.py
+++ /dev/null
@@ -1,51 +0,0 @@
-#
-# Copyright 2024 Hopsworks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import importlib.util
-
-
-# Avro
-HAS_FAST_AVRO: bool = importlib.util.find_spec("fastavro") is not None
-HAS_AVRO: bool = importlib.util.find_spec("avro") is not None
-
-# Confluent Kafka
-HAS_CONFLUENT_KAFKA: bool = importlib.util.find_spec("confluent_kafka") is not None
-confluent_kafka_not_installed_message = (
-    "Confluent Kafka package not found. "
-    "If you want to use Kafka with Hopsworks you can install the corresponding extras "
-    """`pip install hopsworks[python]` or `pip install "hopsworks[python]"` if using zsh. """
-    "You can also install confluent-kafka directly in your environment e.g `pip install confluent-kafka`. "
-    "You will need to restart your kernel if applicable."
-)
-# Data Validation / Great Expectations
-HAS_GREAT_EXPECTATIONS: bool = (
-    importlib.util.find_spec("great_expectations") is not None
-)
-great_expectations_not_installed_message = (
-    "Great Expectations package not found. "
-    "If you want to use data validation with Hopsworks you can install the corresponding extras "
-    """`pip install hopsworks[great_expectations]` or `pip install "hopsworks[great_expectations]"` if using zsh. """
-    "You can also install great-expectations directly in your environment e.g `pip install great-expectations`. "
-    "You will need to restart your kernel if applicable."
-)
-initialise_expectation_suite_for_single_expectation_api_message = "Initialize Expectation Suite by attaching to a Feature Group to enable single expectation API"
-
-# Numpy
-HAS_NUMPY: bool = importlib.util.find_spec("numpy") is not None
-
-# SQL packages
-HAS_SQLALCHEMY: bool = importlib.util.find_spec("sqlalchemy") is not None
-HAS_AIOMYSQL: bool = importlib.util.find_spec("aiomysql") is not None
diff --git a/python/hopsworks_common/core/variable_api.py b/python/hopsworks_common/core/variable_api.py
deleted file mode 100644
index 7b3c74575..000000000
--- a/python/hopsworks_common/core/variable_api.py
+++ /dev/null
@@ -1,117 +0,0 @@
-#
-# Copyright 2022 Hopsworks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import annotations
-
-import re
-from typing import Optional, Tuple
-
-from hopsworks_common import client
-from hopsworks_common.client.exceptions import RestAPIError
-
-
-class VariableApi:
-    def __init__(self):
-        pass
-
-    def get_variable(self, variable: str):
-        """Get the configured value of a variable.
-
-        # Arguments
-            variable: Name of the variable.
-        # Returns
-            The variable's value
-        # Raises
-            `RestAPIError`: If unable to get the variable
-        """
-
-        _client = client.get_instance()
-
-        path_params = ["variables", variable]
-        domain = _client._send_request("GET", path_params)
-
-        return domain["successMessage"]
-
-    def get_version(self, software: str) -> Optional[str]:
-        """Get version of a software component.
-
-        # Arguments
-            software: Name of the software.
-        # Returns
-            The software's version, if the software is available, otherwise `None`.
-        # Raises
-            `RestAPIError`: If unable to get the version
-        """
-
-        _client = client.get_instance()
-
-        path_params = ["variables", "versions"]
-        resp = _client._send_request("GET", path_params)
-
-        for entry in resp:
-            if entry["software"] == software:
-                return entry["version"]
-        return None
-
-    def parse_major_and_minor(
-        self, backend_version: str
-    ) -> Tuple[Optional[str], Optional[str]]:
-        """Extract major and minor version from full version.
-
-        # Arguments
-            backend_version: The full version.
-        # Returns
-            (major, minor): The pair of major and minor parts of the version, or (None, None) if the version format is incorrect.
-        """
-
-        version_pattern = r"(\d+)\.(\d+)"
-        matches = re.match(version_pattern, backend_version)
-
-        if matches is None:
-            return (None, None)
-        return matches.group(1), matches.group(2)
-
-    def get_flyingduck_enabled(self) -> bool:
-        """Check if Flying Duck is enabled on the backend.
-
-        # Returns
-            `True`: If Flying Duck is available, `False` otherwise.
-        # Raises
-            `RestAPIError`: If unable to obtain the flag's value.
-        """
-        return self.get_variable("enable_flyingduck") == "true"
-
-    def get_loadbalancer_external_domain(self) -> str:
-        """Get domain of external loadbalancer.
-
-        # Returns
-            `str`: The domain of external loadbalancer, if it is set up, otherwise empty string `""`.
- """ - try: - return self.get_variable("loadbalancer_external_domain") - except RestAPIError: - return "" - - def get_service_discovery_domain(self) -> str: - """Get domain of service discovery server. - - # Returns - `str`: The domain of service discovery server, if it is set up, otherwise empty string `""`. - """ - try: - return self.get_variable("service_discovery_domain") - except RestAPIError: - return "" diff --git a/python/hopsworks_common/decorators.py b/python/hopsworks_common/decorators.py deleted file mode 100644 index fd83f290d..000000000 --- a/python/hopsworks_common/decorators.py +++ /dev/null @@ -1,86 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from __future__ import annotations - -import functools -import os - -from hopsworks_common.core.constants import ( - HAS_GREAT_EXPECTATIONS, - great_expectations_not_installed_message, -) - - -def not_connected(fn): - @functools.wraps(fn) - def if_not_connected(inst, *args, **kwargs): - if inst._connected: - raise HopsworksConnectionError - return fn(inst, *args, **kwargs) - - return if_not_connected - - -def connected(fn): - @functools.wraps(fn) - def if_connected(inst, *args, **kwargs): - if not inst._connected: - raise NoHopsworksConnectionError - return fn(inst, *args, **kwargs) - - return if_connected - - -class HopsworksConnectionError(Exception): - """Thrown when attempted to change connection attributes while connected.""" - - def __init__(self): - super().__init__( - "Connection is currently in use. Needs to be closed for modification." - ) - - -class NoHopsworksConnectionError(Exception): - """Thrown when attempted to perform operation on connection while not connected.""" - - def __init__(self): - super().__init__( - "Connection is not active. Needs to be connected for hopsworks operations." - ) - - -if os.environ.get("HOPSWORKS_RUN_WITH_TYPECHECK", False): - from typeguard import typechecked -else: - from typing import TypeVar - - _T = TypeVar("_T") - - def typechecked( - target: _T, - ) -> _T: - return target if target else typechecked - - -def uses_great_expectations(f): - @functools.wraps(f) - def g(*args, **kwds): - if not HAS_GREAT_EXPECTATIONS: - raise ModuleNotFoundError(great_expectations_not_installed_message) - return f(*args, **kwds) - - return g diff --git a/python/hsfs/client/__init__.py b/python/hsfs/client/__init__.py index 19e0feb8d..736b2006f 100644 --- a/python/hsfs/client/__init__.py +++ b/python/hsfs/client/__init__.py @@ -1,5 +1,5 @@ # -# Copyright 2024 Hopsworks AB +# Copyright 2020 Logical Clocks AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,28 +13,58 @@ # See the License for the specific language governing permissions and # limitations under the License. 
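The `connected` guard removed above (and restored in `hsfs.decorators` later in this patch) is a thin wrapper around the instance's `_connected` flag. A minimal sketch of the pattern on a stand-in class; the class, method, and error message below are illustrative only:

import functools


def connected(fn):
    # Same shape as the decorator in the diff: refuse to run while disconnected.
    @functools.wraps(fn)
    def if_connected(inst, *args, **kwargs):
        if not inst._connected:
            # Stands in for the NoHopsworksConnectionError raised by the real decorator.
            raise RuntimeError("Connection is not active.")
        return fn(inst, *args, **kwargs)

    return if_connected


class DemoConnection:
    """Illustrative stand-in for the real Connection class."""

    def __init__(self):
        self._connected = False

    @connected
    def get_feature_store(self):
        return "feature-store-handle"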
# +from __future__ import annotations -from hopsworks_common.client import ( - auth, - base, - exceptions, - external, - get_instance, - hopsworks, - init, - online_store_rest_client, - stop, -) - - -__all__ = [ - auth, - base, - exceptions, - external, - get_instance, - hopsworks, - init, - online_store_rest_client, - stop, -] +from typing import Literal, Optional, Union + +from hsfs.client import external, hopsworks + + +_client = None + + +def init( + client_type: Union[Literal["hopsworks"], Literal["external"]], + host: Optional[str] = None, + port: Optional[int] = None, + project: Optional[str] = None, + engine: Optional[str] = None, + region_name: Optional[str] = None, + secrets_store=None, + hostname_verification: Optional[bool] = None, + trust_store_path: Optional[str] = None, + cert_folder: Optional[str] = None, + api_key_file: Optional[str] = None, + api_key_value: Optional[str] = None, +) -> None: + global _client + if not _client: + if client_type == "hopsworks": + _client = hopsworks.Client() + elif client_type == "external": + _client = external.Client( + host, + port, + project, + engine, + region_name, + secrets_store, + hostname_verification, + trust_store_path, + cert_folder, + api_key_file, + api_key_value, + ) + + +def get_instance() -> Union[hopsworks.Client, external.Client]: + global _client + if _client: + return _client + raise Exception("Couldn't find client. Try reconnecting to Hopsworks.") + + +def stop() -> None: + global _client + _client._close() + _client = None diff --git a/python/hsfs/client/auth.py b/python/hsfs/client/auth.py index e912b1daf..1556a5b4c 100644 --- a/python/hsfs/client/auth.py +++ b/python/hsfs/client/auth.py @@ -1,5 +1,5 @@ # -# Copyright 2024 Hopsworks AB +# Copyright 2020 Logical Clocks AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,16 +13,39 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from __future__ import annotations -from hopsworks_common.client.auth import ( - ApiKeyAuth, - BearerAuth, - OnlineStoreKeyAuth, -) +import requests -__all__ = [ - ApiKeyAuth, - BearerAuth, - OnlineStoreKeyAuth, -] +class BearerAuth(requests.auth.AuthBase): + """Class to encapsulate a Bearer token.""" + + def __init__(self, token: str) -> None: + self._token = token.strip() + + def __call__(self, r: requests.Request) -> requests.Request: + r.headers["Authorization"] = "Bearer " + self._token + return r + + +class ApiKeyAuth(requests.auth.AuthBase): + """Class to encapsulate an API key.""" + + def __init__(self, token: str) -> None: + self._token = token.strip() + + def __call__(self, r: requests.Request) -> requests.Request: + r.headers["Authorization"] = "ApiKey " + self._token + return r + + +class OnlineStoreKeyAuth(requests.auth.AuthBase): + """Class to encapsulate an API key.""" + + def __init__(self, token): + self._token = token.strip() + + def __call__(self, r): + r.headers["X-API-KEY"] = self._token + return r diff --git a/python/hsfs/client/base.py b/python/hsfs/client/base.py index 3ff35d800..eeb6eb369 100644 --- a/python/hsfs/client/base.py +++ b/python/hsfs/client/base.py @@ -1,5 +1,5 @@ # -# Copyright 2024 Hopsworks AB +# Copyright 2020 Logical Clocks AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
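The restored `hsfs.client.auth` classes are standard `requests` auth plugins: each one stamps the appropriate header onto the outgoing request. A minimal sketch of how a session picks one up; the host and token values are placeholders:

import requests
from hsfs.client import auth

session = requests.Session()
# ApiKeyAuth adds "Authorization: ApiKey <token>" to every request sent through
# the session; BearerAuth and OnlineStoreKeyAuth work the same way with
# "Bearer <token>" and "X-API-KEY: <token>" respectively.
session.auth = auth.ApiKeyAuth("my-api-key")  # placeholder token
response = session.get(
    "https://my-hopsworks-host/hopsworks-api/api/variables/versions"
)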
@@ -13,12 +13,276 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from __future__ import annotations -from hopsworks_common.client.base import ( - Client, -) +import base64 +import os +import textwrap +import time +from pathlib import Path +import furl +import requests +import urllib3 +from hsfs.client import auth, exceptions +from hsfs.decorators import connected -__all__ = [ - Client, -] + +try: + import jks +except ImportError: + pass + + +urllib3.disable_warnings(urllib3.exceptions.SecurityWarning) +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + + +class Client: + TOKEN_FILE = "token.jwt" + TOKEN_EXPIRED_RETRY_INTERVAL = 0.6 + TOKEN_EXPIRED_MAX_RETRIES = 10 + + APIKEY_FILE = "api.key" + REST_ENDPOINT = "REST_ENDPOINT" + DEFAULT_DATABRICKS_ROOT_VIRTUALENV_ENV = "DEFAULT_DATABRICKS_ROOT_VIRTUALENV_ENV" + HOPSWORKS_PUBLIC_HOST = "HOPSWORKS_PUBLIC_HOST" + + def _get_verify(self, verify, trust_store_path): + """Get verification method for sending HTTP requests to Hopsworks. + + Credit to https://gist.github.com/gdamjan/55a8b9eec6cf7b771f92021d93b87b2c + + :param verify: perform hostname verification, 'true' or 'false' + :type verify: str + :param trust_store_path: path of the truststore locally if it was uploaded manually to + the external environment such as AWS Sagemaker + :type trust_store_path: str + :return: if verify is true and the truststore is provided, then return the trust store location + if verify is true but the truststore wasn't provided, then return true + if verify is false, then return false + :rtype: str or boolean + """ + if verify == "true": + if trust_store_path is not None: + return trust_store_path + else: + return True + + return False + + def _get_host_port_pair(self): + """ + Removes "http or https" from the rest endpoint and returns a list + [endpoint, port], where endpoint is on the format /path.. without http:// + + :return: a list [endpoint, port] + :rtype: list + """ + endpoint = self._base_url + if "http" in endpoint: + last_index = endpoint.rfind("/") + endpoint = endpoint[last_index + 1 :] + host, port = endpoint.split(":") + return host, port + + def _read_jwt(self): + """Retrieve jwt from local container.""" + return self._read_file(self.TOKEN_FILE) + + def _read_apikey(self): + """Retrieve apikey from local container.""" + return self._read_file(self.APIKEY_FILE) + + def _read_file(self, secret_file): + """Retrieve secret from local container.""" + with open(os.path.join(self._secrets_dir, secret_file), "r") as secret: + return secret.read() + + def _get_credentials(self, project_id): + """Makes a REST call to hopsworks for getting the project user certificates needed to connect to services such as Hive + + :param project_id: id of the project + :type project_id: int + :return: JSON response with credentials + :rtype: dict + """ + return self._send_request("GET", ["project", project_id, "credentials"]) + + def _write_pem_file(self, content: str, path: str) -> None: + with open(path, "w") as f: + f.write(content) + + @connected + def _send_request( + self, + method, + path_params, + query_params=None, + headers=None, + data=None, + stream=False, + files=None, + ): + """Send REST request to Hopsworks. + + Uses the client it is executed from. Path parameters are url encoded automatically. 
+ + :param method: 'GET', 'PUT' or 'POST' + :type method: str + :param path_params: a list of path params to build the query url from starting after + the api resource, for example `["project", 119, "featurestores", 67]`. + :type path_params: list + :param query_params: A dictionary of key/value pairs to be added as query parameters, + defaults to None + :type query_params: dict, optional + :param headers: Additional header information, defaults to None + :type headers: dict, optional + :param data: The payload as a python dictionary to be sent as json, defaults to None + :type data: dict, optional + :param stream: Set if response should be a stream, defaults to False + :type stream: boolean, optional + :param files: dictionary for multipart encoding upload + :type files: dict, optional + :raises hsfs.client.exceptions.RestAPIError: Raised when request wasn't correctly received, understood or accepted + :return: Response json + :rtype: dict + """ + base_path_params = ["hopsworks-api", "api"] + f_url = furl.furl(self._base_url) + f_url.path.segments = base_path_params + path_params + url = str(f_url) + + request = requests.Request( + method, + url=url, + headers=headers, + data=data, + params=query_params, + auth=self._auth, + files=files, + ) + + prepped = self._session.prepare_request(request) + response = self._session.send(prepped, verify=self._verify, stream=stream) + + if response.status_code == 401 and self.REST_ENDPOINT in os.environ: + # refresh token and retry request - only on hopsworks + response = self._retry_token_expired( + request, stream, self.TOKEN_EXPIRED_RETRY_INTERVAL, 1 + ) + + if response.status_code // 100 != 2: + raise exceptions.RestAPIError(url, response) + + if stream: + return response + else: + # handle different success response codes + if len(response.content) == 0: + return None + return response.json() + + def _retry_token_expired(self, request, stream, wait, retries): + """Refresh the JWT token and retry the request. Only on Hopsworks. + As the token might take a while to get refreshed. Keep trying + """ + # Sleep the waited time before re-issuing the request + time.sleep(wait) + + self._auth = auth.BearerAuth(self._read_jwt()) + # Update request with the new token + request.auth = self._auth + prepped = self._session.prepare_request(request) + response = self._session.send(prepped, verify=self._verify, stream=stream) + + if response.status_code == 401 and retries < self.TOKEN_EXPIRED_MAX_RETRIES: + # Try again. + return self._retry_token_expired(request, stream, wait * 2, retries + 1) + else: + # If the number of retries have expired, the _send_request method + # will throw an exception to the user as part of the status_code validation. + return response + + def _close(self): + """Closes a client. 
Can be implemented for clean up purposes, not mandatory.""" + self._connected = False + + def _write_pem( + self, keystore_path, keystore_pw, truststore_path, truststore_pw, prefix + ): + ks = jks.KeyStore.load(Path(keystore_path), keystore_pw, try_decrypt_keys=True) + ts = jks.KeyStore.load( + Path(truststore_path), truststore_pw, try_decrypt_keys=True + ) + + ca_chain_path = os.path.join("/tmp", f"{prefix}_ca_chain.pem") + self._write_ca_chain(ks, ts, ca_chain_path) + + client_cert_path = os.path.join("/tmp", f"{prefix}_client_cert.pem") + self._write_client_cert(ks, client_cert_path) + + client_key_path = os.path.join("/tmp", f"{prefix}_client_key.pem") + self._write_client_key(ks, client_key_path) + + return ca_chain_path, client_cert_path, client_key_path + + def _write_ca_chain(self, ks, ts, ca_chain_path): + """ + Converts JKS keystore and truststore file into ca chain PEM to be compatible with Python libraries + """ + ca_chain = "" + for store in [ks, ts]: + for _, c in store.certs.items(): + ca_chain = ca_chain + self._bytes_to_pem_str(c.cert, "CERTIFICATE") + + with Path(ca_chain_path).open("w") as f: + f.write(ca_chain) + + def _write_client_cert(self, ks, client_cert_path): + """ + Converts JKS keystore file into client cert PEM to be compatible with Python libraries + """ + client_cert = "" + for _, pk in ks.private_keys.items(): + for c in pk.cert_chain: + client_cert = client_cert + self._bytes_to_pem_str(c[1], "CERTIFICATE") + + with Path(client_cert_path).open("w") as f: + f.write(client_cert) + + def _write_client_key(self, ks, client_key_path): + """ + Converts JKS keystore file into client key PEM to be compatible with Python libraries + """ + client_key = "" + for _, pk in ks.private_keys.items(): + client_key = client_key + self._bytes_to_pem_str( + pk.pkey_pkcs8, "PRIVATE KEY" + ) + + with Path(client_key_path).open("w") as f: + f.write(client_key) + + def _bytes_to_pem_str(self, der_bytes, pem_type): + """ + Utility function for creating PEM files + + Args: + der_bytes: DER encoded bytes + pem_type: type of PEM, e.g Certificate, Private key, or RSA private key + + Returns: + PEM String for a DER-encoded certificate or private key + """ + pem_str = "" + pem_str = pem_str + "-----BEGIN {}-----".format(pem_type) + "\n" + pem_str = ( + pem_str + + "\r\n".join( + textwrap.wrap(base64.b64encode(der_bytes).decode("ascii"), 64) + ) + + "\n" + ) + pem_str = pem_str + "-----END {}-----".format(pem_type) + "\n" + return pem_str diff --git a/python/hsfs/client/exceptions.py b/python/hsfs/client/exceptions.py index b34ef198f..7a7f67d5c 100644 --- a/python/hsfs/client/exceptions.py +++ b/python/hsfs/client/exceptions.py @@ -1,5 +1,5 @@ # -# Copyright 2024 Hopsworks AB +# Copyright 2020 Logical Clocks AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,38 +13,98 @@ # See the License for the specific language governing permissions and # limitations under the License. 
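The PEM helpers above all reduce to one transformation: base64-encode the DER payload, wrap it at 64 columns, and add the BEGIN/END markers. A standalone sketch of that step; the function name is illustrative:

import base64
import textwrap


def der_to_pem(der_bytes: bytes, pem_type: str) -> str:
    # Same transformation as Client._bytes_to_pem_str in the hunk above.
    body = "\r\n".join(textwrap.wrap(base64.b64encode(der_bytes).decode("ascii"), 64))
    return "-----BEGIN {0}-----\n{1}\n-----END {0}-----\n".format(pem_type, body)

# e.g. der_to_pem(cert_der, "CERTIFICATE") yields a PEM block that Python TLS
# libraries accept.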
# +from __future__ import annotations -from hopsworks_common.client.exceptions import ( - DatasetException, - DataValidationException, - EnvironmentException, - ExternalClientError, - FeatureStoreException, - GitException, - JobException, - JobExecutionException, - KafkaException, - OpenSearchException, - ProjectException, - RestAPIError, - UnknownSecretStorageError, - VectorDatabaseException, -) - - -__all__ = [ - DatasetException, - DataValidationException, - EnvironmentException, - ExternalClientError, - FeatureStoreException, - GitException, - JobException, - JobExecutionException, - KafkaException, - OpenSearchException, - ProjectException, - RestAPIError, - UnknownSecretStorageError, - VectorDatabaseException, -] +from enum import Enum +from typing import Any, Union + +import requests + + +class RestAPIError(Exception): + """REST Exception encapsulating the response object and url.""" + + class FeatureStoreErrorCode(int, Enum): + FEATURE_GROUP_COMMIT_NOT_FOUND = 270227 + STATISTICS_NOT_FOUND = 270228 + + def __eq__(self, other: Union[int, Any]) -> bool: + if isinstance(other, int): + return self.value == other + if isinstance(other, self.__class__): + return self is other + return False + + def __init__(self, url: str, response: requests.Response) -> None: + try: + error_object = response.json() + if isinstance(error_object, str): + error_object = {"errorMsg": error_object} + except Exception: + error_object = {} + message = ( + "Metadata operation error: (url: {}). Server response: \n" + "HTTP code: {}, HTTP reason: {}, body: {}, error code: {}, error msg: {}, user " + "msg: {}".format( + url, + response.status_code, + response.reason, + response.content, + error_object.get("errorCode", ""), + error_object.get("errorMsg", ""), + error_object.get("usrMsg", ""), + ) + ) + super().__init__(message) + self.url = url + self.response = response + + +class UnknownSecretStorageError(Exception): + """This exception will be raised if an unused secrets storage is passed as a parameter.""" + + +class FeatureStoreException(Exception): + """Generic feature store exception""" + + +class VectorDatabaseException(Exception): + # reason + REQUESTED_K_TOO_LARGE = "REQUESTED_K_TOO_LARGE" + REQUESTED_NUM_RESULT_TOO_LARGE = "REQUESTED_NUM_RESULT_TOO_LARGE" + OTHERS = "OTHERS" + + # info + REQUESTED_K_TOO_LARGE_INFO_K = "k" + REQUESTED_NUM_RESULT_TOO_LARGE_INFO_N = "n" + + def __init__(self, reason: str, message: str, info: str) -> None: + super().__init__(message) + self._info = info + self._reason = reason + + @property + def reason(self) -> str: + return self._reason + + @property + def info(self) -> str: + return self._info + + +class DataValidationException(FeatureStoreException): + """Raised when data validation fails only when using "STRICT" validation ingestion policy.""" + + def __init__(self, message: str) -> None: + super().__init__(message) + + +class ExternalClientError(TypeError): + """Raised when external client cannot be initialized due to missing arguments.""" + + def __init__(self, missing_argument: str) -> None: + message = ( + "{0} cannot be of type NoneType, {0} is a non-optional " + "argument to connect to hopsworks from an external environment." 
+ ).format(missing_argument) + super().__init__(message) diff --git a/python/hsfs/client/external.py b/python/hsfs/client/external.py index 1384b1c20..e99fc20b4 100644 --- a/python/hsfs/client/external.py +++ b/python/hsfs/client/external.py @@ -1,5 +1,5 @@ # -# Copyright 2024 Hopsworks AB +# Copyright 2020 Logical Clocks AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,12 +13,370 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from __future__ import annotations -from hopsworks_common.client.external import ( - Client, -) +import base64 +import json +import logging +import os +import boto3 +import requests -__all__ = [ - Client, -] + +try: + from pyspark.sql import SparkSession +except ImportError: + pass + +from hsfs.client import auth, base, exceptions +from hsfs.client.exceptions import FeatureStoreException + + +_logger = logging.getLogger(__name__) + + +class Client(base.Client): + DEFAULT_REGION = "default" + SECRETS_MANAGER = "secretsmanager" + PARAMETER_STORE = "parameterstore" + LOCAL_STORE = "local" + + def __init__( + self, + host, + port, + project, + engine, + region_name, + secrets_store, + hostname_verification, + trust_store_path, + cert_folder, + api_key_file, + api_key_value, + ): + """Initializes a client in an external environment such as AWS Sagemaker.""" + _logger.info("Initializing external client") + if not host: + raise exceptions.ExternalClientError("host") + if not project: + raise exceptions.ExternalClientError("project") + + self._host = host + self._port = port + self._base_url = "https://" + self._host + ":" + str(self._port) + _logger.info("Base URL: %s", self._base_url) + self._project_name = project + _logger.debug("Project name: %s", self._project_name) + self._region_name = region_name or self.DEFAULT_REGION + _logger.debug("Region name: %s", self._region_name) + + if api_key_value is not None: + _logger.debug("Using provided API key value") + api_key = api_key_value + else: + _logger.debug("Querying secrets store for API key") + api_key = self._get_secret(secrets_store, "api-key", api_key_file) + + _logger.debug("Using api key to setup header authentification") + self._auth = auth.ApiKeyAuth(api_key) + + _logger.debug("Setting up requests session") + self._session = requests.session() + self._connected = True + + self._verify = self._get_verify(self._host, trust_store_path) + _logger.debug("Verify: %s", self._verify) + + project_info = self._get_project_info(self._project_name) + + self._project_id = str(project_info["projectId"]) + _logger.debug("Setting Project ID: %s", self._project_id) + + self._cert_key = None + self._cert_folder_base = None + + if engine == "python": + credentials = self._materialize_certs(cert_folder, host, project) + + self._write_pem_file(credentials["caChain"], self._get_ca_chain_path()) + self._write_pem_file( + credentials["clientCert"], self._get_client_cert_path() + ) + self._write_pem_file(credentials["clientKey"], self._get_client_key_path()) + + elif engine == "spark": + # When using the Spark engine with metastore connection, the certificates + # are needed when the application starts (before user code is run) + # So in this case, we can't materialize the certificates on the fly. 
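+            # The keystore, truststore and password file must therefore already
+            # be distributed with the Spark application and pointed to by the
+            # spark.hadoop.hops.ssl.* properties validated below.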
+ _logger.debug("Running in Spark environment, initializing Spark session") + _spark_session = SparkSession.builder.enableHiveSupport().getOrCreate() + + self._validate_spark_configuration(_spark_session) + with open( + _spark_session.conf.get("spark.hadoop.hops.ssl.keystores.passwd.name"), + "r", + ) as f: + self._cert_key = f.read() + + self._trust_store_path = _spark_session.conf.get( + "spark.hadoop.hops.ssl.trustore.name" + ) + self._key_store_path = _spark_session.conf.get( + "spark.hadoop.hops.ssl.keystore.name" + ) + elif engine == "spark-no-metastore": + _logger.debug( + "Running in Spark environment with no metastore, initializing Spark session" + ) + _spark_session = SparkSession.builder.getOrCreate() + self._materialize_certs(cert_folder, host, project) + + # Set credentials location in the Spark configuration + # Set other options in the Spark configuration + configuration_dict = { + "hops.ssl.trustore.name": self._trust_store_path, + "hops.ssl.keystore.name": self._key_store_path, + "hops.ssl.keystores.passwd.name": self._cert_key_path, + "fs.permissions.umask-mode": "0002", + "fs.hopsfs.impl": "io.hops.hopsfs.client.HopsFileSystem", + "hops.rpc.socket.factory.class.default": "io.hops.hadoop.shaded.org.apache.hadoop.net.HopsSSLSocketFactory", + "client.rpc.ssl.enabled.protocol": "TLSv1.2", + "hops.ssl.hostname.verifier": "ALLOW_ALL", + "hops.ipc.server.ssl.enabled": "true", + } + + for conf_key, conf_value in configuration_dict.items(): + _spark_session._jsc.hadoopConfiguration().set(conf_key, conf_value) + + def _materialize_certs(self, cert_folder, host, project): + self._cert_folder_base = cert_folder + self._cert_folder = os.path.join(cert_folder, host, project) + self._trust_store_path = os.path.join(self._cert_folder, "trustStore.jks") + self._key_store_path = os.path.join(self._cert_folder, "keyStore.jks") + + if os.path.exists(self._cert_folder): + _logger.debug( + f"Running in Python environment, reading certificates from certificates folder {cert_folder}" + ) + _logger.debug("Found certificates: %s", os.listdir(cert_folder)) + else: + _logger.debug( + f"Running in Python environment, creating certificates folder {cert_folder}" + ) + os.makedirs(self._cert_folder, exist_ok=True) + + credentials = self._get_credentials(self._project_id) + self._write_b64_cert_to_bytes( + str(credentials["kStore"]), + path=self._get_jks_key_store_path(), + ) + self._write_b64_cert_to_bytes( + str(credentials["tStore"]), + path=self._get_jks_trust_store_path(), + ) + self._cert_key = str(credentials["password"]) + self._cert_key_path = os.path.join(self._cert_folder, "material_passwd") + with open(self._cert_key_path, "w") as f: + f.write(str(credentials["password"])) + + # Return the credentials object for the Python engine to materialize the pem files. 
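+        # The dict also carries the PEM entries ("caChain", "clientCert",
+        # "clientKey") that the python-engine branch writes out above.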
+ return credentials + + def _validate_spark_configuration(self, _spark_session): + exception_text = "Spark is misconfigured for communication with Hopsworks, missing or invalid property: " + + configuration_dict = { + "spark.hadoop.hops.ssl.trustore.name": None, + "spark.hadoop.hops.rpc.socket.factory.class.default": "io.hops.hadoop.shaded.org.apache.hadoop.net.HopsSSLSocketFactory", + "spark.serializer": "org.apache.spark.serializer.KryoSerializer", + "spark.hadoop.hops.ssl.hostname.verifier": "ALLOW_ALL", + "spark.hadoop.hops.ssl.keystore.name": None, + "spark.hadoop.fs.hopsfs.impl": "io.hops.hopsfs.client.HopsFileSystem", + "spark.hadoop.hops.ssl.keystores.passwd.name": None, + "spark.hadoop.hops.ipc.server.ssl.enabled": "true", + "spark.hadoop.client.rpc.ssl.enabled.protocol": "TLSv1.2", + "spark.hadoop.hive.metastore.uris": None, + "spark.sql.hive.metastore.jars": None, + } + _logger.debug("Configuration dict: %s", configuration_dict) + + for key, value in configuration_dict.items(): + _logger.debug("Validating key: %s", key) + if not ( + _spark_session.conf.get(key, "not_found") != "not_found" + and (value is None or _spark_session.conf.get(key, None) == value) + ): + raise FeatureStoreException(exception_text + key) + + def _close(self): + """Closes a client and deletes certificates.""" + _logger.info("Closing external client and cleaning up certificates.") + if self._cert_folder_base is None: + _logger.debug("No certificates to clean up.") + # On external Spark clients (Databricks, Spark Cluster), + # certificates need to be provided before the Spark application starts. + return + + # Clean up only on AWS + _logger.debug("Cleaning up certificates. AWS only.") + self._cleanup_file(self._get_jks_key_store_path()) + self._cleanup_file(self._get_jks_trust_store_path()) + self._cleanup_file(os.path.join(self._cert_folder, "material_passwd")) + self._cleanup_file(self._get_ca_chain_path()) + self._cleanup_file(self._get_client_cert_path()) + self._cleanup_file(self._get_client_key_path()) + + try: + # delete project level + os.rmdir(self._cert_folder) + # delete host level + os.rmdir(os.path.dirname(self._cert_folder)) + # on AWS base dir will be empty, and can be deleted otherwise raises OSError + os.rmdir(self._cert_folder_base) + except OSError: + pass + self._connected = False + + def _get_jks_trust_store_path(self): + _logger.debug("Getting trust store path: %s", self._trust_store_path) + return self._trust_store_path + + def _get_jks_key_store_path(self): + _logger.debug("Getting key store path: %s", self._key_store_path) + return self._key_store_path + + def _get_ca_chain_path(self) -> str: + path = os.path.join(self._cert_folder, "ca_chain.pem") + _logger.debug(f"Getting ca chain path {path}") + return path + + def _get_client_cert_path(self) -> str: + path = os.path.join(self._cert_folder, "client_cert.pem") + _logger.debug(f"Getting client cert path {path}") + return path + + def _get_client_key_path(self) -> str: + path = os.path.join(self._cert_folder, "client_key.pem") + _logger.debug(f"Getting client key path {path}") + return path + + def _get_secret(self, secrets_store, secret_key=None, api_key_file=None): + """Returns secret value from the AWS Secrets Manager or Parameter Store. + + :param secrets_store: the underlying secrets storage to be used, e.g. `secretsmanager` or `parameterstore` + :type secrets_store: str + :param secret_key: key for the secret value, e.g. 
`api-key`, `cert-key`, `trust-store`, `key-store`, defaults to None + :type secret_key: str, optional + :param api_key_file: path to a file containing an api key, defaults to None + :type api_key_file: str optional + :raises hsfs.client.exceptions.ExternalClientError: `api_key_file` needs to be set for local mode + :raises hsfs.client.exceptions.UnknownSecretStorageError: Provided secrets storage not supported + :return: secret + :rtype: str + """ + _logger.debug(f"Querying secrets store {secrets_store} for secret {secret_key}") + if secrets_store == self.SECRETS_MANAGER: + return self._query_secrets_manager(secret_key) + elif secrets_store == self.PARAMETER_STORE: + return self._query_parameter_store(secret_key) + elif secrets_store == self.LOCAL_STORE: + if not api_key_file: + raise exceptions.ExternalClientError( + "api_key_file needs to be set for local mode" + ) + _logger.debug(f"Reading api key from {api_key_file}") + with open(api_key_file) as f: + return f.readline().strip() + else: + raise exceptions.UnknownSecretStorageError( + "Secrets storage " + secrets_store + " is not supported." + ) + + def _query_secrets_manager(self, secret_key): + _logger.debug("Querying secrets manager for secret key: %s", secret_key) + secret_name = "hopsworks/role/" + self._assumed_role() + args = {"service_name": "secretsmanager"} + region_name = self._get_region() + if region_name: + args["region_name"] = region_name + client = boto3.client(**args) + get_secret_value_response = client.get_secret_value(SecretId=secret_name) + return json.loads(get_secret_value_response["SecretString"])[secret_key] + + def _assumed_role(self): + _logger.debug("Getting assumed role") + client = boto3.client("sts") + response = client.get_caller_identity() + # arns for assumed roles in SageMaker follow the following schema + # arn:aws:sts::123456789012:assumed-role/my-role-name/my-role-session-name + local_identifier = response["Arn"].split(":")[-1].split("/") + if len(local_identifier) != 3 or local_identifier[0] != "assumed-role": + raise Exception( + "Failed to extract assumed role from arn: " + response["Arn"] + ) + return local_identifier[1] + + def _get_region(self): + if self._region_name != self.DEFAULT_REGION: + _logger.debug(f"Region name is not default, returning {self._region_name}") + return self._region_name + else: + _logger.debug("Region name is default, returning None") + return None + + def _query_parameter_store(self, secret_key): + _logger.debug("Querying parameter store for secret key: %s", secret_key) + args = {"service_name": "ssm"} + region_name = self._get_region() + if region_name: + args["region_name"] = region_name + client = boto3.client(**args) + name = "/hopsworks/role/" + self._assumed_role() + "/type/" + secret_key + return client.get_parameter(Name=name, WithDecryption=True)["Parameter"][ + "Value" + ] + + def _get_project_info(self, project_name): + """Makes a REST call to hopsworks to get all metadata of a project for the provided project. + + :param project_name: the name of the project + :type project_name: str + :return: JSON response with project info + :rtype: dict + """ + _logger.debug("Getting project info for project: %s", project_name) + return self._send_request("GET", ["project", "getProjectInfo", project_name]) + + def _write_b64_cert_to_bytes(self, b64_string, path): + """Converts b64 encoded certificate to bytes file . + + :param b64_string: b64 encoded string of certificate + :type b64_string: str + :param path: path where file is saved, including file name. e.g. 
/path/key-store.jks + :type path: str + """ + _logger.debug(f"Writing b64 encoded certificate to {path}") + with open(path, "wb") as f: + cert_b64 = base64.b64decode(b64_string) + f.write(cert_b64) + + def _cleanup_file(self, file_path): + """Removes local files with `file_path`.""" + _logger.debug(f"Cleaning up file {file_path}") + try: + os.remove(file_path) + except OSError: + pass + + def replace_public_host(self, url): + """no need to replace as we are already in external client""" + return url + + def _is_external(self) -> bool: + return True + + @property + def host(self) -> str: + return self._host diff --git a/python/hsfs/client/hopsworks.py b/python/hsfs/client/hopsworks.py index 1384b1c20..2134756b1 100644 --- a/python/hsfs/client/hopsworks.py +++ b/python/hsfs/client/hopsworks.py @@ -1,5 +1,5 @@ # -# Copyright 2024 Hopsworks AB +# Copyright 2020 Logical Clocks AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,12 +13,173 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from __future__ import annotations -from hopsworks_common.client.external import ( - Client, -) +import os +from pathlib import Path +import requests +from hsfs.client import auth, base -__all__ = [ - Client, -] + +try: + import jks +except ImportError: + pass + + +class Client(base.Client): + REQUESTS_VERIFY = "REQUESTS_VERIFY" + DOMAIN_CA_TRUSTSTORE_PEM = "DOMAIN_CA_TRUSTSTORE_PEM" + PROJECT_ID = "HOPSWORKS_PROJECT_ID" + PROJECT_NAME = "HOPSWORKS_PROJECT_NAME" + HADOOP_USER_NAME = "HADOOP_USER_NAME" + MATERIAL_DIRECTORY = "MATERIAL_DIRECTORY" + HDFS_USER = "HDFS_USER" + T_CERTIFICATE = "t_certificate" + K_CERTIFICATE = "k_certificate" + TRUSTSTORE_SUFFIX = "__tstore.jks" + KEYSTORE_SUFFIX = "__kstore.jks" + PEM_CA_CHAIN = "ca_chain.pem" + CERT_KEY_SUFFIX = "__cert.key" + MATERIAL_PWD = "material_passwd" + SECRETS_DIR = "SECRETS_DIR" + + def __init__(self): + """Initializes a client being run from a job/notebook directly on Hopsworks.""" + self._base_url = self._get_hopsworks_rest_endpoint() + self._host, self._port = self._get_host_port_pair() + self._secrets_dir = ( + os.environ[self.SECRETS_DIR] if self.SECRETS_DIR in os.environ else "" + ) + self._cert_key = self._get_cert_pw() + trust_store_path = self._get_trust_store_path() + hostname_verification = ( + os.environ[self.REQUESTS_VERIFY] + if self.REQUESTS_VERIFY in os.environ + else "true" + ) + self._project_id = os.environ[self.PROJECT_ID] + self._project_name = self._project_name() + try: + self._auth = auth.BearerAuth(self._read_jwt()) + except FileNotFoundError: + self._auth = auth.ApiKeyAuth(self._read_apikey()) + self._verify = self._get_verify(hostname_verification, trust_store_path) + self._session = requests.session() + + self._connected = True + + credentials = self._get_credentials(self._project_id) + + self._write_pem_file(credentials["caChain"], self._get_ca_chain_path()) + self._write_pem_file(credentials["clientCert"], self._get_client_cert_path()) + self._write_pem_file(credentials["clientKey"], self._get_client_key_path()) + + def _get_hopsworks_rest_endpoint(self): + """Get the hopsworks REST endpoint for making requests to the REST API.""" + return os.environ[self.REST_ENDPOINT] + + def _get_trust_store_path(self): + """Convert truststore from jks to pem and return the location""" + ca_chain_path = Path(self.PEM_CA_CHAIN) + if not ca_chain_path.exists(): + ks = jks.KeyStore.load( + 
self._get_jks_key_store_path(), self._cert_key, try_decrypt_keys=True + ) + ts = jks.KeyStore.load( + self._get_jks_trust_store_path(), self._cert_key, try_decrypt_keys=True + ) + self._write_ca_chain( + ks, + ts, + ca_chain_path, + ) + return str(ca_chain_path) + + def _get_ca_chain_path(self) -> str: + return os.path.join("/tmp", "ca_chain.pem") + + def _get_client_cert_path(self) -> str: + return os.path.join("/tmp", "client_cert.pem") + + def _get_client_key_path(self) -> str: + return os.path.join("/tmp", "client_key.pem") + + def _get_jks_trust_store_path(self): + """ + Get truststore location + + Returns: + truststore location + """ + t_certificate = Path(self.T_CERTIFICATE) + if t_certificate.exists(): + return str(t_certificate) + else: + username = os.environ[self.HADOOP_USER_NAME] + material_directory = Path(os.environ[self.MATERIAL_DIRECTORY]) + return str(material_directory.joinpath(username + self.TRUSTSTORE_SUFFIX)) + + def _get_jks_key_store_path(self): + """ + Get keystore location + + Returns: + keystore location + """ + k_certificate = Path(self.K_CERTIFICATE) + if k_certificate.exists(): + return str(k_certificate) + else: + username = os.environ[self.HADOOP_USER_NAME] + material_directory = Path(os.environ[self.MATERIAL_DIRECTORY]) + return str(material_directory.joinpath(username + self.KEYSTORE_SUFFIX)) + + def _project_name(self): + try: + return os.environ[self.PROJECT_NAME] + except KeyError: + pass + + hops_user = self._project_user() + hops_user_split = hops_user.split( + "__" + ) # project users have username project__user + project = hops_user_split[0] + return project + + def _project_user(self): + try: + hops_user = os.environ[self.HADOOP_USER_NAME] + except KeyError: + hops_user = os.environ[self.HDFS_USER] + return hops_user + + def _get_cert_pw(self): + """ + Get keystore password from local container + + Returns: + Certificate password + """ + pwd_path = Path(self.MATERIAL_PWD) + if not pwd_path.exists(): + username = os.environ[self.HADOOP_USER_NAME] + material_directory = Path(os.environ[self.MATERIAL_DIRECTORY]) + pwd_path = material_directory.joinpath(username + self.CERT_KEY_SUFFIX) + + with pwd_path.open() as f: + return f.read() + + def replace_public_host(self, url): + """replace hostname to public hostname set in HOPSWORKS_PUBLIC_HOST""" + ui_url = url._replace(netloc=os.environ[self.HOPSWORKS_PUBLIC_HOST]) + return ui_url + + def _is_external(self): + return False + + @property + def host(self): + return self._host diff --git a/python/hsfs/client/online_store_rest_client.py b/python/hsfs/client/online_store_rest_client.py index c75be81b7..b733269a1 100644 --- a/python/hsfs/client/online_store_rest_client.py +++ b/python/hsfs/client/online_store_rest_client.py @@ -13,16 +13,372 @@ # See the License for the specific language governing permissions and # limitations under the License. 
#
+from __future__ import annotations
-from hopsworks_common.client.online_store_rest_client import (
- OnlineStoreRestClientSingleton,
- get_instance,
- init_or_reset_online_store_rest_client,
-)
+import logging
+from typing import Any, Dict, List, Optional, Union
+from warnings import warn
+import requests
+import requests.adapters
+from furl import furl
+from hsfs import client
+from hsfs.client.exceptions import FeatureStoreException
+from hsfs.core import variable_api
-__all__ = [
- OnlineStoreRestClientSingleton,
- get_instance,
- init_or_reset_online_store_rest_client,
-]
+
+_logger = logging.getLogger(__name__)
+
+_online_store_rest_client = None
+
+
+def init_or_reset_online_store_rest_client(
+ transport: Optional[
+ Union[requests.adapters.HTTPAdapter, requests.adapters.BaseAdapter]
+ ] = None,
+ optional_config: Optional[Dict[str, Any]] = None,
+ reset_client: bool = False,
+):
+ global _online_store_rest_client
+ if not _online_store_rest_client:
+ _online_store_rest_client = OnlineStoreRestClientSingleton(
+ transport=transport, optional_config=optional_config
+ )
+ elif reset_client:
+ _online_store_rest_client.reset_client(
+ transport=transport, optional_config=optional_config
+ )
+ else:
+ _logger.warning(
+ "Online Store Rest Client is already initialised. To reset the connection and/or override the configuration, "
+ + "use the reset_client flag.",
+ stacklevel=2,
+ )
+
+
+def get_instance() -> OnlineStoreRestClientSingleton:
+ global _online_store_rest_client
+ if _online_store_rest_client is None:
+ _logger.warning(
+ "Online Store Rest Client is not initialised. Initialising with default configuration."
+ )
+ _online_store_rest_client = OnlineStoreRestClientSingleton()
+ _logger.debug("Accessing global Online Store Rest Client instance.")
+ return _online_store_rest_client
+
+
+class OnlineStoreRestClientSingleton:
+ HOST = "host"
+ PORT = "port"
+ VERIFY_CERTS = "verify_certs"
+ USE_SSL = "use_ssl"
+ CA_CERTS = "ca_certs"
+ HTTP_AUTHORIZATION = "http_authorization"
+ TIMEOUT = "timeout"
+ SERVER_API_VERSION = "server_api_version"
+ API_KEY = "api_key"
+ _DEFAULT_ONLINE_STORE_REST_CLIENT_PORT = 4406
+ _DEFAULT_ONLINE_STORE_REST_CLIENT_TIMEOUT_SECOND = 2
+ _DEFAULT_ONLINE_STORE_REST_CLIENT_VERIFY_CERTS = True
+ _DEFAULT_ONLINE_STORE_REST_CLIENT_USE_SSL = True
+ _DEFAULT_ONLINE_STORE_REST_CLIENT_SERVER_API_VERSION = "0.1.0"
+ _DEFAULT_ONLINE_STORE_REST_CLIENT_HTTP_AUTHORIZATION = "X-API-KEY"
+
+ def __init__(
+ self,
+ transport: Optional[
+ Union[requests.adapters.HTTPAdapter, requests.adapters.BaseAdapter]
+ ] = None,
+ optional_config: Optional[Dict[str, Any]] = None,
+ ):
+ _logger.debug(
+ f"Initialising Online Store Rest Client {'with optional configuration' if optional_config else ''}."
+ )
+ if optional_config:
+ _logger.debug(f"Optional Config: {optional_config!r}")
+ self._check_hopsworks_connection()
+ self.variable_api = variable_api.VariableApi()
+ self._auth: client.auth.OnlineStoreKeyAuth
+ self._session: requests.Session
+ self._current_config: Dict[str, Any]
+ self._base_url: furl
+ self._setup_rest_client(
+ transport=transport,
+ optional_config=optional_config,
+ use_current_config=False,
+ )
+ self.is_connected()
+
+ def reset_client(
+ self,
+ transport: Optional[
+ Union[requests.adapters.HTTPAdapter, requests.adapters.BaseAdapter]
+ ] = None,
+ optional_config: Optional[Dict[str, Any]] = None,
+ ):
+ _logger.debug(
+ f"Resetting Online Store Rest Client {'with optional configuration' if optional_config else ''}."
+ )
+ if optional_config:
+ _logger.debug(f"Optional Config: {optional_config}")
+ self._check_hopsworks_connection()
+ if hasattr(self, "_session") and self._session:
+ _logger.debug("Closing existing session.")
+ self._session.close()
+ delattr(self, "_session")
+ self._setup_rest_client(
+ transport=transport,
+ optional_config=optional_config,
+ use_current_config=False if optional_config else True,
+ )
+
+ def _setup_rest_client(
+ self,
+ transport: Optional[
+ Union[requests.adapters.HTTPAdapter, requests.adapters.BaseAdapter]
+ ] = None,
+ optional_config: Optional[Dict[str, Any]] = None,
+ use_current_config: bool = True,
+ ):
+ _logger.debug("Setting up Online Store Rest Client.")
+ if optional_config and not isinstance(optional_config, dict):
+ raise ValueError(
+ "optional_config must be a dictionary. See documentation for allowed keys and values."
+ )
+ _logger.debug("Optional Config: %s", optional_config)
+ if not use_current_config:
+ _logger.debug(
+ "Retrieving default configuration for Online Store REST Client."
+ )
+ self._current_config = self._get_default_client_config()
+ if optional_config:
+ _logger.debug(
+ "Updating default configuration with provided optional configuration."
+ )
+ self._current_config.update(optional_config)
+
+ self._set_auth(optional_config)
+ if not hasattr(self, "_session") or not self._session:
+ _logger.debug("Initialising new requests session.")
+ self._session = requests.Session()
+ else:
+ raise ValueError(
+ "Use the init_or_reset_online_store_rest_client function with the reset_client flag set "
+ + "to True to reset the Online Store REST Client connection."
+ )
+ if transport is not None:
+ _logger.debug("Setting custom transport adapter.")
+ self._session.mount("https://", transport)
+ self._session.mount("http://", transport)
+
+ if not self._current_config[self.VERIFY_CERTS]:
+ _logger.warning(
+ "Disabling SSL certificate verification. This is not recommended for production environments."
+ )
+ self._session.verify = False
+ else:
+ _logger.debug(
+ f"Setting SSL certificate verification using CA Certs path: {self._current_config[self.CA_CERTS]}"
+ )
+ self._session.verify = self._current_config[self.CA_CERTS]
+
+ # Set base_url
+ scheme = "https" if self._current_config[self.USE_SSL] else "http"
+ self._base_url = furl(
+ f"{scheme}://{self._current_config[self.HOST]}:{self._current_config[self.PORT]}/{self._current_config[self.SERVER_API_VERSION]}"
+ )
+
+ assert (
+ self._session is not None
+ ), "Online Store REST Client failed to initialise."
+ assert (
+ self._auth is not None
+ ), "Online Store REST Client Authentication failed to initialise. Check API Key."
+ assert (
+ self._base_url is not None
+ ), "Online Store REST Client Base URL failed to initialise. Check host and port parameters."
+ assert (
+ self._current_config is not None
+ ), "Online Store REST Client Configuration failed to initialise."
+
+ def _get_default_client_config(self) -> Dict[str, Any]:
+ _logger.debug("Retrieving default configuration for Online Store REST Client.")
+ default_config = self._get_default_static_parameters_config()
+ default_config.update(self._get_default_dynamic_parameters_config())
+ return default_config
+
+ def _get_default_static_parameters_config(self) -> Dict[str, Any]:
+ _logger.debug(
+ "Retrieving default static configuration for Online Store REST Client."
+ )
+ return {
+ self.TIMEOUT: self._DEFAULT_ONLINE_STORE_REST_CLIENT_TIMEOUT_SECOND,
+ self.VERIFY_CERTS: self._DEFAULT_ONLINE_STORE_REST_CLIENT_VERIFY_CERTS,
+ self.USE_SSL: self._DEFAULT_ONLINE_STORE_REST_CLIENT_USE_SSL,
+ self.SERVER_API_VERSION: self._DEFAULT_ONLINE_STORE_REST_CLIENT_SERVER_API_VERSION,
+ self.HTTP_AUTHORIZATION: self._DEFAULT_ONLINE_STORE_REST_CLIENT_HTTP_AUTHORIZATION,
+ }
+
+ def _get_default_dynamic_parameters_config(
+ self,
+ ) -> Dict[str, Any]:
+ _logger.debug(
+ "Retrieving default dynamic configuration for Online Store REST Client."
+ )
+ url = furl(self._get_rondb_rest_server_endpoint())
+ _logger.debug(f"Default RonDB Rest Server host and port: {url.host}:{url.port}")
+ _logger.debug(
+ f"Using CA Certs from Hopsworks Client: {client.get_instance()._get_ca_chain_path()}"
+ )
+ return {
+ self.HOST: url.host,
+ self.PORT: url.port,
+ self.CA_CERTS: client.get_instance()._get_ca_chain_path(),
+ }
+
+ def _get_rondb_rest_server_endpoint(self) -> str:
+ """Retrieve the RonDB Rest Server endpoint based on whether the client is running internally or externally.
+
+ If the client is running externally, the endpoint is retrieved via the loadbalancer.
+ If the client is running internally, the endpoint is retrieved via (consul) service discovery.
+ The default port for the RonDB Rest Server is 4406 and is always used unless a different port
+ is specified in the configuration.
+
+ Returns:
+ str: RonDB Rest Server endpoint with default port.
+ """
+ if client.get_instance()._is_external():
+ _logger.debug(
+ "External Online Store REST Client : Retrieving RonDB Rest Server endpoint via loadbalancer."
+ )
+ external_domain = self.variable_api.get_loadbalancer_external_domain()
+ if external_domain == "":
+ _logger.debug(
+ "External Online Store REST Client : Loadbalancer external domain is not set. Using client host as endpoint."
+ )
+ external_domain = client.get_instance().host
+ default_url = f"https://{external_domain}:{self._DEFAULT_ONLINE_STORE_REST_CLIENT_PORT}"
+ _logger.debug(
+ f"External Online Store REST Client : Default RonDB Rest Server endpoint: {default_url}"
+ )
+ return default_url
+ else:
+ _logger.debug(
+ "Internal Online Store REST Client : Retrieving RonDB Rest Server endpoint via service discovery."
+ )
+ service_discovery_domain = self.variable_api.get_service_discovery_domain()
+ if service_discovery_domain == "":
+ raise FeatureStoreException("Service discovery domain is not set.")
+ default_url = f"https://rdrs.service.{service_discovery_domain}:{self._DEFAULT_ONLINE_STORE_REST_CLIENT_PORT}"
+ _logger.debug(
+ f"Internal Online Store REST Client : Default RonDB Rest Server endpoint: {default_url}"
+ )
+ return default_url
+
+ def send_request(
+ self,
+ method: str,
+ path_params: List[str],
+ headers: Optional[Dict[str, Any]] = None,
+ data: Optional[str] = None,
+ ) -> requests.Response:
+ url = self._base_url.copy()
+ url.path.segments.extend(path_params)
+ _logger.debug(f"Sending {method} request to {url.url}.")
+ _logger.debug(f"Provided Data: {data}")
+ _logger.debug(f"Provided Headers: {headers}")
+ prepped_request = self._session.prepare_request(
+ requests.Request(
+ method, url=url.url, headers=headers, data=data, auth=self.auth
+ )
+ )
+ timeout = self._current_config[self.TIMEOUT]
+ return self._session.send(
+ prepped_request,
+ # compatibility with 3.7: values >= 500 are treated as milliseconds
+ # and converted to seconds
+ timeout=timeout if timeout < 500 else timeout / 1000,
+ )
+
+ def _check_hopsworks_connection(self) -> None:
+ _logger.debug("Checking Hopsworks connection.")
+ assert (
+ client.get_instance() is not None and client.get_instance()._connected
+ ), """Hopsworks Client is not connected. Please connect to Hopsworks cluster
+ via hopsworks.login or hsfs.connection before initialising the Online Store REST Client.
+ """
+ _logger.debug("Hopsworks connection is active.")
+
+ def _set_auth(self, optional_config: Optional[Dict[str, Any]] = None) -> None:
+ """Set authentication object for the Online Store REST Client.
+
+ RonDB Rest Server uses Hopsworks Api Key to authenticate requests via the X-API-KEY header by default.
+ The api key determines the permissions of the user making the request for access to a given Feature Store.
+ """
+ _logger.debug("Setting authentication for Online Store REST Client.")
+ if client.get_instance()._is_external():
+ assert hasattr(
+ client.get_instance()._auth, "_token"
+ ), "External client must use API Key authentication. Contact your system administrator."
+ _logger.debug(
+ "External Online Store REST Client : Setting authentication using Hopsworks Client API Key."
+ )
+ self._auth = client.auth.OnlineStoreKeyAuth(
+ client.get_instance()._auth._token
+ )
+ elif isinstance(optional_config, dict) and optional_config.get(
+ self.API_KEY, False
+ ):
+ _logger.debug(
+ "Setting authentication using provided API Key from optional configuration."
+ )
+ self._auth = client.auth.OnlineStoreKeyAuth(optional_config[self.API_KEY])
+ elif hasattr(self, "_auth") and self._auth is not None:
+ _logger.debug(
+ "Authentication for Online Store REST Client is already set. Using existing authentication api key."
+ )
+ else:
+ raise FeatureStoreException(
+ "RonDB Rest Server uses Hopsworks Api Key to authenticate requests. "
+ + f"Provide a configuration with the {self.API_KEY} key."
+ )
+
+ def is_connected(self):
+ """If Online Store Rest Client is initialised, ping RonDB Rest Server to ensure connection is active."""
+ if self._session is None:
+ _logger.debug(
+ "Checking Online Store REST Client is connected. Session is not initialised."
+ )
+ raise FeatureStoreException("Online Store REST Client is not initialised.")
+
+ _logger.debug(
+ "Checking Online Store REST Client is connected. Pinging RonDB Rest Server."
+ )
+ if not self.send_request("GET", ["ping"]):
+ warn("Ping failed, RonDB Rest Server is not reachable.", stacklevel=2)
+ return False
+ return True
+
+ @property
+ def session(self) -> requests.Session:
+ """Requests session object used to send requests to the Online Store REST API."""
+ return self._session
+
+ @property
+ def base_url(self) -> furl:
+ """Base URL for the Online Store REST API.
+
+ This is the URL of the RonDB REST Server; it should not be confused with the OpenSearch vector DB, which also serves as an online store for features belonging to feature groups containing embeddings."""
+ return self._base_url
+
+ @property
+ def current_config(self) -> Dict[str, Any]:
+ """Current configuration of the Online Store REST Client."""
+ return self._current_config
+
+ @property
+ def auth(self) -> "client.auth.OnlineStoreKeyAuth":
+ """Authentication object used to authenticate requests to the Online Store REST API.
+
+ Extends the requests.auth.AuthBase class.
+ """
+ return self._auth
diff --git a/python/hsfs/core/constants.py b/python/hsfs/core/constants.py
index a9bc0b1df..d6af38018 100644
--- a/python/hsfs/core/constants.py
+++ b/python/hsfs/core/constants.py
@@ -1,40 +1,35 @@
-#
-# Copyright 2024 Hopsworks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
+import importlib.util
-from hopsworks_common.core.constants import (
- HAS_AIOMYSQL,
- HAS_AVRO,
- HAS_CONFLUENT_KAFKA,
- HAS_FAST_AVRO,
- HAS_GREAT_EXPECTATIONS,
- HAS_NUMPY,
- HAS_SQLALCHEMY,
- great_expectations_not_installed_message,
- initialise_expectation_suite_for_single_expectation_api_message,
+
+# Avro
+HAS_FAST_AVRO: bool = importlib.util.find_spec("fastavro") is not None
+HAS_AVRO: bool = importlib.util.find_spec("avro") is not None
+
+# Confluent Kafka
+HAS_CONFLUENT_KAFKA: bool = importlib.util.find_spec("confluent_kafka") is not None
+confluent_kafka_not_installed_message = (
+ "Confluent Kafka package not found. "
+ "If you want to use Kafka with Hopsworks you can install the corresponding extras "
+ """`pip install hopsworks[python]` or `pip install "hopsworks[python]"` if using zsh. """
+ "You can also install confluent-kafka directly in your environment e.g. `pip install confluent-kafka`. "
+ "You will need to restart your kernel if applicable."
+)
+# Data Validation / Great Expectations
+HAS_GREAT_EXPECTATIONS: bool = (
+ importlib.util.find_spec("great_expectations") is not None
+)
+great_expectations_not_installed_message = (
+ "Great Expectations package not found. "
+ "If you want to use data validation with Hopsworks you can install the corresponding extras "
+ """`pip install hopsworks[great_expectations]` or `pip install "hopsworks[great_expectations]"` if using zsh. """
+ "You can also install great-expectations directly in your environment e.g. `pip install great-expectations`. "
+ "You will need to restart your kernel if applicable."
) +initialise_expectation_suite_for_single_expectation_api_message = "Initialize Expectation Suite by attaching to a Feature Group to enable single expectation API" +# Numpy +HAS_NUMPY: bool = importlib.util.find_spec("numpy") is not None -__all__ = [ - HAS_AIOMYSQL, - HAS_AVRO, - HAS_CONFLUENT_KAFKA, - HAS_FAST_AVRO, - HAS_GREAT_EXPECTATIONS, - HAS_NUMPY, - HAS_SQLALCHEMY, - great_expectations_not_installed_message, - initialise_expectation_suite_for_single_expectation_api_message, -] +# SQL packages +HAS_SQLALCHEMY: bool = importlib.util.find_spec("sqlalchemy") is not None +HAS_AIOMYSQL: bool = importlib.util.find_spec("aiomysql") is not None diff --git a/python/hsfs/core/variable_api.py b/python/hsfs/core/variable_api.py index 9d6e9765f..b499bd9b4 100644 --- a/python/hsfs/core/variable_api.py +++ b/python/hsfs/core/variable_api.py @@ -1,5 +1,5 @@ # -# Copyright 2024 Hopsworks AB +# Copyright 2022 Hopsworks AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,12 +13,66 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from __future__ import annotations -from hopsworks_common.core.variable_api import ( - VariableApi, -) +import re +from hsfs import client +from hsfs.client.exceptions import RestAPIError -__all__ = [ - VariableApi, -] + +class VariableApi: + def get_version(self, software: str): + _client = client.get_instance() + path_params = [ + "variables", + "versions", + ] + + resp = _client._send_request("GET", path_params) + for entry in resp: + if entry["software"] == software: + return entry["version"] + return None + + def parse_major_and_minor(self, backend_version): + version_pattern = r"(\d+)\.(\d+)" + matches = re.match(version_pattern, backend_version) + + return matches.group(1), matches.group(2) + + def get_flyingduck_enabled(self): + _client = client.get_instance() + path_params = [ + "variables", + "enable_flyingduck", + ] + + resp = _client._send_request("GET", path_params) + return resp["successMessage"] == "true" + + def get_loadbalancer_external_domain(self): + _client = client.get_instance() + path_params = [ + "variables", + "loadbalancer_external_domain", + ] + + try: + resp = _client._send_request("GET", path_params) + return resp["successMessage"] + except RestAPIError: + return "" + + def get_service_discovery_domain(self): + _client = client.get_instance() + path_params = [ + "variables", + "service_discovery_domain", + ] + + try: + resp = _client._send_request("GET", path_params) + return resp["successMessage"] + except RestAPIError: + return "" diff --git a/python/hsfs/decorators.py b/python/hsfs/decorators.py index 1165a2daa..3ce15277f 100644 --- a/python/hsfs/decorators.py +++ b/python/hsfs/decorators.py @@ -1,5 +1,5 @@ # -# Copyright 2024 Hopsworks AB +# Copyright 2020 Logical Clocks AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,22 +13,73 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +from __future__ import annotations -from hopsworks_common.decorators import ( - HopsworksConnectionError, - NoHopsworksConnectionError, - connected, - not_connected, - typechecked, - uses_great_expectations, +import functools +import os + +from hsfs.core.constants import ( + HAS_GREAT_EXPECTATIONS, + great_expectations_not_installed_message, ) -__all__ = [ - HopsworksConnectionError, - NoHopsworksConnectionError, - connected, - not_connected, - typechecked, - uses_great_expectations, -] +def not_connected(fn): + @functools.wraps(fn) + def if_not_connected(inst, *args, **kwargs): + if inst._connected: + raise HopsworksConnectionError + return fn(inst, *args, **kwargs) + + return if_not_connected + + +def connected(fn): + @functools.wraps(fn) + def if_connected(inst, *args, **kwargs): + if not inst._connected: + raise NoHopsworksConnectionError + return fn(inst, *args, **kwargs) + + return if_connected + + +class HopsworksConnectionError(Exception): + """Thrown when attempted to change connection attributes while connected.""" + + def __init__(self): + super().__init__( + "Connection is currently in use. Needs to be closed for modification." + ) + + +class NoHopsworksConnectionError(Exception): + """Thrown when attempted to perform operation on connection while not connected.""" + + def __init__(self): + super().__init__( + "Connection is not active. Needs to be connected for feature store operations." + ) + + +if os.environ.get("HOPSWORKS_RUN_WITH_TYPECHECK", False): + from typeguard import typechecked +else: + from typing import TypeVar + + _T = TypeVar("_T") + + def typechecked( + target: _T, + ) -> _T: + return target if target else typechecked + + +def uses_great_expectations(f): + @functools.wraps(f) + def g(*args, **kwds): + if not HAS_GREAT_EXPECTATIONS: + raise ModuleNotFoundError(great_expectations_not_installed_message) + return f(*args, **kwds) + + return g diff --git a/python/hsml/decorators.py b/python/hsml/decorators.py index 1165a2daa..826fd5aa2 100644 --- a/python/hsml/decorators.py +++ b/python/hsml/decorators.py @@ -1,5 +1,5 @@ # -# Copyright 2024 Hopsworks AB +# Copyright 2021 Logical Clocks AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,21 +14,42 @@ # limitations under the License. # -from hopsworks_common.decorators import ( - HopsworksConnectionError, - NoHopsworksConnectionError, - connected, - not_connected, - typechecked, - uses_great_expectations, -) - - -__all__ = [ - HopsworksConnectionError, - NoHopsworksConnectionError, - connected, - not_connected, - typechecked, - uses_great_expectations, -] +import functools + + +def not_connected(fn): + @functools.wraps(fn) + def if_not_connected(inst, *args, **kwargs): + if inst._connected: + raise HopsworksConnectionError + return fn(inst, *args, **kwargs) + + return if_not_connected + + +def connected(fn): + @functools.wraps(fn) + def if_connected(inst, *args, **kwargs): + if not inst._connected: + raise NoHopsworksConnectionError + return fn(inst, *args, **kwargs) + + return if_connected + + +class HopsworksConnectionError(Exception): + """Thrown when attempted to change connection attributes while connected.""" + + def __init__(self): + super().__init__( + "Connection is currently in use. Needs to be closed for modification." 
+ ) + + +class NoHopsworksConnectionError(Exception): + """Thrown when attempted to perform operation on connection while not connected.""" + + def __init__(self): + super().__init__( + "Connection is not active. Needs to be connected for model registry operations." + ) diff --git a/python/tests/core/test_online_store_rest_client.py b/python/tests/core/test_online_store_rest_client.py index 39ed1f640..90d368dfd 100644 --- a/python/tests/core/test_online_store_rest_client.py +++ b/python/tests/core/test_online_store_rest_client.py @@ -13,10 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import hopsworks_common +import hsfs import pytest from furl import furl -from hopsworks_common.client import auth, exceptions, online_store_rest_client +from hsfs.client import auth, exceptions, online_store_rest_client class MockExternalClient: @@ -50,15 +50,13 @@ def test_setup_rest_client_external(self, mocker, monkeypatch): def client_get_instance(): return MockExternalClient() - monkeypatch.setattr( - hopsworks_common.client, "get_instance", client_get_instance - ) + monkeypatch.setattr(hsfs.client, "get_instance", client_get_instance) variable_api_mock = mocker.patch( - "hopsworks_common.core.variable_api.VariableApi.get_loadbalancer_external_domain", + "hsfs.core.variable_api.VariableApi.get_loadbalancer_external_domain", return_value="app.hopsworks.ai", ) ping_rdrs_mock = mocker.patch( - "hopsworks_common.client.online_store_rest_client.OnlineStoreRestClientSingleton.is_connected", + "hsfs.client.online_store_rest_client.OnlineStoreRestClientSingleton.is_connected", ) # Act @@ -88,16 +86,14 @@ def test_setup_online_store_rest_client_internal(self, mocker, monkeypatch): def client_get_instance(): return MockInternalClient() - monkeypatch.setattr( - hopsworks_common.client, "get_instance", client_get_instance - ) + monkeypatch.setattr(hsfs.client, "get_instance", client_get_instance) variable_api_mock = mocker.patch( - "hopsworks_common.core.variable_api.VariableApi.get_service_discovery_domain", + "hsfs.core.variable_api.VariableApi.get_service_discovery_domain", return_value="consul", ) optional_config = {"api_key": "provided_api_key"} ping_rdrs_mock = mocker.patch( - "hopsworks_common.client.online_store_rest_client.OnlineStoreRestClientSingleton.is_connected", + "hsfs.client.online_store_rest_client.OnlineStoreRestClientSingleton.is_connected", ) # Act From 59d705e2530e6eb6e2be780bc43e8dbd703dcf18 Mon Sep 17 00:00:00 2001 From: kenneth Date: Fri, 19 Jul 2024 10:27:07 +0200 Subject: [PATCH 2/5] Revert "[FSTORE-1439][APPEND] Apply ruff formatting and fixes to the merged repo (#232)" This reverts commit a612df1594f7a04f7495cbeebcc7ee3ebe851918. 
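Among the formatting fixes reverted below are ruff's exact-type comparison rule (E721) and import re-ordering. A minimal sketch of what the comparison rule changes (illustrative only, not part of the diffs below):

    class Client:
        pass

    _client = Client()

    if type(_client) == Client:  # pre-ruff spelling, restored by this revert
        pass

    if type(_client) is Client:  # ruff-preferred spelling, removed by this revert
        pass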
--- python/hopsworks/client/__init__.py | 1 - python/hopsworks/client/base.py | 5 +- python/hopsworks/client/hopsworks.py | 4 +- python/hopsworks/connection.py | 11 +-- python/hopsworks/core/dataset_api.py | 13 +-- python/hopsworks/core/environment_api.py | 23 ++--- python/hopsworks/core/flink_cluster_api.py | 10 +- python/hopsworks/core/git_api.py | 18 ++-- python/hopsworks/core/git_provider_api.py | 6 +- python/hopsworks/core/job_api.py | 2 +- python/hopsworks/core/kafka_api.py | 7 +- python/hopsworks/core/secret_api.py | 18 ++-- python/hopsworks/engine/execution_engine.py | 4 +- python/hopsworks/engine/git_engine.py | 7 +- python/hopsworks/environment.py | 20 ++-- python/hopsworks/flink_cluster.py | 6 +- python/hopsworks/git_commit.py | 5 +- python/hopsworks/git_file_status.py | 2 +- python/hopsworks/git_provider.py | 5 +- python/hopsworks/git_remote.py | 5 +- python/hopsworks/git_repo.py | 7 +- python/hopsworks/job_schedule.py | 4 +- python/hopsworks/kafka_schema.py | 5 +- python/hopsworks/kafka_topic.py | 5 +- python/hopsworks/library.py | 1 + python/hopsworks/project.py | 10 +- python/hopsworks/user.py | 2 +- python/hopsworks/util.py | 4 +- python/hsfs/client/base.py | 3 +- python/hsfs/core/explicit_provenance.py | 4 +- python/hsfs/core/feature_logging.py | 32 +++--- python/hsfs/core/feature_view_api.py | 16 ++- python/hsfs/core/feature_view_engine.py | 98 ++++++++----------- python/hsfs/core/opensearch.py | 18 ++-- python/hsfs/core/storage_connector_api.py | 5 +- python/hsfs/core/vector_db_client.py | 61 ++++-------- python/hsfs/core/vector_server.py | 22 ++--- python/hsfs/engine/spark.py | 4 +- python/hsfs/feature.py | 25 +++-- python/hsfs/storage_connector.py | 4 +- python/hsfs/usage.py | 22 ++--- python/hsml/client/istio/utils/numpy_codec.py | 2 +- python/hsml/connection.py | 3 - python/hsml/model_serving.py | 7 +- python/hsml/util.py | 2 - python/tests/core/test_feature_group_api.py | 4 +- python/tests/core/test_opensearch.py | 1 + python/tests/core/test_vector_db_client.py | 4 +- python/tests/test_util.py | 4 +- 49 files changed, 229 insertions(+), 322 deletions(-) diff --git a/python/hopsworks/client/__init__.py b/python/hopsworks/client/__init__.py index 004e49c8b..1e4a7ea8f 100644 --- a/python/hopsworks/client/__init__.py +++ b/python/hopsworks/client/__init__.py @@ -16,7 +16,6 @@ from hopsworks.client import external, hopsworks - _client = None _python_version = None diff --git a/python/hopsworks/client/base.py b/python/hopsworks/client/base.py index 852259639..1d343aaa6 100644 --- a/python/hopsworks/client/base.py +++ b/python/hopsworks/client/base.py @@ -15,12 +15,13 @@ # import os +import furl from abc import ABC, abstractmethod -import furl import requests import urllib3 -from hopsworks.client import auth, exceptions + +from hopsworks.client import exceptions, auth from hopsworks.decorators import connected diff --git a/python/hopsworks/client/hopsworks.py b/python/hopsworks/client/hopsworks.py index 514e3fe48..884dc4000 100644 --- a/python/hopsworks/client/hopsworks.py +++ b/python/hopsworks/client/hopsworks.py @@ -20,8 +20,8 @@ from pathlib import Path import requests -from hopsworks.client import auth, base +from hopsworks.client import base, auth try: import jks @@ -134,7 +134,7 @@ def _convert_jks_to_pem(self, jks_path, keystore_pw): ca_certs = "" # Convert CA Certificates into PEM format and append to string - for _alias, c in ks.certs.items(): + for alias, c in ks.certs.items(): ca_certs = ca_certs + self._bytes_to_pem_str(c.cert, "CERTIFICATE") return ca_certs 
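The hunk above ends in `_convert_jks_to_pem`, which walks the trusted certificates of a Java keystore and concatenates them in PEM format. A self-contained sketch of the same idea using pyjks (path and password are placeholders, and the inline PEM framing stands in for the `_bytes_to_pem_str` helper):

    import base64

    import jks

    def jks_certs_to_pem(jks_path, keystore_pw):
        """Concatenate all trusted certificates of a JKS store into one PEM string."""
        ks = jks.KeyStore.load(jks_path, keystore_pw)
        pem = ""
        for entry in ks.certs.values():
            # entry.cert holds the DER-encoded certificate bytes
            body = base64.b64encode(entry.cert).decode("ascii")
            wrapped = "\n".join(body[i : i + 64] for i in range(0, len(body), 64))
            pem += "-----BEGIN CERTIFICATE-----\n" + wrapped + "\n-----END CERTIFICATE-----\n"
        return pem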
diff --git a/python/hopsworks/connection.py b/python/hopsworks/connection.py index 61f2e3d6a..1fe984030 100644 --- a/python/hopsworks/connection.py +++ b/python/hopsworks/connection.py @@ -16,14 +16,14 @@ import os import re -import sys import warnings +import sys -from hopsworks import client, version -from hopsworks.core import project_api, secret_api, variable_api -from hopsworks.decorators import connected, not_connected from requests.exceptions import ConnectionError +from hopsworks.decorators import connected, not_connected +from hopsworks import client, version +from hopsworks.core import project_api, secret_api, variable_api HOPSWORKS_PORT_DEFAULT = 443 HOSTNAME_VERIFICATION_DEFAULT = True @@ -210,8 +210,7 @@ def _check_compatibility(self): warnings.warn( "The installed hopsworks client version {0} may not be compatible with the connected Hopsworks backend version {1}. \nTo ensure compatibility please install the latest bug fix release matching the minor version of your backend ({2}) by running 'pip install hopsworks=={2}.*'".format( client_version, backend_version, major_minor_backend - ), - stacklevel=1, + ) ) sys.stderr.flush() diff --git a/python/hopsworks/core/dataset_api.py b/python/hopsworks/core/dataset_api.py index a0e84d235..285083cac 100644 --- a/python/hopsworks/core/dataset_api.py +++ b/python/hopsworks/core/dataset_api.py @@ -14,17 +14,18 @@ # limitations under the License. # -import copy -import logging import math import os -import shutil import time -from concurrent.futures import ThreadPoolExecutor, wait +from tqdm.auto import tqdm +import shutil +import logging +import copy from hopsworks import client -from hopsworks.client.exceptions import DatasetException, RestAPIError -from tqdm.auto import tqdm +from hopsworks.client.exceptions import RestAPIError +from hopsworks.client.exceptions import DatasetException +from concurrent.futures import ThreadPoolExecutor, wait class Chunk: diff --git a/python/hopsworks/core/environment_api.py b/python/hopsworks/core/environment_api.py index 6a9ccf2ea..18c0c55d1 100644 --- a/python/hopsworks/core/environment_api.py +++ b/python/hopsworks/core/environment_api.py @@ -32,13 +32,7 @@ def __init__( self._environment_engine = environment_engine.EnvironmentEngine(project_id) - def create_environment( - self, - name: str, - description: Optional[str] = None, - base_environment_name: Optional[str] = "python-feature-pipeline", - await_creation: Optional[bool] = True, - ) -> environment.Environment: + def create_environment(self, name: str, description: Optional[str] = None, base_environment_name: Optional[str] = "python-feature-pipeline", await_creation: Optional[bool] = True) -> environment.Environment: """Create Python environment for the project ```python @@ -72,14 +66,13 @@ def create_environment( name, ] headers = {"content-type": "application/json"} - data = { - "name": name, - "baseImage": {"name": base_environment_name, "description": description}, - } + data = {"name": name, + "baseImage": { + "name": base_environment_name, + "description": description + }} env = environment.Environment.from_response_json( - _client._send_request( - "POST", path_params, headers=headers, data=json.dumps(data) - ), + _client._send_request("POST", path_params, headers=headers, data=json.dumps(data)), self._project_id, self._project_name, ) @@ -155,4 +148,4 @@ def _delete(self, name): name, ] headers = {"content-type": "application/json"} - (_client._send_request("DELETE", path_params, headers=headers),) + _client._send_request("DELETE", 
path_params, headers=headers), diff --git a/python/hopsworks/core/flink_cluster_api.py b/python/hopsworks/core/flink_cluster_api.py index 53b13b3ed..825f7d42d 100644 --- a/python/hopsworks/core/flink_cluster_api.py +++ b/python/hopsworks/core/flink_cluster_api.py @@ -14,11 +14,9 @@ # limitations under the License. # -import json import os - -from hopsworks import client, flink_cluster, job, util -from hopsworks.client.exceptions import RestAPIError +import json +from hopsworks import client, flink_cluster, util, job from hopsworks.core import job_api @@ -71,9 +69,7 @@ def setup_cluster(self, name: str, config=None): # If the job already exists, retrieve it _flink_cluster = self.get_cluster(name) if _flink_cluster._job.job_type != "FLINK": - raise RestAPIError( - "This is not a Flink cluster. Please use different name to create new Flink cluster" - ) + raise "This is not a Flink cluster. Please use different name to create new Flink cluster" return _flink_cluster else: # If the job doesn't exists, create a new job diff --git a/python/hopsworks/core/git_api.py b/python/hopsworks/core/git_api.py index 581b18243..6eaa8afd9 100644 --- a/python/hopsworks/core/git_api.py +++ b/python/hopsworks/core/git_api.py @@ -14,23 +14,23 @@ # limitations under the License. # -import json -import logging -from typing import List, Union - from hopsworks import ( client, - git_commit, - git_file_status, - git_op_execution, git_repo, + git_op_execution, util, + git_commit, + git_file_status, ) from hopsworks.client.exceptions import GitException -from hopsworks.core import git_provider_api from hopsworks.engine import git_engine +from hopsworks.core import git_provider_api +from typing import List, Union from hopsworks.git_file_status import GitFileStatus +import json +import logging + class GitApi: def __init__( @@ -347,7 +347,7 @@ def _status(self, repo_id): status_dict = json.loads(git_op.command_result_message) file_status = None - if status_dict is not None and isinstance(status_dict["status"], list): + if status_dict is not None and type(status_dict["status"]) is list: file_status = [] for status in status_dict["status"]: file_status.append( diff --git a/python/hopsworks/core/git_provider_api.py b/python/hopsworks/core/git_provider_api.py index 139109928..b06c95fb4 100644 --- a/python/hopsworks/core/git_provider_api.py +++ b/python/hopsworks/core/git_provider_api.py @@ -14,11 +14,11 @@ # limitations under the License. # -import json - from hopsworks import client, git_provider -from hopsworks.client.exceptions import GitException from hopsworks.engine import git_engine +from hopsworks.client.exceptions import GitException + +import json class GitProviderApi: diff --git a/python/hopsworks/core/job_api.py b/python/hopsworks/core/job_api.py index 4a93f1bfd..e40afe8c0 100644 --- a/python/hopsworks/core/job_api.py +++ b/python/hopsworks/core/job_api.py @@ -16,7 +16,7 @@ import json -from hopsworks import client, job, job_schedule, util +from hopsworks import client, job, util, job_schedule from hopsworks.client.exceptions import RestAPIError diff --git a/python/hopsworks/core/kafka_api.py b/python/hopsworks/core/kafka_api.py index b597a89b9..f1ae2ece9 100644 --- a/python/hopsworks/core/kafka_api.py +++ b/python/hopsworks/core/kafka_api.py @@ -14,11 +14,10 @@ # limitations under the License. 
# +from hopsworks import client, kafka_topic, kafka_schema, constants +from hopsworks.client.exceptions import KafkaException import json import socket - -from hopsworks import client, constants, kafka_schema, kafka_topic -from hopsworks.client.exceptions import KafkaException from hopsworks.client.external import Client @@ -367,7 +366,7 @@ def get_default_config(self): constants.KAFKA_SSL_CONFIG.SSL_ENDPOINT_IDENTIFICATION_ALGORITHM_CONFIG: "none", } _client = client.get_instance() - if type(_client) is Client: + if type(_client) == Client: config[constants.KAFKA_PRODUCER_CONFIG.BOOTSTRAP_SERVERS_CONFIG] = ",".join( [ endpoint.replace("EXTERNAL://", "") diff --git a/python/hopsworks/core/secret_api.py b/python/hopsworks/core/secret_api.py index bf47b6ad8..169ac6ff1 100644 --- a/python/hopsworks/core/secret_api.py +++ b/python/hopsworks/core/secret_api.py @@ -72,9 +72,7 @@ def get_secret(self, name: str, owner: str = None) -> secret.Secret: "shared", ] - return secret.Secret.from_response_json( - _client._send_request("GET", path_params, query_params=query_params) - )[0] + return secret.Secret.from_response_json(_client._send_request("GET", path_params, query_params=query_params))[0] def get(self, name: str, owner: str = None) -> str: """Get the secret's value. @@ -92,20 +90,16 @@ def get(self, name: str, owner: str = None) -> str: return self.get_secret(name=name, owner=owner).value except RestAPIError as e: if ( - e.response.json().get("errorCode", "") == 160048 - and e.response.status_code == 404 - and util.is_interactive() + e.response.json().get("errorCode", "") == 160048 + and e.response.status_code == 404 + and util.is_interactive() ): - secret_input = getpass.getpass( - prompt="\nCould not find secret, enter value here to create it: " - ) + secret_input = getpass.getpass(prompt="\nCould not find secret, enter value here to create it: ") return self.create_secret(name, secret_input).value else: raise e - def create_secret( - self, name: str, value: str, project: str = None - ) -> secret.Secret: + def create_secret(self, name: str, value: str, project: str = None) -> secret.Secret: """Create a new secret. ```python diff --git a/python/hopsworks/engine/execution_engine.py b/python/hopsworks/engine/execution_engine.py index 5ff14cee7..7a7af92ff 100644 --- a/python/hopsworks/engine/execution_engine.py +++ b/python/hopsworks/engine/execution_engine.py @@ -14,13 +14,13 @@ # limitations under the License. # -import logging +from hopsworks.core import dataset_api, execution_api import os +import logging import time import uuid from hopsworks.client.exceptions import JobExecutionException, RestAPIError -from hopsworks.core import dataset_api, execution_api class ExecutionEngine: diff --git a/python/hopsworks/engine/git_engine.py b/python/hopsworks/engine/git_engine.py index 3fb506e91..f0aa74d3c 100644 --- a/python/hopsworks/engine/git_engine.py +++ b/python/hopsworks/engine/git_engine.py @@ -14,11 +14,10 @@ # limitations under the License. 
# -import logging -import time - -from hopsworks.client.exceptions import GitException from hopsworks.core import git_op_execution_api +from hopsworks.client.exceptions import GitException +import time +import logging class GitEngine: diff --git a/python/hopsworks/environment.py b/python/hopsworks/environment.py index f286bdf8c..3d087cad0 100644 --- a/python/hopsworks/environment.py +++ b/python/hopsworks/environment.py @@ -133,18 +133,16 @@ def install_wheel(self, path: str, await_installation: Optional[bool] = True): "packageSource": "WHEEL", } - library_rest = self._library_api._install(library_name, self.name, library_spec) + library_rest = self._library_api._install( + library_name, self.name, library_spec + ) if await_installation: - return self._environment_engine.await_library_command( - self.name, library_name - ) + return self._environment_engine.await_library_command(self.name, library_name) return library_rest - def install_requirements( - self, path: str, await_installation: Optional[bool] = True - ): + def install_requirements(self, path: str, await_installation: Optional[bool] = True): """Install libraries specified in a requirements.txt file ```python @@ -186,12 +184,12 @@ def install_requirements( "packageSource": "REQUIREMENTS_TXT", } - library_rest = self._library_api._install(library_name, self.name, library_spec) + library_rest = self._library_api._install( + library_name, self.name, library_spec + ) if await_installation: - return self._environment_engine.await_library_command( - self.name, library_name - ) + return self._environment_engine.await_library_command(self.name, library_name) return library_rest diff --git a/python/hopsworks/flink_cluster.py b/python/hopsworks/flink_cluster.py index 443ead2f8..5f2936aad 100644 --- a/python/hopsworks/flink_cluster.py +++ b/python/hopsworks/flink_cluster.py @@ -15,10 +15,10 @@ # import time - -from hopsworks import util -from hopsworks.core import execution_api, flink_cluster_api from hopsworks.engine import execution_engine +from hopsworks.core import execution_api +from hopsworks.core import flink_cluster_api +from hopsworks import util class FlinkCluster: diff --git a/python/hopsworks/git_commit.py b/python/hopsworks/git_commit.py index 53524b850..9018162a4 100644 --- a/python/hopsworks/git_commit.py +++ b/python/hopsworks/git_commit.py @@ -14,10 +14,9 @@ # limitations under the License. # -import json - -import humps from hopsworks import util +import humps +import json class GitCommit: diff --git a/python/hopsworks/git_file_status.py b/python/hopsworks/git_file_status.py index 5001974cd..016346623 100644 --- a/python/hopsworks/git_file_status.py +++ b/python/hopsworks/git_file_status.py @@ -14,9 +14,9 @@ # limitations under the License. # +import humps import json -import humps from hopsworks import util diff --git a/python/hopsworks/git_provider.py b/python/hopsworks/git_provider.py index 4a8649456..f804fb28c 100644 --- a/python/hopsworks/git_provider.py +++ b/python/hopsworks/git_provider.py @@ -14,11 +14,10 @@ # limitations under the License. # -import json - import humps -from hopsworks import util +import json from hopsworks.core import git_provider_api +from hopsworks import util class GitProvider: diff --git a/python/hopsworks/git_remote.py b/python/hopsworks/git_remote.py index 89712346b..1733bbac6 100644 --- a/python/hopsworks/git_remote.py +++ b/python/hopsworks/git_remote.py @@ -14,11 +14,10 @@ # limitations under the License. 
# -import json - import humps -from hopsworks import util +import json from hopsworks.core import git_remote_api +from hopsworks import util class GitRemote: diff --git a/python/hopsworks/git_repo.py b/python/hopsworks/git_repo.py index a651d0e8f..bbdceb36f 100644 --- a/python/hopsworks/git_repo.py +++ b/python/hopsworks/git_repo.py @@ -14,12 +14,11 @@ # limitations under the License. # +import humps import json +from hopsworks import user, git_commit, util +from hopsworks.core import git_api, git_remote_api, dataset_api from typing import List, Union - -import humps -from hopsworks import git_commit, user, util -from hopsworks.core import dataset_api, git_api, git_remote_api from hopsworks.git_file_status import GitFileStatus diff --git a/python/hopsworks/job_schedule.py b/python/hopsworks/job_schedule.py index 301b04122..48e022572 100644 --- a/python/hopsworks/job_schedule.py +++ b/python/hopsworks/job_schedule.py @@ -14,10 +14,10 @@ # limitations under the License. # +import humps import json from datetime import datetime, timezone -import humps from hopsworks import util @@ -30,7 +30,7 @@ def __init__( next_execution_date_time=None, id=None, end_date_time=None, - **kwargs, + **kwargs ): self._id = id self._start_date_time = ( diff --git a/python/hopsworks/kafka_schema.py b/python/hopsworks/kafka_schema.py index c57831809..539db920a 100644 --- a/python/hopsworks/kafka_schema.py +++ b/python/hopsworks/kafka_schema.py @@ -14,11 +14,10 @@ # limitations under the License. # -import json - import humps -from hopsworks import util +import json from hopsworks.core import kafka_api +from hopsworks import util class KafkaSchema: diff --git a/python/hopsworks/kafka_topic.py b/python/hopsworks/kafka_topic.py index fc5a8a71c..0ad0fbe2d 100644 --- a/python/hopsworks/kafka_topic.py +++ b/python/hopsworks/kafka_topic.py @@ -14,11 +14,10 @@ # limitations under the License. 
 #
 
-import json
-
 import humps
-from hopsworks import util
+import json
 from hopsworks.core import kafka_api
+from hopsworks import util
 
 
 class KafkaTopic:
diff --git a/python/hopsworks/library.py b/python/hopsworks/library.py
index b0891f298..ac73f261a 100644
--- a/python/hopsworks/library.py
+++ b/python/hopsworks/library.py
@@ -15,6 +15,7 @@
 #
 
 import humps
+
 from hopsworks import command
diff --git a/python/hopsworks/project.py b/python/hopsworks/project.py
index d975eb987..79ccff369 100644
--- a/python/hopsworks/project.py
+++ b/python/hopsworks/project.py
@@ -133,7 +133,7 @@ def get_feature_store(
         from hsfs import connection
 
         _client = client.get_instance()
-        if type(_client) is Client:  # If external client
+        if type(_client) == Client:  # If external client
             if _client._host == constants.HOSTS.APP_HOST and engine is None:
                 engine = "python"
             return connection(
@@ -168,7 +168,7 @@ def get_model_registry(self):
         from hsml import connection
 
         _client = client.get_instance()
-        if type(_client) is Client:  # If external client
+        if type(_client) == Client:  # If external client
             return connection(
                 host=_client._host,
                 port=_client._port,
@@ -198,7 +198,7 @@ def get_model_serving(self):
         from hsml import connection
 
         _client = client.get_instance()
-        if type(_client) is Client:  # If external client
+        if type(_client) == Client:  # If external client
             return connection(
                 host=_client._host,
                 port=_client._port,
@@ -215,7 +215,7 @@ def get_kafka_api(self):
             `KafkaApi`: The Kafka Api handle
         """
         _client = client.get_instance()
-        if type(_client) is Client:
+        if type(_client) == Client:
             _client.download_certs(self.name)
         return self._kafka_api
 
@@ -226,7 +226,7 @@ def get_opensearch_api(self):
             `OpenSearchApi`: The OpenSearch Api handle
         """
         _client = client.get_instance()
-        if type(_client) is Client:
+        if type(_client) == Client:
             _client.download_certs(self.name)
         return self._opensearch_api
diff --git a/python/hopsworks/user.py b/python/hopsworks/user.py
index 51a862fa3..3b08cb277 100644
--- a/python/hopsworks/user.py
+++ b/python/hopsworks/user.py
@@ -15,8 +15,8 @@
 #
 
 import json
-
 import humps
+
 from hopsworks import util
diff --git a/python/hopsworks/util.py b/python/hopsworks/util.py
index b5f46f29b..35785783f 100644
--- a/python/hopsworks/util.py
+++ b/python/hopsworks/util.py
@@ -81,8 +81,6 @@ def get_hostname_replaced_url(sub_path: str):
     url_parsed = client.get_instance().replace_public_host(urlparse(href))
     return url_parsed.geturl()
 
-
 def is_interactive():
     import __main__ as main
-
-    return not hasattr(main, "__file__")
+    return not hasattr(main, '__file__')
diff --git a/python/hsfs/client/base.py b/python/hsfs/client/base.py
index eeb6eb369..25850833e 100644
--- a/python/hsfs/client/base.py
+++ b/python/hsfs/client/base.py
@@ -19,6 +19,7 @@
 import os
 import textwrap
 import time
+from abc import ABC
 from pathlib import Path
 
 import furl
@@ -38,7 +39,7 @@
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
 
-class Client:
+class Client(ABC):
     TOKEN_FILE = "token.jwt"
     TOKEN_EXPIRED_RETRY_INTERVAL = 0.6
     TOKEN_EXPIRED_MAX_RETRIES = 10
diff --git a/python/hsfs/core/explicit_provenance.py b/python/hsfs/core/explicit_provenance.py
index 450a00310..2ce4f8c80 100644
--- a/python/hsfs/core/explicit_provenance.py
+++ b/python/hsfs/core/explicit_provenance.py
@@ -415,7 +415,9 @@ def default(self, obj):
                 }
             elif isinstance(
                 obj,
-                (storage_connector.StorageConnector),
+                (
+                    storage_connector.StorageConnector
+                ),
             ):
                 return {
                     "name": obj.name,
diff --git a/python/hsfs/core/feature_logging.py b/python/hsfs/core/feature_logging.py
index bdf68d2ca..b29a7317d 100644
--- a/python/hsfs/core/feature_logging.py
+++ b/python/hsfs/core/feature_logging.py
@@ -6,32 +6,25 @@
 
 
 class FeatureLogging:
-    def __init__(
-        self,
-        id: int,
-        transformed_features: "feature_group.FeatureGroup",
-        untransformed_features: "feature_group.FeatureGroup",
-    ):
+
+    def __init__(self, id: int,
+                 transformed_features: "feature_group.FeatureGroup",
+                 untransformed_features: "feature_group.FeatureGroup"):
         self._id = id
         self._transformed_features = transformed_features
         self._untransformed_features = untransformed_features
 
     @classmethod
-    def from_response_json(cls, json_dict: Dict[str, Any]) -> "FeatureLogging":
+    def from_response_json(cls, json_dict: Dict[str, Any]) -> 'FeatureLogging':
         from hsfs.feature_group import FeatureGroup  # avoid circular import
-
         json_decamelized = humps.decamelize(json_dict)
-        transformed_features = json_decamelized.get("transformed_log")
-        untransformed_features = json_decamelized.get("untransformed_log")
+        transformed_features = json_decamelized.get('transformed_log')
+        untransformed_features = json_decamelized.get('untransformed_log')
         if transformed_features:
             transformed_features = FeatureGroup.from_response_json(transformed_features)
         if untransformed_features:
-            untransformed_features = FeatureGroup.from_response_json(
-                untransformed_features
-            )
-        return cls(
-            json_decamelized.get("id"), transformed_features, untransformed_features
-        )
+            untransformed_features = FeatureGroup.from_response_json(untransformed_features)
+        return cls(json_decamelized.get('id'), transformed_features, untransformed_features)
 
     @property
     def transformed_features(self) -> "feature_group.FeatureGroup":
@@ -47,9 +40,9 @@ def id(self) -> str:
 
     def to_dict(self):
         return {
-            "id": self._id,
-            "transformed_log": self._transformed_features,
-            "untransformed_log": self._untransformed_features,
+            'id': self._id,
+            'transformed_log': self._transformed_features,
+            'untransformed_log': self._untransformed_features,
         }
 
     def json(self) -> Dict[str, Any]:
@@ -57,3 +50,4 @@
 
     def __repr__(self):
         return self.json()
+
diff --git a/python/hsfs/core/feature_view_api.py b/python/hsfs/core/feature_view_api.py
index ac6a8ef84..cf67b0216 100644
--- a/python/hsfs/core/feature_view_api.py
+++ b/python/hsfs/core/feature_view_api.py
@@ -46,6 +46,7 @@ class FeatureViewApi:
     _TRANSFORMED_lOG = "transformed"
     _UNTRANSFORMED_LOG = "untransformed"
 
+
     def __init__(self, feature_store_id: int) -> None:
         self._feature_store_id = feature_store_id
         self._client = client.get_instance()
@@ -406,8 +407,7 @@ def get_models_provenance(
     def enable_feature_logging(
         self,
         feature_view_name: str,
-        feature_view_version: int,
-    ):
+        feature_view_version: int,):
         _client = client.get_instance()
         path_params = self._base_path + [
             feature_view_name,
@@ -420,8 +420,7 @@ def enable_feature_logging(
     def pause_feature_logging(
         self,
         feature_view_name: str,
-        feature_view_version: int,
-    ):
+        feature_view_version: int,):
         _client = client.get_instance()
         path_params = self._base_path + [
             feature_view_name,
@@ -435,8 +434,7 @@ def pause_feature_logging(
     def resume_feature_logging(
         self,
         feature_view_name: str,
-        feature_view_version: int,
-    ):
+        feature_view_version: int,):
         _client = client.get_instance()
         path_params = self._base_path + [
             feature_view_name,
@@ -450,8 +448,7 @@ def resume_feature_logging(
     def materialize_feature_logging(
         self,
         feature_view_name: str,
-        feature_view_version: int,
-    ):
+        feature_view_version: int,):
         _client = client.get_instance()
         path_params = self._base_path + [
             feature_view_name,
@@ -472,8 +469,7 @@ def materialize_feature_logging(
     def get_feature_logging(
         self,
         feature_view_name: str,
-        feature_view_version: int,
-    ):
+        feature_view_version: int,):
         _client = client.get_instance()
         path_params = self._base_path + [
             feature_view_name,
diff --git a/python/hsfs/core/feature_view_engine.py b/python/hsfs/core/feature_view_engine.py
index 4fdc7fdbf..a29acf89f 100644
--- a/python/hsfs/core/feature_view_engine.py
+++ b/python/hsfs/core/feature_view_engine.py
@@ -822,8 +822,8 @@ def get_batch_data(
 
     def transform_batch_data(self, features, transformation_functions):
         return engine.get_instance()._apply_transformation_function(
-            transformation_functions, dataset=features, inplace=False
-        )
+                transformation_functions, dataset=features, inplace=False
+            )
 
     def add_tag(
         self, feature_view_obj, name: str, value, training_dataset_version=None
@@ -996,16 +996,7 @@ def _get_logging_fg(self, fv, transformed):
         else:
             return feature_logging.untransformed_features
 
-    def log_features(
-        self,
-        fv,
-        features,
-        prediction=None,
-        transformed=False,
-        write_options=None,
-        training_dataset_version=None,
-        hsml_model=None,
-    ):
+    def log_features(self, fv, features, prediction=None, transformed=False, write_options=None, training_dataset_version=None, hsml_model=None):
         default_write_options = {
             "start_offline_materialization": False,
         }
@@ -1026,41 +1017,29 @@ def log_features(
         )
         return fg.insert(df, write_options=default_write_options)
 
-    def read_feature_logs(
-        self,
-        fv,
-        start_time: Optional[Union[str, int, datetime, datetime.date]] = None,
-        end_time: Optional[Union[str, int, datetime, datetime.date]] = None,
-        filter: Optional[Union[Filter, Logic]] = None,
-        transformed: Optional[bool] = False,
-        training_dataset_version=None,
-        hsml_model=None,
-    ):
+    def read_feature_logs(self, fv,
+                          start_time: Optional[
+                              Union[str, int, datetime, datetime.date]] = None,
+                          end_time: Optional[
+                              Union[str, int, datetime, datetime.date]] = None,
+                          filter: Optional[Union[Filter, Logic]]=None,
+                          transformed: Optional[bool]=False,
+                          training_dataset_version=None,
+                          hsml_model=None,
+                          ):
         fg = self._get_logging_fg(fv, transformed)
         fv_feat_name_map = self._get_fv_feature_name_map(fv)
         query = fg.select_all()
         if start_time:
-            query = query.filter(
-                fg.get_feature(FeatureViewEngine._LOG_TIME) >= start_time
-            )
+            query = query.filter(fg.get_feature(FeatureViewEngine._LOG_TIME) >= start_time)
         if end_time:
-            query = query.filter(
-                fg.get_feature(FeatureViewEngine._LOG_TIME) <= end_time
-            )
+            query = query.filter(fg.get_feature(FeatureViewEngine._LOG_TIME) <= end_time)
         if training_dataset_version:
-            query = query.filter(
-                fg.get_feature(FeatureViewEngine._LOG_TD_VERSION)
-                == training_dataset_version
-            )
+            query = query.filter(fg.get_feature(FeatureViewEngine._LOG_TD_VERSION) == training_dataset_version)
         if hsml_model:
-            query = query.filter(
-                fg.get_feature(FeatureViewEngine._HSML_MODEL)
-                == self.get_hsml_model_value(hsml_model)
-            )
+            query = query.filter(fg.get_feature(FeatureViewEngine._HSML_MODEL) == self.get_hsml_model_value(hsml_model))
         if filter:
-            query = query.filter(
-                self._convert_to_log_fg_filter(fg, fv, filter, fv_feat_name_map)
-            )
+            query = query.filter(self._convert_to_log_fg_filter(fg, fv, filter, fv_feat_name_map))
         df = query.read()
         df = df.drop(["log_id", FeatureViewEngine._LOG_TIME], axis=1)
         return df
@@ -1083,12 +1062,9 @@ def _convert_to_log_fg_filter(self, fg, fv, filter, fv_feat_name_map):
         )
         elif isinstance(filter, Filter):
             fv_feature_name = fv_feat_name_map.get(
-                f"{filter.feature.feature_group_id}_{filter.feature.name}"
-            )
+                f"{filter.feature.feature_group_id}_{filter.feature.name}")
             if fv_feature_name is None:
-                raise FeatureStoreException(
-                    "Filter feature {filter.feature.name} does not exist in feature view feature."
-                )
+                raise FeatureStoreException("Filter feature {filter.feature.name} does not exist in feature view feature.")
             return Filter(
                 fg.get_feature(filter.feature.name),
                 filter.condition,
@@ -1100,30 +1076,32 @@ def _convert_to_log_fg_filter(self, fg, fv, filter, fv_feat_name_map):
     def _get_fv_feature_name_map(self, fv) -> Dict[str, str]:
         result_dict = {}
         for td_feature in fv.features:
-            fg_feature_key = (
-                f"{td_feature.feature_group.id}_{td_feature.feature_group_feature_name}"
-            )
+            fg_feature_key = f"{td_feature.feature_group.id}_{td_feature.feature_group_feature_name}"
             result_dict[fg_feature_key] = td_feature.name
         return result_dict
 
-    def get_log_timeline(
-        self,
-        fv,
-        wallclock_time: Optional[Union[str, int, datetime, datetime.date]] = None,
-        limit: Optional[int] = None,
-        transformed: Optional[bool] = False,
-    ) -> Dict[str, Dict[str, str]]:
+    def get_log_timeline(self, fv,
+                         wallclock_time: Optional[
+                             Union[str, int, datetime, datetime.date]] = None,
+                         limit: Optional[int] = None,
+                         transformed: Optional[bool]=False,
+                         ) -> Dict[str, Dict[str, str]]:
         fg = self._get_logging_fg(fv, transformed)
         return fg.commit_details(wallclock_time=wallclock_time, limit=limit)
 
     def pause_logging(self, fv):
-        self._feature_view_api.pause_feature_logging(fv.name, fv.version)
-
+        self._feature_view_api.pause_feature_logging(
+            fv.name, fv.version
+        )
     def resume_logging(self, fv):
-        self._feature_view_api.resume_feature_logging(fv.name, fv.version)
+        self._feature_view_api.resume_feature_logging(
+            fv.name, fv.version
+        )
 
     def materialize_feature_logs(self, fv, wait):
-        jobs = self._feature_view_api.materialize_feature_logging(fv.name, fv.version)
+        jobs = self._feature_view_api.materialize_feature_logging(
+            fv.name, fv.version
+        )
         if wait:
             for job in jobs:
                 try:
@@ -1133,4 +1111,6 @@ def materialize_feature_logs(self, fv, wait):
         return jobs
 
     def delete_feature_logs(self, fv, transformed):
-        self._feature_view_api.delete_feature_logs(fv.name, fv.version, transformed)
+        self._feature_view_api.delete_feature_logs(
+            fv.name, fv.version, transformed
+        )
diff --git a/python/hsfs/core/opensearch.py b/python/hsfs/core/opensearch.py
index 6e1ca5091..3865c7ab0 100644
--- a/python/hsfs/core/opensearch.py
+++ b/python/hsfs/core/opensearch.py
@@ -54,8 +54,7 @@ def error_handler_wrapper(*args, **kw):
             caused_by = e.info.get("error") and e.info["error"].get("caused_by")
             if caused_by and caused_by["type"] == "illegal_argument_exception":
                 raise OpenSearchClientSingleton()._create_vector_database_exception(
-                    caused_by["reason"]
-                ) from e
+                    caused_by["reason"]) from e
             raise VectorDatabaseException(
                 VectorDatabaseException.OTHERS,
                 f"Error in Opensearch request: {e}",
@@ -101,19 +100,16 @@ def get_options(cls, options: dict):
         attribute values of the OpensearchRequestOption class, and values are
         obtained either from the provided options or default values if not
         available.
""" - default_option = ( - cls.DEFAULT_OPTION_MAP - if cls.get_version() < (2, 3) - else cls.DEFAULT_OPTION_MAP_V2_3 - ) + default_option = (cls.DEFAULT_OPTION_MAP + if cls.get_version() < (2, 3) + else cls.DEFAULT_OPTION_MAP_V2_3) if options: # make lower case to avoid issues with cases options = {k.lower(): v for k, v in options.items()} new_options = {} for option, value in default_option.items(): if option in options: - if ( - option == "timeout" + if (option == "timeout" and cls.get_version() < (2, 3) and isinstance(options[option], int) ): @@ -165,9 +161,7 @@ def _refresh_opensearch_connection(self): ) @_handle_opensearch_exception def search(self, index=None, body=None, options=None): - return self._opensearch_client.search( - body=body, index=index, params=OpensearchRequestOption.get_options(options) - ) + return self._opensearch_client.search(body=body, index=index, params=OpensearchRequestOption.get_options(options)) @retry( wait_exponential_multiplier=1000, diff --git a/python/hsfs/core/storage_connector_api.py b/python/hsfs/core/storage_connector_api.py index 01d1898de..d30201a11 100644 --- a/python/hsfs/core/storage_connector_api.py +++ b/python/hsfs/core/storage_connector_api.py @@ -101,7 +101,9 @@ def get_kafka_connector( _client._send_request("GET", path_params, query_params=query_params) ) - def get_feature_groups_provenance(self, storage_connector_instance): + def get_feature_groups_provenance( + self, storage_connector_instance + ): """Get the generated feature groups using this storage connector, based on explicit provenance. These feature groups can be accessible or inaccessible. Explicit provenance does not track deleted generated feature group links, so deleted @@ -133,7 +135,6 @@ def get_feature_groups_provenance(self, storage_connector_instance): } links_json = _client._send_request("GET", path_params, query_params) from hsfs.core import explicit_provenance - return explicit_provenance.Links.from_response_json( links_json, explicit_provenance.Links.Direction.DOWNSTREAM, diff --git a/python/hsfs/core/vector_db_client.py b/python/hsfs/core/vector_db_client.py index 71060c983..b9fdc86ab 100644 --- a/python/hsfs/core/vector_db_client.py +++ b/python/hsfs/core/vector_db_client.py @@ -96,9 +96,7 @@ def init(self): ) self._embedding_fg_by_join_index[i] = join_fg for embedding_feature in join_fg.embedding_index.get_embeddings(): - self._td_embedding_feature_names.add( - (join.prefix or "") + embedding_feature.name - ) + self._td_embedding_feature_names.add((join.prefix or "") + embedding_feature.name) vdb_col_td_col_map = {} for feat in join_fg.features: vdb_col_td_col_map[ @@ -193,13 +191,10 @@ def find_neighbors( return [ ( 1 / item["_score"] - 1, - self._convert_to_pandas_type( - embedding_feature.feature_group.features, - self._rewrite_result_key( - item["_source"], - self._fg_vdb_col_td_col_map[embedding_feature.feature_group.id], - ), - ), + self._convert_to_pandas_type(embedding_feature.feature_group.features, self._rewrite_result_key( + item["_source"], + self._fg_vdb_col_td_col_map[embedding_feature.feature_group.id], + )), ) for item in results["hits"]["hits"] ] @@ -212,15 +207,11 @@ def _convert_to_pandas_type(self, schema, result): if not feature_value: # Feature value can be null continue elif feature_type == "date": - result[feature_name] = datetime.utcfromtimestamp( - feature_value // 10**3 - ).date() + result[feature_name] = datetime.utcfromtimestamp(feature_value // 10**3).date() elif feature_type == "timestamp": # convert timestamp in ms to datetime in s 
                 result[feature_name] = datetime.utcfromtimestamp(feature_value // 10**3)
-            elif feature_type == "binary" or (
-                feature.is_complex() and feature not in self._embedding_features
-            ):
+            elif feature_type == "binary" or (feature.is_complex() and feature not in self._embedding_features):
                 result[feature_name] = base64.b64decode(feature_value)
         return result
 
@@ -346,19 +337,17 @@ def read(self, fg_id, schema, keys=None, pk=None, index_name=None, n=10):
         if VectorDbClient._index_result_limit_n.get(index_name) is None:
             try:
                 query["size"] = 2**31 - 1
-                self._opensearch_client.search(body=query, index=index_name)
+                self._opensearch_client.search(body=query,
+                                               index=index_name)
             except VectorDatabaseException as e:
                 if (
-                    e.reason
-                    == VectorDatabaseException.REQUESTED_NUM_RESULT_TOO_LARGE
+                    e.reason == VectorDatabaseException.REQUESTED_NUM_RESULT_TOO_LARGE
                     and e.info.get(
-                        VectorDatabaseException.REQUESTED_NUM_RESULT_TOO_LARGE_INFO_N
-                    )
+                        VectorDatabaseException.REQUESTED_NUM_RESULT_TOO_LARGE_INFO_N
+                        )
                 ):
-                    VectorDbClient._index_result_limit_n[index_name] = (
-                        e.info.get(
-                            VectorDatabaseException.REQUESTED_NUM_RESULT_TOO_LARGE_INFO_N
-                        )
+                    VectorDbClient._index_result_limit_n[index_name] = e.info.get(
+                        VectorDatabaseException.REQUESTED_NUM_RESULT_TOO_LARGE_INFO_N
                     )
                 else:
                     raise e
@@ -367,32 +356,24 @@ def read(self, fg_id, schema, keys=None, pk=None, index_name=None, n=10):
         results = self._opensearch_client.search(body=query, index=index_name)
         # https://opensearch.org/docs/latest/search-plugins/knn/approximate-knn/#spaces
         return [
-            self._convert_to_pandas_type(
-                schema,
-                self._rewrite_result_key(
-                    item["_source"], self._fg_vdb_col_td_col_map[fg_id]
-                ),
-            )
+            self._convert_to_pandas_type(schema, self._rewrite_result_key(
+                item["_source"], self._fg_vdb_col_td_col_map[fg_id]
+            ))
             for item in results["hits"]["hits"]
         ]
 
     @staticmethod
-    def read_feature_group(
-        feature_group: "hsfs.feature_group.FeatureGroup", n: int = None
-    ) -> list:
+    def read_feature_group(feature_group: "hsfs.feature_group.FeatureGroup", n: int =None) -> list:
         if feature_group.embedding_index:
             vector_db_client = VectorDbClient(feature_group.select_all())
             results = vector_db_client.read(
                 feature_group.id,
                 feature_group.features,
-                pk=feature_group.embedding_index.col_prefix
-                + feature_group.primary_key[0],
+                pk=feature_group.embedding_index.col_prefix + feature_group.primary_key[0],
                 index_name=feature_group.embedding_index.index_name,
-                n=n,
+                n=n
             )
-            return [
-                [result[f.name] for f in feature_group.features] for result in results
-            ]
+            return [[result[f.name] for f in feature_group.features] for result in results]
         else:
             raise FeatureStoreException("Feature group does not have embedding.")
diff --git a/python/hsfs/core/vector_server.py b/python/hsfs/core/vector_server.py
index 97d9b83b0..44a522564 100755
--- a/python/hsfs/core/vector_server.py
+++ b/python/hsfs/core/vector_server.py
@@ -99,7 +99,10 @@ def __init__(
         self._untransformed_feature_vector_col_name = [
             feat.name
             for feat in features
-            if not (feat.label or feat.training_helper_column)
+            if not (
+                feat.label
+                or feat.training_helper_column
+            )
         ]
         self._inference_helper_col_name = [
             feat.name for feat in features if feat.inference_helper_column
@@ -448,26 +451,17 @@ def assemble_feature_vector(
                 for fname in self.transformed_feature_vector_col_name
             ]
         else:
-            return [
-                result_dict.get(fname, None)
-                for fname in self._untransformed_feature_vector_col_name
-            ]
+            return [result_dict.get(fname, None) for fname in self._untransformed_feature_vector_col_name]
 
     def transform_feature_vectors(self, batch_features):
-        return [
-            self.apply_transformation(self.get_untransformed_features_map(features))
+        return [self.apply_transformation(self.get_untransformed_features_map(features))
             for features in batch_features
         ]
 
     def get_untransformed_features_map(self, features) -> Dict[str, Any]:
         return dict(
-            [
-                (fname, fvalue)
-                for fname, fvalue in zip(
-                    self._untransformed_feature_vector_col_name, features
-                )
-            ]
-        )
+            [(fname, fvalue) for fname, fvalue
+             in zip(self._untransformed_feature_vector_col_name, features)])
 
     def handle_feature_vector_return_type(
         self,
diff --git a/python/hsfs/engine/spark.py b/python/hsfs/engine/spark.py
index fa71374d3..8b49897cf 100644
--- a/python/hsfs/engine/spark.py
+++ b/python/hsfs/engine/spark.py
@@ -1389,9 +1389,9 @@ def convert_spark_type_to_offline_type(spark_type, using_hudi):
     if not using_hudi:
         return spark_type.simpleString()
-    elif type(spark_type) is ByteType:
+    elif type(spark_type) == ByteType:
         return "int"
-    elif type(spark_type) is ShortType:
+    elif type(spark_type) == ShortType:
         return "int"
     elif type(spark_type) in [
         BooleanType,
diff --git a/python/hsfs/feature.py b/python/hsfs/feature.py
index f66fa9807..896980567 100644
--- a/python/hsfs/feature.py
+++ b/python/hsfs/feature.py
@@ -209,29 +209,36 @@ def feature_group_id(self) -> Optional[int]:
 
     def _get_filter_value(self, value: Any) -> Any:
         if self.type == "timestamp":
-            return datetime.fromtimestamp(
-                util.convert_event_time_to_timestamp(value) / 1000
-            ).strftime("%Y-%m-%d %H:%M:%S")
+            return (datetime.fromtimestamp(
+                util.convert_event_time_to_timestamp(value)/1000)
+                .strftime("%Y-%m-%d %H:%M:%S")
+            )
         else:
             return value
 
     def __lt__(self, other: Any) -> "filter.Filter":
-        return filter.Filter(self, filter.Filter.LT, self._get_filter_value(other))
+        return filter.Filter(self, filter.Filter.LT,
+                             self._get_filter_value(other))
 
     def __le__(self, other: Any) -> "filter.Filter":
-        return filter.Filter(self, filter.Filter.LE, self._get_filter_value(other))
+        return filter.Filter(self, filter.Filter.LE,
+                             self._get_filter_value(other))
 
     def __eq__(self, other: Any) -> "filter.Filter":
-        return filter.Filter(self, filter.Filter.EQ, self._get_filter_value(other))
+        return filter.Filter(self, filter.Filter.EQ,
+                             self._get_filter_value(other))
 
     def __ne__(self, other: Any) -> "filter.Filter":
-        return filter.Filter(self, filter.Filter.NE, self._get_filter_value(other))
+        return filter.Filter(self, filter.Filter.NE,
+                             self._get_filter_value(other))
 
     def __ge__(self, other: Any) -> "filter.Filter":
-        return filter.Filter(self, filter.Filter.GE, self._get_filter_value(other))
+        return filter.Filter(self, filter.Filter.GE,
+                             self._get_filter_value(other))
 
     def __gt__(self, other: Any) -> "filter.Filter":
-        return filter.Filter(self, filter.Filter.GT, self._get_filter_value(other))
+        return filter.Filter(self, filter.Filter.GT,
+                             self._get_filter_value(other))
 
     def contains(self, other: Union[str, List[Any]]) -> "filter.Filter":
         """
diff --git a/python/hsfs/storage_connector.py b/python/hsfs/storage_connector.py
index 8e0c90b0b..96596a5b0 100644
--- a/python/hsfs/storage_connector.py
+++ b/python/hsfs/storage_connector.py
@@ -211,9 +211,7 @@ def get_feature_groups(self):
         feature_groups_provenance = self.get_feature_groups_provenance()
 
         if feature_groups_provenance.inaccessible or feature_groups_provenance.deleted:
-            _logger.info(
-                "There are deleted or inaccessible feature groups. For more details access `get_feature_groups_provenance`"
-            )
+            _logger.info("There are deleted or inaccessible feature groups. For more details access `get_feature_groups_provenance`")
 
         if feature_groups_provenance.accessible:
             return feature_groups_provenance.accessible
diff --git a/python/hsfs/usage.py b/python/hsfs/usage.py
index bd724c293..3428de21f 100644
--- a/python/hsfs/usage.py
+++ b/python/hsfs/usage.py
@@ -85,18 +85,16 @@ def get_timezone(self):
         return self._timezone
 
     def json(self):
-        return json.dumps(
-            {
-                "platform": self.get_platform(),
-                "hsml_version": self.get_hsml_version(),
-                "hsfs_version": self.get_hsfs_version(),
-                "hopsworks_version": self.get_hopsworks_version(),
-                "user_id": self.get_user_id(),
-                "backend_version": self.get_backend_version(),
-                "timezone": str(self.get_timezone()),
-                "python_version": self.get_python_version(),
-            }
-        )
+        return json.dumps({
+            "platform": self.get_platform(),
+            "hsml_version": self.get_hsml_version(),
+            "hsfs_version": self.get_hsfs_version(),
+            "hopsworks_version": self.get_hopsworks_version(),
+            "user_id": self.get_user_id(),
+            "backend_version": self.get_backend_version(),
+            "timezone": str(self.get_timezone()),
+            "python_version": self.get_python_version(),
+        })
 
 
 class MethodCounter:
diff --git a/python/hsml/client/istio/utils/numpy_codec.py b/python/hsml/client/istio/utils/numpy_codec.py
index bf22bcf34..3c6ecb606 100644
--- a/python/hsml/client/istio/utils/numpy_codec.py
+++ b/python/hsml/client/istio/utils/numpy_codec.py
@@ -38,7 +38,7 @@ def to_np_dtype(dtype):
 
 
 def from_np_dtype(np_dtype):
-    if np_dtype is bool:
+    if np_dtype == bool:
         return "BOOL"
     elif np_dtype == np.int8:
         return "INT8"
diff --git a/python/hsml/connection.py b/python/hsml/connection.py
index f4ca72512..899589a4e 100644
--- a/python/hsml/connection.py
+++ b/python/hsml/connection.py
@@ -97,7 +97,6 @@ def __init__(
         api_key_value: str = None,
     ):
         from hsml.core import model_api, model_registry_api, model_serving_api
-
         self._host = host
         self._port = port
         self._project = project
@@ -164,7 +163,6 @@ def connect(self):
         """
         from hsml import client
         from hsml.core import model_api
-
         self._connected = True
         try:
             # init client
@@ -198,7 +196,6 @@ def close(self):
         Usage is recommended but optional.
         """
         from hsml import client
-
         client.stop()
         self._model_api = None
         self._connected = False
diff --git a/python/hsml/model_serving.py b/python/hsml/model_serving.py
index d298e669f..21d04b833 100644
--- a/python/hsml/model_serving.py
+++ b/python/hsml/model_serving.py
@@ -285,12 +285,7 @@ def postprocess(self, outputs):
 
         return Transformer(script_file=script_file, resources=resources)
 
-    def create_deployment(
-        self,
-        predictor: Predictor,
-        name: Optional[str] = None,
-        environment: Optional[str] = None,
-    ):
+    def create_deployment(self, predictor: Predictor, name: Optional[str] = None, environment: Optional[str] = None):
         """Create a Deployment metadata object.
 
         !!! example
diff --git a/python/hsml/util.py b/python/hsml/util.py
index 6fffc4033..6ef6d9053 100644
--- a/python/hsml/util.py
+++ b/python/hsml/util.py
@@ -100,7 +100,6 @@ def set_model_class(model):
     from hsml.sklearn.model import Model as SkLearnModel
     from hsml.tensorflow.model import Model as TFModel
     from hsml.torch.model import Model as TorchModel
-
     if "href" in model:
         _ = model.pop("href")
     if "type" in model:  # backwards compatibility
@@ -242,7 +241,6 @@ def get_predictor_for_model(model, **kwargs):
     from hsml.tensorflow.predictor import Predictor as TFPredictor
     from hsml.torch.model import Model as TorchModel
     from hsml.torch.predictor import Predictor as TorchPredictor
-
     if not isinstance(model, BaseModel):
         raise ValueError(
             "model is of type {}, but an instance of {} class is expected".format(
diff --git a/python/tests/core/test_feature_group_api.py b/python/tests/core/test_feature_group_api.py
index 9366f4401..37459d897 100644
--- a/python/tests/core/test_feature_group_api.py
+++ b/python/tests/core/test_feature_group_api.py
@@ -54,7 +54,9 @@ def test_get_smart_with_infer_type(self, mocker, backend_fixtures):
     def test_check_features(self, mocker, backend_fixtures):
         # Arrange
         fg_api = feature_group_api.FeatureGroupApi()
-        json = backend_fixtures["feature_group"]["get_basic_info"]["response"]
+        json = backend_fixtures["feature_group"]["get_basic_info"][
+            "response"
+        ]
         fg = fg_mod.FeatureGroup.from_response_json(json)
 
         # Act
diff --git a/python/tests/core/test_opensearch.py b/python/tests/core/test_opensearch.py
index 5a4bcb681..3ae804cdc 100644
--- a/python/tests/core/test_opensearch.py
+++ b/python/tests/core/test_opensearch.py
@@ -69,6 +69,7 @@ def test_create_vector_database_exception(
 
 
 class TestOpensearchRequestOption:
+
     def test_version_1_no_options(self):
         OpensearchRequestOption.get_version = lambda: (1, 1)
         options = OpensearchRequestOption.get_options({})
diff --git a/python/tests/core/test_vector_db_client.py b/python/tests/core/test_vector_db_client.py
index a4261a5dd..4f17a1dbe 100644
--- a/python/tests/core/test_vector_db_client.py
+++ b/python/tests/core/test_vector_db_client.py
@@ -220,9 +220,7 @@ def test_check_filter_when_filter_is_not_logic_or_filter(self):
             self.target._check_filter("f1 > 20", self.fg2)
 
     def test_read_with_keys(self):
-        actual = self.target.read(
-            self.fg.id, self.fg.features, keys={"f1": 10, "f2": 20}
-        )
+        actual = self.target.read(self.fg.id, self.fg.features, keys={"f1": 10, "f2": 20})
 
         expected_query = {
             "query": {"bool": {"must": [{"match": {"f1": 10}}, {"match": {"f2": 20}}]}},
diff --git a/python/tests/test_util.py b/python/tests/test_util.py
index 330c76b5c..b39501162 100644
--- a/python/tests/test_util.py
+++ b/python/tests/test_util.py
@@ -736,9 +736,7 @@ def test_get_dataset_type_HIVEDB_with_dfs(self):
         assert db_type == "HIVEDB"
 
     def test_get_dataset_type_DATASET(self):
-        db_type = hsfs.util.get_dataset_type(
-            "/Projects/temp/Resources/kafka__tstore.jks"
-        )
+        db_type = hsfs.util.get_dataset_type("/Projects/temp/Resources/kafka__tstore.jks")
         assert db_type == "DATASET"
 
     def test_get_dataset_type_DATASET_with_dfs(self):

From f4cb0c15c0035a9b88c21287772aaad77e9067b2 Mon Sep 17 00:00:00 2001
From: kenneth
Date: Fri, 19 Jul 2024 10:27:27 +0200
Subject: [PATCH 3/5] Revert "[FSTORE-1439] Merge hopsworks-api,
 feature-store-api and machine-learning-api"

This reverts commit 67484a9f05aeb80b4541b3da3b5c9fd8ecdbb864.
--- .github/workflows/mkdocs-main.yml | 19 +- .github/workflows/mkdocs-release.yml | 22 +- .github/workflows/python-lint.yml | 49 + .gitignore | 11 - CONTRIBUTING.md | 116 +- README.md | 94 +- auto_doc.py | 215 ++++ docs/CONTRIBUTING.md | 215 ++++ .../.github}/pull_request_template.md | 0 .../.github/workflows/java-ut.yml | 16 +- hsfs/.github/workflows/mkdocs-master.yml | 53 + hsfs/.github/workflows/mkdocs-release.yml | 59 + .../.github/workflows/optional-dependency.yml | 29 + .../.github/workflows/python-lint.yml | 72 +- hsfs/.gitignore | 145 +++ hsfs/CONTRIBUTING.md | 220 ++++ Dockerfile => hsfs/Dockerfile | 0 hsfs/Jenkinsfile | 23 + hsfs/LICENSE | 201 ++++ hsfs/README.md | 201 ++++ hsfs/auto_doc.py | 384 ++++++ hsfs/docs/CONTRIBUTING.md | 220 ++++ hsfs/docs/assets/images/favicon.ico | Bin 0 -> 2699 bytes hsfs/docs/assets/images/hops-logo.png | Bin 0 -> 6356 bytes .../docs}/assets/images/hopsworks-logo.png | Bin hsfs/docs/css/custom.css | 114 ++ hsfs/docs/css/dropdown.css | 55 + hsfs/docs/css/marctech.css | 1047 +++++++++++++++++ hsfs/docs/css/version-select.css | 36 + hsfs/docs/index.md | 201 ++++ hsfs/docs/js/dropdown.js | 2 + hsfs/docs/js/inject-api-links.js | 32 + hsfs/docs/js/version-select.js | 64 + hsfs/docs/overrides/main.html | 8 + .../docs}/templates/api/connection_api.md | 0 .../templates/api/embedding_feature_api.md | 0 .../templates/api/embedding_index_api.md | 0 .../docs}/templates/api/expectation_api.md | 0 .../templates/api/expectation_suite_api.md | 0 .../api/external_feature_group_api.md | 0 .../docs}/templates/api/feature_api.md | 0 .../api/feature_descriptive_statistics_api.md | 0 .../docs}/templates/api/feature_group_api.md | 0 .../api/feature_monitoring_config_api.md | 0 .../api/feature_monitoring_result_api.md | 0 .../feature_monitoring_window_config_api.md | 0 .../docs}/templates/api/feature_store_api.md | 0 .../docs}/templates/api/feature_view_api.md | 0 {docs => hsfs/docs}/templates/api/job.md | 0 {docs => hsfs/docs}/templates/api/links.md | 0 .../docs}/templates/api/query_api.md | 0 {docs => hsfs/docs}/templates/api/rule_api.md | 0 .../templates/api/rule_definition_api.md | 0 .../api/similarity_function_type_api.md | 0 .../docs}/templates/api/spine_group_api.md | 0 .../templates/api/split_statistics_api.md | 0 .../docs}/templates/api/statistics_api.md | 0 .../templates/api/statistics_config_api.md | 0 .../templates/api/storage_connector_api.md | 0 .../templates/api/training_dataset_api.md | 0 .../api/transformation_functions_api.md | 0 .../docs}/templates/api/validation_api.md | 0 .../templates/api/validation_report_api.md | 0 {java => hsfs/java}/beam/pom.xml | 0 .../logicalclocks/hsfs/beam/FeatureStore.java | 0 .../logicalclocks/hsfs/beam/FeatureView.java | 0 .../hsfs/beam/HopsworksConnection.java | 0 .../hsfs/beam/StreamFeatureGroup.java | 0 .../hsfs/beam/constructor/Query.java | 0 .../hsfs/beam/engine/BeamEngine.java | 0 .../hsfs/beam/engine/BeamKafkaProducer.java | 0 .../hsfs/beam/engine/BeamProducer.java | 0 .../hsfs/beam/engine/FeatureGroupEngine.java | 0 .../hsfs/beam/engine/FeatureViewEngine.java | 0 .../beam/engine/GenericAvroSerializer.java | 0 .../hsfs/beam/engine/KeySerializer.java | 0 {java => hsfs/java}/flink/pom.xml | 0 .../hsfs/flink/FeatureStore.java | 0 .../logicalclocks/hsfs/flink/FeatureView.java | 0 .../hsfs/flink/HopsworksConnection.java | 0 .../hsfs/flink/StreamFeatureGroup.java | 0 .../hsfs/flink/constructor/FsQuery.java | 0 .../hsfs/flink/constructor/Query.java | 0 .../hsfs/flink/engine/FeatureGroupEngine.java | 0 
.../hsfs/flink/engine/FeatureViewEngine.java | 0 .../hsfs/flink/engine/FlinkEngine.java | 0 .../flink/engine/KafkaRecordSerializer.java | 0 .../hsfs/flink/engine/PojoToAvroRecord.java | 0 {java => hsfs/java}/hsfs/pom.xml | 0 .../com/logicalclocks/hsfs/DataFormat.java | 0 .../hsfs/DeltaStreamerJobConf.java | 0 .../hsfs/EntityEndpointType.java | 0 .../hsfs/ExternalDataFormat.java | 0 .../java/com/logicalclocks/hsfs/Feature.java | 0 .../logicalclocks/hsfs/FeatureGroupBase.java | 0 .../hsfs/FeatureGroupBaseForApi.java | 0 .../hsfs/FeatureGroupCommit.java | 0 .../logicalclocks/hsfs/FeatureStoreBase.java | 0 .../hsfs/FeatureStoreException.java | 0 .../com/logicalclocks/hsfs/FeatureType.java | 0 .../logicalclocks/hsfs/FeatureViewBase.java | 0 .../hsfs/HopsworksConnectionBase.java | 0 .../logicalclocks/hsfs/HudiOperationType.java | 0 .../logicalclocks/hsfs/JobConfiguration.java | 0 .../java/com/logicalclocks/hsfs/Project.java | 0 .../com/logicalclocks/hsfs/SecretStore.java | 0 .../logicalclocks/hsfs/SecurityProtocol.java | 0 .../java/com/logicalclocks/hsfs/Split.java | 0 .../SslEndpointIdentificationAlgorithm.java | 0 .../logicalclocks/hsfs/StatisticsConfig.java | 0 .../java/com/logicalclocks/hsfs/Storage.java | 0 .../logicalclocks/hsfs/StorageConnector.java | 0 .../hsfs/StorageConnectorType.java | 0 .../logicalclocks/hsfs/TimeTravelFormat.java | 0 .../hsfs/TrainingDatasetBase.java | 0 .../hsfs/TrainingDatasetFeature.java | 0 .../hsfs/TrainingDatasetType.java | 0 .../hsfs/TransformationFunction.java | 0 .../hsfs/constructor/FeatureGroupAlias.java | 0 .../hsfs/constructor/Filter.java | 0 .../hsfs/constructor/FilterLogic.java | 0 .../hsfs/constructor/FsQueryBase.java | 0 .../logicalclocks/hsfs/constructor/Join.java | 0 .../hsfs/constructor/JoinType.java | 0 .../PreparedStatementParameter.java | 0 .../hsfs/constructor/QueryBase.java | 0 .../constructor/ServingPreparedStatement.java | 0 .../hsfs/constructor/SqlFilterCondition.java | 0 .../hsfs/constructor/SqlFilterLogic.java | 0 .../logicalclocks/hsfs/engine/CodeEngine.java | 0 .../logicalclocks/hsfs/engine/EngineBase.java | 0 .../hsfs/engine/FeatureGroupEngineBase.java | 0 .../hsfs/engine/FeatureGroupUtils.java | 0 .../hsfs/engine/FeatureViewEngineBase.java | 0 .../hsfs/engine/VectorServer.java | 0 .../hsfs/metadata/AuthorizationHandler.java | 0 .../com/logicalclocks/hsfs/metadata/Code.java | 0 .../logicalclocks/hsfs/metadata/CodeApi.java | 0 .../hsfs/metadata/Credentials.java | 0 .../hsfs/metadata/DatasetApi.java | 0 .../FeatureDescriptiveStatistics.java | 0 .../hsfs/metadata/FeatureGroupApi.java | 0 .../hsfs/metadata/FeatureStoreApi.java | 0 .../hsfs/metadata/FeatureViewApi.java | 0 .../hsfs/metadata/HopsworksClient.java | 0 .../metadata/HopsworksExternalClient.java | 0 .../metadata/HopsworksHostnameVerifier.java | 0 .../hsfs/metadata/HopsworksHttpClient.java | 0 .../metadata/HopsworksInternalClient.java | 0 .../hsfs/metadata/InternalException.java | 0 .../logicalclocks/hsfs/metadata/KafkaApi.java | 0 .../hsfs/metadata/KafkaClusterInfo.java | 0 .../hsfs/metadata/OnDemandOptions.java | 0 .../logicalclocks/hsfs/metadata/Option.java | 0 .../hsfs/metadata/ProjectApi.java | 0 .../hsfs/metadata/QueryConstructorApi.java | 0 .../logicalclocks/hsfs/metadata/RestDto.java | 0 .../hsfs/metadata/SplitStatistics.java | 0 .../hsfs/metadata/Statistics.java | 0 .../hsfs/metadata/StatisticsApi.java | 0 .../hsfs/metadata/StorageConnectorApi.java | 0 .../logicalclocks/hsfs/metadata/Subject.java | 0 .../com/logicalclocks/hsfs/metadata/Tags.java | 0 
.../logicalclocks/hsfs/metadata/TagsApi.java | 0 .../hsfs/metadata/TrainingDatasetApi.java | 0 .../TransformationFunctionAttached.java | 0 .../hsfs/metadata/UnauthorizedException.java | 0 .../com/logicalclocks/hsfs/metadata/User.java | 0 .../logicalclocks/hsfs/metadata/Variable.java | 0 .../hsfs/metadata/VariablesApi.java | 0 .../logicalclocks/hsfs/util/Constants.java | 0 .../hsfs/TestFeatureGroupBaseForApi.java | 0 .../hsfs/TestHopsworksExternalClient.java | 0 .../hsfs/engine/TestFeatureGroupUtils.java | 0 .../hsfs/metadata/TestHopsworksClient.java | 0 .../hsfs/metadata/TestTagsApi.java | 0 hsfs/java/pom.xml | 308 +++++ {java => hsfs/java}/spark/pom.xml | 0 .../hsfs/spark/ExternalFeatureGroup.java | 0 .../hsfs/spark/FeatureGroup.java | 0 .../hsfs/spark/FeatureStore.java | 0 .../logicalclocks/hsfs/spark/FeatureView.java | 0 .../hsfs/spark/HopsworksConnection.java | 0 .../logicalclocks/hsfs/spark/MainClass.java | 0 .../hsfs/spark/StreamFeatureGroup.java | 0 .../hsfs/spark/TrainingDataset.java | 0 .../hsfs/spark/TrainingDatasetBundle.java | 0 .../hsfs/spark/constructor/FsQuery.java | 0 .../hsfs/spark/constructor/Query.java | 0 .../hsfs/spark/engine/FeatureGroupEngine.java | 0 .../hsfs/spark/engine/FeatureViewEngine.java | 0 .../hsfs/spark/engine/SparkEngine.java | 0 .../hsfs/spark/engine/StatisticsEngine.java | 0 .../spark/engine/TrainingDatasetEngine.java | 0 .../spark/engine/TrainingDatasetUtils.java | 0 .../hudi/DeltaStreamerAvroDeserializer.java | 0 .../engine/hudi/DeltaStreamerConfig.java | 0 .../engine/hudi/DeltaStreamerKafkaSource.java | 0 .../hudi/DeltaStreamerSchemaProvider.java | 0 .../engine/hudi/DeltaStreamerTransformer.java | 0 .../hsfs/spark/engine/hudi/HudiEngine.java | 0 .../spark/util/StorageConnectorUtils.java | 0 .../hsfs/spark/TestExternalFeatureGroup.java | 0 .../logicalclocks/hsfs/spark/TestFeature.java | 0 .../hsfs/spark/TestFeatureGroup.java | 0 .../hsfs/spark/TestFeatureView.java | 0 .../hsfs/spark/TestStorageConnector.java | 0 .../hsfs/spark/constructor/TestQuery.java | 0 .../spark/engine/TestFeatureViewEngine.java | 0 .../hsfs/spark/engine/TestHudiEngine.java | 0 .../hsfs/spark/engine/TestSparkEngine.java | 0 .../test/resources/hadoop/bin/winutils.exe | Bin .../src/test/resources/system.properties | 0 .../java}/src/main/resources/checkstyle.xml | 0 .../java}/src/main/resources/suppressions.xml | 0 .../test/resources/hadoop/bin/winutils.exe | Bin .../src/test/resources/system.properties | 0 .../locust_benchmark}/Dockerfile | 0 .../locust_benchmark}/README.md | 0 .../locust_benchmark}/common/__init__.py | 0 .../common/hopsworks_client.py | 0 .../locust_benchmark}/common/stop_watch.py | 0 .../locust_benchmark}/create_feature_group.py | 0 .../locust_benchmark}/docker-compose.yml | 0 .../locust_benchmark}/hopsworks_config.json | 0 .../locust_benchmark}/locustfile.py | 0 .../locust_benchmark}/requirements.txt | 0 hsfs/mkdocs.yml | 130 ++ hsfs/python/.pre-commit-config.yaml | 8 + {python => hsfs/python}/hsfs/__init__.py | 0 .../python}/hsfs/builtin_transformations.py | 0 .../python}/hsfs/client/__init__.py | 0 {python => hsfs/python}/hsfs/client/auth.py | 0 {python => hsfs/python}/hsfs/client/base.py | 0 .../python}/hsfs/client/exceptions.py | 0 .../python}/hsfs/client/external.py | 0 .../python}/hsfs/client/hopsworks.py | 0 .../hsfs/client/online_store_rest_client.py | 0 {python => hsfs/python}/hsfs/code.py | 0 {python => hsfs/python}/hsfs/connection.py | 0 .../python}/hsfs/constructor/__init__.py | 0 .../external_feature_group_alias.py | 0 
.../python}/hsfs/constructor/filter.py | 0 .../python}/hsfs/constructor/fs_query.py | 0 .../constructor/hudi_feature_group_alias.py | 0 .../python}/hsfs/constructor/join.py | 0 .../prepared_statement_parameter.py | 0 .../python}/hsfs/constructor/query.py | 0 .../constructor/serving_prepared_statement.py | 0 {python => hsfs/python}/hsfs/core/__init__.py | 0 .../python}/hsfs/core/arrow_flight_client.py | 0 {python => hsfs/python}/hsfs/core/code_api.py | 0 .../python}/hsfs/core/code_engine.py | 0 .../python}/hsfs/core/constants.py | 0 .../python}/hsfs/core/dataset_api.py | 0 .../python}/hsfs/core/delta_engine.py | 0 .../hsfs/core/deltastreamer_jobconf.py | 0 .../python}/hsfs/core/execution.py | 0 .../python}/hsfs/core/expectation_api.py | 0 .../python}/hsfs/core/expectation_engine.py | 0 .../hsfs/core/expectation_suite_api.py | 0 .../hsfs/core/expectation_suite_engine.py | 0 .../python}/hsfs/core/explicit_provenance.py | 0 .../core/external_feature_group_engine.py | 0 .../core/feature_descriptive_statistics.py | 0 .../python}/hsfs/core/feature_group_api.py | 0 .../hsfs/core/feature_group_base_engine.py | 0 .../python}/hsfs/core/feature_group_engine.py | 0 .../python}/hsfs/core/feature_logging.py | 0 .../hsfs/core/feature_monitoring_config.py | 0 .../core/feature_monitoring_config_api.py | 0 .../core/feature_monitoring_config_engine.py | 0 .../hsfs/core/feature_monitoring_result.py | 0 .../core/feature_monitoring_result_api.py | 0 .../core/feature_monitoring_result_engine.py | 0 .../python}/hsfs/core/feature_store_api.py | 6 +- .../python}/hsfs/core/feature_view_api.py | 0 .../python}/hsfs/core/feature_view_engine.py | 0 .../hsfs/core/great_expectation_engine.py | 0 .../python}/hsfs/core/hosts_api.py | 0 .../python}/hsfs/core/hudi_engine.py | 0 .../python}/hsfs/core/ingestion_job.py | 0 .../python}/hsfs/core/ingestion_job_conf.py | 0 {python => hsfs/python}/hsfs/core/inode.py | 0 {python => hsfs/python}/hsfs/core/job.py | 0 {python => hsfs/python}/hsfs/core/job_api.py | 0 .../python}/hsfs/core/job_configuration.py | 0 .../python}/hsfs/core/job_schedule.py | 0 .../python}/hsfs/core/kafka_api.py | 0 .../python}/hsfs/core/kafka_engine.py | 0 .../hsfs/core/monitoring_window_config.py | 0 .../core/monitoring_window_config_engine.py | 0 .../hsfs/core/online_store_rest_client_api.py | 0 .../core/online_store_rest_client_engine.py | 0 .../hsfs/core/online_store_sql_engine.py | 0 .../python}/hsfs/core/opensearch.py | 0 .../python}/hsfs/core/opensearch_api.py | 0 .../python}/hsfs/core/project_api.py | 0 .../hsfs/core/query_constructor_api.py | 0 .../python}/hsfs/core/services_api.py | 0 .../python}/hsfs/core/spine_group_engine.py | 0 .../python}/hsfs/core/statistics_api.py | 0 .../python}/hsfs/core/statistics_engine.py | 0 .../hsfs/core/storage_connector_api.py | 0 {python => hsfs/python}/hsfs/core/tags_api.py | 0 .../python}/hsfs/core/training_dataset_api.py | 0 .../hsfs/core/training_dataset_engine.py | 0 .../hsfs/core/training_dataset_job_conf.py | 0 .../hsfs/core/transformation_function_api.py | 0 .../core/transformation_function_engine.py | 0 {python => hsfs/python}/hsfs/core/util_sql.py | 0 .../hsfs/core/validation_report_api.py | 0 .../hsfs/core/validation_report_engine.py | 0 .../hsfs/core/validation_result_api.py | 0 .../hsfs/core/validation_result_engine.py | 0 .../python}/hsfs/core/variable_api.py | 0 .../python}/hsfs/core/vector_db_client.py | 0 .../python}/hsfs/core/vector_server.py | 0 {python => hsfs/python}/hsfs/decorators.py | 0 {python => hsfs/python}/hsfs/embedding.py | 0 
.../python}/hsfs/engine/__init__.py | 0 {python => hsfs/python}/hsfs/engine/python.py | 0 {python => hsfs/python}/hsfs/engine/spark.py | 0 .../python}/hsfs/engine/spark_no_metastore.py | 0 .../python}/hsfs/expectation_suite.py | 0 {python => hsfs/python}/hsfs/feature.py | 0 {python => hsfs/python}/hsfs/feature_group.py | 65 +- .../python}/hsfs/feature_group_commit.py | 0 .../python}/hsfs/feature_group_writer.py | 0 {python => hsfs/python}/hsfs/feature_store.py | 0 {python => hsfs/python}/hsfs/feature_view.py | 0 .../python}/hsfs/ge_expectation.py | 0 .../python}/hsfs/ge_validation_result.py | 0 {python => hsfs/python}/hsfs/hopsworks_udf.py | 0 {python => hsfs/python}/hsfs/serving_key.py | 0 .../python}/hsfs/split_statistics.py | 0 {python => hsfs/python}/hsfs/statistics.py | 0 .../python}/hsfs/statistics_config.py | 0 .../python}/hsfs/storage_connector.py | 0 {python => hsfs/python}/hsfs/tag.py | 0 .../python}/hsfs/training_dataset.py | 0 .../python}/hsfs/training_dataset_feature.py | 0 .../python}/hsfs/training_dataset_split.py | 0 .../python}/hsfs/transformation_function.py | 0 .../python}/hsfs/transformation_statistics.py | 0 {python => hsfs/python}/hsfs/usage.py | 0 {python => hsfs/python}/hsfs/user.py | 0 {python => hsfs/python}/hsfs/util.py | 0 .../python}/hsfs/validation_report.py | 0 {python => hsfs/python}/hsfs/version.py | 0 hsfs/python/pyproject.toml | 173 +++ hsfs/python/setup.py | 4 + hsfs/python/tests/__init__.py | 15 + .../python}/tests/client/test_base_client.py | 1 - {python => hsfs/python}/tests/conftest.py | 3 +- .../test_external_feature_group_alias.py | 0 .../python}/tests/constructor/test_filter.py | 0 .../tests/constructor/test_fs_query.py | 0 .../test_hudi_feature_group_alias.py | 0 .../python}/tests/constructor/test_join.py | 0 .../test_prepared_statement_parameter.py | 0 .../python}/tests/constructor/test_query.py | 0 .../test_serving_prepared_statement.py | 0 .../python}/tests/core/__init__.py | 0 .../tests/core/test_arrow_flight_client.py | 0 .../python}/tests/core/test_code_engine.py | 0 .../python}/tests/core/test_execution.py | 0 .../tests/core/test_expectation_engine.py | 0 .../core/test_expectation_suite_engine.py | 0 .../test_external_feature_group_engine.py | 0 .../test_feature_descriptive_statistics.py | 0 .../tests/core/test_feature_group_api.py | 0 .../core/test_feature_group_base_engine.py | 0 .../tests/core/test_feature_group_engine.py | 0 .../core/test_feature_monitoring_config.py | 0 .../test_feature_monitoring_config_engine.py | 0 .../core/test_feature_monitoring_result.py | 0 .../test_feature_monitoring_result_engine.py | 0 .../tests/core/test_feature_view_engine.py | 0 .../core/test_great_expectation_engine.py | 0 .../python}/tests/core/test_hudi_engine.py | 0 .../python}/tests/core/test_ingestion_job.py | 0 .../python}/tests/core/test_inode.py | 0 .../python}/tests/core/test_job.py | 0 .../tests/core/test_job_configuration.py | 0 .../python}/tests/core/test_kafka_engine.py | 0 .../core/test_monitoring_window_config.py | 0 .../test_monitoring_window_config_engine.py | 0 .../core/test_online_store_rest_client.py | 0 .../core/test_online_store_rest_client_api.py | 0 .../test_online_store_rest_client_engine.py | 0 .../python}/tests/core/test_opensearch.py | 0 .../tests/core/test_statistics_engine.py | 0 .../core/test_training_dataset_engine.py | 0 .../test_transformation_function_engine.py | 0 .../core/test_validation_report_engine.py | 0 .../core/test_validation_result_engine.py | 0 .../tests/core/test_vector_db_client.py | 0 
.../tests/data/hadoop/bin/winutils.exe | Bin .../python}/tests/data/test_basic.csv | 0 .../python}/tests/data/test_basic.parquet | Bin .../python}/tests/data/test_basic.tsv | 0 .../python}/tests/engine/__init__.py | 0 .../python}/tests/engine/test_python.py | 0 .../tests/engine/test_python_reader.py | 0 .../test_python_spark_convert_dataframe.py | 0 ...t_python_spark_transformation_functions.py | 0 .../tests/engine/test_python_writer.py | 0 .../python}/tests/engine/test_spark.py | 0 .../python/tests/fixtures}/__init__.py | 2 +- .../tests/fixtures/backend_fixtures.py | 27 +- .../tests/fixtures/dataframe_fixtures.py | 0 .../tests/fixtures/execution_fixtures.json | 0 .../fixtures/expectation_suite_fixtures.json | 0 ...external_feature_group_alias_fixtures.json | 0 .../external_feature_group_fixtures.json | 0 ...ature_descriptive_statistics_fixtures.json | 0 .../tests/fixtures/feature_fixtures.json | 0 .../feature_group_commit_fixtures.json | 0 .../fixtures/feature_group_fixtures.json | 0 .../feature_monitoring_config_fixtures.json | 0 .../feature_monitoring_result_fixtures.json | 0 .../fixtures/feature_store_fixtures.json | 0 .../tests/fixtures/feature_view_fixtures.json | 0 .../tests/fixtures/filter_fixtures.json | 0 .../tests/fixtures/fs_query_fixtures.json | 0 .../fixtures/ge_expectation_fixtures.json | 0 .../ge_validation_result_fixtures.json | 0 .../fixtures/generate_backend_fixtures.ipynb | 0 .../hudi_feature_group_alias_fixtures.json | 0 .../fixtures/ingestion_job_fixtures.json | 0 .../tests/fixtures/inode_fixtures.json | 0 .../python}/tests/fixtures/job_fixtures.json | 0 .../python}/tests/fixtures/join_fixtures.json | 0 .../tests/fixtures/logic_fixtures.json | 0 ...prepared_statement_parameter_fixtures.json | 0 .../tests/fixtures/query_fixtures.json | 0 .../tests/fixtures/rondb_server_fixtures.json | 0 .../tests/fixtures/serving_keys_fixtures.json | 0 .../serving_prepared_statement_fixtures.json | 0 .../tests/fixtures/spine_group_fixtures.json | 0 .../fixtures/split_statistics_fixtures.json | 0 .../fixtures/statistics_config_fixtures.json | 0 .../tests/fixtures/statistics_fixtures.json | 0 .../fixtures/storage_connector_fixtures.json | 0 hsfs/python/tests/fixtures/tag_fixtures.json | 25 + .../training_dataset_feature_fixtures.json | 0 .../fixtures/training_dataset_fixtures.json | 0 .../training_dataset_split_fixtures.json | 0 ...sformation_function_attached_fixtures.json | 0 .../transformation_function_fixtures.json | 0 .../python}/tests/fixtures/user_fixtures.json | 0 .../fixtures/validation_report_fixtures.json | 0 hsfs/python/tests/pyproject.toml | 37 + .../python}/tests/test_expectation_suite.py | 0 {python => hsfs/python}/tests/test_feature.py | 0 .../python}/tests/test_feature_group.py | 0 .../tests/test_feature_group_commit.py | 0 .../tests/test_feature_group_writer.py | 0 .../python}/tests/test_feature_store.py | 0 .../python}/tests/test_feature_view.py | 0 .../python}/tests/test_ge_expectation.py | 0 .../tests/test_ge_validation_result.py | 0 .../python}/tests/test_helpers/__init__.py | 0 .../transformation_test_helper.py | 0 .../python}/tests/test_hopswork_udf.py | 32 +- .../python}/tests/test_serving_keys.py | 0 .../python}/tests/test_split_statistics.py | 0 .../python}/tests/test_statistics.py | 0 .../python}/tests/test_statistics_config.py | 0 .../python}/tests/test_storage_connector.py | 0 hsfs/python/tests/test_tag.py | 43 + .../python}/tests/test_training_dataset.py | 0 .../tests/test_training_dataset_feature.py | 0 .../tests/test_training_dataset_split.py | 0 
.../tests/test_transformation_function.py | 0 {python => hsfs/python}/tests/test_user.py | 0 hsfs/python/tests/test_util.py | 230 ++++ .../python}/tests/test_validation_report.py | 0 {python => hsfs/python}/tests/util.py | 0 hsfs/requirements-docs.txt | 12 + {utils => hsfs/utils}/java/pom.xml | 0 .../com/logicalclocks/utils/MainClass.java | 0 .../java/src/main/resources/checkstyle.xml | 0 .../java/src/main/resources/suppressions.xml | 0 {utils => hsfs/utils}/python/hsfs_utils.py | 0 hsml/.github/workflows/mkdocs-main.yml | 35 + hsml/.github/workflows/mkdocs-release.yml | 42 + hsml/.github/workflows/python-lint.yml | 163 +++ hsml/.gitignore | 130 ++ hsml/CONTRIBUTING.md | 215 ++++ hsml/Dockerfile | 9 + hsml/Jenkinsfile | 23 + hsml/LICENSE | 201 ++++ hsml/README.md | 141 +++ hsml/auto_doc.py | 210 ++++ hsml/docs/CONTRIBUTING.md | 215 ++++ hsml/docs/assets/images/favicon.ico | Bin 0 -> 2699 bytes hsml/docs/assets/images/hops-logo.png | Bin 0 -> 6356 bytes hsml/docs/css/custom.css | 115 ++ hsml/docs/css/dropdown.css | 55 + hsml/docs/css/marctech.css | 1047 +++++++++++++++++ hsml/docs/css/version-select.css | 36 + hsml/docs/index.md | 141 +++ hsml/docs/js/dropdown.js | 2 + hsml/docs/js/inject-api-links.js | 31 + hsml/docs/js/version-select.js | 64 + hsml/docs/overrides/main.html | 8 + .../docs}/templates/connection_api.md | 0 .../docs}/templates/model-registry/links.md | 0 .../templates/model-registry/model_api.md | 0 .../model-registry/model_registry_api.md | 0 .../model-registry/model_schema_api.md | 0 .../templates/model-serving/deployment_api.md | 0 .../model-serving/inference_batcher_api.md | 0 .../model-serving/inference_logger_api.md | 0 .../model-serving/model_serving_api.md | 0 .../templates/model-serving/predictor_api.md | 0 .../model-serving/predictor_state_api.md | 0 .../predictor_state_condition_api.md | 0 .../templates/model-serving/resources_api.md | 0 .../model-serving/transformer_api.md | 0 hsml/java/pom.xml | 109 ++ hsml/java/src/main/resources/checkstyle.xml | 312 +++++ hsml/java/src/main/resources/suppressions.xml | 5 + hsml/mkdocs.yml | 120 ++ hsml/python/.pre-commit-config.yaml | 10 + {python => hsml/python}/hsml/__init__.py | 0 .../python}/hsml/client/__init__.py | 4 +- {python => hsml/python}/hsml/client/auth.py | 0 {python => hsml/python}/hsml/client/base.py | 0 .../python}/hsml/client/exceptions.py | 0 .../python}/hsml/client/hopsworks/__init__.py | 0 .../python}/hsml/client/hopsworks/base.py | 0 .../python}/hsml/client/hopsworks/external.py | 0 .../python}/hsml/client/hopsworks/internal.py | 0 .../python}/hsml/client/istio/__init__.py | 0 .../python}/hsml/client/istio/base.py | 0 .../python}/hsml/client/istio/external.py | 0 .../hsml/client/istio/grpc}/__init__.py | 0 .../python}/hsml/client/istio/grpc/errors.py | 0 .../hsml/client/istio/grpc/exceptions.py | 0 .../client/istio/grpc/inference_client.py | 0 .../hsml/client/istio/grpc/proto}/__init__.py | 0 .../istio/grpc/proto/grpc_predict_v2.proto | 0 .../istio/grpc/proto/grpc_predict_v2_pb2.py | 0 .../istio/grpc/proto/grpc_predict_v2_pb2.pyi | 0 .../grpc/proto/grpc_predict_v2_pb2_grpc.py | 0 .../python}/hsml/client/istio/internal.py | 0 .../hsml/client/istio/utils}/__init__.py | 0 .../hsml/client/istio/utils/infer_type.py | 0 .../hsml/client/istio/utils/numpy_codec.py | 0 {python => hsml/python}/hsml/connection.py | 8 +- {python => hsml/python}/hsml/constants.py | 4 - {python => hsml/python}/hsml/core/__init__.py | 0 .../python}/hsml/core/dataset_api.py | 0 .../python}/hsml/core/explicit_provenance.py | 0 
.../python}/hsml/core/model_api.py | 0 .../python}/hsml/core/model_registry_api.py | 0 .../python}/hsml/core/model_serving_api.py | 0 .../python}/hsml/core/native_hdfs_api.py | 0 .../python}/hsml/core/serving_api.py | 0 {python => hsml/python}/hsml/decorators.py | 0 .../python}/hsml/deployable_component.py | 0 .../python}/hsml/deployable_component_logs.py | 0 {python => hsml/python}/hsml/deployment.py | 0 .../python}/hsml/engine/__init__.py | 0 .../python}/hsml/engine/hopsworks_engine.py | 0 .../python}/hsml/engine/local_engine.py | 0 .../python}/hsml/engine/model_engine.py | 0 .../python}/hsml/engine/serving_engine.py | 0 .../python}/hsml/inference_batcher.py | 0 .../python}/hsml/inference_endpoint.py | 0 .../python}/hsml/inference_logger.py | 0 {python => hsml/python}/hsml/kafka_topic.py | 0 {python => hsml/python}/hsml/model.py | 0 .../python}/hsml/model_registry.py | 0 {python => hsml/python}/hsml/model_schema.py | 0 {python => hsml/python}/hsml/model_serving.py | 0 {python => hsml/python}/hsml/predictor.py | 0 .../python}/hsml/predictor_state.py | 0 .../python}/hsml/predictor_state_condition.py | 0 .../python}/hsml/python/__init__.py | 0 {python => hsml/python}/hsml/python/model.py | 0 .../python}/hsml/python/predictor.py | 0 .../python}/hsml/python/signature.py | 0 {python => hsml/python}/hsml/resources.py | 0 {python => hsml/python}/hsml/schema.py | 0 .../python}/hsml/sklearn/__init__.py | 0 {python => hsml/python}/hsml/sklearn/model.py | 0 .../python}/hsml/sklearn/predictor.py | 0 .../python}/hsml/sklearn/signature.py | 0 {python => hsml/python}/hsml/tag.py | 0 .../python}/hsml/tensorflow/__init__.py | 0 .../python}/hsml/tensorflow/model.py | 0 .../python}/hsml/tensorflow/predictor.py | 0 .../python}/hsml/tensorflow/signature.py | 0 .../python}/hsml/torch/__init__.py | 0 {python => hsml/python}/hsml/torch/model.py | 0 .../python}/hsml/torch/predictor.py | 0 .../python}/hsml/torch/signature.py | 0 {python => hsml/python}/hsml/transformer.py | 0 {python => hsml/python}/hsml/util.py | 35 +- .../python}/hsml/utils/__init__.py | 0 .../python}/hsml/utils/schema/__init__.py | 0 .../python}/hsml/utils/schema/column.py | 0 .../hsml/utils/schema/columnar_schema.py | 0 .../python}/hsml/utils/schema/tensor.py | 0 .../hsml/utils/schema/tensor_schema.py | 0 {python => hsml/python}/hsml/version.py | 0 hsml/python/pyproject.toml | 136 +++ hsml/python/setup.py | 19 + hsml/python/tests/__init__.py | 15 + hsml/python/tests/conftest.py | 20 + .../python/tests/fixtures}/__init__.py | 0 .../python/tests/fixtures/backend_fixtures.py | 45 + .../fixtures/inference_batcher_fixtures.json | 0 .../fixtures/inference_endpoint_fixtures.json | 0 .../fixtures/inference_logger_fixtures.json | 0 .../tests/fixtures/kafka_topic_fixtures.json | 0 .../tests/fixtures/model_fixtures.json | 0 .../python}/tests/fixtures/model_fixtures.py | 0 .../tests/fixtures/predictor_fixtures.json | 0 .../tests/fixtures/resources_fixtures.json | 0 .../python}/tests/fixtures/tag_fixtures.json | 0 .../tests/fixtures/transformer_fixtures.json | 0 .../python}/tests/test_connection.py | 4 +- .../python}/tests/test_constants.py | 0 .../python}/tests/test_decorators.py | 0 .../tests/test_deployable_component.py | 0 .../tests/test_deployable_component_logs.py | 0 .../python}/tests/test_deployment.py | 0 .../python}/tests/test_explicit_provenance.py | 0 .../python}/tests/test_inference_batcher.py | 0 .../python}/tests/test_inference_endpoint.py | 0 .../python}/tests/test_inference_logger.py | 0 .../python}/tests/test_kafka_topic.py | 0 {python => 
hsml/python}/tests/test_model.py | 0 .../python}/tests/test_model_schema.py | 0 .../python}/tests/test_predictor.py | 0 .../python}/tests/test_predictor_state.py | 0 .../tests/test_predictor_state_condition.py | 0 .../python}/tests/test_resources.py | 0 {python => hsml/python}/tests/test_schema.py | 0 {python => hsml/python}/tests/test_tag.py | 0 .../python}/tests/test_transformer.py | 0 {python => hsml/python}/tests/test_util.py | 212 +--- .../python/tests/utils}/__init__.py | 0 .../python}/tests/utils/schema/test_column.py | 0 .../utils/schema/test_columnar_schema.py | 0 .../python}/tests/utils/schema/test_tensor.py | 0 .../tests/utils/schema/test_tensor_schema.py | 0 hsml/requirements-docs.txt | 11 + java/pom.xml | 300 +---- mkdocs.yml | 43 +- python/.pre-commit-config.yaml | 2 +- python/auto_doc.py | 721 ------------ python/hopsworks/project.py | 10 +- python/pyproject.toml | 67 +- python/tests/hopsworks/test_login.py | 23 +- requirements-docs.txt | 2 +- 661 files changed, 8453 insertions(+), 1711 deletions(-) create mode 100644 .github/workflows/python-lint.yml create mode 100644 auto_doc.py create mode 100644 docs/CONTRIBUTING.md rename {.github => hsfs/.github}/pull_request_template.md (100%) rename .github/workflows/java.yml => hsfs/.github/workflows/java-ut.yml (84%) create mode 100644 hsfs/.github/workflows/mkdocs-master.yml create mode 100644 hsfs/.github/workflows/mkdocs-release.yml create mode 100644 hsfs/.github/workflows/optional-dependency.yml rename .github/workflows/python.yml => hsfs/.github/workflows/python-lint.yml (78%) create mode 100644 hsfs/.gitignore create mode 100644 hsfs/CONTRIBUTING.md rename Dockerfile => hsfs/Dockerfile (100%) create mode 100644 hsfs/Jenkinsfile create mode 100644 hsfs/LICENSE create mode 100644 hsfs/README.md create mode 100644 hsfs/auto_doc.py create mode 100644 hsfs/docs/CONTRIBUTING.md create mode 100644 hsfs/docs/assets/images/favicon.ico create mode 100644 hsfs/docs/assets/images/hops-logo.png rename {docs => hsfs/docs}/assets/images/hopsworks-logo.png (100%) create mode 100644 hsfs/docs/css/custom.css create mode 100644 hsfs/docs/css/dropdown.css create mode 100644 hsfs/docs/css/marctech.css create mode 100644 hsfs/docs/css/version-select.css create mode 100644 hsfs/docs/index.md create mode 100644 hsfs/docs/js/dropdown.js create mode 100644 hsfs/docs/js/inject-api-links.js create mode 100644 hsfs/docs/js/version-select.js create mode 100644 hsfs/docs/overrides/main.html rename {docs => hsfs/docs}/templates/api/connection_api.md (100%) rename {docs => hsfs/docs}/templates/api/embedding_feature_api.md (100%) rename {docs => hsfs/docs}/templates/api/embedding_index_api.md (100%) rename {docs => hsfs/docs}/templates/api/expectation_api.md (100%) rename {docs => hsfs/docs}/templates/api/expectation_suite_api.md (100%) rename {docs => hsfs/docs}/templates/api/external_feature_group_api.md (100%) rename {docs => hsfs/docs}/templates/api/feature_api.md (100%) rename {docs => hsfs/docs}/templates/api/feature_descriptive_statistics_api.md (100%) rename {docs => hsfs/docs}/templates/api/feature_group_api.md (100%) rename {docs => hsfs/docs}/templates/api/feature_monitoring_config_api.md (100%) rename {docs => hsfs/docs}/templates/api/feature_monitoring_result_api.md (100%) rename {docs => hsfs/docs}/templates/api/feature_monitoring_window_config_api.md (100%) rename {docs => hsfs/docs}/templates/api/feature_store_api.md (100%) rename {docs => hsfs/docs}/templates/api/feature_view_api.md (100%) rename {docs => hsfs/docs}/templates/api/job.md 
(100%) rename {docs => hsfs/docs}/templates/api/links.md (100%) rename {docs => hsfs/docs}/templates/api/query_api.md (100%) rename {docs => hsfs/docs}/templates/api/rule_api.md (100%) rename {docs => hsfs/docs}/templates/api/rule_definition_api.md (100%) rename {docs => hsfs/docs}/templates/api/similarity_function_type_api.md (100%) rename {docs => hsfs/docs}/templates/api/spine_group_api.md (100%) rename {docs => hsfs/docs}/templates/api/split_statistics_api.md (100%) rename {docs => hsfs/docs}/templates/api/statistics_api.md (100%) rename {docs => hsfs/docs}/templates/api/statistics_config_api.md (100%) rename {docs => hsfs/docs}/templates/api/storage_connector_api.md (100%) rename {docs => hsfs/docs}/templates/api/training_dataset_api.md (100%) rename {docs => hsfs/docs}/templates/api/transformation_functions_api.md (100%) rename {docs => hsfs/docs}/templates/api/validation_api.md (100%) rename {docs => hsfs/docs}/templates/api/validation_report_api.md (100%) rename {java => hsfs/java}/beam/pom.xml (100%) rename {java => hsfs/java}/beam/src/main/java/com/logicalclocks/hsfs/beam/FeatureStore.java (100%) rename {java => hsfs/java}/beam/src/main/java/com/logicalclocks/hsfs/beam/FeatureView.java (100%) rename {java => hsfs/java}/beam/src/main/java/com/logicalclocks/hsfs/beam/HopsworksConnection.java (100%) rename {java => hsfs/java}/beam/src/main/java/com/logicalclocks/hsfs/beam/StreamFeatureGroup.java (100%) rename {java => hsfs/java}/beam/src/main/java/com/logicalclocks/hsfs/beam/constructor/Query.java (100%) rename {java => hsfs/java}/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/BeamEngine.java (100%) rename {java => hsfs/java}/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/BeamKafkaProducer.java (100%) rename {java => hsfs/java}/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/BeamProducer.java (100%) rename {java => hsfs/java}/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/FeatureGroupEngine.java (100%) rename {java => hsfs/java}/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/FeatureViewEngine.java (100%) rename {java => hsfs/java}/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/GenericAvroSerializer.java (100%) rename {java => hsfs/java}/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/KeySerializer.java (100%) rename {java => hsfs/java}/flink/pom.xml (100%) rename {java => hsfs/java}/flink/src/main/java/com/logicalclocks/hsfs/flink/FeatureStore.java (100%) rename {java => hsfs/java}/flink/src/main/java/com/logicalclocks/hsfs/flink/FeatureView.java (100%) rename {java => hsfs/java}/flink/src/main/java/com/logicalclocks/hsfs/flink/HopsworksConnection.java (100%) rename {java => hsfs/java}/flink/src/main/java/com/logicalclocks/hsfs/flink/StreamFeatureGroup.java (100%) rename {java => hsfs/java}/flink/src/main/java/com/logicalclocks/hsfs/flink/constructor/FsQuery.java (100%) rename {java => hsfs/java}/flink/src/main/java/com/logicalclocks/hsfs/flink/constructor/Query.java (100%) rename {java => hsfs/java}/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/FeatureGroupEngine.java (100%) rename {java => hsfs/java}/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/FeatureViewEngine.java (100%) rename {java => hsfs/java}/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/FlinkEngine.java (100%) rename {java => hsfs/java}/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/KafkaRecordSerializer.java (100%) rename {java => hsfs/java}/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/PojoToAvroRecord.java (100%) rename 
{java => hsfs/java}/hsfs/pom.xml (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/DataFormat.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/DeltaStreamerJobConf.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/EntityEndpointType.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/ExternalDataFormat.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/Feature.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureGroupBase.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureGroupBaseForApi.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureGroupCommit.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureStoreBase.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureStoreException.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureType.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureViewBase.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/HopsworksConnectionBase.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/HudiOperationType.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/JobConfiguration.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/Project.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/SecretStore.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/SecurityProtocol.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/Split.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/SslEndpointIdentificationAlgorithm.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/StatisticsConfig.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/Storage.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/StorageConnector.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/StorageConnectorType.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/TimeTravelFormat.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/TrainingDatasetBase.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/TrainingDatasetFeature.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/TrainingDatasetType.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/TransformationFunction.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/FeatureGroupAlias.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/Filter.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/FilterLogic.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/FsQueryBase.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/Join.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/JoinType.java (100%) rename {java => 
hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/PreparedStatementParameter.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/QueryBase.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/ServingPreparedStatement.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/SqlFilterCondition.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/SqlFilterLogic.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/engine/CodeEngine.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/engine/EngineBase.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/engine/FeatureGroupEngineBase.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/engine/FeatureGroupUtils.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/engine/FeatureViewEngineBase.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/engine/VectorServer.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/AuthorizationHandler.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Code.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/CodeApi.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Credentials.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/DatasetApi.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureDescriptiveStatistics.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureGroupApi.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureStoreApi.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureViewApi.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksClient.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksExternalClient.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksHostnameVerifier.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksHttpClient.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksInternalClient.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/InternalException.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/KafkaApi.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/KafkaClusterInfo.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/OnDemandOptions.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Option.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/ProjectApi.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/QueryConstructorApi.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/RestDto.java (100%) rename {java => 
hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/SplitStatistics.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Statistics.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/StatisticsApi.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/StorageConnectorApi.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Subject.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Tags.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/TagsApi.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/TrainingDatasetApi.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/TransformationFunctionAttached.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/UnauthorizedException.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/User.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Variable.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/VariablesApi.java (100%) rename {java => hsfs/java}/hsfs/src/main/java/com/logicalclocks/hsfs/util/Constants.java (100%) rename {java => hsfs/java}/hsfs/src/test/java/com/logicalclocks/hsfs/TestFeatureGroupBaseForApi.java (100%) rename {java => hsfs/java}/hsfs/src/test/java/com/logicalclocks/hsfs/TestHopsworksExternalClient.java (100%) rename {java => hsfs/java}/hsfs/src/test/java/com/logicalclocks/hsfs/engine/TestFeatureGroupUtils.java (100%) rename {java => hsfs/java}/hsfs/src/test/java/com/logicalclocks/hsfs/metadata/TestHopsworksClient.java (100%) rename {java => hsfs/java}/hsfs/src/test/java/com/logicalclocks/hsfs/metadata/TestTagsApi.java (100%) create mode 100644 hsfs/java/pom.xml rename {java => hsfs/java}/spark/pom.xml (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/ExternalFeatureGroup.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureGroup.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureStore.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureView.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/HopsworksConnection.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/MainClass.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/StreamFeatureGroup.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/TrainingDataset.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/TrainingDatasetBundle.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/constructor/FsQuery.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/constructor/Query.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/FeatureGroupEngine.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/FeatureViewEngine.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/SparkEngine.java (100%) rename {java 
=> hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/StatisticsEngine.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/TrainingDatasetEngine.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/TrainingDatasetUtils.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerAvroDeserializer.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerConfig.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerKafkaSource.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerSchemaProvider.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerTransformer.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/HudiEngine.java (100%) rename {java => hsfs/java}/spark/src/main/java/com/logicalclocks/hsfs/spark/util/StorageConnectorUtils.java (100%) rename {java => hsfs/java}/spark/src/test/java/com/logicalclocks/hsfs/spark/TestExternalFeatureGroup.java (100%) rename {java => hsfs/java}/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeature.java (100%) rename {java => hsfs/java}/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeatureGroup.java (100%) rename {java => hsfs/java}/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeatureView.java (100%) rename {java => hsfs/java}/spark/src/test/java/com/logicalclocks/hsfs/spark/TestStorageConnector.java (100%) rename {java => hsfs/java}/spark/src/test/java/com/logicalclocks/hsfs/spark/constructor/TestQuery.java (100%) rename {java => hsfs/java}/spark/src/test/java/com/logicalclocks/hsfs/spark/engine/TestFeatureViewEngine.java (100%) rename {java => hsfs/java}/spark/src/test/java/com/logicalclocks/hsfs/spark/engine/TestHudiEngine.java (100%) rename {java => hsfs/java}/spark/src/test/java/com/logicalclocks/hsfs/spark/engine/TestSparkEngine.java (100%) rename {java => hsfs/java}/spark/src/test/resources/hadoop/bin/winutils.exe (100%) rename {java => hsfs/java}/spark/src/test/resources/system.properties (100%) rename {java => hsfs/java}/src/main/resources/checkstyle.xml (100%) rename {java => hsfs/java}/src/main/resources/suppressions.xml (100%) rename {java => hsfs/java}/src/test/resources/hadoop/bin/winutils.exe (100%) rename {java => hsfs/java}/src/test/resources/system.properties (100%) rename {locust_benchmark => hsfs/locust_benchmark}/Dockerfile (100%) rename {locust_benchmark => hsfs/locust_benchmark}/README.md (100%) rename {locust_benchmark => hsfs/locust_benchmark}/common/__init__.py (100%) rename {locust_benchmark => hsfs/locust_benchmark}/common/hopsworks_client.py (100%) rename {locust_benchmark => hsfs/locust_benchmark}/common/stop_watch.py (100%) rename {locust_benchmark => hsfs/locust_benchmark}/create_feature_group.py (100%) rename {locust_benchmark => hsfs/locust_benchmark}/docker-compose.yml (100%) rename {locust_benchmark => hsfs/locust_benchmark}/hopsworks_config.json (100%) rename {locust_benchmark => hsfs/locust_benchmark}/locustfile.py (100%) rename {locust_benchmark => hsfs/locust_benchmark}/requirements.txt (100%) create mode 100644 hsfs/mkdocs.yml create mode 100644 hsfs/python/.pre-commit-config.yaml rename {python => hsfs/python}/hsfs/__init__.py (100%) rename {python => 
hsfs/python}/hsfs/builtin_transformations.py (100%) rename {python => hsfs/python}/hsfs/client/__init__.py (100%) rename {python => hsfs/python}/hsfs/client/auth.py (100%) rename {python => hsfs/python}/hsfs/client/base.py (100%) rename {python => hsfs/python}/hsfs/client/exceptions.py (100%) rename {python => hsfs/python}/hsfs/client/external.py (100%) rename {python => hsfs/python}/hsfs/client/hopsworks.py (100%) rename {python => hsfs/python}/hsfs/client/online_store_rest_client.py (100%) rename {python => hsfs/python}/hsfs/code.py (100%) rename {python => hsfs/python}/hsfs/connection.py (100%) rename {python => hsfs/python}/hsfs/constructor/__init__.py (100%) rename {python => hsfs/python}/hsfs/constructor/external_feature_group_alias.py (100%) rename {python => hsfs/python}/hsfs/constructor/filter.py (100%) rename {python => hsfs/python}/hsfs/constructor/fs_query.py (100%) rename {python => hsfs/python}/hsfs/constructor/hudi_feature_group_alias.py (100%) rename {python => hsfs/python}/hsfs/constructor/join.py (100%) rename {python => hsfs/python}/hsfs/constructor/prepared_statement_parameter.py (100%) rename {python => hsfs/python}/hsfs/constructor/query.py (100%) rename {python => hsfs/python}/hsfs/constructor/serving_prepared_statement.py (100%) rename {python => hsfs/python}/hsfs/core/__init__.py (100%) rename {python => hsfs/python}/hsfs/core/arrow_flight_client.py (100%) rename {python => hsfs/python}/hsfs/core/code_api.py (100%) rename {python => hsfs/python}/hsfs/core/code_engine.py (100%) rename {python => hsfs/python}/hsfs/core/constants.py (100%) rename {python => hsfs/python}/hsfs/core/dataset_api.py (100%) rename {python => hsfs/python}/hsfs/core/delta_engine.py (100%) rename {python => hsfs/python}/hsfs/core/deltastreamer_jobconf.py (100%) rename {python => hsfs/python}/hsfs/core/execution.py (100%) rename {python => hsfs/python}/hsfs/core/expectation_api.py (100%) rename {python => hsfs/python}/hsfs/core/expectation_engine.py (100%) rename {python => hsfs/python}/hsfs/core/expectation_suite_api.py (100%) rename {python => hsfs/python}/hsfs/core/expectation_suite_engine.py (100%) rename {python => hsfs/python}/hsfs/core/explicit_provenance.py (100%) rename {python => hsfs/python}/hsfs/core/external_feature_group_engine.py (100%) rename {python => hsfs/python}/hsfs/core/feature_descriptive_statistics.py (100%) rename {python => hsfs/python}/hsfs/core/feature_group_api.py (100%) rename {python => hsfs/python}/hsfs/core/feature_group_base_engine.py (100%) rename {python => hsfs/python}/hsfs/core/feature_group_engine.py (100%) rename {python => hsfs/python}/hsfs/core/feature_logging.py (100%) rename {python => hsfs/python}/hsfs/core/feature_monitoring_config.py (100%) rename {python => hsfs/python}/hsfs/core/feature_monitoring_config_api.py (100%) rename {python => hsfs/python}/hsfs/core/feature_monitoring_config_engine.py (100%) rename {python => hsfs/python}/hsfs/core/feature_monitoring_result.py (100%) rename {python => hsfs/python}/hsfs/core/feature_monitoring_result_api.py (100%) rename {python => hsfs/python}/hsfs/core/feature_monitoring_result_engine.py (100%) rename {python => hsfs/python}/hsfs/core/feature_store_api.py (86%) rename {python => hsfs/python}/hsfs/core/feature_view_api.py (100%) rename {python => hsfs/python}/hsfs/core/feature_view_engine.py (100%) rename {python => hsfs/python}/hsfs/core/great_expectation_engine.py (100%) rename {python => hsfs/python}/hsfs/core/hosts_api.py (100%) rename {python => hsfs/python}/hsfs/core/hudi_engine.py (100%) rename 
{python => hsfs/python}/hsfs/core/ingestion_job.py (100%) rename {python => hsfs/python}/hsfs/core/ingestion_job_conf.py (100%) rename {python => hsfs/python}/hsfs/core/inode.py (100%) rename {python => hsfs/python}/hsfs/core/job.py (100%) rename {python => hsfs/python}/hsfs/core/job_api.py (100%) rename {python => hsfs/python}/hsfs/core/job_configuration.py (100%) rename {python => hsfs/python}/hsfs/core/job_schedule.py (100%) rename {python => hsfs/python}/hsfs/core/kafka_api.py (100%) rename {python => hsfs/python}/hsfs/core/kafka_engine.py (100%) rename {python => hsfs/python}/hsfs/core/monitoring_window_config.py (100%) rename {python => hsfs/python}/hsfs/core/monitoring_window_config_engine.py (100%) rename {python => hsfs/python}/hsfs/core/online_store_rest_client_api.py (100%) rename {python => hsfs/python}/hsfs/core/online_store_rest_client_engine.py (100%) rename {python => hsfs/python}/hsfs/core/online_store_sql_engine.py (100%) rename {python => hsfs/python}/hsfs/core/opensearch.py (100%) rename {python => hsfs/python}/hsfs/core/opensearch_api.py (100%) rename {python => hsfs/python}/hsfs/core/project_api.py (100%) rename {python => hsfs/python}/hsfs/core/query_constructor_api.py (100%) rename {python => hsfs/python}/hsfs/core/services_api.py (100%) rename {python => hsfs/python}/hsfs/core/spine_group_engine.py (100%) rename {python => hsfs/python}/hsfs/core/statistics_api.py (100%) rename {python => hsfs/python}/hsfs/core/statistics_engine.py (100%) rename {python => hsfs/python}/hsfs/core/storage_connector_api.py (100%) rename {python => hsfs/python}/hsfs/core/tags_api.py (100%) rename {python => hsfs/python}/hsfs/core/training_dataset_api.py (100%) rename {python => hsfs/python}/hsfs/core/training_dataset_engine.py (100%) rename {python => hsfs/python}/hsfs/core/training_dataset_job_conf.py (100%) rename {python => hsfs/python}/hsfs/core/transformation_function_api.py (100%) rename {python => hsfs/python}/hsfs/core/transformation_function_engine.py (100%) rename {python => hsfs/python}/hsfs/core/util_sql.py (100%) rename {python => hsfs/python}/hsfs/core/validation_report_api.py (100%) rename {python => hsfs/python}/hsfs/core/validation_report_engine.py (100%) rename {python => hsfs/python}/hsfs/core/validation_result_api.py (100%) rename {python => hsfs/python}/hsfs/core/validation_result_engine.py (100%) rename {python => hsfs/python}/hsfs/core/variable_api.py (100%) rename {python => hsfs/python}/hsfs/core/vector_db_client.py (100%) rename {python => hsfs/python}/hsfs/core/vector_server.py (100%) rename {python => hsfs/python}/hsfs/decorators.py (100%) rename {python => hsfs/python}/hsfs/embedding.py (100%) rename {python => hsfs/python}/hsfs/engine/__init__.py (100%) rename {python => hsfs/python}/hsfs/engine/python.py (100%) rename {python => hsfs/python}/hsfs/engine/spark.py (100%) rename {python => hsfs/python}/hsfs/engine/spark_no_metastore.py (100%) rename {python => hsfs/python}/hsfs/expectation_suite.py (100%) rename {python => hsfs/python}/hsfs/feature.py (100%) rename {python => hsfs/python}/hsfs/feature_group.py (98%) rename {python => hsfs/python}/hsfs/feature_group_commit.py (100%) rename {python => hsfs/python}/hsfs/feature_group_writer.py (100%) rename {python => hsfs/python}/hsfs/feature_store.py (100%) rename {python => hsfs/python}/hsfs/feature_view.py (100%) rename {python => hsfs/python}/hsfs/ge_expectation.py (100%) rename {python => hsfs/python}/hsfs/ge_validation_result.py (100%) rename {python => hsfs/python}/hsfs/hopsworks_udf.py (100%) rename 
{python => hsfs/python}/hsfs/serving_key.py (100%) rename {python => hsfs/python}/hsfs/split_statistics.py (100%) rename {python => hsfs/python}/hsfs/statistics.py (100%) rename {python => hsfs/python}/hsfs/statistics_config.py (100%) rename {python => hsfs/python}/hsfs/storage_connector.py (100%) rename {python => hsfs/python}/hsfs/tag.py (100%) rename {python => hsfs/python}/hsfs/training_dataset.py (100%) rename {python => hsfs/python}/hsfs/training_dataset_feature.py (100%) rename {python => hsfs/python}/hsfs/training_dataset_split.py (100%) rename {python => hsfs/python}/hsfs/transformation_function.py (100%) rename {python => hsfs/python}/hsfs/transformation_statistics.py (100%) rename {python => hsfs/python}/hsfs/usage.py (100%) rename {python => hsfs/python}/hsfs/user.py (100%) rename {python => hsfs/python}/hsfs/util.py (100%) rename {python => hsfs/python}/hsfs/validation_report.py (100%) rename {python => hsfs/python}/hsfs/version.py (100%) create mode 100644 hsfs/python/pyproject.toml create mode 100644 hsfs/python/setup.py create mode 100644 hsfs/python/tests/__init__.py rename {python => hsfs/python}/tests/client/test_base_client.py (99%) rename {python => hsfs/python}/tests/conftest.py (93%) rename {python => hsfs/python}/tests/constructor/test_external_feature_group_alias.py (100%) rename {python => hsfs/python}/tests/constructor/test_filter.py (100%) rename {python => hsfs/python}/tests/constructor/test_fs_query.py (100%) rename {python => hsfs/python}/tests/constructor/test_hudi_feature_group_alias.py (100%) rename {python => hsfs/python}/tests/constructor/test_join.py (100%) rename {python => hsfs/python}/tests/constructor/test_prepared_statement_parameter.py (100%) rename {python => hsfs/python}/tests/constructor/test_query.py (100%) rename {python => hsfs/python}/tests/constructor/test_serving_prepared_statement.py (100%) rename {python => hsfs/python}/tests/core/__init__.py (100%) rename {python => hsfs/python}/tests/core/test_arrow_flight_client.py (100%) rename {python => hsfs/python}/tests/core/test_code_engine.py (100%) rename {python => hsfs/python}/tests/core/test_execution.py (100%) rename {python => hsfs/python}/tests/core/test_expectation_engine.py (100%) rename {python => hsfs/python}/tests/core/test_expectation_suite_engine.py (100%) rename {python => hsfs/python}/tests/core/test_external_feature_group_engine.py (100%) rename {python => hsfs/python}/tests/core/test_feature_descriptive_statistics.py (100%) rename {python => hsfs/python}/tests/core/test_feature_group_api.py (100%) rename {python => hsfs/python}/tests/core/test_feature_group_base_engine.py (100%) rename {python => hsfs/python}/tests/core/test_feature_group_engine.py (100%) rename {python => hsfs/python}/tests/core/test_feature_monitoring_config.py (100%) rename {python => hsfs/python}/tests/core/test_feature_monitoring_config_engine.py (100%) rename {python => hsfs/python}/tests/core/test_feature_monitoring_result.py (100%) rename {python => hsfs/python}/tests/core/test_feature_monitoring_result_engine.py (100%) rename {python => hsfs/python}/tests/core/test_feature_view_engine.py (100%) rename {python => hsfs/python}/tests/core/test_great_expectation_engine.py (100%) rename {python => hsfs/python}/tests/core/test_hudi_engine.py (100%) rename {python => hsfs/python}/tests/core/test_ingestion_job.py (100%) rename {python => hsfs/python}/tests/core/test_inode.py (100%) rename {python => hsfs/python}/tests/core/test_job.py (100%) rename {python => hsfs/python}/tests/core/test_job_configuration.py 
(100%) rename {python => hsfs/python}/tests/core/test_kafka_engine.py (100%) rename {python => hsfs/python}/tests/core/test_monitoring_window_config.py (100%) rename {python => hsfs/python}/tests/core/test_monitoring_window_config_engine.py (100%) rename {python => hsfs/python}/tests/core/test_online_store_rest_client.py (100%) rename {python => hsfs/python}/tests/core/test_online_store_rest_client_api.py (100%) rename {python => hsfs/python}/tests/core/test_online_store_rest_client_engine.py (100%) rename {python => hsfs/python}/tests/core/test_opensearch.py (100%) rename {python => hsfs/python}/tests/core/test_statistics_engine.py (100%) rename {python => hsfs/python}/tests/core/test_training_dataset_engine.py (100%) rename {python => hsfs/python}/tests/core/test_transformation_function_engine.py (100%) rename {python => hsfs/python}/tests/core/test_validation_report_engine.py (100%) rename {python => hsfs/python}/tests/core/test_validation_result_engine.py (100%) rename {python => hsfs/python}/tests/core/test_vector_db_client.py (100%) rename {python => hsfs/python}/tests/data/hadoop/bin/winutils.exe (100%) rename {python => hsfs/python}/tests/data/test_basic.csv (100%) rename {python => hsfs/python}/tests/data/test_basic.parquet (100%) rename {python => hsfs/python}/tests/data/test_basic.tsv (100%) rename {python => hsfs/python}/tests/engine/__init__.py (100%) rename {python => hsfs/python}/tests/engine/test_python.py (100%) rename {python => hsfs/python}/tests/engine/test_python_reader.py (100%) rename {python => hsfs/python}/tests/engine/test_python_spark_convert_dataframe.py (100%) rename {python => hsfs/python}/tests/engine/test_python_spark_transformation_functions.py (100%) rename {python => hsfs/python}/tests/engine/test_python_writer.py (100%) rename {python => hsfs/python}/tests/engine/test_spark.py (100%) rename {python/tests/utils => hsfs/python/tests/fixtures}/__init__.py (94%) rename {python => hsfs/python}/tests/fixtures/backend_fixtures.py (87%) rename {python => hsfs/python}/tests/fixtures/dataframe_fixtures.py (100%) rename {python => hsfs/python}/tests/fixtures/execution_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/expectation_suite_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/external_feature_group_alias_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/external_feature_group_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/feature_descriptive_statistics_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/feature_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/feature_group_commit_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/feature_group_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/feature_monitoring_config_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/feature_monitoring_result_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/feature_store_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/feature_view_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/filter_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/fs_query_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/ge_expectation_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/ge_validation_result_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/generate_backend_fixtures.ipynb (100%) rename {python => 
hsfs/python}/tests/fixtures/hudi_feature_group_alias_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/ingestion_job_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/inode_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/job_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/join_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/logic_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/prepared_statement_parameter_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/query_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/rondb_server_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/serving_keys_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/serving_prepared_statement_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/spine_group_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/split_statistics_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/statistics_config_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/statistics_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/storage_connector_fixtures.json (100%) create mode 100644 hsfs/python/tests/fixtures/tag_fixtures.json rename {python => hsfs/python}/tests/fixtures/training_dataset_feature_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/training_dataset_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/training_dataset_split_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/transformation_function_attached_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/transformation_function_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/user_fixtures.json (100%) rename {python => hsfs/python}/tests/fixtures/validation_report_fixtures.json (100%) create mode 100644 hsfs/python/tests/pyproject.toml rename {python => hsfs/python}/tests/test_expectation_suite.py (100%) rename {python => hsfs/python}/tests/test_feature.py (100%) rename {python => hsfs/python}/tests/test_feature_group.py (100%) rename {python => hsfs/python}/tests/test_feature_group_commit.py (100%) rename {python => hsfs/python}/tests/test_feature_group_writer.py (100%) rename {python => hsfs/python}/tests/test_feature_store.py (100%) rename {python => hsfs/python}/tests/test_feature_view.py (100%) rename {python => hsfs/python}/tests/test_ge_expectation.py (100%) rename {python => hsfs/python}/tests/test_ge_validation_result.py (100%) rename {python => hsfs/python}/tests/test_helpers/__init__.py (100%) rename {python => hsfs/python}/tests/test_helpers/transformation_test_helper.py (100%) rename {python => hsfs/python}/tests/test_hopswork_udf.py (94%) rename {python => hsfs/python}/tests/test_serving_keys.py (100%) rename {python => hsfs/python}/tests/test_split_statistics.py (100%) rename {python => hsfs/python}/tests/test_statistics.py (100%) rename {python => hsfs/python}/tests/test_statistics_config.py (100%) rename {python => hsfs/python}/tests/test_storage_connector.py (100%) create mode 100644 hsfs/python/tests/test_tag.py rename {python => hsfs/python}/tests/test_training_dataset.py (100%) rename {python => hsfs/python}/tests/test_training_dataset_feature.py (100%) rename {python => hsfs/python}/tests/test_training_dataset_split.py (100%) rename {python => hsfs/python}/tests/test_transformation_function.py (100%) rename {python => 
hsfs/python}/tests/test_user.py (100%) create mode 100644 hsfs/python/tests/test_util.py rename {python => hsfs/python}/tests/test_validation_report.py (100%) rename {python => hsfs/python}/tests/util.py (100%) create mode 100644 hsfs/requirements-docs.txt rename {utils => hsfs/utils}/java/pom.xml (100%) rename {utils => hsfs/utils}/java/src/main/java/com/logicalclocks/utils/MainClass.java (100%) rename {utils => hsfs/utils}/java/src/main/resources/checkstyle.xml (100%) rename {utils => hsfs/utils}/java/src/main/resources/suppressions.xml (100%) rename {utils => hsfs/utils}/python/hsfs_utils.py (100%) create mode 100644 hsml/.github/workflows/mkdocs-main.yml create mode 100644 hsml/.github/workflows/mkdocs-release.yml create mode 100644 hsml/.github/workflows/python-lint.yml create mode 100644 hsml/.gitignore create mode 100644 hsml/CONTRIBUTING.md create mode 100644 hsml/Dockerfile create mode 100644 hsml/Jenkinsfile create mode 100644 hsml/LICENSE create mode 100644 hsml/README.md create mode 100644 hsml/auto_doc.py create mode 100644 hsml/docs/CONTRIBUTING.md create mode 100644 hsml/docs/assets/images/favicon.ico create mode 100644 hsml/docs/assets/images/hops-logo.png create mode 100644 hsml/docs/css/custom.css create mode 100644 hsml/docs/css/dropdown.css create mode 100644 hsml/docs/css/marctech.css create mode 100644 hsml/docs/css/version-select.css create mode 100644 hsml/docs/index.md create mode 100644 hsml/docs/js/dropdown.js create mode 100644 hsml/docs/js/inject-api-links.js create mode 100644 hsml/docs/js/version-select.js create mode 100644 hsml/docs/overrides/main.html rename {docs => hsml/docs}/templates/connection_api.md (100%) rename {docs => hsml/docs}/templates/model-registry/links.md (100%) rename {docs => hsml/docs}/templates/model-registry/model_api.md (100%) rename {docs => hsml/docs}/templates/model-registry/model_registry_api.md (100%) rename {docs => hsml/docs}/templates/model-registry/model_schema_api.md (100%) rename {docs => hsml/docs}/templates/model-serving/deployment_api.md (100%) rename {docs => hsml/docs}/templates/model-serving/inference_batcher_api.md (100%) rename {docs => hsml/docs}/templates/model-serving/inference_logger_api.md (100%) rename {docs => hsml/docs}/templates/model-serving/model_serving_api.md (100%) rename {docs => hsml/docs}/templates/model-serving/predictor_api.md (100%) rename {docs => hsml/docs}/templates/model-serving/predictor_state_api.md (100%) rename {docs => hsml/docs}/templates/model-serving/predictor_state_condition_api.md (100%) rename {docs => hsml/docs}/templates/model-serving/resources_api.md (100%) rename {docs => hsml/docs}/templates/model-serving/transformer_api.md (100%) create mode 100644 hsml/java/pom.xml create mode 100644 hsml/java/src/main/resources/checkstyle.xml create mode 100644 hsml/java/src/main/resources/suppressions.xml create mode 100644 hsml/mkdocs.yml create mode 100644 hsml/python/.pre-commit-config.yaml rename {python => hsml/python}/hsml/__init__.py (100%) rename {python => hsml/python}/hsml/client/__init__.py (97%) rename {python => hsml/python}/hsml/client/auth.py (100%) rename {python => hsml/python}/hsml/client/base.py (100%) rename {python => hsml/python}/hsml/client/exceptions.py (100%) rename {python => hsml/python}/hsml/client/hopsworks/__init__.py (100%) rename {python => hsml/python}/hsml/client/hopsworks/base.py (100%) rename {python => hsml/python}/hsml/client/hopsworks/external.py (100%) rename {python => hsml/python}/hsml/client/hopsworks/internal.py (100%) rename {python => 
hsml/python}/hsml/client/istio/__init__.py (100%) rename {python => hsml/python}/hsml/client/istio/base.py (100%) rename {python => hsml/python}/hsml/client/istio/external.py (100%) rename {python/hopsworks_common => hsml/python/hsml/client/istio/grpc}/__init__.py (100%) rename {python => hsml/python}/hsml/client/istio/grpc/errors.py (100%) rename {python => hsml/python}/hsml/client/istio/grpc/exceptions.py (100%) rename {python => hsml/python}/hsml/client/istio/grpc/inference_client.py (100%) rename {python/hsml/client/istio/grpc => hsml/python/hsml/client/istio/grpc/proto}/__init__.py (100%) rename {python => hsml/python}/hsml/client/istio/grpc/proto/grpc_predict_v2.proto (100%) rename {python => hsml/python}/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.py (100%) rename {python => hsml/python}/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.pyi (100%) rename {python => hsml/python}/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2_grpc.py (100%) rename {python => hsml/python}/hsml/client/istio/internal.py (100%) rename {python/hsml/client/istio/grpc/proto => hsml/python/hsml/client/istio/utils}/__init__.py (100%) rename {python => hsml/python}/hsml/client/istio/utils/infer_type.py (100%) rename {python => hsml/python}/hsml/client/istio/utils/numpy_codec.py (100%) rename {python => hsml/python}/hsml/connection.py (98%) rename {python => hsml/python}/hsml/constants.py (97%) rename {python => hsml/python}/hsml/core/__init__.py (100%) rename {python => hsml/python}/hsml/core/dataset_api.py (100%) rename {python => hsml/python}/hsml/core/explicit_provenance.py (100%) rename {python => hsml/python}/hsml/core/model_api.py (100%) rename {python => hsml/python}/hsml/core/model_registry_api.py (100%) rename {python => hsml/python}/hsml/core/model_serving_api.py (100%) rename {python => hsml/python}/hsml/core/native_hdfs_api.py (100%) rename {python => hsml/python}/hsml/core/serving_api.py (100%) rename {python => hsml/python}/hsml/decorators.py (100%) rename {python => hsml/python}/hsml/deployable_component.py (100%) rename {python => hsml/python}/hsml/deployable_component_logs.py (100%) rename {python => hsml/python}/hsml/deployment.py (100%) rename {python => hsml/python}/hsml/engine/__init__.py (100%) rename {python => hsml/python}/hsml/engine/hopsworks_engine.py (100%) rename {python => hsml/python}/hsml/engine/local_engine.py (100%) rename {python => hsml/python}/hsml/engine/model_engine.py (100%) rename {python => hsml/python}/hsml/engine/serving_engine.py (100%) rename {python => hsml/python}/hsml/inference_batcher.py (100%) rename {python => hsml/python}/hsml/inference_endpoint.py (100%) rename {python => hsml/python}/hsml/inference_logger.py (100%) rename {python => hsml/python}/hsml/kafka_topic.py (100%) rename {python => hsml/python}/hsml/model.py (100%) rename {python => hsml/python}/hsml/model_registry.py (100%) rename {python => hsml/python}/hsml/model_schema.py (100%) rename {python => hsml/python}/hsml/model_serving.py (100%) rename {python => hsml/python}/hsml/predictor.py (100%) rename {python => hsml/python}/hsml/predictor_state.py (100%) rename {python => hsml/python}/hsml/predictor_state_condition.py (100%) rename {python => hsml/python}/hsml/python/__init__.py (100%) rename {python => hsml/python}/hsml/python/model.py (100%) rename {python => hsml/python}/hsml/python/predictor.py (100%) rename {python => hsml/python}/hsml/python/signature.py (100%) rename {python => hsml/python}/hsml/resources.py (100%) rename {python => hsml/python}/hsml/schema.py (100%) rename {python 
=> hsml/python}/hsml/sklearn/__init__.py (100%) rename {python => hsml/python}/hsml/sklearn/model.py (100%) rename {python => hsml/python}/hsml/sklearn/predictor.py (100%) rename {python => hsml/python}/hsml/sklearn/signature.py (100%) rename {python => hsml/python}/hsml/tag.py (100%) rename {python => hsml/python}/hsml/tensorflow/__init__.py (100%) rename {python => hsml/python}/hsml/tensorflow/model.py (100%) rename {python => hsml/python}/hsml/tensorflow/predictor.py (100%) rename {python => hsml/python}/hsml/tensorflow/signature.py (100%) rename {python => hsml/python}/hsml/torch/__init__.py (100%) rename {python => hsml/python}/hsml/torch/model.py (100%) rename {python => hsml/python}/hsml/torch/predictor.py (100%) rename {python => hsml/python}/hsml/torch/signature.py (100%) rename {python => hsml/python}/hsml/transformer.py (100%) rename {python => hsml/python}/hsml/util.py (90%) rename {python => hsml/python}/hsml/utils/__init__.py (100%) rename {python => hsml/python}/hsml/utils/schema/__init__.py (100%) rename {python => hsml/python}/hsml/utils/schema/column.py (100%) rename {python => hsml/python}/hsml/utils/schema/columnar_schema.py (100%) rename {python => hsml/python}/hsml/utils/schema/tensor.py (100%) rename {python => hsml/python}/hsml/utils/schema/tensor_schema.py (100%) rename {python => hsml/python}/hsml/version.py (100%) create mode 100644 hsml/python/pyproject.toml create mode 100644 hsml/python/setup.py create mode 100644 hsml/python/tests/__init__.py create mode 100644 hsml/python/tests/conftest.py rename {python/hsml/client/istio/utils => hsml/python/tests/fixtures}/__init__.py (100%) create mode 100644 hsml/python/tests/fixtures/backend_fixtures.py rename {python => hsml/python}/tests/fixtures/inference_batcher_fixtures.json (100%) rename {python => hsml/python}/tests/fixtures/inference_endpoint_fixtures.json (100%) rename {python => hsml/python}/tests/fixtures/inference_logger_fixtures.json (100%) rename {python => hsml/python}/tests/fixtures/kafka_topic_fixtures.json (100%) rename {python => hsml/python}/tests/fixtures/model_fixtures.json (100%) rename {python => hsml/python}/tests/fixtures/model_fixtures.py (100%) rename {python => hsml/python}/tests/fixtures/predictor_fixtures.json (100%) rename {python => hsml/python}/tests/fixtures/resources_fixtures.json (100%) rename {python => hsml/python}/tests/fixtures/tag_fixtures.json (100%) rename {python => hsml/python}/tests/fixtures/transformer_fixtures.json (100%) rename {python => hsml/python}/tests/test_connection.py (98%) rename {python => hsml/python}/tests/test_constants.py (100%) rename {python => hsml/python}/tests/test_decorators.py (100%) rename {python => hsml/python}/tests/test_deployable_component.py (100%) rename {python => hsml/python}/tests/test_deployable_component_logs.py (100%) rename {python => hsml/python}/tests/test_deployment.py (100%) rename {python => hsml/python}/tests/test_explicit_provenance.py (100%) rename {python => hsml/python}/tests/test_inference_batcher.py (100%) rename {python => hsml/python}/tests/test_inference_endpoint.py (100%) rename {python => hsml/python}/tests/test_inference_logger.py (100%) rename {python => hsml/python}/tests/test_kafka_topic.py (100%) rename {python => hsml/python}/tests/test_model.py (100%) rename {python => hsml/python}/tests/test_model_schema.py (100%) rename {python => hsml/python}/tests/test_predictor.py (100%) rename {python => hsml/python}/tests/test_predictor_state.py (100%) rename {python => hsml/python}/tests/test_predictor_state_condition.py 
(100%)
 rename {python => hsml/python}/tests/test_resources.py (100%)
 rename {python => hsml/python}/tests/test_schema.py (100%)
 rename {python => hsml/python}/tests/test_tag.py (100%)
 rename {python => hsml/python}/tests/test_transformer.py (100%)
 rename {python => hsml/python}/tests/test_util.py (72%)
 rename {python/tests/fixtures => hsml/python/tests/utils}/__init__.py (100%)
 rename {python => hsml/python}/tests/utils/schema/test_column.py (100%)
 rename {python => hsml/python}/tests/utils/schema/test_columnar_schema.py (100%)
 rename {python => hsml/python}/tests/utils/schema/test_tensor.py (100%)
 rename {python => hsml/python}/tests/utils/schema/test_tensor_schema.py (100%)
 create mode 100644 hsml/requirements-docs.txt
 delete mode 100644 python/auto_doc.py

diff --git a/.github/workflows/mkdocs-main.yml b/.github/workflows/mkdocs-main.yml
index e8e14b4ea..001f1fad1 100644
--- a/.github/workflows/mkdocs-main.yml
+++ b/.github/workflows/mkdocs-main.yml
@@ -24,24 +24,7 @@ jobs:
         run: cp ../README.md . && pip3 install -r ../requirements-docs.txt && pip3 install -e .[dev]
 
       - name: generate autodoc
-        run: python3 ./python/auto_doc.py
-
-      - name: Cache local Maven repository
-        uses: actions/cache@v2
-        with:
-          path: ~/.m2/repository
-          key: ${{ runner.os }}-maven-${{ hashFiles('java/pom.xml') }}
-          restore-keys: |
-            ${{ runner.os }}-maven-
-      - name: Set up JDK 8
-        uses: actions/setup-java@v3
-        with:
-          java-version: "8"
-          distribution: "adopt"
-
-      - name: Build java doc documentation
-        working-directory: ./java
-        run: mvn clean install javadoc:javadoc javadoc:aggregate -DskipTests && cp -r target/site/apidocs ../docs/javadoc
+        run: python3 auto_doc.py
 
       - name: setup git
         run: |
diff --git a/.github/workflows/mkdocs-release.yml b/.github/workflows/mkdocs-release.yml
index f1c6bb814..e2b4b2b3f 100644
--- a/.github/workflows/mkdocs-release.yml
+++ b/.github/workflows/mkdocs-release.yml
@@ -2,7 +2,7 @@ name: mkdocs-release
 
 on:
   push:
-    branches: [branch-*]
+    branches: [branch-*\.*]
 
 jobs:
   publish-release:
@@ -29,25 +29,7 @@ jobs:
         run: cp ../README.md . && pip3 install -r ../requirements-docs.txt && pip3 install -e .[dev]
 
       - name: generate autodoc
-        run: python3 ./python/auto_doc.py
-
-      - name: Cache local Maven repository
-        uses: actions/cache@v2
-        with:
-          path: ~/.m2/repository
-          key: ${{ runner.os }}-maven-${{ hashFiles('java/pom.xml') }}
-          restore-keys: |
-            ${{ runner.os }}-maven-
-      - name: Set up JDK 8
-        uses: actions/setup-java@v3
-        with:
-          java-version: "8"
-          distribution: "adopt"
-
-      - name: Build java doc documentation
-        working-directory: ./java
-        run:
-          mvn clean install javadoc:javadoc javadoc:aggregate -DskipTests && cp -r target/site/apidocs ../docs/javadoc
+        run: python3 auto_doc.py
 
       - name: setup git
         run: |
diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml
new file mode 100644
index 000000000..156847faf
--- /dev/null
+++ b/.github/workflows/python-lint.yml
@@ -0,0 +1,49 @@
+name: python
+
+on: pull_request
+
+env:
+  APP_API_KEY: ${{ secrets.APP_API_KEY }}
+
+jobs:
+  lint_stylecheck:
+    name: Lint and Stylecheck
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Get all changed files
+        id: get-changed-files
+        uses: tj-actions/changed-files@v44
+        with:
+          files_yaml: |
+            src:
+              - 'python/**/*.py'
+              - '!python/tests/**/*.py'
+            test:
+              - 'python/tests/**/*.py'
+
+      - name: install deps
+        run: pip install ruff==0.4.2
+
+      - name: ruff on python files
+        if: steps.get-changed-files.outputs.src_any_changed == 'true'
+        env:
+          SRC_ALL_CHANGED_FILES: ${{ steps.get-changed-files.outputs.src_all_changed_files }}
+        run: ruff check --output-format=github $SRC_ALL_CHANGED_FILES
+
+      - name: ruff on test files
+        if: steps.get-changed-files.outputs.test_any_changed == 'true'
+        env:
+          TEST_ALL_CHANGED_FILES: ${{ steps.get-changed-files.outputs.test_all_changed_files }}
+        run: ruff check --output-format=github $TEST_ALL_CHANGED_FILES
+
+      - name: ruff format --check $ALL_CHANGED_FILES
+        env:
+          ALL_CHANGED_FILES: ${{ steps.get-changed-files.outputs.all_changed_files }}
+        run: ruff format --check $ALL_CHANGED_FILES
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 1581db87d..6e96d8144 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,8 +51,6 @@ coverage.xml
 .hypothesis/
 .pytest_cache/
 .ruff_cache/
-bigquery.json
-metastore_db/
 
 # Translations
 *.mo
@@ -73,9 +71,6 @@ instance/
 # Sphinx documentation
 docs/_build/
 
-# Mike Javadoc
-docs/javadoc
-
 # PyBuilder
 target/
@@ -133,9 +128,3 @@ target/
 
 # mkdocs intermediate files
 docs/generated
-
-docs/CONTRIBUTING.md
-docs/index.md
-
-# Test artifacts
-keyFile.json
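The lint job above only checks files changed in the pull request (via `tj-actions/changed-files`). A minimal local equivalent, sketched under the assumption that it is run from the repository root with the same pinned ruff version:

```bash
# mirror the CI lint job locally; checking the whole python/ tree is a
# superset of the changed-files subset that CI lints
pip install ruff==0.4.2
ruff check python/
ruff format --check python/   # report formatting drift without rewriting files
```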
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e2801b11b..564734d53 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,17 +1,16 @@
 ## Python development setup
-
 ---
 
 - Fork and clone the repository
 
-- Create a new Python environment with your favourite environment manager (e.g. virtualenv or conda) and Python 3.9 (newer versions will return a library conflict in `auto_doc.py`)
+- Create a new Python environment with your favourite environment manager, e.g. virtualenv or conda
 
 - Install repository in editable mode with development dependencies:
 
-  ```bash
-  cd python
-  pip install -e ".[dev]"
-  ```
+   ```bash
+   cd python
+   pip install -e ".[dev]"
+   ```
 
 - Install [pre-commit](https://pre-commit.com/) and then activate its hooks. pre-commit is a framework for managing and maintaining multi-language pre-commit hooks. The library uses pre-commit to ensure code-style and code formatting through [ruff](https://docs.astral.sh/ruff/). Run the following commands from the `python` directory:
@@ -38,67 +37,75 @@ We follow a few best practices for writing the Python documentation:
 
 1. Use the google docstring style:
 
-   ```python
-   """[One Line Summary]
+    ```python
+    """[One Line Summary]
 
-   [Extended Summary]
+    [Extended Summary]
 
-   [!!! example
-       import xyz
-   ]
+    [!!! example
+        import xyz
+    ]
 
-   # Arguments
-   arg1: Type[, optional]. Description[, defaults to `default`]
-   arg2: Type[, optional]. Description[, defaults to `default`]
+    # Arguments
+    arg1: Type[, optional]. Description[, defaults to `default`]
+    arg2: Type[, optional]. Description[, defaults to `default`]
 
-   # Returns
-   Type. Description.
+    # Returns
+    Type. Description.
 
-   # Raises
-   Exception. Description.
-   """
-   ```
+    # Raises
+    Exception. Description.
+    """
+    ```
+
+    If Python 3 type annotations are used, they are inserted automatically.
 
-   If Python 3 type annotations are used, they are inserted automatically.
 2. Hopsworks entity engine methods (e.g. ExecutionEngine etc.) only require a single line docstring.
-3. Private REST API implementations (e.g. FeatureGroupApi etc.) should be fully documented with docstrings without defaults.
-4. Public API such as metadata objects and public REST API implementations should be fully documented with defaults.
+3. Private REST Api implementations (e.g. GitRemoteApi etc.) should be fully documented with docstrings without defaults.
+4. Public Api such as metadata objects and public REST Api implementations should be fully documented with defaults.
 
 #### Setup and Build Documentation
 
 We use `mkdocs` together with `mike` ([for versioning](https://github.com/jimporter/mike/)) to build the documentation and a plugin called `keras-autodoc` to auto generate Python API documentation from docstrings.
 
 **Background about `mike`:**
-`mike` builds the documentation and commits it as a new directory to the gh-pages branch. Each directory corresponds to one version of the documentation. Additionally, `mike` maintains a json in the root of gh-pages with the mappings of versions/aliases for each of the directories available. With aliases you can define extra names like `dev` or `latest`, to indicate stable and unstable releases.
+    `mike` builds the documentation and commits it as a new directory to the gh-pages branch. Each directory corresponds to one version of the documentation. Additionally, `mike` maintains a json in the root of gh-pages with the mappings of versions/aliases for each of the directories available. With aliases you can define extra names like `dev` or `latest`, to indicate stable and unstable releases.
 
-1. Install Hopsworks with `dev-docs` extras:
+1. Currently we are using our own version of `keras-autodoc`
 
-   ```bash
-   pip install -e ".[dev-docs]"
-   ```
+    ```bash
+    pip install git+https://github.com/logicalclocks/keras-autodoc
+    ```
 
-2. To build the docs, first run the auto doc script:
+2. Install HOPSWORKS with `docs` extras:
+
+    ```bash
+    pip install -e .[dev,docs]
+    ```
 
-   ```bash
-   python auto_doc.py
-   ```
+3. To build the docs, first run the auto doc script:
+
+    ```bash
+    cd ..
+    python auto_doc.py
+    ```
 
 ##### Option 1: Build only current version of docs
 
-3. Either build the docs, or serve them dynamically:
+4. Either build the docs, or serve them dynamically:
 
- The reason for that is that the docs are deployed with versioning on docs.hopsworks.ai and - therefore another level is added to all paths, e.g. `docs.hopsworks.ai/[version-or-alias]`. - Using relative links should not be affected by this, however, building the docs with version - (Option 2) is recommended. + Note: Links and pictures might not resolve properly later on when checking with this build. + The reason for that is that the docs are deployed with versioning on docs.hopsworks.ai and + therefore another level is added to all paths, e.g. `docs.hopsworks.ai/[version-or-alias]`. + Using relative links should not be affected by this, however, building the docs with version + (Option 2) is recommended. - ```bash - mkdocs build - # or - mkdocs serve - ``` + ```bash + mkdocs build + # or + mkdocs serve + ``` ##### Option 2 (Preferred): Build multi-version doc with `mike` @@ -106,36 +113,34 @@ We use `mkdocs` together with `mike` ([for versioning](https://github.com/jimpor On docs.hopsworks.ai we implement the following versioning scheme: -- current master branches (e.g. of hopsworks corresponding to master of Hopsworks): rendered as current Hopsworks snapshot version, e.g. **4.0.0-SNAPSHOT [dev]**, where `dev` is an alias to indicate that this is an unstable version. -- the latest release: rendered with full current version, e.g. **3.8.0 [latest]** with `latest` alias to indicate that this is the latest stable release. -- previous stable releases: rendered without alias, e.g. **3.4.4**. +- current master branches (e.g. of hopsworks corresponding to master of Hopsworks): rendered as current Hopsworks snapshot version, e.g. **3.1.0-SNAPSHOT [dev]**, where `dev` is an alias to indicate that this is an unstable version. +- the latest release: rendered with full current version, e.g. **3.0.1 [latest]** with `latest` alias to indicate that this is the latest stable release. +- previous stable releases: rendered without alias, e.g. **3.0.0**. ###### Build Instructions -4. For this you can either checkout and make a local copy of the `upstream/gh-pages` branch, where `mike` maintains the current state of docs.hopsworks.ai, or just build documentation for the branch you are updating: +4. For this you can either checkout and make a local copy of the `upstream/gh-pages` branch, where +`mike` maintains the current state of docs.hopsworks.ai, or just build documentation for the branch you are updating: Building *one* branch: Checkout your dev branch with modified docs: - ```bash git checkout [dev-branch] ``` Generate API docs if necessary: - ```bash python auto_doc.py ``` Build docs with a version and alias - ```bash mike deploy [version] [alias] --update-alias # for example, if you are updating documentation to be merged to master, # which will become the new SNAPSHOT version: - mike deploy 4.0.0-SNAPSHOT dev --update-alias + mike deploy 3.1.0-SNAPSHOT dev --update-alias # if you are updating docs of the latest stable release branch mike deploy [version] latest --update-alias @@ -153,20 +158,17 @@ On docs.hopsworks.ai we implement the following versioning scheme: ``` You can now checkout the gh-pages branch and serve: - ```bash git checkout gh-pages mike serve ``` You can also list all available versions/aliases: - ```bash mike list ``` Delete and reset your local gh-pages branch: - ```bash mike delete --all @@ -192,7 +194,7 @@ PAGES = { Now you can add a template markdown file to the `docs/templates` directory with the name you specified in the auto-doc script. 
The `new_template.md` file should contain a tag to identify the place at which the API documentation should be inserted: -```` +``` ## The XYZ package {{module}} @@ -205,7 +207,7 @@ Some extra content here. ``` {{xyz.asd}} -```` +``` Finally, run the `auto_doc.py` script, as described above, to update the documentation. diff --git a/README.md b/README.md index e523c059d..162c95f97 100644 --- a/README.md +++ b/README.md @@ -17,10 +17,6 @@ src="https://img.shields.io/pypi/v/hopsworks?color=blue" alt="PyPiStatus" /> - Scala/Java Artifacts Downloads *hopsworks* is the python API for interacting with a Hopsworks cluster. Don't have a Hopsworks cluster just yet? Register an account on [Hopsworks Serverless](https://app.hopsworks.ai/) and get started for free. Once connected to your project, you can: - -- Insert dataframes into the online or offline Store, create training datasets or *serve real-time* feature vectors in the Feature Store via the Feature Store API. Already have data somewhere you want to import? Check out our [Storage Connectors](https://docs.hopsworks.ai/latest/user_guides/fs/storage_connector/) documentation. -- Register ML models in the model registry and *deploy* them via model serving via the Machine Learning API. -- Manage environments, executions, kafka topics and more once you deploy your own Hopsworks cluster, either on-prem or in the cloud. Hopsworks is open-source and has its own [Community Edition](https://github.com/logicalclocks/hopsworks). + - Insert dataframes into the online or offline Store, create training datasets or *serve real-time* feature vectors in the Feature Store via the [Feature Store API](https://github.com/logicalclocks/feature-store-api). Already have data somewhere you want to import? Check out our [Storage Connectors](https://docs.hopsworks.ai/latest/user_guides/fs/storage_connector/) documentation. + - Register ML models in the model registry and *deploy* them via model serving via the [Machine Learning API](https://github.com/logicalclocks/machine-learning-api). + - Manage environments, executions, kafka topics and more once you deploy your own Hopsworks cluster, either on-prem or in the cloud. Hopsworks is open-source and has its own [Community Edition](https://github.com/logicalclocks/hopsworks). Our [tutorials](https://github.com/logicalclocks/hopsworks-tutorials) cover a wide range of use cases and examples of what *you* can build using Hopsworks. @@ -48,19 +43,16 @@ Our [tutorials](https://github.com/logicalclocks/hopsworks-tutorials) cover a wi Once you have created a project on [Hopsworks Serverless](https://app.hopsworks.ai) and created a new [Api Key](https://docs.hopsworks.ai/latest/user_guides/projects/api_key/create_api_key/), just use your favourite virtualenv and package manager to install the library: ```bash -pip install "hopsworks[python]" +pip install hopsworks ``` Fire up a notebook and connect to your project; you will be prompted to enter your newly created API key: - ```python import hopsworks project = hopsworks.login() ``` -### Feature Store API - Access the Feature Store of your project to use as a central repository for your feature data. Use *your* favourite data engineering library (pandas, polars, Spark, etc...) to insert data into the Feature Store, create training datasets or serve real-time feature vectors. Want to predict likelihood of e-scooter accidents in real-time?
Here's how you can do it: ```python @@ -68,9 +60,9 @@ fs = project.get_feature_store() # Write to Feature Groups bike_ride_fg = fs.get_or_create_feature_group( - name="bike_rides", - version=1, - primary_key=["ride_id"], + name="bike_rides", + version=1, + primary_key=["ride_id"], event_time="activation_time", online_enabled=True, ) @@ -81,13 +73,13 @@ fg.insert(bike_rides_df) profile_fg = fs.get_feature_group("user_profile", version=1) bike_ride_fv = fs.get_or_create_feature_view( - name="bike_rides_view", - version=1, + name="bike_rides_view", + version=1, query=bike_ride_fg.select_except(["ride_id"]).join(profile_fg.select(["age", "has_license"]), on="user_id") ) bike_rides_Q1_2021_df = bike_ride_fv.get_batch_data( - start_date="2021-01-01", + start_date="2021-01-01", end_date="2021-01-31" ) @@ -105,68 +97,22 @@ bike_ride_fv.init_serving() while True: new_ride_vector = poll_ride_queue() feature_vector = bike_ride_fv.get_online_feature_vector( - {"user_id": new_ride_vector["user_id"]}, + {"user_id": new_ride_vector["user_id"]}, passed_features=new_ride_vector ) accident_probability = model.predict(feature_vector) ``` -The API enables interaction with the Hopsworks Feature Store. It makes creating new features, feature groups and training datasets easy. - -The API is environment independent and can be used in two modes: - -- Spark mode: For data engineering jobs that create and write features into the feature store or generate training datasets. It requires a Spark environment such as the one provided in the Hopsworks platform or Databricks. In Spark mode, HSFS provides bindings both for Python and JVM languages. - -- Python mode: For data science jobs to explore the features available in the feature store, generate training datasets and feed them in a training pipeline. Python mode requires just a Python interpreter and can be used both in Hopsworks from Python Jobs/Jupyter Kernels, Amazon SageMaker or KubeFlow. - -Scala API is also available, here is a short sample of it: - -```scala -import com.logicalclocks.hsfs._ -val connection = HopsworksConnection.builder().build() -val fs = connection.getFeatureStore(); -val attendances_features_fg = fs.getFeatureGroup("games_features", 1); -attendances_features_fg.show(1) -``` - -### Machine Learning API - -Or you can use the Machine Learning API to interact with the Hopsworks Model Registry and Model Serving. The API makes it easy to export, manage and deploy models. For example, to register models and deploy them for serving you can do: - +Or you can use the Machine Learning API to register models and deploy them for serving: ```python mr = project.get_model_registry() # or -ms = connection.get_model_serving() - -# Create a new model: -model = mr.tensorflow.create_model(name="mnist", - version=1, - metrics={"accuracy": 0.94}, - description="mnist model description") -model.save("/tmp/model_directory") # or /tmp/model_file - -# Download a model: -model = mr.get_model("mnist", version=1) -model_path = model.download() - -# Delete the model: -model.delete() - -# Get the best-performing model -best_model = mr.get_best_model('mnist', 'accuracy', 'max') - -# Deploy the model: -deployment = model.deploy() -deployment.start() - -# Make predictions with a deployed model -data = { "instances": [ model.input_example ] } -predictions = deployment.predict(data) +ms = project.get_model_serving() ``` ## Tutorials -Need more inspiration or want to learn more about the Hopsworks platform? 
Check out our [tutorials](https://github.com/logicalclocks/hopsworks-tutorials). +Need more inspiration or want to learn more about the Hopsworks platform? Check out our [tutorials](https://github.com/logicalclocks/hopsworks-tutorials). ## Documentation @@ -178,17 +124,7 @@ For general questions about the usage of Hopsworks and the Feature Store please Please report any issue using [Github issue tracking](https://github.com/logicalclocks/hopsworks-api/issues). -### Related to Feautre Store API - -Please attach the client environment from the output below to your issue, if it is related to Feature Store API: - -```python -import hopsworks -import hsfs -hopsworks.login().get_feature_store() -print(hsfs.get_env()) -``` - ## Contributing If you would like to contribute to this library, please see the [Contribution Guidelines](CONTRIBUTING.md). + diff --git a/auto_doc.py b/auto_doc.py new file mode 100644 index 000000000..1fd5b40f8 --- /dev/null +++ b/auto_doc.py @@ -0,0 +1,215 @@ +# +# Copyright 2022 Logical Clocks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import pathlib +import shutil + +import keras_autodoc + +PAGES = { + "api/login.md": { + "login": ["hopsworks.login"], + "get_current_project": ["hopsworks.get_current_project"], + "fs_api": ["hopsworks.project.Project.get_feature_store"], + "mr_api": ["hopsworks.project.Project.get_model_registry"], + "ms_api": ["hopsworks.project.Project.get_model_serving"], + }, + "api/udf.md": { + "udf": ["hopsworks.udf"], + }, + "api/connection.md": { + "connection_create": ["hopsworks.connection.Connection.connection"], + "connection_properties": keras_autodoc.get_properties( + "hopsworks.connection.Connection" + ), + "connection_methods": keras_autodoc.get_methods( + "hopsworks.connection.Connection", exclude=["from_response_json", "json"] + ), + }, + "api/projects.md": { + "project_create": ["hopsworks.create_project"], + "project_properties": keras_autodoc.get_properties("hopsworks.project.Project"), + "project_methods": keras_autodoc.get_methods( + "hopsworks.project.Project", exclude=["from_response_json", "json"] + ), + }, + "api/jobs.md": { + "job_api_handle": ["hopsworks.project.Project.get_jobs_api"], + "job_create": ["hopsworks.core.job_api.JobsApi.create_job"], + "job_get": ["hopsworks.core.job_api.JobsApi.get_job"], + "job_get_all": ["hopsworks.core.job_api.JobsApi.get_jobs"], + "job_properties": keras_autodoc.get_properties("hopsworks.job.Job"), + "job_config": ["hopsworks.core.job_api.JobsApi.get_configuration"], + "job_methods": keras_autodoc.get_methods( + "hopsworks.job.Job", exclude=["from_response_json", "json"] + ), + }, + "api/executions.md": { + "execution_create": ["hopsworks.job.Job.run"], + "execution_get": ["hopsworks.job.Job.get_executions"], + "execution_properties": keras_autodoc.get_properties( + "hopsworks.execution.Execution" + ), + "execution_methods": keras_autodoc.get_methods( + "hopsworks.execution.Execution", + exclude=["from_response_json", "json", "update_from_response_json"], + ), + }, + 
"api/flink_cluster.md": { + "flink_api_handle": ["hopsworks.project.Project.get_flink_cluster_api"], + "setup_cluster": [ + "hopsworks.core.flink_cluster_api.FlinkClusterApi.setup_cluster" + ], + "get_cluster": ["hopsworks.core.flink_cluster_api.FlinkClusterApi.get_cluster"], + "start_cluster": ["hopsworks.flink_cluster.FlinkCluster.start"], + "submit_job_to_cluster": ["hopsworks.flink_cluster.FlinkCluster.submit_job"], + "flink_cluster_properties": keras_autodoc.get_properties( + "hopsworks.flink_cluster.FlinkCluster" + ), + "flink_cluster_methods": keras_autodoc.get_methods( + "hopsworks.flink_cluster.FlinkCluster", + exclude=["from_response_json", "json"], + ), + }, + "api/environment.md": { + "env_api_handle": ["hopsworks.project.Project.get_environment_api"], + "env_create": [ + "hopsworks.core.environment_api.EnvironmentApi.create_environment" + ], + "env_get": ["hopsworks.core.environment_api.EnvironmentApi.get_environment"], + "env_methods": keras_autodoc.get_methods( + "hopsworks.environment.Environment", exclude=["from_response_json", "json"] + ), + }, + "api/git_repo.md": { + "git_api_handle": ["hopsworks.project.Project.get_git_api"], + "git_repo_clone": ["hopsworks.core.git_api.GitApi.clone"], + "git_repo_get": ["hopsworks.core.git_api.GitApi.get_repo"], + "git_repo_get_all": ["hopsworks.core.git_api.GitApi.get_repos"], + "git_repo_properties": keras_autodoc.get_properties( + "hopsworks.git_repo.GitRepo" + ), + "git_repo_methods": keras_autodoc.get_methods( + "hopsworks.git_repo.GitRepo", exclude=["from_response_json", "json"] + ), + }, + "api/git_provider.md": { + "git_api_handle": ["hopsworks.project.Project.get_git_api"], + "git_provider_create": ["hopsworks.core.git_api.GitApi.set_provider"], + "git_provider_get": ["hopsworks.core.git_api.GitApi.get_provider"], + "git_provider_get_all": ["hopsworks.core.git_api.GitApi.get_providers"], + "git_provider_properties": keras_autodoc.get_properties( + "hopsworks.git_provider.GitProvider" + ), + "git_provider_methods": keras_autodoc.get_methods( + "hopsworks.git_provider.GitProvider", exclude=["from_response_json", "json"] + ), + }, + "api/git_remote.md": { + "git_api_handle": ["hopsworks.project.Project.get_git_api"], + "git_remote_create": ["hopsworks.git_repo.GitRepo.add_remote"], + "git_remote_get": ["hopsworks.git_repo.GitRepo.get_remote"], + "git_remote_get_all": ["hopsworks.git_repo.GitRepo.get_remotes"], + "git_remote_properties": keras_autodoc.get_properties( + "hopsworks.git_remote.GitRemote" + ), + "git_remote_methods": keras_autodoc.get_methods( + "hopsworks.git_remote.GitRemote", exclude=["from_response_json", "json"] + ), + }, + "api/datasets.md": { + "dataset_api_handle": ["hopsworks.project.Project.get_dataset_api"], + "dataset_methods": keras_autodoc.get_methods( + "hopsworks.core.dataset_api.DatasetApi" + ), + }, + "api/kafka_topic.md": { + "kafka_api_handle": ["hopsworks.project.Project.get_kafka_api"], + "kafka_config": ["hopsworks.core.kafka_api.KafkaApi.get_default_config"], + "kafka_topic_create": ["hopsworks.core.kafka_api.KafkaApi.create_topic"], + "kafka_topic_get": ["hopsworks.core.kafka_api.KafkaApi.get_topic"], + "kafka_topic_get_all": ["hopsworks.core.kafka_api.KafkaApi.get_topics"], + "kafka_topic_properties": keras_autodoc.get_properties( + "hopsworks.kafka_topic.KafkaTopic" + ), + "kafka_topic_methods": keras_autodoc.get_methods( + "hopsworks.kafka_topic.KafkaTopic", + exclude=["from_response_json", "json", "update_from_response_json"], + ), + }, + "api/kafka_schema.md": { + "kafka_api_handle": 
["hopsworks.project.Project.get_kafka_api"], + "kafka_schema_create": ["hopsworks.core.kafka_api.KafkaApi.create_schema"], + "kafka_schema_get": ["hopsworks.core.kafka_api.KafkaApi.get_schema"], + "kafka_schema_get_all": ["hopsworks.core.kafka_api.KafkaApi.get_schemas"], + "kafka_schema_get_subjects": ["hopsworks.core.kafka_api.KafkaApi.get_subjects"], + "kafka_schema_properties": keras_autodoc.get_properties( + "hopsworks.kafka_schema.KafkaSchema" + ), + "kafka_schema_methods": keras_autodoc.get_methods( + "hopsworks.kafka_schema.KafkaSchema", + exclude=["from_response_json", "json", "update_from_response_json"], + ), + }, + "api/secrets.md": { + "secret_api_handle": ["hopsworks.get_secrets_api"], + "secret_create": ["hopsworks.core.secret_api.SecretsApi.create_secret"], + "secret_get": ["hopsworks.core.secret_api.SecretsApi.get_secret"], + "secret_get_simplified": ["hopsworks.core.secret_api.SecretsApi.get"], + "secret_get_all": ["hopsworks.core.secret_api.SecretsApi.get_secrets"], + "secret_properties": keras_autodoc.get_properties("hopsworks.secret.Secret"), + "secret_methods": keras_autodoc.get_methods( + "hopsworks.secret.Secret", exclude=["from_response_json", "json"] + ), + }, + "api/opensearch.md": { + "opensearch_api_handle": ["hopsworks.project.Project.get_opensearch_api"], + "opensearch_methods": keras_autodoc.get_methods( + "hopsworks.core.opensearch_api.OpenSearchApi" + ), + }, +} + +hw_dir = pathlib.Path(__file__).resolve().parents[0] +if "GITHUB_SHA" in os.environ: + commit_sha = os.environ["GITHUB_SHA"] + project_url = ( + f"https://github.com/logicalclocks/feature-store-api/tree/{commit_sha}/python" + ) +else: + branch_name = os.environ.get("GITHUB_BASE_REF", "master") + project_url = ( + f"https://github.com/logicalclocks/feature-store-api/blob/{branch_name}/python" + ) + + +def generate(dest_dir): + doc_generator = keras_autodoc.DocumentationGenerator( + PAGES, + project_url=project_url, + template_dir="./docs/templates", + titles_size="###", + extra_aliases={}, + max_signature_line_length=100, + ) + shutil.copyfile(hw_dir / "CONTRIBUTING.md", dest_dir / "CONTRIBUTING.md") + shutil.copyfile(hw_dir / "README.md", dest_dir / "index.md") + + doc_generator.generate(dest_dir / "generated") + + +if __name__ == "__main__": + generate(hw_dir / "docs") diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md new file mode 100644 index 000000000..b97326e6f --- /dev/null +++ b/docs/CONTRIBUTING.md @@ -0,0 +1,215 @@ +## Python development setup +--- + +- Fork and clone the repository + +- Create a new Python environment with your favourite environment manager, e.g. virtualenv or conda + +- Install repository in editable mode with development dependencies: + + ```bash + cd python + pip install -e ".[dev]" + ``` + +- Install [pre-commit](https://pre-commit.com/) and then activate its hooks. pre-commit is a framework for managing and maintaining multi-language pre-commit hooks. The Feature Store uses pre-commit to ensure code-style and code formatting through [ruff](https://docs.astral.sh/ruff/). Run the following commands from the `python` directory: + + ```bash + cd python + pip install --user pre-commit + pre-commit install + ``` + + Afterwards, pre-commit will run whenever you commit. 
+ +- To run formatting and code-style separately, you can configure your IDE, such as VSCode, to use `ruff`, or run it via the command line: + + ```bash + # linting + ruff check python --fix + # formatting + ruff format python + ``` + +### Python documentation + +We follow a few best practices for writing the Python documentation: + +1. Use the google docstring style: + + ```python + """[One Line Summary] + + [Extended Summary] + + [!!! example + import xyz + ] + + # Arguments + arg1: Type[, optional]. Description[, defaults to `default`] + arg2: Type[, optional]. Description[, defaults to `default`] + + # Returns + Type. Description. + + # Raises + Exception. Description. + """ + ``` + + If Python 3 type annotations are used, they are inserted automatically. + + +2. Hopsworks entity engine methods (e.g. ExecutionEngine etc.) only require a single line docstring. +3. Private REST Api implementations (e.g. GitRemoteApi etc.) should be fully documented with docstrings without defaults. +4. Public Api such as metadata objects and public REST Api implementations should be fully documented with defaults. + +#### Setup and Build Documentation + +We use `mkdocs` together with `mike` ([for versioning](https://github.com/jimporter/mike/)) to build the documentation and a plugin called `keras-autodoc` to auto generate Python API documentation from docstrings. + +**Background about `mike`:** + `mike` builds the documentation and commits it as a new directory to the gh-pages branch. Each directory corresponds to one version of the documentation. Additionally, `mike` maintains a json in the root of gh-pages with the mappings of versions/aliases for each of the directories available. With aliases you can define extra names like `dev` or `latest`, to indicate stable and unstable releases. + +1. Currently we are using our own version of `keras-autodoc` + + ```bash + pip install git+https://github.com/logicalclocks/keras-autodoc + ``` + +2. Install HOPSWORKS with `docs` extras: + + ```bash + pip install -e .[dev,docs] + ``` + +3. To build the docs, first run the auto doc script: + + ```bash + cd .. + python auto_doc.py + ``` + +##### Option 1: Build only current version of docs + +4. Either build the docs, or serve them dynamically: + + Note: Links and pictures might not resolve properly later on when checking with this build. + The reason for that is that the docs are deployed with versioning on docs.hopsworks.ai and + therefore another level is added to all paths, e.g. `docs.hopsworks.ai/[version-or-alias]`. + Using relative links should not be affected by this, however, building the docs with version + (Option 2) is recommended. + + ```bash + mkdocs build + # or + mkdocs serve + ``` + +##### Option 2 (Preferred): Build multi-version doc with `mike` + +###### Versioning on docs.hopsworks.ai + +On docs.hopsworks.ai we implement the following versioning scheme: + +- current master branches (e.g. of hopsworks corresponding to master of Hopsworks): rendered as current Hopsworks snapshot version, e.g. **3.1.0-SNAPSHOT [dev]**, where `dev` is an alias to indicate that this is an unstable version. +- the latest release: rendered with full current version, e.g. **3.0.1 [latest]** with `latest` alias to indicate that this is the latest stable release. +- previous stable releases: rendered without alias, e.g. **3.0.0**. + +###### Build Instructions + +4. 
For this you can either checkout and make a local copy of the `upstream/gh-pages` branch, where +`mike` maintains the current state of docs.hopsworks.ai, or just build documentation for the branch you are updating: + + Building *one* branch: + + Checkout your dev branch with modified docs: + ```bash + git checkout [dev-branch] + ``` + + Generate API docs if necessary: + ```bash + python auto_doc.py + ``` + + Build docs with a version and alias + ```bash + mike deploy [version] [alias] --update-alias + + # for example, if you are updating documentation to be merged to master, + # which will become the new SNAPSHOT version: + mike deploy 3.1.0-SNAPSHOT dev --update-alias + + # if you are updating docs of the latest stable release branch + mike deploy [version] latest --update-alias + + # if you are updating docs of a previous stable release branch + mike deploy [version] + ``` + + If no gh-pages branch existed in your local repository, this will have created it. + + **Important**: If no previous docs were built, you will have to choose a version as default to be loaded as index, as follows + + ```bash + mike set-default [version-or-alias] + ``` + + You can now checkout the gh-pages branch and serve: + ```bash + git checkout gh-pages + mike serve + ``` + + You can also list all available versions/aliases: + ```bash + mike list + ``` + + Delete and reset your local gh-pages branch: + ```bash + mike delete --all + + # or delete single version + mike delete [version-or-alias] + ``` + +#### Adding new API documentation + +To add new documentation for APIs, you need to add information about the method/class to document to the `auto_doc.py` script: + +```python +PAGES = { + "connection.md": [ + "hopsworks.connection.Connection.connection" + ], + "new_template.md": [ + "module", + "xyz.asd" + ] +} +``` + +Now you can add a template markdown file to the `docs/templates` directory with the name you specified in the auto-doc script. The `new_template.md` file should contain a tag to identify the place at which the API documentation should be inserted: + +``` +## The XYZ package + +{{module}} + +Some extra content here. + +!!! example + ```python + import xyz + ``` + +{{xyz.asd}} +``` + +Finally, run the `auto_doc.py` script, as described above, to update the documentation. + +For information about Markdown syntax and possible Admonitions/Highlighting etc. see +the [Material for Mkdocs themes reference documentation](https://squidfunk.github.io/mkdocs-material/reference/abbreviations/).
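+
+As a point of reference, the version/alias mappings that `mike` keeps in the root of the gh-pages branch live in a `versions.json` file. A minimal sketch of what it might contain after the deployments above (the exact fields can vary between `mike` releases):
+
+```json
+[
+  { "version": "3.1.0-SNAPSHOT", "title": "3.1.0-SNAPSHOT", "aliases": ["dev"] },
+  { "version": "3.0.1", "title": "3.0.1", "aliases": ["latest"] },
+  { "version": "3.0.0", "title": "3.0.0", "aliases": [] }
+]
+```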
diff --git a/.github/pull_request_template.md b/hsfs/.github/pull_request_template.md similarity index 100% rename from .github/pull_request_template.md rename to hsfs/.github/pull_request_template.md diff --git a/.github/workflows/java.yml b/hsfs/.github/workflows/java-ut.yml similarity index 84% rename from .github/workflows/java.yml rename to hsfs/.github/workflows/java-ut.yml index 616a46773..f83f62caf 100644 --- a/.github/workflows/java.yml +++ b/hsfs/.github/workflows/java-ut.yml @@ -3,8 +3,8 @@ name: java on: pull_request jobs: - unit_tests: - name: Unit Tests + unit_tests_utc: + name: Java Unit Tests runs-on: ubuntu-latest steps: @@ -17,8 +17,8 @@ jobs: - name: Set up JDK 8 uses: actions/setup-java@v3 with: - java-version: "8" - distribution: "adopt" + java-version: '8' + distribution: 'adopt' - name: Cache local Maven repository uses: actions/cache@v2 @@ -32,8 +32,8 @@ jobs: working-directory: ./java run: mvn clean test - unit_tests_local_tz: - name: Unit Tests (Local TZ) + unit_tests_local: + name: Java Unit Tests (Local TZ) runs-on: ubuntu-latest steps: @@ -46,8 +46,8 @@ jobs: - name: Set up JDK 8 uses: actions/setup-java@v3 with: - java-version: "8" - distribution: "adopt" + java-version: '8' + distribution: 'adopt' - name: Cache local Maven repository uses: actions/cache@v2 diff --git a/hsfs/.github/workflows/mkdocs-master.yml b/hsfs/.github/workflows/mkdocs-master.yml new file mode 100644 index 000000000..1c904ad28 --- /dev/null +++ b/hsfs/.github/workflows/mkdocs-master.yml @@ -0,0 +1,53 @@ +name: mkdocs-master + +on: pull_request + +jobs: + publish-master: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: set dev version + working-directory: ./java + run: echo "DEV_VERSION=$(mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | grep -Ev 'Download|INFO|WARNING')" >> $GITHUB_ENV + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: install deps + working-directory: ./python + run: cp ../README.md . 
&& pip3 install -r ../requirements-docs.txt && pip3 install -e .[python,dev] + + - name: generate autodoc + run: python3 auto_doc.py + + - name: Cache local Maven repository + uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('java/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: Set up JDK 8 + uses: actions/setup-java@v3 + with: + java-version: "8" + distribution: "adopt" + + - name: Build java doc documentation + working-directory: ./java + run: mvn clean install javadoc:javadoc javadoc:aggregate -DskipTests && cp -r target/site/apidocs ../docs/javadoc + + - name: setup git + run: | + git config --global user.name Mike + git config --global user.email mike@docs.hopsworks.ai + + - name: mike deploy docs + run: mike deploy ${{ env.DEV_VERSION }} dev -u diff --git a/hsfs/.github/workflows/mkdocs-release.yml b/hsfs/.github/workflows/mkdocs-release.yml new file mode 100644 index 000000000..66ca638ae --- /dev/null +++ b/hsfs/.github/workflows/mkdocs-release.yml @@ -0,0 +1,59 @@ +name: mkdocs-release + +on: + push: + branches: [branch-*] + +jobs: + publish-release: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: set major/minor/bugfix release version + working-directory: ./java + run: echo "RELEASE_VERSION=$(mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | grep -Ev 'Download|INFO|WARNING')" >> $GITHUB_ENV + + - name: set major/minor release version + run: echo "MAJOR_VERSION=$(echo $RELEASE_VERSION | sed 's/^\([0-9]*\.[0-9]*\).*$/\1/')" >> $GITHUB_ENV + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: install deps + working-directory: ./python + run: cp ../README.md . 
&& pip3 install -r ../requirements-docs.txt && pip3 install -e .[python,dev] + + - name: generate autodoc + run: python3 auto_doc.py + + - name: Cache local Maven repository + uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('java/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: Set up JDK 8 + uses: actions/setup-java@v3 + with: + java-version: "8" + distribution: "adopt" + + - name: Build java doc documentation + working-directory: ./java + run: mvn clean install javadoc:javadoc javadoc:aggregate -DskipTests && cp -r target/site/apidocs ../docs/javadoc + + - name: setup git + run: | + git config --global user.name Mike + git config --global user.email mike@docs.hopsworks.ai + - name: mike deploy docs + run: | + mike deploy ${{ env.RELEASE_VERSION }} ${{ env.MAJOR_VERSION }} -u --push + mike alias ${{ env.RELEASE_VERSION }} latest -u --push diff --git a/hsfs/.github/workflows/optional-dependency.yml b/hsfs/.github/workflows/optional-dependency.yml new file mode 100644 index 000000000..547b02029 --- /dev/null +++ b/hsfs/.github/workflows/optional-dependency.yml @@ -0,0 +1,29 @@ +name: optional-dependency + +on: pull_request + +jobs: + unit_tests_no_great_expectations: + name: Unit Testing (No Great Expectations) + runs-on: ubuntu-latest + + steps: + - name: Set Timezone + run: sudo timedatectl set-timezone UTC + + - uses: actions/checkout@v4 + - name: Copy README + run: cp README.md python/ + + - uses: actions/setup-python@v5 + name: Setup Python + with: + python-version: "3.10" + cache: "pip" + cache-dependency-path: "python/setup.py" + - run: pip install -e python[python,dev-no-opt] + + - name: Run Pytest suite + env: + ENABLE_HOPSWORKS_USAGE: "false" + run: pytest python/tests \ No newline at end of file diff --git a/.github/workflows/python.yml b/hsfs/.github/workflows/python-lint.yml similarity index 78% rename from .github/workflows/python.yml rename to hsfs/.github/workflows/python-lint.yml index 0c5f12c32..f638b0128 100644 --- a/.github/workflows/python.yml +++ b/hsfs/.github/workflows/python-lint.yml @@ -2,10 +2,6 @@ name: python on: pull_request -env: - APP_API_KEY: ${{ secrets.APP_API_KEY }} - ENABLE_HOPSWORKS_USAGE: "false" - jobs: lint_stylecheck: name: Lint and Stylecheck @@ -35,25 +31,22 @@ jobs: - name: ruff on python files if: steps.get-changed-files.outputs.src_any_changed == 'true' env: - SRC_ALL_CHANGED_FILES: - ${{ steps.get-changed-files.outputs.src_all_changed_files }} + SRC_ALL_CHANGED_FILES: ${{ steps.get-changed-files.outputs.src_all_changed_files }} run: ruff check --output-format=github $SRC_ALL_CHANGED_FILES - name: ruff on test files if: steps.get-changed-files.outputs.test_any_changed == 'true' env: - TEST_ALL_CHANGED_FILES: - ${{ steps.get-changed-files.outputs.test_all_changed_files }} + TEST_ALL_CHANGED_FILES: ${{ steps.get-changed-files.outputs.test_all_changed_files }} run: ruff check --output-format=github $TEST_ALL_CHANGED_FILES - name: ruff format --check $ALL_CHANGED_FILES env: - ALL_CHANGED_FILES: - ${{ steps.get-changed-files.outputs.all_changed_files }} + ALL_CHANGED_FILES: ${{ steps.get-changed-files.outputs.all_changed_files }} run: ruff format $ALL_CHANGED_FILES - unit_tests: - name: Unit Tests + unit_tests_ubuntu_utc: + name: Unit Testing (Ubuntu) needs: lint_stylecheck runs-on: ubuntu-latest strategy: @@ -80,34 +73,12 @@ jobs: run: python --version - name: Run Pytest suite + env: + ENABLE_HOPSWORKS_USAGE: "false" run: pytest python/tests - unit_tests_no_opt: - name: Unit 
Tests (No Optional Dependencies) - needs: lint_stylecheck - runs-on: ubuntu-latest - - steps: - - name: Set Timezone - run: sudo timedatectl set-timezone UTC - - - uses: actions/checkout@v4 - - name: Copy README - run: cp README.md python/ - - - uses: actions/setup-python@v5 - name: Setup Python - with: - python-version: "3.10" - cache: "pip" - cache-dependency-path: "python/setup.py" - - run: pip install -e python[python,dev-no-opt] - - - name: Run Pytest suite - run: pytest python/tests - - unit_tests_pandas1: - name: Unit Tests (Pandas 1.x) + unit_tests_ubuntu_pandas: + name: Unit Testing (Ubuntu) (Pandas 1.x) needs: lint_stylecheck runs-on: ubuntu-latest @@ -131,10 +102,12 @@ jobs: run: python --version - name: Run Pytest suite + env: + ENABLE_HOPSWORKS_USAGE: "false" run: pytest python/tests - unit_tests_local_tz: - name: Unit Tests (Local TZ) + unit_tests_ubuntu_local: + name: Unit Testing (Ubuntu) (Local TZ) needs: lint_stylecheck runs-on: ubuntu-latest @@ -158,10 +131,12 @@ jobs: run: python --version - name: Run Pytest suite + env: + ENABLE_HOPSWORKS_USAGE: "false" run: pytest python/tests - unit_tests_typechecked: - name: Unit Tests (Typechecked) + unit_tests_ubuntu_typechecked: + name: Typechecked Unit Testing (Ubuntu) needs: lint_stylecheck runs-on: ubuntu-latest @@ -180,12 +155,13 @@ jobs: - name: Run Pytest suite env: + ENABLE_HOPSWORKS_USAGE: "false" HOPSWORKS_RUN_WITH_TYPECHECK: "true" run: pytest python/tests continue-on-error: true - unit_tests_windows: - name: Unit Tests (Windows) + unit_tests_windows_utc: + name: Unit Testing (Windows) needs: lint_stylecheck runs-on: windows-latest @@ -209,10 +185,12 @@ jobs: run: python --version - name: Run Pytest suite + env: + ENABLE_HOPSWORKS_USAGE: "false" run: pytest python/tests - unit_tests_windows_local_tz: - name: Unit Tests (Windows) (Local TZ) + unit_tests_windows_local: + name: Unit Testing (Windows) (Local TZ) needs: lint_stylecheck runs-on: windows-latest @@ -239,4 +217,6 @@ jobs: run: pip freeze - name: Run Pytest suite + env: + ENABLE_HOPSWORKS_USAGE: "false" run: pytest python/tests diff --git a/hsfs/.gitignore b/hsfs/.gitignore new file mode 100644 index 000000000..a8b4c5683 --- /dev/null +++ b/hsfs/.gitignore @@ -0,0 +1,145 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +python/README.md +python/LICENSE + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ +.ruff_cache/ +bigquery.json +metastore_db/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# Mike Javadoc +docs/javadoc + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Java +.idea +.vscode +*.iml +target/ + +# Mac +.DS_Store + +# mkdocs intermediate files +docs/generated + +# Test artifacts +keyFile.json + +# delombok dir +delombok + +# dev scripts dir +dev_scripts/ +dev_tools/ diff --git a/hsfs/CONTRIBUTING.md b/hsfs/CONTRIBUTING.md new file mode 100644 index 000000000..0df3de08e --- /dev/null +++ b/hsfs/CONTRIBUTING.md @@ -0,0 +1,220 @@ +## Python development setup + +--- + +- Fork and clone the repository + +- Create a new Python environment with your favourite environment manager (e.g. virtualenv or conda) and Python 3.9 (newer versions will return a library conflict in `auto_doc.py`) + +- Install repository in editable mode with development dependencies: + + ```bash + cd python + pip install -e ".[python,dev]" + ``` + +- Install [pre-commit](https://pre-commit.com/) and then activate its hooks. pre-commit is a framework for managing and maintaining multi-language pre-commit hooks. The Feature Store uses pre-commit to ensure code-style and code formatting through [ruff](https://docs.astral.sh/ruff/). Run the following commands from the `python` directory: + + ```bash + cd python + pip install --user pre-commit + pre-commit install + ``` + + Afterwards, pre-commit will run whenever you commit. + +- To run formatting and code-style separately, you can configure your IDE, such as VSCode, to use `ruff`, or run it via the command line: + + ```bash + # linting + ruff check python --fix + # formatting + ruff format python + ``` + +### Python documentation + +We follow a few best practices for writing the Python documentation: + +1. Use the google docstring style: + + ```python + """[One Line Summary] + + [Extended Summary] + + [!!! example + import xyz + ] + + # Arguments + arg1: Type[, optional]. Description[, defaults to `default`] + arg2: Type[, optional]. Description[, defaults to `default`] + + # Returns + Type. Description. + + # Raises + Exception. Description. + """ + ``` + + If Python 3 type annotations are used, they are inserted automatically. + +2. Feature store entity engine methods (e.g. FeatureGroupEngine etc.) only require a single line docstring. +3. REST Api implementations (e.g. FeatureGroupApi etc.) should be fully documented with docstrings without defaults. +4. Public Api such as metadata objects should be fully documented with defaults.
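+
+As an illustration, a docstring that follows these conventions for a hypothetical `get_feature_group` helper (the method and exception names here are only examples) might look like:
+
+```python
+def get_feature_group(name: str, version: int = 1):
+    """Get a feature group metadata object from the feature store.
+
+    Feature groups are resolved by name and version.
+
+    # Arguments
+        name: str. Name of the feature group to fetch.
+        version: int, optional. Version of the feature group, defaults to `1`.
+
+    # Returns
+        `FeatureGroup`. The feature group metadata object.
+
+    # Raises
+        `RestAPIError`. If unable to retrieve the feature group from the backend.
+    """
+```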
+ +#### Setup and Build Documentation + +We use `mkdocs` together with `mike` ([for versioning](https://github.com/jimporter/mike/)) to build the documentation and a plugin called `keras-autodoc` to auto generate Python API documentation from docstrings. + +**Background about `mike`:** +`mike` builds the documentation and commits it as a new directory to the gh-pages branch. Each directory corresponds to one version of the documentation. Additionally, `mike` maintains a json in the root of gh-pages with the mappings of versions/aliases for each of the directories available. With aliases you can define extra names like `dev` or `latest`, to indicate stable and unstable releases. + +1. Currently we are using our own version of `keras-autodoc` + + ```bash + pip install git+https://github.com/logicalclocks/keras-autodoc + ``` + +2. Install HSFS with `docs` extras: + + ```bash + pip install -e ".[python,dev]" && pip install -r ../requirements-docs.txt + ``` + +3. To build the docs, first run the auto doc script: + + ```bash + cd .. + python auto_doc.py + ``` + +##### Option 1: Build only current version of docs + +4. Either build the docs, or serve them dynamically: + + Note: Links and pictures might not resolve properly later on when checking with this build. + The reason for that is that the docs are deployed with versioning on docs.hopsworks.ai and + therefore another level is added to all paths, e.g. `docs.hopsworks.ai/[version-or-alias]`. + Using relative links should not be affected by this, however, building the docs with version + (Option 2) is recommended. + + ```bash + mkdocs build + # or + mkdocs serve + ``` + +##### Option 2 (Preferred): Build multi-version doc with `mike` + +###### Versioning on docs.hopsworks.ai + +On docs.hopsworks.ai we implement the following versioning scheme: + +- current master branches (e.g. of hsfs corresponding to master of Hopsworks): rendered as current Hopsworks snapshot version, e.g. **2.2.0-SNAPSHOT [dev]**, where `dev` is an alias to indicate that this is an unstable version. +- the latest release: rendered with full current version, e.g. **2.1.5 [latest]** with `latest` alias to indicate that this is the latest stable release. +- previous stable releases: rendered without alias, e.g. **2.1.4**. + +###### Build Instructions + +4. For this you can either checkout and make a local copy of the `upstream/gh-pages` branch, where `mike` maintains the current state of docs.hopsworks.ai, or just build documentation for the branch you are updating: + + Building _one_ branch: + + Checkout your dev branch with modified docs: + + ```bash + git checkout [dev-branch] + ``` + + Generate API docs if necessary: + + ```bash + python auto_doc.py + ``` + + Build docs with a version and alias + + ```bash + mike deploy [version] [alias] --update-alias + + # for example, if you are updating documentation to be merged to master, + # which will become the new SNAPSHOT version: + mike deploy 2.2.0-SNAPSHOT dev --update-alias + + # if you are updating docs of the latest stable release branch + mike deploy [version] latest --update-alias + + # if you are updating docs of a previous stable release branch + mike deploy [version] + ``` + + If no gh-pages branch existed in your local repository, this will have created it. 
+ + **Important**: If no previous docs were built, you will have to choose a version as default to be loaded as index, as follows + + ```bash + mike set-default [version-or-alias] + ``` + + You can now checkout the gh-pages branch and serve: + + ```bash + git checkout gh-pages + mike serve + ``` + + You can also list all available versions/aliases: + + ```bash + mike list + ``` + + Delete and reset your local gh-pages branch: + + ```bash + mike delete --all + + # or delete single version + mike delete [version-or-alias] + ``` + +#### Adding new API documentation + +To add new documentation for APIs, you need to add information about the method/class to document to the `auto_doc.py` script: + +```python +PAGES = { + "connection.md": [ + "hsfs.connection.Connection.connection" + ], + "new_template.md": [ + "module", + "xyz.asd" + ] +} +``` + +Now you can add a template markdown file to the `docs/templates` directory with the name you specified in the auto-doc script. The `new_template.md` file should contain a tag to identify the place at which the API documentation should be inserted: + +```` +## The XYZ package + +{{module}} + +Some extra content here. + +!!! example + ```python + import xyz + ``` + +{{xyz.asd}} +```` + +Finally, run the `auto_doc.py` script, as described above, to update the documentation. + +For information about Markdown syntax and possible Admonitions/Highlighting etc. see +the [Material for Mkdocs themes reference documentation](https://squidfunk.github.io/mkdocs-material/reference/abbreviations/). diff --git a/Dockerfile b/hsfs/Dockerfile similarity index 100% rename from Dockerfile rename to hsfs/Dockerfile diff --git a/hsfs/Jenkinsfile b/hsfs/Jenkinsfile new file mode 100644 index 000000000..d2014d5cb --- /dev/null +++ b/hsfs/Jenkinsfile @@ -0,0 +1,23 @@ +pipeline { + agent { + docker { + label "local" + image "docker.hops.works/hopsworks_twine:0.0.1" + } + } + stages { + stage("publish") { + environment { + PYPI = credentials('977daeb0-e1c8-43a0-b35a-fc37bb9eee9b') + } + steps { + dir("python") { + sh "rm -f LICENSE README.md" + sh "cp -f ../LICENSE ../README.md ./" + sh "python3 -m build" + sh "twine upload -u $PYPI_USR -p $PYPI_PSW --skip-existing dist/*" + } + } + } + } +} diff --git a/hsfs/LICENSE b/hsfs/LICENSE new file mode 100644 index 000000000..261eeb9e9 --- /dev/null +++ b/hsfs/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License.
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/hsfs/README.md b/hsfs/README.md new file mode 100644 index 000000000..a13ea2ce5 --- /dev/null +++ b/hsfs/README.md @@ -0,0 +1,201 @@ +# Hopsworks Feature Store + +
+<!-- badges: Hopsworks Community, Hopsworks Feature Store Documentation, python, PyPiStatus, Scala/Java Artifacts, Downloads, Ruff, License -->
+ +HSFS is the library to interact with the Hopsworks Feature Store. The library makes creating new features, feature groups and training datasets easy. + +The library is environment independent and can be used in two modes: + +- Spark mode: For data engineering jobs that create and write features into the feature store or generate training datasets. It requires a Spark environment such as the one provided in the Hopsworks platform or Databricks. In Spark mode, HSFS provides bindings both for Python and JVM languages. + +- Python mode: For data science jobs to explore the features available in the feature store, generate training datasets and feed them in a training pipeline. Python mode requires just a Python interpreter and can be used both in Hopsworks from Python Jobs/Jupyter Kernels, Amazon SageMaker or KubeFlow. + +The library automatically configures itself based on the environment it is run in. +However, to connect from an external environment such as Databricks or AWS Sagemaker, +additional connection information, such as host and port, is required. For more information, check out the [Hopsworks documentation](https://docs.hopsworks.ai/latest/). + +## Getting Started On Hopsworks + +Get started easily by registering an account on [Hopsworks Serverless](https://app.hopsworks.ai/). Create your project and a [new Api key](https://docs.hopsworks.ai/latest/user_guides/projects/api_key/create_api_key/). In a new Python environment with Python 3.8 or higher, install the [client library](https://docs.hopsworks.ai/latest/user_guides/client_installation/) using pip: + +```bash +# Get all Hopsworks SDKs: Feature Store, Model Serving and Platform SDK +pip install hopsworks +# or minimum install with the Feature Store SDK +pip install hsfs[python] +# if using zsh don't forget the quotes +pip install 'hsfs[python]' +``` + +You can start a notebook, instantiate a connection and get the project feature store handle. + +```python +import hopsworks + +project = hopsworks.login() # you will be prompted for your api key +fs = project.get_feature_store() +``` + +or using `hsfs` directly: + +```python +import hsfs + +connection = hsfs.connection( + host="c.app.hopsworks.ai", # + project="your-project", + api_key_value="your-api-key", +) +fs = connection.get_feature_store() +``` + +Create a new feature group to start inserting feature values. +```python +fg = fs.create_feature_group("rain", + version=1, + description="Rain features", + primary_key=['date', 'location_id'], + online_enabled=True) + +fg.save(dataframe) +``` + +Upsert new data into the feature group with `time_travel_format="HUDI"`. +```python +fg.insert(upsert_df) +``` + +Retrieve commit timeline metadata of the feature group with `time_travel_format="HUDI"`. +```python +fg.commit_details() +``` + +Read the feature group as of a specific point in time. +```python +fg = fs.get_feature_group("rain", 1) +fg.read("2020-10-20 07:34:11").show() +``` + +Read updates that occurred between specified points in time.
+
+A short introduction to the Scala API:
+```scala
+import com.logicalclocks.hsfs._
+val connection = HopsworksConnection.builder().build()
+val fs = connection.getFeatureStore();
+val attendances_features_fg = fs.getFeatureGroup("games_features", 1);
+attendances_features_fg.show(1)
+```
+
+You can find more examples on how to use the library in our [hops-examples](https://github.com/logicalclocks/hops-examples) repository.
+
+## Usage
+
+Usage data is collected to improve the quality of the library. It is turned on by default if the backend
+is "c.app.hopsworks.ai". To turn it off, use one of the following ways:
+```python
+# use an environment variable
+import os
+os.environ["ENABLE_HOPSWORKS_USAGE"] = "false"
+
+# use `disable_usage_logging`
+import hsfs
+hsfs.disable_usage_logging()
+```
+
+The source code can be found in python/hsfs/usage.py.
+
+## Documentation
+
+Documentation is available at [Hopsworks Feature Store Documentation](https://docs.hopsworks.ai/).
+
+## Issues
+
+For general questions about the usage of Hopsworks and the Feature Store, please open a topic on [Hopsworks Community](https://community.hopsworks.ai/).
+
+Please report any issue using [Github issue tracking](https://github.com/logicalclocks/feature-store-api/issues).
+
+Please attach the client environment from the output below to the issue:
+```python
+import hopsworks
+import hsfs
+hopsworks.login().get_feature_store()
+print(hsfs.get_env())
+```
+
+## Contributing
+
+If you would like to contribute to this library, please see the [Contribution Guidelines](CONTRIBUTING.md).
diff --git a/hsfs/auto_doc.py b/hsfs/auto_doc.py new file mode 100644 index 000000000..a98af258b --- /dev/null +++ b/hsfs/auto_doc.py @@ -0,0 +1,384 @@ +import os +import pathlib +import shutil + +import keras_autodoc + +PAGES = { + "api/connection_api.md": { + "connection": ["hsfs.connection.Connection"], + "connection_properties": keras_autodoc.get_properties( + "hsfs.connection.Connection" + ), + "connection_methods": keras_autodoc.get_methods("hsfs.connection.Connection"), + }, + "api/spine_group_api.md": { + "fg": ["hsfs.feature_group.SpineGroup"], + "fg_create": ["hsfs.feature_store.FeatureStore.get_or_create_spine_group"], + "fg_get": ["hsfs.feature_store.FeatureStore.get_or_create_spine_group"], + "fg_properties": keras_autodoc.get_properties( + "hsfs.feature_group.SpineGroup", + exclude=[ + "expectation_suite", + "location", + "online_enabled", + "statistics", + "statistics_config", + "subject", + ], + ), + "fg_methods": keras_autodoc.get_methods( + "hsfs.feature_group.SpineGroup", + exclude=[ + "append_features", + "compute_statistics", + "delete_expectation_suite", + "from_response_json", + "get_all_validation_reports", + "get_expectation_suite", + "get_latest_validation_report", + "get_statistics", + "get_validation_history", + "save_expectation_suite", + "save_validation_report", + "update_from_response_json", + "update_statistics_config", + "validate", + ], + ), + }, + "api/training_dataset_api.md": { + "td": ["hsfs.training_dataset.TrainingDataset"], + "td_create": ["hsfs.feature_store.FeatureStore.create_training_dataset"], + "td_get": ["hsfs.feature_store.FeatureStore.get_training_dataset"], + "td_properties": keras_autodoc.get_properties( + "hsfs.training_dataset.TrainingDataset" + ), + "td_methods": keras_autodoc.get_methods( + "hsfs.training_dataset.TrainingDataset" + ), + }, + "api/feature_view_api.md": { + "fv": ["hsfs.feature_view.FeatureView"], + "fv_create": ["hsfs.feature_store.FeatureStore.create_feature_view"], + "fv_get": ["hsfs.feature_store.FeatureStore.get_feature_view"], + "fvs_get": ["hsfs.feature_store.FeatureStore.get_feature_views"], + "fv_properties": keras_autodoc.get_properties("hsfs.feature_view.FeatureView"), + "fv_methods": keras_autodoc.get_methods("hsfs.feature_view.FeatureView"), + }, + "api/feature_api.md": { + "feature": ["hsfs.feature.Feature"], + "feature_properties": keras_autodoc.get_properties("hsfs.feature.Feature"), + "feature_methods": keras_autodoc.get_methods("hsfs.feature.Feature"), + }, + "api/expectation_suite_api.md": { + "expectation_suite": ["hsfs.expectation_suite.ExpectationSuite"], + "expectation_suite_attach": [ + "hsfs.feature_group.FeatureGroup.save_expectation_suite" + ], + "single_expectation_api": [ + "hsfs.expectation_suite.ExpectationSuite.add_expectation", + "hsfs.expectation_suite.ExpectationSuite.replace_expectation", + "hsfs.expectation_suite.ExpectationSuite.remove_expectation", + ], + "expectation_suite_properties": keras_autodoc.get_properties( + "hsfs.expectation_suite.ExpectationSuite" + ), + "expectation_suite_methods": keras_autodoc.get_methods( + "hsfs.expectation_suite.ExpectationSuite" + ), + }, + "api/feature_store_api.md": { + "fs": ["hsfs.feature_store.FeatureStore"], + "fs_get": ["hsfs.connection.Connection.get_feature_store"], + "fs_properties": keras_autodoc.get_properties( + "hsfs.feature_store.FeatureStore" + ), + "fs_methods": keras_autodoc.get_methods("hsfs.feature_store.FeatureStore"), + }, + "api/feature_group_api.md": { + "fg": ["hsfs.feature_group.FeatureGroup"], + "fg_create": [ + 
"hsfs.feature_store.FeatureStore.create_feature_group", + "hsfs.feature_store.FeatureStore.get_or_create_feature_group", + ], + "fg_get": ["hsfs.feature_store.FeatureStore.get_feature_group"], + "fg_properties": keras_autodoc.get_properties( + "hsfs.feature_group.FeatureGroup" + ), + "fg_methods": keras_autodoc.get_methods("hsfs.feature_group.FeatureGroup"), + }, + "api/external_feature_group_api.md": { + "fg": ["hsfs.feature_group.ExternalFeatureGroup"], + "fg_create": ["hsfs.feature_store.FeatureStore.create_external_feature_group"], + "fg_get": ["hsfs.feature_store.FeatureStore.get_external_feature_group"], + "fg_properties": keras_autodoc.get_properties( + "hsfs.feature_group.ExternalFeatureGroup" + ), + "fg_methods": keras_autodoc.get_methods( + "hsfs.feature_group.ExternalFeatureGroup" + ), + }, + "api/storage_connector_api.md": { + "sc_get": [ + "hsfs.feature_store.FeatureStore.get_storage_connector", + "hsfs.feature_store.FeatureStore.get_online_storage_connector", + ], + "hopsfs_methods": keras_autodoc.get_methods( + "hsfs.storage_connector.HopsFSConnector", exclude=["from_response_json"] + ), + "hopsfs_properties": keras_autodoc.get_properties( + "hsfs.storage_connector.HopsFSConnector" + ), + "s3_methods": keras_autodoc.get_methods( + "hsfs.storage_connector.S3Connector", exclude=["from_response_json"] + ), + "s3_properties": keras_autodoc.get_properties( + "hsfs.storage_connector.S3Connector" + ), + "redshift_methods": keras_autodoc.get_methods( + "hsfs.storage_connector.RedshiftConnector", exclude=["from_response_json"] + ), + "redshift_properties": keras_autodoc.get_properties( + "hsfs.storage_connector.RedshiftConnector" + ), + "adls_methods": keras_autodoc.get_methods( + "hsfs.storage_connector.AdlsConnector", exclude=["from_response_json"] + ), + "adls_properties": keras_autodoc.get_properties( + "hsfs.storage_connector.AdlsConnector" + ), + "snowflake_methods": keras_autodoc.get_methods( + "hsfs.storage_connector.SnowflakeConnector", exclude=["from_response_json"] + ), + "snowflake_properties": keras_autodoc.get_properties( + "hsfs.storage_connector.SnowflakeConnector" + ), + "jdbc_methods": keras_autodoc.get_methods( + "hsfs.storage_connector.JdbcConnector", exclude=["from_response_json"] + ), + "jdbc_properties": keras_autodoc.get_properties( + "hsfs.storage_connector.JdbcConnector" + ), + "gcs_methods": keras_autodoc.get_methods( + "hsfs.storage_connector.GcsConnector", exclude=["from_response_json"] + ), + "gcs_properties": keras_autodoc.get_properties( + "hsfs.storage_connector.GcsConnector" + ), + "bigquery_methods": keras_autodoc.get_methods( + "hsfs.storage_connector.BigQueryConnector", exclude=["from_response_json"] + ), + "bigquery_properties": keras_autodoc.get_properties( + "hsfs.storage_connector.BigQueryConnector" + ), + "kafka_methods": keras_autodoc.get_methods( + "hsfs.storage_connector.KafkaConnector", exclude=["from_response_json"] + ), + "kafka_properties": keras_autodoc.get_properties( + "hsfs.storage_connector.KafkaConnector" + ), + }, + "api/statistics_config_api.md": { + "statistics_config": ["hsfs.statistics_config.StatisticsConfig"], + "statistics_config_properties": keras_autodoc.get_properties( + "hsfs.statistics_config.StatisticsConfig" + ), + }, + "api/transformation_functions_api.md": { + "transformation_function": [ + "hsfs.transformation_function.TransformationFunction" + ], + "transformation_function_properties": keras_autodoc.get_properties( + "hsfs.transformation_function.TransformationFunction" + ), + "transformation_function_methods": 
keras_autodoc.get_methods( + "hsfs.transformation_function.TransformationFunction", + exclude=[ + "from_response_json", + "update_from_response_json", + "json", + "to_dict", + ], + ), + "create_transformation_function": [ + "hsfs.feature_store.FeatureStore.create_transformation_function" + ], + "get_transformation_function": [ + "hsfs.feature_store.FeatureStore.get_transformation_function" + ], + "get_transformation_functions": [ + "hsfs.feature_store.FeatureStore.get_transformation_functions" + ], + }, + "api/validation_report_api.md": { + "validation_report": ["hsfs.validation_report.ValidationReport"], + "validation_report_validate": [ + "hsfs.feature_group.FeatureGroup.validate", + "hsfs.feature_group.FeatureGroup.insert", + ], + "validation_report_get": [ + "hsfs.feature_group.FeatureGroup.get_latest_validation_report", + "hsfs.feature_group.FeatureGroup.get_all_validation_reports", + ], + "validation_report_properties": keras_autodoc.get_properties( + "hsfs.validation_report.ValidationReport" + ), + "validation_report_methods": keras_autodoc.get_methods( + "hsfs.validation_report.ValidationReport" + ), + }, + "api/job.md": { + "job_configuration": ["hsfs.core.job_configuration.JobConfiguration"], + "job": ["hsfs.core.job.Job"], + "job_methods": [ + "hsfs.core.job.Job.get_state", + "hsfs.core.job.Job.get_final_state", + ], + }, + "api/query_api.md": { + "query_methods": keras_autodoc.get_methods( + "hsfs.constructor.query.Query", + exclude=["json", "to_dict"], + ), + "query_properties": keras_autodoc.get_properties( + "hsfs.constructor.query.Query" + ), + }, + "api/links.md": { + "links_properties": keras_autodoc.get_properties( + "hsfs.core.explicit_provenance.Links" + ), + "artifact_properties": keras_autodoc.get_properties( + "hsfs.core.explicit_provenance.Artifact" + ), + }, + "api/statistics_api.md": { + "statistics": ["hsfs.statistics.Statistics"], + "statistics_properties": keras_autodoc.get_properties( + "hsfs.statistics.Statistics" + ), + }, + "api/split_statistics_api.md": { + "split_statistics": ["hsfs.split_statistics.SplitStatistics"], + "split_statistics_properties": keras_autodoc.get_properties( + "hsfs.split_statistics.SplitStatistics" + ), + }, + "api/feature_descriptive_statistics_api.md": { + "feature_descriptive_statistics": [ + "hsfs.core.feature_descriptive_statistics.FeatureDescriptiveStatistics" + ], + "feature_descriptive_statistics_properties": keras_autodoc.get_properties( + "hsfs.core.feature_descriptive_statistics.FeatureDescriptiveStatistics" + ), + }, + "api/feature_monitoring_config_api.md": { + "feature_monitoring_config": [ + "hsfs.core.feature_monitoring_config.FeatureMonitoringConfig" + ], + "feature_monitoring_config_properties": keras_autodoc.get_properties( + "hsfs.core.feature_monitoring_config.FeatureMonitoringConfig" + ), + "feature_monitoring_config_methods": keras_autodoc.get_methods( + "hsfs.core.feature_monitoring_config.FeatureMonitoringConfig", + exclude=[ + "from_response_json", + "update_from_response_json", + "json", + "to_dict", + ], + ), + # from feature group + "feature_monitoring_config_creation_fg": [ + "hsfs.feature_group.FeatureGroup.create_statistics_monitoring", + "hsfs.feature_group.FeatureGroup.create_feature_monitoring", + ], + # from feature view + "feature_monitoring_config_creation_fv": [ + "hsfs.feature_view.FeatureView.create_statistics_monitoring", + "hsfs.feature_view.FeatureView.create_feature_monitoring", + ], + # retrieval + "feature_monitoring_config_retrieval_fg": [ + 
"hsfs.feature_group.FeatureGroup.get_feature_monitoring_configs", + ], + "feature_monitoring_config_retrieval_fv": [ + "hsfs.feature_view.FeatureView.get_feature_monitoring_configs", + ], + }, + "api/feature_monitoring_result_api.md": { + "feature_monitoring_result": [ + "hsfs.core.feature_monitoring_result.FeatureMonitoringResult" + ], + "feature_monitoring_result_retrieval": [ + "hsfs.core.feature_monitoring_config.FeatureMonitoringConfig.get_history" + ], + "feature_monitoring_result_properties": keras_autodoc.get_properties( + "hsfs.core.feature_monitoring_result.FeatureMonitoringResult" + ), + }, + "api/feature_monitoring_window_config_api.md": { + "feature_monitoring_window_config": [ + "hsfs.core.monitoring_window_config.MonitoringWindowConfig" + ], + "feature_monitoring_window_config_properties": keras_autodoc.get_properties( + "hsfs.core.monitoring_window_config.MonitoringWindowConfig" + ), + }, + "api/embedding_index_api.md": { + "embedding_index": ["hsfs.embedding.EmbeddingIndex"], + "embedding_index_properties": keras_autodoc.get_properties( + "hsfs.embedding.EmbeddingIndex" + ), + "embedding_index_methods": keras_autodoc.get_methods( + "hsfs.embedding.EmbeddingIndex", exclude=["from_response_json"] + ), + }, + "api/embedding_feature_api.md": { + "embedding_feature": ["hsfs.embedding.EmbeddingFeature"], + "embedding_feature_properties": keras_autodoc.get_properties( + "hsfs.embedding.EmbeddingFeature" + ), + }, + "api/similarity_function_type_api.md": { + "similarity_function_type": ["hsfs.embedding.SimilarityFunctionType"], + }, +} + +hsfs_dir = pathlib.Path(__file__).resolve().parents[0] +if "GITHUB_SHA" in os.environ: + commit_sha = os.environ["GITHUB_SHA"] + project_url = ( + f"https://github.com/logicalclocks/feature-store-api/tree/{commit_sha}/python" + ) +else: + branch_name = os.environ.get("GITHUB_BASE_REF", "master") + project_url = ( + f"https://github.com/logicalclocks/feature-store-api/blob/{branch_name}/python" + ) + + +def generate(dest_dir): + doc_generator = keras_autodoc.DocumentationGenerator( + PAGES, + project_url=project_url, + template_dir="./docs/templates", + titles_size="###", + extra_aliases={ + "hsfs.core.query.Query": "hsfs.Query", + "hsfs.storage_connector.StorageConnector": "hsfs.StorageConnector", + "hsfs.statistics_config.StatisticsConfig": "hsfs.StatisticsConfig", + "hsfs.training_dataset_feature.TrainingDatasetFeature": "hsfs.TrainingDatasetFeature", + "pandas.core.frame.DataFrame": "pandas.DataFrame", + }, + max_signature_line_length=100, + ) + shutil.copyfile(hsfs_dir / "CONTRIBUTING.md", dest_dir / "CONTRIBUTING.md") + shutil.copyfile(hsfs_dir / "README.md", dest_dir / "index.md") + + doc_generator.generate(dest_dir / "generated") + + +if __name__ == "__main__": + generate(hsfs_dir / "docs") diff --git a/hsfs/docs/CONTRIBUTING.md b/hsfs/docs/CONTRIBUTING.md new file mode 100644 index 000000000..0df3de08e --- /dev/null +++ b/hsfs/docs/CONTRIBUTING.md @@ -0,0 +1,220 @@ +## Python development setup + +--- + +- Fork and clone the repository + +- Create a new Python environment with your favourite environment manager (e.g. virtualenv or conda) and Python 3.9 (newer versions will return a library conflict in `auto_doc.py`) + +- Install repository in editable mode with development dependencies: + + ```bash + cd python + pip install -e ".[python,dev]" + ``` + +- Install [pre-commit](https://pre-commit.com/) and then activate its hooks. pre-commit is a framework for managing and maintaining multi-language pre-commit hooks. 
The Feature Store uses pre-commit to ensure code style and code formatting through [ruff](https://docs.astral.sh/ruff/). Run the following commands from the `python` directory:
+
+  ```bash
+  cd python
+  pip install --user pre-commit
+  pre-commit install
+  ```
+
+  Afterwards, pre-commit will run whenever you commit.
+
+- To run formatting and code-style checks separately, you can configure your IDE, such as VSCode, to use `ruff`, or run it via the command line:
+
+  ```bash
+  # linting
+  ruff check python --fix
+  # formatting
+  ruff format python
+  ```
+
+### Python documentation
+
+We follow a few best practices for writing the Python documentation:
+
+1. Use the Google docstring style:
+
+   ```python
+   """[One Line Summary]
+
+   [Extended Summary]
+
+   [!!! example
+       import xyz
+   ]
+
+   # Arguments
+   arg1: Type[, optional]. Description[, defaults to `default`]
+   arg2: Type[, optional]. Description[, defaults to `default`]
+
+   # Returns
+   Type. Description.
+
+   # Raises
+   Exception. Description.
+   """
+   ```
+
+   If Python 3 type annotations are used, they are inserted automatically.
+
+2. Feature store entity engine methods (e.g. FeatureGroupEngine) only require a single-line docstring.
+3. REST API implementations (e.g. FeatureGroupApi) should be fully documented with docstrings without defaults.
+4. Public APIs such as metadata objects should be fully documented with defaults.
+
+#### Setup and Build Documentation
+
+We use `mkdocs` together with `mike` ([for versioning](https://github.com/jimporter/mike/)) to build the documentation, and a plugin called `keras-autodoc` to auto-generate Python API documentation from docstrings.
+
+**Background about `mike`:** `mike` builds the documentation and commits it as a new directory to the gh-pages branch. Each directory corresponds to one version of the documentation. Additionally, `mike` maintains a JSON file in the root of gh-pages with the mappings of versions/aliases for each of the directories available. With aliases you can define extra names like `dev` or `latest` to indicate stable and unstable releases.
+
+1. Currently we are using our own version of `keras-autodoc`:
+
+   ```bash
+   pip install git+https://github.com/logicalclocks/keras-autodoc
+   ```
+
+2. Install HSFS with the `docs` extras:
+
+   ```bash
+   pip install -e ".[python,dev]" && pip install -r ../requirements-docs.txt
+   ```
+
+3. To build the docs, first run the auto doc script:
+
+   ```bash
+   cd ..
+   python auto_doc.py
+   ```
+
+##### Option 1: Build only the current version of the docs
+
+4. Either build the docs, or serve them dynamically:
+
+   Note: Links and pictures might not resolve properly later on when checking with this build.
+   The reason for that is that the docs are deployed with versioning on docs.hopsworks.ai and
+   therefore another level is added to all paths, e.g. `docs.hopsworks.ai/[version-or-alias]`.
+   Using relative links should not be affected by this; however, building the docs with a version
+   (Option 2) is recommended.
+
+   ```bash
+   mkdocs build
+   # or
+   mkdocs serve
+   ```
+
+##### Option 2 (Preferred): Build multi-version docs with `mike`
+
+###### Versioning on docs.hopsworks.ai
+
+On docs.hopsworks.ai we implement the following versioning scheme:
+
+- current master branches (e.g. of hsfs corresponding to master of Hopsworks): rendered as the current Hopsworks snapshot version, e.g. **2.2.0-SNAPSHOT [dev]**, where `dev` is an alias to indicate that this is an unstable version.
+- the latest release: rendered with the full current version, e.g. **2.1.5 [latest]**, with the `latest` alias to indicate that this is the latest stable release.
+- previous stable releases: rendered without an alias, e.g. **2.1.4**.
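+
+With this scheme, the version/alias mapping that `mike` maintains in the root of the gh-pages branch (its `versions.json`) would look roughly like the sketch below; the exact fields depend on the `mike` version used:
+
+```json
+[
+  {"version": "2.2.0-SNAPSHOT", "title": "2.2.0-SNAPSHOT [dev]", "aliases": ["dev"]},
+  {"version": "2.1.5", "title": "2.1.5 [latest]", "aliases": ["latest"]},
+  {"version": "2.1.4", "title": "2.1.4", "aliases": []}
+]
+```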
+
+###### Build Instructions
+
+4. For this you can either check out and make a local copy of the `upstream/gh-pages` branch, where `mike` maintains the current state of docs.hopsworks.ai, or just build the documentation for the branch you are updating:
+
+   Building _one_ branch:
+
+   Check out your dev branch with modified docs:
+
+   ```bash
+   git checkout [dev-branch]
+   ```
+
+   Generate the API docs if necessary:
+
+   ```bash
+   python auto_doc.py
+   ```
+
+   Build the docs with a version and alias:
+
+   ```bash
+   mike deploy [version] [alias] --update-alias
+
+   # for example, if you are updating documentation to be merged to master,
+   # which will become the new SNAPSHOT version:
+   mike deploy 2.2.0-SNAPSHOT dev --update-alias
+
+   # if you are updating docs of the latest stable release branch
+   mike deploy [version] latest --update-alias
+
+   # if you are updating docs of a previous stable release branch
+   mike deploy [version]
+   ```
+
+   If no gh-pages branch existed in your local repository, this will have created it.
+
+   **Important**: If no previous docs were built, you will have to choose a version as the default to be loaded as the index, as follows:
+
+   ```bash
+   mike set-default [version-or-alias]
+   ```
+
+   You can now check out the gh-pages branch and serve the docs:
+
+   ```bash
+   git checkout gh-pages
+   mike serve
+   ```
+
+   You can also list all available versions/aliases:
+
+   ```bash
+   mike list
+   ```
+
+   Delete and reset your local gh-pages branch:
+
+   ```bash
+   mike delete --all
+
+   # or delete a single version
+   mike delete [version-or-alias]
+   ```
+
+#### Adding new API documentation
+
+To add new documentation for APIs, you need to add information about the method/class to document to the `auto_doc.py` script:
+
+```python
+PAGES = {
+    "connection.md": [
+        "hsfs.connection.Connection.connection"
+    ],
+    "new_template.md": [
+        "module",
+        "xyz.asd"
+    ]
+}
+```
+
+Now you can add a template markdown file to the `docs/templates` directory with the name you specified in the auto-doc script. The `new_template.md` file should contain a tag to identify the place at which the API documentation should be inserted:
+
+````
+## The XYZ package
+
+{{module}}
+
+Some extra content here.
+
+!!! example
+    ```python
+    import xyz
+    ```
+
+{{xyz.asd}}
+````
+
+Finally, run the `auto_doc.py` script, as described above, to update the documentation.
+
+For information about Markdown syntax and possible Admonitions/Highlighting etc., see
+the [Material for MkDocs reference documentation](https://squidfunk.github.io/mkdocs-material/reference/abbreviations/).
diff --git a/hsfs/docs/assets/images/favicon.ico b/hsfs/docs/assets/images/favicon.ico
new file mode 100644
index 0000000000000000000000000000000000000000..ab757306798d8da0cea9ca008ac05cd5091eff1a
GIT binary patch
literal 2699
[binary image data omitted]

literal 0
HcmV?d00001

diff --git a/hsfs/docs/assets/images/hops-logo.png b/hsfs/docs/assets/images/hops-logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..d3625ae07da68430e3bd5f46a1a7baf88dd474cc
GIT binary patch
literal 6356
[binary image data omitted]

literal 0
HcmV?d00001

diff --git a/docs/assets/images/hopsworks-logo.png b/hsfs/docs/assets/images/hopsworks-logo.png
similarity index 100%
rename from docs/assets/images/hopsworks-logo.png
rename to hsfs/docs/assets/images/hopsworks-logo.png
diff --git a/hsfs/docs/css/custom.css b/hsfs/docs/css/custom.css
new file mode 100644
index 000000000..45f87459a
--- /dev/null
+++ b/hsfs/docs/css/custom.css
@@ -0,0 +1,114 @@
+[data-md-color-scheme="hopsworks"] {
+    --md-primary-fg-color: #1EB382;
+    --md-secondary-fg-color: #188a64;
+    --md-tertiary-fg-color: #0d493550;
+    --md-quaternary-fg-color: #fdfdfd;
+    --border-radius-variable: 5px;
+}
+
+.md-footer__inner:not([hidden]) {
+    display: none
+}
+
+/* Lex did stuff here */
+.svg_topnav{
+    width: 12px;
+    filter: invert(100);
+}
+.svg_topnav:hover{
+    width: 12px;
+    filter: invert(10);
+}
+
+.md-header[data-md-state=shadow] {
+    box-shadow: 0 0 0 0;
+}
+
+.md-tabs__item {
+    min-width: 2.25rem;
+    min-height: 1.5rem;
+}
+
+.md-tabs__item:hover {
+    background-color: var(--md-tertiary-fg-color);
+    transition: background-color 450ms;
+}
+
+/*
+.md-sidebar__scrollwrap{
+    background-color: var(--md-quaternary-fg-color);
+    padding: 15px 5px 5px 5px;
+    border-radius: var(--border-radius-variable);
+}
+*/
+.md-nav__link:focus{
+}
+
+.image_logo_02{
+    width:450px;
+}
+
+/* End of Lex did stuff here */
+
+.md-header__button.md-logo {
+    margin: .1rem;
+    padding: .1rem;
+}
+
+.md-header__button.md-logo img, .md-header__button.md-logo svg {
+    display: block;
+    width: 1.8rem;
+    height: 1.8rem;
+    fill: currentColor;
+}
+
+.md-tabs {
+    width: 100%;
+    overflow: auto;
+    color: var(--md-primary-bg-color);
+    background-color:
var(--md-secondary-fg-color); + transition: background-color 250ms; +} + +.wrapper { + display: grid; + grid-template-columns: repeat(4, 1fr); + gap: 10px; + grid-auto-rows: minmax(100px, auto); +} + +.wrapper * { + border: 2px solid green; + text-align: center; + padding: 70px 0; +} + +.one { + grid-column: 1 / 2; + grid-row: 1; +} +.two { + grid-column: 2 / 3; + grid-row: 1; +} +.three { + grid-column: 3 / 4; + grid-row: 1; +} +.four { + grid-column: 4 / 5; + grid-row: 1; +} +.five { + grid-column: 1 / 3; + grid-row: 2; +} +.six { + grid-column: 3 / 5; + grid-row: 2; +} + +/* Jupyter Stuff */ +.jupyter-wrapper .jp-CodeCell .jp-Cell-inputWrapper .jp-InputPrompt { + display: none !important; +} diff --git a/hsfs/docs/css/dropdown.css b/hsfs/docs/css/dropdown.css new file mode 100644 index 000000000..531f7b10d --- /dev/null +++ b/hsfs/docs/css/dropdown.css @@ -0,0 +1,55 @@ +/* Style The Dropdown Button */ +.dropbtn { + color: white; + border: none; + cursor: pointer; +} + +.md-tabs__list { + contain: inherit; +} + +.md-tabs { + overflow: inherit; +} + + +/* The container
- needed to position the dropdown content */ +.dropdown { + position: absolute; + display: inline-block; +} + +/* Dropdown Content (Hidden by Default) */ +.dropdown-content { + display: none; + font-size: 13px; + position: absolute; + background-color: #f9f9f9; + min-width: 160px; + box-shadow: 0px 8px 16px 0px rgba(0, 0, 0, 0.2); + z-index: 1000; + border-radius: 2px; + left: -15px; +} + +/* Links inside the dropdown */ +.dropdown-content a { + color: black; + padding: 12px 16px; + text-decoration: none; + display: block; +} + +/* Change color of dropdown links on hover */ +.dropdown-content a:hover { + background-color: #f1f1f1 +} + +/* Show the dropdown menu on hover */ +.dropdown:hover .dropdown-content { + display: block; +} + +/* Change the background color of the dropdown button when the dropdown content is shown */ +.dropdown:hover .dropbtn {} \ No newline at end of file diff --git a/hsfs/docs/css/marctech.css b/hsfs/docs/css/marctech.css new file mode 100644 index 000000000..8bb58c97b --- /dev/null +++ b/hsfs/docs/css/marctech.css @@ -0,0 +1,1047 @@ +:root { + --md-primary-fg-color: #1EB382; + --md-secondary-fg-color: #188a64; + --md-tertiary-fg-color: #0d493550; + --md-quaternary-fg-color: #fdfdfd; + --md-fiftuary-fg-color: #2471cf; + --border-radius-variable: 5px; + --border-width:1px; + } + + .marctech_main a{ + color: var(--md-fiftuary-fg-color); + border-bottom: 1px dotted var(--md-fiftuary-fg-color) !important; + text-decoration: dotted !important;} + + .marctech_main a:hover{ + border-bottom: 1px dotted var(--md-primary-fg-color)!important; + } + + .marctech_main a:visited{ + color: var(--md-tertiary-fg-color); + border-bottom: 1px dotted var(--md-tertiary-fg-color) !important; + + } + + .w-layout-grid { + display: -ms-grid; + display: grid; + grid-auto-columns: 1fr; + -ms-grid-columns: 1fr 1fr; + grid-template-columns: 1fr 1fr; + -ms-grid-rows: auto auto; + grid-template-rows: auto auto; + grid-row-gap: 16px; + grid-column-gap: 16px; + } + + .image_logo{ + width: 69%; + background-color: white; + z-index: 50; + padding: 0px 15px 0px 15px; + margin-bottom: 10px; + } + + .layer_02{ + pointer-events: none; + } + + .round-frame{ + pointer-events: initial; + } + + .marctech_main { + margin-top:-20px; + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + margin-bottom: 55px; + } + + .collumns { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + height: 100%; + -webkit-box-align: stretch; + -webkit-align-items: stretch; + -ms-flex-align: stretch; + align-items: stretch; + } + + .col_heading { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + } + + .enterprisefs { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + } + + .enterprise_ai { + -webkit-align-self: center; + -ms-flex-item-align: center; 
+ -ms-grid-row-align: center; + align-self: center; + -webkit-box-flex: 1; + -webkit-flex: 1; + -ms-flex: 1; + flex: 1; + } + + .side-content { + z-index: 0; + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + width: 240px; + height: 100%; + margin-top: 10px; + margin-bottom: 10px; + padding: 20px 10px; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-pack: center; + -webkit-justify-content: center; + -ms-flex-pack: center; + justify-content: center; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + -webkit-align-content: flex-start; + -ms-flex-line-pack: start; + align-content: flex-start; + border-style: solid; + border-width: var(--border-width); + border-color: #585858; + border-radius: 10px; + background-color:var(--md-quaternary-fg-color); + } + .body { + padding: 40px; + font-family: Roboto, sans-serif; + } + + .green { + color: #1eb182; + font-size: 1.2vw; + } + + .rec_frame { + position: relative; + z-index: 1; + display: inline-block; + min-width: 150px; + margin-top: 10px; + margin-right: 10px; + margin-left: 10px; + padding: 10px 10px; + border-style: solid; + border-width: var(--border-width); + border-color: #585858; + border-radius: 10px; + background-color: #fff; + box-shadow: 4px 4px 0 0 rgba(88, 88, 88, 0.16); + -webkit-transition: box-shadow 200ms ease, border-color 200ms ease; + transition: box-shadow 200ms ease, border-color 200ms ease; + color: #585858; + text-align: center; + cursor: pointer; + } + + .rec_frame:hover { + border-color: #c2c2c2; + box-shadow: none; + } + + .name_item { + font-size: 0.7rem; + line-height: 120%; + font-weight: 700; + } + + .name_item.db { + position: relative; + z-index: 3; + text-align: left; + } + + .name_item.small { + font-size: 0.6rem; + font-weight: 500; + } + + .name_item.ingrey { + padding-bottom: 20px; + } + + .db_frame-mid { + position: relative; + z-index: 1; + margin-top: -8px; + padding: 5px 2px; + border-style: solid; + border-width: var(--border-width); + border-color: #585858; + border-radius: 0px 0% 50% 50%; + background-color: #fff; + color: #585858; + text-align: center; + } + + .db_frame-top { + position: relative; + z-index: 2; + padding: 5px 2px; + border-style: solid; + border-width: var(--border-width); + border-color: #585858; + border-radius: 50%; + background-color: #fff; + color: #585858; + text-align: center; + } + + .icondb { + position: relative; + width: 25px; + min-width: 25px; + margin-right: 10px; + } + + .db_frame { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + width: 150px; + height: 55px; + padding: 20px 10px; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + border-style: solid; + border-width: var(--border-width); + border-color: #585858; + border-radius: 10px; + background-color: #fff; + box-shadow: 4px 4px 0 0 rgba(88, 88, 88, 0.16); + -webkit-transition: box-shadow 200ms ease, border-color 200ms ease; + transition: box-shadow 200ms ease, border-color 200ms ease; + color: #585858; + text-align: center; + cursor: pointer; + } + + .db_frame:hover { + border-color: #c2c2c2; + box-shadow: none; + } + + .grid { + -ms-grid-rows: auto auto auto; + grid-template-rows: auto auto auto; + } + + .arrowdown { + position: relative; + z-index: 0; + display: -webkit-box; + display: 
-webkit-flex; + display: -ms-flexbox; + display: flex; + margin-top: -10px; + -webkit-box-pack: center; + -webkit-justify-content: center; + -ms-flex-pack: center; + justify-content: center; + } + + .heading_MT { + margin-top: 0px !important; + margin-bottom: 0px !important; + font-size: 1.3rem !important; + white-space: nowrap !important; + } + + .head_col { + padding-left: 10px; + } + + .MT_heading3 { + margin-top: 0px !important ; + font-size: 0.8rem !important; + } + + .MT_heading3.green { + color: #1eb182 !important; + } + + .column_sides { + position: relative; + z-index: 2; + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-pack: justify; + -webkit-justify-content: space-between; + -ms-flex-pack: justify; + justify-content: space-between; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + } + + .hopsicon { + width: 45px; + height: 45px; + } + + .column_center { + z-index: 10; + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-pack: center; + -webkit-justify-content: center; + -ms-flex-pack: center; + justify-content: center; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + } + + .center-content { + z-index: -50; + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + width: 750px; + height: 670px; + margin-top: 10px; + margin-bottom: 10px; + padding: 20px 10px; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-pack: center; + -webkit-justify-content: center; + -ms-flex-pack: center; + justify-content: center; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + -webkit-align-content: center; + -ms-flex-line-pack: center; + align-content: center; + border-radius: 10px; + background-color: transparent; + } + + .image { + width: 260px; + } + + .layer_01 { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-pack: center; + -webkit-justify-content: center; + -ms-flex-pack: center; + justify-content: center; + -webkit-box-align: stretch; + -webkit-align-items: stretch; + -ms-flex-align: stretch; + align-items: stretch; + } + + .name_center { + font-size: 1rem; + font-weight: 700; + } + + .rec_frame_main { + position: relative; + z-index: 1; + margin-top: 10px; + margin-right: 10px; + margin-left: 10px; + padding: 5px 10px; + border-style: solid; + border-width: var(--border-width); + border-color: #1eb182; + border-radius: 10px; + background-color: #e6fdf6; + box-shadow: 4px 4px 0 0 #dcf7ee; + -webkit-transition: box-shadow 200ms ease, border-color 200ms ease; + transition: box-shadow 200ms ease, border-color 200ms ease; + color: #1eb182; + text-align: center; + cursor: pointer; + } + + .rec_frame_main:hover { + border-color: #9fecd4; + box-shadow: 
none; + } + + .rec_frame_main.no_content { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + height: 100%; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-pack: center; + -webkit-justify-content: center; + -ms-flex-pack: center; + justify-content: center; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + box-shadow: 4px 4px 0 0 #dcf7ee; + } + + .rec_frame_main.no_content:hover { + border-color: #1eb182; + box-shadow: 4px 4px 0 0 rgba(88, 88, 88, 0.16); + } + + .name_item_02 { + font-size: 0.85rem; + font-weight: 700; + } + + .grid-infra { + padding-top: 20px; + -ms-grid-columns: 1fr 1fr 1fr 1fr; + grid-template-columns: 1fr 1fr 1fr 1fr; + -ms-grid-rows: auto; + grid-template-rows: auto; + } + + .rec_frame_main-white { + position: relative; + z-index: 1; + display: inline-block; + width: 100%; + margin-top: 10px; + margin-bottom: 10px; + padding: 5px 10px; + border-style: solid; + border-width: var(--border-width); + border-color: #1eb182; + border-radius: 10px; + background-color: #fff; + box-shadow: 4px 4px 0 0 rgba(88, 88, 88, 0.16); + -webkit-transition: box-shadow 200ms ease, border-color 200ms ease; + transition: box-shadow 200ms ease, border-color 200ms ease; + color: #1eb182; + text-align: center; + cursor: pointer; + } + + .rec_frame_main-white:hover { + border-color: #c2c2c2; + box-shadow: none; + } + + .rec_frame_main-white.dotted { + border-style: dotted; + } + + .column { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-pack: justify; + -webkit-justify-content: space-between; + -ms-flex-pack: justify; + justify-content: space-between; + -webkit-box-align: stretch; + -webkit-align-items: stretch; + -ms-flex-align: stretch; + align-items: stretch; + } + + .columns_center { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-orient: horizontal; + -webkit-box-direction: normal; + -webkit-flex-direction: row; + -ms-flex-direction: row; + flex-direction: row; + -webkit-box-pack: justify; + -webkit-justify-content: space-between; + -ms-flex-pack: justify; + justify-content: space-between; + } + + .non-bold { + font-weight: 400; + } + + .logo-holder { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-pack: center; + -webkit-justify-content: center; + -ms-flex-pack: center; + justify-content: center; + } + + .infra { + text-align: center; + position: relative; + z-index: 30; + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + padding: 10px; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + border: 1px dashed #000; + border-radius: 6px; + background-color: #fff; + cursor: pointer; + } + + .infra:hover { + border-style: solid; + border-color: #585858; + } + + .text_and_icon { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-pack: center; + 
-webkit-justify-content: center;
+    -ms-flex-pack: center;
+    justify-content: center;
+    -webkit-box-align: center;
+    -webkit-align-items: center;
+    -ms-flex-align: center;
+    align-items: center;
+  }
+
+  .svg_icon {
+    width: 33px;
+    margin-right: 10px;
+    margin-left: 10px;
+  }
+
+  .layer_02 {
+    position: absolute;
+    z-index: 10;
+    display: -webkit-box;
+    display: -webkit-flex;
+    display: -ms-flexbox;
+    display: flex;
+    width: 96%;
+    height: 90%;
+    -webkit-box-orient: vertical;
+    -webkit-box-direction: normal;
+    -webkit-flex-direction: column;
+    -ms-flex-direction: column;
+    flex-direction: column;
+    -webkit-box-pack: center;
+    -webkit-justify-content: center;
+    -ms-flex-pack: center;
+    justify-content: center;
+    -webkit-box-align: stretch;
+    -webkit-align-items: stretch;
+    -ms-flex-align: stretch;
+    align-items: stretch;
+    border-style: solid;
+    border-width: calc(var(--border-width)*2);
+    border-color: #bbbbbb50;
+    border-radius: 100%;
+    background-color: transparent;
+  }
+
+  .round-frame {
+    position: absolute;
+    left: 0%;
+    top: auto;
+    right: auto;
+    bottom: 0%;
+    z-index: 10;
+    display: -webkit-box;
+    display: -webkit-flex;
+    display: -ms-flexbox;
+    display: flex;
+    width: 120px;
+    height: 120px;
+    margin: 10px;
+    padding: 20px;
+    -webkit-box-pack: center;
+    -webkit-justify-content: center;
+    -ms-flex-pack: center;
+    justify-content: center;
+    -webkit-box-align: center;
+    -webkit-align-items: center;
+    -ms-flex-align: center;
+    align-items: center;
+    border-style: solid;
+    border-width: var(--border-width);
+    border-color: #585858;
+    border-radius: 100%;
+    background-color: #fff;
+    outline-color: #fff;
+    outline-offset: 0px;
+    outline-style: solid;
+    outline-width: 7px;
+    -webkit-transition: box-shadow 200ms ease, border-color 200ms ease;
+    transition: box-shadow 200ms ease, border-color 200ms ease;
+    color: #585858;
+    text-align: center;
+    cursor: pointer;
+  }
+
+  .round-frame:hover {
+    border-color: #c2c2c2;
+    box-shadow: none;
+  }
+
+  .round-frame.top-left {
+    left: 4%;
+    top: 15%;
+    right: auto;
+    bottom: auto;
+  }
+
+  .round-frame.bottom-left {
+    left: 4%;
+    bottom: 15%;
+  }
+
+  .round-frame.top-right {
+    left: auto;
+    top: 15%;
+    right: 4%;
+    bottom: auto;
+  }
+
+  .round-frame.bottom-right {
+    left: auto;
+    top: auto;
+    right: 4%;
+    bottom: 15%;
+    padding: 10px;
+  }
+
+  .side-holder {
+    z-index: -1;
+    display: -webkit-box;
+    display: -webkit-flex;
+    display: -ms-flexbox;
+    display: flex;
+    height: 630px;
+    -webkit-box-orient: vertical;
+    -webkit-box-direction: normal;
+    -webkit-flex-direction: column;
+    -ms-flex-direction: column;
+    flex-direction: column;
+    -webkit-box-pack: center;
+    -webkit-justify-content: center;
+    -ms-flex-pack: center;
+    justify-content: center;
+  }
+
+  .infra-icon {
+    width: 25px;
+    height: 25px;
+  }
+
+  .div-block {
+    display: -webkit-box;
+    display: -webkit-flex;
+    display: -ms-flexbox;
+    display: flex;
+    height: 100%;
+    -webkit-box-orient: vertical;
+    -webkit-box-direction: normal;
+    -webkit-flex-direction: column;
+    -ms-flex-direction: column;
+    flex-direction: column;
+    -webkit-box-pack: justify;
+    -webkit-justify-content: space-between;
+    -ms-flex-pack: justify;
+    justify-content: space-between;
+  }
+
+  #w-node-a2a9b648-f5dd-74e5-e1c2-f7aaf4fa1fcd-46672785 {
+    -ms-grid-column: span 1;
+    grid-column-start: span 1;
+    -ms-grid-column-span: 1;
+    grid-column-end: span 1;
+    -ms-grid-row: span 1;
+    grid-row-start: span 1;
+    -ms-grid-row-span: 1;
+    grid-row-end: span 1;
+  }
+
+  #w-node-_466aa2bf-88bf-5a65-eab4-fc1eb95e7384-46672785 {
+    -ms-grid-column:
span 1; + grid-column-start: span 1; + -ms-grid-column-span: 1; + grid-column-end: span 1; + -ms-grid-row: span 1; + grid-row-start: span 1; + -ms-grid-row-span: 1; + grid-row-end: span 1; + } + + #w-node-_87009ba3-d9a6-e0b7-4cce-581190a19cf3-46672785 { + -ms-grid-column: span 1; + grid-column-start: span 1; + -ms-grid-column-span: 1; + grid-column-end: span 1; + -ms-grid-row: span 1; + grid-row-start: span 1; + -ms-grid-row-span: 1; + grid-row-end: span 1; + } + + #w-node-_4a479fbb-90c7-9f47-d439-20aa6a224339-46672785 { + -ms-grid-column: span 1; + grid-column-start: span 1; + -ms-grid-column-span: 1; + grid-column-end: span 1; + -ms-grid-row: span 1; + grid-row-start: span 1; + -ms-grid-row-span: 1; + grid-row-end: span 1; + } + + + /* + + + inherited from the original template + + */ + + .w-container .w-row { + margin-left: -10px; + margin-right: -10px; + } + .w-row:before, + .w-row:after { + content: " "; + display: table; + grid-column-start: 1; + grid-row-start: 1; + grid-column-end: 2; + grid-row-end: 2; + } + .w-row:after { + clear: both; + } + .w-row .w-row { + margin-left: 0; + margin-right: 0; + } + .w-col { + position: relative; + float: left; + width: 100%; + min-height: 1px; + padding-left: 10px; + padding-right: 10px; + } + .w-col .w-col { + padding-left: 0; + padding-right: 0; + } + .w-col-1 { + width: 8.33333333%; + } + .w-col-2 { + width: 16.66666667%; + } + .w-col-3 { + width: 25%; + } + .w-col-4 { + width: 33.33333333%; + } + .w-col-5 { + width: 41.66666667%; + } + .w-col-6 { + width: 50%; + } + .w-col-7 { + width: 58.33333333%; + } + .w-col-8 { + width: 66.66666667%; + } + .w-col-9 { + width: 75%; + } + .w-col-10 { + width: 83.33333333%; + } + .w-col-11 { + width: 91.66666667%; + } + .w-col-12 { + width: 100%; + } + .w-hidden-main { + display: none !important; + } + @media screen and (max-width: 991px) { + .w-container { + max-width: 728px; + } + .w-hidden-main { + display: inherit !important; + } + .w-hidden-medium { + display: none !important; + } + .w-col-medium-1 { + width: 8.33333333%; + } + .w-col-medium-2 { + width: 16.66666667%; + } + .w-col-medium-3 { + width: 25%; + } + .w-col-medium-4 { + width: 33.33333333%; + } + .w-col-medium-5 { + width: 41.66666667%; + } + .w-col-medium-6 { + width: 50%; + } + .w-col-medium-7 { + width: 58.33333333%; + } + .w-col-medium-8 { + width: 66.66666667%; + } + .w-col-medium-9 { + width: 75%; + } + .w-col-medium-10 { + width: 83.33333333%; + } + .w-col-medium-11 { + width: 91.66666667%; + } + .w-col-medium-12 { + width: 100%; + } + .w-col-stack { + width: 100%; + left: auto; + right: auto; + } + } + @media screen and (max-width: 767px) { + .w-hidden-main { + display: inherit !important; + } + .w-hidden-medium { + display: inherit !important; + } + .w-hidden-small { + display: none !important; + } + .w-row, + .w-container .w-row { + margin-left: 0; + margin-right: 0; + } + .w-col { + width: 100%; + left: auto; + right: auto; + } + .w-col-small-1 { + width: 8.33333333%; + } + .w-col-small-2 { + width: 16.66666667%; + } + .w-col-small-3 { + width: 25%; + } + .w-col-small-4 { + width: 33.33333333%; + } + .w-col-small-5 { + width: 41.66666667%; + } + .w-col-small-6 { + width: 50%; + } + .w-col-small-7 { + width: 58.33333333%; + } + .w-col-small-8 { + width: 66.66666667%; + } + .w-col-small-9 { + width: 75%; + } + .w-col-small-10 { + width: 83.33333333%; + } + .w-col-small-11 { + width: 91.66666667%; + } + .w-col-small-12 { + width: 100%; + } + } + @media screen and (max-width: 479px) { + .w-container { + max-width: none; + } + 
.w-hidden-main { + display: inherit !important; + } + .w-hidden-medium { + display: inherit !important; + } + .w-hidden-small { + display: inherit !important; + } + .w-hidden-tiny { + display: none !important; + } + .w-col { + width: 100%; + } + .w-col-tiny-1 { + width: 8.33333333%; + } + .w-col-tiny-2 { + width: 16.66666667%; + } + .w-col-tiny-3 { + width: 25%; + } + .w-col-tiny-4 { + width: 33.33333333%; + } + .w-col-tiny-5 { + width: 41.66666667%; + } + .w-col-tiny-6 { + width: 50%; + } + .w-col-tiny-7 { + width: 58.33333333%; + } + .w-col-tiny-8 { + width: 66.66666667%; + } + .w-col-tiny-9 { + width: 75%; + } + .w-col-tiny-10 { + width: 83.33333333%; + } + .w-col-tiny-11 { + width: 91.66666667%; + } + .w-col-tiny-12 { + width: 100%; + } + } diff --git a/hsfs/docs/css/version-select.css b/hsfs/docs/css/version-select.css new file mode 100644 index 000000000..3b908ae84 --- /dev/null +++ b/hsfs/docs/css/version-select.css @@ -0,0 +1,36 @@ +@media only screen and (max-width:76.1875em) { +} + +#version-selector select.form-control { + appearance: none; + -webkit-appearance: none; + -moz-appearance: none; + + background-color: #F5F5F5; + + background-position: center right; + background-repeat: no-repeat; + border: 0px; + border-radius: 2px; + /* box-shadow: 0px 1px 3px rgb(0 0 0 / 10%); */ + color: inherit; + width: -webkit-fill-available; + width: -moz-available; + max-width: 200px; + font-size: inherit; + /* font-weight: 600; */ + margin: 10px; + overflow: hidden; + padding: 7px 10px; + text-overflow: ellipsis; + white-space: nowrap; +} + +#version-selector::after { + content: '⌄'; + font-family: inherit; + font-size: 22px; + margin: -35px; + vertical-align: 7%; + padding-bottom: 10px; +} diff --git a/hsfs/docs/index.md b/hsfs/docs/index.md new file mode 100644 index 000000000..a13ea2ce5 --- /dev/null +++ b/hsfs/docs/index.md @@ -0,0 +1,201 @@ +# Hopsworks Feature Store + +

+ Hopsworks Community + Hopsworks Feature Store Documentation + python + PyPiStatus + Scala/Java Artifacts + Downloads + Ruff + License +

+
+HSFS is the library to interact with the Hopsworks Feature Store. The library makes it easy to create new features, feature groups and training datasets.
+
+The library is environment independent and can be used in two modes:
+
+- Spark mode: For data engineering jobs that create and write features into the feature store or generate training datasets. It requires a Spark environment such as the one provided in the Hopsworks platform or Databricks. In Spark mode, HSFS provides bindings both for Python and JVM languages.
+
+- Python mode: For data science jobs to explore the features available in the feature store, generate training datasets and feed them into a training pipeline. Python mode requires just a Python interpreter and can be used in Hopsworks from Python jobs and Jupyter kernels, as well as from Amazon SageMaker or Kubeflow.
+
+The library automatically configures itself based on the environment in which it is run.
+However, to connect from an external environment such as Databricks or AWS SageMaker,
+additional connection information, such as host and port, is required. For more information, check out the [Hopsworks documentation](https://docs.hopsworks.ai/latest/).
+
+## Getting Started On Hopsworks
+
+Get started easily by registering an account on [Hopsworks Serverless](https://app.hopsworks.ai/). Create your project and a [new API key](https://docs.hopsworks.ai/latest/user_guides/projects/api_key/create_api_key/). In a new Python environment with Python 3.8 or higher, install the [client library](https://docs.hopsworks.ai/latest/user_guides/client_installation/) using pip:
+
+```bash
+# Get all Hopsworks SDKs: Feature Store, Model Serving and Platform SDK
+pip install hopsworks
+# or minimum install with the Feature Store SDK
+pip install hsfs[python]
+# if using zsh don't forget the quotes
+pip install 'hsfs[python]'
+```
+
+You can start a notebook, instantiate a connection and get the project feature store handle.
+
+```python
+import hopsworks
+
+project = hopsworks.login()  # you will be prompted for your api key
+fs = project.get_feature_store()
+```
+
+or using `hsfs` directly:
+
+```python
+import hsfs
+
+connection = hsfs.connection(
+    host="c.app.hopsworks.ai", #
+    project="your-project",
+    api_key_value="your-api-key",
+)
+fs = connection.get_feature_store()
+```
+
+Create a new feature group to start inserting feature values.
+```python
+fg = fs.create_feature_group("rain",
+                             version=1,
+                             description="Rain features",
+                             primary_key=['date', 'location_id'],
+                             online_enabled=True)
+
+fg.save(dataframe)
+```
+
+Upsert new data into a feature group with `time_travel_format="HUDI"`.
+```python
+fg.insert(upsert_df)
+```
+
+Retrieve the commit timeline metadata of a feature group with `time_travel_format="HUDI"`.
+```python
+fg.commit_details()
+```
+
+Read a feature group as of a specific point in time.
+```python
+fg = fs.get_feature_group("rain", 1)
+fg.read("2020-10-20 07:34:11").show()
+```
+
+Read updates that occurred between specified points in time.
+```python
+fg = fs.get_feature_group("rain", 1)
+fg.read_changes("2020-10-20 07:31:38", "2020-10-20 07:34:11").show()
+```
+
+Join features together:
+```python
+feature_join = rain_fg.select_all() \
+    .join(temperature_fg.select_all(), on=["date", "location_id"]) \
+    .join(location_fg.select_all())
+feature_join.show(5)
+```
+
+Join feature groups as of a specific point in time:
+```python
+feature_join = rain_fg.select_all() \
+    .join(temperature_fg.select_all(), on=["date", "location_id"]) \
+    .join(location_fg.select_all()) \
+    .as_of("2020-10-31")
+feature_join.show(5)
+```
+
+Join feature groups as of different points in time:
+```python
+rain_fg_q = rain_fg.select_all().as_of("2020-10-20 07:41:43")
+temperature_fg_q = temperature_fg.select_all().as_of("2020-10-20 07:32:33")
+location_fg_q = location_fg.select_all().as_of("2020-10-20 07:33:08")
+joined_features_q = rain_fg_q.join(temperature_fg_q).join(location_fg_q)
+```
+
+Use the query object to create a training dataset:
+```python
+td = fs.create_training_dataset("rain_dataset",
+                                version=1,
+                                data_format="tfrecords",
+                                description="A test training dataset saved in TfRecords format",
+                                splits={'train': 0.7, 'test': 0.2, 'validate': 0.1})
+
+td.save(feature_join)
+```
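+
+The saved training dataset can then be read back to feed a training pipeline. A minimal sketch, assuming the `get_training_dataset` and `read` calls of this API (check the API reference of your HSFS version for the exact signatures):
+```python
+# retrieve the training dataset and read the "train" split into a dataframe
+td = fs.get_training_dataset("rain_dataset", version=1)
+train_df = td.read(split="train")
+```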
+
+Read updates that occurred between specified points in time.
+```python
+fg = fs.get_feature_group("rain", 1)
+fg.read_changes("2020-10-20 07:31:38", "2020-10-20 07:34:11").show()
+```
+
+Join features together:
+```python
+feature_join = (rain_fg.select_all()
+                .join(temperature_fg.select_all(), on=["date", "location_id"])
+                .join(location_fg.select_all()))
+feature_join.show(5)
+```
+
+Join feature groups as of a specific point in time:
+```python
+feature_join = (rain_fg.select_all()
+                .join(temperature_fg.select_all(), on=["date", "location_id"])
+                .join(location_fg.select_all())
+                .as_of("2020-10-31"))
+feature_join.show(5)
+```
+
+Join feature groups as of different points in time:
+```python
+rain_fg_q = rain_fg.select_all().as_of("2020-10-20 07:41:43")
+temperature_fg_q = temperature_fg.select_all().as_of("2020-10-20 07:32:33")
+location_fg_q = location_fg.select_all().as_of("2020-10-20 07:33:08")
+joined_features_q = rain_fg_q.join(temperature_fg_q).join(location_fg_q)
+```
+
+Use the query object to create a training dataset:
+```python
+td = fs.create_training_dataset("rain_dataset",
+                                version=1,
+                                data_format="tfrecords",
+                                description="A test training dataset saved in TfRecords format",
+                                splits={'train': 0.7, 'test': 0.2, 'validate': 0.1})
+
+td.save(feature_join)
+```
+
+A short introduction to the Scala API:
+```scala
+import com.logicalclocks.hsfs._
+val connection = HopsworksConnection.builder().build()
+val fs = connection.getFeatureStore();
+val attendances_features_fg = fs.getFeatureGroup("games_features", 1);
+attendances_features_fg.show(1)
+```
+
+You can find more examples on how to use the library in our [hops-examples](https://github.com/logicalclocks/hops-examples) repository.
+
+## Usage
+
+Usage data is collected to improve the quality of the library. It is turned on by default if the backend
+is "c.app.hopsworks.ai". To turn it off, use one of the following ways:
+```python
+# use environment variable
+import os
+os.environ["ENABLE_HOPSWORKS_USAGE"] = "false"
+
+# use `disable_usage_logging`
+import hsfs
+hsfs.disable_usage_logging()
+```
+
+The source code can be found in python/hsfs/usage.py.
+
+## Documentation
+
+Documentation is available at [Hopsworks Feature Store Documentation](https://docs.hopsworks.ai/).
+
+## Issues
+
+For general questions about the usage of Hopsworks and the Feature Store please open a topic on [Hopsworks Community](https://community.hopsworks.ai/).
+
+Please report any issue using [GitHub issue tracking](https://github.com/logicalclocks/feature-store-api/issues).
+
+Please attach the client environment from the output below to the issue:
+```python
+import hopsworks
+import hsfs
+hopsworks.login().get_feature_store()
+print(hsfs.get_env())
+```
+
+## Contributing
+
+If you would like to contribute to this library, please see the [Contribution Guidelines](CONTRIBUTING.md).
diff --git a/hsfs/docs/js/dropdown.js b/hsfs/docs/js/dropdown.js
new file mode 100644
index 000000000..2618e0ce7
--- /dev/null
+++ b/hsfs/docs/js/dropdown.js
@@ -0,0 +1,3 @@
+// Hide two of the tab links in the docs theme's top navigation bar
+document.getElementsByClassName("md-tabs__link")[7].style.display = "none";
+document.getElementsByClassName("md-tabs__link")[9].style.display = "none";
diff --git a/hsfs/docs/js/inject-api-links.js b/hsfs/docs/js/inject-api-links.js
new file mode 100644
index 000000000..aa5852283
--- /dev/null
+++ b/hsfs/docs/js/inject-api-links.js
@@ -0,0 +1,32 @@
+window.addEventListener("DOMContentLoaded", function () {
+    var windowPathNameSplits = window.location.pathname.split("/");
+    var majorVersionRegex = new RegExp("(\\d+[.]\\d+)")
+    var latestRegex = new RegExp("latest");
+    if (majorVersionRegex.test(windowPathNameSplits[1])) { // On landing page docs.hopsworks.ai/3.0 - URL contains major version
+        // Version API dropdown
+        document.getElementById("hopsworks_api_link").href = "https://docs.hopsworks.ai/hopsworks-api/" + windowPathNameSplits[1] + "/generated/api/login/";
+        document.getElementById("hsfs_api_link").href = "https://docs.hopsworks.ai/feature-store-api/" + windowPathNameSplits[1] + "/generated/api/connection_api/";
+        document.getElementById("hsml_api_link").href = "https://docs.hopsworks.ai/machine-learning-api/" + windowPathNameSplits[1] + "/generated/connection_api/";
+    } else { // on docs.hopsworks.ai/feature-store-api/3.0 / docs.hopsworks.ai/hopsworks-api/3.0 / docs.hopsworks.ai/machine-learning-api/3.0
+        if (latestRegex.test(windowPathNameSplits[2]) || latestRegex.test(windowPathNameSplits[1])) {
+            var majorVersion = "latest";
+        } else {
+
+            var apiVersion = windowPathNameSplits[2];
+            var majorVersion = apiVersion.match(majorVersionRegex)[0];
+        }
+        // Version main navigation
+        document.getElementsByClassName("md-tabs__link")[0].href = "https://docs.hopsworks.ai/" + majorVersion;
+        document.getElementsByClassName("md-tabs__link")[1].href = "https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/quickstart.ipynb";
+        document.getElementsByClassName("md-tabs__link")[2].href = "https://docs.hopsworks.ai/" + majorVersion + "/tutorials/";
+        document.getElementsByClassName("md-tabs__link")[3].href = "https://docs.hopsworks.ai/" + majorVersion + "/concepts/hopsworks/";
+        document.getElementsByClassName("md-tabs__link")[4].href = "https://docs.hopsworks.ai/" + majorVersion + "/user_guides/";
+        document.getElementsByClassName("md-tabs__link")[5].href = "https://docs.hopsworks.ai/" + majorVersion + "/setup_installation/aws/getting_started/";
+        document.getElementsByClassName("md-tabs__link")[6].href = "https://docs.hopsworks.ai/" + majorVersion + "/admin/";
+        // Version API dropdown
+        document.getElementById("hopsworks_api_link").href = "https://docs.hopsworks.ai/hopsworks-api/" + majorVersion + "/generated/api/login/";
+        document.getElementById("hsfs_api_link").href = "https://docs.hopsworks.ai/feature-store-api/" + majorVersion + "/generated/api/connection_api/";
+        document.getElementById("hsfs_javadoc_link").href = "https://docs.hopsworks.ai/feature-store-api/" + majorVersion + "/javadoc";
+        document.getElementById("hsml_api_link").href = "https://docs.hopsworks.ai/machine-learning-api/" + majorVersion + "/generated/connection_api/";
+    }
+});
diff --git a/hsfs/docs/js/version-select.js b/hsfs/docs/js/version-select.js
new file mode 100644
index 000000000..fcac029e3
--- /dev/null
+++ b/hsfs/docs/js/version-select.js
@@ -0,0 +1,64 @@
+window.addEventListener("DOMContentLoaded",
function() { + // This is a bit hacky. Figure out the base URL from a known CSS file the + // template refers to... + var ex = new RegExp("/?css/version-select.css$"); + var sheet = document.querySelector('link[href$="version-select.css"]'); + + var ABS_BASE_URL = sheet.href.replace(ex, ""); + var CURRENT_VERSION = ABS_BASE_URL.split("/").pop(); + + function makeSelect(options, selected) { + var select = document.createElement("select"); + select.classList.add("form-control"); + + options.forEach(function(i) { + var option = new Option(i.text, i.value, undefined, + i.value === selected); + select.add(option); + }); + + return select; + } + + var xhr = new XMLHttpRequest(); + xhr.open("GET", ABS_BASE_URL + "/../versions.json"); + xhr.onload = function() { + var versions = JSON.parse(this.responseText); + + var realVersion = versions.find(function(i) { + return i.version === CURRENT_VERSION || + i.aliases.includes(CURRENT_VERSION); + }).version; + var latestVersion = versions.find(function(i) { + return i.aliases.includes("latest"); + }).version; + let outdated_banner = document.querySelector('div[data-md-color-scheme="default"][data-md-component="outdated"]'); + if (realVersion !== latestVersion) { + outdated_banner.removeAttribute("hidden"); + } else { + outdated_banner.setAttribute("hidden", ""); + } + + var select = makeSelect(versions.map(function(i) { + var allowedAliases = ["dev", "latest"] + if (i.aliases.length > 0) { + var aliasString = " [" + i.aliases.filter(function (str) { return allowedAliases.includes(str); }).join(", ") + "]"; + } else { + var aliasString = ""; + } + return {text: i.title + aliasString, value: i.version}; + }), realVersion); + select.addEventListener("change", function(event) { + window.location.href = ABS_BASE_URL + "/../" + this.value + "/generated/api/connection_api/"; + }); + + var container = document.createElement("div"); + container.id = "version-selector"; + // container.className = "md-nav__item"; + container.appendChild(select); + + var sidebar = document.querySelector(".md-nav--primary > .md-nav__list"); + sidebar.parentNode.insertBefore(container, sidebar.nextSibling); + }; + xhr.send(); +}); diff --git a/hsfs/docs/overrides/main.html b/hsfs/docs/overrides/main.html new file mode 100644 index 000000000..ecb09de07 --- /dev/null +++ b/hsfs/docs/overrides/main.html @@ -0,0 +1,8 @@ +{% extends "base.html" %} + +{% block outdated %} +You're not viewing the latest version of the documentation. + + Click here to go to latest. 
+ +{% endblock %} diff --git a/docs/templates/api/connection_api.md b/hsfs/docs/templates/api/connection_api.md similarity index 100% rename from docs/templates/api/connection_api.md rename to hsfs/docs/templates/api/connection_api.md diff --git a/docs/templates/api/embedding_feature_api.md b/hsfs/docs/templates/api/embedding_feature_api.md similarity index 100% rename from docs/templates/api/embedding_feature_api.md rename to hsfs/docs/templates/api/embedding_feature_api.md diff --git a/docs/templates/api/embedding_index_api.md b/hsfs/docs/templates/api/embedding_index_api.md similarity index 100% rename from docs/templates/api/embedding_index_api.md rename to hsfs/docs/templates/api/embedding_index_api.md diff --git a/docs/templates/api/expectation_api.md b/hsfs/docs/templates/api/expectation_api.md similarity index 100% rename from docs/templates/api/expectation_api.md rename to hsfs/docs/templates/api/expectation_api.md diff --git a/docs/templates/api/expectation_suite_api.md b/hsfs/docs/templates/api/expectation_suite_api.md similarity index 100% rename from docs/templates/api/expectation_suite_api.md rename to hsfs/docs/templates/api/expectation_suite_api.md diff --git a/docs/templates/api/external_feature_group_api.md b/hsfs/docs/templates/api/external_feature_group_api.md similarity index 100% rename from docs/templates/api/external_feature_group_api.md rename to hsfs/docs/templates/api/external_feature_group_api.md diff --git a/docs/templates/api/feature_api.md b/hsfs/docs/templates/api/feature_api.md similarity index 100% rename from docs/templates/api/feature_api.md rename to hsfs/docs/templates/api/feature_api.md diff --git a/docs/templates/api/feature_descriptive_statistics_api.md b/hsfs/docs/templates/api/feature_descriptive_statistics_api.md similarity index 100% rename from docs/templates/api/feature_descriptive_statistics_api.md rename to hsfs/docs/templates/api/feature_descriptive_statistics_api.md diff --git a/docs/templates/api/feature_group_api.md b/hsfs/docs/templates/api/feature_group_api.md similarity index 100% rename from docs/templates/api/feature_group_api.md rename to hsfs/docs/templates/api/feature_group_api.md diff --git a/docs/templates/api/feature_monitoring_config_api.md b/hsfs/docs/templates/api/feature_monitoring_config_api.md similarity index 100% rename from docs/templates/api/feature_monitoring_config_api.md rename to hsfs/docs/templates/api/feature_monitoring_config_api.md diff --git a/docs/templates/api/feature_monitoring_result_api.md b/hsfs/docs/templates/api/feature_monitoring_result_api.md similarity index 100% rename from docs/templates/api/feature_monitoring_result_api.md rename to hsfs/docs/templates/api/feature_monitoring_result_api.md diff --git a/docs/templates/api/feature_monitoring_window_config_api.md b/hsfs/docs/templates/api/feature_monitoring_window_config_api.md similarity index 100% rename from docs/templates/api/feature_monitoring_window_config_api.md rename to hsfs/docs/templates/api/feature_monitoring_window_config_api.md diff --git a/docs/templates/api/feature_store_api.md b/hsfs/docs/templates/api/feature_store_api.md similarity index 100% rename from docs/templates/api/feature_store_api.md rename to hsfs/docs/templates/api/feature_store_api.md diff --git a/docs/templates/api/feature_view_api.md b/hsfs/docs/templates/api/feature_view_api.md similarity index 100% rename from docs/templates/api/feature_view_api.md rename to hsfs/docs/templates/api/feature_view_api.md diff --git a/docs/templates/api/job.md 
b/hsfs/docs/templates/api/job.md similarity index 100% rename from docs/templates/api/job.md rename to hsfs/docs/templates/api/job.md diff --git a/docs/templates/api/links.md b/hsfs/docs/templates/api/links.md similarity index 100% rename from docs/templates/api/links.md rename to hsfs/docs/templates/api/links.md diff --git a/docs/templates/api/query_api.md b/hsfs/docs/templates/api/query_api.md similarity index 100% rename from docs/templates/api/query_api.md rename to hsfs/docs/templates/api/query_api.md diff --git a/docs/templates/api/rule_api.md b/hsfs/docs/templates/api/rule_api.md similarity index 100% rename from docs/templates/api/rule_api.md rename to hsfs/docs/templates/api/rule_api.md diff --git a/docs/templates/api/rule_definition_api.md b/hsfs/docs/templates/api/rule_definition_api.md similarity index 100% rename from docs/templates/api/rule_definition_api.md rename to hsfs/docs/templates/api/rule_definition_api.md diff --git a/docs/templates/api/similarity_function_type_api.md b/hsfs/docs/templates/api/similarity_function_type_api.md similarity index 100% rename from docs/templates/api/similarity_function_type_api.md rename to hsfs/docs/templates/api/similarity_function_type_api.md diff --git a/docs/templates/api/spine_group_api.md b/hsfs/docs/templates/api/spine_group_api.md similarity index 100% rename from docs/templates/api/spine_group_api.md rename to hsfs/docs/templates/api/spine_group_api.md diff --git a/docs/templates/api/split_statistics_api.md b/hsfs/docs/templates/api/split_statistics_api.md similarity index 100% rename from docs/templates/api/split_statistics_api.md rename to hsfs/docs/templates/api/split_statistics_api.md diff --git a/docs/templates/api/statistics_api.md b/hsfs/docs/templates/api/statistics_api.md similarity index 100% rename from docs/templates/api/statistics_api.md rename to hsfs/docs/templates/api/statistics_api.md diff --git a/docs/templates/api/statistics_config_api.md b/hsfs/docs/templates/api/statistics_config_api.md similarity index 100% rename from docs/templates/api/statistics_config_api.md rename to hsfs/docs/templates/api/statistics_config_api.md diff --git a/docs/templates/api/storage_connector_api.md b/hsfs/docs/templates/api/storage_connector_api.md similarity index 100% rename from docs/templates/api/storage_connector_api.md rename to hsfs/docs/templates/api/storage_connector_api.md diff --git a/docs/templates/api/training_dataset_api.md b/hsfs/docs/templates/api/training_dataset_api.md similarity index 100% rename from docs/templates/api/training_dataset_api.md rename to hsfs/docs/templates/api/training_dataset_api.md diff --git a/docs/templates/api/transformation_functions_api.md b/hsfs/docs/templates/api/transformation_functions_api.md similarity index 100% rename from docs/templates/api/transformation_functions_api.md rename to hsfs/docs/templates/api/transformation_functions_api.md diff --git a/docs/templates/api/validation_api.md b/hsfs/docs/templates/api/validation_api.md similarity index 100% rename from docs/templates/api/validation_api.md rename to hsfs/docs/templates/api/validation_api.md diff --git a/docs/templates/api/validation_report_api.md b/hsfs/docs/templates/api/validation_report_api.md similarity index 100% rename from docs/templates/api/validation_report_api.md rename to hsfs/docs/templates/api/validation_report_api.md diff --git a/java/beam/pom.xml b/hsfs/java/beam/pom.xml similarity index 100% rename from java/beam/pom.xml rename to hsfs/java/beam/pom.xml diff --git 
a/java/beam/src/main/java/com/logicalclocks/hsfs/beam/FeatureStore.java b/hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/FeatureStore.java similarity index 100% rename from java/beam/src/main/java/com/logicalclocks/hsfs/beam/FeatureStore.java rename to hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/FeatureStore.java diff --git a/java/beam/src/main/java/com/logicalclocks/hsfs/beam/FeatureView.java b/hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/FeatureView.java similarity index 100% rename from java/beam/src/main/java/com/logicalclocks/hsfs/beam/FeatureView.java rename to hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/FeatureView.java diff --git a/java/beam/src/main/java/com/logicalclocks/hsfs/beam/HopsworksConnection.java b/hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/HopsworksConnection.java similarity index 100% rename from java/beam/src/main/java/com/logicalclocks/hsfs/beam/HopsworksConnection.java rename to hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/HopsworksConnection.java diff --git a/java/beam/src/main/java/com/logicalclocks/hsfs/beam/StreamFeatureGroup.java b/hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/StreamFeatureGroup.java similarity index 100% rename from java/beam/src/main/java/com/logicalclocks/hsfs/beam/StreamFeatureGroup.java rename to hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/StreamFeatureGroup.java diff --git a/java/beam/src/main/java/com/logicalclocks/hsfs/beam/constructor/Query.java b/hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/constructor/Query.java similarity index 100% rename from java/beam/src/main/java/com/logicalclocks/hsfs/beam/constructor/Query.java rename to hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/constructor/Query.java diff --git a/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/BeamEngine.java b/hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/BeamEngine.java similarity index 100% rename from java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/BeamEngine.java rename to hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/BeamEngine.java diff --git a/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/BeamKafkaProducer.java b/hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/BeamKafkaProducer.java similarity index 100% rename from java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/BeamKafkaProducer.java rename to hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/BeamKafkaProducer.java diff --git a/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/BeamProducer.java b/hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/BeamProducer.java similarity index 100% rename from java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/BeamProducer.java rename to hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/BeamProducer.java diff --git a/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/FeatureGroupEngine.java b/hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/FeatureGroupEngine.java similarity index 100% rename from java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/FeatureGroupEngine.java rename to hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/FeatureGroupEngine.java diff --git a/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/FeatureViewEngine.java b/hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/FeatureViewEngine.java similarity index 100% 
rename from java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/FeatureViewEngine.java rename to hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/FeatureViewEngine.java diff --git a/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/GenericAvroSerializer.java b/hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/GenericAvroSerializer.java similarity index 100% rename from java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/GenericAvroSerializer.java rename to hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/GenericAvroSerializer.java diff --git a/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/KeySerializer.java b/hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/KeySerializer.java similarity index 100% rename from java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/KeySerializer.java rename to hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/engine/KeySerializer.java diff --git a/java/flink/pom.xml b/hsfs/java/flink/pom.xml similarity index 100% rename from java/flink/pom.xml rename to hsfs/java/flink/pom.xml diff --git a/java/flink/src/main/java/com/logicalclocks/hsfs/flink/FeatureStore.java b/hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/FeatureStore.java similarity index 100% rename from java/flink/src/main/java/com/logicalclocks/hsfs/flink/FeatureStore.java rename to hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/FeatureStore.java diff --git a/java/flink/src/main/java/com/logicalclocks/hsfs/flink/FeatureView.java b/hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/FeatureView.java similarity index 100% rename from java/flink/src/main/java/com/logicalclocks/hsfs/flink/FeatureView.java rename to hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/FeatureView.java diff --git a/java/flink/src/main/java/com/logicalclocks/hsfs/flink/HopsworksConnection.java b/hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/HopsworksConnection.java similarity index 100% rename from java/flink/src/main/java/com/logicalclocks/hsfs/flink/HopsworksConnection.java rename to hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/HopsworksConnection.java diff --git a/java/flink/src/main/java/com/logicalclocks/hsfs/flink/StreamFeatureGroup.java b/hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/StreamFeatureGroup.java similarity index 100% rename from java/flink/src/main/java/com/logicalclocks/hsfs/flink/StreamFeatureGroup.java rename to hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/StreamFeatureGroup.java diff --git a/java/flink/src/main/java/com/logicalclocks/hsfs/flink/constructor/FsQuery.java b/hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/constructor/FsQuery.java similarity index 100% rename from java/flink/src/main/java/com/logicalclocks/hsfs/flink/constructor/FsQuery.java rename to hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/constructor/FsQuery.java diff --git a/java/flink/src/main/java/com/logicalclocks/hsfs/flink/constructor/Query.java b/hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/constructor/Query.java similarity index 100% rename from java/flink/src/main/java/com/logicalclocks/hsfs/flink/constructor/Query.java rename to hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/constructor/Query.java diff --git a/java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/FeatureGroupEngine.java 
b/hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/FeatureGroupEngine.java similarity index 100% rename from java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/FeatureGroupEngine.java rename to hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/FeatureGroupEngine.java diff --git a/java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/FeatureViewEngine.java b/hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/FeatureViewEngine.java similarity index 100% rename from java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/FeatureViewEngine.java rename to hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/FeatureViewEngine.java diff --git a/java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/FlinkEngine.java b/hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/FlinkEngine.java similarity index 100% rename from java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/FlinkEngine.java rename to hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/FlinkEngine.java diff --git a/java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/KafkaRecordSerializer.java b/hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/KafkaRecordSerializer.java similarity index 100% rename from java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/KafkaRecordSerializer.java rename to hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/KafkaRecordSerializer.java diff --git a/java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/PojoToAvroRecord.java b/hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/PojoToAvroRecord.java similarity index 100% rename from java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/PojoToAvroRecord.java rename to hsfs/java/flink/src/main/java/com/logicalclocks/hsfs/flink/engine/PojoToAvroRecord.java diff --git a/java/hsfs/pom.xml b/hsfs/java/hsfs/pom.xml similarity index 100% rename from java/hsfs/pom.xml rename to hsfs/java/hsfs/pom.xml diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/DataFormat.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/DataFormat.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/DataFormat.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/DataFormat.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/DeltaStreamerJobConf.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/DeltaStreamerJobConf.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/DeltaStreamerJobConf.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/DeltaStreamerJobConf.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/EntityEndpointType.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/EntityEndpointType.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/EntityEndpointType.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/EntityEndpointType.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/ExternalDataFormat.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/ExternalDataFormat.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/ExternalDataFormat.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/ExternalDataFormat.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/Feature.java 
b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/Feature.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/Feature.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/Feature.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureGroupBase.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureGroupBase.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureGroupBase.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureGroupBase.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureGroupBaseForApi.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureGroupBaseForApi.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureGroupBaseForApi.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureGroupBaseForApi.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureGroupCommit.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureGroupCommit.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureGroupCommit.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureGroupCommit.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureStoreBase.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureStoreBase.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureStoreBase.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureStoreBase.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureStoreException.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureStoreException.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureStoreException.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureStoreException.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureType.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureType.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureType.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureType.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureViewBase.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureViewBase.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureViewBase.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureViewBase.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/HopsworksConnectionBase.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/HopsworksConnectionBase.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/HopsworksConnectionBase.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/HopsworksConnectionBase.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/HudiOperationType.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/HudiOperationType.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/HudiOperationType.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/HudiOperationType.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/JobConfiguration.java 
b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/JobConfiguration.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/JobConfiguration.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/JobConfiguration.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/Project.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/Project.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/Project.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/Project.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/SecretStore.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/SecretStore.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/SecretStore.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/SecretStore.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/SecurityProtocol.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/SecurityProtocol.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/SecurityProtocol.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/SecurityProtocol.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/Split.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/Split.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/Split.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/Split.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/SslEndpointIdentificationAlgorithm.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/SslEndpointIdentificationAlgorithm.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/SslEndpointIdentificationAlgorithm.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/SslEndpointIdentificationAlgorithm.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/StatisticsConfig.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/StatisticsConfig.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/StatisticsConfig.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/StatisticsConfig.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/Storage.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/Storage.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/Storage.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/Storage.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/StorageConnector.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/StorageConnector.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/StorageConnector.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/StorageConnector.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/StorageConnectorType.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/StorageConnectorType.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/StorageConnectorType.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/StorageConnectorType.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/TimeTravelFormat.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/TimeTravelFormat.java similarity index 100% rename from 
java/hsfs/src/main/java/com/logicalclocks/hsfs/TimeTravelFormat.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/TimeTravelFormat.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/TrainingDatasetBase.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/TrainingDatasetBase.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/TrainingDatasetBase.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/TrainingDatasetBase.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/TrainingDatasetFeature.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/TrainingDatasetFeature.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/TrainingDatasetFeature.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/TrainingDatasetFeature.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/TrainingDatasetType.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/TrainingDatasetType.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/TrainingDatasetType.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/TrainingDatasetType.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/TransformationFunction.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/TransformationFunction.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/TransformationFunction.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/TransformationFunction.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/FeatureGroupAlias.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/FeatureGroupAlias.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/FeatureGroupAlias.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/FeatureGroupAlias.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/Filter.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/Filter.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/Filter.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/Filter.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/FilterLogic.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/FilterLogic.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/FilterLogic.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/FilterLogic.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/FsQueryBase.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/FsQueryBase.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/FsQueryBase.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/FsQueryBase.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/Join.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/Join.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/Join.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/Join.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/JoinType.java 
b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/JoinType.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/JoinType.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/JoinType.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/PreparedStatementParameter.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/PreparedStatementParameter.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/PreparedStatementParameter.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/PreparedStatementParameter.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/QueryBase.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/QueryBase.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/QueryBase.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/QueryBase.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/ServingPreparedStatement.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/ServingPreparedStatement.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/ServingPreparedStatement.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/ServingPreparedStatement.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/SqlFilterCondition.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/SqlFilterCondition.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/SqlFilterCondition.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/SqlFilterCondition.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/SqlFilterLogic.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/SqlFilterLogic.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/SqlFilterLogic.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/constructor/SqlFilterLogic.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/CodeEngine.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/CodeEngine.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/CodeEngine.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/CodeEngine.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/EngineBase.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/EngineBase.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/EngineBase.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/EngineBase.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/FeatureGroupEngineBase.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/FeatureGroupEngineBase.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/FeatureGroupEngineBase.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/FeatureGroupEngineBase.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/FeatureGroupUtils.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/FeatureGroupUtils.java 
similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/FeatureGroupUtils.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/FeatureGroupUtils.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/FeatureViewEngineBase.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/FeatureViewEngineBase.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/FeatureViewEngineBase.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/FeatureViewEngineBase.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/VectorServer.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/VectorServer.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/VectorServer.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/engine/VectorServer.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/AuthorizationHandler.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/AuthorizationHandler.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/AuthorizationHandler.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/AuthorizationHandler.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Code.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Code.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Code.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Code.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/CodeApi.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/CodeApi.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/CodeApi.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/CodeApi.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Credentials.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Credentials.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Credentials.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Credentials.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/DatasetApi.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/DatasetApi.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/DatasetApi.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/DatasetApi.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureDescriptiveStatistics.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureDescriptiveStatistics.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureDescriptiveStatistics.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureDescriptiveStatistics.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureGroupApi.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureGroupApi.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureGroupApi.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureGroupApi.java 
diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureStoreApi.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureStoreApi.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureStoreApi.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureStoreApi.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureViewApi.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureViewApi.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureViewApi.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/FeatureViewApi.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksClient.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksClient.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksClient.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksClient.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksExternalClient.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksExternalClient.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksExternalClient.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksExternalClient.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksHostnameVerifier.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksHostnameVerifier.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksHostnameVerifier.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksHostnameVerifier.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksHttpClient.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksHttpClient.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksHttpClient.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksHttpClient.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksInternalClient.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksInternalClient.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksInternalClient.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/HopsworksInternalClient.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/InternalException.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/InternalException.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/InternalException.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/InternalException.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/KafkaApi.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/KafkaApi.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/KafkaApi.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/KafkaApi.java diff --git 
a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/KafkaClusterInfo.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/KafkaClusterInfo.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/KafkaClusterInfo.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/KafkaClusterInfo.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/OnDemandOptions.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/OnDemandOptions.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/OnDemandOptions.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/OnDemandOptions.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Option.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Option.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Option.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Option.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/ProjectApi.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/ProjectApi.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/ProjectApi.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/ProjectApi.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/QueryConstructorApi.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/QueryConstructorApi.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/QueryConstructorApi.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/QueryConstructorApi.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/RestDto.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/RestDto.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/RestDto.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/RestDto.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/SplitStatistics.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/SplitStatistics.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/SplitStatistics.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/SplitStatistics.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Statistics.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Statistics.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Statistics.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Statistics.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/StatisticsApi.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/StatisticsApi.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/StatisticsApi.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/StatisticsApi.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/StorageConnectorApi.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/StorageConnectorApi.java similarity index 100% rename from 
java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/StorageConnectorApi.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/StorageConnectorApi.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Subject.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Subject.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Subject.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Subject.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Tags.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Tags.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Tags.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Tags.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/TagsApi.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/TagsApi.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/TagsApi.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/TagsApi.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/TrainingDatasetApi.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/TrainingDatasetApi.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/TrainingDatasetApi.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/TrainingDatasetApi.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/TransformationFunctionAttached.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/TransformationFunctionAttached.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/TransformationFunctionAttached.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/TransformationFunctionAttached.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/UnauthorizedException.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/UnauthorizedException.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/UnauthorizedException.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/UnauthorizedException.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/User.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/User.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/User.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/User.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Variable.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Variable.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Variable.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/Variable.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/VariablesApi.java b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/VariablesApi.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/VariablesApi.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/metadata/VariablesApi.java diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/util/Constants.java 
b/hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/util/Constants.java similarity index 100% rename from java/hsfs/src/main/java/com/logicalclocks/hsfs/util/Constants.java rename to hsfs/java/hsfs/src/main/java/com/logicalclocks/hsfs/util/Constants.java diff --git a/java/hsfs/src/test/java/com/logicalclocks/hsfs/TestFeatureGroupBaseForApi.java b/hsfs/java/hsfs/src/test/java/com/logicalclocks/hsfs/TestFeatureGroupBaseForApi.java similarity index 100% rename from java/hsfs/src/test/java/com/logicalclocks/hsfs/TestFeatureGroupBaseForApi.java rename to hsfs/java/hsfs/src/test/java/com/logicalclocks/hsfs/TestFeatureGroupBaseForApi.java diff --git a/java/hsfs/src/test/java/com/logicalclocks/hsfs/TestHopsworksExternalClient.java b/hsfs/java/hsfs/src/test/java/com/logicalclocks/hsfs/TestHopsworksExternalClient.java similarity index 100% rename from java/hsfs/src/test/java/com/logicalclocks/hsfs/TestHopsworksExternalClient.java rename to hsfs/java/hsfs/src/test/java/com/logicalclocks/hsfs/TestHopsworksExternalClient.java diff --git a/java/hsfs/src/test/java/com/logicalclocks/hsfs/engine/TestFeatureGroupUtils.java b/hsfs/java/hsfs/src/test/java/com/logicalclocks/hsfs/engine/TestFeatureGroupUtils.java similarity index 100% rename from java/hsfs/src/test/java/com/logicalclocks/hsfs/engine/TestFeatureGroupUtils.java rename to hsfs/java/hsfs/src/test/java/com/logicalclocks/hsfs/engine/TestFeatureGroupUtils.java diff --git a/java/hsfs/src/test/java/com/logicalclocks/hsfs/metadata/TestHopsworksClient.java b/hsfs/java/hsfs/src/test/java/com/logicalclocks/hsfs/metadata/TestHopsworksClient.java similarity index 100% rename from java/hsfs/src/test/java/com/logicalclocks/hsfs/metadata/TestHopsworksClient.java rename to hsfs/java/hsfs/src/test/java/com/logicalclocks/hsfs/metadata/TestHopsworksClient.java diff --git a/java/hsfs/src/test/java/com/logicalclocks/hsfs/metadata/TestTagsApi.java b/hsfs/java/hsfs/src/test/java/com/logicalclocks/hsfs/metadata/TestTagsApi.java similarity index 100% rename from java/hsfs/src/test/java/com/logicalclocks/hsfs/metadata/TestTagsApi.java rename to hsfs/java/hsfs/src/test/java/com/logicalclocks/hsfs/metadata/TestTagsApi.java diff --git a/hsfs/java/pom.xml b/hsfs/java/pom.xml new file mode 100644 index 000000000..23136cb24 --- /dev/null +++ b/hsfs/java/pom.xml @@ -0,0 +1,308 @@ + + + 4.0.0 + + com.logicalclocks + hsfs-parent + pom + 4.0.0-SNAPSHOT + + hsfs + spark + flink + beam + + + + 1.8 + 1.8 + 14.0.1 + 4.5.6 + 4.4.13 + 1.7.30 + 1.2.17 + 2.1.8 + 1.18.10 + 2.10.0 + 1.1.0.6-SNAPSHOT + 0.12.3.0 + 2.10.40 + 2.12.10 + 2.12 + 0.0.5 + 20231013 + 0.12.2 + 5.9.1 + 2.22.0 + 4.3.1 + 1.8.2 + + spark3.1 + + UTF-8 + ${project.basedir}/delombok + + + + + org.projectlombok + lombok + ${lombok.version} + + + + com.damnhandy + handy-uri-templates + ${handy.version} + + + + com.google.guava + guava + ${guava.version} + provided + + + + org.apache.httpcomponents + httpclient + ${httpclient.version} + provided + + + + org.apache.httpcomponents + httpcore + ${httpcore.version} + provided + + + + org.slf4j + slf4j-api + ${slf4j.version} + provided + + + + org.slf4j + slf4j-log4j12 + ${slf4j.version} + provided + + + + log4j + log4j + ${log4j.version} + provided + + + + org.json + json + ${json.version} + + + + io.specto + hoverfly-java + ${hoverfly.version} + test + + + + org.junit.jupiter + junit-jupiter-api + ${junit.version} + test + + + + org.junit.jupiter + junit-jupiter-engine + ${junit.version} + test + + + + org.mockito + mockito-core + ${mockito.version} + test + + + + + + + 
org.scala-tools + maven-scala-plugin + + ${scala.version} + + + + scala-compile-first + process-resources + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + org.apache.maven.plugins + maven-assembly-plugin + 2.4.1 + + + + jar-with-dependencies + + + + + make-assembly + + package + + single + + + + + + org.apache.maven.plugins + maven-checkstyle-plugin + 3.1.1 + + + validate + validate + + check + + + + + src/main/resources/checkstyle.xml + src/main/resources/suppressions.xml + true + true + true + true + + src/main/java + + + + + org.apache.maven.plugins + maven-surefire-plugin + ${surefire-plugin.version} + + + + hadoop.home.dir + ${project.basedir}/src/test/resources/hadoop/ + + + src/test/resources/system.properties + + + + org.projectlombok + lombok-maven-plugin + ${lombok.version}.0 + + ${project.basedir}/src/main/java + ${delombok.output} + false + + + + + delombok + + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.5.0 + + + + **/MainClass.java + + **/beam/constructor/* + **/flink/constructor/* + + + + + aggregate + + aggregate + + site + + + + + + + + + + + src/test/resources + + + + + + + spark-3.3 + + 2.0.4.0-spark-3.3 + spark3.3 + + + + + + + Hops + Hops Repo + https://archiva.hops.works/repository/Hops/ + + true + + + true + + + + + + + Hops + Hops Repo + https://archiva.hops.works/repository/Hops/ + + + diff --git a/java/spark/pom.xml b/hsfs/java/spark/pom.xml similarity index 100% rename from java/spark/pom.xml rename to hsfs/java/spark/pom.xml diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/ExternalFeatureGroup.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/ExternalFeatureGroup.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/ExternalFeatureGroup.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/ExternalFeatureGroup.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureGroup.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureGroup.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureGroup.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureGroup.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureStore.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureStore.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureStore.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureStore.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureView.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureView.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureView.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureView.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/HopsworksConnection.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/HopsworksConnection.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/HopsworksConnection.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/HopsworksConnection.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/MainClass.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/MainClass.java 
similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/MainClass.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/MainClass.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/StreamFeatureGroup.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/StreamFeatureGroup.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/StreamFeatureGroup.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/StreamFeatureGroup.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/TrainingDataset.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/TrainingDataset.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/TrainingDataset.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/TrainingDataset.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/TrainingDatasetBundle.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/TrainingDatasetBundle.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/TrainingDatasetBundle.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/TrainingDatasetBundle.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/constructor/FsQuery.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/constructor/FsQuery.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/constructor/FsQuery.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/constructor/FsQuery.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/constructor/Query.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/constructor/Query.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/constructor/Query.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/constructor/Query.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/FeatureGroupEngine.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/FeatureGroupEngine.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/FeatureGroupEngine.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/FeatureGroupEngine.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/FeatureViewEngine.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/FeatureViewEngine.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/FeatureViewEngine.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/FeatureViewEngine.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/SparkEngine.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/SparkEngine.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/SparkEngine.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/SparkEngine.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/StatisticsEngine.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/StatisticsEngine.java similarity index 100% rename from 
java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/StatisticsEngine.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/StatisticsEngine.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/TrainingDatasetEngine.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/TrainingDatasetEngine.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/TrainingDatasetEngine.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/TrainingDatasetEngine.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/TrainingDatasetUtils.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/TrainingDatasetUtils.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/TrainingDatasetUtils.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/TrainingDatasetUtils.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerAvroDeserializer.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerAvroDeserializer.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerAvroDeserializer.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerAvroDeserializer.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerConfig.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerConfig.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerConfig.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerConfig.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerKafkaSource.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerKafkaSource.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerKafkaSource.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerKafkaSource.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerSchemaProvider.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerSchemaProvider.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerSchemaProvider.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerSchemaProvider.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerTransformer.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerTransformer.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerTransformer.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/DeltaStreamerTransformer.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/HudiEngine.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/HudiEngine.java similarity index 100% rename from 
java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/HudiEngine.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/hudi/HudiEngine.java diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/util/StorageConnectorUtils.java b/hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/util/StorageConnectorUtils.java similarity index 100% rename from java/spark/src/main/java/com/logicalclocks/hsfs/spark/util/StorageConnectorUtils.java rename to hsfs/java/spark/src/main/java/com/logicalclocks/hsfs/spark/util/StorageConnectorUtils.java diff --git a/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestExternalFeatureGroup.java b/hsfs/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestExternalFeatureGroup.java similarity index 100% rename from java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestExternalFeatureGroup.java rename to hsfs/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestExternalFeatureGroup.java diff --git a/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeature.java b/hsfs/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeature.java similarity index 100% rename from java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeature.java rename to hsfs/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeature.java diff --git a/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeatureGroup.java b/hsfs/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeatureGroup.java similarity index 100% rename from java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeatureGroup.java rename to hsfs/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeatureGroup.java diff --git a/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeatureView.java b/hsfs/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeatureView.java similarity index 100% rename from java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeatureView.java rename to hsfs/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeatureView.java diff --git a/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestStorageConnector.java b/hsfs/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestStorageConnector.java similarity index 100% rename from java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestStorageConnector.java rename to hsfs/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestStorageConnector.java diff --git a/java/spark/src/test/java/com/logicalclocks/hsfs/spark/constructor/TestQuery.java b/hsfs/java/spark/src/test/java/com/logicalclocks/hsfs/spark/constructor/TestQuery.java similarity index 100% rename from java/spark/src/test/java/com/logicalclocks/hsfs/spark/constructor/TestQuery.java rename to hsfs/java/spark/src/test/java/com/logicalclocks/hsfs/spark/constructor/TestQuery.java diff --git a/java/spark/src/test/java/com/logicalclocks/hsfs/spark/engine/TestFeatureViewEngine.java b/hsfs/java/spark/src/test/java/com/logicalclocks/hsfs/spark/engine/TestFeatureViewEngine.java similarity index 100% rename from java/spark/src/test/java/com/logicalclocks/hsfs/spark/engine/TestFeatureViewEngine.java rename to hsfs/java/spark/src/test/java/com/logicalclocks/hsfs/spark/engine/TestFeatureViewEngine.java diff --git a/java/spark/src/test/java/com/logicalclocks/hsfs/spark/engine/TestHudiEngine.java b/hsfs/java/spark/src/test/java/com/logicalclocks/hsfs/spark/engine/TestHudiEngine.java similarity index 100% rename from 
java/spark/src/test/java/com/logicalclocks/hsfs/spark/engine/TestHudiEngine.java rename to hsfs/java/spark/src/test/java/com/logicalclocks/hsfs/spark/engine/TestHudiEngine.java diff --git a/java/spark/src/test/java/com/logicalclocks/hsfs/spark/engine/TestSparkEngine.java b/hsfs/java/spark/src/test/java/com/logicalclocks/hsfs/spark/engine/TestSparkEngine.java similarity index 100% rename from java/spark/src/test/java/com/logicalclocks/hsfs/spark/engine/TestSparkEngine.java rename to hsfs/java/spark/src/test/java/com/logicalclocks/hsfs/spark/engine/TestSparkEngine.java diff --git a/java/spark/src/test/resources/hadoop/bin/winutils.exe b/hsfs/java/spark/src/test/resources/hadoop/bin/winutils.exe similarity index 100% rename from java/spark/src/test/resources/hadoop/bin/winutils.exe rename to hsfs/java/spark/src/test/resources/hadoop/bin/winutils.exe diff --git a/java/spark/src/test/resources/system.properties b/hsfs/java/spark/src/test/resources/system.properties similarity index 100% rename from java/spark/src/test/resources/system.properties rename to hsfs/java/spark/src/test/resources/system.properties diff --git a/java/src/main/resources/checkstyle.xml b/hsfs/java/src/main/resources/checkstyle.xml similarity index 100% rename from java/src/main/resources/checkstyle.xml rename to hsfs/java/src/main/resources/checkstyle.xml diff --git a/java/src/main/resources/suppressions.xml b/hsfs/java/src/main/resources/suppressions.xml similarity index 100% rename from java/src/main/resources/suppressions.xml rename to hsfs/java/src/main/resources/suppressions.xml diff --git a/java/src/test/resources/hadoop/bin/winutils.exe b/hsfs/java/src/test/resources/hadoop/bin/winutils.exe similarity index 100% rename from java/src/test/resources/hadoop/bin/winutils.exe rename to hsfs/java/src/test/resources/hadoop/bin/winutils.exe diff --git a/java/src/test/resources/system.properties b/hsfs/java/src/test/resources/system.properties similarity index 100% rename from java/src/test/resources/system.properties rename to hsfs/java/src/test/resources/system.properties diff --git a/locust_benchmark/Dockerfile b/hsfs/locust_benchmark/Dockerfile similarity index 100% rename from locust_benchmark/Dockerfile rename to hsfs/locust_benchmark/Dockerfile diff --git a/locust_benchmark/README.md b/hsfs/locust_benchmark/README.md similarity index 100% rename from locust_benchmark/README.md rename to hsfs/locust_benchmark/README.md diff --git a/locust_benchmark/common/__init__.py b/hsfs/locust_benchmark/common/__init__.py similarity index 100% rename from locust_benchmark/common/__init__.py rename to hsfs/locust_benchmark/common/__init__.py diff --git a/locust_benchmark/common/hopsworks_client.py b/hsfs/locust_benchmark/common/hopsworks_client.py similarity index 100% rename from locust_benchmark/common/hopsworks_client.py rename to hsfs/locust_benchmark/common/hopsworks_client.py diff --git a/locust_benchmark/common/stop_watch.py b/hsfs/locust_benchmark/common/stop_watch.py similarity index 100% rename from locust_benchmark/common/stop_watch.py rename to hsfs/locust_benchmark/common/stop_watch.py diff --git a/locust_benchmark/create_feature_group.py b/hsfs/locust_benchmark/create_feature_group.py similarity index 100% rename from locust_benchmark/create_feature_group.py rename to hsfs/locust_benchmark/create_feature_group.py diff --git a/locust_benchmark/docker-compose.yml b/hsfs/locust_benchmark/docker-compose.yml similarity index 100% rename from locust_benchmark/docker-compose.yml rename to 
hsfs/locust_benchmark/docker-compose.yml diff --git a/locust_benchmark/hopsworks_config.json b/hsfs/locust_benchmark/hopsworks_config.json similarity index 100% rename from locust_benchmark/hopsworks_config.json rename to hsfs/locust_benchmark/hopsworks_config.json diff --git a/locust_benchmark/locustfile.py b/hsfs/locust_benchmark/locustfile.py similarity index 100% rename from locust_benchmark/locustfile.py rename to hsfs/locust_benchmark/locustfile.py diff --git a/locust_benchmark/requirements.txt b/hsfs/locust_benchmark/requirements.txt similarity index 100% rename from locust_benchmark/requirements.txt rename to hsfs/locust_benchmark/requirements.txt diff --git a/hsfs/mkdocs.yml b/hsfs/mkdocs.yml new file mode 100644 index 000000000..21fb704e1 --- /dev/null +++ b/hsfs/mkdocs.yml @@ -0,0 +1,130 @@ +site_name: "Hopsworks Documentation" +site_description: "Official documentation for Hopsworks and its Feature Store - an open source data-intensive AI platform used for the development and operation of machine learning models at scale." +site_author: "Logical Clocks" +site_url: "https://docs.hopsworks.ai/feature-store-api/latest" + +# Repository +repo_name: logicalclocks/hopsworks +repo_url: https://github.com/logicalclocks/hopsworks +edit_uri: "" + +nav: + - Home: https://docs.hopsworks.ai/ + - Getting Started ↗: https://docs.hopsworks.ai/ + - Tutorials: https://docs.hopsworks.ai/ + - Concepts: https://docs.hopsworks.ai/ + - Guides: https://docs.hopsworks.ai/ + - Setup and Installation: https://docs.hopsworks.ai/ + - Administration: https://docs.hopsworks.ai/ + - API: + - Feature Store API Reference: + - Connection: generated/api/connection_api.md + - ExpectationSuite: generated/api/expectation_suite_api.md + - FeatureStore: generated/api/feature_store_api.md + - FeatureGroup: generated/api/feature_group_api.md + - ExternalFeatureGroup: generated/api/external_feature_group_api.md + - SpineGroup: generated/api/spine_group_api.md + - FeatureView: generated/api/feature_view_api.md + - TrainingDataset: generated/api/training_dataset_api.md + - Storage Connector: generated/api/storage_connector_api.md + - Feature: generated/api/feature_api.md + - Query: generated/api/query_api.md + - Transformation Functions: generated/api/transformation_functions_api.md + - ValidationReport: generated/api/validation_report_api.md + - Job: generated/api/job.md + - Provenance Links: generated/api/links.md + - Statistics: + - Statistics: generated/api/statistics_api.md + - Split Statistics: generated/api/split_statistics_api.md + - Feature descriptive statistics: generated/api/feature_descriptive_statistics_api.md + - Feature Monitoring: + - Configuration: generated/api/feature_monitoring_config_api.md + - Result: generated/api/feature_monitoring_result_api.md + - Window: generated/api/feature_monitoring_window_config_api.md + - Embedding: + - EmbeddingIndex: generated/api/embedding_index_api.md + - EmbeddingFeature: generated/api/embedding_feature_api.md + - SimilarityFunctionType: generated/api/similarity_function_type_api.md + # Added to allow navigation using the side drawer + - Hopsworks API: https://docs.hopsworks.ai/hopsworks-api/latest/ + - MLOps API: https://docs.hopsworks.ai/machine-learning-api/latest/ + - Feature Store JavaDoc: https://docs.hopsworks.ai/feature-store-javadoc/latest/ + - Contributing: CONTRIBUTING.md + - Community ↗: https://community.hopsworks.ai/ + +theme: + name: material + custom_dir: docs/overrides + favicon: assets/images/favicon.ico + logo: assets/images/hops-logo.png + icon: + 
repo: fontawesome/brands/github + font: + text: "Roboto" + code: "IBM Plex Mono" + palette: + accent: teal + scheme: hopsworks + features: + - navigation.tabs + - navigation.tabs.sticky + - navigation.sections + - navigation.indexes + +extra: + analytics: + provider: google + property: G-64FEEXPSDN + generator: false + version: + - provider: mike + - default: latest + social: + - icon: fontawesome/brands/twitter + link: https://twitter.com/logicalclocks + - icon: fontawesome/brands/github + link: https://github.com/logicalclocks/hopsworks + - icon: fontawesome/brands/discourse + link: https://community.hopsworks.ai/ + - icon: fontawesome/brands/linkedin + link: https://www.linkedin.com/company/logicalclocks/ + +extra_css: + - css/custom.css + - css/version-select.css + - css/dropdown.css + - css/marctech.css + +extra_javascript: + - js/version-select.js + - js/inject-api-links.js + - js/dropdown.js + +plugins: + - search + - minify: + minify_html: true + minify_css: true + minify_js: true + - mike: + canonical_version: latest + +markdown_extensions: + - admonition + - codehilite + - footnotes + - pymdownx.tabbed: + alternate_style: true + - pymdownx.arithmatex + - pymdownx.superfences + - pymdownx.details + - pymdownx.caret + - pymdownx.mark + - pymdownx.tilde + - pymdownx.critic + - toc: + permalink: "#" + - pymdownx.tasklist: + custom_checkbox: true + - markdown_include.include: + base_path: docs diff --git a/hsfs/python/.pre-commit-config.yaml b/hsfs/python/.pre-commit-config.yaml new file mode 100644 index 000000000..98e886d9d --- /dev/null +++ b/hsfs/python/.pre-commit-config.yaml @@ -0,0 +1,8 @@ +exclude: setup.py +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.5.0 + hooks: + - id: ruff + args: [--fix] + - id: ruff-format \ No newline at end of file diff --git a/python/hsfs/__init__.py b/hsfs/python/hsfs/__init__.py similarity index 100% rename from python/hsfs/__init__.py rename to hsfs/python/hsfs/__init__.py diff --git a/python/hsfs/builtin_transformations.py b/hsfs/python/hsfs/builtin_transformations.py similarity index 100% rename from python/hsfs/builtin_transformations.py rename to hsfs/python/hsfs/builtin_transformations.py diff --git a/python/hsfs/client/__init__.py b/hsfs/python/hsfs/client/__init__.py similarity index 100% rename from python/hsfs/client/__init__.py rename to hsfs/python/hsfs/client/__init__.py diff --git a/python/hsfs/client/auth.py b/hsfs/python/hsfs/client/auth.py similarity index 100% rename from python/hsfs/client/auth.py rename to hsfs/python/hsfs/client/auth.py diff --git a/python/hsfs/client/base.py b/hsfs/python/hsfs/client/base.py similarity index 100% rename from python/hsfs/client/base.py rename to hsfs/python/hsfs/client/base.py diff --git a/python/hsfs/client/exceptions.py b/hsfs/python/hsfs/client/exceptions.py similarity index 100% rename from python/hsfs/client/exceptions.py rename to hsfs/python/hsfs/client/exceptions.py diff --git a/python/hsfs/client/external.py b/hsfs/python/hsfs/client/external.py similarity index 100% rename from python/hsfs/client/external.py rename to hsfs/python/hsfs/client/external.py diff --git a/python/hsfs/client/hopsworks.py b/hsfs/python/hsfs/client/hopsworks.py similarity index 100% rename from python/hsfs/client/hopsworks.py rename to hsfs/python/hsfs/client/hopsworks.py diff --git a/python/hsfs/client/online_store_rest_client.py b/hsfs/python/hsfs/client/online_store_rest_client.py similarity index 100% rename from python/hsfs/client/online_store_rest_client.py rename to 
hsfs/python/hsfs/client/online_store_rest_client.py diff --git a/python/hsfs/code.py b/hsfs/python/hsfs/code.py similarity index 100% rename from python/hsfs/code.py rename to hsfs/python/hsfs/code.py diff --git a/python/hsfs/connection.py b/hsfs/python/hsfs/connection.py similarity index 100% rename from python/hsfs/connection.py rename to hsfs/python/hsfs/connection.py diff --git a/python/hsfs/constructor/__init__.py b/hsfs/python/hsfs/constructor/__init__.py similarity index 100% rename from python/hsfs/constructor/__init__.py rename to hsfs/python/hsfs/constructor/__init__.py diff --git a/python/hsfs/constructor/external_feature_group_alias.py b/hsfs/python/hsfs/constructor/external_feature_group_alias.py similarity index 100% rename from python/hsfs/constructor/external_feature_group_alias.py rename to hsfs/python/hsfs/constructor/external_feature_group_alias.py diff --git a/python/hsfs/constructor/filter.py b/hsfs/python/hsfs/constructor/filter.py similarity index 100% rename from python/hsfs/constructor/filter.py rename to hsfs/python/hsfs/constructor/filter.py diff --git a/python/hsfs/constructor/fs_query.py b/hsfs/python/hsfs/constructor/fs_query.py similarity index 100% rename from python/hsfs/constructor/fs_query.py rename to hsfs/python/hsfs/constructor/fs_query.py diff --git a/python/hsfs/constructor/hudi_feature_group_alias.py b/hsfs/python/hsfs/constructor/hudi_feature_group_alias.py similarity index 100% rename from python/hsfs/constructor/hudi_feature_group_alias.py rename to hsfs/python/hsfs/constructor/hudi_feature_group_alias.py diff --git a/python/hsfs/constructor/join.py b/hsfs/python/hsfs/constructor/join.py similarity index 100% rename from python/hsfs/constructor/join.py rename to hsfs/python/hsfs/constructor/join.py diff --git a/python/hsfs/constructor/prepared_statement_parameter.py b/hsfs/python/hsfs/constructor/prepared_statement_parameter.py similarity index 100% rename from python/hsfs/constructor/prepared_statement_parameter.py rename to hsfs/python/hsfs/constructor/prepared_statement_parameter.py diff --git a/python/hsfs/constructor/query.py b/hsfs/python/hsfs/constructor/query.py similarity index 100% rename from python/hsfs/constructor/query.py rename to hsfs/python/hsfs/constructor/query.py diff --git a/python/hsfs/constructor/serving_prepared_statement.py b/hsfs/python/hsfs/constructor/serving_prepared_statement.py similarity index 100% rename from python/hsfs/constructor/serving_prepared_statement.py rename to hsfs/python/hsfs/constructor/serving_prepared_statement.py diff --git a/python/hsfs/core/__init__.py b/hsfs/python/hsfs/core/__init__.py similarity index 100% rename from python/hsfs/core/__init__.py rename to hsfs/python/hsfs/core/__init__.py diff --git a/python/hsfs/core/arrow_flight_client.py b/hsfs/python/hsfs/core/arrow_flight_client.py similarity index 100% rename from python/hsfs/core/arrow_flight_client.py rename to hsfs/python/hsfs/core/arrow_flight_client.py diff --git a/python/hsfs/core/code_api.py b/hsfs/python/hsfs/core/code_api.py similarity index 100% rename from python/hsfs/core/code_api.py rename to hsfs/python/hsfs/core/code_api.py diff --git a/python/hsfs/core/code_engine.py b/hsfs/python/hsfs/core/code_engine.py similarity index 100% rename from python/hsfs/core/code_engine.py rename to hsfs/python/hsfs/core/code_engine.py diff --git a/python/hsfs/core/constants.py b/hsfs/python/hsfs/core/constants.py similarity index 100% rename from python/hsfs/core/constants.py rename to hsfs/python/hsfs/core/constants.py diff --git 
a/python/hsfs/core/dataset_api.py b/hsfs/python/hsfs/core/dataset_api.py similarity index 100% rename from python/hsfs/core/dataset_api.py rename to hsfs/python/hsfs/core/dataset_api.py diff --git a/python/hsfs/core/delta_engine.py b/hsfs/python/hsfs/core/delta_engine.py similarity index 100% rename from python/hsfs/core/delta_engine.py rename to hsfs/python/hsfs/core/delta_engine.py diff --git a/python/hsfs/core/deltastreamer_jobconf.py b/hsfs/python/hsfs/core/deltastreamer_jobconf.py similarity index 100% rename from python/hsfs/core/deltastreamer_jobconf.py rename to hsfs/python/hsfs/core/deltastreamer_jobconf.py diff --git a/python/hsfs/core/execution.py b/hsfs/python/hsfs/core/execution.py similarity index 100% rename from python/hsfs/core/execution.py rename to hsfs/python/hsfs/core/execution.py diff --git a/python/hsfs/core/expectation_api.py b/hsfs/python/hsfs/core/expectation_api.py similarity index 100% rename from python/hsfs/core/expectation_api.py rename to hsfs/python/hsfs/core/expectation_api.py diff --git a/python/hsfs/core/expectation_engine.py b/hsfs/python/hsfs/core/expectation_engine.py similarity index 100% rename from python/hsfs/core/expectation_engine.py rename to hsfs/python/hsfs/core/expectation_engine.py diff --git a/python/hsfs/core/expectation_suite_api.py b/hsfs/python/hsfs/core/expectation_suite_api.py similarity index 100% rename from python/hsfs/core/expectation_suite_api.py rename to hsfs/python/hsfs/core/expectation_suite_api.py diff --git a/python/hsfs/core/expectation_suite_engine.py b/hsfs/python/hsfs/core/expectation_suite_engine.py similarity index 100% rename from python/hsfs/core/expectation_suite_engine.py rename to hsfs/python/hsfs/core/expectation_suite_engine.py diff --git a/python/hsfs/core/explicit_provenance.py b/hsfs/python/hsfs/core/explicit_provenance.py similarity index 100% rename from python/hsfs/core/explicit_provenance.py rename to hsfs/python/hsfs/core/explicit_provenance.py diff --git a/python/hsfs/core/external_feature_group_engine.py b/hsfs/python/hsfs/core/external_feature_group_engine.py similarity index 100% rename from python/hsfs/core/external_feature_group_engine.py rename to hsfs/python/hsfs/core/external_feature_group_engine.py diff --git a/python/hsfs/core/feature_descriptive_statistics.py b/hsfs/python/hsfs/core/feature_descriptive_statistics.py similarity index 100% rename from python/hsfs/core/feature_descriptive_statistics.py rename to hsfs/python/hsfs/core/feature_descriptive_statistics.py diff --git a/python/hsfs/core/feature_group_api.py b/hsfs/python/hsfs/core/feature_group_api.py similarity index 100% rename from python/hsfs/core/feature_group_api.py rename to hsfs/python/hsfs/core/feature_group_api.py diff --git a/python/hsfs/core/feature_group_base_engine.py b/hsfs/python/hsfs/core/feature_group_base_engine.py similarity index 100% rename from python/hsfs/core/feature_group_base_engine.py rename to hsfs/python/hsfs/core/feature_group_base_engine.py diff --git a/python/hsfs/core/feature_group_engine.py b/hsfs/python/hsfs/core/feature_group_engine.py similarity index 100% rename from python/hsfs/core/feature_group_engine.py rename to hsfs/python/hsfs/core/feature_group_engine.py diff --git a/python/hsfs/core/feature_logging.py b/hsfs/python/hsfs/core/feature_logging.py similarity index 100% rename from python/hsfs/core/feature_logging.py rename to hsfs/python/hsfs/core/feature_logging.py diff --git a/python/hsfs/core/feature_monitoring_config.py b/hsfs/python/hsfs/core/feature_monitoring_config.py similarity 
index 100%
rename from python/hsfs/core/feature_monitoring_config.py
rename to hsfs/python/hsfs/core/feature_monitoring_config.py
diff --git a/python/hsfs/core/feature_monitoring_config_api.py b/hsfs/python/hsfs/core/feature_monitoring_config_api.py
similarity index 100%
rename from python/hsfs/core/feature_monitoring_config_api.py
rename to hsfs/python/hsfs/core/feature_monitoring_config_api.py
diff --git a/python/hsfs/core/feature_monitoring_config_engine.py b/hsfs/python/hsfs/core/feature_monitoring_config_engine.py
similarity index 100%
rename from python/hsfs/core/feature_monitoring_config_engine.py
rename to hsfs/python/hsfs/core/feature_monitoring_config_engine.py
diff --git a/python/hsfs/core/feature_monitoring_result.py b/hsfs/python/hsfs/core/feature_monitoring_result.py
similarity index 100%
rename from python/hsfs/core/feature_monitoring_result.py
rename to hsfs/python/hsfs/core/feature_monitoring_result.py
diff --git a/python/hsfs/core/feature_monitoring_result_api.py b/hsfs/python/hsfs/core/feature_monitoring_result_api.py
similarity index 100%
rename from python/hsfs/core/feature_monitoring_result_api.py
rename to hsfs/python/hsfs/core/feature_monitoring_result_api.py
diff --git a/python/hsfs/core/feature_monitoring_result_engine.py b/hsfs/python/hsfs/core/feature_monitoring_result_engine.py
similarity index 100%
rename from python/hsfs/core/feature_monitoring_result_engine.py
rename to hsfs/python/hsfs/core/feature_monitoring_result_engine.py
diff --git a/python/hsfs/core/feature_store_api.py b/hsfs/python/hsfs/core/feature_store_api.py
similarity index 86%
rename from python/hsfs/core/feature_store_api.py
rename to hsfs/python/hsfs/core/feature_store_api.py
index 2b77d6aca..377a3f934 100644
--- a/python/hsfs/core/feature_store_api.py
+++ b/hsfs/python/hsfs/core/feature_store_api.py
@@ -17,12 +17,12 @@
 
 from typing import Union
 
-import hsfs.feature_store
 from hsfs import client
+from hsfs.feature_store import FeatureStore
 
 
 class FeatureStoreApi:
-    def get(self, identifier: Union[int, str]) -> hsfs.feature_store.FeatureStore:
+    def get(self, identifier: Union[int, str]) -> FeatureStore:
         """Get feature store with specific id or name.
 
         :param identifier: id or name of the feature store
@@ -32,6 +32,6 @@ def get(self, identifier: Union[int, str]) -> hsfs.feature_store.FeatureStore:
         """
         _client = client.get_instance()
         path_params = ["project", _client._project_id, "featurestores", identifier]
-        return hsfs.feature_store.FeatureStore.from_response_json(
+        return FeatureStore.from_response_json(
             _client._send_request("GET", path_params)
         )
diff --git a/python/hsfs/core/feature_view_api.py b/hsfs/python/hsfs/core/feature_view_api.py
similarity index 100%
rename from python/hsfs/core/feature_view_api.py
rename to hsfs/python/hsfs/core/feature_view_api.py
diff --git a/python/hsfs/core/feature_view_engine.py b/hsfs/python/hsfs/core/feature_view_engine.py
similarity index 100%
rename from python/hsfs/core/feature_view_engine.py
rename to hsfs/python/hsfs/core/feature_view_engine.py
diff --git a/python/hsfs/core/great_expectation_engine.py b/hsfs/python/hsfs/core/great_expectation_engine.py
similarity index 100%
rename from python/hsfs/core/great_expectation_engine.py
rename to hsfs/python/hsfs/core/great_expectation_engine.py
diff --git a/python/hsfs/core/hosts_api.py b/hsfs/python/hsfs/core/hosts_api.py
similarity index 100%
rename from python/hsfs/core/hosts_api.py
rename to hsfs/python/hsfs/core/hosts_api.py
diff --git a/python/hsfs/core/hudi_engine.py b/hsfs/python/hsfs/core/hudi_engine.py
similarity index 100%
rename from python/hsfs/core/hudi_engine.py
rename to hsfs/python/hsfs/core/hudi_engine.py
diff --git a/python/hsfs/core/ingestion_job.py b/hsfs/python/hsfs/core/ingestion_job.py
similarity index 100%
rename from python/hsfs/core/ingestion_job.py
rename to hsfs/python/hsfs/core/ingestion_job.py
diff --git a/python/hsfs/core/ingestion_job_conf.py b/hsfs/python/hsfs/core/ingestion_job_conf.py
similarity index 100%
rename from python/hsfs/core/ingestion_job_conf.py
rename to hsfs/python/hsfs/core/ingestion_job_conf.py
diff --git a/python/hsfs/core/inode.py b/hsfs/python/hsfs/core/inode.py
similarity index 100%
rename from python/hsfs/core/inode.py
rename to hsfs/python/hsfs/core/inode.py
diff --git a/python/hsfs/core/job.py b/hsfs/python/hsfs/core/job.py
similarity index 100%
rename from python/hsfs/core/job.py
rename to hsfs/python/hsfs/core/job.py
diff --git a/python/hsfs/core/job_api.py b/hsfs/python/hsfs/core/job_api.py
similarity index 100%
rename from python/hsfs/core/job_api.py
rename to hsfs/python/hsfs/core/job_api.py
diff --git a/python/hsfs/core/job_configuration.py b/hsfs/python/hsfs/core/job_configuration.py
similarity index 100%
rename from python/hsfs/core/job_configuration.py
rename to hsfs/python/hsfs/core/job_configuration.py
diff --git a/python/hsfs/core/job_schedule.py b/hsfs/python/hsfs/core/job_schedule.py
similarity index 100%
rename from python/hsfs/core/job_schedule.py
rename to hsfs/python/hsfs/core/job_schedule.py
diff --git a/python/hsfs/core/kafka_api.py b/hsfs/python/hsfs/core/kafka_api.py
similarity index 100%
rename from python/hsfs/core/kafka_api.py
rename to hsfs/python/hsfs/core/kafka_api.py
diff --git a/python/hsfs/core/kafka_engine.py b/hsfs/python/hsfs/core/kafka_engine.py
similarity index 100%
rename from python/hsfs/core/kafka_engine.py
rename to hsfs/python/hsfs/core/kafka_engine.py
diff --git a/python/hsfs/core/monitoring_window_config.py b/hsfs/python/hsfs/core/monitoring_window_config.py
similarity index 100%
rename from python/hsfs/core/monitoring_window_config.py
rename to hsfs/python/hsfs/core/monitoring_window_config.py
diff --git
a/python/hsfs/core/monitoring_window_config_engine.py b/hsfs/python/hsfs/core/monitoring_window_config_engine.py similarity index 100% rename from python/hsfs/core/monitoring_window_config_engine.py rename to hsfs/python/hsfs/core/monitoring_window_config_engine.py diff --git a/python/hsfs/core/online_store_rest_client_api.py b/hsfs/python/hsfs/core/online_store_rest_client_api.py similarity index 100% rename from python/hsfs/core/online_store_rest_client_api.py rename to hsfs/python/hsfs/core/online_store_rest_client_api.py diff --git a/python/hsfs/core/online_store_rest_client_engine.py b/hsfs/python/hsfs/core/online_store_rest_client_engine.py similarity index 100% rename from python/hsfs/core/online_store_rest_client_engine.py rename to hsfs/python/hsfs/core/online_store_rest_client_engine.py diff --git a/python/hsfs/core/online_store_sql_engine.py b/hsfs/python/hsfs/core/online_store_sql_engine.py similarity index 100% rename from python/hsfs/core/online_store_sql_engine.py rename to hsfs/python/hsfs/core/online_store_sql_engine.py diff --git a/python/hsfs/core/opensearch.py b/hsfs/python/hsfs/core/opensearch.py similarity index 100% rename from python/hsfs/core/opensearch.py rename to hsfs/python/hsfs/core/opensearch.py diff --git a/python/hsfs/core/opensearch_api.py b/hsfs/python/hsfs/core/opensearch_api.py similarity index 100% rename from python/hsfs/core/opensearch_api.py rename to hsfs/python/hsfs/core/opensearch_api.py diff --git a/python/hsfs/core/project_api.py b/hsfs/python/hsfs/core/project_api.py similarity index 100% rename from python/hsfs/core/project_api.py rename to hsfs/python/hsfs/core/project_api.py diff --git a/python/hsfs/core/query_constructor_api.py b/hsfs/python/hsfs/core/query_constructor_api.py similarity index 100% rename from python/hsfs/core/query_constructor_api.py rename to hsfs/python/hsfs/core/query_constructor_api.py diff --git a/python/hsfs/core/services_api.py b/hsfs/python/hsfs/core/services_api.py similarity index 100% rename from python/hsfs/core/services_api.py rename to hsfs/python/hsfs/core/services_api.py diff --git a/python/hsfs/core/spine_group_engine.py b/hsfs/python/hsfs/core/spine_group_engine.py similarity index 100% rename from python/hsfs/core/spine_group_engine.py rename to hsfs/python/hsfs/core/spine_group_engine.py diff --git a/python/hsfs/core/statistics_api.py b/hsfs/python/hsfs/core/statistics_api.py similarity index 100% rename from python/hsfs/core/statistics_api.py rename to hsfs/python/hsfs/core/statistics_api.py diff --git a/python/hsfs/core/statistics_engine.py b/hsfs/python/hsfs/core/statistics_engine.py similarity index 100% rename from python/hsfs/core/statistics_engine.py rename to hsfs/python/hsfs/core/statistics_engine.py diff --git a/python/hsfs/core/storage_connector_api.py b/hsfs/python/hsfs/core/storage_connector_api.py similarity index 100% rename from python/hsfs/core/storage_connector_api.py rename to hsfs/python/hsfs/core/storage_connector_api.py diff --git a/python/hsfs/core/tags_api.py b/hsfs/python/hsfs/core/tags_api.py similarity index 100% rename from python/hsfs/core/tags_api.py rename to hsfs/python/hsfs/core/tags_api.py diff --git a/python/hsfs/core/training_dataset_api.py b/hsfs/python/hsfs/core/training_dataset_api.py similarity index 100% rename from python/hsfs/core/training_dataset_api.py rename to hsfs/python/hsfs/core/training_dataset_api.py diff --git a/python/hsfs/core/training_dataset_engine.py b/hsfs/python/hsfs/core/training_dataset_engine.py similarity index 100% rename from 
python/hsfs/core/training_dataset_engine.py rename to hsfs/python/hsfs/core/training_dataset_engine.py diff --git a/python/hsfs/core/training_dataset_job_conf.py b/hsfs/python/hsfs/core/training_dataset_job_conf.py similarity index 100% rename from python/hsfs/core/training_dataset_job_conf.py rename to hsfs/python/hsfs/core/training_dataset_job_conf.py diff --git a/python/hsfs/core/transformation_function_api.py b/hsfs/python/hsfs/core/transformation_function_api.py similarity index 100% rename from python/hsfs/core/transformation_function_api.py rename to hsfs/python/hsfs/core/transformation_function_api.py diff --git a/python/hsfs/core/transformation_function_engine.py b/hsfs/python/hsfs/core/transformation_function_engine.py similarity index 100% rename from python/hsfs/core/transformation_function_engine.py rename to hsfs/python/hsfs/core/transformation_function_engine.py diff --git a/python/hsfs/core/util_sql.py b/hsfs/python/hsfs/core/util_sql.py similarity index 100% rename from python/hsfs/core/util_sql.py rename to hsfs/python/hsfs/core/util_sql.py diff --git a/python/hsfs/core/validation_report_api.py b/hsfs/python/hsfs/core/validation_report_api.py similarity index 100% rename from python/hsfs/core/validation_report_api.py rename to hsfs/python/hsfs/core/validation_report_api.py diff --git a/python/hsfs/core/validation_report_engine.py b/hsfs/python/hsfs/core/validation_report_engine.py similarity index 100% rename from python/hsfs/core/validation_report_engine.py rename to hsfs/python/hsfs/core/validation_report_engine.py diff --git a/python/hsfs/core/validation_result_api.py b/hsfs/python/hsfs/core/validation_result_api.py similarity index 100% rename from python/hsfs/core/validation_result_api.py rename to hsfs/python/hsfs/core/validation_result_api.py diff --git a/python/hsfs/core/validation_result_engine.py b/hsfs/python/hsfs/core/validation_result_engine.py similarity index 100% rename from python/hsfs/core/validation_result_engine.py rename to hsfs/python/hsfs/core/validation_result_engine.py diff --git a/python/hsfs/core/variable_api.py b/hsfs/python/hsfs/core/variable_api.py similarity index 100% rename from python/hsfs/core/variable_api.py rename to hsfs/python/hsfs/core/variable_api.py diff --git a/python/hsfs/core/vector_db_client.py b/hsfs/python/hsfs/core/vector_db_client.py similarity index 100% rename from python/hsfs/core/vector_db_client.py rename to hsfs/python/hsfs/core/vector_db_client.py diff --git a/python/hsfs/core/vector_server.py b/hsfs/python/hsfs/core/vector_server.py similarity index 100% rename from python/hsfs/core/vector_server.py rename to hsfs/python/hsfs/core/vector_server.py diff --git a/python/hsfs/decorators.py b/hsfs/python/hsfs/decorators.py similarity index 100% rename from python/hsfs/decorators.py rename to hsfs/python/hsfs/decorators.py diff --git a/python/hsfs/embedding.py b/hsfs/python/hsfs/embedding.py similarity index 100% rename from python/hsfs/embedding.py rename to hsfs/python/hsfs/embedding.py diff --git a/python/hsfs/engine/__init__.py b/hsfs/python/hsfs/engine/__init__.py similarity index 100% rename from python/hsfs/engine/__init__.py rename to hsfs/python/hsfs/engine/__init__.py diff --git a/python/hsfs/engine/python.py b/hsfs/python/hsfs/engine/python.py similarity index 100% rename from python/hsfs/engine/python.py rename to hsfs/python/hsfs/engine/python.py diff --git a/python/hsfs/engine/spark.py b/hsfs/python/hsfs/engine/spark.py similarity index 100% rename from python/hsfs/engine/spark.py rename to 
hsfs/python/hsfs/engine/spark.py
diff --git a/python/hsfs/engine/spark_no_metastore.py b/hsfs/python/hsfs/engine/spark_no_metastore.py
similarity index 100%
rename from python/hsfs/engine/spark_no_metastore.py
rename to hsfs/python/hsfs/engine/spark_no_metastore.py
diff --git a/python/hsfs/expectation_suite.py b/hsfs/python/hsfs/expectation_suite.py
similarity index 100%
rename from python/hsfs/expectation_suite.py
rename to hsfs/python/hsfs/expectation_suite.py
diff --git a/python/hsfs/feature.py b/hsfs/python/hsfs/feature.py
similarity index 100%
rename from python/hsfs/feature.py
rename to hsfs/python/hsfs/feature.py
diff --git a/python/hsfs/feature_group.py b/hsfs/python/hsfs/feature_group.py
similarity index 98%
rename from python/hsfs/feature_group.py
rename to hsfs/python/hsfs/feature_group.py
index 409ae6ecd..bbd92c2f1 100644
--- a/python/hsfs/feature_group.py
+++ b/hsfs/python/hsfs/feature_group.py
@@ -39,7 +39,6 @@
 import avro.schema
 import confluent_kafka
-import hsfs.expectation_suite
 import humps
 import numpy as np
 import pandas as pd
@@ -90,6 +89,7 @@
 # if great_expectations is not installed, we will default to using native Hopsworks class as return values
 from hsfs.decorators import typechecked, uses_great_expectations
 from hsfs.embedding import EmbeddingIndex
+from hsfs.expectation_suite import ExpectationSuite
 from hsfs.ge_validation_result import ValidationResult
 from hsfs.statistics import Statistics
 from hsfs.statistics_config import StatisticsConfig
@@ -117,7 +117,7 @@ def __init__(
         embedding_index: Optional[EmbeddingIndex] = None,
         expectation_suite: Optional[
             Union[
-                hsfs.expectation_suite.ExpectationSuite,
+                ExpectationSuite,
                 great_expectations.core.ExpectationSuite,
                 Dict[str, Any],
             ]
@@ -911,11 +911,7 @@ def append_features(
     def get_expectation_suite(
         self, ge_type: bool = HAS_GREAT_EXPECTATIONS
-    ) -> Union[
-        hsfs.expectation_suite.ExpectationSuite,
-        great_expectations.core.ExpectationSuite,
-        None,
-    ]:
+    ) -> Union[ExpectationSuite, great_expectations.core.ExpectationSuite, None]:
         """Return the expectation suite attached to the feature group if it exists.
 
         !!! example
@@ -953,16 +949,12 @@ def get_expectation_suite(
     def save_expectation_suite(
         self,
         expectation_suite: Union[
-            hsfs.expectation_suite.ExpectationSuite,
-            great_expectations.core.ExpectationSuite,
+            ExpectationSuite, great_expectations.core.ExpectationSuite
         ],
         run_validation: bool = True,
         validation_ingestion_policy: Literal["always", "strict"] = "always",
         overwrite: bool = False,
-    ) -> Union[
-        hsfs.expectation_suite.ExpectationSuite,
-        great_expectations.core.ExpectationSuite,
-    ]:
+    ) -> Union[ExpectationSuite, great_expectations.core.ExpectationSuite]:
         """Attach an expectation suite to a feature group and saves it for future use.
 
         If an expectation suite is already attached, it is replaced. Note that the
         provided expectation suite is modified inplace to include expectationId fields.
@@ -993,22 +985,18 @@ def save_expectation_suite(
         if HAS_GREAT_EXPECTATIONS and isinstance(
             expectation_suite, great_expectations.core.ExpectationSuite
         ):
-            tmp_expectation_suite = (
-                hsfs.expectation_suite.ExpectationSuite.from_ge_type(
-                    ge_expectation_suite=expectation_suite,
-                    run_validation=run_validation,
-                    validation_ingestion_policy=validation_ingestion_policy,
-                    feature_store_id=self._feature_store_id,
-                    feature_group_id=self._id,
-                )
+            tmp_expectation_suite = ExpectationSuite.from_ge_type(
+                ge_expectation_suite=expectation_suite,
+                run_validation=run_validation,
+                validation_ingestion_policy=validation_ingestion_policy,
+                feature_store_id=self._feature_store_id,
+                feature_group_id=self._id,
             )
-        elif isinstance(expectation_suite, hsfs.expectation_suite.ExpectationSuite):
+        elif isinstance(expectation_suite, ExpectationSuite):
             tmp_expectation_suite = expectation_suite.to_json_dict(decamelize=True)
             tmp_expectation_suite["feature_group_id"] = self._id
             tmp_expectation_suite["feature_store_id"] = self._feature_store_id
-            tmp_expectation_suite = hsfs.expectation_suite.ExpectationSuite(
-                **tmp_expectation_suite
-            )
+            tmp_expectation_suite = ExpectationSuite(**tmp_expectation_suite)
         else:
             raise TypeError(
                 "The provided expectation suite type `{}` is not supported. Use Great Expectation `ExpectationSuite` or HSFS' own `ExpectationSuite` object.".format(
@@ -1255,7 +1243,7 @@ def validate(
         dataframe: Optional[
             Union[pd.DataFrame, TypeVar("pyspark.sql.DataFrame")]  # noqa: F821
         ] = None,
-        expectation_suite: Optional[hsfs.expectation_suite.ExpectationSuite] = None,
+        expectation_suite: Optional[ExpectationSuite] = None,
         save_report: Optional[bool] = False,
         validation_options: Optional[Dict[str, Any]] = None,
         ingestion_result: Literal[
@@ -1871,7 +1859,7 @@ def location(self) -> Optional[str]:
     @property
     def expectation_suite(
         self,
-    ) -> Optional[hsfs.expectation_suite.ExpectationSuite]:
+    ) -> Optional[ExpectationSuite]:
         """Expectation Suite configuration object defining the settings for data validation of the feature group."""
         return self._expectation_suite
@@ -1880,24 +1868,22 @@ def expectation_suite(
     def expectation_suite(
         self,
         expectation_suite: Union[
-            hsfs.expectation_suite.ExpectationSuite,
+            ExpectationSuite,
             great_expectations.core.ExpectationSuite,
             Dict[str, Any],
             None,
         ],
     ) -> None:
-        if isinstance(expectation_suite, hsfs.expectation_suite.ExpectationSuite):
+        if isinstance(expectation_suite, ExpectationSuite):
             tmp_expectation_suite = expectation_suite.to_json_dict(decamelize=True)
             tmp_expectation_suite["feature_group_id"] = self._id
             tmp_expectation_suite["feature_store_id"] = self._feature_store_id
-            self._expectation_suite = hsfs.expectation_suite.ExpectationSuite(
-                **tmp_expectation_suite
-            )
+            self._expectation_suite = ExpectationSuite(**tmp_expectation_suite)
         elif HAS_GREAT_EXPECTATIONS and isinstance(
             expectation_suite,
             great_expectations.core.expectation_suite.ExpectationSuite,
         ):
-            self._expectation_suite = hsfs.expectation_suite.ExpectationSuite(
+            self._expectation_suite = ExpectationSuite(
                 **expectation_suite.to_json_dict(),
                 feature_store_id=self._feature_store_id,
                 feature_group_id=self._id,
@@ -1906,9 +1892,7 @@ def expectation_suite(
             tmp_expectation_suite = expectation_suite.copy()
             tmp_expectation_suite["feature_store_id"] = self._feature_store_id
             tmp_expectation_suite["feature_group_id"] = self._id
-            self._expectation_suite = hsfs.expectation_suite.ExpectationSuite(
-                **tmp_expectation_suite
-            )
+            self._expectation_suite = ExpectationSuite(**tmp_expectation_suite)
         elif expectation_suite is None:
             self._expectation_suite = None
         else:
@@ -2093,7 +2077,7 @@ def __init__(
         expectation_suite: Optional[
             Union[
                 great_expectations.core.ExpectationSuite,
-                hsfs.expectation_suite.ExpectationSuite,
+                ExpectationSuite,
                 Dict[str, Any],
             ]
         ] = None,
@@ -3509,7 +3493,7 @@ def __init__(
         event_time: Optional[str] = None,
         expectation_suite: Optional[
             Union[
-                hsfs.expectation_suite.ExpectationSuite,
+                ExpectationSuite,
                 great_expectations.core.ExpectationSuite,
                 Dict[str, Any],
             ]
@@ -4049,10 +4033,7 @@ def __init__(
         statistics_config: Optional[StatisticsConfig] = None,
         event_time: Optional[str] = None,
         expectation_suite: Optional[
-            Union[
-                hsfs.expectation_suite.ExpectationSuite,
-                great_expectations.core.ExpectationSuite,
-            ]
+            Union[ExpectationSuite, great_expectations.core.ExpectationSuite]
         ] = None,
         online_enabled: bool = False,
         href: Optional[str] = None,
diff --git a/python/hsfs/feature_group_commit.py b/hsfs/python/hsfs/feature_group_commit.py
similarity index 100%
rename from python/hsfs/feature_group_commit.py
rename to hsfs/python/hsfs/feature_group_commit.py
diff --git a/python/hsfs/feature_group_writer.py b/hsfs/python/hsfs/feature_group_writer.py
similarity index 100%
rename from python/hsfs/feature_group_writer.py
rename to hsfs/python/hsfs/feature_group_writer.py
diff --git a/python/hsfs/feature_store.py b/hsfs/python/hsfs/feature_store.py
similarity index 100%
rename from python/hsfs/feature_store.py
rename to hsfs/python/hsfs/feature_store.py
diff --git a/python/hsfs/feature_view.py b/hsfs/python/hsfs/feature_view.py
similarity index 100%
rename from python/hsfs/feature_view.py
rename to hsfs/python/hsfs/feature_view.py
diff --git a/python/hsfs/ge_expectation.py b/hsfs/python/hsfs/ge_expectation.py
similarity index 100%
rename from python/hsfs/ge_expectation.py
rename to hsfs/python/hsfs/ge_expectation.py
diff --git a/python/hsfs/ge_validation_result.py b/hsfs/python/hsfs/ge_validation_result.py
similarity index 100%
rename from python/hsfs/ge_validation_result.py
rename to hsfs/python/hsfs/ge_validation_result.py
diff --git a/python/hsfs/hopsworks_udf.py b/hsfs/python/hsfs/hopsworks_udf.py
similarity index 100%
rename from python/hsfs/hopsworks_udf.py
rename to hsfs/python/hsfs/hopsworks_udf.py
diff --git a/python/hsfs/serving_key.py b/hsfs/python/hsfs/serving_key.py
similarity index 100%
rename from python/hsfs/serving_key.py
rename to hsfs/python/hsfs/serving_key.py
diff --git a/python/hsfs/split_statistics.py b/hsfs/python/hsfs/split_statistics.py
similarity index 100%
rename from python/hsfs/split_statistics.py
rename to hsfs/python/hsfs/split_statistics.py
diff --git a/python/hsfs/statistics.py b/hsfs/python/hsfs/statistics.py
similarity index 100%
rename from python/hsfs/statistics.py
rename to hsfs/python/hsfs/statistics.py
diff --git a/python/hsfs/statistics_config.py b/hsfs/python/hsfs/statistics_config.py
similarity index 100%
rename from python/hsfs/statistics_config.py
rename to hsfs/python/hsfs/statistics_config.py
diff --git a/python/hsfs/storage_connector.py b/hsfs/python/hsfs/storage_connector.py
similarity index 100%
rename from python/hsfs/storage_connector.py
rename to hsfs/python/hsfs/storage_connector.py
diff --git a/python/hsfs/tag.py b/hsfs/python/hsfs/tag.py
similarity index 100%
rename from python/hsfs/tag.py
rename to hsfs/python/hsfs/tag.py
diff --git a/python/hsfs/training_dataset.py b/hsfs/python/hsfs/training_dataset.py
similarity index 100%
rename from
python/hsfs/training_dataset.py rename to hsfs/python/hsfs/training_dataset.py diff --git a/python/hsfs/training_dataset_feature.py b/hsfs/python/hsfs/training_dataset_feature.py similarity index 100% rename from python/hsfs/training_dataset_feature.py rename to hsfs/python/hsfs/training_dataset_feature.py diff --git a/python/hsfs/training_dataset_split.py b/hsfs/python/hsfs/training_dataset_split.py similarity index 100% rename from python/hsfs/training_dataset_split.py rename to hsfs/python/hsfs/training_dataset_split.py diff --git a/python/hsfs/transformation_function.py b/hsfs/python/hsfs/transformation_function.py similarity index 100% rename from python/hsfs/transformation_function.py rename to hsfs/python/hsfs/transformation_function.py diff --git a/python/hsfs/transformation_statistics.py b/hsfs/python/hsfs/transformation_statistics.py similarity index 100% rename from python/hsfs/transformation_statistics.py rename to hsfs/python/hsfs/transformation_statistics.py diff --git a/python/hsfs/usage.py b/hsfs/python/hsfs/usage.py similarity index 100% rename from python/hsfs/usage.py rename to hsfs/python/hsfs/usage.py diff --git a/python/hsfs/user.py b/hsfs/python/hsfs/user.py similarity index 100% rename from python/hsfs/user.py rename to hsfs/python/hsfs/user.py diff --git a/python/hsfs/util.py b/hsfs/python/hsfs/util.py similarity index 100% rename from python/hsfs/util.py rename to hsfs/python/hsfs/util.py diff --git a/python/hsfs/validation_report.py b/hsfs/python/hsfs/validation_report.py similarity index 100% rename from python/hsfs/validation_report.py rename to hsfs/python/hsfs/validation_report.py diff --git a/python/hsfs/version.py b/hsfs/python/hsfs/version.py similarity index 100% rename from python/hsfs/version.py rename to hsfs/python/hsfs/version.py diff --git a/hsfs/python/pyproject.toml b/hsfs/python/pyproject.toml new file mode 100644 index 000000000..4869bf25b --- /dev/null +++ b/hsfs/python/pyproject.toml @@ -0,0 +1,173 @@ +[project] +name = "hsfs" +dynamic = ["version"] +requires-python = ">=3.8,<3.13" +readme = "README.md" +description = "HSFS Python SDK to interact with Hopsworks Feature Store" +keywords = [ + "Hopsworks", + "Feature Store", + "hsfs", + "Spark", + "Machine Learning", + "MLOps", + "DataOps", +] +authors = [{ name = "Hopsworks AB", email = "robin@hopsworks.ai" }] +license = { text = "Apache-2.0" } + +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Topic :: Utilities", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Intended Audience :: Developers", +] + +dependencies = [ + "pyhumps==1.6.1", + "requests", + "furl", + "boto3", + "pandas<2.2.0", + "numpy<2", + "pyjks", + "mock", + "avro==1.11.3", + "sqlalchemy", + "PyMySQL[rsa]", + "tzlocal", + "fsspec", + "retrying", + "hopsworks_aiomysql[sa]==0.2.1", + "polars>=0.20.18,<=0.21.0", + "opensearch-py>=1.1.0,<=2.4.2", +] + +[project.optional-dependencies] +python = [ + "pyarrow>=10.0", + "confluent-kafka<=2.3.0", + "fastavro>=1.4.11,<=1.8.4", + "tqdm", +] +great-expectations = ["great_expectations==0.18.12"] +dev-no-opt = [ + "pytest==7.4.4", + "pytest-mock==3.12.0", + "ruff", + "pyspark==3.1.1", + "moto[s3]==5.0.0", + "typeguard==4.2.1", +] +dev-pandas1 = [ + "pytest==7.4.4", + "pytest-mock==3.12.0", + "ruff", + 
"pyspark==3.1.1", + "moto[s3]==5.0.0", + "pandas<=1.5.3", + "sqlalchemy<=1.4.48", +] +dev = ["hsfs[dev-no-opt]", "hsfs[great-expectations]"] + + +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +exclude = ["tests*"] +include = ["../Readme.md", "../LICENSE", "hsfs", "hsfs.*"] + +[tool.setuptools.dynamic] +version = { attr = "hsfs.version.__version__" } + +[project.urls] +Documentation = "https://docs.hopsworks.ai/latest" +Repository = "https://github.com/logicalclocks/feature-store-api" +Homepage = "https://www.hopsworks.ai" +Community = "https://community.hopsworks.ai" + + +[tool.ruff] +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", + "java" +] + +# Same as Black. +line-length = 88 +indent-width = 4 + +# Assume Python 3.8+ syntax. +target-version = "py38" + +[tool.ruff.lint] +# 1. Enable flake8-bugbear (`B`) rules, in addition to the defaults. +select = ["E4", "E7", "E9", "F", "B", "I", "W"] #, "ANN"] +ignore = [ + "B905", # zip has no strict kwarg until Python 3.10 + "ANN101", # Missing type annotation for self in method + "ANN102", # Missing type annotation for cls in classmethod + "ANN003", # Missing type annotation for **kwarg in function + "ANN002", # Missing type annotation for *args in function + "ANN401", # Allow Any in type annotations + "W505", # Doc line too long +] + +# Allow fix for all enabled rules (when `--fix`) is provided. +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[tool.ruff.lint.isort] +lines-after-imports = 2 +known-third-party = ["hopsworks", "hsfs", "hsml"] + + +[tool.ruff.format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. +line-ending = "auto" diff --git a/hsfs/python/setup.py b/hsfs/python/setup.py new file mode 100644 index 000000000..b024da80e --- /dev/null +++ b/hsfs/python/setup.py @@ -0,0 +1,4 @@ +from setuptools import setup + + +setup() diff --git a/hsfs/python/tests/__init__.py b/hsfs/python/tests/__init__.py new file mode 100644 index 000000000..11e65f162 --- /dev/null +++ b/hsfs/python/tests/__init__.py @@ -0,0 +1,15 @@ +# +# Copyright 2020 Logical Clocks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# diff --git a/python/tests/client/test_base_client.py b/hsfs/python/tests/client/test_base_client.py similarity index 99% rename from python/tests/client/test_base_client.py rename to hsfs/python/tests/client/test_base_client.py index 7717f6bbe..b90b3b6f1 100644 --- a/python/tests/client/test_base_client.py +++ b/hsfs/python/tests/client/test_base_client.py @@ -20,7 +20,6 @@ import requests from hsfs.client.base import Client from hsfs.client.exceptions import RestAPIError - from tests.util import changes_environ diff --git a/python/tests/conftest.py b/hsfs/python/tests/conftest.py similarity index 93% rename from python/tests/conftest.py rename to hsfs/python/tests/conftest.py index 52132b0bb..5d20f6318 100644 --- a/python/tests/conftest.py +++ b/hsfs/python/tests/conftest.py @@ -1,5 +1,5 @@ # -# Copyright 2024 Hopsworks AB +# Copyright 2022 Hopsworks AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,7 +20,6 @@ pytest_plugins = [ "tests.fixtures.backend_fixtures", - "tests.fixtures.model_fixtures", "tests.fixtures.dataframe_fixtures", ] diff --git a/python/tests/constructor/test_external_feature_group_alias.py b/hsfs/python/tests/constructor/test_external_feature_group_alias.py similarity index 100% rename from python/tests/constructor/test_external_feature_group_alias.py rename to hsfs/python/tests/constructor/test_external_feature_group_alias.py diff --git a/python/tests/constructor/test_filter.py b/hsfs/python/tests/constructor/test_filter.py similarity index 100% rename from python/tests/constructor/test_filter.py rename to hsfs/python/tests/constructor/test_filter.py diff --git a/python/tests/constructor/test_fs_query.py b/hsfs/python/tests/constructor/test_fs_query.py similarity index 100% rename from python/tests/constructor/test_fs_query.py rename to hsfs/python/tests/constructor/test_fs_query.py diff --git a/python/tests/constructor/test_hudi_feature_group_alias.py b/hsfs/python/tests/constructor/test_hudi_feature_group_alias.py similarity index 100% rename from python/tests/constructor/test_hudi_feature_group_alias.py rename to hsfs/python/tests/constructor/test_hudi_feature_group_alias.py diff --git a/python/tests/constructor/test_join.py b/hsfs/python/tests/constructor/test_join.py similarity index 100% rename from python/tests/constructor/test_join.py rename to hsfs/python/tests/constructor/test_join.py diff --git a/python/tests/constructor/test_prepared_statement_parameter.py b/hsfs/python/tests/constructor/test_prepared_statement_parameter.py similarity index 100% rename from python/tests/constructor/test_prepared_statement_parameter.py rename to hsfs/python/tests/constructor/test_prepared_statement_parameter.py diff --git a/python/tests/constructor/test_query.py b/hsfs/python/tests/constructor/test_query.py similarity index 100% rename from python/tests/constructor/test_query.py rename to hsfs/python/tests/constructor/test_query.py diff --git a/python/tests/constructor/test_serving_prepared_statement.py b/hsfs/python/tests/constructor/test_serving_prepared_statement.py similarity index 100% rename from python/tests/constructor/test_serving_prepared_statement.py rename to hsfs/python/tests/constructor/test_serving_prepared_statement.py diff --git a/python/tests/core/__init__.py b/hsfs/python/tests/core/__init__.py similarity index 100% rename from python/tests/core/__init__.py rename to hsfs/python/tests/core/__init__.py diff --git a/python/tests/core/test_arrow_flight_client.py 
b/hsfs/python/tests/core/test_arrow_flight_client.py similarity index 100% rename from python/tests/core/test_arrow_flight_client.py rename to hsfs/python/tests/core/test_arrow_flight_client.py diff --git a/python/tests/core/test_code_engine.py b/hsfs/python/tests/core/test_code_engine.py similarity index 100% rename from python/tests/core/test_code_engine.py rename to hsfs/python/tests/core/test_code_engine.py diff --git a/python/tests/core/test_execution.py b/hsfs/python/tests/core/test_execution.py similarity index 100% rename from python/tests/core/test_execution.py rename to hsfs/python/tests/core/test_execution.py diff --git a/python/tests/core/test_expectation_engine.py b/hsfs/python/tests/core/test_expectation_engine.py similarity index 100% rename from python/tests/core/test_expectation_engine.py rename to hsfs/python/tests/core/test_expectation_engine.py diff --git a/python/tests/core/test_expectation_suite_engine.py b/hsfs/python/tests/core/test_expectation_suite_engine.py similarity index 100% rename from python/tests/core/test_expectation_suite_engine.py rename to hsfs/python/tests/core/test_expectation_suite_engine.py diff --git a/python/tests/core/test_external_feature_group_engine.py b/hsfs/python/tests/core/test_external_feature_group_engine.py similarity index 100% rename from python/tests/core/test_external_feature_group_engine.py rename to hsfs/python/tests/core/test_external_feature_group_engine.py diff --git a/python/tests/core/test_feature_descriptive_statistics.py b/hsfs/python/tests/core/test_feature_descriptive_statistics.py similarity index 100% rename from python/tests/core/test_feature_descriptive_statistics.py rename to hsfs/python/tests/core/test_feature_descriptive_statistics.py diff --git a/python/tests/core/test_feature_group_api.py b/hsfs/python/tests/core/test_feature_group_api.py similarity index 100% rename from python/tests/core/test_feature_group_api.py rename to hsfs/python/tests/core/test_feature_group_api.py diff --git a/python/tests/core/test_feature_group_base_engine.py b/hsfs/python/tests/core/test_feature_group_base_engine.py similarity index 100% rename from python/tests/core/test_feature_group_base_engine.py rename to hsfs/python/tests/core/test_feature_group_base_engine.py diff --git a/python/tests/core/test_feature_group_engine.py b/hsfs/python/tests/core/test_feature_group_engine.py similarity index 100% rename from python/tests/core/test_feature_group_engine.py rename to hsfs/python/tests/core/test_feature_group_engine.py diff --git a/python/tests/core/test_feature_monitoring_config.py b/hsfs/python/tests/core/test_feature_monitoring_config.py similarity index 100% rename from python/tests/core/test_feature_monitoring_config.py rename to hsfs/python/tests/core/test_feature_monitoring_config.py diff --git a/python/tests/core/test_feature_monitoring_config_engine.py b/hsfs/python/tests/core/test_feature_monitoring_config_engine.py similarity index 100% rename from python/tests/core/test_feature_monitoring_config_engine.py rename to hsfs/python/tests/core/test_feature_monitoring_config_engine.py diff --git a/python/tests/core/test_feature_monitoring_result.py b/hsfs/python/tests/core/test_feature_monitoring_result.py similarity index 100% rename from python/tests/core/test_feature_monitoring_result.py rename to hsfs/python/tests/core/test_feature_monitoring_result.py diff --git a/python/tests/core/test_feature_monitoring_result_engine.py b/hsfs/python/tests/core/test_feature_monitoring_result_engine.py similarity index 100% rename from 
python/tests/core/test_feature_monitoring_result_engine.py rename to hsfs/python/tests/core/test_feature_monitoring_result_engine.py diff --git a/python/tests/core/test_feature_view_engine.py b/hsfs/python/tests/core/test_feature_view_engine.py similarity index 100% rename from python/tests/core/test_feature_view_engine.py rename to hsfs/python/tests/core/test_feature_view_engine.py diff --git a/python/tests/core/test_great_expectation_engine.py b/hsfs/python/tests/core/test_great_expectation_engine.py similarity index 100% rename from python/tests/core/test_great_expectation_engine.py rename to hsfs/python/tests/core/test_great_expectation_engine.py diff --git a/python/tests/core/test_hudi_engine.py b/hsfs/python/tests/core/test_hudi_engine.py similarity index 100% rename from python/tests/core/test_hudi_engine.py rename to hsfs/python/tests/core/test_hudi_engine.py diff --git a/python/tests/core/test_ingestion_job.py b/hsfs/python/tests/core/test_ingestion_job.py similarity index 100% rename from python/tests/core/test_ingestion_job.py rename to hsfs/python/tests/core/test_ingestion_job.py diff --git a/python/tests/core/test_inode.py b/hsfs/python/tests/core/test_inode.py similarity index 100% rename from python/tests/core/test_inode.py rename to hsfs/python/tests/core/test_inode.py diff --git a/python/tests/core/test_job.py b/hsfs/python/tests/core/test_job.py similarity index 100% rename from python/tests/core/test_job.py rename to hsfs/python/tests/core/test_job.py diff --git a/python/tests/core/test_job_configuration.py b/hsfs/python/tests/core/test_job_configuration.py similarity index 100% rename from python/tests/core/test_job_configuration.py rename to hsfs/python/tests/core/test_job_configuration.py diff --git a/python/tests/core/test_kafka_engine.py b/hsfs/python/tests/core/test_kafka_engine.py similarity index 100% rename from python/tests/core/test_kafka_engine.py rename to hsfs/python/tests/core/test_kafka_engine.py diff --git a/python/tests/core/test_monitoring_window_config.py b/hsfs/python/tests/core/test_monitoring_window_config.py similarity index 100% rename from python/tests/core/test_monitoring_window_config.py rename to hsfs/python/tests/core/test_monitoring_window_config.py diff --git a/python/tests/core/test_monitoring_window_config_engine.py b/hsfs/python/tests/core/test_monitoring_window_config_engine.py similarity index 100% rename from python/tests/core/test_monitoring_window_config_engine.py rename to hsfs/python/tests/core/test_monitoring_window_config_engine.py diff --git a/python/tests/core/test_online_store_rest_client.py b/hsfs/python/tests/core/test_online_store_rest_client.py similarity index 100% rename from python/tests/core/test_online_store_rest_client.py rename to hsfs/python/tests/core/test_online_store_rest_client.py diff --git a/python/tests/core/test_online_store_rest_client_api.py b/hsfs/python/tests/core/test_online_store_rest_client_api.py similarity index 100% rename from python/tests/core/test_online_store_rest_client_api.py rename to hsfs/python/tests/core/test_online_store_rest_client_api.py diff --git a/python/tests/core/test_online_store_rest_client_engine.py b/hsfs/python/tests/core/test_online_store_rest_client_engine.py similarity index 100% rename from python/tests/core/test_online_store_rest_client_engine.py rename to hsfs/python/tests/core/test_online_store_rest_client_engine.py diff --git a/python/tests/core/test_opensearch.py b/hsfs/python/tests/core/test_opensearch.py similarity index 100% rename from 
python/tests/core/test_opensearch.py rename to hsfs/python/tests/core/test_opensearch.py diff --git a/python/tests/core/test_statistics_engine.py b/hsfs/python/tests/core/test_statistics_engine.py similarity index 100% rename from python/tests/core/test_statistics_engine.py rename to hsfs/python/tests/core/test_statistics_engine.py diff --git a/python/tests/core/test_training_dataset_engine.py b/hsfs/python/tests/core/test_training_dataset_engine.py similarity index 100% rename from python/tests/core/test_training_dataset_engine.py rename to hsfs/python/tests/core/test_training_dataset_engine.py diff --git a/python/tests/core/test_transformation_function_engine.py b/hsfs/python/tests/core/test_transformation_function_engine.py similarity index 100% rename from python/tests/core/test_transformation_function_engine.py rename to hsfs/python/tests/core/test_transformation_function_engine.py diff --git a/python/tests/core/test_validation_report_engine.py b/hsfs/python/tests/core/test_validation_report_engine.py similarity index 100% rename from python/tests/core/test_validation_report_engine.py rename to hsfs/python/tests/core/test_validation_report_engine.py diff --git a/python/tests/core/test_validation_result_engine.py b/hsfs/python/tests/core/test_validation_result_engine.py similarity index 100% rename from python/tests/core/test_validation_result_engine.py rename to hsfs/python/tests/core/test_validation_result_engine.py diff --git a/python/tests/core/test_vector_db_client.py b/hsfs/python/tests/core/test_vector_db_client.py similarity index 100% rename from python/tests/core/test_vector_db_client.py rename to hsfs/python/tests/core/test_vector_db_client.py diff --git a/python/tests/data/hadoop/bin/winutils.exe b/hsfs/python/tests/data/hadoop/bin/winutils.exe similarity index 100% rename from python/tests/data/hadoop/bin/winutils.exe rename to hsfs/python/tests/data/hadoop/bin/winutils.exe diff --git a/python/tests/data/test_basic.csv b/hsfs/python/tests/data/test_basic.csv similarity index 100% rename from python/tests/data/test_basic.csv rename to hsfs/python/tests/data/test_basic.csv diff --git a/python/tests/data/test_basic.parquet b/hsfs/python/tests/data/test_basic.parquet similarity index 100% rename from python/tests/data/test_basic.parquet rename to hsfs/python/tests/data/test_basic.parquet diff --git a/python/tests/data/test_basic.tsv b/hsfs/python/tests/data/test_basic.tsv similarity index 100% rename from python/tests/data/test_basic.tsv rename to hsfs/python/tests/data/test_basic.tsv diff --git a/python/tests/engine/__init__.py b/hsfs/python/tests/engine/__init__.py similarity index 100% rename from python/tests/engine/__init__.py rename to hsfs/python/tests/engine/__init__.py diff --git a/python/tests/engine/test_python.py b/hsfs/python/tests/engine/test_python.py similarity index 100% rename from python/tests/engine/test_python.py rename to hsfs/python/tests/engine/test_python.py diff --git a/python/tests/engine/test_python_reader.py b/hsfs/python/tests/engine/test_python_reader.py similarity index 100% rename from python/tests/engine/test_python_reader.py rename to hsfs/python/tests/engine/test_python_reader.py diff --git a/python/tests/engine/test_python_spark_convert_dataframe.py b/hsfs/python/tests/engine/test_python_spark_convert_dataframe.py similarity index 100% rename from python/tests/engine/test_python_spark_convert_dataframe.py rename to hsfs/python/tests/engine/test_python_spark_convert_dataframe.py diff --git 
a/python/tests/engine/test_python_spark_transformation_functions.py b/hsfs/python/tests/engine/test_python_spark_transformation_functions.py similarity index 100% rename from python/tests/engine/test_python_spark_transformation_functions.py rename to hsfs/python/tests/engine/test_python_spark_transformation_functions.py diff --git a/python/tests/engine/test_python_writer.py b/hsfs/python/tests/engine/test_python_writer.py similarity index 100% rename from python/tests/engine/test_python_writer.py rename to hsfs/python/tests/engine/test_python_writer.py diff --git a/python/tests/engine/test_spark.py b/hsfs/python/tests/engine/test_spark.py similarity index 100% rename from python/tests/engine/test_spark.py rename to hsfs/python/tests/engine/test_spark.py diff --git a/python/tests/utils/__init__.py b/hsfs/python/tests/fixtures/__init__.py similarity index 94% rename from python/tests/utils/__init__.py rename to hsfs/python/tests/fixtures/__init__.py index ff8055b9b..3ed3ff38a 100644 --- a/python/tests/utils/__init__.py +++ b/hsfs/python/tests/fixtures/__init__.py @@ -1,5 +1,5 @@ # -# Copyright 2024 Hopsworks AB +# Copyright 2022 Hopsworks AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/python/tests/fixtures/backend_fixtures.py b/hsfs/python/tests/fixtures/backend_fixtures.py similarity index 87% rename from python/tests/fixtures/backend_fixtures.py rename to hsfs/python/tests/fixtures/backend_fixtures.py index dd455b699..5a7029172 100644 --- a/python/tests/fixtures/backend_fixtures.py +++ b/hsfs/python/tests/fixtures/backend_fixtures.py @@ -1,5 +1,5 @@ # -# Copyright 2024 Hopsworks AB +# Copyright 2022 Hopsworks AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ # limitations under the License. 
# -import copy import json import os @@ -26,12 +25,12 @@ FIXTURES = [ "execution", "expectation_suite", - "external_feature_group", "external_feature_group_alias", + "external_feature_group", "feature", "feature_descriptive_statistics", - "feature_group", "feature_group_commit", + "feature_group", "feature_monitoring_config", "feature_monitoring_result", "feature_store", @@ -41,36 +40,28 @@ "ge_expectation", "ge_validation_result", "hudi_feature_group_alias", - "inference_batcher", - "inference_endpoint", - "inference_logger", "ingestion_job", "inode", "job", "join", - "kafka_topic", "logic", - "model", - "predictor", "prepared_statement_parameter", "query", - "resources", - "rondb_server", - "serving_keys", "serving_prepared_statement", - "spine_group", "split_statistics", - "statistics", "statistics_config", + "statistics", "storage_connector", "tag", - "training_dataset", "training_dataset_feature", + "training_dataset", "training_dataset_split", "transformation_function", - "transformer", "user", "validation_report", + "serving_keys", + "rondb_server", + "spine_group", ] backend_fixtures_json = {} @@ -81,4 +72,4 @@ @pytest.fixture def backend_fixtures(): - return copy.deepcopy(backend_fixtures_json) + return backend_fixtures_json diff --git a/python/tests/fixtures/dataframe_fixtures.py b/hsfs/python/tests/fixtures/dataframe_fixtures.py similarity index 100% rename from python/tests/fixtures/dataframe_fixtures.py rename to hsfs/python/tests/fixtures/dataframe_fixtures.py diff --git a/python/tests/fixtures/execution_fixtures.json b/hsfs/python/tests/fixtures/execution_fixtures.json similarity index 100% rename from python/tests/fixtures/execution_fixtures.json rename to hsfs/python/tests/fixtures/execution_fixtures.json diff --git a/python/tests/fixtures/expectation_suite_fixtures.json b/hsfs/python/tests/fixtures/expectation_suite_fixtures.json similarity index 100% rename from python/tests/fixtures/expectation_suite_fixtures.json rename to hsfs/python/tests/fixtures/expectation_suite_fixtures.json diff --git a/python/tests/fixtures/external_feature_group_alias_fixtures.json b/hsfs/python/tests/fixtures/external_feature_group_alias_fixtures.json similarity index 100% rename from python/tests/fixtures/external_feature_group_alias_fixtures.json rename to hsfs/python/tests/fixtures/external_feature_group_alias_fixtures.json diff --git a/python/tests/fixtures/external_feature_group_fixtures.json b/hsfs/python/tests/fixtures/external_feature_group_fixtures.json similarity index 100% rename from python/tests/fixtures/external_feature_group_fixtures.json rename to hsfs/python/tests/fixtures/external_feature_group_fixtures.json diff --git a/python/tests/fixtures/feature_descriptive_statistics_fixtures.json b/hsfs/python/tests/fixtures/feature_descriptive_statistics_fixtures.json similarity index 100% rename from python/tests/fixtures/feature_descriptive_statistics_fixtures.json rename to hsfs/python/tests/fixtures/feature_descriptive_statistics_fixtures.json diff --git a/python/tests/fixtures/feature_fixtures.json b/hsfs/python/tests/fixtures/feature_fixtures.json similarity index 100% rename from python/tests/fixtures/feature_fixtures.json rename to hsfs/python/tests/fixtures/feature_fixtures.json diff --git a/python/tests/fixtures/feature_group_commit_fixtures.json b/hsfs/python/tests/fixtures/feature_group_commit_fixtures.json similarity index 100% rename from python/tests/fixtures/feature_group_commit_fixtures.json rename to hsfs/python/tests/fixtures/feature_group_commit_fixtures.json diff 
--git a/python/tests/fixtures/feature_group_fixtures.json b/hsfs/python/tests/fixtures/feature_group_fixtures.json similarity index 100% rename from python/tests/fixtures/feature_group_fixtures.json rename to hsfs/python/tests/fixtures/feature_group_fixtures.json diff --git a/python/tests/fixtures/feature_monitoring_config_fixtures.json b/hsfs/python/tests/fixtures/feature_monitoring_config_fixtures.json similarity index 100% rename from python/tests/fixtures/feature_monitoring_config_fixtures.json rename to hsfs/python/tests/fixtures/feature_monitoring_config_fixtures.json diff --git a/python/tests/fixtures/feature_monitoring_result_fixtures.json b/hsfs/python/tests/fixtures/feature_monitoring_result_fixtures.json similarity index 100% rename from python/tests/fixtures/feature_monitoring_result_fixtures.json rename to hsfs/python/tests/fixtures/feature_monitoring_result_fixtures.json diff --git a/python/tests/fixtures/feature_store_fixtures.json b/hsfs/python/tests/fixtures/feature_store_fixtures.json similarity index 100% rename from python/tests/fixtures/feature_store_fixtures.json rename to hsfs/python/tests/fixtures/feature_store_fixtures.json diff --git a/python/tests/fixtures/feature_view_fixtures.json b/hsfs/python/tests/fixtures/feature_view_fixtures.json similarity index 100% rename from python/tests/fixtures/feature_view_fixtures.json rename to hsfs/python/tests/fixtures/feature_view_fixtures.json diff --git a/python/tests/fixtures/filter_fixtures.json b/hsfs/python/tests/fixtures/filter_fixtures.json similarity index 100% rename from python/tests/fixtures/filter_fixtures.json rename to hsfs/python/tests/fixtures/filter_fixtures.json diff --git a/python/tests/fixtures/fs_query_fixtures.json b/hsfs/python/tests/fixtures/fs_query_fixtures.json similarity index 100% rename from python/tests/fixtures/fs_query_fixtures.json rename to hsfs/python/tests/fixtures/fs_query_fixtures.json diff --git a/python/tests/fixtures/ge_expectation_fixtures.json b/hsfs/python/tests/fixtures/ge_expectation_fixtures.json similarity index 100% rename from python/tests/fixtures/ge_expectation_fixtures.json rename to hsfs/python/tests/fixtures/ge_expectation_fixtures.json diff --git a/python/tests/fixtures/ge_validation_result_fixtures.json b/hsfs/python/tests/fixtures/ge_validation_result_fixtures.json similarity index 100% rename from python/tests/fixtures/ge_validation_result_fixtures.json rename to hsfs/python/tests/fixtures/ge_validation_result_fixtures.json diff --git a/python/tests/fixtures/generate_backend_fixtures.ipynb b/hsfs/python/tests/fixtures/generate_backend_fixtures.ipynb similarity index 100% rename from python/tests/fixtures/generate_backend_fixtures.ipynb rename to hsfs/python/tests/fixtures/generate_backend_fixtures.ipynb diff --git a/python/tests/fixtures/hudi_feature_group_alias_fixtures.json b/hsfs/python/tests/fixtures/hudi_feature_group_alias_fixtures.json similarity index 100% rename from python/tests/fixtures/hudi_feature_group_alias_fixtures.json rename to hsfs/python/tests/fixtures/hudi_feature_group_alias_fixtures.json diff --git a/python/tests/fixtures/ingestion_job_fixtures.json b/hsfs/python/tests/fixtures/ingestion_job_fixtures.json similarity index 100% rename from python/tests/fixtures/ingestion_job_fixtures.json rename to hsfs/python/tests/fixtures/ingestion_job_fixtures.json diff --git a/python/tests/fixtures/inode_fixtures.json b/hsfs/python/tests/fixtures/inode_fixtures.json similarity index 100% rename from python/tests/fixtures/inode_fixtures.json rename to 
hsfs/python/tests/fixtures/inode_fixtures.json diff --git a/python/tests/fixtures/job_fixtures.json b/hsfs/python/tests/fixtures/job_fixtures.json similarity index 100% rename from python/tests/fixtures/job_fixtures.json rename to hsfs/python/tests/fixtures/job_fixtures.json diff --git a/python/tests/fixtures/join_fixtures.json b/hsfs/python/tests/fixtures/join_fixtures.json similarity index 100% rename from python/tests/fixtures/join_fixtures.json rename to hsfs/python/tests/fixtures/join_fixtures.json diff --git a/python/tests/fixtures/logic_fixtures.json b/hsfs/python/tests/fixtures/logic_fixtures.json similarity index 100% rename from python/tests/fixtures/logic_fixtures.json rename to hsfs/python/tests/fixtures/logic_fixtures.json diff --git a/python/tests/fixtures/prepared_statement_parameter_fixtures.json b/hsfs/python/tests/fixtures/prepared_statement_parameter_fixtures.json similarity index 100% rename from python/tests/fixtures/prepared_statement_parameter_fixtures.json rename to hsfs/python/tests/fixtures/prepared_statement_parameter_fixtures.json diff --git a/python/tests/fixtures/query_fixtures.json b/hsfs/python/tests/fixtures/query_fixtures.json similarity index 100% rename from python/tests/fixtures/query_fixtures.json rename to hsfs/python/tests/fixtures/query_fixtures.json diff --git a/python/tests/fixtures/rondb_server_fixtures.json b/hsfs/python/tests/fixtures/rondb_server_fixtures.json similarity index 100% rename from python/tests/fixtures/rondb_server_fixtures.json rename to hsfs/python/tests/fixtures/rondb_server_fixtures.json diff --git a/python/tests/fixtures/serving_keys_fixtures.json b/hsfs/python/tests/fixtures/serving_keys_fixtures.json similarity index 100% rename from python/tests/fixtures/serving_keys_fixtures.json rename to hsfs/python/tests/fixtures/serving_keys_fixtures.json diff --git a/python/tests/fixtures/serving_prepared_statement_fixtures.json b/hsfs/python/tests/fixtures/serving_prepared_statement_fixtures.json similarity index 100% rename from python/tests/fixtures/serving_prepared_statement_fixtures.json rename to hsfs/python/tests/fixtures/serving_prepared_statement_fixtures.json diff --git a/python/tests/fixtures/spine_group_fixtures.json b/hsfs/python/tests/fixtures/spine_group_fixtures.json similarity index 100% rename from python/tests/fixtures/spine_group_fixtures.json rename to hsfs/python/tests/fixtures/spine_group_fixtures.json diff --git a/python/tests/fixtures/split_statistics_fixtures.json b/hsfs/python/tests/fixtures/split_statistics_fixtures.json similarity index 100% rename from python/tests/fixtures/split_statistics_fixtures.json rename to hsfs/python/tests/fixtures/split_statistics_fixtures.json diff --git a/python/tests/fixtures/statistics_config_fixtures.json b/hsfs/python/tests/fixtures/statistics_config_fixtures.json similarity index 100% rename from python/tests/fixtures/statistics_config_fixtures.json rename to hsfs/python/tests/fixtures/statistics_config_fixtures.json diff --git a/python/tests/fixtures/statistics_fixtures.json b/hsfs/python/tests/fixtures/statistics_fixtures.json similarity index 100% rename from python/tests/fixtures/statistics_fixtures.json rename to hsfs/python/tests/fixtures/statistics_fixtures.json diff --git a/python/tests/fixtures/storage_connector_fixtures.json b/hsfs/python/tests/fixtures/storage_connector_fixtures.json similarity index 100% rename from python/tests/fixtures/storage_connector_fixtures.json rename to hsfs/python/tests/fixtures/storage_connector_fixtures.json diff --git 
a/hsfs/python/tests/fixtures/tag_fixtures.json b/hsfs/python/tests/fixtures/tag_fixtures.json new file mode 100644 index 000000000..b171d806f --- /dev/null +++ b/hsfs/python/tests/fixtures/tag_fixtures.json @@ -0,0 +1,25 @@ +{ + "get": { + "response": { + "count": 1, + "items": [ + { + "name": "test_name", + "value": "test_value", + "schema": "test_schema", + "href": "test_href", + "expand": "test_expand", + "items": [], + "count": 0, + "type": "tagDTO" + } + ] + } + }, + "get_empty": { + "response": { + "count": 0, + "items": [] + } + } +} \ No newline at end of file diff --git a/python/tests/fixtures/training_dataset_feature_fixtures.json b/hsfs/python/tests/fixtures/training_dataset_feature_fixtures.json similarity index 100% rename from python/tests/fixtures/training_dataset_feature_fixtures.json rename to hsfs/python/tests/fixtures/training_dataset_feature_fixtures.json diff --git a/python/tests/fixtures/training_dataset_fixtures.json b/hsfs/python/tests/fixtures/training_dataset_fixtures.json similarity index 100% rename from python/tests/fixtures/training_dataset_fixtures.json rename to hsfs/python/tests/fixtures/training_dataset_fixtures.json diff --git a/python/tests/fixtures/training_dataset_split_fixtures.json b/hsfs/python/tests/fixtures/training_dataset_split_fixtures.json similarity index 100% rename from python/tests/fixtures/training_dataset_split_fixtures.json rename to hsfs/python/tests/fixtures/training_dataset_split_fixtures.json diff --git a/python/tests/fixtures/transformation_function_attached_fixtures.json b/hsfs/python/tests/fixtures/transformation_function_attached_fixtures.json similarity index 100% rename from python/tests/fixtures/transformation_function_attached_fixtures.json rename to hsfs/python/tests/fixtures/transformation_function_attached_fixtures.json diff --git a/python/tests/fixtures/transformation_function_fixtures.json b/hsfs/python/tests/fixtures/transformation_function_fixtures.json similarity index 100% rename from python/tests/fixtures/transformation_function_fixtures.json rename to hsfs/python/tests/fixtures/transformation_function_fixtures.json diff --git a/python/tests/fixtures/user_fixtures.json b/hsfs/python/tests/fixtures/user_fixtures.json similarity index 100% rename from python/tests/fixtures/user_fixtures.json rename to hsfs/python/tests/fixtures/user_fixtures.json diff --git a/python/tests/fixtures/validation_report_fixtures.json b/hsfs/python/tests/fixtures/validation_report_fixtures.json similarity index 100% rename from python/tests/fixtures/validation_report_fixtures.json rename to hsfs/python/tests/fixtures/validation_report_fixtures.json diff --git a/hsfs/python/tests/pyproject.toml b/hsfs/python/tests/pyproject.toml new file mode 100644 index 000000000..050735f85 --- /dev/null +++ b/hsfs/python/tests/pyproject.toml @@ -0,0 +1,37 @@ +[tool.ruff.lint] +# 1. Enable flake8-bugbear (`B`) rules, in addition to the defaults. +select = ["E4", "E7", "E9", "F", "B", "I"] +ignore = [ + "B905", # zip has no strict kwarg until Python 3.10 +] + +# Allow fix for all enabled rules (when `--fix`) is provided. +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[tool.ruff.lint.isort] +lines-after-imports = 2 +known-third-party = ["hopsworks", "hsfs", "hsml"] + + +[tool.ruff.format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. 
+indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. +line-ending = "auto" + +[tool.pytest.ini_options] +pythonpath = [ + ".", "tests" +] +addopts = "--ignore=python/tests/test_helper/" diff --git a/python/tests/test_expectation_suite.py b/hsfs/python/tests/test_expectation_suite.py similarity index 100% rename from python/tests/test_expectation_suite.py rename to hsfs/python/tests/test_expectation_suite.py diff --git a/python/tests/test_feature.py b/hsfs/python/tests/test_feature.py similarity index 100% rename from python/tests/test_feature.py rename to hsfs/python/tests/test_feature.py diff --git a/python/tests/test_feature_group.py b/hsfs/python/tests/test_feature_group.py similarity index 100% rename from python/tests/test_feature_group.py rename to hsfs/python/tests/test_feature_group.py diff --git a/python/tests/test_feature_group_commit.py b/hsfs/python/tests/test_feature_group_commit.py similarity index 100% rename from python/tests/test_feature_group_commit.py rename to hsfs/python/tests/test_feature_group_commit.py diff --git a/python/tests/test_feature_group_writer.py b/hsfs/python/tests/test_feature_group_writer.py similarity index 100% rename from python/tests/test_feature_group_writer.py rename to hsfs/python/tests/test_feature_group_writer.py diff --git a/python/tests/test_feature_store.py b/hsfs/python/tests/test_feature_store.py similarity index 100% rename from python/tests/test_feature_store.py rename to hsfs/python/tests/test_feature_store.py diff --git a/python/tests/test_feature_view.py b/hsfs/python/tests/test_feature_view.py similarity index 100% rename from python/tests/test_feature_view.py rename to hsfs/python/tests/test_feature_view.py diff --git a/python/tests/test_ge_expectation.py b/hsfs/python/tests/test_ge_expectation.py similarity index 100% rename from python/tests/test_ge_expectation.py rename to hsfs/python/tests/test_ge_expectation.py diff --git a/python/tests/test_ge_validation_result.py b/hsfs/python/tests/test_ge_validation_result.py similarity index 100% rename from python/tests/test_ge_validation_result.py rename to hsfs/python/tests/test_ge_validation_result.py diff --git a/python/tests/test_helpers/__init__.py b/hsfs/python/tests/test_helpers/__init__.py similarity index 100% rename from python/tests/test_helpers/__init__.py rename to hsfs/python/tests/test_helpers/__init__.py diff --git a/python/tests/test_helpers/transformation_test_helper.py b/hsfs/python/tests/test_helpers/transformation_test_helper.py similarity index 100% rename from python/tests/test_helpers/transformation_test_helper.py rename to hsfs/python/tests/test_helpers/transformation_test_helper.py diff --git a/python/tests/test_hopswork_udf.py b/hsfs/python/tests/test_hopswork_udf.py similarity index 94% rename from python/tests/test_hopswork_udf.py rename to hsfs/python/tests/test_hopswork_udf.py index c175f64fb..8494d018f 100644 --- a/python/tests/test_hopswork_udf.py +++ b/hsfs/python/tests/test_hopswork_udf.py @@ -99,7 +99,7 @@ def test_get_module_imports(self): ] def test_extract_source_code(self): - from .test_helpers.transformation_test_helper import test_function + from test_helpers.transformation_test_helper import test_function assert """import pandas as pd from hsfs.transformation_statistics import TransformationStatistics @@ -107,7 +107,7 @@ def test_function(): return True""" == HopsworksUdf._extract_source_code(test_function).strip() def 
test_extract_function_arguments_no_arguments(self): - from .test_helpers.transformation_test_helper import test_function + from test_helpers.transformation_test_helper import test_function with pytest.raises(FeatureStoreException) as exception: HopsworksUdf._extract_function_arguments(test_function) @@ -118,7 +118,7 @@ def test_extract_function_arguments_no_arguments(self): ) def test_extract_function_arguments_one_argument(self): - from .test_helpers.transformation_test_helper import test_function_one_argument + from test_helpers.transformation_test_helper import test_function_one_argument function_argument = HopsworksUdf._extract_function_arguments( test_function_one_argument @@ -129,7 +129,7 @@ def test_extract_function_arguments_one_argument(self): ] def test_extract_function_arguments_one_argument_with_statistics(self): - from .test_helpers.transformation_test_helper import ( + from test_helpers.transformation_test_helper import ( test_function_one_argument_with_statistics, ) @@ -142,7 +142,7 @@ def test_extract_function_arguments_one_argument_with_statistics(self): ] def test_extract_function_arguments_one_argument_with_typehint(self): - from .test_helpers.transformation_test_helper import ( + from test_helpers.transformation_test_helper import ( test_function_one_argument_with_typehints, ) @@ -157,7 +157,7 @@ def test_extract_function_arguments_one_argument_with_typehint(self): def test_extract_function_arguments_one_argument_with_statistics_and_typehints( self, ): - from .test_helpers.transformation_test_helper import ( + from test_helpers.transformation_test_helper import ( test_function_one_argument_with_statistics_and_typehints, ) @@ -170,7 +170,7 @@ def test_extract_function_arguments_one_argument_with_statistics_and_typehints( ] def test_extract_function_arguments_multiple_argument(self): - from .test_helpers.transformation_test_helper import ( + from test_helpers.transformation_test_helper import ( test_function_multiple_argument, ) @@ -184,7 +184,7 @@ def test_extract_function_arguments_multiple_argument(self): ] def test_extract_function_arguments_multiple_argument_with_statistics(self): - from .test_helpers.transformation_test_helper import ( + from test_helpers.transformation_test_helper import ( test_function_multiple_argument_with_statistics, ) @@ -199,7 +199,7 @@ def test_extract_function_arguments_multiple_argument_with_statistics(self): ] def test_extract_function_arguments_multiple_argument_with_typehints(self): - from .test_helpers.transformation_test_helper import ( + from test_helpers.transformation_test_helper import ( test_function_multiple_argument_with_typehints, ) @@ -215,7 +215,7 @@ def test_extract_function_arguments_multiple_argument_with_typehints(self): def test_extract_function_arguments_multiple_argument_with_statistics_and_typehints( self, ): - from .test_helpers.transformation_test_helper import ( + from test_helpers.transformation_test_helper import ( test_function_multiple_argument_with_statistics_and_typehints, ) @@ -231,7 +231,7 @@ def test_extract_function_arguments_multiple_argument_with_statistics_and_typehi def test_extract_function_arguments_multiple_argument_with_mixed_statistics_and_typehints( self, ): - from .test_helpers.transformation_test_helper import ( + from test_helpers.transformation_test_helper import ( test_function_multiple_argument_with_mixed_statistics_and_typehints, ) @@ -248,7 +248,7 @@ def test_extract_function_arguments_multiple_argument_with_mixed_statistics_and_ def 
test_extract_function_arguments_multiple_argument_all_parameter_with_spaces( self, ): - from .test_helpers.transformation_test_helper import ( + from test_helpers.transformation_test_helper import ( test_function_multiple_argument_all_parameter_with_spaces, ) @@ -262,7 +262,7 @@ def test_extract_function_arguments_multiple_argument_all_parameter_with_spaces( ] def test_extract_function_arguments_multiple_argument_all_parameter_multiline(self): - from .test_helpers.transformation_test_helper import ( + from test_helpers.transformation_test_helper import ( test_function_multiple_argument_all_parameter_multiline, ) @@ -279,7 +279,7 @@ def test_extract_function_arguments_multiple_argument_all_parameter_multiline(se def test_extract_function_arguments_multiple_argumen_all_parameter_multiline_with_comments( self, ): - from .test_helpers.transformation_test_helper import ( + from test_helpers.transformation_test_helper import ( test_function_multiple_argument_all_parameter_multiline_with_comments, ) @@ -294,7 +294,7 @@ def test_extract_function_arguments_multiple_argumen_all_parameter_multiline_wit ] def test_extract_function_arguments_statistics_invalid(self): - from .test_helpers.transformation_test_helper import ( + from test_helpers.transformation_test_helper import ( test_function_statistics_invalid, ) @@ -307,7 +307,7 @@ def test_extract_function_arguments_statistics_invalid(self): ) def test_format_source_code(self): - from .test_helpers.transformation_test_helper import ( + from test_helpers.transformation_test_helper import ( test_function_multiple_argument_all_parameter_multiline_with_comments, ) diff --git a/python/tests/test_serving_keys.py b/hsfs/python/tests/test_serving_keys.py similarity index 100% rename from python/tests/test_serving_keys.py rename to hsfs/python/tests/test_serving_keys.py diff --git a/python/tests/test_split_statistics.py b/hsfs/python/tests/test_split_statistics.py similarity index 100% rename from python/tests/test_split_statistics.py rename to hsfs/python/tests/test_split_statistics.py diff --git a/python/tests/test_statistics.py b/hsfs/python/tests/test_statistics.py similarity index 100% rename from python/tests/test_statistics.py rename to hsfs/python/tests/test_statistics.py diff --git a/python/tests/test_statistics_config.py b/hsfs/python/tests/test_statistics_config.py similarity index 100% rename from python/tests/test_statistics_config.py rename to hsfs/python/tests/test_statistics_config.py diff --git a/python/tests/test_storage_connector.py b/hsfs/python/tests/test_storage_connector.py similarity index 100% rename from python/tests/test_storage_connector.py rename to hsfs/python/tests/test_storage_connector.py diff --git a/hsfs/python/tests/test_tag.py b/hsfs/python/tests/test_tag.py new file mode 100644 index 000000000..b66672cbe --- /dev/null +++ b/hsfs/python/tests/test_tag.py @@ -0,0 +1,43 @@ +# +# Copyright 2022 Hopsworks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +from hsfs import tag + + +class TestTag: + def test_from_response_json(self, backend_fixtures): + # Arrange + json = backend_fixtures["tag"]["get"]["response"] + + # Act + t_list = tag.Tag.from_response_json(json) + + # Assert + assert len(t_list) == 1 + t = t_list[0] + assert t.name == "test_name" + assert t.value == "test_value" + + def test_from_response_json_empty(self, backend_fixtures): + # Arrange + json = backend_fixtures["tag"]["get_empty"]["response"] + + # Act + t_list = tag.Tag.from_response_json(json) + + # Assert + assert len(t_list) == 0 diff --git a/python/tests/test_training_dataset.py b/hsfs/python/tests/test_training_dataset.py similarity index 100% rename from python/tests/test_training_dataset.py rename to hsfs/python/tests/test_training_dataset.py diff --git a/python/tests/test_training_dataset_feature.py b/hsfs/python/tests/test_training_dataset_feature.py similarity index 100% rename from python/tests/test_training_dataset_feature.py rename to hsfs/python/tests/test_training_dataset_feature.py diff --git a/python/tests/test_training_dataset_split.py b/hsfs/python/tests/test_training_dataset_split.py similarity index 100% rename from python/tests/test_training_dataset_split.py rename to hsfs/python/tests/test_training_dataset_split.py diff --git a/python/tests/test_transformation_function.py b/hsfs/python/tests/test_transformation_function.py similarity index 100% rename from python/tests/test_transformation_function.py rename to hsfs/python/tests/test_transformation_function.py diff --git a/python/tests/test_user.py b/hsfs/python/tests/test_user.py similarity index 100% rename from python/tests/test_user.py rename to hsfs/python/tests/test_user.py diff --git a/hsfs/python/tests/test_util.py b/hsfs/python/tests/test_util.py new file mode 100644 index 000000000..217611bd1 --- /dev/null +++ b/hsfs/python/tests/test_util.py @@ -0,0 +1,230 @@ +# +# Copyright 2022 Hopsworks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
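The `TestTag` cases above pin down the envelope that `tag_fixtures.json` encodes: a `count`/`items` wrapper around `tagDTO` objects, with `get_empty` carrying an empty `items` list. The following is a stripped-down sketch of the deserialization contract the test exercises, inferred from the fixture rather than copied from the implementation; the real `hsfs.tag.Tag` additionally handles DTO metadata such as `href`, `schema`, and `type`:

```python
# Hypothetical minimal Tag matching the fixture shape used in TestTag.
class Tag:
    def __init__(self, name, value, **kwargs):
        # **kwargs absorbs the extra DTO fields (schema, href, expand, type, ...)
        self._name = name
        self._value = value

    @property
    def name(self):
        return self._name

    @property
    def value(self):
        return self._value

    @classmethod
    def from_response_json(cls, json_dict):
        # An empty "items" list yields an empty result, as
        # test_from_response_json_empty expects.
        return [cls(**item) for item in json_dict.get("items", [])]
```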
+# + +import asyncio +from datetime import date, datetime + +import pytest +import pytz +from hsfs import util +from hsfs.client.exceptions import FeatureStoreException +from hsfs.core.constants import HAS_AIOMYSQL, HAS_SQLALCHEMY +from hsfs.embedding import EmbeddingFeature, EmbeddingIndex +from hsfs.feature import Feature +from mock import patch + + +if HAS_SQLALCHEMY and HAS_AIOMYSQL: + from hsfs.core import util_sql + + +class TestUtil: + def test_get_hudi_datestr_from_timestamp(self): + dt = util.get_hudi_datestr_from_timestamp(1640995200000) + assert dt == "20220101000000000" + + def test_convert_event_time_to_timestamp_timestamp(self): + dt = util.convert_event_time_to_timestamp(1640995200) + assert dt == 1640995200000 + + def test_convert_event_time_to_timestamp_datetime(self): + dt = util.convert_event_time_to_timestamp(datetime(2022, 1, 1, 0, 0, 0)) + assert dt == 1640995200000 + + def test_convert_event_time_to_timestamp_datetime_tz(self): + dt = util.convert_event_time_to_timestamp( + pytz.timezone("US/Pacific").localize(datetime(2021, 12, 31, 16, 0, 0)) + ) + assert dt == 1640995200000 + + def test_convert_event_time_to_timestamp_date(self): + dt = util.convert_event_time_to_timestamp(date(2022, 1, 1)) + assert dt == 1640995200000 + + def test_convert_event_time_to_timestamp_string(self): + dt = util.convert_event_time_to_timestamp("2022-01-01 00:00:00") + assert dt == 1640995200000 + + def test_convert_iso_event_time_to_timestamp_string(self): + dt = util.convert_event_time_to_timestamp("2022-01-01T00:00:00.000000Z") + assert dt == 1640995200000 + + def test_convert_event_time_to_timestamp_yyyy_mm_dd(self): + timestamp = util.get_timestamp_from_date_string("2022-01-01") + assert timestamp == 1640995200000 + + def test_convert_event_time_to_timestamp_yyyy_mm_dd_hh(self): + timestamp = util.get_timestamp_from_date_string("2022-01-01 00") + assert timestamp == 1640995200000 + + def test_convert_event_time_to_timestamp_yyyy_mm_dd_hh_mm(self): + timestamp = util.get_timestamp_from_date_string("2022-01-01 00:00") + assert timestamp == 1640995200000 + + def test_convert_event_time_to_timestamp_yyyy_mm_dd_hh_mm_ss(self): + timestamp = util.get_timestamp_from_date_string("2022-01-01 00:00:00") + assert timestamp == 1640995200000 + + def test_convert_event_time_to_timestamp_yyyy_mm_dd_hh_mm_ss_f(self): + timestamp = util.get_timestamp_from_date_string("2022-01-01 00:00:00.000") + assert timestamp == 1640995200000 + + def test_convert_event_time_to_timestamp_yyyy_mm_dd_hh_mm_ss_error(self): + with pytest.raises(ValueError): + util.get_timestamp_from_date_string("2022-13-01 00:00:00") + + def test_convert_event_time_to_timestamp_yyyy_mm_dd_hh_mm_ss_error2(self): + with pytest.raises(ValueError): + util.get_timestamp_from_date_string("202-13-01 00:00:00") + + def test_convert_event_time_to_timestamp_yyyy_mm_dd_hh_mm_ss_error3(self): + with pytest.raises(ValueError): + util.get_timestamp_from_date_string("00:00:00 2022-01-01") + + def test_convert_hudi_commit_time_to_timestamp(self): + timestamp = util.get_timestamp_from_date_string("20221118095233099") + assert timestamp == 1668765153099 + + def test_get_dataset_type_HIVEDB(self): + db_type = util.get_dataset_type( + "/apps/hive/warehouse/temp_featurestore.db/storage_connector_resources/kafka__tstore.jks" + ) + assert db_type == "HIVEDB" + + def test_get_dataset_type_HIVEDB_with_dfs(self): + db_type = util.get_dataset_type( + "hdfs:///apps/hive/warehouse/temp_featurestore.db/storage_connector_resources/kafka__tstore.jks" + ) + assert 
db_type == "HIVEDB" + + def test_get_dataset_type_DATASET(self): + db_type = util.get_dataset_type("/Projects/temp/Resources/kafka__tstore.jks") + assert db_type == "DATASET" + + def test_get_dataset_type_DATASET_with_dfs(self): + db_type = util.get_dataset_type( + "hdfs:///Projects/temp/Resources/kafka__tstore.jks" + ) + assert db_type == "DATASET" + + def test_get_job_url(self, mocker): + # Arrange + mock_client_get_instance = mocker.patch("hsfs.client.get_instance") + + # Act + util.get_job_url(href="1/2/3/4/5/6/7/8") + + # Assert + assert ( + mock_client_get_instance.return_value.replace_public_host.call_args[0][ + 0 + ].path + == "p/5/jobs/named/7/executions" + ) + + def test_get_feature_group_url(self, mocker): + # Arrange + feature_store_id = 99 + feature_group_id = 10 + mock_client_get_instance = mocker.patch("hsfs.client.get_instance") + mock_util_get_hostname_replaced_url = mocker.patch( + "hsfs.util.get_hostname_replaced_url" + ) + mock_client_get_instance.return_value._project_id = 50 + + # Act + util.get_feature_group_url( + feature_group_id=feature_group_id, feature_store_id=feature_store_id + ) + + # Assert + assert mock_util_get_hostname_replaced_url.call_count == 1 + assert ( + mock_util_get_hostname_replaced_url.call_args[0][0] == "/p/50/fs/99/fg/10" + ) + + def test_valid_embedding_type(self): + embedding_index = EmbeddingIndex( + features=[ + EmbeddingFeature("feature1", 3), + EmbeddingFeature("feature2", 3), + EmbeddingFeature("feature3", 3), + EmbeddingFeature("feature4", 3), + ] + ) + # Define a schema with valid feature types + schema = [ + Feature(name="feature1", type="array"), + Feature(name="feature2", type="array"), + Feature(name="feature3", type="array"), + Feature(name="feature4", type="array"), + ] + # Call the method and expect no exceptions + util.validate_embedding_feature_type(embedding_index, schema) + + def test_invalid_embedding_type(self): + embedding_index = EmbeddingIndex( + features=[ + EmbeddingFeature("feature1", 3), + EmbeddingFeature("feature2", 3), + ] + ) + # Define a schema with an invalid feature type + schema = [ + Feature(name="feature1", type="array"), + Feature(name="feature2", type="array"), # Invalid type + ] + # Call the method and expect a FeatureStoreException + with pytest.raises(FeatureStoreException): + util.validate_embedding_feature_type(embedding_index, schema) + + def test_missing_embedding_index(self): + # Define a schema without an embedding index + schema = [ + Feature(name="feature1", type="array"), + Feature(name="feature2", type="array"), + ] + # Call the method with an empty feature_group (no embedding index) + util.validate_embedding_feature_type(None, schema) + # No exception should be raised + + def test_empty_schema(self): + embedding_index = EmbeddingIndex( + features=[ + EmbeddingFeature("feature1", 3), + EmbeddingFeature("feature2", 3), + ] + ) + # Define an empty schema + schema = [] + # Call the method with an empty schema + util.validate_embedding_feature_type(embedding_index, schema) + # No exception should be raised + + @pytest.mark.skipif( + not HAS_SQLALCHEMY or not HAS_AIOMYSQL, + reason="SQLAlchemy or aiomysql is not installed", + ) + def test_create_async_engine(self, mocker): + # Test when get_running_loop() raises a RuntimeError + with patch("asyncio.get_running_loop", side_effect=RuntimeError): + # mock storage connector + online_connector = patch.object(util, "get_online_connector") + with pytest.raises( + RuntimeError, + match="Event loop is not running. 
Please invoke this co-routine from a running loop or provide an event loop.", + ): + asyncio.run(util_sql.create_async_engine(online_connector, True, 1)) diff --git a/python/tests/test_validation_report.py b/hsfs/python/tests/test_validation_report.py similarity index 100% rename from python/tests/test_validation_report.py rename to hsfs/python/tests/test_validation_report.py diff --git a/python/tests/util.py b/hsfs/python/tests/util.py similarity index 100% rename from python/tests/util.py rename to hsfs/python/tests/util.py diff --git a/hsfs/requirements-docs.txt b/hsfs/requirements-docs.txt new file mode 100644 index 000000000..2a2e7927b --- /dev/null +++ b/hsfs/requirements-docs.txt @@ -0,0 +1,12 @@ +mkdocs==1.5.3 +mkdocs-material==9.5.17 +mike==2.0.0 +sphinx==7.3.7 +keras_autodoc @ git+https://git@github.com/logicalclocks/keras-autodoc +markdown-include==0.8.1 +mkdocs-jupyter==0.24.3 +markdown==3.6 +pymdown-extensions==10.7.1 +mkdocs-macros-plugin==1.0.4 +mkdocs-minify-plugin>=0.2.0 + diff --git a/utils/java/pom.xml b/hsfs/utils/java/pom.xml similarity index 100% rename from utils/java/pom.xml rename to hsfs/utils/java/pom.xml diff --git a/utils/java/src/main/java/com/logicalclocks/utils/MainClass.java b/hsfs/utils/java/src/main/java/com/logicalclocks/utils/MainClass.java similarity index 100% rename from utils/java/src/main/java/com/logicalclocks/utils/MainClass.java rename to hsfs/utils/java/src/main/java/com/logicalclocks/utils/MainClass.java diff --git a/utils/java/src/main/resources/checkstyle.xml b/hsfs/utils/java/src/main/resources/checkstyle.xml similarity index 100% rename from utils/java/src/main/resources/checkstyle.xml rename to hsfs/utils/java/src/main/resources/checkstyle.xml diff --git a/utils/java/src/main/resources/suppressions.xml b/hsfs/utils/java/src/main/resources/suppressions.xml similarity index 100% rename from utils/java/src/main/resources/suppressions.xml rename to hsfs/utils/java/src/main/resources/suppressions.xml diff --git a/utils/python/hsfs_utils.py b/hsfs/utils/python/hsfs_utils.py similarity index 100% rename from utils/python/hsfs_utils.py rename to hsfs/utils/python/hsfs_utils.py diff --git a/hsml/.github/workflows/mkdocs-main.yml b/hsml/.github/workflows/mkdocs-main.yml new file mode 100644 index 000000000..001f1fad1 --- /dev/null +++ b/hsml/.github/workflows/mkdocs-main.yml @@ -0,0 +1,35 @@ +name: mkdocs-main + +on: pull_request + +jobs: + publish-main: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: set dev version + working-directory: ./java + run: echo "DEV_VERSION=$(mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | grep -Ev 'Download|INFO|WARNING')" >> $GITHUB_ENV + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: install deps + working-directory: ./python + run: cp ../README.md . 
&& pip3 install -r ../requirements-docs.txt && pip3 install -e .[dev] + + - name: generate autodoc + run: python3 auto_doc.py + + - name: setup git + run: | + git config --global user.name Mike + git config --global user.email mike@docs.hopsworks.ai + + - name: mike deploy docs + run: mike deploy ${{ env.DEV_VERSION }} dev -u diff --git a/hsml/.github/workflows/mkdocs-release.yml b/hsml/.github/workflows/mkdocs-release.yml new file mode 100644 index 000000000..e2b4b2b3f --- /dev/null +++ b/hsml/.github/workflows/mkdocs-release.yml @@ -0,0 +1,42 @@ +name: mkdocs-release + +on: + push: + branches: [branch-*\.*] + +jobs: + publish-release: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: set major/minor/bugfix release version + working-directory: ./java + run: echo "RELEASE_VERSION=$(mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | grep -Ev 'Download|INFO|WARNING')" >> $GITHUB_ENV + + - name: set major/minor release version + run: echo "MAJOR_VERSION=$(echo $RELEASE_VERSION | sed 's/^\([0-9]*\.[0-9]*\).*$/\1/')" >> $GITHUB_ENV + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: install deps + working-directory: ./python + run: cp ../README.md . && pip3 install -r ../requirements-docs.txt && pip3 install -e .[dev] + + - name: generate autodoc + run: python3 auto_doc.py + + - name: setup git + run: | + git config --global user.name Mike + git config --global user.email mike@docs.hopsworks.ai + + - name: mike deploy docs + run: | + mike deploy ${{ env.RELEASE_VERSION }} ${{ env.MAJOR_VERSION }} -u --push + mike alias ${{ env.RELEASE_VERSION }} latest -u --push diff --git a/hsml/.github/workflows/python-lint.yml b/hsml/.github/workflows/python-lint.yml new file mode 100644 index 000000000..88225add7 --- /dev/null +++ b/hsml/.github/workflows/python-lint.yml @@ -0,0 +1,163 @@ +name: python + +on: pull_request + +jobs: + lint_stylecheck: + name: Lint and Stylecheck + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Get all changed files + id: get-changed-files + uses: tj-actions/changed-files@v44 + with: + files_yaml: | + src: + - 'python/**/*.py' + - '!python/tests/**/*.py' + test: + - 'python/tests/**/*.py' + + - name: install deps + run: pip install ruff==0.4.2 + + - name: ruff on python files + if: steps.get-changed-files.outputs.src_any_changed == 'true' + env: + SRC_ALL_CHANGED_FILES: ${{ steps.get-changed-files.outputs.src_all_changed_files }} + run: ruff check --output-format=github $SRC_ALL_CHANGED_FILES + + - name: ruff on test files + if: steps.get-changed-files.outputs.test_any_changed == 'true' + env: + TEST_ALL_CHANGED_FILES: ${{ steps.get-changed-files.outputs.test_all_changed_files }} + run: ruff check --output-format=github $TEST_ALL_CHANGED_FILES + + - name: ruff format --check $ALL_CHANGED_FILES + env: + ALL_CHANGED_FILES: ${{ steps.get-changed-files.outputs.all_changed_files }} + run: ruff format --check $ALL_CHANGED_FILES + + unit_tests_ubuntu_utc: + name: Unit Testing (Ubuntu) + needs: lint_stylecheck + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + + steps: + - name: Set Timezone + run: sudo timedatectl set-timezone UTC + + - uses: actions/checkout@v4 + - name: Copy README + run: cp README.md python/ + + - uses: actions/setup-python@v5 + name: Setup Python + with: + python-version: ${{ matrix.python-version 
}} + cache: "pip" + cache-dependency-path: "python/setup.py" + - run: pip install -e python[dev] + + - name: Display Python version + run: python --version + + - name: Run Pytest suite + run: pytest python/tests + + unit_tests_ubuntu_local: + name: Unit Testing (Ubuntu) (Local TZ) + needs: lint_stylecheck + runs-on: ubuntu-latest + + steps: + - name: Set Timezone + run: sudo timedatectl set-timezone Europe/Amsterdam + + - uses: actions/checkout@v4 + - name: Copy README + run: cp README.md python/ + + - uses: actions/setup-python@v5 + name: Setup Python + with: + python-version: "3.12" + cache: "pip" + cache-dependency-path: "python/setup.py" + - run: pip install -e python[dev] + + - name: Display Python version + run: python --version + + - name: Run Pytest suite + run: pytest python/tests + + unit_tests_windows_utc: + name: Unit Testing (Windows) + needs: lint_stylecheck + runs-on: windows-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + + steps: + - name: Set Timezone + run: tzutil /s "UTC" + + - uses: actions/checkout@v4 + - name: Copy README + run: cp README.md python/ + + - uses: actions/setup-python@v5 + name: Setup Python + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + cache-dependency-path: "python/setup.py" + - run: pip install -e python[dev] + + - name: Display Python version + run: python --version + + - name: Run Pytest suite + run: pytest python/tests + + unit_tests_windows_local: + name: Unit Testing (Windows) (Local TZ) + needs: lint_stylecheck + runs-on: windows-latest + + steps: + - name: Set Timezone + run: tzutil /s "W. Europe Standard Time" + + - uses: actions/checkout@v4 + - name: Copy README + run: cp README.md python/ + + - uses: actions/setup-python@v5 + name: Setup Python + with: + python-version: "3.12" + cache: "pip" + cache-dependency-path: "python/setup.py" + - run: pip install -e python[dev] + + - name: Display Python version + run: python --version + + - name: Display pip freeze + run: pip freeze + + - name: Run Pytest suite + run: pytest python/tests diff --git a/hsml/.gitignore b/hsml/.gitignore new file mode 100644 index 000000000..6e96d8144 --- /dev/null +++ b/hsml/.gitignore @@ -0,0 +1,130 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +python/README.md +python/LICENSE + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ +.ruff_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Java +.idea +.vscode +*.iml +target/ + +# Mac +.DS_Store + +# mkdocs intemediate files +docs/generated diff --git a/hsml/CONTRIBUTING.md b/hsml/CONTRIBUTING.md new file mode 100644 index 000000000..b287467c6 --- /dev/null +++ b/hsml/CONTRIBUTING.md @@ -0,0 +1,215 @@ +## Python development setup +--- + +- Fork and clone the repository + +- Create a new Python environment with your favourite environment manager, e.g. virtualenv or conda + +- Install repository in editable mode with development dependencies: + + ```bash + cd python + pip install -e ".[dev]" + ``` + +- Install [pre-commit](https://pre-commit.com/) and then activate its hooks. pre-commit is a framework for managing and maintaining multi-language pre-commit hooks. The Model Registry uses pre-commit to ensure code-style and code formatting through [ruff](https://docs.astral.sh/ruff/). Run the following commands from the `python` directory: + + ```bash + cd python + pip install --user pre-commit + pre-commit install + ``` + + Afterwards, pre-commit will run whenever you commit. + +- To run formatting and code-style separately, you can configure your IDE, such as VSCode, to use [ruff](https://docs.astral.sh/ruff/tutorial/#getting-started): + + ```bash + cd python + ruff check --fix + ruff format + ``` + +### Python documentation + +We follow a few best practices for writing the Python documentation: + +1. Use the google docstring style: + + ```python + """[One Line Summary] + + [Extended Summary] + + [!!! example + import xyz + ] + + # Arguments + arg1: Type[, optional]. Description[, defaults to `default`] + arg2: Type[, optional]. Description[, defaults to `default`] + + # Returns + Type. Description. + + # Raises + Exception. Description. + """ + ``` + + If Python 3 type annotations are used, they are inserted automatically. + + +2. Model registry entity engine methods (e.g. ModelEngine etc.) only require a single line docstring. +3. REST Api implementations (e.g. ModelApi etc.) should be fully documented with docstrings without defaults. +4. Public Api such as metadata objects should be fully documented with defaults. + +#### Setup and Build Documentation + +We use `mkdocs` together with `mike` ([for versioning](https://github.com/jimporter/mike/)) to build the documentation and a plugin called `keras-autodoc` to auto generate Python API documentation from docstrings. + +**Background about `mike`:** + `mike` builds the documentation and commits it as a new directory to the gh-pages branch. Each directory corresponds to one version of the documentation. 
Additionally, `mike` maintains a json in the root of gh-pages with the mappings of versions/aliases for each of the directories available. With aliases you can define extra names like `dev` or `latest`, to indicate stable and unstable releases. + +1. Currently we are using our own version of `keras-autodoc` + + ```bash + pip install git+https://github.com/logicalclocks/keras-autodoc + ``` + +2. Install HSML with `docs` extras: + + ```bash + pip install -e .[dev,docs] + ``` + +3. To build the docs, first run the auto doc script: + + ```bash + cd .. + python auto_doc.py + ``` + +##### Option 1: Build only current version of docs + +4. Either build the docs, or serve them dynamically: + + Note: Links and pictures might not resolve properly later on when checking with this build. + The reason for that is that the docs are deployed with versioning on docs.hopsworks.ai and + therefore another level is added to all paths, e.g. `docs.hopsworks.ai/[version-or-alias]`. + Using relative links should not be affected by this, however, building the docs with version + (Option 2) is recommended. + + ```bash + mkdocs build + # or + mkdocs serve + ``` + +##### Option 2 (Preferred): Build multi-version doc with `mike` + +###### Versioning on docs.hopsworks.ai + +On docs.hopsworks.ai we implement the following versioning scheme: + +- current master branches (e.g. of hsml corresponding to master of Hopsworks): rendered as current Hopsworks snapshot version, e.g. **2.2.0-SNAPSHOT [dev]**, where `dev` is an alias to indicate that this is an unstable version. +- the latest release: rendered with full current version, e.g. **2.1.5 [latest]** with `latest` alias to indicate that this is the latest stable release. +- previous stable releases: rendered without alias, e.g. **2.1.4**. + +###### Build Instructions + +4. For this you can either checkout and make a local copy of the `upstream/gh-pages` branch, where +`mike` maintains the current state of docs.hopsworks.ai, or just build documentation for the branch you are updating: + + Building *one* branch: + + Checkout your dev branch with modified docs: + ```bash + git checkout [dev-branch] + ``` + + Generate API docs if necessary: + ```bash + python auto_doc.py + ``` + + Build docs with a version and alias + ```bash + mike deploy [version] [alias] --update-alias + + # for example, if you are updating documentation to be merged to master, + # which will become the new SNAPSHOT version: + mike deploy 2.2.0-SNAPSHOT dev --update-alias + + # if you are updating docs of the latest stable release branch + mike deploy [version] latest --update-alias + + # if you are updating docs of a previous stable release branch + mike deploy [version] + ``` + + If no gh-pages branch existed in your local repository, this will have created it. 
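+
+   For orientation, the version/alias mapping that `mike` keeps at the root of the gh-pages branch is a `versions.json` file. An illustrative sketch, using the versions and aliases mentioned above (the actual entries depend on what you have deployed):
+
+   ```json
+   [
+     {"version": "2.2.0-SNAPSHOT", "title": "2.2.0-SNAPSHOT", "aliases": ["dev"]},
+     {"version": "2.1.5", "title": "2.1.5", "aliases": ["latest"]},
+     {"version": "2.1.4", "title": "2.1.4", "aliases": []}
+   ]
+   ```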
+
+   **Important**: If no previous docs were built, you will have to choose a version as default to be loaded as index, as follows
+
+   ```bash
+   mike set-default [version-or-alias]
+   ```
+
+   You can now checkout the gh-pages branch and serve:
+   ```bash
+   git checkout gh-pages
+   mike serve
+   ```
+
+   You can also list all available versions/aliases:
+   ```bash
+   mike list
+   ```
+
+   Delete and reset your local gh-pages branch:
+   ```bash
+   mike delete --all
+
+   # or delete single version
+   mike delete [version-or-alias]
+   ```
+
+#### Adding new API documentation
+
+To add new documentation for APIs, you need to add information about the method/class to document to the `auto_doc.py` script:
+
+```python
+PAGES = {
+    "connection.md": [
+        "hsml.connection.Connection.connection",
+        "hsml.connection.Connection.setup_databricks",
+    ],
+    "new_template.md": [
+        "module",
+        "xyz.asd"
+    ]
+}
+```
+
+Now you can add a template markdown file to the `docs/templates` directory with the name you specified in the auto-doc script. The `new_template.md` file should contain a tag to identify the place at which the API documentation should be inserted:
+
+```
+## The XYZ package
+
+{{module}}
+
+Some extra content here.
+
+!!! example
+    ```python
+    import xyz
+    ```
+
+{{xyz.asd}}
+```
+
+Finally, run the `auto_doc.py` script, as described above, to update the documentation.
+
+For information about Markdown syntax and possible Admonitions/Highlighting etc. see
+the [Material for Mkdocs themes reference documentation](https://squidfunk.github.io/mkdocs-material/reference/abbreviations/).
diff --git a/hsml/Dockerfile b/hsml/Dockerfile
new file mode 100644
index 000000000..7f87ca293
--- /dev/null
+++ b/hsml/Dockerfile
@@ -0,0 +1,9 @@
+FROM ubuntu:20.04
+
+RUN apt-get update && \
+    apt-get install -y python3-pip git && apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN pip3 install twine
+
+RUN mkdir -p /.local && chmod -R 777 /.local
diff --git a/hsml/Jenkinsfile b/hsml/Jenkinsfile
new file mode 100644
index 000000000..d2014d5cb
--- /dev/null
+++ b/hsml/Jenkinsfile
@@ -0,0 +1,23 @@
+pipeline {
+  agent {
+    docker {
+      label "local"
+      image "docker.hops.works/hopsworks_twine:0.0.1"
+    }
+  }
+  stages {
+    stage("publish") {
+      environment {
+        PYPI = credentials('977daeb0-e1c8-43a0-b35a-fc37bb9eee9b')
+      }
+      steps {
+        dir("python") {
+          sh "rm -f LICENSE README.md"
+          sh "cp -f ../LICENSE ../README.md ./"
+          sh "python3 -m build"
+          sh "twine upload -u $PYPI_USR -p $PYPI_PSW --skip-existing dist/*"
+        }
+      }
+    }
+  }
+}
diff --git a/hsml/LICENSE b/hsml/LICENSE
new file mode 100644
index 000000000..261eeb9e9
--- /dev/null
+++ b/hsml/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity.
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/hsml/README.md b/hsml/README.md new file mode 100644 index 000000000..ee835ddc7 --- /dev/null +++ b/hsml/README.md @@ -0,0 +1,141 @@ +# Hopsworks Model Management + +

+ Hopsworks Community + Hopsworks Model Management Documentation + python + PyPiStatus + Scala/Java Artifacts + Downloads + Ruff + License +

+
+HSML is the library to interact with the Hopsworks Model Registry and Model Serving. The library makes it easy to export, manage and deploy models. To connect from an external Python environment, additional connection information, such as host and port, is required.
+
+## Getting Started On Hopsworks
+
+Get started easily by registering an account on [Hopsworks Serverless](https://app.hopsworks.ai/). Create your project and a [new API key](https://docs.hopsworks.ai/latest/user_guides/projects/api_key/create_api_key/). In a new Python environment with Python 3.8 or higher, install the [client library](https://docs.hopsworks.ai/latest/user_guides/client_installation/) using pip:
+
+```bash
+# Get all Hopsworks SDKs: Feature Store, Model Serving and Platform SDK
+pip install hopsworks
+# or just the Model Registry and Model Serving SDK
+pip install hsml
+```
+
+You can start a notebook and instantiate a connection to get a handle to the project's model registry or model serving.
+
+```python
+import hopsworks
+
+project = hopsworks.login() # you will be prompted for your api key
+
+mr = project.get_model_registry()
+# or
+ms = project.get_model_serving()
+```
+
+or using `hsml` directly:
+
+```python
+import hsml
+
+connection = hsml.connection(
+    host="c.app.hopsworks.ai",
+    project="your-project",
+    api_key_value="your-api-key",
+)
+
+mr = connection.get_model_registry()
+# or
+ms = connection.get_model_serving()
+```
+
+Create a new model
+```python
+model = mr.tensorflow.create_model(name="mnist",
+                                   version=1,
+                                   metrics={"accuracy": 0.94},
+                                   description="mnist model description")
+model.save("/tmp/model_directory") # or /tmp/model_file
+```
+
+Download a model
+```python
+model = mr.get_model("mnist", version=1)
+
+model_path = model.download()
+```
+
+Delete a model
+```python
+model.delete()
+```
+
+Get best performing model
+```python
+best_model = mr.get_best_model('mnist', 'accuracy', 'max')
+```
+
+Deploy a model
+```python
+deployment = model.deploy()
+```
+
+Start a deployment
+```python
+deployment.start()
+```
+
+Make predictions with a deployed model
+```python
+data = { "instances": [ model.input_example ] }
+
+predictions = deployment.predict(data)
+```
+
+# Tutorials
+
+You can find more examples on how to use the library in our [tutorials](https://github.com/logicalclocks/hopsworks-tutorials).
+
+## Documentation
+
+Documentation is available at [Hopsworks Model Management Documentation](https://docs.hopsworks.ai/).
+
+## Issues
+
+For general questions about the usage of Hopsworks Machine Learning, please open a topic on [Hopsworks Community](https://community.hopsworks.ai/).
+Please report any issue using [Github issue tracking](https://github.com/logicalclocks/machine-learning-api/issues).
+
+## Contributing
+
+If you would like to contribute to this library, please see the [Contribution Guidelines](CONTRIBUTING.md).
diff --git a/hsml/auto_doc.py b/hsml/auto_doc.py
new file mode 100644
index 000000000..4c7ae26ee
--- /dev/null
+++ b/hsml/auto_doc.py
@@ -0,0 +1,210 @@
+#
+# Copyright 2021 Logical Clocks AB
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pathlib +import shutil +import os +import keras_autodoc + +JSON_METHODS = [ + "extract_fields_from_json", + "from_json", + "from_response_json", + "json", + "update_from_response_json", +] + +PAGES = { + # Model registry + "connection_api.md": { + "connection": ["hsml.connection.Connection"], + "connection_properties": keras_autodoc.get_properties( + "hsml.connection.Connection", exclude=["trust_store_path"] + ), + "connection_methods": keras_autodoc.get_methods("hsml.connection.Connection"), + }, + "model-registry/model_registry_api.md": { + "mr_get": ["hsml.connection.Connection.get_model_registry"], + "mr_modules": keras_autodoc.get_properties( + "hsml.model_registry.ModelRegistry", + exclude=[ + "project_id", + "project_name", + "model_registry_id", + "shared_registry_project_name", + ], + ), + "mr_properties": keras_autodoc.get_properties( + "hsml.model_registry.ModelRegistry", + exclude=[ + "python", + "sklearn", + "tensorflow", + "torch", + ], + ), + "mr_methods": keras_autodoc.get_methods( + "hsml.model_registry.ModelRegistry", exclude=["from_response_json"] + ), + }, + "model-registry/model_api.md": { + "ml_create_tf": ["hsml.model_registry.ModelRegistry.tensorflow.create_model"], + "ml_create_th": ["hsml.model_registry.ModelRegistry.torch.create_model"], + "ml_create_sl": ["hsml.model_registry.ModelRegistry.sklearn.create_model"], + "ml_create_py": ["hsml.model_registry.ModelRegistry.python.create_model"], + "ml_get": ["hsml.model_registry.ModelRegistry.get_model"], + "ml_properties": keras_autodoc.get_properties("hsml.model.Model"), + "ml_methods": keras_autodoc.get_methods( + "hsml.model.Model", + exclude=[ + "from_response_json", + "json", + "to_dict", + "update_from_response_json", + ], + ), + }, + "model-registry/model_schema.md": {}, + "model-registry/model_schema_api.md": { + "schema": ["hsml.schema.Schema"], + "schema_dict": ["hsml.schema.Schema.to_dict"], + "model_schema": ["hsml.model_schema.ModelSchema"], + "model_schema_dict": ["hsml.model_schema.ModelSchema.to_dict"], + }, + "model-registry/links.md": { + "links_properties": keras_autodoc.get_properties( + "hsml.core.explicit_provenance.Links" + ), + "artifact_properties": keras_autodoc.get_properties( + "hsml.core.explicit_provenance.Artifact" + ), + }, + # Model Serving + "model-serving/model_serving_api.md": { + "ms_get": ["hsml.connection.Connection.get_model_serving"], + "ms_properties": keras_autodoc.get_properties( + "hsml.model_serving.ModelServing" + ), + "ms_methods": keras_autodoc.get_methods( + "hsml.model_serving.ModelServing", exclude=["from_response_json"] + ), + }, + "model-serving/deployment_api.md": { + "ms_get_model_serving": ["hsml.connection.Connection.get_model_serving"], + "ms_get_deployments": [ + "hsml.model_serving.ModelServing.get_deployment", + "hsml.model_serving.ModelServing.get_deployment_by_id", + "hsml.model_serving.ModelServing.get_deployments", + ], + "ms_create_deployment": ["hsml.model_serving.ModelServing.create_deployment"], + "m_deploy": ["hsml.model.Model.deploy"], + "p_deploy": ["hsml.predictor.Predictor.deploy"], + "dep_properties": keras_autodoc.get_properties("hsml.deployment.Deployment"), + "dep_methods": keras_autodoc.get_methods( + "hsml.deployment.Deployment", exclude=JSON_METHODS + ["from_predictor"] + ), + }, + "model-serving/predictor_api.md": { + "ms_get_model_serving": ["hsml.connection.Connection.get_model_serving"], + "ms_create_predictor": 
["hsml.model_serving.ModelServing.create_predictor"], + "pred_properties": keras_autodoc.get_properties("hsml.predictor.Predictor"), + "pred_methods": keras_autodoc.get_methods( + "hsml.predictor.Predictor", + exclude=JSON_METHODS + ["for_model"], + ), + }, + "model-serving/transformer_api.md": { + "ms_get_model_serving": ["hsml.connection.Connection.get_model_serving"], + "ms_create_transformer": ["hsml.model_serving.ModelServing.create_transformer"], + "trans_properties": keras_autodoc.get_properties( + "hsml.transformer.Transformer" + ), + "trans_methods": keras_autodoc.get_methods( + "hsml.transformer.Transformer", exclude=JSON_METHODS + ), + }, + "model-serving/inference_logger_api.md": { + "il": ["hsml.inference_logger.InferenceLogger"], + "il_properties": keras_autodoc.get_properties( + "hsml.inference_logger.InferenceLogger" + ), + "il_methods": keras_autodoc.get_methods( + "hsml.inference_logger.InferenceLogger", exclude=JSON_METHODS + ), + }, + "model-serving/inference_batcher_api.md": { + "ib": ["hsml.inference_batcher.InferenceBatcher"], + "ib_properties": keras_autodoc.get_properties( + "hsml.inference_batcher.InferenceBatcher" + ), + "ib_methods": keras_autodoc.get_methods( + "hsml.inference_batcher.InferenceBatcher", exclude=JSON_METHODS + ), + }, + "model-serving/resources_api.md": { + "res": ["hsml.resources.Resources"], + "res_properties": keras_autodoc.get_properties("hsml.resources.Resources"), + "res_methods": keras_autodoc.get_methods( + "hsml.resources.Resources", exclude=JSON_METHODS + ), + }, + "model-serving/predictor_state_api.md": { + "ps_get": ["hsml.deployment.Deployment.get_state"], + "ps_properties": keras_autodoc.get_properties( + "hsml.predictor_state.PredictorState" + ), + "ps_methods": keras_autodoc.get_methods( + "hsml.predictor_state.PredictorState", exclude=JSON_METHODS + ), + }, + "model-serving/predictor_state_condition_api.md": { + "psc_get": ["hsml.predictor_state.PredictorState.condition"], + "psc_properties": keras_autodoc.get_properties( + "hsml.predictor_state_condition.PredictorStateCondition" + ), + "psc_methods": keras_autodoc.get_methods( + "hsml.predictor_state_condition.PredictorStateCondition", + exclude=JSON_METHODS, + ), + }, +} + +hsml_dir = pathlib.Path(__file__).resolve().parents[0] +if "GITHUB_SHA" in os.environ: + commit_sha = os.environ["GITHUB_SHA"] + project_url = f"https://github.com/logicalclocks/machine-learning-api/tree/{commit_sha}/python" +else: + branch_name = os.environ.get("GITHUB_BASE_REF", "master") + project_url = f"https://github.com/logicalclocks/machine-learning-api/blob/{branch_name}/python" + + +def generate(dest_dir): + doc_generator = keras_autodoc.DocumentationGenerator( + PAGES, + project_url=project_url, + template_dir="./docs/templates", + titles_size="###", + extra_aliases={}, + max_signature_line_length=100, + ) + shutil.copyfile(hsml_dir / "CONTRIBUTING.md", dest_dir / "CONTRIBUTING.md") + shutil.copyfile(hsml_dir / "README.md", dest_dir / "index.md") + + doc_generator.generate(dest_dir / "generated") + + +if __name__ == "__main__": + generate(hsml_dir / "docs") diff --git a/hsml/docs/CONTRIBUTING.md b/hsml/docs/CONTRIBUTING.md new file mode 100644 index 000000000..b287467c6 --- /dev/null +++ b/hsml/docs/CONTRIBUTING.md @@ -0,0 +1,215 @@ +## Python development setup +--- + +- Fork and clone the repository + +- Create a new Python environment with your favourite environment manager, e.g. 
virtualenv or conda + +- Install repository in editable mode with development dependencies: + + ```bash + cd python + pip install -e ".[dev]" + ``` + +- Install [pre-commit](https://pre-commit.com/) and then activate its hooks. pre-commit is a framework for managing and maintaining multi-language pre-commit hooks. The Model Registry uses pre-commit to ensure code-style and code formatting through [ruff](https://docs.astral.sh/ruff/). Run the following commands from the `python` directory: + + ```bash + cd python + pip install --user pre-commit + pre-commit install + ``` + + Afterwards, pre-commit will run whenever you commit. + +- To run formatting and code-style separately, you can configure your IDE, such as VSCode, to use [ruff](https://docs.astral.sh/ruff/tutorial/#getting-started): + + ```bash + cd python + ruff check --fix + ruff format + ``` + +### Python documentation + +We follow a few best practices for writing the Python documentation: + +1. Use the google docstring style: + + ```python + """[One Line Summary] + + [Extended Summary] + + [!!! example + import xyz + ] + + # Arguments + arg1: Type[, optional]. Description[, defaults to `default`] + arg2: Type[, optional]. Description[, defaults to `default`] + + # Returns + Type. Description. + + # Raises + Exception. Description. + """ + ``` + + If Python 3 type annotations are used, they are inserted automatically. + + +2. Model registry entity engine methods (e.g. ModelEngine etc.) only require a single line docstring. +3. REST Api implementations (e.g. ModelApi etc.) should be fully documented with docstrings without defaults. +4. Public Api such as metadata objects should be fully documented with defaults. + +#### Setup and Build Documentation + +We use `mkdocs` together with `mike` ([for versioning](https://github.com/jimporter/mike/)) to build the documentation and a plugin called `keras-autodoc` to auto generate Python API documentation from docstrings. + +**Background about `mike`:** + `mike` builds the documentation and commits it as a new directory to the gh-pages branch. Each directory corresponds to one version of the documentation. Additionally, `mike` maintains a json in the root of gh-pages with the mappings of versions/aliases for each of the directories available. With aliases you can define extra names like `dev` or `latest`, to indicate stable and unstable releases. + +1. Currently we are using our own version of `keras-autodoc` + + ```bash + pip install git+https://github.com/logicalclocks/keras-autodoc + ``` + +2. Install HSML with `docs` extras: + + ```bash + pip install -e .[dev,docs] + ``` + +3. To build the docs, first run the auto doc script: + + ```bash + cd .. + python auto_doc.py + ``` + +##### Option 1: Build only current version of docs + +4. Either build the docs, or serve them dynamically: + + Note: Links and pictures might not resolve properly later on when checking with this build. + The reason for that is that the docs are deployed with versioning on docs.hopsworks.ai and + therefore another level is added to all paths, e.g. `docs.hopsworks.ai/[version-or-alias]`. + Using relative links should not be affected by this, however, building the docs with version + (Option 2) is recommended. + + ```bash + mkdocs build + # or + mkdocs serve + ``` + +##### Option 2 (Preferred): Build multi-version doc with `mike` + +###### Versioning on docs.hopsworks.ai + +On docs.hopsworks.ai we implement the following versioning scheme: + +- current master branches (e.g. 
of hsml corresponding to master of Hopsworks): rendered as current Hopsworks snapshot version, e.g. **2.2.0-SNAPSHOT [dev]**, where `dev` is an alias to indicate that this is an unstable version.
+- the latest release: rendered with full current version, e.g. **2.1.5 [latest]** with `latest` alias to indicate that this is the latest stable release.
+- previous stable releases: rendered without alias, e.g. **2.1.4**.
+
+###### Build Instructions
+
+4. For this you can either checkout and make a local copy of the `upstream/gh-pages` branch, where
+`mike` maintains the current state of docs.hopsworks.ai, or just build documentation for the branch you are updating:
+
+   Building *one* branch:
+
+   Checkout your dev branch with modified docs:
+   ```bash
+   git checkout [dev-branch]
+   ```
+
+   Generate API docs if necessary:
+   ```bash
+   python auto_doc.py
+   ```
+
+   Build docs with a version and alias
+   ```bash
+   mike deploy [version] [alias] --update-alias
+
+   # for example, if you are updating documentation to be merged to master,
+   # which will become the new SNAPSHOT version:
+   mike deploy 2.2.0-SNAPSHOT dev --update-alias
+
+   # if you are updating docs of the latest stable release branch
+   mike deploy [version] latest --update-alias
+
+   # if you are updating docs of a previous stable release branch
+   mike deploy [version]
+   ```
+
+   If no gh-pages branch existed in your local repository, this will have created it.
+
+   **Important**: If no previous docs were built, you will have to choose a version as default to be loaded as index, as follows
+
+   ```bash
+   mike set-default [version-or-alias]
+   ```
+
+   You can now checkout the gh-pages branch and serve:
+   ```bash
+   git checkout gh-pages
+   mike serve
+   ```
+
+   You can also list all available versions/aliases:
+   ```bash
+   mike list
+   ```
+
+   Delete and reset your local gh-pages branch:
+   ```bash
+   mike delete --all
+
+   # or delete single version
+   mike delete [version-or-alias]
+   ```
+
+#### Adding new API documentation
+
+To add new documentation for APIs, you need to add information about the method/class to document to the `auto_doc.py` script:
+
+```python
+PAGES = {
+    "connection.md": [
+        "hsml.connection.Connection.connection",
+        "hsml.connection.Connection.setup_databricks",
+    ],
+    "new_template.md": [
+        "module",
+        "xyz.asd"
+    ]
+}
+```
+
+Now you can add a template markdown file to the `docs/templates` directory with the name you specified in the auto-doc script. The `new_template.md` file should contain a tag to identify the place at which the API documentation should be inserted:
+
+```
+## The XYZ package
+
+{{module}}
+
+Some extra content here.
+
+!!! example
+    ```python
+    import xyz
+    ```
+
+{{xyz.asd}}
+```
+
+Finally, run the `auto_doc.py` script, as described above, to update the documentation.
+
+For information about Markdown syntax and possible Admonitions/Highlighting etc. see
+the [Material for Mkdocs themes reference documentation](https://squidfunk.github.io/mkdocs-material/reference/abbreviations/).
diff --git a/hsml/docs/assets/images/favicon.ico b/hsml/docs/assets/images/favicon.ico
new file mode 100644
index 0000000000000000000000000000000000000000..ab757306798d8da0cea9ca008ac05cd5091eff1a
GIT binary patch
(binary favicon data omitted)
diff --git a/hsml/docs/assets/images/hops-logo.png b/hsml/docs/assets/images/hops-logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..d3625ae07da68430e3bd5f46a1a7baf88dd474cc
GIT binary patch
(binary logo data omitted)
diff --git a/hsml/docs/css/custom.css b/hsml/docs/css/custom.css
new file mode 100644
index 000000000..5ba3208e1
--- /dev/null
+++ b/hsml/docs/css/custom.css
@@ -0,0 +1,115 @@
+[data-md-color-scheme="hopsworks"] {
+    --md-primary-fg-color: #1EB382;
+    --md-secondary-fg-color: #188a64;
+    --md-tertiary-fg-color: #0d493550;
+    --md-quaternary-fg-color: #fdfdfd;
+    --border-radius-variable: 5px;
+}
+
+.md-footer__inner:not([hidden]) {
+    display: none
+}
+
+/* Lex did stuff here */
+.svg_topnav{
+    width: 12px;
+    filter: invert(100);
+}
+.svg_topnav:hover{
+    width: 12px;
+    filter: invert(10);
+}
+
+.md-header[data-md-state=shadow] {
+    box-shadow: 0 0 0 0;
+}
+
+.md-tabs__item {
+    min-width: 2.25rem;
+}
+
+.md-tabs__item:hover {
+    background-color: var(--md-tertiary-fg-color);
+    transition: background-color 450ms;
+}
+
+/*
+.md-sidebar__scrollwrap{
+    background-color: var(--md-quaternary-fg-color);
+    padding: 15px 5px 5px 5px;
+    border-radius: var(--border-radius-variable);
+}
+*/
+.md-nav__link:focus{
+}
+
+.image_logo_02{
+    width:450px;
+}
+
+/* End of Lex did stuff here */
+
+.md-header__button.md-logo {
+    margin: .1rem;
+    padding: .1rem;
+}
+
+.md-header__button.md-logo img, .md-header__button.md-logo svg {
+    display: block;
+    width: 1.8rem;
+    height: 1.8rem;
+    fill: currentColor;
+}
+
+.md-tabs {
+    width: 100%;
+    overflow: auto;
+    color: var(--md-primary-bg-color);
+    background-color: var(--md-secondary-fg-color);
+    transition: background-color 250ms;
+}
+
+.wrapper {
+    display: grid;
+    grid-template-columns: repeat(4, 1fr);
+    gap: 10px;
+    grid-auto-rows: minmax(100px, auto);
+}
+
+.wrapper * {
+    border: 2px solid green;
+ text-align: center; + padding: 70px 0; +} + +.one { + grid-column: 1 / 2; + grid-row: 1; +} +.two { + grid-column: 2 / 3; + grid-row: 1; +} +.three { + grid-column: 3 / 4; + grid-row: 1; +} +.four { + grid-column: 4 / 5; + grid-row: 1; +} +.five { + grid-column: 1 / 3; + grid-row: 2; +} +.six { + grid-column: 3 / 5; + grid-row: 2; +} + +/* Jupyter Stuff */ +.jupyter-wrapper .jp-CodeCell .jp-Cell-inputWrapper .jp-InputPrompt { + display: none !important; +} diff --git a/hsml/docs/css/dropdown.css b/hsml/docs/css/dropdown.css new file mode 100644 index 000000000..886858909 --- /dev/null +++ b/hsml/docs/css/dropdown.css @@ -0,0 +1,55 @@ +/* Style The Dropdown Button */ +.dropbtn { + color: white; + border: none; + cursor: pointer; +} + +.md-tabs__list { + contain: inherit; +} +.md-tabs { + overflow: inherit; +} +.md-header { + z-index: 1000 !important; +} + +/* The container
- needed to position the dropdown content */ +.dropdown { + position: absolute; + display: inline-block; +} + +/* Dropdown Content (Hidden by Default) */ +.dropdown-content { + display:none; + font-size: 13px; + position: absolute; + background-color: #f9f9f9; + min-width: 160px; + box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2); + z-index: 1000; + border-radius: 2px; + left:-15px; +} + +/* Links inside the dropdown */ +.dropdown-content a { + color: black; + padding: 12px 16px; + text-decoration: none; + display: block; +} + +/* Change color of dropdown links on hover */ +.dropdown-content a:hover {background-color: #f1f1f1} + +/* Show the dropdown menu on hover */ +.dropdown:hover .dropdown-content { + display: block; +} + +/* Change the background color of the dropdown button when the dropdown content is shown */ +.dropdown:hover .dropbtn { +} diff --git a/hsml/docs/css/marctech.css b/hsml/docs/css/marctech.css new file mode 100644 index 000000000..8bb58c97b --- /dev/null +++ b/hsml/docs/css/marctech.css @@ -0,0 +1,1047 @@ +:root { + --md-primary-fg-color: #1EB382; + --md-secondary-fg-color: #188a64; + --md-tertiary-fg-color: #0d493550; + --md-quaternary-fg-color: #fdfdfd; + --md-fiftuary-fg-color: #2471cf; + --border-radius-variable: 5px; + --border-width:1px; + } + + .marctech_main a{ + color: var(--md-fiftuary-fg-color); + border-bottom: 1px dotted var(--md-fiftuary-fg-color) !important; + text-decoration: dotted !important;} + + .marctech_main a:hover{ + border-bottom: 1px dotted var(--md-primary-fg-color)!important; + } + + .marctech_main a:visited{ + color: var(--md-tertiary-fg-color); + border-bottom: 1px dotted var(--md-tertiary-fg-color) !important; + + } + + .w-layout-grid { + display: -ms-grid; + display: grid; + grid-auto-columns: 1fr; + -ms-grid-columns: 1fr 1fr; + grid-template-columns: 1fr 1fr; + -ms-grid-rows: auto auto; + grid-template-rows: auto auto; + grid-row-gap: 16px; + grid-column-gap: 16px; + } + + .image_logo{ + width: 69%; + background-color: white; + z-index: 50; + padding: 0px 15px 0px 15px; + margin-bottom: 10px; + } + + .layer_02{ + pointer-events: none; + } + + .round-frame{ + pointer-events: initial; + } + + .marctech_main { + margin-top:-20px; + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + margin-bottom: 55px; + } + + .collumns { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + height: 100%; + -webkit-box-align: stretch; + -webkit-align-items: stretch; + -ms-flex-align: stretch; + align-items: stretch; + } + + .col_heading { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + } + + .enterprisefs { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + } + + .enterprise_ai { + -webkit-align-self: center; + -ms-flex-item-align: center; + -ms-grid-row-align: center; + 
align-self: center; + -webkit-box-flex: 1; + -webkit-flex: 1; + -ms-flex: 1; + flex: 1; + } + + .side-content { + z-index: 0; + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + width: 240px; + height: 100%; + margin-top: 10px; + margin-bottom: 10px; + padding: 20px 10px; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-pack: center; + -webkit-justify-content: center; + -ms-flex-pack: center; + justify-content: center; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + -webkit-align-content: flex-start; + -ms-flex-line-pack: start; + align-content: flex-start; + border-style: solid; + border-width: var(--border-width); + border-color: #585858; + border-radius: 10px; + background-color:var(--md-quaternary-fg-color); + } + .body { + padding: 40px; + font-family: Roboto, sans-serif; + } + + .green { + color: #1eb182; + font-size: 1.2vw; + } + + .rec_frame { + position: relative; + z-index: 1; + display: inline-block; + min-width: 150px; + margin-top: 10px; + margin-right: 10px; + margin-left: 10px; + padding: 10px 10px; + border-style: solid; + border-width: var(--border-width); + border-color: #585858; + border-radius: 10px; + background-color: #fff; + box-shadow: 4px 4px 0 0 rgba(88, 88, 88, 0.16); + -webkit-transition: box-shadow 200ms ease, border-color 200ms ease; + transition: box-shadow 200ms ease, border-color 200ms ease; + color: #585858; + text-align: center; + cursor: pointer; + } + + .rec_frame:hover { + border-color: #c2c2c2; + box-shadow: none; + } + + .name_item { + font-size: 0.7rem; + line-height: 120%; + font-weight: 700; + } + + .name_item.db { + position: relative; + z-index: 3; + text-align: left; + } + + .name_item.small { + font-size: 0.6rem; + font-weight: 500; + } + + .name_item.ingrey { + padding-bottom: 20px; + } + + .db_frame-mid { + position: relative; + z-index: 1; + margin-top: -8px; + padding: 5px 2px; + border-style: solid; + border-width: var(--border-width); + border-color: #585858; + border-radius: 0px 0% 50% 50%; + background-color: #fff; + color: #585858; + text-align: center; + } + + .db_frame-top { + position: relative; + z-index: 2; + padding: 5px 2px; + border-style: solid; + border-width: var(--border-width); + border-color: #585858; + border-radius: 50%; + background-color: #fff; + color: #585858; + text-align: center; + } + + .icondb { + position: relative; + width: 25px; + min-width: 25px; + margin-right: 10px; + } + + .db_frame { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + width: 150px; + height: 55px; + padding: 20px 10px; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + border-style: solid; + border-width: var(--border-width); + border-color: #585858; + border-radius: 10px; + background-color: #fff; + box-shadow: 4px 4px 0 0 rgba(88, 88, 88, 0.16); + -webkit-transition: box-shadow 200ms ease, border-color 200ms ease; + transition: box-shadow 200ms ease, border-color 200ms ease; + color: #585858; + text-align: center; + cursor: pointer; + } + + .db_frame:hover { + border-color: #c2c2c2; + box-shadow: none; + } + + .grid { + -ms-grid-rows: auto auto auto; + grid-template-rows: auto auto auto; + } + + .arrowdown { + position: relative; + z-index: 0; + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + 
display: flex; + margin-top: -10px; + -webkit-box-pack: center; + -webkit-justify-content: center; + -ms-flex-pack: center; + justify-content: center; + } + + .heading_MT { + margin-top: 0px !important; + margin-bottom: 0px !important; + font-size: 1.3rem !important; + white-space: nowrap !important; + } + + .head_col { + padding-left: 10px; + } + + .MT_heading3 { + margin-top: 0px !important ; + font-size: 0.8rem !important; + } + + .MT_heading3.green { + color: #1eb182 !important; + } + + .column_sides { + position: relative; + z-index: 2; + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-pack: justify; + -webkit-justify-content: space-between; + -ms-flex-pack: justify; + justify-content: space-between; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + } + + .hopsicon { + width: 45px; + height: 45px; + } + + .column_center { + z-index: 10; + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-pack: center; + -webkit-justify-content: center; + -ms-flex-pack: center; + justify-content: center; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + } + + .center-content { + z-index: -50; + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + width: 750px; + height: 670px; + margin-top: 10px; + margin-bottom: 10px; + padding: 20px 10px; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-pack: center; + -webkit-justify-content: center; + -ms-flex-pack: center; + justify-content: center; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + -webkit-align-content: center; + -ms-flex-line-pack: center; + align-content: center; + border-radius: 10px; + background-color: transparent; + } + + .image { + width: 260px; + } + + .layer_01 { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-pack: center; + -webkit-justify-content: center; + -ms-flex-pack: center; + justify-content: center; + -webkit-box-align: stretch; + -webkit-align-items: stretch; + -ms-flex-align: stretch; + align-items: stretch; + } + + .name_center { + font-size: 1rem; + font-weight: 700; + } + + .rec_frame_main { + position: relative; + z-index: 1; + margin-top: 10px; + margin-right: 10px; + margin-left: 10px; + padding: 5px 10px; + border-style: solid; + border-width: var(--border-width); + border-color: #1eb182; + border-radius: 10px; + background-color: #e6fdf6; + box-shadow: 4px 4px 0 0 #dcf7ee; + -webkit-transition: box-shadow 200ms ease, border-color 200ms ease; + transition: box-shadow 200ms ease, border-color 200ms ease; + color: #1eb182; + text-align: center; + cursor: pointer; + } + + .rec_frame_main:hover { + border-color: #9fecd4; + box-shadow: none; + } + + .rec_frame_main.no_content 
{ + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + height: 100%; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-pack: center; + -webkit-justify-content: center; + -ms-flex-pack: center; + justify-content: center; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + box-shadow: 4px 4px 0 0 #dcf7ee; + } + + .rec_frame_main.no_content:hover { + border-color: #1eb182; + box-shadow: 4px 4px 0 0 rgba(88, 88, 88, 0.16); + } + + .name_item_02 { + font-size: 0.85rem; + font-weight: 700; + } + + .grid-infra { + padding-top: 20px; + -ms-grid-columns: 1fr 1fr 1fr 1fr; + grid-template-columns: 1fr 1fr 1fr 1fr; + -ms-grid-rows: auto; + grid-template-rows: auto; + } + + .rec_frame_main-white { + position: relative; + z-index: 1; + display: inline-block; + width: 100%; + margin-top: 10px; + margin-bottom: 10px; + padding: 5px 10px; + border-style: solid; + border-width: var(--border-width); + border-color: #1eb182; + border-radius: 10px; + background-color: #fff; + box-shadow: 4px 4px 0 0 rgba(88, 88, 88, 0.16); + -webkit-transition: box-shadow 200ms ease, border-color 200ms ease; + transition: box-shadow 200ms ease, border-color 200ms ease; + color: #1eb182; + text-align: center; + cursor: pointer; + } + + .rec_frame_main-white:hover { + border-color: #c2c2c2; + box-shadow: none; + } + + .rec_frame_main-white.dotted { + border-style: dotted; + } + + .column { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-pack: justify; + -webkit-justify-content: space-between; + -ms-flex-pack: justify; + justify-content: space-between; + -webkit-box-align: stretch; + -webkit-align-items: stretch; + -ms-flex-align: stretch; + align-items: stretch; + } + + .columns_center { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-orient: horizontal; + -webkit-box-direction: normal; + -webkit-flex-direction: row; + -ms-flex-direction: row; + flex-direction: row; + -webkit-box-pack: justify; + -webkit-justify-content: space-between; + -ms-flex-pack: justify; + justify-content: space-between; + } + + .non-bold { + font-weight: 400; + } + + .logo-holder { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-pack: center; + -webkit-justify-content: center; + -ms-flex-pack: center; + justify-content: center; + } + + .infra { + text-align: center; + position: relative; + z-index: 30; + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + padding: 10px; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -webkit-flex-direction: column; + -ms-flex-direction: column; + flex-direction: column; + -webkit-box-align: center; + -webkit-align-items: center; + -ms-flex-align: center; + align-items: center; + border: 1px dashed #000; + border-radius: 6px; + background-color: #fff; + cursor: pointer; + } + + .infra:hover { + border-style: solid; + border-color: #585858; + } + + .text_and_icon { + display: -webkit-box; + display: -webkit-flex; + display: -ms-flexbox; + display: flex; + -webkit-box-pack: center; + -webkit-justify-content: center; + -ms-flex-pack: 
center;
+ justify-content: center;
+ -webkit-box-align: center;
+ -webkit-align-items: center;
+ -ms-flex-align: center;
+ align-items: center;
+ }
+
+ .svg_icon {
+ width: 33px;
+ margin-right: 10px;
+ margin-left: 10px;
+ }
+
+ .layer_02 {
+ position: absolute;
+ z-index: 10;
+ display: -webkit-box;
+ display: -webkit-flex;
+ display: -ms-flexbox;
+ display: flex;
+ width: 96%;
+ height: 90%;
+ -webkit-box-orient: vertical;
+ -webkit-box-direction: normal;
+ -webkit-flex-direction: column;
+ -ms-flex-direction: column;
+ flex-direction: column;
+ -webkit-box-pack: center;
+ -webkit-justify-content: center;
+ -ms-flex-pack: center;
+ justify-content: center;
+ -webkit-box-align: stretch;
+ -webkit-align-items: stretch;
+ -ms-flex-align: stretch;
+ align-items: stretch;
+ border-style: solid;
+ border-width: calc(var(--border-width)*2);
+ border-color: #bbbbbb50;
+ border-radius: 100%;
+ background-color: transparent;
+ }
+
+ .round-frame {
+ position: absolute;
+ left: 0%;
+ top: auto;
+ right: auto;
+ bottom: 0%;
+ z-index: 10;
+ display: -webkit-box;
+ display: -webkit-flex;
+ display: -ms-flexbox;
+ display: flex;
+ width: 120px;
+ height: 120px;
+ margin: 10px;
+ padding: 20px;
+ -webkit-box-pack: center;
+ -webkit-justify-content: center;
+ -ms-flex-pack: center;
+ justify-content: center;
+ -webkit-box-align: center;
+ -webkit-align-items: center;
+ -ms-flex-align: center;
+ align-items: center;
+ border-style: solid;
+ border-width: var(--border-width);
+ border-color: #585858;
+ border-radius: 100%;
+ background-color: #fff;
+ outline-color: #fff;
+ outline-offset: 0px;
+ outline-style: solid;
+ outline-width: 7px;
+ -webkit-transition: box-shadow 200ms ease, border-color 200ms ease;
+ transition: box-shadow 200ms ease, border-color 200ms ease;
+ color: #585858;
+ text-align: center;
+ cursor: pointer;
+ }
+
+ .round-frame:hover {
+ border-color: #c2c2c2;
+ box-shadow: none;
+ }
+
+ .round-frame.top-left {
+ left: 4%;
+ top: 15%;
+ right: auto;
+ bottom: auto;
+ }
+
+ .round-frame.bottom-left {
+ left: 4%;
+ bottom: 15%;
+ }
+
+ .round-frame.top-right {
+ left: auto;
+ top: 15%;
+ right: 4%;
+ bottom: auto;
+ }
+
+ .round-frame.bottom-right {
+ left: auto;
+ top: auto;
+ right: 4%;
+ bottom: 15%;
+ padding: 10px;
+ }
+
+ .side-holder {
+ z-index: -1;
+ display: -webkit-box;
+ display: -webkit-flex;
+ display: -ms-flexbox;
+ display: flex;
+ height: 630px;
+ -webkit-box-orient: vertical;
+ -webkit-box-direction: normal;
+ -webkit-flex-direction: column;
+ -ms-flex-direction: column;
+ flex-direction: column;
+ -webkit-box-pack: center;
+ -webkit-justify-content: center;
+ -ms-flex-pack: center;
+ justify-content: center;
+ }
+
+ .infra-icon {
+ width: 25px;
+ height: 25px;
+ }
+
+ .div-block {
+ display: -webkit-box;
+ display: -webkit-flex;
+ display: -ms-flexbox;
+ display: flex;
+ height: 100%;
+ -webkit-box-orient: vertical;
+ -webkit-box-direction: normal;
+ -webkit-flex-direction: column;
+ -ms-flex-direction: column;
+ flex-direction: column;
+ -webkit-box-pack: justify;
+ -webkit-justify-content: space-between;
+ -ms-flex-pack: justify;
+ justify-content: space-between;
+ }
+
+ #w-node-a2a9b648-f5dd-74e5-e1c2-f7aaf4fa1fcd-46672785 {
+ -ms-grid-column: span 1;
+ grid-column-start: span 1;
+ -ms-grid-column-span: 1;
+ grid-column-end: span 1;
+ -ms-grid-row: span 1;
+ grid-row-start: span 1;
+ -ms-grid-row-span: 1;
+ grid-row-end: span 1;
+ }
+
+ #w-node-_466aa2bf-88bf-5a65-eab4-fc1eb95e7384-46672785 {
+ -ms-grid-column: span 1;
+ grid-column-start: span 1;
+
-ms-grid-column-span: 1; + grid-column-end: span 1; + -ms-grid-row: span 1; + grid-row-start: span 1; + -ms-grid-row-span: 1; + grid-row-end: span 1; + } + + #w-node-_87009ba3-d9a6-e0b7-4cce-581190a19cf3-46672785 { + -ms-grid-column: span 1; + grid-column-start: span 1; + -ms-grid-column-span: 1; + grid-column-end: span 1; + -ms-grid-row: span 1; + grid-row-start: span 1; + -ms-grid-row-span: 1; + grid-row-end: span 1; + } + + #w-node-_4a479fbb-90c7-9f47-d439-20aa6a224339-46672785 { + -ms-grid-column: span 1; + grid-column-start: span 1; + -ms-grid-column-span: 1; + grid-column-end: span 1; + -ms-grid-row: span 1; + grid-row-start: span 1; + -ms-grid-row-span: 1; + grid-row-end: span 1; + } + + + /* + + + inherited from the original template + + */ + + .w-container .w-row { + margin-left: -10px; + margin-right: -10px; + } + .w-row:before, + .w-row:after { + content: " "; + display: table; + grid-column-start: 1; + grid-row-start: 1; + grid-column-end: 2; + grid-row-end: 2; + } + .w-row:after { + clear: both; + } + .w-row .w-row { + margin-left: 0; + margin-right: 0; + } + .w-col { + position: relative; + float: left; + width: 100%; + min-height: 1px; + padding-left: 10px; + padding-right: 10px; + } + .w-col .w-col { + padding-left: 0; + padding-right: 0; + } + .w-col-1 { + width: 8.33333333%; + } + .w-col-2 { + width: 16.66666667%; + } + .w-col-3 { + width: 25%; + } + .w-col-4 { + width: 33.33333333%; + } + .w-col-5 { + width: 41.66666667%; + } + .w-col-6 { + width: 50%; + } + .w-col-7 { + width: 58.33333333%; + } + .w-col-8 { + width: 66.66666667%; + } + .w-col-9 { + width: 75%; + } + .w-col-10 { + width: 83.33333333%; + } + .w-col-11 { + width: 91.66666667%; + } + .w-col-12 { + width: 100%; + } + .w-hidden-main { + display: none !important; + } + @media screen and (max-width: 991px) { + .w-container { + max-width: 728px; + } + .w-hidden-main { + display: inherit !important; + } + .w-hidden-medium { + display: none !important; + } + .w-col-medium-1 { + width: 8.33333333%; + } + .w-col-medium-2 { + width: 16.66666667%; + } + .w-col-medium-3 { + width: 25%; + } + .w-col-medium-4 { + width: 33.33333333%; + } + .w-col-medium-5 { + width: 41.66666667%; + } + .w-col-medium-6 { + width: 50%; + } + .w-col-medium-7 { + width: 58.33333333%; + } + .w-col-medium-8 { + width: 66.66666667%; + } + .w-col-medium-9 { + width: 75%; + } + .w-col-medium-10 { + width: 83.33333333%; + } + .w-col-medium-11 { + width: 91.66666667%; + } + .w-col-medium-12 { + width: 100%; + } + .w-col-stack { + width: 100%; + left: auto; + right: auto; + } + } + @media screen and (max-width: 767px) { + .w-hidden-main { + display: inherit !important; + } + .w-hidden-medium { + display: inherit !important; + } + .w-hidden-small { + display: none !important; + } + .w-row, + .w-container .w-row { + margin-left: 0; + margin-right: 0; + } + .w-col { + width: 100%; + left: auto; + right: auto; + } + .w-col-small-1 { + width: 8.33333333%; + } + .w-col-small-2 { + width: 16.66666667%; + } + .w-col-small-3 { + width: 25%; + } + .w-col-small-4 { + width: 33.33333333%; + } + .w-col-small-5 { + width: 41.66666667%; + } + .w-col-small-6 { + width: 50%; + } + .w-col-small-7 { + width: 58.33333333%; + } + .w-col-small-8 { + width: 66.66666667%; + } + .w-col-small-9 { + width: 75%; + } + .w-col-small-10 { + width: 83.33333333%; + } + .w-col-small-11 { + width: 91.66666667%; + } + .w-col-small-12 { + width: 100%; + } + } + @media screen and (max-width: 479px) { + .w-container { + max-width: none; + } + .w-hidden-main { + display: inherit 
!important; + } + .w-hidden-medium { + display: inherit !important; + } + .w-hidden-small { + display: inherit !important; + } + .w-hidden-tiny { + display: none !important; + } + .w-col { + width: 100%; + } + .w-col-tiny-1 { + width: 8.33333333%; + } + .w-col-tiny-2 { + width: 16.66666667%; + } + .w-col-tiny-3 { + width: 25%; + } + .w-col-tiny-4 { + width: 33.33333333%; + } + .w-col-tiny-5 { + width: 41.66666667%; + } + .w-col-tiny-6 { + width: 50%; + } + .w-col-tiny-7 { + width: 58.33333333%; + } + .w-col-tiny-8 { + width: 66.66666667%; + } + .w-col-tiny-9 { + width: 75%; + } + .w-col-tiny-10 { + width: 83.33333333%; + } + .w-col-tiny-11 { + width: 91.66666667%; + } + .w-col-tiny-12 { + width: 100%; + } + } diff --git a/hsml/docs/css/version-select.css b/hsml/docs/css/version-select.css new file mode 100644 index 000000000..3b908ae84 --- /dev/null +++ b/hsml/docs/css/version-select.css @@ -0,0 +1,36 @@ +@media only screen and (max-width:76.1875em) { +} + +#version-selector select.form-control { + appearance: none; + -webkit-appearance: none; + -moz-appearance: none; + + background-color: #F5F5F5; + + background-position: center right; + background-repeat: no-repeat; + border: 0px; + border-radius: 2px; + /* box-shadow: 0px 1px 3px rgb(0 0 0 / 10%); */ + color: inherit; + width: -webkit-fill-available; + width: -moz-available; + max-width: 200px; + font-size: inherit; + /* font-weight: 600; */ + margin: 10px; + overflow: hidden; + padding: 7px 10px; + text-overflow: ellipsis; + white-space: nowrap; +} + +#version-selector::after { + content: '⌄'; + font-family: inherit; + font-size: 22px; + margin: -35px; + vertical-align: 7%; + padding-bottom: 10px; +} diff --git a/hsml/docs/index.md b/hsml/docs/index.md new file mode 100644 index 000000000..ee835ddc7 --- /dev/null +++ b/hsml/docs/index.md @@ -0,0 +1,141 @@ +# Hopsworks Model Management + +

+ [badges: Hopsworks Community · Hopsworks Model Management Documentation · python · PyPiStatus · Scala/Java Artifacts · Downloads · Ruff · License]

+
+HSML is the library to interact with the Hopsworks Model Registry and Model Serving. The library makes it easy to export, manage and deploy models.
+
+When running inside Hopsworks, the library configures itself automatically. However, to connect from an external Python environment, additional connection information, such as host and port, is required.
+
+## Getting Started On Hopsworks
+
+Get started easily by registering an account on [Hopsworks Serverless](https://app.hopsworks.ai/). Create your project and a [new API key](https://docs.hopsworks.ai/latest/user_guides/projects/api_key/create_api_key/). In a new Python environment with Python 3.8 or higher, install the [client library](https://docs.hopsworks.ai/latest/user_guides/client_installation/) using pip:
+
+```bash
+# Get all Hopsworks SDKs: Feature Store, Model Serving and Platform SDK
+pip install hopsworks
+# or just the Model Registry and Model Serving SDK
+pip install hsml
+```
+
+You can start a notebook, instantiate a connection, and get a handle to the project's model registry or model serving.
+
+```python
+import hopsworks
+
+project = hopsworks.login() # you will be prompted for your api key
+
+mr = project.get_model_registry()
+# or
+ms = project.get_model_serving()
+```
+
+or using `hsml` directly:
+
+```python
+import hsml
+
+connection = hsml.connection(
+    host="c.app.hopsworks.ai", #
+    project="your-project",
+    api_key_value="your-api-key",
+)
+
+mr = connection.get_model_registry()
+# or
+ms = connection.get_model_serving()
+```
+
+Create a new model
+```python
+model = mr.tensorflow.create_model(name="mnist",
+                                   version=1,
+                                   metrics={"accuracy": 0.94},
+                                   description="mnist model description")
+model.save("/tmp/model_directory") # or /tmp/model_file
+```
+
+Download a model
+```python
+model = mr.get_model("mnist", version=1)
+
+model_path = model.download()
+```
+
+Delete a model
+```python
+model.delete()
+```
+
+Get the best performing model
+```python
+best_model = mr.get_best_model('mnist', 'accuracy', 'max')
+```
+
+Deploy a model
+```python
+deployment = model.deploy()
+```
+
+Start a deployment
+```python
+deployment.start()
+```
+
+Make predictions with a deployed model (see the schema sketch at the end of this page for how `input_example` is attached to a model)
+```python
+data = { "instances": [ model.input_example ] }
+
+predictions = deployment.predict(data)
+```
+
+## Tutorials
+
+You can find more examples on how to use the library in our [tutorials](https://github.com/logicalclocks/hopsworks-tutorials).
+
+## Documentation
+
+Documentation is available at [Hopsworks Model Management Documentation](https://docs.hopsworks.ai/).
+
+## Issues
+
+For general questions about the usage of Hopsworks Machine Learning, please open a topic on [Hopsworks Community](https://community.hopsworks.ai/).
+Please report any issue using [GitHub issue tracking](https://github.com/logicalclocks/machine-learning-api/issues).
+
+
+## Contributing
+
+If you would like to contribute to this library, please see the [Contribution Guidelines](CONTRIBUTING.md).
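+
+## Appendix: Input Examples and Model Schemas
+
+The prediction example above uses `model.input_example`. As a minimal sketch (added here for illustration, not part of the original quickstart; it assumes `X_train` and `y_train` are in-memory training arrays in your environment), an input example and a model schema can be attached when the model is registered:
+
+```python
+from hsml.schema import Schema
+from hsml.model_schema import ModelSchema
+
+# Derive input/output schemas from the (assumed) training data
+model_schema = ModelSchema(
+    input_schema=Schema(X_train),
+    output_schema=Schema(y_train),
+)
+
+model = mr.tensorflow.create_model(
+    name="mnist",
+    version=1,
+    metrics={"accuracy": 0.94},
+    model_schema=model_schema,
+    input_example=X_train[0],  # stored with the model; can be reused for test predictions
+)
+model.save("/tmp/model_directory")
+```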
diff --git a/hsml/docs/js/dropdown.js b/hsml/docs/js/dropdown.js new file mode 100644 index 000000000..b897ba36a --- /dev/null +++ b/hsml/docs/js/dropdown.js @@ -0,0 +1,2 @@ +document.getElementsByClassName("md-tabs__link")[7].style.display = "none"; +document.getElementsByClassName("md-tabs__link")[9].style.display = "none"; \ No newline at end of file diff --git a/hsml/docs/js/inject-api-links.js b/hsml/docs/js/inject-api-links.js new file mode 100644 index 000000000..6c8a4a3b3 --- /dev/null +++ b/hsml/docs/js/inject-api-links.js @@ -0,0 +1,31 @@ +window.addEventListener("DOMContentLoaded", function () { + var windowPathNameSplits = window.location.pathname.split("/"); + var majorVersionRegex = new RegExp("(\\d+[.]\\d+)") + var latestRegex = new RegExp("latest"); + if (majorVersionRegex.test(windowPathNameSplits[1])) { // On landing page docs.hopsworks.api/3.0 - URL contains major version + // Version API dropdown + document.getElementById("hopsworks_api_link").href = "https://docs.hopsworks.ai/hopsworks-api/" + windowPathNameSplits[1] + "/generated/api/login/"; + document.getElementById("hsfs_api_link").href = "https://docs.hopsworks.ai/feature-store-api/" + windowPathNameSplits[1] + "/generated/api/connection_api/"; + document.getElementById("hsml_api_link").href = "https://docs.hopsworks.ai/machine-learning-api/" + windowPathNameSplits[1] + "/generated/connection_api/"; + } else { // on docs.hopsworks.api/feature-store-api/3.0 / docs.hopsworks.api/hopsworks-api/3.0 / docs.hopsworks.api/machine-learning-api/3.0 + if (latestRegex.test(windowPathNameSplits[2]) || latestRegex.test(windowPathNameSplits[1])) { + var majorVersion = "latest"; + } else { + var apiVersion = windowPathNameSplits[2]; + var majorVersion = apiVersion.match(majorVersionRegex)[0]; + } + // Version main navigation + document.getElementsByClassName("md-tabs__link")[0].href = "https://docs.hopsworks.ai/" + majorVersion; + document.getElementsByClassName("md-tabs__link")[1].href = "https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/quickstart.ipynb"; + document.getElementsByClassName("md-tabs__link")[2].href = "https://docs.hopsworks.ai/" + majorVersion + "/tutorials/"; + document.getElementsByClassName("md-tabs__link")[3].href = "https://docs.hopsworks.ai/" + majorVersion + "/concepts/hopsworks/"; + document.getElementsByClassName("md-tabs__link")[4].href = "https://docs.hopsworks.ai/" + majorVersion + "/user_guides/"; + document.getElementsByClassName("md-tabs__link")[5].href = "https://docs.hopsworks.ai/" + majorVersion + "/setup_installation/aws/getting_started/"; + document.getElementsByClassName("md-tabs__link")[6].href = "https://docs.hopsworks.ai/" + majorVersion + "/admin/"; + // Version API dropdown + document.getElementById("hopsworks_api_link").href = "https://docs.hopsworks.ai/hopsworks-api/" + majorVersion + "/generated/api/login/"; + document.getElementById("hsfs_api_link").href = "https://docs.hopsworks.ai/feature-store-api/" + majorVersion + "/generated/api/connection_api/"; + document.getElementById("hsfs_javadoc_link").href = "https://docs.hopsworks.ai/feature-store-api/" + majorVersion + "/javadoc"; + document.getElementById("hsml_api_link").href = "https://docs.hopsworks.ai/machine-learning-api/" + majorVersion + "/generated/connection_api/"; + } +}); diff --git a/hsml/docs/js/version-select.js b/hsml/docs/js/version-select.js new file mode 100644 index 000000000..9c8331660 --- /dev/null +++ b/hsml/docs/js/version-select.js @@ -0,0 +1,64 @@ 
+window.addEventListener("DOMContentLoaded", function() { + // This is a bit hacky. Figure out the base URL from a known CSS file the + // template refers to... + var ex = new RegExp("/?css/version-select.css$"); + var sheet = document.querySelector('link[href$="version-select.css"]'); + + var ABS_BASE_URL = sheet.href.replace(ex, ""); + var CURRENT_VERSION = ABS_BASE_URL.split("/").pop(); + + function makeSelect(options, selected) { + var select = document.createElement("select"); + select.classList.add("form-control"); + + options.forEach(function(i) { + var option = new Option(i.text, i.value, undefined, + i.value === selected); + select.add(option); + }); + + return select; + } + + var xhr = new XMLHttpRequest(); + xhr.open("GET", ABS_BASE_URL + "/../versions.json"); + xhr.onload = function() { + var versions = JSON.parse(this.responseText); + + var realVersion = versions.find(function(i) { + return i.version === CURRENT_VERSION || + i.aliases.includes(CURRENT_VERSION); + }).version; + var latestVersion = versions.find(function(i) { + return i.aliases.includes("latest"); + }).version; + let outdated_banner = document.querySelector('div[data-md-color-scheme="default"][data-md-component="outdated"]'); + if (realVersion !== latestVersion) { + outdated_banner.removeAttribute("hidden"); + } else { + outdated_banner.setAttribute("hidden", ""); + } + + var select = makeSelect(versions.map(function(i) { + var allowedAliases = ["dev", "latest"] + if (i.aliases.length > 0) { + var aliasString = " [" + i.aliases.filter(function (str) { return allowedAliases.includes(str); }).join(", ") + "]"; + } else { + var aliasString = ""; + } + return {text: i.title + aliasString, value: i.version}; + }), realVersion); + select.addEventListener("change", function(event) { + window.location.href = ABS_BASE_URL + "/../" + this.value + "/generated/connection_api/"; + }); + + var container = document.createElement("div"); + container.id = "version-selector"; + // container.className = "md-nav__item"; + container.appendChild(select); + + var sidebar = document.querySelector(".md-nav--primary > .md-nav__list"); + sidebar.parentNode.insertBefore(container, sidebar.nextSibling); + }; + xhr.send(); +}); diff --git a/hsml/docs/overrides/main.html b/hsml/docs/overrides/main.html new file mode 100644 index 000000000..a1bc45bb5 --- /dev/null +++ b/hsml/docs/overrides/main.html @@ -0,0 +1,8 @@ +{% extends "base.html" %} + +{% block outdated %} +You're not viewing the latest version of the documentation. + + Click here to go to latest. 
+ +{% endblock %} \ No newline at end of file diff --git a/docs/templates/connection_api.md b/hsml/docs/templates/connection_api.md similarity index 100% rename from docs/templates/connection_api.md rename to hsml/docs/templates/connection_api.md diff --git a/docs/templates/model-registry/links.md b/hsml/docs/templates/model-registry/links.md similarity index 100% rename from docs/templates/model-registry/links.md rename to hsml/docs/templates/model-registry/links.md diff --git a/docs/templates/model-registry/model_api.md b/hsml/docs/templates/model-registry/model_api.md similarity index 100% rename from docs/templates/model-registry/model_api.md rename to hsml/docs/templates/model-registry/model_api.md diff --git a/docs/templates/model-registry/model_registry_api.md b/hsml/docs/templates/model-registry/model_registry_api.md similarity index 100% rename from docs/templates/model-registry/model_registry_api.md rename to hsml/docs/templates/model-registry/model_registry_api.md diff --git a/docs/templates/model-registry/model_schema_api.md b/hsml/docs/templates/model-registry/model_schema_api.md similarity index 100% rename from docs/templates/model-registry/model_schema_api.md rename to hsml/docs/templates/model-registry/model_schema_api.md diff --git a/docs/templates/model-serving/deployment_api.md b/hsml/docs/templates/model-serving/deployment_api.md similarity index 100% rename from docs/templates/model-serving/deployment_api.md rename to hsml/docs/templates/model-serving/deployment_api.md diff --git a/docs/templates/model-serving/inference_batcher_api.md b/hsml/docs/templates/model-serving/inference_batcher_api.md similarity index 100% rename from docs/templates/model-serving/inference_batcher_api.md rename to hsml/docs/templates/model-serving/inference_batcher_api.md diff --git a/docs/templates/model-serving/inference_logger_api.md b/hsml/docs/templates/model-serving/inference_logger_api.md similarity index 100% rename from docs/templates/model-serving/inference_logger_api.md rename to hsml/docs/templates/model-serving/inference_logger_api.md diff --git a/docs/templates/model-serving/model_serving_api.md b/hsml/docs/templates/model-serving/model_serving_api.md similarity index 100% rename from docs/templates/model-serving/model_serving_api.md rename to hsml/docs/templates/model-serving/model_serving_api.md diff --git a/docs/templates/model-serving/predictor_api.md b/hsml/docs/templates/model-serving/predictor_api.md similarity index 100% rename from docs/templates/model-serving/predictor_api.md rename to hsml/docs/templates/model-serving/predictor_api.md diff --git a/docs/templates/model-serving/predictor_state_api.md b/hsml/docs/templates/model-serving/predictor_state_api.md similarity index 100% rename from docs/templates/model-serving/predictor_state_api.md rename to hsml/docs/templates/model-serving/predictor_state_api.md diff --git a/docs/templates/model-serving/predictor_state_condition_api.md b/hsml/docs/templates/model-serving/predictor_state_condition_api.md similarity index 100% rename from docs/templates/model-serving/predictor_state_condition_api.md rename to hsml/docs/templates/model-serving/predictor_state_condition_api.md diff --git a/docs/templates/model-serving/resources_api.md b/hsml/docs/templates/model-serving/resources_api.md similarity index 100% rename from docs/templates/model-serving/resources_api.md rename to hsml/docs/templates/model-serving/resources_api.md diff --git a/docs/templates/model-serving/transformer_api.md 
b/hsml/docs/templates/model-serving/transformer_api.md similarity index 100% rename from docs/templates/model-serving/transformer_api.md rename to hsml/docs/templates/model-serving/transformer_api.md diff --git a/hsml/java/pom.xml b/hsml/java/pom.xml new file mode 100644 index 000000000..cb3e60028 --- /dev/null +++ b/hsml/java/pom.xml @@ -0,0 +1,109 @@ + + + 4.0.0 + + com.logicalclocks + hsml + 4.0.0-SNAPSHOT + + + 1.8 + 1.8 + + + + + + org.scala-tools + maven-scala-plugin + + + scala-compile-first + process-resources + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + org.apache.maven.plugins + maven-assembly-plugin + 2.4.1 + + + + jar-with-dependencies + + + + + make-assembly + + package + + single + + + + + + org.apache.maven.plugins + maven-checkstyle-plugin + 3.1.1 + + + validate + validate + + check + + + + + src/main/resources/checkstyle.xml + src/main/resources/suppressions.xml + true + true + true + true + + src/main/java + + + + + + + + + Hops + Hops Repo + https://archiva.hops.works/repository/Hops/ + + true + + + true + + + + + + + Hops + Hops Repo + https://archiva.hops.works/repository/Hops/ + + + diff --git a/hsml/java/src/main/resources/checkstyle.xml b/hsml/java/src/main/resources/checkstyle.xml new file mode 100644 index 000000000..5f99eb681 --- /dev/null +++ b/hsml/java/src/main/resources/checkstyle.xml @@ -0,0 +1,312 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/hsml/java/src/main/resources/suppressions.xml b/hsml/java/src/main/resources/suppressions.xml new file mode 100644 index 000000000..a86fa8219 --- /dev/null +++ b/hsml/java/src/main/resources/suppressions.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/hsml/mkdocs.yml b/hsml/mkdocs.yml new file mode 100644 index 000000000..f20a7b1c5 --- /dev/null +++ b/hsml/mkdocs.yml @@ -0,0 +1,120 @@ +site_name: "Hopsworks Documentation" +site_description: "Official documentation for Hopsworks and its Feature Store - an open source data-intensive AI platform used for the development and operation of machine learning models at scale." 
+site_author: "Logical Clocks" +site_url: "https://docs.hopsworks.ai/machine-learning-api/latest" + +# Repository +repo_name: logicalclocks/hopsworks +repo_url: https://github.com/logicalclocks/hopsworks +edit_uri: "" + +nav: + - Home: https://docs.hopsworks.ai/ + - Getting Started ↗: https://docs.hopsworks.ai/ + - Tutorials: https://docs.hopsworks.ai/ + - Concepts: https://docs.hopsworks.ai/ + - Guides: https://docs.hopsworks.ai/ + - Setup and Installation: https://docs.hopsworks.ai/ + - Administration: https://docs.hopsworks.ai/ + - API: + - API Reference: + - Connection: generated/connection_api.md + - Model Registry: + - Model Registry: generated/model-registry/model_registry_api.md + - Model: generated/model-registry/model_api.md + - Model Schema: generated/model-registry/model_schema_api.md + - Model Serving: + - Model Serving: generated/model-serving/model_serving_api.md + - Deployment: generated/model-serving/deployment_api.md + - Deployment state: generated/model-serving/predictor_state_api.md + - Deployment state condition: generated/model-serving/predictor_state_condition_api.md + - Predictor: generated/model-serving/predictor_api.md + - Transformer: generated/model-serving/transformer_api.md + - Inference Logger: generated/model-serving/inference_logger_api.md + - Inference Batcher: generated/model-serving/inference_batcher_api.md + - Resources: generated/model-serving/resources_api.md + # Added to allow navigation using the side drawer + - Hopsworks API: https://docs.hopsworks.ai/ + - Feature Store API: https://docs.hopsworks.ai/ + - Feature Store JavaDoc: https://docs.hopsworks.ai/ + - Contributing: CONTRIBUTING.md + - Community ↗: https://community.hopsworks.ai/ + +theme: + name: material + custom_dir: docs/overrides + favicon: assets/images/favicon.ico + logo: assets/images/hops-logo.png + icon: + repo: fontawesome/brands/github + font: + text: "Roboto" + code: "IBM Plex Mono" + palette: + accent: teal + scheme: hopsworks + features: + - navigation.tabs + - navigation.tabs.sticky + - navigation.expand + + +extra: + analytics: + provider: google + property: G-64FEEXPSDN + generator: false + version: + - provider: mike + - version: latest + social: + - icon: fontawesome/brands/twitter + link: https://twitter.com/hopsworks + - icon: fontawesome/brands/github + link: https://github.com/logicalclocks/hopsworks + - icon: fontawesome/brands/discourse + link: https://community.hopsworks.ai/ + - icon: fontawesome/brands/linkedin + link: https://www.linkedin.com/company/hopsworks/ + +extra_css: + - css/custom.css + - css/version-select.css + - css/dropdown.css + - css/marctech.css + +extra_javascript: + - js/version-select.js + - js/inject-api-links.js + - js/dropdown.js + +plugins: + - search + - minify: + minify_html: true + minify_css: true + minify_js: true + - mike: + canonical_version: latest + +markdown_extensions: + - admonition + - codehilite + - footnotes + - pymdownx.tabbed: + alternate_style: true + - pymdownx.arithmatex + - pymdownx.superfences + - pymdownx.details + - pymdownx.caret + - pymdownx.mark + - pymdownx.tilde + - pymdownx.critic + - attr_list + - md_in_html + - toc: + permalink: "#" + - pymdownx.tasklist: + custom_checkbox: true + - markdown_include.include: + base_path: docs diff --git a/hsml/python/.pre-commit-config.yaml b/hsml/python/.pre-commit-config.yaml new file mode 100644 index 000000000..645dcf677 --- /dev/null +++ b/hsml/python/.pre-commit-config.yaml @@ -0,0 +1,10 @@ +exclude: setup.py +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit 
+ rev: v0.4.2 + hooks: + # Run the linter + - id: ruff + args: [--fix] + # Run the formatter + - id: ruff-format diff --git a/python/hsml/__init__.py b/hsml/python/hsml/__init__.py similarity index 100% rename from python/hsml/__init__.py rename to hsml/python/hsml/__init__.py diff --git a/python/hsml/client/__init__.py b/hsml/python/hsml/client/__init__.py similarity index 97% rename from python/hsml/client/__init__.py rename to hsml/python/hsml/client/__init__.py index b3475258c..3982f0c56 100644 --- a/python/hsml/client/__init__.py +++ b/hsml/python/hsml/client/__init__.py @@ -20,7 +20,7 @@ from hsml.client.istio import base as ist_base from hsml.client.istio import external as ist_external from hsml.client.istio import internal as ist_internal -from hsml.constants import CONNECTION +from hsml.connection import CONNECTION_SAAS_HOSTNAME _client_type = None @@ -49,7 +49,7 @@ def init( _client_type = client_type global _saas_connection - _saas_connection = host == CONNECTION.SAAS_HOSTNAME + _saas_connection = host == CONNECTION_SAAS_HOSTNAME global _hopsworks_client if not _hopsworks_client: diff --git a/python/hsml/client/auth.py b/hsml/python/hsml/client/auth.py similarity index 100% rename from python/hsml/client/auth.py rename to hsml/python/hsml/client/auth.py diff --git a/python/hsml/client/base.py b/hsml/python/hsml/client/base.py similarity index 100% rename from python/hsml/client/base.py rename to hsml/python/hsml/client/base.py diff --git a/python/hsml/client/exceptions.py b/hsml/python/hsml/client/exceptions.py similarity index 100% rename from python/hsml/client/exceptions.py rename to hsml/python/hsml/client/exceptions.py diff --git a/python/hsml/client/hopsworks/__init__.py b/hsml/python/hsml/client/hopsworks/__init__.py similarity index 100% rename from python/hsml/client/hopsworks/__init__.py rename to hsml/python/hsml/client/hopsworks/__init__.py diff --git a/python/hsml/client/hopsworks/base.py b/hsml/python/hsml/client/hopsworks/base.py similarity index 100% rename from python/hsml/client/hopsworks/base.py rename to hsml/python/hsml/client/hopsworks/base.py diff --git a/python/hsml/client/hopsworks/external.py b/hsml/python/hsml/client/hopsworks/external.py similarity index 100% rename from python/hsml/client/hopsworks/external.py rename to hsml/python/hsml/client/hopsworks/external.py diff --git a/python/hsml/client/hopsworks/internal.py b/hsml/python/hsml/client/hopsworks/internal.py similarity index 100% rename from python/hsml/client/hopsworks/internal.py rename to hsml/python/hsml/client/hopsworks/internal.py diff --git a/python/hsml/client/istio/__init__.py b/hsml/python/hsml/client/istio/__init__.py similarity index 100% rename from python/hsml/client/istio/__init__.py rename to hsml/python/hsml/client/istio/__init__.py diff --git a/python/hsml/client/istio/base.py b/hsml/python/hsml/client/istio/base.py similarity index 100% rename from python/hsml/client/istio/base.py rename to hsml/python/hsml/client/istio/base.py diff --git a/python/hsml/client/istio/external.py b/hsml/python/hsml/client/istio/external.py similarity index 100% rename from python/hsml/client/istio/external.py rename to hsml/python/hsml/client/istio/external.py diff --git a/python/hopsworks_common/__init__.py b/hsml/python/hsml/client/istio/grpc/__init__.py similarity index 100% rename from python/hopsworks_common/__init__.py rename to hsml/python/hsml/client/istio/grpc/__init__.py diff --git a/python/hsml/client/istio/grpc/errors.py b/hsml/python/hsml/client/istio/grpc/errors.py similarity 
index 100% rename from python/hsml/client/istio/grpc/errors.py rename to hsml/python/hsml/client/istio/grpc/errors.py diff --git a/python/hsml/client/istio/grpc/exceptions.py b/hsml/python/hsml/client/istio/grpc/exceptions.py similarity index 100% rename from python/hsml/client/istio/grpc/exceptions.py rename to hsml/python/hsml/client/istio/grpc/exceptions.py diff --git a/python/hsml/client/istio/grpc/inference_client.py b/hsml/python/hsml/client/istio/grpc/inference_client.py similarity index 100% rename from python/hsml/client/istio/grpc/inference_client.py rename to hsml/python/hsml/client/istio/grpc/inference_client.py diff --git a/python/hsml/client/istio/grpc/__init__.py b/hsml/python/hsml/client/istio/grpc/proto/__init__.py similarity index 100% rename from python/hsml/client/istio/grpc/__init__.py rename to hsml/python/hsml/client/istio/grpc/proto/__init__.py diff --git a/python/hsml/client/istio/grpc/proto/grpc_predict_v2.proto b/hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2.proto similarity index 100% rename from python/hsml/client/istio/grpc/proto/grpc_predict_v2.proto rename to hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2.proto diff --git a/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.py b/hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.py similarity index 100% rename from python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.py rename to hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.py diff --git a/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.pyi b/hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.pyi similarity index 100% rename from python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.pyi rename to hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.pyi diff --git a/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2_grpc.py b/hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2_grpc.py similarity index 100% rename from python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2_grpc.py rename to hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2_grpc.py diff --git a/python/hsml/client/istio/internal.py b/hsml/python/hsml/client/istio/internal.py similarity index 100% rename from python/hsml/client/istio/internal.py rename to hsml/python/hsml/client/istio/internal.py diff --git a/python/hsml/client/istio/grpc/proto/__init__.py b/hsml/python/hsml/client/istio/utils/__init__.py similarity index 100% rename from python/hsml/client/istio/grpc/proto/__init__.py rename to hsml/python/hsml/client/istio/utils/__init__.py diff --git a/python/hsml/client/istio/utils/infer_type.py b/hsml/python/hsml/client/istio/utils/infer_type.py similarity index 100% rename from python/hsml/client/istio/utils/infer_type.py rename to hsml/python/hsml/client/istio/utils/infer_type.py diff --git a/python/hsml/client/istio/utils/numpy_codec.py b/hsml/python/hsml/client/istio/utils/numpy_codec.py similarity index 100% rename from python/hsml/client/istio/utils/numpy_codec.py rename to hsml/python/hsml/client/istio/utils/numpy_codec.py diff --git a/python/hsml/connection.py b/hsml/python/hsml/connection.py similarity index 98% rename from python/hsml/connection.py rename to hsml/python/hsml/connection.py index 899589a4e..d9d61b9e8 100644 --- a/python/hsml/connection.py +++ b/hsml/python/hsml/connection.py @@ -16,10 +16,14 @@ import os +from hsml import client +from hsml.core import model_api, model_registry_api, model_serving_api from hsml.decorators import connected, not_connected from 
requests.exceptions import ConnectionError +CONNECTION_SAAS_HOSTNAME = "c.app.hopsworks.ai" + HOPSWORKS_PORT_DEFAULT = 443 HOSTNAME_VERIFICATION_DEFAULT = True @@ -96,7 +100,6 @@ def __init__( api_key_file: str = None, api_key_value: str = None, ): - from hsml.core import model_api, model_registry_api, model_serving_api self._host = host self._port = port self._project = project @@ -161,8 +164,6 @@ def connect(self): conn.connect() ``` """ - from hsml import client - from hsml.core import model_api self._connected = True try: # init client @@ -195,7 +196,6 @@ def close(self): Usage is recommended but optional. """ - from hsml import client client.stop() self._model_api = None self._connected = False diff --git a/python/hsml/constants.py b/hsml/python/hsml/constants.py similarity index 97% rename from python/hsml/constants.py rename to hsml/python/hsml/constants.py index d7af16967..6ec99ff3c 100644 --- a/python/hsml/constants.py +++ b/hsml/python/hsml/constants.py @@ -18,10 +18,6 @@ DEFAULT = dict() # used as default parameter for a class object -class CONNECTION: - SAAS_HOSTNAME = "c.app.hopsworks.ai" - - class MODEL: FRAMEWORK_TENSORFLOW = "TENSORFLOW" FRAMEWORK_TORCH = "TORCH" diff --git a/python/hsml/core/__init__.py b/hsml/python/hsml/core/__init__.py similarity index 100% rename from python/hsml/core/__init__.py rename to hsml/python/hsml/core/__init__.py diff --git a/python/hsml/core/dataset_api.py b/hsml/python/hsml/core/dataset_api.py similarity index 100% rename from python/hsml/core/dataset_api.py rename to hsml/python/hsml/core/dataset_api.py diff --git a/python/hsml/core/explicit_provenance.py b/hsml/python/hsml/core/explicit_provenance.py similarity index 100% rename from python/hsml/core/explicit_provenance.py rename to hsml/python/hsml/core/explicit_provenance.py diff --git a/python/hsml/core/model_api.py b/hsml/python/hsml/core/model_api.py similarity index 100% rename from python/hsml/core/model_api.py rename to hsml/python/hsml/core/model_api.py diff --git a/python/hsml/core/model_registry_api.py b/hsml/python/hsml/core/model_registry_api.py similarity index 100% rename from python/hsml/core/model_registry_api.py rename to hsml/python/hsml/core/model_registry_api.py diff --git a/python/hsml/core/model_serving_api.py b/hsml/python/hsml/core/model_serving_api.py similarity index 100% rename from python/hsml/core/model_serving_api.py rename to hsml/python/hsml/core/model_serving_api.py diff --git a/python/hsml/core/native_hdfs_api.py b/hsml/python/hsml/core/native_hdfs_api.py similarity index 100% rename from python/hsml/core/native_hdfs_api.py rename to hsml/python/hsml/core/native_hdfs_api.py diff --git a/python/hsml/core/serving_api.py b/hsml/python/hsml/core/serving_api.py similarity index 100% rename from python/hsml/core/serving_api.py rename to hsml/python/hsml/core/serving_api.py diff --git a/python/hsml/decorators.py b/hsml/python/hsml/decorators.py similarity index 100% rename from python/hsml/decorators.py rename to hsml/python/hsml/decorators.py diff --git a/python/hsml/deployable_component.py b/hsml/python/hsml/deployable_component.py similarity index 100% rename from python/hsml/deployable_component.py rename to hsml/python/hsml/deployable_component.py diff --git a/python/hsml/deployable_component_logs.py b/hsml/python/hsml/deployable_component_logs.py similarity index 100% rename from python/hsml/deployable_component_logs.py rename to hsml/python/hsml/deployable_component_logs.py diff --git a/python/hsml/deployment.py b/hsml/python/hsml/deployment.py 
similarity index 100% rename from python/hsml/deployment.py rename to hsml/python/hsml/deployment.py diff --git a/python/hsml/engine/__init__.py b/hsml/python/hsml/engine/__init__.py similarity index 100% rename from python/hsml/engine/__init__.py rename to hsml/python/hsml/engine/__init__.py diff --git a/python/hsml/engine/hopsworks_engine.py b/hsml/python/hsml/engine/hopsworks_engine.py similarity index 100% rename from python/hsml/engine/hopsworks_engine.py rename to hsml/python/hsml/engine/hopsworks_engine.py diff --git a/python/hsml/engine/local_engine.py b/hsml/python/hsml/engine/local_engine.py similarity index 100% rename from python/hsml/engine/local_engine.py rename to hsml/python/hsml/engine/local_engine.py diff --git a/python/hsml/engine/model_engine.py b/hsml/python/hsml/engine/model_engine.py similarity index 100% rename from python/hsml/engine/model_engine.py rename to hsml/python/hsml/engine/model_engine.py diff --git a/python/hsml/engine/serving_engine.py b/hsml/python/hsml/engine/serving_engine.py similarity index 100% rename from python/hsml/engine/serving_engine.py rename to hsml/python/hsml/engine/serving_engine.py diff --git a/python/hsml/inference_batcher.py b/hsml/python/hsml/inference_batcher.py similarity index 100% rename from python/hsml/inference_batcher.py rename to hsml/python/hsml/inference_batcher.py diff --git a/python/hsml/inference_endpoint.py b/hsml/python/hsml/inference_endpoint.py similarity index 100% rename from python/hsml/inference_endpoint.py rename to hsml/python/hsml/inference_endpoint.py diff --git a/python/hsml/inference_logger.py b/hsml/python/hsml/inference_logger.py similarity index 100% rename from python/hsml/inference_logger.py rename to hsml/python/hsml/inference_logger.py diff --git a/python/hsml/kafka_topic.py b/hsml/python/hsml/kafka_topic.py similarity index 100% rename from python/hsml/kafka_topic.py rename to hsml/python/hsml/kafka_topic.py diff --git a/python/hsml/model.py b/hsml/python/hsml/model.py similarity index 100% rename from python/hsml/model.py rename to hsml/python/hsml/model.py diff --git a/python/hsml/model_registry.py b/hsml/python/hsml/model_registry.py similarity index 100% rename from python/hsml/model_registry.py rename to hsml/python/hsml/model_registry.py diff --git a/python/hsml/model_schema.py b/hsml/python/hsml/model_schema.py similarity index 100% rename from python/hsml/model_schema.py rename to hsml/python/hsml/model_schema.py diff --git a/python/hsml/model_serving.py b/hsml/python/hsml/model_serving.py similarity index 100% rename from python/hsml/model_serving.py rename to hsml/python/hsml/model_serving.py diff --git a/python/hsml/predictor.py b/hsml/python/hsml/predictor.py similarity index 100% rename from python/hsml/predictor.py rename to hsml/python/hsml/predictor.py diff --git a/python/hsml/predictor_state.py b/hsml/python/hsml/predictor_state.py similarity index 100% rename from python/hsml/predictor_state.py rename to hsml/python/hsml/predictor_state.py diff --git a/python/hsml/predictor_state_condition.py b/hsml/python/hsml/predictor_state_condition.py similarity index 100% rename from python/hsml/predictor_state_condition.py rename to hsml/python/hsml/predictor_state_condition.py diff --git a/python/hsml/python/__init__.py b/hsml/python/hsml/python/__init__.py similarity index 100% rename from python/hsml/python/__init__.py rename to hsml/python/hsml/python/__init__.py diff --git a/python/hsml/python/model.py b/hsml/python/hsml/python/model.py similarity index 100% rename from 
python/hsml/python/model.py rename to hsml/python/hsml/python/model.py diff --git a/python/hsml/python/predictor.py b/hsml/python/hsml/python/predictor.py similarity index 100% rename from python/hsml/python/predictor.py rename to hsml/python/hsml/python/predictor.py diff --git a/python/hsml/python/signature.py b/hsml/python/hsml/python/signature.py similarity index 100% rename from python/hsml/python/signature.py rename to hsml/python/hsml/python/signature.py diff --git a/python/hsml/resources.py b/hsml/python/hsml/resources.py similarity index 100% rename from python/hsml/resources.py rename to hsml/python/hsml/resources.py diff --git a/python/hsml/schema.py b/hsml/python/hsml/schema.py similarity index 100% rename from python/hsml/schema.py rename to hsml/python/hsml/schema.py diff --git a/python/hsml/sklearn/__init__.py b/hsml/python/hsml/sklearn/__init__.py similarity index 100% rename from python/hsml/sklearn/__init__.py rename to hsml/python/hsml/sklearn/__init__.py diff --git a/python/hsml/sklearn/model.py b/hsml/python/hsml/sklearn/model.py similarity index 100% rename from python/hsml/sklearn/model.py rename to hsml/python/hsml/sklearn/model.py diff --git a/python/hsml/sklearn/predictor.py b/hsml/python/hsml/sklearn/predictor.py similarity index 100% rename from python/hsml/sklearn/predictor.py rename to hsml/python/hsml/sklearn/predictor.py diff --git a/python/hsml/sklearn/signature.py b/hsml/python/hsml/sklearn/signature.py similarity index 100% rename from python/hsml/sklearn/signature.py rename to hsml/python/hsml/sklearn/signature.py diff --git a/python/hsml/tag.py b/hsml/python/hsml/tag.py similarity index 100% rename from python/hsml/tag.py rename to hsml/python/hsml/tag.py diff --git a/python/hsml/tensorflow/__init__.py b/hsml/python/hsml/tensorflow/__init__.py similarity index 100% rename from python/hsml/tensorflow/__init__.py rename to hsml/python/hsml/tensorflow/__init__.py diff --git a/python/hsml/tensorflow/model.py b/hsml/python/hsml/tensorflow/model.py similarity index 100% rename from python/hsml/tensorflow/model.py rename to hsml/python/hsml/tensorflow/model.py diff --git a/python/hsml/tensorflow/predictor.py b/hsml/python/hsml/tensorflow/predictor.py similarity index 100% rename from python/hsml/tensorflow/predictor.py rename to hsml/python/hsml/tensorflow/predictor.py diff --git a/python/hsml/tensorflow/signature.py b/hsml/python/hsml/tensorflow/signature.py similarity index 100% rename from python/hsml/tensorflow/signature.py rename to hsml/python/hsml/tensorflow/signature.py diff --git a/python/hsml/torch/__init__.py b/hsml/python/hsml/torch/__init__.py similarity index 100% rename from python/hsml/torch/__init__.py rename to hsml/python/hsml/torch/__init__.py diff --git a/python/hsml/torch/model.py b/hsml/python/hsml/torch/model.py similarity index 100% rename from python/hsml/torch/model.py rename to hsml/python/hsml/torch/model.py diff --git a/python/hsml/torch/predictor.py b/hsml/python/hsml/torch/predictor.py similarity index 100% rename from python/hsml/torch/predictor.py rename to hsml/python/hsml/torch/predictor.py diff --git a/python/hsml/torch/signature.py b/hsml/python/hsml/torch/signature.py similarity index 100% rename from python/hsml/torch/signature.py rename to hsml/python/hsml/torch/signature.py diff --git a/python/hsml/transformer.py b/hsml/python/hsml/transformer.py similarity index 100% rename from python/hsml/transformer.py rename to hsml/python/hsml/transformer.py diff --git a/python/hsml/util.py b/hsml/python/hsml/util.py similarity 
index 90% rename from python/hsml/util.py rename to hsml/python/hsml/util.py index 6ef6d9053..96380b6f4 100644 --- a/python/hsml/util.py +++ b/hsml/python/hsml/util.py @@ -28,6 +28,16 @@ import pandas as pd from hsml import client from hsml.constants import DEFAULT, MODEL, PREDICTOR +from hsml.model import Model as BaseModel +from hsml.predictor import Predictor as BasePredictor +from hsml.python.model import Model as PyModel +from hsml.python.predictor import Predictor as PyPredictor +from hsml.sklearn.model import Model as SkLearnModel +from hsml.sklearn.predictor import Predictor as SkLearnPredictor +from hsml.tensorflow.model import Model as TFModel +from hsml.tensorflow.predictor import Predictor as TFPredictor +from hsml.torch.model import Model as TorchModel +from hsml.torch.predictor import Predictor as TorchPredictor from six import string_types @@ -95,11 +105,6 @@ def default(self, obj): # pylint: disable=E0202 def set_model_class(model): - from hsml.model import Model as BaseModel - from hsml.python.model import Model as PyModel - from hsml.sklearn.model import Model as SkLearnModel - from hsml.tensorflow.model import Model as TFModel - from hsml.torch.model import Model as TorchModel if "href" in model: _ = model.pop("href") if "type" in model: # backwards compatibility @@ -231,16 +236,6 @@ def validate_metrics(metrics): def get_predictor_for_model(model, **kwargs): - from hsml.model import Model as BaseModel - from hsml.predictor import Predictor as BasePredictor - from hsml.python.model import Model as PyModel - from hsml.python.predictor import Predictor as PyPredictor - from hsml.sklearn.model import Model as SkLearnModel - from hsml.sklearn.predictor import Predictor as SkLearnPredictor - from hsml.tensorflow.model import Model as TFModel - from hsml.tensorflow.predictor import Predictor as TFPredictor - from hsml.torch.model import Model as TorchModel - from hsml.torch.predictor import Predictor as TorchPredictor if not isinstance(model, BaseModel): raise ValueError( "model is of type {}, but an instance of {} class is expected".format( @@ -248,15 +243,15 @@ def get_predictor_for_model(model, **kwargs): ) ) - if type(model) is TFModel: + if type(model) == TFModel: return TFPredictor(**kwargs) - if type(model) is TorchModel: + if type(model) == TorchModel: return TorchPredictor(**kwargs) - if type(model) is SkLearnModel: + if type(model) == SkLearnModel: return SkLearnPredictor(**kwargs) - if type(model) is PyModel: + if type(model) == PyModel: return PyPredictor(**kwargs) - if type(model) is BaseModel: + if type(model) == BaseModel: return BasePredictor( # python as default framework and model server model_framework=MODEL.FRAMEWORK_PYTHON, model_server=PREDICTOR.MODEL_SERVER_PYTHON, diff --git a/python/hsml/utils/__init__.py b/hsml/python/hsml/utils/__init__.py similarity index 100% rename from python/hsml/utils/__init__.py rename to hsml/python/hsml/utils/__init__.py diff --git a/python/hsml/utils/schema/__init__.py b/hsml/python/hsml/utils/schema/__init__.py similarity index 100% rename from python/hsml/utils/schema/__init__.py rename to hsml/python/hsml/utils/schema/__init__.py diff --git a/python/hsml/utils/schema/column.py b/hsml/python/hsml/utils/schema/column.py similarity index 100% rename from python/hsml/utils/schema/column.py rename to hsml/python/hsml/utils/schema/column.py diff --git a/python/hsml/utils/schema/columnar_schema.py b/hsml/python/hsml/utils/schema/columnar_schema.py similarity index 100% rename from python/hsml/utils/schema/columnar_schema.py 
rename to hsml/python/hsml/utils/schema/columnar_schema.py diff --git a/python/hsml/utils/schema/tensor.py b/hsml/python/hsml/utils/schema/tensor.py similarity index 100% rename from python/hsml/utils/schema/tensor.py rename to hsml/python/hsml/utils/schema/tensor.py diff --git a/python/hsml/utils/schema/tensor_schema.py b/hsml/python/hsml/utils/schema/tensor_schema.py similarity index 100% rename from python/hsml/utils/schema/tensor_schema.py rename to hsml/python/hsml/utils/schema/tensor_schema.py diff --git a/python/hsml/version.py b/hsml/python/hsml/version.py similarity index 100% rename from python/hsml/version.py rename to hsml/python/hsml/version.py diff --git a/hsml/python/pyproject.toml b/hsml/python/pyproject.toml new file mode 100644 index 000000000..e4770cd4a --- /dev/null +++ b/hsml/python/pyproject.toml @@ -0,0 +1,136 @@ +[project] +name="hsml" +dynamic = ["version"] +requires-python = ">=3.8,<3.13" +readme = "README.md" +description = "HSML Python SDK to interact with Hopsworks Model Registry" +keywords = ["Hopsworks", "Model Registry", "hsml", "Models", "ML", "Machine Learning Models", "TensorFlow", "PyTorch", "Machine Learning", "MLOps", "DataOps"] +authors = [{name = "Hopsworks AB", email = "robin@hopswors.ai"}] +license = { text = "Apache-2.0" } + +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Topic :: Utilities", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Intended Audience :: Developers", +] + +dependencies = [ + "pyhumps==1.6.1", + "requests", + "furl", + "boto3", + "pandas", + "numpy", + "pyjks", + "mock", + "tqdm", + "grpcio>=1.49.1,<2.0.0", # ^1.49.1 + "protobuf>=3.19.0,<4.0.0", # ^3.19.0 +] + +[project.optional-dependencies] +dev = ["pytest==7.4.4", "pytest-mock==3.12.0", "ruff"] + +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + + +[tool.setuptools.packages.find] +exclude = ["tests*"] +include = ["../Readme.md", "../LICENSE", "hsml", "hsml.*"] + + +[tool.setuptools.dynamic] +version = {attr = "hsml.version.__version__"} + +[project.urls] +Documentation = "https://docs.hopsworks.ai/latest" +Repository = "https://github.com/logicalclocks/machine-learning-api" +Homepage = "https://www.hopsworks.ai" +Community = "https://community.hopsworks.ai" + + +[tool.ruff] +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", + "java", +] + +# Same as Black. +line-length = 88 +indent-width = 4 + +# Assume Python 3.8+ syntax. +target-version = "py38" + +[tool.ruff.lint] +# 1. Enable flake8-bugbear (`B`) rules, in addition to the defaults. 
+select = ["E4", "E7", "E9", "F", "B", "I", "W"]#, "ANN"] +ignore = [ + "B905", # zip has no strict kwarg until Python 3.10 + "ANN101", # Missing type annotation for self in method + "ANN102", # Missing type annotation for cls in classmethod + "ANN003", # Missing type annotation for **kwarg in function + "ANN002", # Missing type annotation for *args in function + "ANN401", # Allow Any in type annotations + "W505", # Doc line too long +] + +# Allow fix for all enabled rules (when `--fix`) is provided. +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[tool.ruff.lint.isort] +lines-after-imports = 2 +known-third-party = ["hopsworks", "hsfs", "hsml"] + + +[tool.ruff.format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. +line-ending = "auto" diff --git a/hsml/python/setup.py b/hsml/python/setup.py new file mode 100644 index 000000000..cb916d7e6 --- /dev/null +++ b/hsml/python/setup.py @@ -0,0 +1,19 @@ +# +# Copyright 2021 Logical Clocks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from setuptools import setup + + +setup() diff --git a/hsml/python/tests/__init__.py b/hsml/python/tests/__init__.py new file mode 100644 index 000000000..5b0cd48e7 --- /dev/null +++ b/hsml/python/tests/__init__.py @@ -0,0 +1,15 @@ +# +# Copyright 2024 Logical Clocks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/hsml/python/tests/conftest.py b/hsml/python/tests/conftest.py new file mode 100644 index 000000000..00d23a9fc --- /dev/null +++ b/hsml/python/tests/conftest.py @@ -0,0 +1,20 @@ +# +# Copyright 2024 Hopsworks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +pytest_plugins = [ + "tests.fixtures.backend_fixtures", + "tests.fixtures.model_fixtures", +] diff --git a/python/hsml/client/istio/utils/__init__.py b/hsml/python/tests/fixtures/__init__.py similarity index 100% rename from python/hsml/client/istio/utils/__init__.py rename to hsml/python/tests/fixtures/__init__.py diff --git a/hsml/python/tests/fixtures/backend_fixtures.py b/hsml/python/tests/fixtures/backend_fixtures.py new file mode 100644 index 000000000..c79bc6ddb --- /dev/null +++ b/hsml/python/tests/fixtures/backend_fixtures.py @@ -0,0 +1,45 @@ +# +# Copyright 2024 Hopsworks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import json +import os + +import pytest + + +FIXTURES_DIR = os.path.dirname(os.path.abspath(__file__)) + +FIXTURES = [ + "tag", + "model", + "resources", + "transformer", + "predictor", + "kafka_topic", + "inference_logger", + "inference_batcher", + "inference_endpoint", +] + +backend_fixtures_json = {} +for fixture in FIXTURES: + with open(os.path.join(FIXTURES_DIR, f"{fixture}_fixtures.json"), "r") as json_file: + backend_fixtures_json[fixture] = json.load(json_file) + + +@pytest.fixture +def backend_fixtures(): + return backend_fixtures_json diff --git a/python/tests/fixtures/inference_batcher_fixtures.json b/hsml/python/tests/fixtures/inference_batcher_fixtures.json similarity index 100% rename from python/tests/fixtures/inference_batcher_fixtures.json rename to hsml/python/tests/fixtures/inference_batcher_fixtures.json diff --git a/python/tests/fixtures/inference_endpoint_fixtures.json b/hsml/python/tests/fixtures/inference_endpoint_fixtures.json similarity index 100% rename from python/tests/fixtures/inference_endpoint_fixtures.json rename to hsml/python/tests/fixtures/inference_endpoint_fixtures.json diff --git a/python/tests/fixtures/inference_logger_fixtures.json b/hsml/python/tests/fixtures/inference_logger_fixtures.json similarity index 100% rename from python/tests/fixtures/inference_logger_fixtures.json rename to hsml/python/tests/fixtures/inference_logger_fixtures.json diff --git a/python/tests/fixtures/kafka_topic_fixtures.json b/hsml/python/tests/fixtures/kafka_topic_fixtures.json similarity index 100% rename from python/tests/fixtures/kafka_topic_fixtures.json rename to hsml/python/tests/fixtures/kafka_topic_fixtures.json diff --git a/python/tests/fixtures/model_fixtures.json b/hsml/python/tests/fixtures/model_fixtures.json similarity index 100% rename from python/tests/fixtures/model_fixtures.json rename to hsml/python/tests/fixtures/model_fixtures.json diff --git a/python/tests/fixtures/model_fixtures.py b/hsml/python/tests/fixtures/model_fixtures.py similarity index 100% rename from python/tests/fixtures/model_fixtures.py rename to hsml/python/tests/fixtures/model_fixtures.py diff --git a/python/tests/fixtures/predictor_fixtures.json b/hsml/python/tests/fixtures/predictor_fixtures.json similarity index 100% rename from python/tests/fixtures/predictor_fixtures.json rename to hsml/python/tests/fixtures/predictor_fixtures.json diff --git 
a/python/tests/fixtures/resources_fixtures.json b/hsml/python/tests/fixtures/resources_fixtures.json similarity index 100% rename from python/tests/fixtures/resources_fixtures.json rename to hsml/python/tests/fixtures/resources_fixtures.json diff --git a/python/tests/fixtures/tag_fixtures.json b/hsml/python/tests/fixtures/tag_fixtures.json similarity index 100% rename from python/tests/fixtures/tag_fixtures.json rename to hsml/python/tests/fixtures/tag_fixtures.json diff --git a/python/tests/fixtures/transformer_fixtures.json b/hsml/python/tests/fixtures/transformer_fixtures.json similarity index 100% rename from python/tests/fixtures/transformer_fixtures.json rename to hsml/python/tests/fixtures/transformer_fixtures.json diff --git a/python/tests/test_connection.py b/hsml/python/tests/test_connection.py similarity index 98% rename from python/tests/test_connection.py rename to hsml/python/tests/test_connection.py index 5ca15536a..c8d100279 100644 --- a/python/tests/test_connection.py +++ b/hsml/python/tests/test_connection.py @@ -15,11 +15,11 @@ # from hsml.connection import ( + CONNECTION_SAAS_HOSTNAME, HOPSWORKS_PORT_DEFAULT, HOSTNAME_VERIFICATION_DEFAULT, Connection, ) -from hsml.constants import CONNECTION from hsml.core import model_api, model_registry_api, model_serving_api @@ -30,7 +30,7 @@ def test_constants(self): # The purpose of this test is to ensure that (1) we don't make undesired changes to contant values # that might break things somewhere else, and (2) we remember to update the pytests accordingly by # adding / removing / updating tests, if necessary. - assert CONNECTION.SAAS_HOSTNAME == "c.app.hopsworks.ai" + assert CONNECTION_SAAS_HOSTNAME == "c.app.hopsworks.ai" assert HOPSWORKS_PORT_DEFAULT == 443 assert HOSTNAME_VERIFICATION_DEFAULT diff --git a/python/tests/test_constants.py b/hsml/python/tests/test_constants.py similarity index 100% rename from python/tests/test_constants.py rename to hsml/python/tests/test_constants.py diff --git a/python/tests/test_decorators.py b/hsml/python/tests/test_decorators.py similarity index 100% rename from python/tests/test_decorators.py rename to hsml/python/tests/test_decorators.py diff --git a/python/tests/test_deployable_component.py b/hsml/python/tests/test_deployable_component.py similarity index 100% rename from python/tests/test_deployable_component.py rename to hsml/python/tests/test_deployable_component.py diff --git a/python/tests/test_deployable_component_logs.py b/hsml/python/tests/test_deployable_component_logs.py similarity index 100% rename from python/tests/test_deployable_component_logs.py rename to hsml/python/tests/test_deployable_component_logs.py diff --git a/python/tests/test_deployment.py b/hsml/python/tests/test_deployment.py similarity index 100% rename from python/tests/test_deployment.py rename to hsml/python/tests/test_deployment.py diff --git a/python/tests/test_explicit_provenance.py b/hsml/python/tests/test_explicit_provenance.py similarity index 100% rename from python/tests/test_explicit_provenance.py rename to hsml/python/tests/test_explicit_provenance.py diff --git a/python/tests/test_inference_batcher.py b/hsml/python/tests/test_inference_batcher.py similarity index 100% rename from python/tests/test_inference_batcher.py rename to hsml/python/tests/test_inference_batcher.py diff --git a/python/tests/test_inference_endpoint.py b/hsml/python/tests/test_inference_endpoint.py similarity index 100% rename from python/tests/test_inference_endpoint.py rename to hsml/python/tests/test_inference_endpoint.py 
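The test_connection.py hunk above swaps the grouped constant `CONNECTION.SAAS_HOSTNAME` (from hsml.constants) for the flat module-level `CONNECTION_SAAS_HOSTNAME` re-exported by hsml.connection. A minimal sketch of the two equivalent spellings, using only names visible in that hunk (illustrative lines, not part of the patch):

    # Grouped style removed by the revert (hsml.constants):
    class CONNECTION:
        SAAS_HOSTNAME = "c.app.hopsworks.ai"

    # Flat style restored by the revert (re-exported from hsml.connection):
    CONNECTION_SAAS_HOSTNAME = "c.app.hopsworks.ai"

    # Both spellings denote the same value, which the updated assert pins:
    assert CONNECTION.SAAS_HOSTNAME == CONNECTION_SAAS_HOSTNAME == "c.app.hopsworks.ai"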
diff --git a/python/tests/test_inference_logger.py b/hsml/python/tests/test_inference_logger.py similarity index 100% rename from python/tests/test_inference_logger.py rename to hsml/python/tests/test_inference_logger.py diff --git a/python/tests/test_kafka_topic.py b/hsml/python/tests/test_kafka_topic.py similarity index 100% rename from python/tests/test_kafka_topic.py rename to hsml/python/tests/test_kafka_topic.py diff --git a/python/tests/test_model.py b/hsml/python/tests/test_model.py similarity index 100% rename from python/tests/test_model.py rename to hsml/python/tests/test_model.py diff --git a/python/tests/test_model_schema.py b/hsml/python/tests/test_model_schema.py similarity index 100% rename from python/tests/test_model_schema.py rename to hsml/python/tests/test_model_schema.py diff --git a/python/tests/test_predictor.py b/hsml/python/tests/test_predictor.py similarity index 100% rename from python/tests/test_predictor.py rename to hsml/python/tests/test_predictor.py diff --git a/python/tests/test_predictor_state.py b/hsml/python/tests/test_predictor_state.py similarity index 100% rename from python/tests/test_predictor_state.py rename to hsml/python/tests/test_predictor_state.py diff --git a/python/tests/test_predictor_state_condition.py b/hsml/python/tests/test_predictor_state_condition.py similarity index 100% rename from python/tests/test_predictor_state_condition.py rename to hsml/python/tests/test_predictor_state_condition.py diff --git a/python/tests/test_resources.py b/hsml/python/tests/test_resources.py similarity index 100% rename from python/tests/test_resources.py rename to hsml/python/tests/test_resources.py diff --git a/python/tests/test_schema.py b/hsml/python/tests/test_schema.py similarity index 100% rename from python/tests/test_schema.py rename to hsml/python/tests/test_schema.py diff --git a/python/tests/test_tag.py b/hsml/python/tests/test_tag.py similarity index 100% rename from python/tests/test_tag.py rename to hsml/python/tests/test_tag.py diff --git a/python/tests/test_transformer.py b/hsml/python/tests/test_transformer.py similarity index 100% rename from python/tests/test_transformer.py rename to hsml/python/tests/test_transformer.py diff --git a/python/tests/test_util.py b/hsml/python/tests/test_util.py similarity index 72% rename from python/tests/test_util.py rename to hsml/python/tests/test_util.py index b39501162..3e7d18166 100644 --- a/python/tests/test_util.py +++ b/hsml/python/tests/test_util.py @@ -14,18 +14,10 @@ # limitations under the License. 
# -import asyncio import os -from datetime import date, datetime from urllib.parse import ParseResult -import hsfs.util import pytest -import pytz -from hsfs.client.exceptions import FeatureStoreException -from hsfs.core.constants import HAS_AIOMYSQL, HAS_SQLALCHEMY -from hsfs.embedding import EmbeddingFeature, EmbeddingIndex -from hsfs.feature import Feature from hsml import util from hsml.constants import MODEL from hsml.model import Model as BaseModel @@ -38,11 +30,6 @@ from hsml.tensorflow.predictor import Predictor as TFPredictor from hsml.torch.model import Model as TorchModel from hsml.torch.predictor import Predictor as TorchPredictor -from mock import patch - - -if HAS_SQLALCHEMY and HAS_AIOMYSQL: - from hsfs.core import util_sql class TestUtil: @@ -596,7 +583,7 @@ def test_extract_field_from_json_as_instance_of_list_str(self, mocker): assert b == ["2", "2", "2"] assert get_obj_from_json.call_count == 3 assert get_obj_from_json.call_args[1]["obj"] == "2" - assert get_obj_from_json.call_args[1]["cls"] is str + assert get_obj_from_json.call_args[1]["cls"] == str def test_get_obj_from_json_cls(self, mocker): # Arrange @@ -656,200 +643,3 @@ class Test: # Assert assert "cannot be converted to class" in str(e_info.value) - - def test_get_hudi_datestr_from_timestamp(self): - dt = hsfs.util.get_hudi_datestr_from_timestamp(1640995200000) - assert dt == "20220101000000000" - - def test_convert_event_time_to_timestamp_timestamp(self): - dt = hsfs.util.convert_event_time_to_timestamp(1640995200) - assert dt == 1640995200000 - - def test_convert_event_time_to_timestamp_datetime(self): - dt = hsfs.util.convert_event_time_to_timestamp(datetime(2022, 1, 1, 0, 0, 0)) - assert dt == 1640995200000 - - def test_convert_event_time_to_timestamp_datetime_tz(self): - dt = hsfs.util.convert_event_time_to_timestamp( - pytz.timezone("US/Pacific").localize(datetime(2021, 12, 31, 16, 0, 0)) - ) - assert dt == 1640995200000 - - def test_convert_event_time_to_timestamp_date(self): - dt = hsfs.util.convert_event_time_to_timestamp(date(2022, 1, 1)) - assert dt == 1640995200000 - - def test_convert_event_time_to_timestamp_string(self): - dt = hsfs.util.convert_event_time_to_timestamp("2022-01-01 00:00:00") - assert dt == 1640995200000 - - def test_convert_iso_event_time_to_timestamp_string(self): - dt = hsfs.util.convert_event_time_to_timestamp("2022-01-01T00:00:00.000000Z") - assert dt == 1640995200000 - - def test_convert_event_time_to_timestamp_yyyy_mm_dd(self): - timestamp = hsfs.util.get_timestamp_from_date_string("2022-01-01") - assert timestamp == 1640995200000 - - def test_convert_event_time_to_timestamp_yyyy_mm_dd_hh(self): - timestamp = hsfs.util.get_timestamp_from_date_string("2022-01-01 00") - assert timestamp == 1640995200000 - - def test_convert_event_time_to_timestamp_yyyy_mm_dd_hh_mm(self): - timestamp = hsfs.util.get_timestamp_from_date_string("2022-01-01 00:00") - assert timestamp == 1640995200000 - - def test_convert_event_time_to_timestamp_yyyy_mm_dd_hh_mm_ss(self): - timestamp = hsfs.util.get_timestamp_from_date_string("2022-01-01 00:00:00") - assert timestamp == 1640995200000 - - def test_convert_event_time_to_timestamp_yyyy_mm_dd_hh_mm_ss_f(self): - timestamp = hsfs.util.get_timestamp_from_date_string("2022-01-01 00:00:00.000") - assert timestamp == 1640995200000 - - def test_convert_event_time_to_timestamp_yyyy_mm_dd_hh_mm_ss_error(self): - with pytest.raises(ValueError): - hsfs.util.get_timestamp_from_date_string("2022-13-01 00:00:00") - - def 
test_convert_event_time_to_timestamp_yyyy_mm_dd_hh_mm_ss_error2(self): - with pytest.raises(ValueError): - hsfs.util.get_timestamp_from_date_string("202-13-01 00:00:00") - - def test_convert_event_time_to_timestamp_yyyy_mm_dd_hh_mm_ss_error3(self): - with pytest.raises(ValueError): - hsfs.util.get_timestamp_from_date_string("00:00:00 2022-01-01") - - def test_convert_hudi_commit_time_to_timestamp(self): - timestamp = hsfs.util.get_timestamp_from_date_string("20221118095233099") - assert timestamp == 1668765153099 - - def test_get_dataset_type_HIVEDB(self): - db_type = hsfs.util.get_dataset_type( - "/apps/hive/warehouse/temp_featurestore.db/storage_connector_resources/kafka__tstore.jks" - ) - assert db_type == "HIVEDB" - - def test_get_dataset_type_HIVEDB_with_dfs(self): - db_type = hsfs.util.get_dataset_type( - "hdfs:///apps/hive/warehouse/temp_featurestore.db/storage_connector_resources/kafka__tstore.jks" - ) - assert db_type == "HIVEDB" - - def test_get_dataset_type_DATASET(self): - db_type = hsfs.util.get_dataset_type("/Projects/temp/Resources/kafka__tstore.jks") - assert db_type == "DATASET" - - def test_get_dataset_type_DATASET_with_dfs(self): - db_type = hsfs.util.get_dataset_type( - "hdfs:///Projects/temp/Resources/kafka__tstore.jks" - ) - assert db_type == "DATASET" - - def test_get_job_url(self, mocker): - # Arrange - mock_client_get_instance = mocker.patch("hsfs.client.get_instance") - - # Act - hsfs.util.get_job_url(href="1/2/3/4/5/6/7/8") - - # Assert - assert ( - mock_client_get_instance.return_value.replace_public_host.call_args[0][ - 0 - ].path - == "p/5/jobs/named/7/executions" - ) - - def test_get_feature_group_url(self, mocker): - # Arrange - feature_store_id = 99 - feature_group_id = 10 - mock_client_get_instance = mocker.patch("hsfs.client.get_instance") - mock_util_get_hostname_replaced_url = mocker.patch( - "hsfs.util.get_hostname_replaced_url" - ) - mock_client_get_instance.return_value._project_id = 50 - - # Act - hsfs.util.get_feature_group_url( - feature_group_id=feature_group_id, feature_store_id=feature_store_id - ) - - # Assert - assert mock_util_get_hostname_replaced_url.call_count == 1 - assert ( - mock_util_get_hostname_replaced_url.call_args[0][0] == "/p/50/fs/99/fg/10" - ) - - def test_valid_embedding_type(self): - embedding_index = EmbeddingIndex( - features=[ - EmbeddingFeature("feature1", 3), - EmbeddingFeature("feature2", 3), - EmbeddingFeature("feature3", 3), - EmbeddingFeature("feature4", 3), - ] - ) - # Define a schema with valid feature types - schema = [ - Feature(name="feature1", type="array"), - Feature(name="feature2", type="array"), - Feature(name="feature3", type="array"), - Feature(name="feature4", type="array"), - ] - # Call the method and expect no exceptions - hsfs.util.validate_embedding_feature_type(embedding_index, schema) - - def test_invalid_embedding_type(self): - embedding_index = EmbeddingIndex( - features=[ - EmbeddingFeature("feature1", 3), - EmbeddingFeature("feature2", 3), - ] - ) - # Define a schema with an invalid feature type - schema = [ - Feature(name="feature1", type="array"), - Feature(name="feature2", type="array"), # Invalid type - ] - # Call the method and expect a FeatureStoreException - with pytest.raises(FeatureStoreException): - hsfs.util.validate_embedding_feature_type(embedding_index, schema) - - def test_missing_embedding_index(self): - # Define a schema without an embedding index - schema = [ - Feature(name="feature1", type="array"), - Feature(name="feature2", type="array"), - ] - # Call the method with an 
empty feature_group (no embedding index) - hsfs.util.validate_embedding_feature_type(None, schema) - # No exception should be raised - - def test_empty_schema(self): - embedding_index = EmbeddingIndex( - features=[ - EmbeddingFeature("feature1", 3), - EmbeddingFeature("feature2", 3), - ] - ) - # Define an empty schema - schema = [] - # Call the method with an empty schema - hsfs.util.validate_embedding_feature_type(embedding_index, schema) - # No exception should be raised - - @pytest.mark.skipif( - not HAS_SQLALCHEMY or not HAS_AIOMYSQL, - reason="SQLAlchemy or aiomysql is not installed", - ) - def test_create_async_engine(self, mocker): - # Test when get_running_loop() raises a RuntimeError - with patch("asyncio.get_running_loop", side_effect=RuntimeError): - # mock storage connector - online_connector = patch.object(hsfs.util, "get_online_connector") - with pytest.raises( - RuntimeError, - match="Event loop is not running. Please invoke this co-routine from a running loop or provide an event loop.", - ): - asyncio.run(util_sql.create_async_engine(online_connector, True, 1)) diff --git a/python/tests/fixtures/__init__.py b/hsml/python/tests/utils/__init__.py similarity index 100% rename from python/tests/fixtures/__init__.py rename to hsml/python/tests/utils/__init__.py diff --git a/python/tests/utils/schema/test_column.py b/hsml/python/tests/utils/schema/test_column.py similarity index 100% rename from python/tests/utils/schema/test_column.py rename to hsml/python/tests/utils/schema/test_column.py diff --git a/python/tests/utils/schema/test_columnar_schema.py b/hsml/python/tests/utils/schema/test_columnar_schema.py similarity index 100% rename from python/tests/utils/schema/test_columnar_schema.py rename to hsml/python/tests/utils/schema/test_columnar_schema.py diff --git a/python/tests/utils/schema/test_tensor.py b/hsml/python/tests/utils/schema/test_tensor.py similarity index 100% rename from python/tests/utils/schema/test_tensor.py rename to hsml/python/tests/utils/schema/test_tensor.py diff --git a/python/tests/utils/schema/test_tensor_schema.py b/hsml/python/tests/utils/schema/test_tensor_schema.py similarity index 100% rename from python/tests/utils/schema/test_tensor_schema.py rename to hsml/python/tests/utils/schema/test_tensor_schema.py diff --git a/hsml/requirements-docs.txt b/hsml/requirements-docs.txt new file mode 100644 index 000000000..d1499a262 --- /dev/null +++ b/hsml/requirements-docs.txt @@ -0,0 +1,11 @@ +mkdocs==1.5.3 +mkdocs-material==9.5.17 +mike==2.0.0 +sphinx==7.2.6 +keras_autodoc @ git+https://git@github.com/logicalclocks/keras-autodoc +markdown-include==0.8.1 +mkdocs-jupyter==0.24.3 +markdown==3.6 +pymdown-extensions==10.7.1 +mkdocs-macros-plugin==1.0.4 +mkdocs-minify-plugin>=0.2.0 diff --git a/java/pom.xml b/java/pom.xml index 23136cb24..500d68f33 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -5,304 +5,6 @@ 4.0.0 com.logicalclocks - hsfs-parent - pom + hopsworks 4.0.0-SNAPSHOT - - hsfs - spark - flink - beam - - - - 1.8 - 1.8 - 14.0.1 - 4.5.6 - 4.4.13 - 1.7.30 - 1.2.17 - 2.1.8 - 1.18.10 - 2.10.0 - 1.1.0.6-SNAPSHOT - 0.12.3.0 - 2.10.40 - 2.12.10 - 2.12 - 0.0.5 - 20231013 - 0.12.2 - 5.9.1 - 2.22.0 - 4.3.1 - 1.8.2 - - spark3.1 - - UTF-8 - ${project.basedir}/delombok - - - - - org.projectlombok - lombok - ${lombok.version} - - - - com.damnhandy - handy-uri-templates - ${handy.version} - - - - com.google.guava - guava - ${guava.version} - provided - - - - org.apache.httpcomponents - httpclient - ${httpclient.version} - provided - - - - org.apache.httpcomponents - 
httpcore - ${httpcore.version} - provided - - - - org.slf4j - slf4j-api - ${slf4j.version} - provided - - - - org.slf4j - slf4j-log4j12 - ${slf4j.version} - provided - - - - log4j - log4j - ${log4j.version} - provided - - - - org.json - json - ${json.version} - - - - io.specto - hoverfly-java - ${hoverfly.version} - test - - - - org.junit.jupiter - junit-jupiter-api - ${junit.version} - test - - - - org.junit.jupiter - junit-jupiter-engine - ${junit.version} - test - - - - org.mockito - mockito-core - ${mockito.version} - test - - - - - - - org.scala-tools - maven-scala-plugin - - ${scala.version} - - - - scala-compile-first - process-resources - - add-source - compile - - - - scala-test-compile - process-test-resources - - testCompile - - - - - - org.apache.maven.plugins - maven-assembly-plugin - 2.4.1 - - - - jar-with-dependencies - - - - - make-assembly - - package - - single - - - - - - org.apache.maven.plugins - maven-checkstyle-plugin - 3.1.1 - - - validate - validate - - check - - - - - src/main/resources/checkstyle.xml - src/main/resources/suppressions.xml - true - true - true - true - - src/main/java - - - - - org.apache.maven.plugins - maven-surefire-plugin - ${surefire-plugin.version} - - - - hadoop.home.dir - ${project.basedir}/src/test/resources/hadoop/ - - - src/test/resources/system.properties - - - - org.projectlombok - lombok-maven-plugin - ${lombok.version}.0 - - ${project.basedir}/src/main/java - ${delombok.output} - false - - - - - delombok - - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - 3.5.0 - - - - **/MainClass.java - - **/beam/constructor/* - **/flink/constructor/* - - - - - aggregate - - aggregate - - site - - - - - - - - - - - src/test/resources - - - - - - - spark-3.3 - - 2.0.4.0-spark-3.3 - spark3.3 - - - - - - - Hops - Hops Repo - https://archiva.hops.works/repository/Hops/ - - true - - - true - - - - - - - Hops - Hops Repo - https://archiva.hops.works/repository/Hops/ - - diff --git a/mkdocs.yml b/mkdocs.yml index f59e2e4bc..ace10ae11 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -34,50 +34,9 @@ nav: - KafkaSchema: generated/api/kafka_schema.md - Secrets: generated/api/secrets.md - OpenSearch: generated/api/opensearch.md - - Connection (HSFS): generated/api/connection_api.md - - ExpectationSuite: generated/api/expectation_suite_api.md - - FeatureStore: generated/api/feature_store_api.md - - FeatureGroup: generated/api/feature_group_api.md - - ExternalFeatureGroup: generated/api/external_feature_group_api.md - - SpineGroup: generated/api/spine_group_api.md - - FeatureView: generated/api/feature_view_api.md - - TrainingDataset: generated/api/training_dataset_api.md - - Storage Connector: generated/api/storage_connector_api.md - - Feature: generated/api/feature_api.md - - Query: generated/api/query_api.md - - Transformation Functions: generated/api/transformation_functions_api.md - - ValidationReport: generated/api/validation_report_api.md - - Job: generated/api/job.md - - Provenance Links: generated/api/links.md - - Statistics: - - Statistics: generated/api/statistics_api.md - - Split Statistics: generated/api/split_statistics_api.md - - Feature descriptive statistics: generated/api/feature_descriptive_statistics_api.md - - Feature Monitoring: - - Configuration: generated/api/feature_monitoring_config_api.md - - Result: generated/api/feature_monitoring_result_api.md - - Window: generated/api/feature_monitoring_window_config_api.md - - Embedding: - - EmbeddingIndex: generated/api/embedding_index_api.md - - EmbeddingFeature: 
generated/api/embedding_feature_api.md - - SimilarityFunctionType: generated/api/similarity_function_type_api.md - - Connection (HSML): generated/connection_api.md - - Model Registry: - - Model Registry: generated/model-registry/model_registry_api.md - - Model: generated/model-registry/model_api.md - - Model Schema: generated/model-registry/model_schema_api.md - - Model Serving: - - Model Serving: generated/model-serving/model_serving_api.md - - Deployment: generated/model-serving/deployment_api.md - - Deployment state: generated/model-serving/predictor_state_api.md - - Deployment state condition: generated/model-serving/predictor_state_condition_api.md - - Predictor: generated/model-serving/predictor_api.md - - Transformer: generated/model-serving/transformer_api.md - - Inference Logger: generated/model-serving/inference_logger_api.md - - Inference Batcher: generated/model-serving/inference_batcher_api.md - - Resources: generated/model-serving/resources_api.md # Added to allow navigation using the side drawer - Hopsworks API: https://docs.hopsworks.ai/hopsworks-api/latest/ + - MLOps API: https://docs.hopsworks.ai/machine-learning-api/latest/ - Feature Store JavaDoc: https://docs.hopsworks.ai/feature-store-javadoc/latest/ - Contributing: CONTRIBUTING.md - Community ↗: https://community.hopsworks.ai/ diff --git a/python/.pre-commit-config.yaml b/python/.pre-commit-config.yaml index 50ff0bd9f..8c774780d 100644 --- a/python/.pre-commit-config.yaml +++ b/python/.pre-commit-config.yaml @@ -1,7 +1,7 @@ exclude: setup.py repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.5.0 + rev: v0.4.2 hooks: - id: ruff args: [--fix] diff --git a/python/auto_doc.py b/python/auto_doc.py deleted file mode 100644 index 342a7dcfd..000000000 --- a/python/auto_doc.py +++ /dev/null @@ -1,721 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import os -import pathlib -import shutil - -import keras_autodoc - - -JSON_METHODS = [ - "extract_fields_from_json", - "from_json", - "from_response_json", - "json", - "update_from_response_json", -] - -PAGES = { - "api/login.md": { - "login": ["hopsworks.login"], - "get_current_project": ["hopsworks.get_current_project"], - "fs_api": ["hopsworks.project.Project.get_feature_store"], - "mr_api": ["hopsworks.project.Project.get_model_registry"], - "ms_api": ["hopsworks.project.Project.get_model_serving"], - }, - "api/udf.md": { - "udf": ["hopsworks.udf"], - }, - "api/connection.md": { - "connection_create": ["hopsworks.connection.Connection.connection"], - "connection_properties": keras_autodoc.get_properties( - "hopsworks.connection.Connection" - ), - "connection_methods": keras_autodoc.get_methods( - "hopsworks.connection.Connection", exclude=["from_response_json", "json"] - ), - }, - "api/projects.md": { - "project_create": ["hopsworks.create_project"], - "project_properties": keras_autodoc.get_properties("hopsworks.project.Project"), - "project_methods": keras_autodoc.get_methods( - "hopsworks.project.Project", exclude=["from_response_json", "json"] - ), - }, - "api/jobs.md": { - "job_api_handle": ["hopsworks.project.Project.get_jobs_api"], - "job_create": ["hopsworks.core.job_api.JobsApi.create_job"], - "job_get": ["hopsworks.core.job_api.JobsApi.get_job"], - "job_get_all": ["hopsworks.core.job_api.JobsApi.get_jobs"], - "job_properties": keras_autodoc.get_properties("hopsworks.job.Job"), - "job_config": ["hopsworks.core.job_api.JobsApi.get_configuration"], - "job_methods": keras_autodoc.get_methods( - "hopsworks.job.Job", exclude=["from_response_json", "json"] - ), - }, - "api/executions.md": { - "execution_create": ["hopsworks.job.Job.run"], - "execution_get": ["hopsworks.job.Job.get_executions"], - "execution_properties": keras_autodoc.get_properties( - "hopsworks.execution.Execution" - ), - "execution_methods": keras_autodoc.get_methods( - "hopsworks.execution.Execution", - exclude=["from_response_json", "json", "update_from_response_json"], - ), - }, - "api/flink_cluster.md": { - "flink_api_handle": ["hopsworks.project.Project.get_flink_cluster_api"], - "setup_cluster": [ - "hopsworks.core.flink_cluster_api.FlinkClusterApi.setup_cluster" - ], - "get_cluster": ["hopsworks.core.flink_cluster_api.FlinkClusterApi.get_cluster"], - "start_cluster": ["hopsworks.flink_cluster.FlinkCluster.start"], - "submit_job_to_cluster": ["hopsworks.flink_cluster.FlinkCluster.submit_job"], - "flink_cluster_properties": keras_autodoc.get_properties( - "hopsworks.flink_cluster.FlinkCluster" - ), - "flink_cluster_methods": keras_autodoc.get_methods( - "hopsworks.flink_cluster.FlinkCluster", - exclude=["from_response_json", "json"], - ), - }, - "api/environment.md": { - "env_api_handle": ["hopsworks.project.Project.get_environment_api"], - "env_create": [ - "hopsworks.core.environment_api.EnvironmentApi.create_environment" - ], - "env_get": ["hopsworks.core.environment_api.EnvironmentApi.get_environment"], - "env_methods": keras_autodoc.get_methods( - "hopsworks.environment.Environment", exclude=["from_response_json", "json"] - ), - }, - "api/git_repo.md": { - "git_api_handle": ["hopsworks.project.Project.get_git_api"], - "git_repo_clone": ["hopsworks.core.git_api.GitApi.clone"], - "git_repo_get": ["hopsworks.core.git_api.GitApi.get_repo"], - "git_repo_get_all": ["hopsworks.core.git_api.GitApi.get_repos"], - "git_repo_properties": keras_autodoc.get_properties( - "hopsworks.git_repo.GitRepo" - ), - 
"git_repo_methods": keras_autodoc.get_methods( - "hopsworks.git_repo.GitRepo", exclude=["from_response_json", "json"] - ), - }, - "api/git_provider.md": { - "git_api_handle": ["hopsworks.project.Project.get_git_api"], - "git_provider_create": ["hopsworks.core.git_api.GitApi.set_provider"], - "git_provider_get": ["hopsworks.core.git_api.GitApi.get_provider"], - "git_provider_get_all": ["hopsworks.core.git_api.GitApi.get_providers"], - "git_provider_properties": keras_autodoc.get_properties( - "hopsworks.git_provider.GitProvider" - ), - "git_provider_methods": keras_autodoc.get_methods( - "hopsworks.git_provider.GitProvider", exclude=["from_response_json", "json"] - ), - }, - "api/git_remote.md": { - "git_api_handle": ["hopsworks.project.Project.get_git_api"], - "git_remote_create": ["hopsworks.git_repo.GitRepo.add_remote"], - "git_remote_get": ["hopsworks.git_repo.GitRepo.get_remote"], - "git_remote_get_all": ["hopsworks.git_repo.GitRepo.get_remotes"], - "git_remote_properties": keras_autodoc.get_properties( - "hopsworks.git_remote.GitRemote" - ), - "git_remote_methods": keras_autodoc.get_methods( - "hopsworks.git_remote.GitRemote", exclude=["from_response_json", "json"] - ), - }, - "api/datasets.md": { - "dataset_api_handle": ["hopsworks.project.Project.get_dataset_api"], - "dataset_methods": keras_autodoc.get_methods( - "hopsworks.core.dataset_api.DatasetApi" - ), - }, - "api/kafka_topic.md": { - "kafka_api_handle": ["hopsworks.project.Project.get_kafka_api"], - "kafka_config": ["hopsworks.core.kafka_api.KafkaApi.get_default_config"], - "kafka_topic_create": ["hopsworks.core.kafka_api.KafkaApi.create_topic"], - "kafka_topic_get": ["hopsworks.core.kafka_api.KafkaApi.get_topic"], - "kafka_topic_get_all": ["hopsworks.core.kafka_api.KafkaApi.get_topics"], - "kafka_topic_properties": keras_autodoc.get_properties( - "hopsworks.kafka_topic.KafkaTopic" - ), - "kafka_topic_methods": keras_autodoc.get_methods( - "hopsworks.kafka_topic.KafkaTopic", - exclude=["from_response_json", "json", "update_from_response_json"], - ), - }, - "api/kafka_schema.md": { - "kafka_api_handle": ["hopsworks.project.Project.get_kafka_api"], - "kafka_schema_create": ["hopsworks.core.kafka_api.KafkaApi.create_schema"], - "kafka_schema_get": ["hopsworks.core.kafka_api.KafkaApi.get_schema"], - "kafka_schema_get_all": ["hopsworks.core.kafka_api.KafkaApi.get_schemas"], - "kafka_schema_get_subjects": ["hopsworks.core.kafka_api.KafkaApi.get_subjects"], - "kafka_schema_properties": keras_autodoc.get_properties( - "hopsworks.kafka_schema.KafkaSchema" - ), - "kafka_schema_methods": keras_autodoc.get_methods( - "hopsworks.kafka_schema.KafkaSchema", - exclude=["from_response_json", "json", "update_from_response_json"], - ), - }, - "api/secrets.md": { - "secret_api_handle": ["hopsworks.get_secrets_api"], - "secret_create": ["hopsworks.core.secret_api.SecretsApi.create_secret"], - "secret_get": ["hopsworks.core.secret_api.SecretsApi.get_secret"], - "secret_get_simplified": ["hopsworks.core.secret_api.SecretsApi.get"], - "secret_get_all": ["hopsworks.core.secret_api.SecretsApi.get_secrets"], - "secret_properties": keras_autodoc.get_properties("hopsworks.secret.Secret"), - "secret_methods": keras_autodoc.get_methods( - "hopsworks.secret.Secret", exclude=["from_response_json", "json"] - ), - }, - "api/opensearch.md": { - "opensearch_api_handle": ["hopsworks.project.Project.get_opensearch_api"], - "opensearch_methods": keras_autodoc.get_methods( - "hopsworks.core.opensearch_api.OpenSearchApi" - ), - }, - "api/connection_api.md": { - 
"connection": ["hsfs.connection.Connection"], - "connection_properties": keras_autodoc.get_properties( - "hsfs.connection.Connection" - ), - "connection_methods": keras_autodoc.get_methods("hsfs.connection.Connection"), - }, - "api/spine_group_api.md": { - "fg": ["hsfs.feature_group.SpineGroup"], - "fg_create": ["hsfs.feature_store.FeatureStore.get_or_create_spine_group"], - "fg_get": ["hsfs.feature_store.FeatureStore.get_or_create_spine_group"], - "fg_properties": keras_autodoc.get_properties( - "hsfs.feature_group.SpineGroup", - exclude=[ - "expectation_suite", - "location", - "online_enabled", - "statistics", - "statistics_config", - "subject", - ], - ), - "fg_methods": keras_autodoc.get_methods( - "hsfs.feature_group.SpineGroup", - exclude=[ - "append_features", - "compute_statistics", - "delete_expectation_suite", - "from_response_json", - "get_all_validation_reports", - "get_expectation_suite", - "get_latest_validation_report", - "get_statistics", - "get_validation_history", - "save_expectation_suite", - "save_validation_report", - "update_from_response_json", - "update_statistics_config", - "validate", - ], - ), - }, - "api/training_dataset_api.md": { - "td": ["hsfs.training_dataset.TrainingDataset"], - "td_create": ["hsfs.feature_store.FeatureStore.create_training_dataset"], - "td_get": ["hsfs.feature_store.FeatureStore.get_training_dataset"], - "td_properties": keras_autodoc.get_properties( - "hsfs.training_dataset.TrainingDataset" - ), - "td_methods": keras_autodoc.get_methods( - "hsfs.training_dataset.TrainingDataset" - ), - }, - "api/feature_view_api.md": { - "fv": ["hsfs.feature_view.FeatureView"], - "fv_create": ["hsfs.feature_store.FeatureStore.create_feature_view"], - "fv_get": ["hsfs.feature_store.FeatureStore.get_feature_view"], - "fvs_get": ["hsfs.feature_store.FeatureStore.get_feature_views"], - "fv_properties": keras_autodoc.get_properties("hsfs.feature_view.FeatureView"), - "fv_methods": keras_autodoc.get_methods("hsfs.feature_view.FeatureView"), - }, - "api/feature_api.md": { - "feature": ["hsfs.feature.Feature"], - "feature_properties": keras_autodoc.get_properties("hsfs.feature.Feature"), - "feature_methods": keras_autodoc.get_methods("hsfs.feature.Feature"), - }, - "api/expectation_suite_api.md": { - "expectation_suite": ["hsfs.expectation_suite.ExpectationSuite"], - "expectation_suite_attach": [ - "hsfs.feature_group.FeatureGroup.save_expectation_suite" - ], - "single_expectation_api": [ - "hsfs.expectation_suite.ExpectationSuite.add_expectation", - "hsfs.expectation_suite.ExpectationSuite.replace_expectation", - "hsfs.expectation_suite.ExpectationSuite.remove_expectation", - ], - "expectation_suite_properties": keras_autodoc.get_properties( - "hsfs.expectation_suite.ExpectationSuite" - ), - "expectation_suite_methods": keras_autodoc.get_methods( - "hsfs.expectation_suite.ExpectationSuite" - ), - }, - "api/feature_store_api.md": { - "fs": ["hsfs.feature_store.FeatureStore"], - "fs_get": ["hsfs.connection.Connection.get_feature_store"], - "fs_properties": keras_autodoc.get_properties( - "hsfs.feature_store.FeatureStore" - ), - "fs_methods": keras_autodoc.get_methods("hsfs.feature_store.FeatureStore"), - }, - "api/feature_group_api.md": { - "fg": ["hsfs.feature_group.FeatureGroup"], - "fg_create": [ - "hsfs.feature_store.FeatureStore.create_feature_group", - "hsfs.feature_store.FeatureStore.get_or_create_feature_group", - ], - "fg_get": ["hsfs.feature_store.FeatureStore.get_feature_group"], - "fg_properties": keras_autodoc.get_properties( - 
"hsfs.feature_group.FeatureGroup" - ), - "fg_methods": keras_autodoc.get_methods("hsfs.feature_group.FeatureGroup"), - }, - "api/external_feature_group_api.md": { - "fg": ["hsfs.feature_group.ExternalFeatureGroup"], - "fg_create": ["hsfs.feature_store.FeatureStore.create_external_feature_group"], - "fg_get": ["hsfs.feature_store.FeatureStore.get_external_feature_group"], - "fg_properties": keras_autodoc.get_properties( - "hsfs.feature_group.ExternalFeatureGroup" - ), - "fg_methods": keras_autodoc.get_methods( - "hsfs.feature_group.ExternalFeatureGroup" - ), - }, - "api/storage_connector_api.md": { - "sc_get": [ - "hsfs.feature_store.FeatureStore.get_storage_connector", - "hsfs.feature_store.FeatureStore.get_online_storage_connector", - ], - "hopsfs_methods": keras_autodoc.get_methods( - "hsfs.storage_connector.HopsFSConnector", exclude=["from_response_json"] - ), - "hopsfs_properties": keras_autodoc.get_properties( - "hsfs.storage_connector.HopsFSConnector" - ), - "s3_methods": keras_autodoc.get_methods( - "hsfs.storage_connector.S3Connector", exclude=["from_response_json"] - ), - "s3_properties": keras_autodoc.get_properties( - "hsfs.storage_connector.S3Connector" - ), - "redshift_methods": keras_autodoc.get_methods( - "hsfs.storage_connector.RedshiftConnector", exclude=["from_response_json"] - ), - "redshift_properties": keras_autodoc.get_properties( - "hsfs.storage_connector.RedshiftConnector" - ), - "adls_methods": keras_autodoc.get_methods( - "hsfs.storage_connector.AdlsConnector", exclude=["from_response_json"] - ), - "adls_properties": keras_autodoc.get_properties( - "hsfs.storage_connector.AdlsConnector" - ), - "snowflake_methods": keras_autodoc.get_methods( - "hsfs.storage_connector.SnowflakeConnector", exclude=["from_response_json"] - ), - "snowflake_properties": keras_autodoc.get_properties( - "hsfs.storage_connector.SnowflakeConnector" - ), - "jdbc_methods": keras_autodoc.get_methods( - "hsfs.storage_connector.JdbcConnector", exclude=["from_response_json"] - ), - "jdbc_properties": keras_autodoc.get_properties( - "hsfs.storage_connector.JdbcConnector" - ), - "gcs_methods": keras_autodoc.get_methods( - "hsfs.storage_connector.GcsConnector", exclude=["from_response_json"] - ), - "gcs_properties": keras_autodoc.get_properties( - "hsfs.storage_connector.GcsConnector" - ), - "bigquery_methods": keras_autodoc.get_methods( - "hsfs.storage_connector.BigQueryConnector", exclude=["from_response_json"] - ), - "bigquery_properties": keras_autodoc.get_properties( - "hsfs.storage_connector.BigQueryConnector" - ), - "kafka_methods": keras_autodoc.get_methods( - "hsfs.storage_connector.KafkaConnector", exclude=["from_response_json"] - ), - "kafka_properties": keras_autodoc.get_properties( - "hsfs.storage_connector.KafkaConnector" - ), - }, - "api/statistics_config_api.md": { - "statistics_config": ["hsfs.statistics_config.StatisticsConfig"], - "statistics_config_properties": keras_autodoc.get_properties( - "hsfs.statistics_config.StatisticsConfig" - ), - }, - "api/transformation_functions_api.md": { - "transformation_function": [ - "hsfs.transformation_function.TransformationFunction" - ], - "transformation_function_properties": keras_autodoc.get_properties( - "hsfs.transformation_function.TransformationFunction" - ), - "transformation_function_methods": keras_autodoc.get_methods( - "hsfs.transformation_function.TransformationFunction", - exclude=[ - "from_response_json", - "update_from_response_json", - "json", - "to_dict", - ], - ), - "create_transformation_function": [ - 
"hsfs.feature_store.FeatureStore.create_transformation_function" - ], - "get_transformation_function": [ - "hsfs.feature_store.FeatureStore.get_transformation_function" - ], - "get_transformation_functions": [ - "hsfs.feature_store.FeatureStore.get_transformation_functions" - ], - }, - "api/validation_report_api.md": { - "validation_report": ["hsfs.validation_report.ValidationReport"], - "validation_report_validate": [ - "hsfs.feature_group.FeatureGroup.validate", - "hsfs.feature_group.FeatureGroup.insert", - ], - "validation_report_get": [ - "hsfs.feature_group.FeatureGroup.get_latest_validation_report", - "hsfs.feature_group.FeatureGroup.get_all_validation_reports", - ], - "validation_report_properties": keras_autodoc.get_properties( - "hsfs.validation_report.ValidationReport" - ), - "validation_report_methods": keras_autodoc.get_methods( - "hsfs.validation_report.ValidationReport" - ), - }, - "api/job.md": { - "job_configuration": ["hsfs.core.job_configuration.JobConfiguration"], - "job": ["hsfs.core.job.Job"], - "job_methods": [ - "hsfs.core.job.Job.get_state", - "hsfs.core.job.Job.get_final_state", - ], - }, - "api/query_api.md": { - "query_methods": keras_autodoc.get_methods( - "hsfs.constructor.query.Query", - exclude=["json", "to_dict"], - ), - "query_properties": keras_autodoc.get_properties( - "hsfs.constructor.query.Query" - ), - }, - "api/links.md": { - "links_properties": keras_autodoc.get_properties( - "hsfs.core.explicit_provenance.Links" - ), - "artifact_properties": keras_autodoc.get_properties( - "hsfs.core.explicit_provenance.Artifact" - ), - }, - "api/statistics_api.md": { - "statistics": ["hsfs.statistics.Statistics"], - "statistics_properties": keras_autodoc.get_properties( - "hsfs.statistics.Statistics" - ), - }, - "api/split_statistics_api.md": { - "split_statistics": ["hsfs.split_statistics.SplitStatistics"], - "split_statistics_properties": keras_autodoc.get_properties( - "hsfs.split_statistics.SplitStatistics" - ), - }, - "api/feature_descriptive_statistics_api.md": { - "feature_descriptive_statistics": [ - "hsfs.core.feature_descriptive_statistics.FeatureDescriptiveStatistics" - ], - "feature_descriptive_statistics_properties": keras_autodoc.get_properties( - "hsfs.core.feature_descriptive_statistics.FeatureDescriptiveStatistics" - ), - }, - "api/feature_monitoring_config_api.md": { - "feature_monitoring_config": [ - "hsfs.core.feature_monitoring_config.FeatureMonitoringConfig" - ], - "feature_monitoring_config_properties": keras_autodoc.get_properties( - "hsfs.core.feature_monitoring_config.FeatureMonitoringConfig" - ), - "feature_monitoring_config_methods": keras_autodoc.get_methods( - "hsfs.core.feature_monitoring_config.FeatureMonitoringConfig", - exclude=[ - "from_response_json", - "update_from_response_json", - "json", - "to_dict", - ], - ), - # from feature group - "feature_monitoring_config_creation_fg": [ - "hsfs.feature_group.FeatureGroup.create_statistics_monitoring", - "hsfs.feature_group.FeatureGroup.create_feature_monitoring", - ], - # from feature view - "feature_monitoring_config_creation_fv": [ - "hsfs.feature_view.FeatureView.create_statistics_monitoring", - "hsfs.feature_view.FeatureView.create_feature_monitoring", - ], - # retrieval - "feature_monitoring_config_retrieval_fg": [ - "hsfs.feature_group.FeatureGroup.get_feature_monitoring_configs", - ], - "feature_monitoring_config_retrieval_fv": [ - "hsfs.feature_view.FeatureView.get_feature_monitoring_configs", - ], - }, - "api/feature_monitoring_result_api.md": { - "feature_monitoring_result": 
[ - "hsfs.core.feature_monitoring_result.FeatureMonitoringResult" - ], - "feature_monitoring_result_retrieval": [ - "hsfs.core.feature_monitoring_config.FeatureMonitoringConfig.get_history" - ], - "feature_monitoring_result_properties": keras_autodoc.get_properties( - "hsfs.core.feature_monitoring_result.FeatureMonitoringResult" - ), - }, - "api/feature_monitoring_window_config_api.md": { - "feature_monitoring_window_config": [ - "hsfs.core.monitoring_window_config.MonitoringWindowConfig" - ], - "feature_monitoring_window_config_properties": keras_autodoc.get_properties( - "hsfs.core.monitoring_window_config.MonitoringWindowConfig" - ), - }, - "api/embedding_index_api.md": { - "embedding_index": ["hsfs.embedding.EmbeddingIndex"], - "embedding_index_properties": keras_autodoc.get_properties( - "hsfs.embedding.EmbeddingIndex" - ), - "embedding_index_methods": keras_autodoc.get_methods( - "hsfs.embedding.EmbeddingIndex", exclude=["from_response_json"] - ), - }, - "api/embedding_feature_api.md": { - "embedding_feature": ["hsfs.embedding.EmbeddingFeature"], - "embedding_feature_properties": keras_autodoc.get_properties( - "hsfs.embedding.EmbeddingFeature" - ), - }, - "api/similarity_function_type_api.md": { - "similarity_function_type": ["hsfs.embedding.SimilarityFunctionType"], - }, - # Model registry - "connection_api.md": { - "connection": ["hsml.connection.Connection"], - "connection_properties": keras_autodoc.get_properties( - "hsml.connection.Connection", exclude=["trust_store_path"] - ), - "connection_methods": keras_autodoc.get_methods("hsml.connection.Connection"), - }, - "model-registry/model_registry_api.md": { - "mr_get": ["hsml.connection.Connection.get_model_registry"], - "mr_modules": keras_autodoc.get_properties( - "hsml.model_registry.ModelRegistry", - exclude=[ - "project_id", - "project_name", - "model_registry_id", - "shared_registry_project_name", - ], - ), - "mr_properties": keras_autodoc.get_properties( - "hsml.model_registry.ModelRegistry", - exclude=[ - "python", - "sklearn", - "tensorflow", - "torch", - ], - ), - "mr_methods": keras_autodoc.get_methods( - "hsml.model_registry.ModelRegistry", exclude=["from_response_json"] - ), - }, - "model-registry/model_api.md": { - "ml_create_tf": ["hsml.model_registry.ModelRegistry.tensorflow.create_model"], - "ml_create_th": ["hsml.model_registry.ModelRegistry.torch.create_model"], - "ml_create_sl": ["hsml.model_registry.ModelRegistry.sklearn.create_model"], - "ml_create_py": ["hsml.model_registry.ModelRegistry.python.create_model"], - "ml_get": ["hsml.model_registry.ModelRegistry.get_model"], - "ml_properties": keras_autodoc.get_properties("hsml.model.Model"), - "ml_methods": keras_autodoc.get_methods( - "hsml.model.Model", - exclude=[ - "from_response_json", - "json", - "to_dict", - "update_from_response_json", - ], - ), - }, - "model-registry/model_schema.md": {}, - "model-registry/model_schema_api.md": { - "schema": ["hsml.schema.Schema"], - "schema_dict": ["hsml.schema.Schema.to_dict"], - "model_schema": ["hsml.model_schema.ModelSchema"], - "model_schema_dict": ["hsml.model_schema.ModelSchema.to_dict"], - }, - "model-registry/links.md": { - "links_properties": keras_autodoc.get_properties( - "hsml.core.explicit_provenance.Links" - ), - "artifact_properties": keras_autodoc.get_properties( - "hsml.core.explicit_provenance.Artifact" - ), - }, - # Model Serving - "model-serving/model_serving_api.md": { - "ms_get": ["hsml.connection.Connection.get_model_serving"], - "ms_properties": keras_autodoc.get_properties( - 
"hsml.model_serving.ModelServing" - ), - "ms_methods": keras_autodoc.get_methods( - "hsml.model_serving.ModelServing", exclude=["from_response_json"] - ), - }, - "model-serving/deployment_api.md": { - "ms_get_model_serving": ["hsml.connection.Connection.get_model_serving"], - "ms_get_deployments": [ - "hsml.model_serving.ModelServing.get_deployment", - "hsml.model_serving.ModelServing.get_deployment_by_id", - "hsml.model_serving.ModelServing.get_deployments", - ], - "ms_create_deployment": ["hsml.model_serving.ModelServing.create_deployment"], - "m_deploy": ["hsml.model.Model.deploy"], - "p_deploy": ["hsml.predictor.Predictor.deploy"], - "dep_properties": keras_autodoc.get_properties("hsml.deployment.Deployment"), - "dep_methods": keras_autodoc.get_methods( - "hsml.deployment.Deployment", exclude=JSON_METHODS + ["from_predictor"] - ), - }, - "model-serving/predictor_api.md": { - "ms_get_model_serving": ["hsml.connection.Connection.get_model_serving"], - "ms_create_predictor": ["hsml.model_serving.ModelServing.create_predictor"], - "pred_properties": keras_autodoc.get_properties("hsml.predictor.Predictor"), - "pred_methods": keras_autodoc.get_methods( - "hsml.predictor.Predictor", - exclude=JSON_METHODS + ["for_model"], - ), - }, - "model-serving/transformer_api.md": { - "ms_get_model_serving": ["hsml.connection.Connection.get_model_serving"], - "ms_create_transformer": ["hsml.model_serving.ModelServing.create_transformer"], - "trans_properties": keras_autodoc.get_properties( - "hsml.transformer.Transformer" - ), - "trans_methods": keras_autodoc.get_methods( - "hsml.transformer.Transformer", exclude=JSON_METHODS - ), - }, - "model-serving/inference_logger_api.md": { - "il": ["hsml.inference_logger.InferenceLogger"], - "il_properties": keras_autodoc.get_properties( - "hsml.inference_logger.InferenceLogger" - ), - "il_methods": keras_autodoc.get_methods( - "hsml.inference_logger.InferenceLogger", exclude=JSON_METHODS - ), - }, - "model-serving/inference_batcher_api.md": { - "ib": ["hsml.inference_batcher.InferenceBatcher"], - "ib_properties": keras_autodoc.get_properties( - "hsml.inference_batcher.InferenceBatcher" - ), - "ib_methods": keras_autodoc.get_methods( - "hsml.inference_batcher.InferenceBatcher", exclude=JSON_METHODS - ), - }, - "model-serving/resources_api.md": { - "res": ["hsml.resources.Resources"], - "res_properties": keras_autodoc.get_properties("hsml.resources.Resources"), - "res_methods": keras_autodoc.get_methods( - "hsml.resources.Resources", exclude=JSON_METHODS - ), - }, - "model-serving/predictor_state_api.md": { - "ps_get": ["hsml.deployment.Deployment.get_state"], - "ps_properties": keras_autodoc.get_properties( - "hsml.predictor_state.PredictorState" - ), - "ps_methods": keras_autodoc.get_methods( - "hsml.predictor_state.PredictorState", exclude=JSON_METHODS - ), - }, - "model-serving/predictor_state_condition_api.md": { - "psc_get": ["hsml.predictor_state.PredictorState.condition"], - "psc_properties": keras_autodoc.get_properties( - "hsml.predictor_state_condition.PredictorStateCondition" - ), - "psc_methods": keras_autodoc.get_methods( - "hsml.predictor_state_condition.PredictorStateCondition", - exclude=JSON_METHODS, - ), - }, -} - -hw_dir = pathlib.Path(__file__).resolve().parents[1] -if "GITHUB_SHA" in os.environ: - commit_sha = os.environ["GITHUB_SHA"] - project_url = ( - f"https://github.com/logicalclocks/hopsworks-api/tree/{commit_sha}/python" - ) -else: - branch_name = os.environ.get("GITHUB_BASE_REF", "master") - project_url = ( - 
f"https://github.com/logicalclocks/hopsworks-api/blob/{branch_name}/python" - ) - - -def generate(dest_dir): - doc_generator = keras_autodoc.DocumentationGenerator( - PAGES, - project_url=project_url, - template_dir="./docs/templates", - titles_size="###", - extra_aliases={ - "hsfs.core.query.Query": "hsfs.Query", - "hsfs.storage_connector.StorageConnector": "hsfs.StorageConnector", - "hsfs.statistics_config.StatisticsConfig": "hsfs.StatisticsConfig", - "hsfs.training_dataset_feature.TrainingDatasetFeature": "hsfs.TrainingDatasetFeature", - "pandas.core.frame.DataFrame": "pandas.DataFrame", - }, - max_signature_line_length=100, - ) - shutil.copyfile(hw_dir / "CONTRIBUTING.md", dest_dir / "CONTRIBUTING.md") - shutil.copyfile(hw_dir / "README.md", dest_dir / "index.md") - - doc_generator.generate(dest_dir / "generated") - - -if __name__ == "__main__": - generate(hw_dir / "docs") diff --git a/python/hopsworks/project.py b/python/hopsworks/project.py index 79ccff369..294a69435 100644 --- a/python/hopsworks/project.py +++ b/python/hopsworks/project.py @@ -17,7 +17,6 @@ import json -import hsfs.feature_store import humps from hopsworks import client, constants, util from hopsworks.client.external import Client @@ -30,6 +29,7 @@ kafka_api, opensearch_api, ) +from hsfs import feature_store class Project: @@ -103,9 +103,7 @@ def created(self): """Timestamp when the project was created""" return self._created - def get_feature_store( - self, name: str = None, engine: str = None - ) -> hsfs.feature_store.FeatureStore: + def get_feature_store(self, name: str = None, engine: str = None) -> feature_store.FeatureStore: """Connect to Project's Feature Store. Defaulting to the project name of default feature store. To get a @@ -144,9 +142,7 @@ def get_feature_store( engine=engine, ).get_feature_store(name) else: - return connection(engine=engine).get_feature_store( - name - ) # If internal client + return connection(engine=engine).get_feature_store(name) # If internal client def get_model_registry(self): """Connect to Project's Model Registry API. 
diff --git a/python/pyproject.toml b/python/pyproject.toml index e3970c466..4333adc8b 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -4,25 +4,10 @@ dynamic = ["version"] requires-python = ">=3.8,<3.13" readme = "README.md" description = "Hopsworks Python SDK to interact with Hopsworks Platform, Feature Store, Model Registry and Model Serving" -keywords = [ - "Hopsworks", - "Feature Store", - "hsfs", - "Spark", - "Machine Learning", - "MLOps", - "DataOps", - "Model Registry", - "hsml", - "Models", - "ML", - "Machine Learning Models", - "TensorFlow", - "PyTorch", -] +keywords = ["Hopsworks", "Feature Store", "Spark", "Machine Learning", "MLOps", "DataOps"] + authors = [{ name = "Hopsworks AB", email = "robin@hopsworks.ai" }] license = { text = "Apache-2.0" } - classifiers = [ "Development Status :: 5 - Production/Stable", "Topic :: Utilities", @@ -37,56 +22,19 @@ classifiers = [ ] dependencies = [ + "hsfs[python] @ git+https://git@github.com/logicalclocks/feature-store-api@master#subdirectory=python", + "hsml @ git+https://git@github.com/logicalclocks/machine-learning-api@main#subdirectory=python", "pyhumps==1.6.1", "requests", "furl", "boto3", - "pandas<2.2.0", - "numpy<2", "pyjks", "mock", - "avro==1.11.3", - "sqlalchemy", - "PyMySQL[rsa]", - "tzlocal", - "fsspec", - "retrying", - "hopsworks_aiomysql[sa]==0.2.1", - "polars>=0.20.18,<=0.21.0", - "opensearch-py>=1.1.0,<=2.4.2", "tqdm", - "grpcio>=1.49.1,<2.0.0", # ^1.49.1 - "protobuf>=3.19.0,<4.0.0", # ^3.19.0 ] [project.optional-dependencies] -python = [ - "pyarrow>=10.0", - "confluent-kafka<=2.3.0", - "fastavro>=1.4.11,<=1.8.4", - "tqdm", -] -great-expectations = ["great_expectations==0.18.12"] -dev-no-opt = [ - "hopsworks[python]", - "pytest==7.4.4", - "pytest-mock==3.12.0", - "ruff", - "pyspark==3.1.1", - "moto[s3]==5.0.0", - "typeguard==4.2.1", -] -dev-pandas1 = [ - "hopsworks[python]", - "pytest==7.4.4", - "pytest-mock==3.12.0", - "ruff", - "pyspark==3.1.1", - "moto[s3]==5.0.0", - "pandas<=1.5.3", - "sqlalchemy<=1.4.48", -] -dev = ["hopsworks[dev-no-opt,great-expectations]"] +dev = [ "ruff", "pytest"] [build-system] requires = ["setuptools", "wheel"] @@ -94,11 +42,12 @@ build-backend = "setuptools.build_meta" [tool.setuptools.packages.find] exclude = ["tests*"] -include = ["../README.md", "../LICENSE", "hopsworks*", "hsfs*", "hsml*", "hopsworks_common*"] +include = ["../Readme.md", "../LICENSE", "hopsworks", "hopsworks.*"] [tool.setuptools.dynamic] version = { attr = "hopsworks.version.__version__" } + [project.urls] Documentation = "https://docs.hopsworks.ai/latest" Repository = "https://github.com/logicalclocks/hopsworks-api" @@ -166,7 +115,7 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" [tool.ruff.lint.isort] lines-after-imports = 2 -known-third-party = ["hopsworks", "hsfs", "hsml", "hopsworks_common"] +known-third-party = ["hopsworks", "hsfs", "hsml"] [tool.ruff.format] diff --git a/python/tests/hopsworks/test_login.py b/python/tests/hopsworks/test_login.py index 34d5c1787..bec4ba729 100644 --- a/python/tests/hopsworks/test_login.py +++ b/python/tests/hopsworks/test_login.py @@ -13,17 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +from unittest import TestCase, mock +from contextlib import contextmanager +import hopsworks import getpass -import importlib import os -import shutil -import tempfile import uuid -from contextlib import contextmanager -from datetime import date -from unittest import TestCase, mock, skipIf - -import hopsworks +import tempfile +import importlib +import shutil from hopsworks.client import exceptions from hopsworks.project import Project @@ -65,6 +63,7 @@ def tearDown(self): hopsworks.logout() def _check_api_key_existence(self): + path = hopsworks._get_cached_api_key_path() api_key_name = ".hw_api_key" @@ -88,7 +87,6 @@ def _check_api_key_existence(self): path == temp_api_key_path, ) - @skipIf(date.today() <= date(2024, 7, 22), "Robin is on vacation.") def test_login_api_key_as_input(self): # Should accept api key as input from command line @@ -108,7 +106,6 @@ def test_login_api_key_as_input(self): assert in_home is True and os.path.exists(path) assert in_tmp is False - @skipIf(date.today() <= date(2024, 7, 22), "Robin is on vacation.") def test_login_api_key_as_argument(self): # Should accept api key as argument path, in_cwd, in_home, in_tmp = self._check_api_key_existence() @@ -126,7 +123,6 @@ def test_login_api_key_as_argument(self): assert in_home is True and not os.path.exists(path) assert in_tmp is False - @skipIf(date.today() <= date(2024, 7, 22), "Robin is on vacation.") def test_login_cmd_input_incorrect(self): # Should fail to login with incorrect API key @@ -140,7 +136,6 @@ def test_login_cmd_input_incorrect(self): with input({"hidden": "incorrect_api_key"}): hopsworks.login() - @skipIf(date.today() <= date(2024, 7, 22), "Robin is on vacation.") def test_login_fallback_to_tmp(self): # Should fall back to storing api key in tmp folder if home is not write and executable for user os.chmod(self.home_dir, 0o400) @@ -160,7 +155,6 @@ def test_login_fallback_to_tmp(self): assert in_home is False assert in_tmp is True and os.path.exists(path) - @skipIf(date.today() <= date(2024, 7, 22), "Robin is on vacation.") def test_login_use_cwd_api_key(self): # Should use API key in cwd if exists @@ -183,7 +177,6 @@ def test_login_use_cwd_api_key(self): assert in_home is False assert in_tmp is False - @skipIf(date.today() <= date(2024, 7, 22), "Robin is on vacation.") def test_login_use_home_api_key(self): # Should use API key in home if exists @@ -209,7 +202,6 @@ def test_login_use_home_api_key(self): assert in_home is True and os.path.exists(path) assert in_tmp is False - @skipIf(date.today() <= date(2024, 7, 22), "Robin is on vacation.") def test_login_api_key_as_environ(self): # Should accept api key as environmet variable try: @@ -234,7 +226,6 @@ def test_login_api_key_as_environ(self): finally: del os.environ["HOPSWORKS_API_KEY"] - @skipIf(date.today() <= date(2024, 7, 22), "Robin is on vacation.") def test_login_newline_in_api_key(self): try: imaginaryApiKey = "ImaginaryApiKey\n" diff --git a/requirements-docs.txt b/requirements-docs.txt index 8bc8d6230..d1499a262 100644 --- a/requirements-docs.txt +++ b/requirements-docs.txt @@ -1,7 +1,7 @@ mkdocs==1.5.3 mkdocs-material==9.5.17 mike==2.0.0 -sphinx==7.3.7 +sphinx==7.2.6 keras_autodoc @ git+https://git@github.com/logicalclocks/keras-autodoc markdown-include==0.8.1 mkdocs-jupyter==0.24.3 From aa748e62cd745fd2f7b76d247b8d607bfdce5bb8 Mon Sep 17 00:00:00 2001 From: kenneth Date: Fri, 19 Jul 2024 10:27:42 +0200 Subject: [PATCH 4/5] Revert "[FSTORE-1454] Unify variable_api and project_api (#219)" This reverts commit 
dd4470a2e9432d147dc116026234c77a79338fe7. --- python/hopsworks/client/external.py | 11 +--- python/hopsworks/core/project_api.py | 14 +---- python/hopsworks/core/variable_api.py | 85 +++------------------------ 3 files changed, 13 insertions(+), 97 deletions(-) diff --git a/python/hopsworks/client/external.py b/python/hopsworks/client/external.py index d0a277e71..171c2ba7b 100644 --- a/python/hopsworks/client/external.py +++ b/python/hopsworks/client/external.py @@ -14,11 +14,11 @@ # limitations under the License. # -import base64 import os - +import base64 import requests -from hopsworks.client import auth, base, exceptions + +from hopsworks.client import base, auth, exceptions class Client(base.Client): @@ -41,11 +41,6 @@ def __init__( self._port = port self._base_url = "https://" + self._host + ":" + str(self._port) self._project_name = project - if project is not None: - project_info = self._get_project_info(project) - self._project_id = str(project_info["projectId"]) - else: - self._project_id = None if api_key_value is not None: api_key = api_key_value diff --git a/python/hopsworks/core/project_api.py b/python/hopsworks/core/project_api.py index 46e8a3df9..7795dd3e0 100644 --- a/python/hopsworks/core/project_api.py +++ b/python/hopsworks/core/project_api.py @@ -14,9 +14,8 @@ # limitations under the License. # +from hopsworks import client, project, constants import json - -from hopsworks import client, constants, project from hopsworks.client.exceptions import RestAPIError @@ -28,6 +27,8 @@ def _exists(self, name: str): name: Name of the project. # Returns `bool`: True if project exists, otherwise False + # Raises + `RestAPIError`: If unable to check the existence of the project """ try: self._get_project(name) @@ -110,12 +111,3 @@ def _create_project( project = self._get_project(name) print("Project created successfully, explore it at " + project.get_url()) return project - - def get_client(self): - _client = client.get_instance() - path_params = [ - "project", - _client._project_id, - "client", - ] - return _client._send_request("GET", path_params, stream=True) diff --git a/python/hopsworks/core/variable_api.py b/python/hopsworks/core/variable_api.py index d4e8d188c..6c8e02a80 100644 --- a/python/hopsworks/core/variable_api.py +++ b/python/hopsworks/core/variable_api.py @@ -1,5 +1,5 @@ # -# Copyright 2022 Hopsworks AB +# Copyright 2022 Logical Clocks AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,13 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from __future__ import annotations - -import re -from typing import Optional, Tuple from hopsworks import client -from hopsworks.client.exceptions import RestAPIError class VariableApi: @@ -27,15 +22,7 @@ def __init__(self): pass def get_variable(self, variable: str): - """Get the configured value of a variable. - - # Arguments - variable: Name of the variable. - # Returns - The variable's value. - # Raises - `RestAPIError`: If unable to get the variable - """ + """Get the configured value for a variable""" _client = client.get_instance() return domain["successMessage"] - def get_version(self, software: str) -> Optional[str]: - """Get version of a software component. - - # Arguments - software: Name of the software. - # Returns - The software's version, if the software is available, otherwise `None`.
- # Raises - `RestAPIError`: If unable to get the version - """ - + _client = client.get_instance() + path_params = [ + "variables", + "versions", + ] - path_params = ["variables", "versions"] resp = _client._send_request("GET", path_params) - for entry in resp: if entry["software"] == software: return entry["version"] return None - - def parse_major_and_minor( - self, backend_version: str - ) -> Tuple[Optional[str], Optional[str]]: - """Extract major and minor version from full version. - - # Arguments - backend_version: The full version. - # Returns - (major, minor): The pair of major and minor parts of the version, or (None, None) if the version format is incorrect. - """ - - version_pattern = r"(\d+)\.(\d+)" - matches = re.match(version_pattern, backend_version) - - if matches is None: - return (None, None) - return matches.group(1), matches.group(2) - - def get_flyingduck_enabled(self) -> bool: - """Check if Flying Duck is enabled on the backend. - - # Returns - `True`: If Flying Duck is available, `False` otherwise. - # Raises - `RestAPIError`: If unable to obtain the flag's value. - """ - return self.get_variable("enable_flyingduck") == "true" - - def get_loadbalancer_external_domain(self) -> str: - """Get domain of external loadbalancer. - - # Returns - `str`: The domain of external loadbalancer, if it is set up, otherwise empty string `""`. - """ - try: - return self.get_variable("loadbalancer_external_domain") - except RestAPIError: - return "" - - def get_service_discovery_domain(self) -> str: - """Get domain of service discovery server. - - # Returns - `str`: The domain of service discovery server, if it is set up, otherwise empty string `""`. - """ - try: - return self.get_variable("service_discovery_domain") - except RestAPIError: - return "" From a24ff7df60eb1547a50c99e5974b5b6faf9347da Mon Sep 17 00:00:00 2001 From: kenneth Date: Fri, 19 Jul 2024 10:28:08 +0200 Subject: [PATCH 5/5] Revert "Merge machine-learning-api into hsml subdirectory" This reverts commit c599739e8bfb1188595674db7ee2fb5dfa420189.
--- hsml/.github/workflows/mkdocs-main.yml | 35 - hsml/.github/workflows/mkdocs-release.yml | 42 - hsml/.github/workflows/python-lint.yml | 163 --- hsml/.gitignore | 130 -- hsml/CONTRIBUTING.md | 215 ---- hsml/Dockerfile | 9 - hsml/Jenkinsfile | 23 - hsml/LICENSE | 201 ---- hsml/README.md | 141 --- hsml/auto_doc.py | 210 ---- hsml/docs/CONTRIBUTING.md | 215 ---- hsml/docs/assets/images/favicon.ico | Bin 2699 -> 0 bytes hsml/docs/assets/images/hops-logo.png | Bin 6356 -> 0 bytes hsml/docs/css/custom.css | 115 -- hsml/docs/css/dropdown.css | 55 - hsml/docs/css/marctech.css | 1047 ----------------- hsml/docs/css/version-select.css | 36 - hsml/docs/index.md | 141 --- hsml/docs/js/dropdown.js | 2 - hsml/docs/js/inject-api-links.js | 31 - hsml/docs/js/version-select.js | 64 - hsml/docs/overrides/main.html | 8 - hsml/docs/templates/connection_api.md | 11 - hsml/docs/templates/model-registry/links.md | 15 - .../templates/model-registry/model_api.md | 29 - .../model-registry/model_registry_api.md | 17 - .../model-registry/model_schema_api.md | 36 - .../templates/model-serving/deployment_api.md | 25 - .../model-serving/inference_batcher_api.md | 25 - .../model-serving/inference_logger_api.md | 25 - .../model-serving/model_serving_api.md | 13 - .../templates/model-serving/predictor_api.md | 29 - .../model-serving/predictor_state_api.md | 18 - .../predictor_state_condition_api.md | 15 - .../templates/model-serving/resources_api.md | 35 - .../model-serving/transformer_api.md | 29 - hsml/java/pom.xml | 109 -- hsml/java/src/main/resources/checkstyle.xml | 312 ----- hsml/java/src/main/resources/suppressions.xml | 5 - hsml/mkdocs.yml | 120 -- hsml/python/.pre-commit-config.yaml | 10 - hsml/python/hsml/__init__.py | 35 - hsml/python/hsml/client/__init__.py | 152 --- hsml/python/hsml/client/auth.py | 64 - hsml/python/hsml/client/base.py | 119 -- hsml/python/hsml/client/exceptions.py | 85 -- hsml/python/hsml/client/hopsworks/__init__.py | 15 - hsml/python/hsml/client/hopsworks/base.py | 111 -- hsml/python/hsml/client/hopsworks/external.py | 85 -- hsml/python/hsml/client/hopsworks/internal.py | 208 ---- hsml/python/hsml/client/istio/__init__.py | 15 - hsml/python/hsml/client/istio/base.py | 97 -- hsml/python/hsml/client/istio/external.py | 56 - .../python/hsml/client/istio/grpc/__init__.py | 15 - hsml/python/hsml/client/istio/grpc/errors.py | 30 - .../hsml/client/istio/grpc/exceptions.py | 123 -- .../client/istio/grpc/inference_client.py | 74 -- .../hsml/client/istio/grpc/proto/__init__.py | 15 - .../istio/grpc/proto/grpc_predict_v2.proto | 362 ------ .../istio/grpc/proto/grpc_predict_v2_pb2.py | 451 ------- .../istio/grpc/proto/grpc_predict_v2_pb2.pyi | 414 ------- .../grpc/proto/grpc_predict_v2_pb2_grpc.py | 419 ------- hsml/python/hsml/client/istio/internal.py | 206 ---- .../hsml/client/istio/utils/__init__.py | 15 - .../hsml/client/istio/utils/infer_type.py | 812 ------------- .../hsml/client/istio/utils/numpy_codec.py | 67 -- hsml/python/hsml/connection.py | 294 ----- hsml/python/hsml/constants.py | 119 -- hsml/python/hsml/core/__init__.py | 15 - hsml/python/hsml/core/dataset_api.py | 582 --------- hsml/python/hsml/core/explicit_provenance.py | 368 ------ hsml/python/hsml/core/model_api.py | 301 ----- hsml/python/hsml/core/model_registry_api.py | 67 -- hsml/python/hsml/core/model_serving_api.py | 148 --- hsml/python/hsml/core/native_hdfs_api.py | 59 - hsml/python/hsml/core/serving_api.py | 417 ------- hsml/python/hsml/decorators.py | 55 - hsml/python/hsml/deployable_component.py | 92 -- 
hsml/python/hsml/deployable_component_logs.py | 91 -- hsml/python/hsml/deployment.py | 479 -------- hsml/python/hsml/engine/__init__.py | 15 - hsml/python/hsml/engine/hopsworks_engine.py | 65 - hsml/python/hsml/engine/local_engine.py | 79 -- hsml/python/hsml/engine/model_engine.py | 549 --------- hsml/python/hsml/engine/serving_engine.py | 690 ----------- hsml/python/hsml/inference_batcher.py | 136 --- hsml/python/hsml/inference_endpoint.py | 163 --- hsml/python/hsml/inference_logger.py | 124 -- hsml/python/hsml/kafka_topic.py | 137 --- hsml/python/hsml/model.py | 575 --------- hsml/python/hsml/model_registry.py | 196 --- hsml/python/hsml/model_schema.py | 64 - hsml/python/hsml/model_serving.py | 375 ------ hsml/python/hsml/predictor.py | 482 -------- hsml/python/hsml/predictor_state.py | 147 --- hsml/python/hsml/predictor_state_condition.py | 90 -- hsml/python/hsml/python/__init__.py | 15 - hsml/python/hsml/python/model.py | 79 -- hsml/python/hsml/python/predictor.py | 33 - hsml/python/hsml/python/signature.py | 75 -- hsml/python/hsml/resources.py | 394 ------- hsml/python/hsml/schema.py | 83 -- hsml/python/hsml/sklearn/__init__.py | 15 - hsml/python/hsml/sklearn/model.py | 79 -- hsml/python/hsml/sklearn/predictor.py | 28 - hsml/python/hsml/sklearn/signature.py | 75 -- hsml/python/hsml/tag.py | 77 -- hsml/python/hsml/tensorflow/__init__.py | 15 - hsml/python/hsml/tensorflow/model.py | 79 -- hsml/python/hsml/tensorflow/predictor.py | 33 - hsml/python/hsml/tensorflow/signature.py | 75 -- hsml/python/hsml/torch/__init__.py | 15 - hsml/python/hsml/torch/model.py | 79 -- hsml/python/hsml/torch/predictor.py | 33 - hsml/python/hsml/torch/signature.py | 75 -- hsml/python/hsml/transformer.py | 93 -- hsml/python/hsml/util.py | 347 ------ hsml/python/hsml/utils/__init__.py | 15 - hsml/python/hsml/utils/schema/__init__.py | 15 - hsml/python/hsml/utils/schema/column.py | 28 - .../hsml/utils/schema/columnar_schema.py | 109 -- hsml/python/hsml/utils/schema/tensor.py | 30 - .../python/hsml/utils/schema/tensor_schema.py | 73 -- hsml/python/hsml/version.py | 17 - hsml/python/pyproject.toml | 136 --- hsml/python/setup.py | 19 - hsml/python/tests/__init__.py | 15 - hsml/python/tests/conftest.py | 20 - hsml/python/tests/fixtures/__init__.py | 15 - .../python/tests/fixtures/backend_fixtures.py | 45 - .../fixtures/inference_batcher_fixtures.json | 54 - .../fixtures/inference_endpoint_fixtures.json | 66 -- .../fixtures/inference_logger_fixtures.json | 96 -- .../tests/fixtures/kafka_topic_fixtures.json | 59 - .../python/tests/fixtures/model_fixtures.json | 203 ---- hsml/python/tests/fixtures/model_fixtures.py | 125 -- .../tests/fixtures/predictor_fixtures.json | 427 ------- .../tests/fixtures/resources_fixtures.json | 155 --- hsml/python/tests/fixtures/tag_fixtures.json | 25 - .../tests/fixtures/transformer_fixtures.json | 63 - hsml/python/tests/test_connection.py | 173 --- hsml/python/tests/test_constants.py | 383 ------ hsml/python/tests/test_decorators.py | 82 -- .../python/tests/test_deployable_component.py | 106 -- .../tests/test_deployable_component_logs.py | 110 -- hsml/python/tests/test_deployment.py | 795 ------------- hsml/python/tests/test_explicit_provenance.py | 78 -- hsml/python/tests/test_inference_batcher.py | 234 ---- hsml/python/tests/test_inference_endpoint.py | 298 ----- hsml/python/tests/test_inference_logger.py | 413 ------- hsml/python/tests/test_kafka_topic.py | 289 ----- hsml/python/tests/test_model.py | 472 -------- hsml/python/tests/test_model_schema.py | 30 - 
hsml/python/tests/test_predictor.py | 709 ----------- hsml/python/tests/test_predictor_state.py | 126 -- .../tests/test_predictor_state_condition.py | 81 -- hsml/python/tests/test_resources.py | 928 --------------- hsml/python/tests/test_schema.py | 199 ---- hsml/python/tests/test_tag.py | 62 - hsml/python/tests/test_transformer.py | 309 ----- hsml/python/tests/test_util.py | 645 ---------- hsml/python/tests/utils/__init__.py | 15 - hsml/python/tests/utils/schema/test_column.py | 44 - .../utils/schema/test_columnar_schema.py | 461 -------- hsml/python/tests/utils/schema/test_tensor.py | 48 - .../tests/utils/schema/test_tensor_schema.py | 204 ---- hsml/requirements-docs.txt | 11 - 167 files changed, 25692 deletions(-) delete mode 100644 hsml/.github/workflows/mkdocs-main.yml delete mode 100644 hsml/.github/workflows/mkdocs-release.yml delete mode 100644 hsml/.github/workflows/python-lint.yml delete mode 100644 hsml/.gitignore delete mode 100644 hsml/CONTRIBUTING.md delete mode 100644 hsml/Dockerfile delete mode 100644 hsml/Jenkinsfile delete mode 100644 hsml/LICENSE delete mode 100644 hsml/README.md delete mode 100644 hsml/auto_doc.py delete mode 100644 hsml/docs/CONTRIBUTING.md delete mode 100644 hsml/docs/assets/images/favicon.ico delete mode 100644 hsml/docs/assets/images/hops-logo.png delete mode 100644 hsml/docs/css/custom.css delete mode 100644 hsml/docs/css/dropdown.css delete mode 100644 hsml/docs/css/marctech.css delete mode 100644 hsml/docs/css/version-select.css delete mode 100644 hsml/docs/index.md delete mode 100644 hsml/docs/js/dropdown.js delete mode 100644 hsml/docs/js/inject-api-links.js delete mode 100644 hsml/docs/js/version-select.js delete mode 100644 hsml/docs/overrides/main.html delete mode 100644 hsml/docs/templates/connection_api.md delete mode 100644 hsml/docs/templates/model-registry/links.md delete mode 100644 hsml/docs/templates/model-registry/model_api.md delete mode 100644 hsml/docs/templates/model-registry/model_registry_api.md delete mode 100644 hsml/docs/templates/model-registry/model_schema_api.md delete mode 100644 hsml/docs/templates/model-serving/deployment_api.md delete mode 100644 hsml/docs/templates/model-serving/inference_batcher_api.md delete mode 100644 hsml/docs/templates/model-serving/inference_logger_api.md delete mode 100644 hsml/docs/templates/model-serving/model_serving_api.md delete mode 100644 hsml/docs/templates/model-serving/predictor_api.md delete mode 100644 hsml/docs/templates/model-serving/predictor_state_api.md delete mode 100644 hsml/docs/templates/model-serving/predictor_state_condition_api.md delete mode 100644 hsml/docs/templates/model-serving/resources_api.md delete mode 100644 hsml/docs/templates/model-serving/transformer_api.md delete mode 100644 hsml/java/pom.xml delete mode 100644 hsml/java/src/main/resources/checkstyle.xml delete mode 100644 hsml/java/src/main/resources/suppressions.xml delete mode 100644 hsml/mkdocs.yml delete mode 100644 hsml/python/.pre-commit-config.yaml delete mode 100644 hsml/python/hsml/__init__.py delete mode 100644 hsml/python/hsml/client/__init__.py delete mode 100644 hsml/python/hsml/client/auth.py delete mode 100644 hsml/python/hsml/client/base.py delete mode 100644 hsml/python/hsml/client/exceptions.py delete mode 100644 hsml/python/hsml/client/hopsworks/__init__.py delete mode 100644 hsml/python/hsml/client/hopsworks/base.py delete mode 100644 hsml/python/hsml/client/hopsworks/external.py delete mode 100644 hsml/python/hsml/client/hopsworks/internal.py delete mode 100644 
hsml/python/hsml/client/istio/__init__.py delete mode 100644 hsml/python/hsml/client/istio/base.py delete mode 100644 hsml/python/hsml/client/istio/external.py delete mode 100644 hsml/python/hsml/client/istio/grpc/__init__.py delete mode 100644 hsml/python/hsml/client/istio/grpc/errors.py delete mode 100644 hsml/python/hsml/client/istio/grpc/exceptions.py delete mode 100644 hsml/python/hsml/client/istio/grpc/inference_client.py delete mode 100644 hsml/python/hsml/client/istio/grpc/proto/__init__.py delete mode 100644 hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2.proto delete mode 100644 hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.py delete mode 100644 hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.pyi delete mode 100644 hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2_grpc.py delete mode 100644 hsml/python/hsml/client/istio/internal.py delete mode 100644 hsml/python/hsml/client/istio/utils/__init__.py delete mode 100644 hsml/python/hsml/client/istio/utils/infer_type.py delete mode 100644 hsml/python/hsml/client/istio/utils/numpy_codec.py delete mode 100644 hsml/python/hsml/connection.py delete mode 100644 hsml/python/hsml/constants.py delete mode 100644 hsml/python/hsml/core/__init__.py delete mode 100644 hsml/python/hsml/core/dataset_api.py delete mode 100644 hsml/python/hsml/core/explicit_provenance.py delete mode 100644 hsml/python/hsml/core/model_api.py delete mode 100644 hsml/python/hsml/core/model_registry_api.py delete mode 100644 hsml/python/hsml/core/model_serving_api.py delete mode 100644 hsml/python/hsml/core/native_hdfs_api.py delete mode 100644 hsml/python/hsml/core/serving_api.py delete mode 100644 hsml/python/hsml/decorators.py delete mode 100644 hsml/python/hsml/deployable_component.py delete mode 100644 hsml/python/hsml/deployable_component_logs.py delete mode 100644 hsml/python/hsml/deployment.py delete mode 100644 hsml/python/hsml/engine/__init__.py delete mode 100644 hsml/python/hsml/engine/hopsworks_engine.py delete mode 100644 hsml/python/hsml/engine/local_engine.py delete mode 100644 hsml/python/hsml/engine/model_engine.py delete mode 100644 hsml/python/hsml/engine/serving_engine.py delete mode 100644 hsml/python/hsml/inference_batcher.py delete mode 100644 hsml/python/hsml/inference_endpoint.py delete mode 100644 hsml/python/hsml/inference_logger.py delete mode 100644 hsml/python/hsml/kafka_topic.py delete mode 100644 hsml/python/hsml/model.py delete mode 100644 hsml/python/hsml/model_registry.py delete mode 100644 hsml/python/hsml/model_schema.py delete mode 100644 hsml/python/hsml/model_serving.py delete mode 100644 hsml/python/hsml/predictor.py delete mode 100644 hsml/python/hsml/predictor_state.py delete mode 100644 hsml/python/hsml/predictor_state_condition.py delete mode 100644 hsml/python/hsml/python/__init__.py delete mode 100644 hsml/python/hsml/python/model.py delete mode 100644 hsml/python/hsml/python/predictor.py delete mode 100644 hsml/python/hsml/python/signature.py delete mode 100644 hsml/python/hsml/resources.py delete mode 100644 hsml/python/hsml/schema.py delete mode 100644 hsml/python/hsml/sklearn/__init__.py delete mode 100644 hsml/python/hsml/sklearn/model.py delete mode 100644 hsml/python/hsml/sklearn/predictor.py delete mode 100644 hsml/python/hsml/sklearn/signature.py delete mode 100644 hsml/python/hsml/tag.py delete mode 100644 hsml/python/hsml/tensorflow/__init__.py delete mode 100644 hsml/python/hsml/tensorflow/model.py delete mode 100644 hsml/python/hsml/tensorflow/predictor.py delete 
mode 100644 hsml/python/hsml/tensorflow/signature.py delete mode 100644 hsml/python/hsml/torch/__init__.py delete mode 100644 hsml/python/hsml/torch/model.py delete mode 100644 hsml/python/hsml/torch/predictor.py delete mode 100644 hsml/python/hsml/torch/signature.py delete mode 100644 hsml/python/hsml/transformer.py delete mode 100644 hsml/python/hsml/util.py delete mode 100644 hsml/python/hsml/utils/__init__.py delete mode 100644 hsml/python/hsml/utils/schema/__init__.py delete mode 100644 hsml/python/hsml/utils/schema/column.py delete mode 100644 hsml/python/hsml/utils/schema/columnar_schema.py delete mode 100644 hsml/python/hsml/utils/schema/tensor.py delete mode 100644 hsml/python/hsml/utils/schema/tensor_schema.py delete mode 100644 hsml/python/hsml/version.py delete mode 100644 hsml/python/pyproject.toml delete mode 100644 hsml/python/setup.py delete mode 100644 hsml/python/tests/__init__.py delete mode 100644 hsml/python/tests/conftest.py delete mode 100644 hsml/python/tests/fixtures/__init__.py delete mode 100644 hsml/python/tests/fixtures/backend_fixtures.py delete mode 100644 hsml/python/tests/fixtures/inference_batcher_fixtures.json delete mode 100644 hsml/python/tests/fixtures/inference_endpoint_fixtures.json delete mode 100644 hsml/python/tests/fixtures/inference_logger_fixtures.json delete mode 100644 hsml/python/tests/fixtures/kafka_topic_fixtures.json delete mode 100644 hsml/python/tests/fixtures/model_fixtures.json delete mode 100644 hsml/python/tests/fixtures/model_fixtures.py delete mode 100644 hsml/python/tests/fixtures/predictor_fixtures.json delete mode 100644 hsml/python/tests/fixtures/resources_fixtures.json delete mode 100644 hsml/python/tests/fixtures/tag_fixtures.json delete mode 100644 hsml/python/tests/fixtures/transformer_fixtures.json delete mode 100644 hsml/python/tests/test_connection.py delete mode 100644 hsml/python/tests/test_constants.py delete mode 100644 hsml/python/tests/test_decorators.py delete mode 100644 hsml/python/tests/test_deployable_component.py delete mode 100644 hsml/python/tests/test_deployable_component_logs.py delete mode 100644 hsml/python/tests/test_deployment.py delete mode 100644 hsml/python/tests/test_explicit_provenance.py delete mode 100644 hsml/python/tests/test_inference_batcher.py delete mode 100644 hsml/python/tests/test_inference_endpoint.py delete mode 100644 hsml/python/tests/test_inference_logger.py delete mode 100644 hsml/python/tests/test_kafka_topic.py delete mode 100644 hsml/python/tests/test_model.py delete mode 100644 hsml/python/tests/test_model_schema.py delete mode 100644 hsml/python/tests/test_predictor.py delete mode 100644 hsml/python/tests/test_predictor_state.py delete mode 100644 hsml/python/tests/test_predictor_state_condition.py delete mode 100644 hsml/python/tests/test_resources.py delete mode 100644 hsml/python/tests/test_schema.py delete mode 100644 hsml/python/tests/test_tag.py delete mode 100644 hsml/python/tests/test_transformer.py delete mode 100644 hsml/python/tests/test_util.py delete mode 100644 hsml/python/tests/utils/__init__.py delete mode 100644 hsml/python/tests/utils/schema/test_column.py delete mode 100644 hsml/python/tests/utils/schema/test_columnar_schema.py delete mode 100644 hsml/python/tests/utils/schema/test_tensor.py delete mode 100644 hsml/python/tests/utils/schema/test_tensor_schema.py delete mode 100644 hsml/requirements-docs.txt diff --git a/hsml/.github/workflows/mkdocs-main.yml b/hsml/.github/workflows/mkdocs-main.yml deleted file mode 100644 index 001f1fad1..000000000 --- 
a/hsml/.github/workflows/mkdocs-main.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: mkdocs-main - -on: pull_request - -jobs: - publish-main: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: set dev version - working-directory: ./java - run: echo "DEV_VERSION=$(mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | grep -Ev 'Download|INFO|WARNING')" >> $GITHUB_ENV - - - uses: actions/setup-python@v5 - with: - python-version: "3.10" - - - name: install deps - working-directory: ./python - run: cp ../README.md . && pip3 install -r ../requirements-docs.txt && pip3 install -e .[dev] - - - name: generate autodoc - run: python3 auto_doc.py - - - name: setup git - run: | - git config --global user.name Mike - git config --global user.email mike@docs.hopsworks.ai - - - name: mike deploy docs - run: mike deploy ${{ env.DEV_VERSION }} dev -u diff --git a/hsml/.github/workflows/mkdocs-release.yml b/hsml/.github/workflows/mkdocs-release.yml deleted file mode 100644 index e2b4b2b3f..000000000 --- a/hsml/.github/workflows/mkdocs-release.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: mkdocs-release - -on: - push: - branches: [branch-*\.*] - -jobs: - publish-release: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: set major/minor/bugfix release version - working-directory: ./java - run: echo "RELEASE_VERSION=$(mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | grep -Ev 'Download|INFO|WARNING')" >> $GITHUB_ENV - - - name: set major/minor release version - run: echo "MAJOR_VERSION=$(echo $RELEASE_VERSION | sed 's/^\([0-9]*\.[0-9]*\).*$/\1/')" >> $GITHUB_ENV - - - uses: actions/setup-python@v5 - with: - python-version: "3.10" - - - name: install deps - working-directory: ./python - run: cp ../README.md . 
&& pip3 install -r ../requirements-docs.txt && pip3 install -e .[dev] - - - name: generate autodoc - run: python3 auto_doc.py - - - name: setup git - run: | - git config --global user.name Mike - git config --global user.email mike@docs.hopsworks.ai - - - name: mike deploy docs - run: | - mike deploy ${{ env.RELEASE_VERSION }} ${{ env.MAJOR_VERSION }} -u --push - mike alias ${{ env.RELEASE_VERSION }} latest -u --push diff --git a/hsml/.github/workflows/python-lint.yml b/hsml/.github/workflows/python-lint.yml deleted file mode 100644 index 88225add7..000000000 --- a/hsml/.github/workflows/python-lint.yml +++ /dev/null @@ -1,163 +0,0 @@ -name: python - -on: pull_request - -jobs: - lint_stylecheck: - name: Lint and Stylecheck - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - - name: Get all changed files - id: get-changed-files - uses: tj-actions/changed-files@v44 - with: - files_yaml: | - src: - - 'python/**/*.py' - - '!python/tests/**/*.py' - test: - - 'python/tests/**/*.py' - - - name: install deps - run: pip install ruff==0.4.2 - - - name: ruff on python files - if: steps.get-changed-files.outputs.src_any_changed == 'true' - env: - SRC_ALL_CHANGED_FILES: ${{ steps.get-changed-files.outputs.src_all_changed_files }} - run: ruff check --output-format=github $SRC_ALL_CHANGED_FILES - - - name: ruff on test files - if: steps.get-changed-files.outputs.test_any_changed == 'true' - env: - TEST_ALL_CHANGED_FILES: ${{ steps.get-changed-files.outputs.test_all_changed_files }} - run: ruff check --output-format=github $TEST_ALL_CHANGED_FILES - - - name: ruff format --check $ALL_CHANGED_FILES - env: - ALL_CHANGED_FILES: ${{ steps.get-changed-files.outputs.all_changed_files }} - run: ruff format $ALL_CHANGED_FILES - - unit_tests_ubuntu_utc: - name: Unit Testing (Ubuntu) - needs: lint_stylecheck - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] - - steps: - - name: Set Timezone - run: sudo timedatectl set-timezone UTC - - - uses: actions/checkout@v4 - - name: Copy README - run: cp README.md python/ - - - uses: actions/setup-python@v5 - name: Setup Python - with: - python-version: ${{ matrix.python-version }} - cache: "pip" - cache-dependency-path: "python/setup.py" - - run: pip install -e python[dev] - - - name: Display Python version - run: python --version - - - name: Run Pytest suite - run: pytest python/tests - - unit_tests_ubuntu_local: - name: Unit Testing (Ubuntu) (Local TZ) - needs: lint_stylecheck - runs-on: ubuntu-latest - - steps: - - name: Set Timezone - run: sudo timedatectl set-timezone Europe/Amsterdam - - - uses: actions/checkout@v4 - - name: Copy README - run: cp README.md python/ - - - uses: actions/setup-python@v5 - name: Setup Python - with: - python-version: "3.12" - cache: "pip" - cache-dependency-path: "python/setup.py" - - run: pip install -e python[dev] - - - name: Display Python version - run: python --version - - - name: Run Pytest suite - run: pytest python/tests - - unit_tests_windows_utc: - name: Unit Testing (Windows) - needs: lint_stylecheck - runs-on: windows-latest - strategy: - matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] - - steps: - - name: Set Timezone - run: tzutil /s "UTC" - - - uses: actions/checkout@v4 - - name: Copy README - run: cp README.md python/ - - - uses: actions/setup-python@v5 - name: Setup Python - with: - python-version: ${{ matrix.python-version }} - cache: "pip" - cache-dependency-path: 
"python/setup.py" - - run: pip install -e python[dev] - - - name: Display Python version - run: python --version - - - name: Run Pytest suite - run: pytest python/tests - - unit_tests_windows_local: - name: Unit Testing (Windows) (Local TZ) - needs: lint_stylecheck - runs-on: windows-latest - - steps: - - name: Set Timezone - run: tzutil /s "W. Europe Standard Time" - - - uses: actions/checkout@v4 - - name: Copy README - run: cp README.md python/ - - - uses: actions/setup-python@v5 - name: Setup Python - with: - python-version: "3.12" - cache: "pip" - cache-dependency-path: "python/setup.py" - - run: pip install -e python[dev] - - - name: Display Python version - run: python --version - - - name: Display pip freeze - run: pip freeze - - - name: Run Pytest suite - run: pytest python/tests diff --git a/hsml/.gitignore b/hsml/.gitignore deleted file mode 100644 index 6e96d8144..000000000 --- a/hsml/.gitignore +++ /dev/null @@ -1,130 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST -python/README.md -python/LICENSE - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -.hypothesis/ -.pytest_cache/ -.ruff_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# Java -.idea -.vscode -*.iml -target/ - -# Mac -.DS_Store - -# mkdocs intemediate files -docs/generated diff --git a/hsml/CONTRIBUTING.md b/hsml/CONTRIBUTING.md deleted file mode 100644 index b287467c6..000000000 --- a/hsml/CONTRIBUTING.md +++ /dev/null @@ -1,215 +0,0 @@ -## Python development setup ---- - -- Fork and clone the repository - -- Create a new Python environment with your favourite environment manager, e.g. virtualenv or conda - -- Install repository in editable mode with development dependencies: - - ```bash - cd python - pip install -e ".[dev]" - ``` - -- Install [pre-commit](https://pre-commit.com/) and then activate its hooks. pre-commit is a framework for managing and maintaining multi-language pre-commit hooks. The Model Registry uses pre-commit to ensure code-style and code formatting through [ruff](https://docs.astral.sh/ruff/). Run the following commands from the `python` directory: - - ```bash - cd python - pip install --user pre-commit - pre-commit install - ``` - - Afterwards, pre-commit will run whenever you commit. 
- -- To run formatting and code-style separately, you can configure your IDE, such as VSCode, to use [ruff](https://docs.astral.sh/ruff/tutorial/#getting-started): - - ```bash - cd python - ruff check --fix - ruff format - ``` - -### Python documentation - -We follow a few best practices for writing the Python documentation: - -1. Use the google docstring style: - - ```python - """[One Line Summary] - - [Extended Summary] - - [!!! example - import xyz - ] - - # Arguments - arg1: Type[, optional]. Description[, defaults to `default`] - arg2: Type[, optional]. Description[, defaults to `default`] - - # Returns - Type. Description. - - # Raises - Exception. Description. - """ - ``` - - If Python 3 type annotations are used, they are inserted automatically. - - -2. Model registry entity engine methods (e.g. ModelEngine etc.) only require a single line docstring. -3. REST Api implementations (e.g. ModelApi etc.) should be fully documented with docstrings without defaults. -4. Public Api such as metadata objects should be fully documented with defaults. - -#### Setup and Build Documentation - -We use `mkdocs` together with `mike` ([for versioning](https://github.com/jimporter/mike/)) to build the documentation and a plugin called `keras-autodoc` to auto generate Python API documentation from docstrings. - -**Background about `mike`:** - `mike` builds the documentation and commits it as a new directory to the gh-pages branch. Each directory corresponds to one version of the documentation. Additionally, `mike` maintains a json in the root of gh-pages with the mappings of versions/aliases for each of the directories available. With aliases you can define extra names like `dev` or `latest`, to indicate stable and unstable releases. - -1. Currently we are using our own version of `keras-autodoc` - - ```bash - pip install git+https://github.com/logicalclocks/keras-autodoc - ``` - -2. Install HSML with `docs` extras: - - ```bash - pip install -e .[dev,docs] - ``` - -3. To build the docs, first run the auto doc script: - - ```bash - cd .. - python auto_doc.py - ``` - -##### Option 1: Build only current version of docs - -4. Either build the docs, or serve them dynamically: - - Note: Links and pictures might not resolve properly later on when checking with this build. - The reason for that is that the docs are deployed with versioning on docs.hopsworks.ai and - therefore another level is added to all paths, e.g. `docs.hopsworks.ai/[version-or-alias]`. - Using relative links should not be affected by this, however, building the docs with version - (Option 2) is recommended. - - ```bash - mkdocs build - # or - mkdocs serve - ``` - -##### Option 2 (Preferred): Build multi-version doc with `mike` - -###### Versioning on docs.hopsworks.ai - -On docs.hopsworks.ai we implement the following versioning scheme: - -- current master branches (e.g. of hsml corresponding to master of Hopsworks): rendered as current Hopsworks snapshot version, e.g. **2.2.0-SNAPSHOT [dev]**, where `dev` is an alias to indicate that this is an unstable version. -- the latest release: rendered with full current version, e.g. **2.1.5 [latest]** with `latest` alias to indicate that this is the latest stable release. -- previous stable releases: rendered without alias, e.g. **2.1.4**. - -###### Build Instructions - -4. 
For this you can either checkout and make a local copy of the `upstream/gh-pages` branch, where -`mike` maintains the current state of docs.hopsworks.ai, or just build documentation for the branch you are updating: - - Building *one* branch: - - Checkout your dev branch with modified docs: - ```bash - git checkout [dev-branch] - ``` - - Generate API docs if necessary: - ```bash - python auto_doc.py - ``` - - Build docs with a version and alias - ```bash - mike deploy [version] [alias] --update-alias - - # for example, if you are updating documentation to be merged to master, - # which will become the new SNAPSHOT version: - mike deploy 2.2.0-SNAPSHOT dev --update-alias - - # if you are updating docs of the latest stable release branch - mike deploy [version] latest --update-alias - - # if you are updating docs of a previous stable release branch - mike deploy [version] - ``` - - If no gh-pages branch existed in your local repository, this will have created it. - - **Important**: If no previous docs were built, you will have to choose a version as default to be loaded as index, as follows - - ```bash - mike set-default [version-or-alias] - ``` - - You can now checkout the gh-pages branch and serve: - ```bash - git checkout gh-pages - mike serve - ``` - - You can also list all available versions/aliases: - ```bash - mike list - ``` - - Delete and reset your local gh-pages branch: - ```bash - mike delete --all - - # or delete single version - mike delete [version-or-alias] - ``` - -#### Adding new API documentation - -To add new documentation for APIs, you need to add information about the method/class to document to the `auto_doc.py` script: - -```python -PAGES = { - "connection.md": [ - "hsml.connection.Connection.connection", - "hsml.connection.Connection.setup_databricks", - ], - "new_template.md": [ - "module", - "xyz.asd" - ] -} -``` - -Now you can add a template markdown file to the `docs/templates` directory with the name you specified in the auto-doc script. The `new_template.md` file should contain a tag to identify the place at which the API documentation should be inserted: - -``` -## The XYZ package - -{{module}} - -Some extra content here. - -!!! example - ```python - import xyz - ``` - -{{xyz.asd}} -``` - -Finally, run the `auto_doc.py` script, as described above, to update the documentation. - -For information about Markdown syntax and possible Admonitions/Highlighting etc. see -the [Material for Mkdocs themes reference documentation](https://squidfunk.github.io/mkdocs-material/reference/abbreviations/).
diff --git a/hsml/Dockerfile b/hsml/Dockerfile deleted file mode 100644 index 7f87ca293..000000000 --- a/hsml/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM ubuntu:20.04 - -RUN apt-get update && \ - apt-get install -y python3-pip git && apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -RUN pip3 install twine - -RUN mkdir -p /.local && chmod -R 777 /.local diff --git a/hsml/Jenkinsfile b/hsml/Jenkinsfile deleted file mode 100644 index d2014d5cb..000000000 --- a/hsml/Jenkinsfile +++ /dev/null @@ -1,23 +0,0 @@ -pipeline { - agent { - docker { - label "local" - image "docker.hops.works/hopsworks_twine:0.0.1" - } - } - stages { - stage("publish") { - environment { - PYPI = credentials('977daeb0-e1c8-43a0-b35a-fc37bb9eee9b') - } - steps { - dir("python") { - sh "rm -f LICENSE README.md" - sh "cp -f ../LICENSE ../README.md ./" - sh "python3 -m build" - sh "twine upload -u $PYPI_USR -p $PYPI_PSW --skip-existing dist/*" - } - } - } - } -} diff --git a/hsml/LICENSE b/hsml/LICENSE deleted file mode 100644 index 261eeb9e9..000000000 --- a/hsml/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/hsml/README.md b/hsml/README.md deleted file mode 100644 index ee835ddc7..000000000 --- a/hsml/README.md +++ /dev/null @@ -1,141 +0,0 @@ -# Hopsworks Model Management - -

- [badges: Hopsworks Community, Hopsworks Model Management Documentation, python, PyPiStatus, Scala/Java Artifacts, Downloads, Ruff, License]

- -HSML is the library to interact with the Hopsworks Model Registry and Model Serving. The library makes it easy to export, manage and deploy models. - -However, to connect from an external Python environment, additional connection information, such as host and port, is required. - -## Getting Started On Hopsworks - -Get started easily by registering an account on [Hopsworks Serverless](https://app.hopsworks.ai/). Create your project and a [new Api key](https://docs.hopsworks.ai/latest/user_guides/projects/api_key/create_api_key/). In a new python environment with Python 3.8 or higher, install the [client library](https://docs.hopsworks.ai/latest/user_guides/client_installation/) using pip: - -```bash -# Get all Hopsworks SDKs: Feature Store, Model Serving and Platform SDK -pip install hopsworks -# or just the Model Registry and Model Serving SDK -pip install hsml -``` - -You can start a notebook and instantiate a connection and get the project model registry handle. - -```python -import hopsworks - -project = hopsworks.login() # you will be prompted for your api key - -mr = project.get_model_registry() -# or -ms = project.get_model_serving() -``` - -or using `hsml` directly: - -```python -import hsml - -connection = hsml.connection( - host="c.app.hopsworks.ai", # - project="your-project", - api_key_value="your-api-key", -) - -mr = connection.get_model_registry() -# or -ms = connection.get_model_serving() -``` - -Create a new model -```python -model = mr.tensorflow.create_model(name="mnist", - version=1, - metrics={"accuracy": 0.94}, - description="mnist model description") -model.save("/tmp/model_directory") # or /tmp/model_file -``` - -Download a model -```python -model = mr.get_model("mnist", version=1) - -model_path = model.download() -``` - -Delete a model -```python -model.delete() -``` - -Get best performing model -```python -best_model = mr.get_best_model('mnist', 'accuracy', 'max') - -``` - -Deploy a model -```python -deployment = model.deploy() -``` - -Start a deployment -```python -deployment.start() -``` - -Make predictions with a deployed model -```python -data = { "instances": [ model.input_example ] } - -predictions = deployment.predict(data) -``` - -# Tutorials - -You can find more examples on how to use the library in our [tutorials](https://github.com/logicalclocks/hopsworks-tutorials). - -## Documentation - -Documentation is available at [Hopsworks Model Management Documentation](https://docs.hopsworks.ai/). - -## Issues - -For general questions about the usage of Hopsworks Machine Learning, please open a topic on [Hopsworks Community](https://community.hopsworks.ai/). -Please report any issue using [Github issue tracking](https://github.com/logicalclocks/machine-learning-api/issues). - - -## Contributing - -If you would like to contribute to this library, please see the [Contribution Guidelines](CONTRIBUTING.md). diff --git a/hsml/auto_doc.py b/hsml/auto_doc.py deleted file mode 100644 index 4c7ae26ee..000000000 --- a/hsml/auto_doc.py +++ /dev/null @@ -1,210 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -import pathlib -import shutil -import os -import keras_autodoc - -JSON_METHODS = [ - "extract_fields_from_json", - "from_json", - "from_response_json", - "json", - "update_from_response_json", -] - -PAGES = { - # Model registry - "connection_api.md": { - "connection": ["hsml.connection.Connection"], - "connection_properties": keras_autodoc.get_properties( - "hsml.connection.Connection", exclude=["trust_store_path"] - ), - "connection_methods": keras_autodoc.get_methods("hsml.connection.Connection"), - }, - "model-registry/model_registry_api.md": { - "mr_get": ["hsml.connection.Connection.get_model_registry"], - "mr_modules": keras_autodoc.get_properties( - "hsml.model_registry.ModelRegistry", - exclude=[ - "project_id", - "project_name", - "model_registry_id", - "shared_registry_project_name", - ], - ), - "mr_properties": keras_autodoc.get_properties( - "hsml.model_registry.ModelRegistry", - exclude=[ - "python", - "sklearn", - "tensorflow", - "torch", - ], - ), - "mr_methods": keras_autodoc.get_methods( - "hsml.model_registry.ModelRegistry", exclude=["from_response_json"] - ), - }, - "model-registry/model_api.md": { - "ml_create_tf": ["hsml.model_registry.ModelRegistry.tensorflow.create_model"], - "ml_create_th": ["hsml.model_registry.ModelRegistry.torch.create_model"], - "ml_create_sl": ["hsml.model_registry.ModelRegistry.sklearn.create_model"], - "ml_create_py": ["hsml.model_registry.ModelRegistry.python.create_model"], - "ml_get": ["hsml.model_registry.ModelRegistry.get_model"], - "ml_properties": keras_autodoc.get_properties("hsml.model.Model"), - "ml_methods": keras_autodoc.get_methods( - "hsml.model.Model", - exclude=[ - "from_response_json", - "json", - "to_dict", - "update_from_response_json", - ], - ), - }, - "model-registry/model_schema.md": {}, - "model-registry/model_schema_api.md": { - "schema": ["hsml.schema.Schema"], - "schema_dict": ["hsml.schema.Schema.to_dict"], - "model_schema": ["hsml.model_schema.ModelSchema"], - "model_schema_dict": ["hsml.model_schema.ModelSchema.to_dict"], - }, - "model-registry/links.md": { - "links_properties": keras_autodoc.get_properties( - "hsml.core.explicit_provenance.Links" - ), - "artifact_properties": keras_autodoc.get_properties( - "hsml.core.explicit_provenance.Artifact" - ), - }, - # Model Serving - "model-serving/model_serving_api.md": { - "ms_get": ["hsml.connection.Connection.get_model_serving"], - "ms_properties": keras_autodoc.get_properties( - "hsml.model_serving.ModelServing" - ), - "ms_methods": keras_autodoc.get_methods( - "hsml.model_serving.ModelServing", exclude=["from_response_json"] - ), - }, - "model-serving/deployment_api.md": { - "ms_get_model_serving": ["hsml.connection.Connection.get_model_serving"], - "ms_get_deployments": [ - "hsml.model_serving.ModelServing.get_deployment", - "hsml.model_serving.ModelServing.get_deployment_by_id", - "hsml.model_serving.ModelServing.get_deployments", - ], - "ms_create_deployment": ["hsml.model_serving.ModelServing.create_deployment"], - "m_deploy": ["hsml.model.Model.deploy"], - "p_deploy": ["hsml.predictor.Predictor.deploy"], - "dep_properties": keras_autodoc.get_properties("hsml.deployment.Deployment"), - "dep_methods": keras_autodoc.get_methods( - "hsml.deployment.Deployment", exclude=JSON_METHODS + ["from_predictor"] - ), - }, - "model-serving/predictor_api.md": { - "ms_get_model_serving": ["hsml.connection.Connection.get_model_serving"], - "ms_create_predictor": 
["hsml.model_serving.ModelServing.create_predictor"], - "pred_properties": keras_autodoc.get_properties("hsml.predictor.Predictor"), - "pred_methods": keras_autodoc.get_methods( - "hsml.predictor.Predictor", - exclude=JSON_METHODS + ["for_model"], - ), - }, - "model-serving/transformer_api.md": { - "ms_get_model_serving": ["hsml.connection.Connection.get_model_serving"], - "ms_create_transformer": ["hsml.model_serving.ModelServing.create_transformer"], - "trans_properties": keras_autodoc.get_properties( - "hsml.transformer.Transformer" - ), - "trans_methods": keras_autodoc.get_methods( - "hsml.transformer.Transformer", exclude=JSON_METHODS - ), - }, - "model-serving/inference_logger_api.md": { - "il": ["hsml.inference_logger.InferenceLogger"], - "il_properties": keras_autodoc.get_properties( - "hsml.inference_logger.InferenceLogger" - ), - "il_methods": keras_autodoc.get_methods( - "hsml.inference_logger.InferenceLogger", exclude=JSON_METHODS - ), - }, - "model-serving/inference_batcher_api.md": { - "ib": ["hsml.inference_batcher.InferenceBatcher"], - "ib_properties": keras_autodoc.get_properties( - "hsml.inference_batcher.InferenceBatcher" - ), - "ib_methods": keras_autodoc.get_methods( - "hsml.inference_batcher.InferenceBatcher", exclude=JSON_METHODS - ), - }, - "model-serving/resources_api.md": { - "res": ["hsml.resources.Resources"], - "res_properties": keras_autodoc.get_properties("hsml.resources.Resources"), - "res_methods": keras_autodoc.get_methods( - "hsml.resources.Resources", exclude=JSON_METHODS - ), - }, - "model-serving/predictor_state_api.md": { - "ps_get": ["hsml.deployment.Deployment.get_state"], - "ps_properties": keras_autodoc.get_properties( - "hsml.predictor_state.PredictorState" - ), - "ps_methods": keras_autodoc.get_methods( - "hsml.predictor_state.PredictorState", exclude=JSON_METHODS - ), - }, - "model-serving/predictor_state_condition_api.md": { - "psc_get": ["hsml.predictor_state.PredictorState.condition"], - "psc_properties": keras_autodoc.get_properties( - "hsml.predictor_state_condition.PredictorStateCondition" - ), - "psc_methods": keras_autodoc.get_methods( - "hsml.predictor_state_condition.PredictorStateCondition", - exclude=JSON_METHODS, - ), - }, -} - -hsml_dir = pathlib.Path(__file__).resolve().parents[0] -if "GITHUB_SHA" in os.environ: - commit_sha = os.environ["GITHUB_SHA"] - project_url = f"https://github.com/logicalclocks/machine-learning-api/tree/{commit_sha}/python" -else: - branch_name = os.environ.get("GITHUB_BASE_REF", "master") - project_url = f"https://github.com/logicalclocks/machine-learning-api/blob/{branch_name}/python" - - -def generate(dest_dir): - doc_generator = keras_autodoc.DocumentationGenerator( - PAGES, - project_url=project_url, - template_dir="./docs/templates", - titles_size="###", - extra_aliases={}, - max_signature_line_length=100, - ) - shutil.copyfile(hsml_dir / "CONTRIBUTING.md", dest_dir / "CONTRIBUTING.md") - shutil.copyfile(hsml_dir / "README.md", dest_dir / "index.md") - - doc_generator.generate(dest_dir / "generated") - - -if __name__ == "__main__": - generate(hsml_dir / "docs") diff --git a/hsml/docs/CONTRIBUTING.md b/hsml/docs/CONTRIBUTING.md deleted file mode 100644 index b287467c6..000000000 --- a/hsml/docs/CONTRIBUTING.md +++ /dev/null @@ -1,215 +0,0 @@ -## Python development setup ---- - -- Fork and clone the repository - -- Create a new Python environment with your favourite environment manager, e.g. 
virtualenv or conda
-
-- Install the repository in editable mode with development dependencies:
-
-  ```bash
-  cd python
-  pip install -e ".[dev]"
-  ```
-
-- Install [pre-commit](https://pre-commit.com/) and then activate its hooks. pre-commit is a framework for managing and maintaining multi-language pre-commit hooks. The Model Registry uses pre-commit to ensure code style and code formatting through [ruff](https://docs.astral.sh/ruff/). Run the following commands from the `python` directory:
-
-  ```bash
-  cd python
-  pip install --user pre-commit
-  pre-commit install
-  ```
-
-  Afterwards, pre-commit will run whenever you commit.
-
-- To run formatting and code-style checks separately, you can configure your IDE, such as VSCode, to use [ruff](https://docs.astral.sh/ruff/tutorial/#getting-started):
-
-  ```bash
-  cd python
-  ruff check --fix
-  ruff format
-  ```
-
-### Python documentation
-
-We follow a few best practices for writing the Python documentation:
-
-1. Use the Google docstring style (a concrete example docstring is shown at the end of this guide):
-
-    ```python
-    """[One Line Summary]
-
-    [Extended Summary]
-
-    [!!! example
-        import xyz
-    ]
-
-    # Arguments
-        arg1: Type[, optional]. Description[, defaults to `default`]
-        arg2: Type[, optional]. Description[, defaults to `default`]
-
-    # Returns
-        Type. Description.
-
-    # Raises
-        Exception. Description.
-    """
-    ```
-
-    If Python 3 type annotations are used, they are inserted automatically.
-
-2. Model registry entity engine methods (e.g. ModelEngine etc.) only require a single-line docstring.
-3. REST API implementations (e.g. ModelApi etc.) should be fully documented with docstrings without defaults.
-4. Public APIs such as metadata objects should be fully documented with defaults.
-
-#### Setup and Build Documentation
-
-We use `mkdocs` together with `mike` ([for versioning](https://github.com/jimporter/mike/)) to build the documentation, and a plugin called `keras-autodoc` to auto-generate Python API documentation from docstrings.
-
-**Background about `mike`:**
-    `mike` builds the documentation and commits it as a new directory to the gh-pages branch. Each directory corresponds to one version of the documentation. Additionally, `mike` maintains a JSON file in the root of gh-pages with the mappings of versions/aliases for each of the directories available. With aliases you can define extra names like `dev` or `latest` to indicate stable and unstable releases.
-
-1. Currently we are using our own version of `keras-autodoc`:
-
-    ```bash
-    pip install git+https://github.com/logicalclocks/keras-autodoc
-    ```
-
-2. Install HSML with `docs` extras:
-
-    ```bash
-    pip install -e .[dev,docs]
-    ```
-
-3. To build the docs, first run the auto-doc script:
-
-    ```bash
-    cd ..
-    python auto_doc.py
-    ```
-
-##### Option 1: Build only the current version of the docs
-
-4. Either build the docs, or serve them dynamically:
-
-    Note: Links and pictures might not resolve properly later on when checking with this build.
-    The reason for that is that the docs are deployed with versioning on docs.hopsworks.ai and
-    therefore another level is added to all paths, e.g. `docs.hopsworks.ai/[version-or-alias]`.
-    Using relative links should not be affected by this; however, building the docs with a version
-    (Option 2) is recommended.
-
-    ```bash
-    mkdocs build
-    # or
-    mkdocs serve
-    ```
-
-##### Option 2 (Preferred): Build multi-version docs with `mike`
-
-###### Versioning on docs.hopsworks.ai
-
-On docs.hopsworks.ai we implement the following versioning scheme:
-
-- current master branches (e.g. of hsml corresponding to master of Hopsworks): rendered as the current Hopsworks snapshot version, e.g. **2.2.0-SNAPSHOT [dev]**, where `dev` is an alias to indicate that this is an unstable version.
-- the latest release: rendered with the full current version, e.g. **2.1.5 [latest]**, with the `latest` alias to indicate that this is the latest stable release.
-- previous stable releases: rendered without an alias, e.g. **2.1.4**.
-
-###### Build Instructions
-
-4. For this you can either checkout and make a local copy of the `upstream/gh-pages` branch, where
-`mike` maintains the current state of docs.hopsworks.ai, or just build documentation for the branch you are updating:
-
-    Building *one* branch:
-
-    Checkout your dev branch with modified docs:
-    ```bash
-    git checkout [dev-branch]
-    ```
-
-    Generate API docs if necessary:
-    ```bash
-    python auto_doc.py
-    ```
-
-    Build docs with a version and alias:
-    ```bash
-    mike deploy [version] [alias] --update-alias
-
-    # for example, if you are updating documentation to be merged to master,
-    # which will become the new SNAPSHOT version:
-    mike deploy 2.2.0-SNAPSHOT dev --update-alias
-
-    # if you are updating docs of the latest stable release branch
-    mike deploy [version] latest --update-alias
-
-    # if you are updating docs of a previous stable release branch
-    mike deploy [version]
-    ```
-
-    If no gh-pages branch existed in your local repository, this will have created it.
-
-    **Important**: If no previous docs were built, you will have to choose a version as the default to be loaded as the index, as follows:
-
-    ```bash
-    mike set-default [version-or-alias]
-    ```
-
-    You can now checkout the gh-pages branch and serve:
-    ```bash
-    git checkout gh-pages
-    mike serve
-    ```
-
-    You can also list all available versions/aliases:
-    ```bash
-    mike list
-    ```
-
-    Delete and reset your local gh-pages branch:
-    ```bash
-    mike delete --all
-
-    # or delete a single version
-    mike delete [version-or-alias]
-    ```
-
-#### Adding new API documentation
-
-To add new documentation for APIs, you need to add information about the method/class to document to the `auto_doc.py` script:
-
-```python
-PAGES = {
-    "connection.md": [
-        "hsml.connection.Connection.connection",
-        "hsml.connection.Connection.setup_databricks",
-    ],
-    "new_template.md": [
-        "module",
-        "xyz.asd",
-    ],
-}
-```
-
-Now you can add a template markdown file to the `docs/templates` directory with the name you specified in the auto-doc script. The `new_template.md` file should contain a tag to identify the place at which the API documentation should be inserted:
-
-```
-## The XYZ package
-
-{{module}}
-
-Some extra content here.
-
-!!! example
-    ```python
-    import xyz
-    ```
-
-{{xyz.asd}}
-```
-
-Finally, run the `auto_doc.py` script, as described above, to update the documentation.
-
-For information about Markdown syntax and possible Admonitions/Highlighting etc. see
-the [Material for MkDocs reference documentation](https://squidfunk.github.io/mkdocs-material/reference/abbreviations/).
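-
-#### Example docstring
-
-As a concrete illustration of the Google docstring style described above, a fully documented public method could look as follows. This is a sketch: it is modeled on `ModelRegistry.get_model`, but the exact wording and the exception type are illustrative rather than copied from the codebase.
-
-```python
-def get_model(self, name: str, version: int = 1):
-    """Get a model entity from the model registry.
-
-    Getting a model from the Model Registry means getting its metadata handle,
-    so you can subsequently download the model artifacts.
-
-    # Arguments
-        name: str. Name of the model to get.
-        version: int, optional. Version of the model to retrieve, defaults to `1`.
-
-    # Returns
-        `Model`. The model metadata object.
-
-    # Raises
-        `RestAPIError`. If unable to retrieve the model from the model registry.
-    """
-```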
diff --git a/hsml/docs/assets/images/favicon.ico b/hsml/docs/assets/images/favicon.ico
deleted file mode 100644
index ab757306798d8da0cea9ca008ac05cd5091eff1a..0000000000000000000000000000000000000000
GIT binary patch
[binary image data omitted]
diff --git a/hsml/docs/assets/images/hops-logo.png b/hsml/docs/assets/images/hops-logo.png
deleted file mode 100644
index d3625ae07da68430e3bd5f46a1a7baf88dd474cc..0000000000000000000000000000000000000000
GIT binary patch
[binary image data omitted]
diff --git a/hsml/docs/css/custom.css b/hsml/docs/css/custom.css
deleted file mode 100644
index 5ba3208e1..000000000
--- a/hsml/docs/css/custom.css
+++ /dev/null
@@ -1,115 +0,0 @@
-[data-md-color-scheme="hopsworks"] {
-    --md-primary-fg-color: #1EB382;
-    --md-secondary-fg-color: #188a64;
-    --md-tertiary-fg-color: #0d493550;
-    --md-quaternary-fg-color: #fdfdfd;
-    --border-radius-variable: 5px;
-}
-
-.md-footer__inner:not([hidden]) {
-    display: none
-}
-
-/* Lex did stuff here */
-.svg_topnav{
-    width: 12px;
-    filter: invert(100);
-}
-.svg_topnav:hover{
-    width: 12px;
-    filter: invert(10);
-}
-
-.md-header[data-md-state=shadow] {
-    box-shadow: 0 0 0 0;
-}
-
-.md-tabs__item {
-    min-width: 2.25rem;
-}
-
-.md-tabs__item:hover {
-    background-color: var(--md-tertiary-fg-color);
-    transition: background-color 450ms;
-}
-
-/*
-.md-sidebar__scrollwrap{
-    background-color: var(--md-quaternary-fg-color);
-    padding: 15px 5px 5px 5px;
-    border-radius: var(--border-radius-variable);
-}
-*/
-.md-nav__link:focus{
-}
-
-.image_logo_02{
-    width:450px;
-}
-
-/* End of Lex did stuff here */
-
-.md-header__button.md-logo {
-    margin: .1rem;
-    padding: .1rem;
-}
-
-.md-header__button.md-logo img, .md-header__button.md-logo svg {
-    display: block;
-    width: 1.8rem;
-    height: 1.8rem;
-    fill: currentColor;
-}
-
-.md-tabs {
-    width: 100%;
-    overflow: auto;
-    color: var(--md-primary-bg-color);
-    background-color: var(--md-secondary-fg-color);
-    transition: background-color 250ms;
-}
-
-
-.wrapper {
-    display: grid;
-    grid-template-columns: repeat(4, 1fr);
-    gap: 10px;
-    grid-auto-rows: minmax(100px, auto);
-}
-
-.wrapper * {
-    border: 2px solid green;
-    text-align:
center; - padding: 70px 0; -} - -.one { - grid-column: 1 / 2; - grid-row: 1; -} -.two { - grid-column: 2 / 3; - grid-row: 1; -} -.three { - grid-column: 3 / 4; - grid-row: 1; -} -.four { - grid-column: 4 / 5; - grid-row: 1; -} -.five { - grid-column: 1 / 3; - grid-row: 2; -} -.six { - grid-column: 3 / 5; - grid-row: 2; -} - -/* Jupyter Stuff */ -.jupyter-wrapper .jp-CodeCell .jp-Cell-inputWrapper .jp-InputPrompt { - display: none !important; -} diff --git a/hsml/docs/css/dropdown.css b/hsml/docs/css/dropdown.css deleted file mode 100644 index 886858909..000000000 --- a/hsml/docs/css/dropdown.css +++ /dev/null @@ -1,55 +0,0 @@ -/* Style The Dropdown Button */ -.dropbtn { - color: white; - border: none; - cursor: pointer; -} - -.md-tabs__list { - contain: inherit; -} -.md-tabs { - overflow: inherit; -} -.md-header { - z-index: 1000 !important; -} - -/* The container
- needed to position the dropdown content */ -.dropdown { - position: absolute; - display: inline-block; -} - -/* Dropdown Content (Hidden by Default) */ -.dropdown-content { - display:none; - font-size: 13px; - position: absolute; - background-color: #f9f9f9; - min-width: 160px; - box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2); - z-index: 1000; - border-radius: 2px; - left:-15px; -} - -/* Links inside the dropdown */ -.dropdown-content a { - color: black; - padding: 12px 16px; - text-decoration: none; - display: block; -} - -/* Change color of dropdown links on hover */ -.dropdown-content a:hover {background-color: #f1f1f1} - -/* Show the dropdown menu on hover */ -.dropdown:hover .dropdown-content { - display: block; -} - -/* Change the background color of the dropdown button when the dropdown content is shown */ -.dropdown:hover .dropbtn { -} diff --git a/hsml/docs/css/marctech.css b/hsml/docs/css/marctech.css deleted file mode 100644 index 8bb58c97b..000000000 --- a/hsml/docs/css/marctech.css +++ /dev/null @@ -1,1047 +0,0 @@ -:root { - --md-primary-fg-color: #1EB382; - --md-secondary-fg-color: #188a64; - --md-tertiary-fg-color: #0d493550; - --md-quaternary-fg-color: #fdfdfd; - --md-fiftuary-fg-color: #2471cf; - --border-radius-variable: 5px; - --border-width:1px; - } - - .marctech_main a{ - color: var(--md-fiftuary-fg-color); - border-bottom: 1px dotted var(--md-fiftuary-fg-color) !important; - text-decoration: dotted !important;} - - .marctech_main a:hover{ - border-bottom: 1px dotted var(--md-primary-fg-color)!important; - } - - .marctech_main a:visited{ - color: var(--md-tertiary-fg-color); - border-bottom: 1px dotted var(--md-tertiary-fg-color) !important; - - } - - .w-layout-grid { - display: -ms-grid; - display: grid; - grid-auto-columns: 1fr; - -ms-grid-columns: 1fr 1fr; - grid-template-columns: 1fr 1fr; - -ms-grid-rows: auto auto; - grid-template-rows: auto auto; - grid-row-gap: 16px; - grid-column-gap: 16px; - } - - .image_logo{ - width: 69%; - background-color: white; - z-index: 50; - padding: 0px 15px 0px 15px; - margin-bottom: 10px; - } - - .layer_02{ - pointer-events: none; - } - - .round-frame{ - pointer-events: initial; - } - - .marctech_main { - margin-top:-20px; - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - -webkit-box-orient: vertical; - -webkit-box-direction: normal; - -webkit-flex-direction: column; - -ms-flex-direction: column; - flex-direction: column; - -webkit-box-align: center; - -webkit-align-items: center; - -ms-flex-align: center; - align-items: center; - margin-bottom: 55px; - } - - .collumns { - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - height: 100%; - -webkit-box-align: stretch; - -webkit-align-items: stretch; - -ms-flex-align: stretch; - align-items: stretch; - } - - .col_heading { - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - -webkit-box-align: center; - -webkit-align-items: center; - -ms-flex-align: center; - align-items: center; - } - - .enterprisefs { - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - -webkit-box-orient: vertical; - -webkit-box-direction: normal; - -webkit-flex-direction: column; - -ms-flex-direction: column; - flex-direction: column; - -webkit-box-align: center; - -webkit-align-items: center; - -ms-flex-align: center; - align-items: center; - } - - .enterprise_ai { - -webkit-align-self: center; - -ms-flex-item-align: center; - -ms-grid-row-align: center; - 
align-self: center; - -webkit-box-flex: 1; - -webkit-flex: 1; - -ms-flex: 1; - flex: 1; - } - - .side-content { - z-index: 0; - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - width: 240px; - height: 100%; - margin-top: 10px; - margin-bottom: 10px; - padding: 20px 10px; - -webkit-box-orient: vertical; - -webkit-box-direction: normal; - -webkit-flex-direction: column; - -ms-flex-direction: column; - flex-direction: column; - -webkit-box-pack: center; - -webkit-justify-content: center; - -ms-flex-pack: center; - justify-content: center; - -webkit-box-align: center; - -webkit-align-items: center; - -ms-flex-align: center; - align-items: center; - -webkit-align-content: flex-start; - -ms-flex-line-pack: start; - align-content: flex-start; - border-style: solid; - border-width: var(--border-width); - border-color: #585858; - border-radius: 10px; - background-color:var(--md-quaternary-fg-color); - } - .body { - padding: 40px; - font-family: Roboto, sans-serif; - } - - .green { - color: #1eb182; - font-size: 1.2vw; - } - - .rec_frame { - position: relative; - z-index: 1; - display: inline-block; - min-width: 150px; - margin-top: 10px; - margin-right: 10px; - margin-left: 10px; - padding: 10px 10px; - border-style: solid; - border-width: var(--border-width); - border-color: #585858; - border-radius: 10px; - background-color: #fff; - box-shadow: 4px 4px 0 0 rgba(88, 88, 88, 0.16); - -webkit-transition: box-shadow 200ms ease, border-color 200ms ease; - transition: box-shadow 200ms ease, border-color 200ms ease; - color: #585858; - text-align: center; - cursor: pointer; - } - - .rec_frame:hover { - border-color: #c2c2c2; - box-shadow: none; - } - - .name_item { - font-size: 0.7rem; - line-height: 120%; - font-weight: 700; - } - - .name_item.db { - position: relative; - z-index: 3; - text-align: left; - } - - .name_item.small { - font-size: 0.6rem; - font-weight: 500; - } - - .name_item.ingrey { - padding-bottom: 20px; - } - - .db_frame-mid { - position: relative; - z-index: 1; - margin-top: -8px; - padding: 5px 2px; - border-style: solid; - border-width: var(--border-width); - border-color: #585858; - border-radius: 0px 0% 50% 50%; - background-color: #fff; - color: #585858; - text-align: center; - } - - .db_frame-top { - position: relative; - z-index: 2; - padding: 5px 2px; - border-style: solid; - border-width: var(--border-width); - border-color: #585858; - border-radius: 50%; - background-color: #fff; - color: #585858; - text-align: center; - } - - .icondb { - position: relative; - width: 25px; - min-width: 25px; - margin-right: 10px; - } - - .db_frame { - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - width: 150px; - height: 55px; - padding: 20px 10px; - -webkit-box-align: center; - -webkit-align-items: center; - -ms-flex-align: center; - align-items: center; - border-style: solid; - border-width: var(--border-width); - border-color: #585858; - border-radius: 10px; - background-color: #fff; - box-shadow: 4px 4px 0 0 rgba(88, 88, 88, 0.16); - -webkit-transition: box-shadow 200ms ease, border-color 200ms ease; - transition: box-shadow 200ms ease, border-color 200ms ease; - color: #585858; - text-align: center; - cursor: pointer; - } - - .db_frame:hover { - border-color: #c2c2c2; - box-shadow: none; - } - - .grid { - -ms-grid-rows: auto auto auto; - grid-template-rows: auto auto auto; - } - - .arrowdown { - position: relative; - z-index: 0; - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - 
display: flex; - margin-top: -10px; - -webkit-box-pack: center; - -webkit-justify-content: center; - -ms-flex-pack: center; - justify-content: center; - } - - .heading_MT { - margin-top: 0px !important; - margin-bottom: 0px !important; - font-size: 1.3rem !important; - white-space: nowrap !important; - } - - .head_col { - padding-left: 10px; - } - - .MT_heading3 { - margin-top: 0px !important ; - font-size: 0.8rem !important; - } - - .MT_heading3.green { - color: #1eb182 !important; - } - - .column_sides { - position: relative; - z-index: 2; - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - -webkit-box-orient: vertical; - -webkit-box-direction: normal; - -webkit-flex-direction: column; - -ms-flex-direction: column; - flex-direction: column; - -webkit-box-pack: justify; - -webkit-justify-content: space-between; - -ms-flex-pack: justify; - justify-content: space-between; - -webkit-box-align: center; - -webkit-align-items: center; - -ms-flex-align: center; - align-items: center; - } - - .hopsicon { - width: 45px; - height: 45px; - } - - .column_center { - z-index: 10; - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - -webkit-box-orient: vertical; - -webkit-box-direction: normal; - -webkit-flex-direction: column; - -ms-flex-direction: column; - flex-direction: column; - -webkit-box-pack: center; - -webkit-justify-content: center; - -ms-flex-pack: center; - justify-content: center; - -webkit-box-align: center; - -webkit-align-items: center; - -ms-flex-align: center; - align-items: center; - } - - .center-content { - z-index: -50; - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - width: 750px; - height: 670px; - margin-top: 10px; - margin-bottom: 10px; - padding: 20px 10px; - -webkit-box-orient: vertical; - -webkit-box-direction: normal; - -webkit-flex-direction: column; - -ms-flex-direction: column; - flex-direction: column; - -webkit-box-pack: center; - -webkit-justify-content: center; - -ms-flex-pack: center; - justify-content: center; - -webkit-box-align: center; - -webkit-align-items: center; - -ms-flex-align: center; - align-items: center; - -webkit-align-content: center; - -ms-flex-line-pack: center; - align-content: center; - border-radius: 10px; - background-color: transparent; - } - - .image { - width: 260px; - } - - .layer_01 { - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - -webkit-box-orient: vertical; - -webkit-box-direction: normal; - -webkit-flex-direction: column; - -ms-flex-direction: column; - flex-direction: column; - -webkit-box-pack: center; - -webkit-justify-content: center; - -ms-flex-pack: center; - justify-content: center; - -webkit-box-align: stretch; - -webkit-align-items: stretch; - -ms-flex-align: stretch; - align-items: stretch; - } - - .name_center { - font-size: 1rem; - font-weight: 700; - } - - .rec_frame_main { - position: relative; - z-index: 1; - margin-top: 10px; - margin-right: 10px; - margin-left: 10px; - padding: 5px 10px; - border-style: solid; - border-width: var(--border-width); - border-color: #1eb182; - border-radius: 10px; - background-color: #e6fdf6; - box-shadow: 4px 4px 0 0 #dcf7ee; - -webkit-transition: box-shadow 200ms ease, border-color 200ms ease; - transition: box-shadow 200ms ease, border-color 200ms ease; - color: #1eb182; - text-align: center; - cursor: pointer; - } - - .rec_frame_main:hover { - border-color: #9fecd4; - box-shadow: none; - } - - .rec_frame_main.no_content 
{ - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - height: 100%; - -webkit-box-orient: vertical; - -webkit-box-direction: normal; - -webkit-flex-direction: column; - -ms-flex-direction: column; - flex-direction: column; - -webkit-box-pack: center; - -webkit-justify-content: center; - -ms-flex-pack: center; - justify-content: center; - -webkit-box-align: center; - -webkit-align-items: center; - -ms-flex-align: center; - align-items: center; - box-shadow: 4px 4px 0 0 #dcf7ee; - } - - .rec_frame_main.no_content:hover { - border-color: #1eb182; - box-shadow: 4px 4px 0 0 rgba(88, 88, 88, 0.16); - } - - .name_item_02 { - font-size: 0.85rem; - font-weight: 700; - } - - .grid-infra { - padding-top: 20px; - -ms-grid-columns: 1fr 1fr 1fr 1fr; - grid-template-columns: 1fr 1fr 1fr 1fr; - -ms-grid-rows: auto; - grid-template-rows: auto; - } - - .rec_frame_main-white { - position: relative; - z-index: 1; - display: inline-block; - width: 100%; - margin-top: 10px; - margin-bottom: 10px; - padding: 5px 10px; - border-style: solid; - border-width: var(--border-width); - border-color: #1eb182; - border-radius: 10px; - background-color: #fff; - box-shadow: 4px 4px 0 0 rgba(88, 88, 88, 0.16); - -webkit-transition: box-shadow 200ms ease, border-color 200ms ease; - transition: box-shadow 200ms ease, border-color 200ms ease; - color: #1eb182; - text-align: center; - cursor: pointer; - } - - .rec_frame_main-white:hover { - border-color: #c2c2c2; - box-shadow: none; - } - - .rec_frame_main-white.dotted { - border-style: dotted; - } - - .column { - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - -webkit-box-orient: vertical; - -webkit-box-direction: normal; - -webkit-flex-direction: column; - -ms-flex-direction: column; - flex-direction: column; - -webkit-box-pack: justify; - -webkit-justify-content: space-between; - -ms-flex-pack: justify; - justify-content: space-between; - -webkit-box-align: stretch; - -webkit-align-items: stretch; - -ms-flex-align: stretch; - align-items: stretch; - } - - .columns_center { - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - -webkit-box-orient: horizontal; - -webkit-box-direction: normal; - -webkit-flex-direction: row; - -ms-flex-direction: row; - flex-direction: row; - -webkit-box-pack: justify; - -webkit-justify-content: space-between; - -ms-flex-pack: justify; - justify-content: space-between; - } - - .non-bold { - font-weight: 400; - } - - .logo-holder { - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - -webkit-box-pack: center; - -webkit-justify-content: center; - -ms-flex-pack: center; - justify-content: center; - } - - .infra { - text-align: center; - position: relative; - z-index: 30; - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - padding: 10px; - -webkit-box-orient: vertical; - -webkit-box-direction: normal; - -webkit-flex-direction: column; - -ms-flex-direction: column; - flex-direction: column; - -webkit-box-align: center; - -webkit-align-items: center; - -ms-flex-align: center; - align-items: center; - border: 1px dashed #000; - border-radius: 6px; - background-color: #fff; - cursor: pointer; - } - - .infra:hover { - border-style: solid; - border-color: #585858; - } - - .text_and_icon { - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - -webkit-box-pack: center; - -webkit-justify-content: center; - -ms-flex-pack: 
center; - justify-content: center; - -webkit-box-align: center; - -webkit-align-items: center; - -ms-flex-align: center; - align-items: center; - } - - .svg_icon { - width: 33px; - margin-right: 10px; - margin-left: 10px; - } - - .layer_02 { - position: absolute; - z-index: 10; - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - width: 96%; - height: 90%; - -webkit-box-orient: vertical; - -webkit-box-direction: normal; - -webkit-flex-direction: column; - -ms-flex-direction: column; - flex-direction: column; - -webkit-box-pack: center; - -webkit-justify-content: center; - -ms-flex-pack: center; - justify-content: center; - -webkit-box-align: stretch; - -webkit-align-items: stretch; - -ms-flex-align: stretch; - align-items: stretch; - border-style: solid; - border-width: calc (var(--border-width)*2); - border-color: #bbbbbb50 ; - border-radius: 100%; - background-color: transparent; - } - - .round-frame { - position: absolute; - left: 0%; - top: auto; - right: auto; - bottom: 0%; - z-index: 10; - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - width: 120px; - height: 120px; - margin: 10px; - padding: 20px; - -webkit-box-pack: center; - -webkit-justify-content: center; - -ms-flex-pack: center; - justify-content: center; - -webkit-box-align: center; - -webkit-align-items: center; - -ms-flex-align: center; - align-items: center; - border-style: solid; - border-width: var(--border-width); - border-color: #585858; - border-radius: 100%; - background-color: #fff; - outline-color: #fff; - outline-offset: 0px; - outline-style: solid; - outline-width: 7px; - -webkit-transition: box-shadow 200ms ease, border-color 200ms ease; - transition: box-shadow 200ms ease, border-color 200ms ease; - color: #585858; - text-align: center; - cursor: pointer; - } - - .round-frame:hover { - border-color: #c2c2c2; - box-shadow: none; - } - - .round-frame.top-left { - left: 4%; - top: 15%; - right: auto; - bottom: auto; - } - - .round-frame.bottom-left { - left: 4%; - bottom: 15%; - } - - .round-frame.top-right { - left: auto; - top: 15%; - right: 4%; - bottom: auto; - } - - .round-frame.bottom-right { - left: auto; - top: auto; - right: 4%; - bottom: 15%; - padding: 10px; - } - - .side-holder { - z-index: -1; - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - height: 630px; - -webkit-box-orient: vertical; - -webkit-box-direction: normal; - -webkit-flex-direction: column; - -ms-flex-direction: column; - flex-direction: column; - -webkit-box-pack: center; - -webkit-justify-content: center; - -ms-flex-pack: center; - justify-content: center; - } - - .infra-icon { - width: 25px; - height: 25px; - } - - .div-block { - display: -webkit-box; - display: -webkit-flex; - display: -ms-flexbox; - display: flex; - height: 100%; - -webkit-box-orient: vertical; - -webkit-box-direction: normal; - -webkit-flex-direction: column; - -ms-flex-direction: column; - flex-direction: column; - -webkit-box-pack: justify; - -webkit-justify-content: space-between; - -ms-flex-pack: justify; - justify-content: space-between; - } - - #w-node-a2a9b648-f5dd-74e5-e1c2-f7aaf4fa1fcd-46672785 { - -ms-grid-column: span 1; - grid-column-start: span 1; - -ms-grid-column-span: 1; - grid-column-end: span 1; - -ms-grid-row: span 1; - grid-row-start: span 1; - -ms-grid-row-span: 1; - grid-row-end: span 1; - } - - #w-node-_466aa2bf-88bf-5a65-eab4-fc1eb95e7384-46672785 { - -ms-grid-column: span 1; - grid-column-start: span 1; - 
-ms-grid-column-span: 1; - grid-column-end: span 1; - -ms-grid-row: span 1; - grid-row-start: span 1; - -ms-grid-row-span: 1; - grid-row-end: span 1; - } - - #w-node-_87009ba3-d9a6-e0b7-4cce-581190a19cf3-46672785 { - -ms-grid-column: span 1; - grid-column-start: span 1; - -ms-grid-column-span: 1; - grid-column-end: span 1; - -ms-grid-row: span 1; - grid-row-start: span 1; - -ms-grid-row-span: 1; - grid-row-end: span 1; - } - - #w-node-_4a479fbb-90c7-9f47-d439-20aa6a224339-46672785 { - -ms-grid-column: span 1; - grid-column-start: span 1; - -ms-grid-column-span: 1; - grid-column-end: span 1; - -ms-grid-row: span 1; - grid-row-start: span 1; - -ms-grid-row-span: 1; - grid-row-end: span 1; - } - - - /* - - - inherited from the original template - - */ - - .w-container .w-row { - margin-left: -10px; - margin-right: -10px; - } - .w-row:before, - .w-row:after { - content: " "; - display: table; - grid-column-start: 1; - grid-row-start: 1; - grid-column-end: 2; - grid-row-end: 2; - } - .w-row:after { - clear: both; - } - .w-row .w-row { - margin-left: 0; - margin-right: 0; - } - .w-col { - position: relative; - float: left; - width: 100%; - min-height: 1px; - padding-left: 10px; - padding-right: 10px; - } - .w-col .w-col { - padding-left: 0; - padding-right: 0; - } - .w-col-1 { - width: 8.33333333%; - } - .w-col-2 { - width: 16.66666667%; - } - .w-col-3 { - width: 25%; - } - .w-col-4 { - width: 33.33333333%; - } - .w-col-5 { - width: 41.66666667%; - } - .w-col-6 { - width: 50%; - } - .w-col-7 { - width: 58.33333333%; - } - .w-col-8 { - width: 66.66666667%; - } - .w-col-9 { - width: 75%; - } - .w-col-10 { - width: 83.33333333%; - } - .w-col-11 { - width: 91.66666667%; - } - .w-col-12 { - width: 100%; - } - .w-hidden-main { - display: none !important; - } - @media screen and (max-width: 991px) { - .w-container { - max-width: 728px; - } - .w-hidden-main { - display: inherit !important; - } - .w-hidden-medium { - display: none !important; - } - .w-col-medium-1 { - width: 8.33333333%; - } - .w-col-medium-2 { - width: 16.66666667%; - } - .w-col-medium-3 { - width: 25%; - } - .w-col-medium-4 { - width: 33.33333333%; - } - .w-col-medium-5 { - width: 41.66666667%; - } - .w-col-medium-6 { - width: 50%; - } - .w-col-medium-7 { - width: 58.33333333%; - } - .w-col-medium-8 { - width: 66.66666667%; - } - .w-col-medium-9 { - width: 75%; - } - .w-col-medium-10 { - width: 83.33333333%; - } - .w-col-medium-11 { - width: 91.66666667%; - } - .w-col-medium-12 { - width: 100%; - } - .w-col-stack { - width: 100%; - left: auto; - right: auto; - } - } - @media screen and (max-width: 767px) { - .w-hidden-main { - display: inherit !important; - } - .w-hidden-medium { - display: inherit !important; - } - .w-hidden-small { - display: none !important; - } - .w-row, - .w-container .w-row { - margin-left: 0; - margin-right: 0; - } - .w-col { - width: 100%; - left: auto; - right: auto; - } - .w-col-small-1 { - width: 8.33333333%; - } - .w-col-small-2 { - width: 16.66666667%; - } - .w-col-small-3 { - width: 25%; - } - .w-col-small-4 { - width: 33.33333333%; - } - .w-col-small-5 { - width: 41.66666667%; - } - .w-col-small-6 { - width: 50%; - } - .w-col-small-7 { - width: 58.33333333%; - } - .w-col-small-8 { - width: 66.66666667%; - } - .w-col-small-9 { - width: 75%; - } - .w-col-small-10 { - width: 83.33333333%; - } - .w-col-small-11 { - width: 91.66666667%; - } - .w-col-small-12 { - width: 100%; - } - } - @media screen and (max-width: 479px) { - .w-container { - max-width: none; - } - .w-hidden-main { - display: inherit 
!important; - } - .w-hidden-medium { - display: inherit !important; - } - .w-hidden-small { - display: inherit !important; - } - .w-hidden-tiny { - display: none !important; - } - .w-col { - width: 100%; - } - .w-col-tiny-1 { - width: 8.33333333%; - } - .w-col-tiny-2 { - width: 16.66666667%; - } - .w-col-tiny-3 { - width: 25%; - } - .w-col-tiny-4 { - width: 33.33333333%; - } - .w-col-tiny-5 { - width: 41.66666667%; - } - .w-col-tiny-6 { - width: 50%; - } - .w-col-tiny-7 { - width: 58.33333333%; - } - .w-col-tiny-8 { - width: 66.66666667%; - } - .w-col-tiny-9 { - width: 75%; - } - .w-col-tiny-10 { - width: 83.33333333%; - } - .w-col-tiny-11 { - width: 91.66666667%; - } - .w-col-tiny-12 { - width: 100%; - } - } diff --git a/hsml/docs/css/version-select.css b/hsml/docs/css/version-select.css deleted file mode 100644 index 3b908ae84..000000000 --- a/hsml/docs/css/version-select.css +++ /dev/null @@ -1,36 +0,0 @@ -@media only screen and (max-width:76.1875em) { -} - -#version-selector select.form-control { - appearance: none; - -webkit-appearance: none; - -moz-appearance: none; - - background-color: #F5F5F5; - - background-position: center right; - background-repeat: no-repeat; - border: 0px; - border-radius: 2px; - /* box-shadow: 0px 1px 3px rgb(0 0 0 / 10%); */ - color: inherit; - width: -webkit-fill-available; - width: -moz-available; - max-width: 200px; - font-size: inherit; - /* font-weight: 600; */ - margin: 10px; - overflow: hidden; - padding: 7px 10px; - text-overflow: ellipsis; - white-space: nowrap; -} - -#version-selector::after { - content: '⌄'; - font-family: inherit; - font-size: 22px; - margin: -35px; - vertical-align: 7%; - padding-bottom: 10px; -} diff --git a/hsml/docs/index.md b/hsml/docs/index.md deleted file mode 100644 index ee835ddc7..000000000 --- a/hsml/docs/index.md +++ /dev/null @@ -1,141 +0,0 @@ -# Hopsworks Model Management - -

-  [badges: Hopsworks Community, Hopsworks Model Management Documentation, python, PyPiStatus, Scala/Java Artifacts, Downloads, Ruff, License]

-
-HSML is the library to interact with the Hopsworks Model Registry and Model Serving. The library makes it easy to export, manage, and deploy models.
-
-However, to connect from an external Python environment, additional connection information, such as host and port, is required.
-
-## Getting Started On Hopsworks
-
-Get started easily by registering an account on [Hopsworks Serverless](https://app.hopsworks.ai/). Create your project and a new [API key](https://docs.hopsworks.ai/latest/user_guides/projects/api_key/create_api_key/). In a new Python environment with Python 3.8 or higher, install the [client library](https://docs.hopsworks.ai/latest/user_guides/client_installation/) using pip:
-
-```bash
-# Get all Hopsworks SDKs: Feature Store, Model Serving and Platform SDK
-pip install hopsworks
-# or just the Model Registry and Model Serving SDK
-pip install hsml
-```
-
-You can start a notebook, instantiate a connection, and get the project's model registry and model serving handles.
-
-```python
-import hopsworks
-
-project = hopsworks.login() # you will be prompted for your API key
-
-mr = project.get_model_registry()
-# or
-ms = project.get_model_serving()
-```
-
-or using `hsml` directly:
-
-```python
-import hsml
-
-connection = hsml.connection(
-    host="c.app.hopsworks.ai",
-    project="your-project",
-    api_key_value="your-api-key",
-)
-
-mr = connection.get_model_registry()
-# or
-ms = connection.get_model_serving()
-```
-
-Create a new model
-```python
-model = mr.tensorflow.create_model(name="mnist",
-                                   version=1,
-                                   metrics={"accuracy": 0.94},
-                                   description="mnist model description")
-model.save("/tmp/model_directory") # or /tmp/model_file
-```
-
-Download a model
-```python
-model = mr.get_model("mnist", version=1)
-
-model_path = model.download()
-```
-
-Delete a model
-```python
-model.delete()
-```
-
-Get the best-performing model
-```python
-best_model = mr.get_best_model('mnist', 'accuracy', 'max')
-```
-
-Deploy a model
-```python
-deployment = model.deploy()
-```
-
-Start a deployment
-```python
-deployment.start()
-```
-
-Make predictions with a deployed model
-```python
-data = { "instances": [ model.input_example ] }
-
-predictions = deployment.predict(data)
-```
-
-## Tutorials
-
-You can find more examples on how to use the library in our [tutorials](https://github.com/logicalclocks/hopsworks-tutorials).
-
-## Documentation
-
-Documentation is available at [Hopsworks Model Management Documentation](https://docs.hopsworks.ai/).
-
-## Issues
-
-For general questions about the usage of Hopsworks Machine Learning, please open a topic on [Hopsworks Community](https://community.hopsworks.ai/).
-Please report any issue using [Github issue tracking](https://github.com/logicalclocks/machine-learning-api/issues).
-
-
-## Contributing
-
-If you would like to contribute to this library, please see the [Contribution Guidelines](CONTRIBUTING.md).
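-
-You can also poll the state of a running deployment after starting it. This is a sketch based on `Deployment.get_state` from the deployment API; the exact fields on the returned state object (e.g. `status`) and the status strings may differ by version.
-
-Check the state of a deployment
-```python
-state = deployment.get_state()
-
-print(state.status)  # e.g. "Running" once the deployment is up
-```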
diff --git a/hsml/docs/js/dropdown.js b/hsml/docs/js/dropdown.js deleted file mode 100644 index b897ba36a..000000000 --- a/hsml/docs/js/dropdown.js +++ /dev/null @@ -1,2 +0,0 @@ -document.getElementsByClassName("md-tabs__link")[7].style.display = "none"; -document.getElementsByClassName("md-tabs__link")[9].style.display = "none"; \ No newline at end of file diff --git a/hsml/docs/js/inject-api-links.js b/hsml/docs/js/inject-api-links.js deleted file mode 100644 index 6c8a4a3b3..000000000 --- a/hsml/docs/js/inject-api-links.js +++ /dev/null @@ -1,31 +0,0 @@ -window.addEventListener("DOMContentLoaded", function () { - var windowPathNameSplits = window.location.pathname.split("/"); - var majorVersionRegex = new RegExp("(\\d+[.]\\d+)") - var latestRegex = new RegExp("latest"); - if (majorVersionRegex.test(windowPathNameSplits[1])) { // On landing page docs.hopsworks.api/3.0 - URL contains major version - // Version API dropdown - document.getElementById("hopsworks_api_link").href = "https://docs.hopsworks.ai/hopsworks-api/" + windowPathNameSplits[1] + "/generated/api/login/"; - document.getElementById("hsfs_api_link").href = "https://docs.hopsworks.ai/feature-store-api/" + windowPathNameSplits[1] + "/generated/api/connection_api/"; - document.getElementById("hsml_api_link").href = "https://docs.hopsworks.ai/machine-learning-api/" + windowPathNameSplits[1] + "/generated/connection_api/"; - } else { // on docs.hopsworks.api/feature-store-api/3.0 / docs.hopsworks.api/hopsworks-api/3.0 / docs.hopsworks.api/machine-learning-api/3.0 - if (latestRegex.test(windowPathNameSplits[2]) || latestRegex.test(windowPathNameSplits[1])) { - var majorVersion = "latest"; - } else { - var apiVersion = windowPathNameSplits[2]; - var majorVersion = apiVersion.match(majorVersionRegex)[0]; - } - // Version main navigation - document.getElementsByClassName("md-tabs__link")[0].href = "https://docs.hopsworks.ai/" + majorVersion; - document.getElementsByClassName("md-tabs__link")[1].href = "https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/quickstart.ipynb"; - document.getElementsByClassName("md-tabs__link")[2].href = "https://docs.hopsworks.ai/" + majorVersion + "/tutorials/"; - document.getElementsByClassName("md-tabs__link")[3].href = "https://docs.hopsworks.ai/" + majorVersion + "/concepts/hopsworks/"; - document.getElementsByClassName("md-tabs__link")[4].href = "https://docs.hopsworks.ai/" + majorVersion + "/user_guides/"; - document.getElementsByClassName("md-tabs__link")[5].href = "https://docs.hopsworks.ai/" + majorVersion + "/setup_installation/aws/getting_started/"; - document.getElementsByClassName("md-tabs__link")[6].href = "https://docs.hopsworks.ai/" + majorVersion + "/admin/"; - // Version API dropdown - document.getElementById("hopsworks_api_link").href = "https://docs.hopsworks.ai/hopsworks-api/" + majorVersion + "/generated/api/login/"; - document.getElementById("hsfs_api_link").href = "https://docs.hopsworks.ai/feature-store-api/" + majorVersion + "/generated/api/connection_api/"; - document.getElementById("hsfs_javadoc_link").href = "https://docs.hopsworks.ai/feature-store-api/" + majorVersion + "/javadoc"; - document.getElementById("hsml_api_link").href = "https://docs.hopsworks.ai/machine-learning-api/" + majorVersion + "/generated/connection_api/"; - } -}); diff --git a/hsml/docs/js/version-select.js b/hsml/docs/js/version-select.js deleted file mode 100644 index 9c8331660..000000000 --- a/hsml/docs/js/version-select.js +++ /dev/null @@ -1,64 +0,0 @@ 
-window.addEventListener("DOMContentLoaded", function() { - // This is a bit hacky. Figure out the base URL from a known CSS file the - // template refers to... - var ex = new RegExp("/?css/version-select.css$"); - var sheet = document.querySelector('link[href$="version-select.css"]'); - - var ABS_BASE_URL = sheet.href.replace(ex, ""); - var CURRENT_VERSION = ABS_BASE_URL.split("/").pop(); - - function makeSelect(options, selected) { - var select = document.createElement("select"); - select.classList.add("form-control"); - - options.forEach(function(i) { - var option = new Option(i.text, i.value, undefined, - i.value === selected); - select.add(option); - }); - - return select; - } - - var xhr = new XMLHttpRequest(); - xhr.open("GET", ABS_BASE_URL + "/../versions.json"); - xhr.onload = function() { - var versions = JSON.parse(this.responseText); - - var realVersion = versions.find(function(i) { - return i.version === CURRENT_VERSION || - i.aliases.includes(CURRENT_VERSION); - }).version; - var latestVersion = versions.find(function(i) { - return i.aliases.includes("latest"); - }).version; - let outdated_banner = document.querySelector('div[data-md-color-scheme="default"][data-md-component="outdated"]'); - if (realVersion !== latestVersion) { - outdated_banner.removeAttribute("hidden"); - } else { - outdated_banner.setAttribute("hidden", ""); - } - - var select = makeSelect(versions.map(function(i) { - var allowedAliases = ["dev", "latest"] - if (i.aliases.length > 0) { - var aliasString = " [" + i.aliases.filter(function (str) { return allowedAliases.includes(str); }).join(", ") + "]"; - } else { - var aliasString = ""; - } - return {text: i.title + aliasString, value: i.version}; - }), realVersion); - select.addEventListener("change", function(event) { - window.location.href = ABS_BASE_URL + "/../" + this.value + "/generated/connection_api/"; - }); - - var container = document.createElement("div"); - container.id = "version-selector"; - // container.className = "md-nav__item"; - container.appendChild(select); - - var sidebar = document.querySelector(".md-nav--primary > .md-nav__list"); - sidebar.parentNode.insertBefore(container, sidebar.nextSibling); - }; - xhr.send(); -}); diff --git a/hsml/docs/overrides/main.html b/hsml/docs/overrides/main.html deleted file mode 100644 index a1bc45bb5..000000000 --- a/hsml/docs/overrides/main.html +++ /dev/null @@ -1,8 +0,0 @@ -{% extends "base.html" %} - -{% block outdated %} -You're not viewing the latest version of the documentation. - - Click here to go to latest. - -{% endblock %} \ No newline at end of file diff --git a/hsml/docs/templates/connection_api.md b/hsml/docs/templates/connection_api.md deleted file mode 100644 index 19e13f3eb..000000000 --- a/hsml/docs/templates/connection_api.md +++ /dev/null @@ -1,11 +0,0 @@ -# Connection - -{{connection}} - -## Properties - -{{connection_properties}} - -## Methods - -{{connection_methods}} diff --git a/hsml/docs/templates/model-registry/links.md b/hsml/docs/templates/model-registry/links.md deleted file mode 100644 index 07abe3177..000000000 --- a/hsml/docs/templates/model-registry/links.md +++ /dev/null @@ -1,15 +0,0 @@ -# Provenance Links - -Provenance Links are objects returned by methods such as [get_feature_view_provenance](../model_api/#get_feature_view_provenance), [get_training_dataset_provenance](../model_api/#get_training_dataset_provenance). These methods use the provenance graph to return the parent feature view/training dataset of a model. 
-These methods will return the actual instances of the feature view/training dataset if available. If the instance was deleted, or it belongs to a feature store that the current project no longer has access to, an Artifact object is returned.
-
-There is an additional method using the provenance graph: [get_feature_view](../model_api/#get_feature_view). This method wraps `get_feature_view_provenance` and always returns a correct, usable Feature View object, or throws an exception if the returned object would be an Artifact. Thus, an exception is thrown if the feature view was deleted or the feature store it belongs to was unshared.
-
-## Properties
-
-{{links_properties}}
-
-# Artifact
-
-Artifact objects are part of the provenance graph and contain a minimal set of information regarding the entities (feature views, training datasets) they represent.
-The provenance graph contains Artifact objects when the underlying entities have been deleted, are corrupted, or are no longer accessible by the current project.
-
-{{artifact_properties}}
diff --git a/hsml/docs/templates/model-registry/model_api.md b/hsml/docs/templates/model-registry/model_api.md
deleted file mode 100644
index edb2e5ade..000000000
--- a/hsml/docs/templates/model-registry/model_api.md
+++ /dev/null
@@ -1,29 +0,0 @@
-# Model
-
-## Creation of a TensorFlow model
-
-{{ml_create_tf}}
-
-## Creation of a Torch model
-
-{{ml_create_th}}
-
-## Creation of a scikit-learn model
-
-{{ml_create_sl}}
-
-## Creation of a generic model
-
-{{ml_create_py}}
-
-## Retrieval
-
-{{ml_get}}
-
-## Properties
-
-{{ml_properties}}
-
-## Methods
-
-{{ml_methods}}
diff --git a/hsml/docs/templates/model-registry/model_registry_api.md b/hsml/docs/templates/model-registry/model_registry_api.md
deleted file mode 100644
index d577e91e3..000000000
--- a/hsml/docs/templates/model-registry/model_registry_api.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# Model Registry
-
-## Retrieval
-
-{{mr_get}}
-
-## Modules
-
-{{mr_modules}}
-
-## Properties
-
-{{mr_properties}}
-
-## Methods
-
-{{mr_methods}}
diff --git a/hsml/docs/templates/model-registry/model_schema_api.md b/hsml/docs/templates/model-registry/model_schema_api.md
deleted file mode 100644
index 28170a419..000000000
--- a/hsml/docs/templates/model-registry/model_schema_api.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# Model Schema
-
-## Creation
-
-To create a ModelSchema, the schema of the Model inputs and/or Model outputs has to be defined beforehand.
-
-{{schema}}
-
-After defining the Model inputs and/or outputs schemas, a ModelSchema can be created using its class constructor.
-
-{{model_schema}}
-
-## Retrieval
-
-### Model Schema
-
-Model schemas can be accessed from the model metadata objects.
-
-``` python
-model.model_schema
-```
-
-### Model Input & Output Schemas
-
-The schemas of the Model inputs and outputs can be accessed from the ModelSchema metadata objects.
- -``` python -model_schema.input_schema -model_schema.output_schema -``` - -## Methods - -{{schema_dict}} - -{{model_schema_dict}} diff --git a/hsml/docs/templates/model-serving/deployment_api.md b/hsml/docs/templates/model-serving/deployment_api.md deleted file mode 100644 index aebccca55..000000000 --- a/hsml/docs/templates/model-serving/deployment_api.md +++ /dev/null @@ -1,25 +0,0 @@ -# Deployment - -## Handle - -{{ms_get_model_serving}} - -## Creation - -{{ms_create_deployment}} - -{{m_deploy}} - -{{p_deploy}} - -## Retrieval - -{{ms_get_deployments}} - -## Properties - -{{dep_properties}} - -## Methods - -{{dep_methods}} diff --git a/hsml/docs/templates/model-serving/inference_batcher_api.md b/hsml/docs/templates/model-serving/inference_batcher_api.md deleted file mode 100644 index 3a2609962..000000000 --- a/hsml/docs/templates/model-serving/inference_batcher_api.md +++ /dev/null @@ -1,25 +0,0 @@ -# Inference batcher - -## Creation - -{{ib}} - -## Retrieval - -### predictor.inference_batcher - -Inference batchers can be accessed from the predictor metadata objects. - -``` python -predictor.inference_batcher -``` - -Predictors can be found in the deployment metadata objects (see [Predictor Reference](../predictor_api/#retrieval)). To retrieve a deployment, see the [Deployment Reference](../deployment_api/#retrieval). - -## Properties - -{{ib_properties}} - -## Methods - -{{ib_methods}} diff --git a/hsml/docs/templates/model-serving/inference_logger_api.md b/hsml/docs/templates/model-serving/inference_logger_api.md deleted file mode 100644 index 2cf68d652..000000000 --- a/hsml/docs/templates/model-serving/inference_logger_api.md +++ /dev/null @@ -1,25 +0,0 @@ -# Inference logger - -## Creation - -{{il}} - -## Retrieval - -### predictor.inference_logger - -Inference loggers can be accessed from the predictor metadata objects. - -``` python -predictor.inference_logger -``` - -Predictors can be found in the deployment metadata objects (see [Predictor Reference](../predictor_api/#retrieval)). To retrieve a deployment, see the [Deployment Reference](../deployment_api/#retrieval). - -## Properties - -{{il_properties}} - -## Methods - -{{il_methods}} diff --git a/hsml/docs/templates/model-serving/model_serving_api.md b/hsml/docs/templates/model-serving/model_serving_api.md deleted file mode 100644 index 0eb557213..000000000 --- a/hsml/docs/templates/model-serving/model_serving_api.md +++ /dev/null @@ -1,13 +0,0 @@ -# Model Serving - -## Retrieval - -{{ms_get}} - -## Properties - -{{ms_properties}} - -## Methods - -{{ms_methods}} diff --git a/hsml/docs/templates/model-serving/predictor_api.md b/hsml/docs/templates/model-serving/predictor_api.md deleted file mode 100644 index 3dd9df195..000000000 --- a/hsml/docs/templates/model-serving/predictor_api.md +++ /dev/null @@ -1,29 +0,0 @@ -# Predictor - -## Handle - -{{ms_get_model_serving}} - -## Creation - -{{ms_create_predictor}} - -## Retrieval - -### deployment.predictor - -Predictors can be accessed from the deployment metadata objects. - -``` python -deployment.predictor -``` - -To retrieve a deployment, see the [Deployment Reference](../deployment_api/#retrieval). 
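-
-As a rough sketch of how the pieces above fit together (assuming `ms` is a model serving handle and `model` a model registered in the model registry; additional `create_predictor` parameters are omitted here):
-
-``` python
-# create a predictor for a registered model, then deploy it
-predictor = ms.create_predictor(model)
-
-deployment = predictor.deploy()
-```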
- -## Properties - -{{pred_properties}} - -## Methods - -{{pred_methods}} diff --git a/hsml/docs/templates/model-serving/predictor_state_api.md b/hsml/docs/templates/model-serving/predictor_state_api.md deleted file mode 100644 index 2640b9b48..000000000 --- a/hsml/docs/templates/model-serving/predictor_state_api.md +++ /dev/null @@ -1,18 +0,0 @@ -# Deployment state - -The state of a deployment corresponds to the state of the predictor configured in it. - -!!! note - Currently, only one predictor is supported in a deployment. Support for multiple predictors (inference graphs) is coming soon. - -## Retrieval - -{{ps_get}} - -## Properties - -{{ps_properties}} - -## Methods - -{{ps_methods}} diff --git a/hsml/docs/templates/model-serving/predictor_state_condition_api.md b/hsml/docs/templates/model-serving/predictor_state_condition_api.md deleted file mode 100644 index e1566d2b1..000000000 --- a/hsml/docs/templates/model-serving/predictor_state_condition_api.md +++ /dev/null @@ -1,15 +0,0 @@ -# Deployment state condition - -The state condition of a deployment is a more detailed representation of a deployment state. - -## Retrieval - -{{psc_get}} - -## Properties - -{{psc_properties}} - -## Methods - -{{psc_methods}} diff --git a/hsml/docs/templates/model-serving/resources_api.md b/hsml/docs/templates/model-serving/resources_api.md deleted file mode 100644 index addc7f51e..000000000 --- a/hsml/docs/templates/model-serving/resources_api.md +++ /dev/null @@ -1,35 +0,0 @@ -# Resources - -## Creation - -{{res}} - -## Retrieval - -### predictor.resources - -Resources allocated for a predictor can be accessed from the predictor metadata object. - -``` python -predictor.resources -``` - -Predictors can be found in the deployment metadata objects (see [Predictor Reference](../predictor_api/#retrieval)). To retrieve a deployment, see the [Deployment Reference](../deployment_api/#retrieval). - -### transformer.resources - -Resources allocated for a transformer can be accessed from the transformer metadata object. - -``` python -transformer.resources -``` - -Transformers can be found in the predictor metadata objects (see [Predictor Reference](../predictor_api/#retrieval)). - -## Properties - -{{res_properties}} - -## Methods - -{{res_methods}} diff --git a/hsml/docs/templates/model-serving/transformer_api.md b/hsml/docs/templates/model-serving/transformer_api.md deleted file mode 100644 index ae81e84ef..000000000 --- a/hsml/docs/templates/model-serving/transformer_api.md +++ /dev/null @@ -1,29 +0,0 @@ -# Transformer - -## Handle - -{{ms_get_model_serving}} - -## Creation - -{{ms_create_transformer}} - -## Retrieval - -### predictor.transformer - -Transformers can be accessed from the predictor metadata objects. - -``` python -predictor.transformer -``` - -Predictors can be found in the deployment metadata objects (see [Predictor Reference](../predictor_api/#retrieval)). To retrieve a deployment, see the [Deployment Reference](../deployment_api/#retrieval).
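For example, a transformer and its allocated resources can be reached by navigating from an existing deployment object; a brief sketch, assuming `deployment` was retrieved as in the earlier example:

``` python
# Navigate from the deployment to its predictor, then to the optional transformer
predictor = deployment.predictor
transformer = predictor.transformer

# A transformer is optional, so check before reading its resources
if transformer is not None:
    print(transformer.resources)
```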
- -## Properties - -{{trans_properties}} - -## Methods - -{{trans_methods}} diff --git a/hsml/java/pom.xml b/hsml/java/pom.xml deleted file mode 100644 index cb3e60028..000000000 --- a/hsml/java/pom.xml +++ /dev/null @@ -1,109 +0,0 @@ [pom.xml body garbled in extraction: the XML element tags were stripped, leaving only text values. Recoverable content: Maven model version 4.0.0; com.logicalclocks:hsml:4.0.0-SNAPSHOT; Java source/target 1.8; build plugins org.scala-tools maven-scala-plugin (scala-compile-first and scala-test-compile executions), maven-assembly-plugin 2.4.1 (jar-with-dependencies descriptor, make-assembly bound to the package phase), and maven-checkstyle-plugin 3.1.1 (check bound to validate, configured with src/main/resources/checkstyle.xml and src/main/resources/suppressions.xml over src/main/java); the repository and distribution repository both point to the Hops repo at https://archiva.hops.works/repository/Hops/.] diff --git a/hsml/java/src/main/resources/checkstyle.xml b/hsml/java/src/main/resources/checkstyle.xml deleted file mode 100644 index 5f99eb681..000000000 --- a/hsml/java/src/main/resources/checkstyle.xml +++ /dev/null @@ -1,312 +0,0 @@ [312 lines of Checkstyle rule definitions; the XML markup was lost in extraction and no content is recoverable.] \ No newline at end of file diff --git a/hsml/java/src/main/resources/suppressions.xml b/hsml/java/src/main/resources/suppressions.xml deleted file mode 100644 index a86fa8219..000000000 --- a/hsml/java/src/main/resources/suppressions.xml +++ /dev/null @@ -1,5 +0,0 @@ [5-line Checkstyle suppressions file; XML markup lost in extraction.] \ No newline at end of file diff --git a/hsml/mkdocs.yml b/hsml/mkdocs.yml deleted file mode 100644 index f20a7b1c5..000000000 --- a/hsml/mkdocs.yml +++ /dev/null @@ -1,120 +0,0 @@ -site_name: "Hopsworks Documentation" -site_description: "Official documentation for Hopsworks and its Feature Store - an open source data-intensive AI platform used for the development and operation of machine learning models at scale."
-site_author: "Logical Clocks" -site_url: "https://docs.hopsworks.ai/machine-learning-api/latest" - -# Repository -repo_name: logicalclocks/hopsworks -repo_url: https://github.com/logicalclocks/hopsworks -edit_uri: "" - -nav: - - Home: https://docs.hopsworks.ai/ - - Getting Started ↗: https://docs.hopsworks.ai/ - - Tutorials: https://docs.hopsworks.ai/ - - Concepts: https://docs.hopsworks.ai/ - - Guides: https://docs.hopsworks.ai/ - - Setup and Installation: https://docs.hopsworks.ai/ - - Administration: https://docs.hopsworks.ai/ - - API: - - API Reference: - - Connection: generated/connection_api.md - - Model Registry: - - Model Registry: generated/model-registry/model_registry_api.md - - Model: generated/model-registry/model_api.md - - Model Schema: generated/model-registry/model_schema_api.md - - Model Serving: - - Model Serving: generated/model-serving/model_serving_api.md - - Deployment: generated/model-serving/deployment_api.md - - Deployment state: generated/model-serving/predictor_state_api.md - - Deployment state condition: generated/model-serving/predictor_state_condition_api.md - - Predictor: generated/model-serving/predictor_api.md - - Transformer: generated/model-serving/transformer_api.md - - Inference Logger: generated/model-serving/inference_logger_api.md - - Inference Batcher: generated/model-serving/inference_batcher_api.md - - Resources: generated/model-serving/resources_api.md - # Added to allow navigation using the side drawer - - Hopsworks API: https://docs.hopsworks.ai/ - - Feature Store API: https://docs.hopsworks.ai/ - - Feature Store JavaDoc: https://docs.hopsworks.ai/ - - Contributing: CONTRIBUTING.md - - Community ↗: https://community.hopsworks.ai/ - -theme: - name: material - custom_dir: docs/overrides - favicon: assets/images/favicon.ico - logo: assets/images/hops-logo.png - icon: - repo: fontawesome/brands/github - font: - text: "Roboto" - code: "IBM Plex Mono" - palette: - accent: teal - scheme: hopsworks - features: - - navigation.tabs - - navigation.tabs.sticky - - navigation.expand - - -extra: - analytics: - provider: google - property: G-64FEEXPSDN - generator: false - version: - - provider: mike - - version: latest - social: - - icon: fontawesome/brands/twitter - link: https://twitter.com/hopsworks - - icon: fontawesome/brands/github - link: https://github.com/logicalclocks/hopsworks - - icon: fontawesome/brands/discourse - link: https://community.hopsworks.ai/ - - icon: fontawesome/brands/linkedin - link: https://www.linkedin.com/company/hopsworks/ - -extra_css: - - css/custom.css - - css/version-select.css - - css/dropdown.css - - css/marctech.css - -extra_javascript: - - js/version-select.js - - js/inject-api-links.js - - js/dropdown.js - -plugins: - - search - - minify: - minify_html: true - minify_css: true - minify_js: true - - mike: - canonical_version: latest - -markdown_extensions: - - admonition - - codehilite - - footnotes - - pymdownx.tabbed: - alternate_style: true - - pymdownx.arithmatex - - pymdownx.superfences - - pymdownx.details - - pymdownx.caret - - pymdownx.mark - - pymdownx.tilde - - pymdownx.critic - - attr_list - - md_in_html - - toc: - permalink: "#" - - pymdownx.tasklist: - custom_checkbox: true - - markdown_include.include: - base_path: docs diff --git a/hsml/python/.pre-commit-config.yaml b/hsml/python/.pre-commit-config.yaml deleted file mode 100644 index 645dcf677..000000000 --- a/hsml/python/.pre-commit-config.yaml +++ /dev/null @@ -1,10 +0,0 @@ -exclude: setup.py -repos: - - repo: 
https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.2 - hooks: - # Run the linter - - id: ruff - args: [--fix] - # Run the formatter - - id: ruff-format diff --git a/hsml/python/hsml/__init__.py b/hsml/python/hsml/__init__.py deleted file mode 100644 index 4fb8156e3..000000000 --- a/hsml/python/hsml/__init__.py +++ /dev/null @@ -1,35 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import warnings - -from hsml import util, version -from hsml.connection import Connection - - -connection = Connection.connection - -__version__ = version.__version__ - - -def ml_formatwarning(message, category, filename, lineno, line=None): - return "{}: {}\n".format(category.__name__, message) - - -warnings.formatwarning = ml_formatwarning -warnings.simplefilter("always", util.VersionWarning) - -__all__ = ["connection"] diff --git a/hsml/python/hsml/client/__init__.py b/hsml/python/hsml/client/__init__.py deleted file mode 100644 index 3982f0c56..000000000 --- a/hsml/python/hsml/client/__init__.py +++ /dev/null @@ -1,152 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from hsml.client.hopsworks import base as hw_base -from hsml.client.hopsworks import external as hw_external -from hsml.client.hopsworks import internal as hw_internal -from hsml.client.istio import base as ist_base -from hsml.client.istio import external as ist_external -from hsml.client.istio import internal as ist_internal -from hsml.connection import CONNECTION_SAAS_HOSTNAME - - -_client_type = None -_saas_connection = None - -_hopsworks_client = None -_istio_client = None - -_kserve_installed = None -_serving_resource_limits = None -_serving_num_instances_limits = None -_knative_domain = None - - -def init( - client_type, - host=None, - port=None, - project=None, - hostname_verification=None, - trust_store_path=None, - api_key_file=None, - api_key_value=None, -): - global _client_type - _client_type = client_type - - global _saas_connection - _saas_connection = host == CONNECTION_SAAS_HOSTNAME - - global _hopsworks_client - if not _hopsworks_client: - if client_type == "internal": - _hopsworks_client = hw_internal.Client() - elif client_type == "external": - _hopsworks_client = hw_external.Client( - host, - port, - project, - hostname_verification, - trust_store_path, - api_key_file, - api_key_value, - ) - - -def get_instance() -> hw_base.Client: - global _hopsworks_client - if _hopsworks_client: - return _hopsworks_client - raise Exception("Couldn't find client. Try reconnecting to Hopsworks.") - - -def set_istio_client(host, port, project=None, api_key_value=None): - global _client_type, _istio_client - - if not _istio_client: - if _client_type == "internal": - _istio_client = ist_internal.Client(host, port) - elif _client_type == "external": - _istio_client = ist_external.Client(host, port, project, api_key_value) - - -def get_istio_instance() -> ist_base.Client: - global _istio_client - return _istio_client - - -def get_client_type() -> str: - global _client_type - return _client_type - - -def is_saas_connection() -> bool: - global _saas_connection - return _saas_connection - - -def set_kserve_installed(kserve_installed): - global _kserve_installed - _kserve_installed = kserve_installed - - -def is_kserve_installed() -> bool: - global _kserve_installed - return _kserve_installed - - -def set_serving_resource_limits(max_resources): - global _serving_resource_limits - _serving_resource_limits = max_resources - - -def get_serving_resource_limits(): - global _serving_resource_limits - return _serving_resource_limits - - -def set_serving_num_instances_limits(num_instances_range): - global _serving_num_instances_limits - _serving_num_instances_limits = num_instances_range - - -def get_serving_num_instances_limits(): - global _serving_num_instances_limits - return _serving_num_instances_limits - - -def is_scale_to_zero_required(): - # scale-to-zero is required for KServe deployments if the Hopsworks variable `kube_serving_min_num_instances` - # is set to 0. Other possible values are -1 (unlimited num instances) or >1 num instances. 
- return get_serving_num_instances_limits()[0] == 0 - - -def get_knative_domain(): - global _knative_domain - return _knative_domain - - -def set_knative_domain(knative_domain): - global _knative_domain - _knative_domain = knative_domain - - -def stop(): - global _hopsworks_client, _istio_client - # close each client only if it was initialized - if _hopsworks_client: - _hopsworks_client._close() - if _istio_client: - _istio_client._close() - _hopsworks_client = _istio_client = None diff --git a/hsml/python/hsml/client/auth.py b/hsml/python/hsml/client/auth.py deleted file mode 100644 index 696aaad2e..000000000 --- a/hsml/python/hsml/client/auth.py +++ /dev/null @@ -1,64 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os - -import requests -from hsml.client import exceptions - - -class BearerAuth(requests.auth.AuthBase): - """Class to encapsulate a Bearer token.""" - - def __init__(self, token): - self._token = token - - def __call__(self, r): - r.headers["Authorization"] = "Bearer " + self._token - return r - - -class ApiKeyAuth(requests.auth.AuthBase): - """Class to encapsulate an API key.""" - - def __init__(self, token): - self._token = token - - def __call__(self, r): - r.headers["Authorization"] = "ApiKey " + self._token - return r - - -def get_api_key(api_key_value, api_key_file): - if api_key_value is not None: - return api_key_value - elif api_key_file is not None: - if os.path.exists(api_key_file): - # a context manager avoids calling close() on a file that failed to open - with open(api_key_file, mode="r") as file: - return file.read() - else: - raise IOError( - "Could not find api key file on path: {}".format(api_key_file) - ) - else: - raise exceptions.ExternalClientError( - "Either api_key_file or api_key_value must be set when connecting to" - " hopsworks from an external environment." - ) diff --git a/hsml/python/hsml/client/base.py b/hsml/python/hsml/client/base.py deleted file mode 100644 index d36e366c5..000000000 --- a/hsml/python/hsml/client/base.py +++ /dev/null @@ -1,119 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
-# - -from abc import ABC, abstractmethod - -import furl -import requests -import urllib3 -from hsml.client import exceptions -from hsml.decorators import connected - - -urllib3.disable_warnings(urllib3.exceptions.SecurityWarning) -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - - -class Client(ABC): - @abstractmethod - def __init__(self): - """To be implemented by clients.""" - pass - - @abstractmethod - def _get_verify(self, verify, trust_store_path): - """To be implemented by clients.""" - pass - - @abstractmethod - def _get_retry(self, request, response): - """To be implemented by clients.""" - pass - - @abstractmethod - def _get_host_port_pair(self): - """To be implemented by clients.""" - pass - - @connected - def _send_request( - self, - method, - path_params, - query_params=None, - headers=None, - data=None, - stream=False, - files=None, - ): - """Send REST request to a REST endpoint. - - Uses the client it is executed from. Path parameters are url encoded automatically. - - :param method: 'GET', 'PUT' or 'POST' - :type method: str - :param path_params: a list of path params to build the query url from starting after - the api resource, for example `["project", 119]`. - :type path_params: list - :param query_params: A dictionary of key/value pairs to be added as query parameters, - defaults to None - :type query_params: dict, optional - :param headers: Additional header information, defaults to None - :type headers: dict, optional - :param data: The payload as a python dictionary to be sent as json, defaults to None - :type data: dict, optional - :param stream: Set if response should be a stream, defaults to False - :type stream: boolean, optional - :param files: dictionary for multipart encoding upload - :type files: dict, optional - :raises RestAPIError: Raised when request wasn't correctly received, understood or accepted - :return: Response json - :rtype: dict - """ - f_url = furl.furl(self._base_url) - f_url.path.segments = self.BASE_PATH_PARAMS + path_params - url = str(f_url) - request = requests.Request( - method, - url=url, - headers=headers, - data=data, - params=query_params, - auth=self._auth, - files=files, - ) - - prepped = self._session.prepare_request(request) - response = self._session.send(prepped, verify=self._verify, stream=stream) - - if self._get_retry(request, response): - prepped = self._session.prepare_request(request) - response = self._session.send(prepped, verify=self._verify, stream=stream) - - if response.status_code // 100 != 2: - raise exceptions.RestAPIError(url, response) - - if stream: - return response - else: - # handle different success response codes - if len(response.content) == 0: - return None - return response.json() - - def _close(self): - """Closes a client. Can be implemented for clean up purposes, not mandatory.""" - self._connected = False diff --git a/hsml/python/hsml/client/exceptions.py b/hsml/python/hsml/client/exceptions.py deleted file mode 100644 index 6a59909db..000000000 --- a/hsml/python/hsml/client/exceptions.py +++ /dev/null @@ -1,85 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - - -class RestAPIError(Exception): - """REST Exception encapsulating the response object and url.""" - - def __init__(self, url, response): - try: - error_object = response.json() - except Exception: - self.error_code = error_object = None - - message = ( - "Metadata operation error: (url: {}). Server response: \n" - "HTTP code: {}, HTTP reason: {}, body: {}".format( - url, - response.status_code, - response.reason, - response.content, - ) - ) - - if error_object is not None: - self.error_code = error_object.get("errorCode", "") - message += ", error code: {}, error msg: {}, user msg: {}".format( - self.error_code, - error_object.get("errorMsg", ""), - error_object.get("usrMsg", ""), - ) - - super().__init__(message) - self.url = url - self.response = response - - STATUS_CODE_BAD_REQUEST = 400 - STATUS_CODE_UNAUTHORIZED = 401 - STATUS_CODE_FORBIDDEN = 403 - STATUS_CODE_NOT_FOUND = 404 - STATUS_CODE_INTERNAL_SERVER_ERROR = 500 - - -class UnknownSecretStorageError(Exception): - """This exception will be raised if an unknown secrets storage is passed as a parameter.""" - - -class ModelRegistryException(Exception): - """Generic model registry exception""" - - -class ModelServingException(Exception): - """Generic model serving exception""" - - ERROR_CODE_SERVING_NOT_FOUND = 240000 - ERROR_CODE_ILLEGAL_ARGUMENT = 240001 - ERROR_CODE_DUPLICATED_ENTRY = 240011 - - ERROR_CODE_DEPLOYMENT_NOT_RUNNING = 250001 - - -class InternalClientError(TypeError): - """Raised when internal client cannot be initialized due to missing arguments.""" - - def __init__(self, message): - super().__init__(message) - - -class ExternalClientError(TypeError): - """Raised when external client cannot be initialized due to missing arguments.""" - - def __init__(self, message): - super().__init__(message) diff --git a/hsml/python/hsml/client/hopsworks/__init__.py b/hsml/python/hsml/client/hopsworks/__init__.py deleted file mode 100644 index 7fa8fd556..000000000 --- a/hsml/python/hsml/client/hopsworks/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/hsml/python/hsml/client/hopsworks/base.py b/hsml/python/hsml/client/hopsworks/base.py deleted file mode 100644 index a0326b2d5..000000000 --- a/hsml/python/hsml/client/hopsworks/base.py +++ /dev/null @@ -1,111 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os -from abc import abstractmethod - -from hsml.client import auth, base - - -class Client(base.Client): - TOKEN_FILE = "token.jwt" - APIKEY_FILE = "api.key" - REST_ENDPOINT = "REST_ENDPOINT" - HOPSWORKS_PUBLIC_HOST = "HOPSWORKS_PUBLIC_HOST" - - BASE_PATH_PARAMS = ["hopsworks-api", "api"] - - @abstractmethod - def __init__(self): - """To be extended by clients.""" - pass - - def _get_verify(self, verify, trust_store_path): - """Get verification method for sending HTTP requests to Hopsworks. - - Credit to https://gist.github.com/gdamjan/55a8b9eec6cf7b771f92021d93b87b2c - - :param verify: perform hostname verification, 'true' or 'false' - :type verify: str - :param trust_store_path: path of the truststore locally if it was uploaded manually to - the external environment - :type trust_store_path: str - :return: if verify is true and the truststore is provided, then return the trust store location - if verify is true but the truststore wasn't provided, then return true - if verify is false, then return false - :rtype: str or boolean - """ - if verify == "true": - if trust_store_path is not None: - return trust_store_path - else: - return True - - return False - - def _get_retry(self, request, response): - """Get retry method for resending HTTP requests to Hopsworks - - :param request: original HTTP request already sent - :type request: requests.Request - :param response: response of the original HTTP request - :type response: requests.Response - """ - if response.status_code == 401 and self.REST_ENDPOINT in os.environ: - # refresh token and retry request - only on hopsworks - self._auth = auth.BearerAuth(self._read_jwt()) - # Update request with the new token - request.auth = self._auth - # retry request - return True - return False - - def _get_host_port_pair(self): - """ - Removes "http or https" from the rest endpoint and returns a list - [endpoint, port], where endpoint is on the format /path.. without http:// - - :return: a list [endpoint, port] - :rtype: list - """ - endpoint = self._base_url - if endpoint.startswith("http"): - last_index = endpoint.rfind("/") - endpoint = endpoint[last_index + 1 :] - host, port = endpoint.split(":") - return host, port - - def _read_jwt(self): - """Retrieve jwt from local container.""" - return self._read_file(self.TOKEN_FILE) - - def _read_apikey(self): - """Retrieve apikey from local container.""" - return self._read_file(self.APIKEY_FILE) - - def _read_file(self, secret_file): - """Retrieve secret from local container.""" - with open(os.path.join(self._secrets_dir, secret_file), "r") as secret: - return secret.read() - - def _close(self): - """Closes a client. 
Can be implemented for clean up purposes, not mandatory.""" - self._connected = False - - def _replace_public_host(self, url): - """Replace the hostname with the public hostname set in HOPSWORKS_PUBLIC_HOST.""" - ui_url = url._replace(netloc=os.environ[self.HOPSWORKS_PUBLIC_HOST]) - return ui_url diff --git a/hsml/python/hsml/client/hopsworks/external.py b/hsml/python/hsml/client/hopsworks/external.py deleted file mode 100644 index 6da14a4d3..000000000 --- a/hsml/python/hsml/client/hopsworks/external.py +++ /dev/null @@ -1,85 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import requests -from hsml.client import auth, exceptions -from hsml.client.hopsworks import base as hopsworks - - -class Client(hopsworks.Client): - def __init__( - self, - host, - port, - project, - hostname_verification, - trust_store_path, - api_key_file, - api_key_value, - ): - """Initializes a client in an external environment.""" - if not host: - raise exceptions.ExternalClientError( - "host cannot be of type NoneType, host is a non-optional " - "argument to connect to hopsworks from an external environment." - ) - if not project: - raise exceptions.ExternalClientError( - "project cannot be of type NoneType, project is a non-optional " - "argument to connect to hopsworks from an external environment." - ) - - self._host = host - self._port = port - self._base_url = "https://" + self._host + ":" + str(self._port) - self._project_name = project - - api_key = auth.get_api_key(api_key_value, api_key_file) - self._auth = auth.ApiKeyAuth(api_key) - - self._session = requests.session() - self._connected = True - # verification must be driven by the hostname_verification flag, not the host name - self._verify = self._get_verify(hostname_verification, trust_store_path) - - if self._project_name is not None: - project_info = self._get_project_info(self._project_name) - self._project_id = str(project_info["projectId"]) - else: - self._project_id = None - - self._cert_key = None - - def _close(self): - """Closes a client.""" - self._connected = False - - def _get_project_info(self, project_name): - """Makes a REST call to hopsworks to get all metadata of a project for the provided project. - - :param project_name: the name of the project - :type project_name: str - :return: JSON response with project info - :rtype: dict - """ - return self._send_request("GET", ["project", "getProjectInfo", project_name]) - - def _replace_public_host(self, url): - """No need to replace, as we are already in an external client.""" - return url - - @property - def host(self): - return self._host diff --git a/hsml/python/hsml/client/hopsworks/internal.py b/hsml/python/hsml/client/hopsworks/internal.py deleted file mode 100644 index 760251540..000000000 --- a/hsml/python/hsml/client/hopsworks/internal.py +++ /dev/null @@ -1,208 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import base64 -import os -import textwrap -from pathlib import Path - -import requests -from hsml.client import auth -from hsml.client.hopsworks import base as hopsworks - - -try: - import jks -except ImportError: - pass - - -class Client(hopsworks.Client): - REQUESTS_VERIFY = "REQUESTS_VERIFY" - DOMAIN_CA_TRUSTSTORE_PEM = "DOMAIN_CA_TRUSTSTORE_PEM" - PROJECT_ID = "HOPSWORKS_PROJECT_ID" - PROJECT_NAME = "HOPSWORKS_PROJECT_NAME" - HADOOP_USER_NAME = "HADOOP_USER_NAME" - MATERIAL_DIRECTORY = "MATERIAL_DIRECTORY" - HDFS_USER = "HDFS_USER" - T_CERTIFICATE = "t_certificate" - K_CERTIFICATE = "k_certificate" - TRUSTSTORE_SUFFIX = "__tstore.jks" - KEYSTORE_SUFFIX = "__kstore.jks" - PEM_CA_CHAIN = "ca_chain.pem" - CERT_KEY_SUFFIX = "__cert.key" - MATERIAL_PWD = "material_passwd" - SECRETS_DIR = "SECRETS_DIR" - - def __init__(self):​ - """Initializes a client being run from a job/notebook directly on Hopsworks.""" - self._base_url = self._get_hopsworks_rest_endpoint() - self._host, self._port = self._get_host_port_pair() - self._secrets_dir = ( - os.environ[self.SECRETS_DIR] if self.SECRETS_DIR in os.environ else "" - ) - self._cert_key = self._get_cert_pw() - trust_store_path = self._get_trust_store_path() - hostname_verification = ( - os.environ[self.REQUESTS_VERIFY] - if self.REQUESTS_VERIFY in os.environ - else "true" - ) - self._project_id = os.environ[self.PROJECT_ID] - self._project_name = self._project_name() - try: - self._auth = auth.BearerAuth(self._read_jwt()) - except FileNotFoundError: - self._auth = auth.ApiKeyAuth(self._read_apikey()) - self._verify = self._get_verify(hostname_verification, trust_store_path) - self._session = requests.session() - - self._connected = True - - def _get_hopsworks_rest_endpoint(self): - """Get the hopsworks REST endpoint for making requests to the REST API.""" - return os.environ[self.REST_ENDPOINT] - - def _get_trust_store_path(self): - """Convert the truststore from JKS to PEM and return the location.""" - ca_chain_path = Path(self.PEM_CA_CHAIN) - if not ca_chain_path.exists(): - self._write_ca_chain(ca_chain_path) - return str(ca_chain_path) - - def _write_ca_chain(self, ca_chain_path): - """ - Converts the JKS truststore file into PEM to be compatible with Python libraries - """ - keystore_pw = self._cert_key - keystore_ca_cert = self._convert_jks_to_pem( - self._get_jks_key_store_path(), keystore_pw - ) - truststore_ca_cert = self._convert_jks_to_pem( - self._get_jks_trust_store_path(), keystore_pw - ) - - with ca_chain_path.open("w") as f: - f.write(keystore_ca_cert + truststore_ca_cert) - - def _convert_jks_to_pem(self, jks_path, keystore_pw): - """ - Converts a keystore JKS that contains client private key, - client certificate and CA certificate that was used to - sign the certificate to PEM format and returns the CA certificate.
Args: - :jks_path: path to the JKS file - :keystore_pw: password for decrypting the JKS file - Returns: - string: (ca_cert) - """ - # load the keystore and decrypt it with password - ks = jks.KeyStore.load(jks_path, keystore_pw, try_decrypt_keys=True) - ca_certs = "" - - # Convert CA Certificates into PEM format and append to string - for _alias, c in ks.certs.items(): - ca_certs = ca_certs + self._bytes_to_pem_str(c.cert, "CERTIFICATE") - return ca_certs - - def _bytes_to_pem_str(self, der_bytes, pem_type): - """ - Utility function for creating PEM files - - Args: - der_bytes: DER encoded bytes - pem_type: type of PEM, e.g. Certificate, Private key, or RSA private key - - Returns: - PEM String for a DER-encoded certificate or private key - """ - pem_str = "" - pem_str = pem_str + "-----BEGIN {}-----".format(pem_type) + "\n" - pem_str = ( - pem_str - + "\r\n".join( - textwrap.wrap(base64.b64encode(der_bytes).decode("ascii"), 64) - ) - + "\n" - ) - pem_str = pem_str + "-----END {}-----".format(pem_type) + "\n" - return pem_str - - def _get_jks_trust_store_path(self): - """ - Get truststore location - - Returns: - truststore location - """ - t_certificate = Path(self.T_CERTIFICATE) - if t_certificate.exists(): - return str(t_certificate) - else: - username = os.environ[self.HADOOP_USER_NAME] - material_directory = Path(os.environ[self.MATERIAL_DIRECTORY]) - return str(material_directory.joinpath(username + self.TRUSTSTORE_SUFFIX)) - - def _get_jks_key_store_path(self): - """ - Get keystore location - - Returns: - keystore location - """ - k_certificate = Path(self.K_CERTIFICATE) - if k_certificate.exists(): - return str(k_certificate) - else: - username = os.environ[self.HADOOP_USER_NAME] - material_directory = Path(os.environ[self.MATERIAL_DIRECTORY]) - return str(material_directory.joinpath(username + self.KEYSTORE_SUFFIX)) - - def _project_name(self): - try: - return os.environ[self.PROJECT_NAME] - except KeyError: - pass - - hops_user = self._project_user() - hops_user_split = hops_user.split( - "__" - ) # project users have username project__user - project = hops_user_split[0] - return project - - def _project_user(self): - try: - hops_user = os.environ[self.HADOOP_USER_NAME] - except KeyError: - hops_user = os.environ[self.HDFS_USER] - return hops_user - - def _get_cert_pw(self): - """ - Get keystore password from local container - - Returns: - Certificate password - """ - pwd_path = Path(self.MATERIAL_PWD) - if not pwd_path.exists(): - username = os.environ[self.HADOOP_USER_NAME] - material_directory = Path(os.environ[self.MATERIAL_DIRECTORY]) - pwd_path = material_directory.joinpath(username + self.CERT_KEY_SUFFIX) - - with pwd_path.open() as f: - return f.read() diff --git a/hsml/python/hsml/client/istio/__init__.py b/hsml/python/hsml/client/istio/__init__.py deleted file mode 100644 index 7fa8fd556..000000000 --- a/hsml/python/hsml/client/istio/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
-# diff --git a/hsml/python/hsml/client/istio/base.py b/hsml/python/hsml/client/istio/base.py deleted file mode 100644 index 9aaab9ba0..000000000 --- a/hsml/python/hsml/client/istio/base.py +++ /dev/null @@ -1,97 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os -from abc import abstractmethod - -from hsml.client import base -from hsml.client.istio.grpc.inference_client import GRPCInferenceServerClient - - -class Client(base.Client): - SERVING_API_KEY = "SERVING_API_KEY" - HOPSWORKS_PUBLIC_HOST = "HOPSWORKS_PUBLIC_HOST" - - BASE_PATH_PARAMS = [] - - @abstractmethod - def __init__(self): - """To be implemented by clients.""" - pass - - def _get_verify(self, verify, trust_store_path): - """Get verification method for sending inference requests to Istio. - - Credit to https://gist.github.com/gdamjan/55a8b9eec6cf7b771f92021d93b87b2c - - :param verify: perform hostname verification, 'true' or 'false' - :type verify: str - :param trust_store_path: path of the truststore locally if it was uploaded manually to - the external environment such as EKS or AKS - :type trust_store_path: str - :return: if verify is true and the truststore is provided, then return the trust store location - if verify is true but the truststore wasn't provided, then return true - if verify is false, then return false - :rtype: str or boolean - """ - if verify == "true": - if trust_store_path is not None: - return trust_store_path - else: - return True - - return False - - def _get_retry(self, request, response): - """Get retry method for resending HTTP requests to Istio - - :param request: original HTTP request already sent - :type request: requests.Request - :param response: response of the original HTTP request - :type response: requests.Response - """ - return False - - def _get_host_port_pair(self): - """ - Removes "http or https" from the rest endpoint and returns a list - [endpoint, port], where endpoint is on the format /path.. without http:// - - :return: a list [endpoint, port] - :rtype: list - """ - endpoint = self._base_url - if endpoint.startswith("http"): - last_index = endpoint.rfind("/") - endpoint = endpoint[last_index + 1 :] - host, port = endpoint.split(":") - return host, port - - def _close(self): - """Closes a client. 
Can be implemented for clean up purposes, not mandatory.""" - self._connected = False - - def _replace_public_host(self, url): - """replace hostname to public hostname set in HOPSWORKS_PUBLIC_HOST""" - ui_url = url._replace(netloc=os.environ[self.HOPSWORKS_PUBLIC_HOST]) - return ui_url - - def _create_grpc_channel(self, service_hostname: str) -> GRPCInferenceServerClient: - return GRPCInferenceServerClient( - url=self._host + ":" + str(self._port), - channel_args=(("grpc.ssl_target_name_override", service_hostname),), - serving_api_key=self._auth._token, - ) diff --git a/hsml/python/hsml/client/istio/external.py b/hsml/python/hsml/client/istio/external.py deleted file mode 100644 index c4fd89787..000000000 --- a/hsml/python/hsml/client/istio/external.py +++ /dev/null @@ -1,56 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import requests -from hsml.client import auth -from hsml.client.istio import base as istio - - -class Client(istio.Client): - def __init__( - self, - host, - port, - project, - api_key_value, - hostname_verification=None, - trust_store_path=None, - ): - """Initializes a client in an external environment such as AWS Sagemaker.""" - self._host = host - self._port = port - self._base_url = "http://" + self._host + ":" + str(self._port) - self._project_name = project - - self._auth = auth.ApiKeyAuth(api_key_value) - - self._session = requests.session() - self._connected = True - self._verify = self._get_verify(hostname_verification, trust_store_path) - - self._cert_key = None - - def _close(self): - """Closes a client.""" - self._connected = False - - def _replace_public_host(self, url): - """no need to replace as we are already in external client""" - return url - - @property - def host(self): - return self._host diff --git a/hsml/python/hsml/client/istio/grpc/__init__.py b/hsml/python/hsml/client/istio/grpc/__init__.py deleted file mode 100644 index ff8055b9b..000000000 --- a/hsml/python/hsml/client/istio/grpc/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/hsml/python/hsml/client/istio/grpc/errors.py b/hsml/python/hsml/client/istio/grpc/errors.py deleted file mode 100644 index 062630bea..000000000 --- a/hsml/python/hsml/client/istio/grpc/errors.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2022 The KServe Authors. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# This implementation has been borrowed from the kserve/kserve repository -# https://github.com/kserve/kserve/blob/release-0.11/python/kserve/kserve/errors.py - - -class InvalidInput(ValueError): - """ - Exception class indicating invalid input arguments. - HTTP Servers should return HTTP_400 (Bad Request). - """ - - def __init__(self, reason): - self.reason = reason - - def __str__(self): - return self.reason diff --git a/hsml/python/hsml/client/istio/grpc/exceptions.py b/hsml/python/hsml/client/istio/grpc/exceptions.py deleted file mode 100644 index 6477c9488..000000000 --- a/hsml/python/hsml/client/istio/grpc/exceptions.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright 2023 The KServe Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# coding: utf-8 - -# This implementation has been borrowed from kserve/kserve repository -# https://github.com/kserve/kserve/blob/release-0.11/python/kserve/kserve/exceptions.py - -import six - - -class OpenApiException(Exception): - """The base exception class for all OpenAPIExceptions""" - - -class ApiTypeError(OpenApiException, TypeError): - def __init__(self, msg, path_to_item=None, valid_classes=None, key_type=None): - """Raises an exception for TypeErrors - - Args: - msg (str): the exception message - - Keyword Args: - path_to_item (list): a list of keys and indices to get to the - current_item - None if unset - valid_classes (tuple): the primitive classes that current item - should be an instance of - None if unset - key_type (bool): False if our value is a value in a dict - True if it is a key in a dict - False if our item is an item in a list - None if unset - """ - self.path_to_item = path_to_item - self.valid_classes = valid_classes - self.key_type = key_type - full_msg = msg - if path_to_item: - full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) - super(ApiTypeError, self).__init__(full_msg) - - -class ApiValueError(OpenApiException, ValueError): - def __init__(self, msg, path_to_item=None): - """ - Args: - msg (str): the exception message - - Keyword Args: - path_to_item (list): the path to the exception in the - received_data dict.
None if unset - """ - - self.path_to_item = path_to_item - full_msg = msg - if path_to_item: - full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) - super(ApiValueError, self).__init__(full_msg) - - -class ApiKeyError(OpenApiException, KeyError): - def __init__(self, msg, path_to_item=None): - """ - Args: - msg (str): the exception message - - Keyword Args: - path_to_item (None/list) the path to the exception in the - received_data dict - """ - self.path_to_item = path_to_item - full_msg = msg - if path_to_item: - full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) - super(ApiKeyError, self).__init__(full_msg) - - -class ApiException(OpenApiException): - def __init__(self, status=None, reason=None, http_resp=None): - if http_resp: - self.status = http_resp.status - self.reason = http_resp.reason - self.body = http_resp.data - self.headers = http_resp.getheaders() - else: - self.status = status - self.reason = reason - self.body = None - self.headers = None - - def __str__(self): - """Custom error messages for exception""" - error_message = "({0})\n" "Reason: {1}\n".format(self.status, self.reason) - if self.headers: - error_message += "HTTP response headers: {0}\n".format(self.headers) - - if self.body: - error_message += "HTTP response body: {0}\n".format(self.body) - - return error_message - - -def render_path(path_to_item): - """Returns a string representation of a path""" - result = "" - for pth in path_to_item: - if isinstance(pth, six.integer_types): - result += "[{0}]".format(pth) - else: - result += "['{0}']".format(pth) - return result diff --git a/hsml/python/hsml/client/istio/grpc/inference_client.py b/hsml/python/hsml/client/istio/grpc/inference_client.py deleted file mode 100644 index 3cc3164c5..000000000 --- a/hsml/python/hsml/client/istio/grpc/inference_client.py +++ /dev/null @@ -1,74 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import grpc -from hsml.client.istio.grpc.proto.grpc_predict_v2_pb2_grpc import ( - GRPCInferenceServiceStub, -) -from hsml.client.istio.utils.infer_type import InferRequest, InferResponse - - -class GRPCInferenceServerClient: - def __init__( - self, - url, - serving_api_key, - channel_args=None, - ): - if channel_args is not None: - channel_opt = channel_args - else: - channel_opt = [ - ("grpc.max_send_message_length", -1), - ("grpc.max_receive_message_length", -1), - ] - - # Authentication is done via API Key in the Authorization header - self._channel = grpc.insecure_channel(url, options=channel_opt) - self._client_stub = GRPCInferenceServiceStub(self._channel) - self._serving_api_key = serving_api_key - - def __enter__(self): - return self - - def __exit__(self, type, value, traceback): - self.close() - - def __del__(self): - """It is called during object garbage collection.""" - self.close() - - def close(self): - """Close the client. 
Future calls to server will result in an Error.""" - self._channel.close() - - def infer(self, infer_request: InferRequest, headers=None, client_timeout=None): - headers = {} if headers is None else headers - headers["authorization"] = "ApiKey " + self._serving_api_key - metadata = headers.items() - - # convert the InferRequest to a ModelInferRequest message - request = infer_request.to_grpc() - - try: - # send request - model_infer_response = self._client_stub.ModelInfer( - request=request, metadata=metadata, timeout=client_timeout - ) - except grpc.RpcError as rpc_error: - raise rpc_error - - # convert back the ModelInferResponse message to InferResponse - return InferResponse.from_grpc(model_infer_response) diff --git a/hsml/python/hsml/client/istio/grpc/proto/__init__.py b/hsml/python/hsml/client/istio/grpc/proto/__init__.py deleted file mode 100644 index ff8055b9b..000000000 --- a/hsml/python/hsml/client/istio/grpc/proto/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2.proto b/hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2.proto deleted file mode 100644 index c05221d73..000000000 --- a/hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2.proto +++ /dev/null @@ -1,362 +0,0 @@ -// Copyright 2022 The KServe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; -package inference; - -// Inference Server GRPC endpoints. -service GRPCInferenceService -{ - // The ServerLive API indicates if the inference server is able to receive - // and respond to metadata and inference requests. - rpc ServerLive(ServerLiveRequest) returns (ServerLiveResponse) {} - - // The ServerReady API indicates if the server is ready for inferencing. - rpc ServerReady(ServerReadyRequest) returns (ServerReadyResponse) {} - - // The ModelReady API indicates if a specific model is ready for inferencing. - rpc ModelReady(ModelReadyRequest) returns (ModelReadyResponse) {} - - // The ServerMetadata API provides information about the server. Errors are - // indicated by the google.rpc.Status returned for the request. The OK code - // indicates success and other codes indicate failure. - rpc ServerMetadata(ServerMetadataRequest) returns (ServerMetadataResponse) {} - - // The per-model metadata API provides information about a model. Errors are - // indicated by the google.rpc.Status returned for the request. 
The OK code - // indicates success and other codes indicate failure. - rpc ModelMetadata(ModelMetadataRequest) returns (ModelMetadataResponse) {} - - // The ModelInfer API performs inference using the specified model. Errors are - // indicated by the google.rpc.Status returned for the request. The OK code - // indicates success and other codes indicate failure. - rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {} - - // Load or reload a model from a repository. - rpc RepositoryModelLoad(RepositoryModelLoadRequest) returns (RepositoryModelLoadResponse) {} - - // Unload a model. - rpc RepositoryModelUnload(RepositoryModelUnloadRequest) returns (RepositoryModelUnloadResponse) {} -} - -message ServerLiveRequest {} - -message ServerLiveResponse -{ - // True if the inference server is live, false if not live. - bool live = 1; -} - -message ServerReadyRequest {} - -message ServerReadyResponse -{ - // True if the inference server is ready, false if not ready. - bool ready = 1; -} - -message ModelReadyRequest -{ - // The name of the model to check for readiness. - string name = 1; - - // The version of the model to check for readiness. If not given the - // server will choose a version based on the model and internal policy. - string version = 2; -} - -message ModelReadyResponse -{ - // True if the model is ready, false if not ready. - bool ready = 1; -} - -message ServerMetadataRequest {} - -message ServerMetadataResponse -{ - // The server name. - string name = 1; - - // The server version. - string version = 2; - - // The extensions supported by the server. - repeated string extensions = 3; -} - -message ModelMetadataRequest -{ - // The name of the model. - string name = 1; - - // The version of the model to check for readiness. If not given the - // server will choose a version based on the model and internal policy. - string version = 2; -} - -message ModelMetadataResponse -{ - // Metadata for a tensor. - message TensorMetadata - { - // The tensor name. - string name = 1; - - // The tensor data type. - string datatype = 2; - - // The tensor shape. A variable-size dimension is represented - // by a -1 value. - repeated int64 shape = 3; - } - - // The model name. - string name = 1; - - // The versions of the model available on the server. - repeated string versions = 2; - - // The model's platform. See Platforms. - string platform = 3; - - // The model's inputs. - repeated TensorMetadata inputs = 4; - - // The model's outputs. - repeated TensorMetadata outputs = 5; -} - -message ModelInferRequest -{ - // An input tensor for an inference request. - message InferInputTensor - { - // The tensor name. - string name = 1; - - // The tensor data type. - string datatype = 2; - - // The tensor shape. - repeated int64 shape = 3; - - // Optional inference input tensor parameters. - map parameters = 4; - - // The tensor contents using a data-type format. This field must - // not be specified if "raw" tensor contents are being used for - // the inference request. - InferTensorContents contents = 5; - } - - // An output tensor requested for an inference request. - message InferRequestedOutputTensor - { - // The tensor name. - string name = 1; - - // Optional requested output tensor parameters. - map parameters = 2; - } - - // The name of the model to use for inferencing. - string model_name = 1; - - // The version of the model to use for inference. If not given the - // server will choose a version based on the model and internal policy. 
- string model_version = 2; - - // Optional identifier for the request. If specified will be - // returned in the response. - string id = 3; - - // Optional inference parameters. - map parameters = 4; - - // The input tensors for the inference. - repeated InferInputTensor inputs = 5; - - // The requested output tensors for the inference. Optional, if not - // specified all outputs produced by the model will be returned. - repeated InferRequestedOutputTensor outputs = 6; - - // The data contained in an input tensor can be represented in "raw" - // bytes form or in the repeated type that matches the tensor's data - // type. To use the raw representation 'raw_input_contents' must be - // initialized with data for each tensor in the same order as - // 'inputs'. For each tensor, the size of this content must match - // what is expected by the tensor's shape and data type. The raw - // data must be the flattened, one-dimensional, row-major order of - // the tensor elements without any stride or padding between the - // elements. Note that the FP16 and BF16 data types must be represented as - // raw content as there is no specific data type for a 16-bit float type. - // - // If this field is specified then InferInputTensor::contents must - // not be specified for any input tensor. - repeated bytes raw_input_contents = 7; -} - -message ModelInferResponse -{ - // An output tensor returned for an inference request. - message InferOutputTensor - { - // The tensor name. - string name = 1; - - // The tensor data type. - string datatype = 2; - - // The tensor shape. - repeated int64 shape = 3; - - // Optional output tensor parameters. - map parameters = 4; - - // The tensor contents using a data-type format. This field must - // not be specified if "raw" tensor contents are being used for - // the inference response. - InferTensorContents contents = 5; - } - - // The name of the model used for inference. - string model_name = 1; - - // The version of the model used for inference. - string model_version = 2; - - // The id of the inference request if one was specified. - string id = 3; - - // Optional inference response parameters. - map parameters = 4; - - // The output tensors holding inference results. - repeated InferOutputTensor outputs = 5; - - // The data contained in an output tensor can be represented in - // "raw" bytes form or in the repeated type that matches the - // tensor's data type. To use the raw representation 'raw_output_contents' - // must be initialized with data for each tensor in the same order as - // 'outputs'. For each tensor, the size of this content must match - // what is expected by the tensor's shape and data type. The raw - // data must be the flattened, one-dimensional, row-major order of - // the tensor elements without any stride or padding between the - // elements. Note that the FP16 and BF16 data types must be represented as - // raw content as there is no specific data type for a 16-bit float type. - // - // If this field is specified then InferOutputTensor::contents must - // not be specified for any output tensor. - repeated bytes raw_output_contents = 6; -} - -// An inference parameter value. The Parameters message describes a -// “name”/”value” pair, where the “name” is the name of the parameter -// and the “value” is a boolean, integer, or string corresponding to -// the parameter. -message InferParameter -{ - // The parameter value can be a string, an int64, a boolean - // or a message specific to a predefined parameter. 
- oneof parameter_choice - { - // A boolean parameter value. - bool bool_param = 1; - - // An int64 parameter value. - int64 int64_param = 2; - - // A string parameter value. - string string_param = 3; - } -} - -// The data contained in a tensor represented by the repeated type -// that matches the tensor's data type. Protobuf oneof is not used -// because oneofs cannot contain repeated fields. -message InferTensorContents -{ - // Representation for BOOL data type. The size must match what is - // expected by the tensor's shape. The contents must be the flattened, - // one-dimensional, row-major order of the tensor elements. - repeated bool bool_contents = 1; - - // Representation for INT8, INT16, and INT32 data types. The size - // must match what is expected by the tensor's shape. The contents - // must be the flattened, one-dimensional, row-major order of the - // tensor elements. - repeated int32 int_contents = 2; - - // Representation for INT64 data types. The size must match what - // is expected by the tensor's shape. The contents must be the - // flattened, one-dimensional, row-major order of the tensor elements. - repeated int64 int64_contents = 3; - - // Representation for UINT8, UINT16, and UINT32 data types. The size - // must match what is expected by the tensor's shape. The contents - // must be the flattened, one-dimensional, row-major order of the - // tensor elements. - repeated uint32 uint_contents = 4; - - // Representation for UINT64 data types. The size must match what - // is expected by the tensor's shape. The contents must be the - // flattened, one-dimensional, row-major order of the tensor elements. - repeated uint64 uint64_contents = 5; - - // Representation for FP32 data type. The size must match what is - // expected by the tensor's shape. The contents must be the flattened, - // one-dimensional, row-major order of the tensor elements. - repeated float fp32_contents = 6; - - // Representation for FP64 data type. The size must match what is - // expected by the tensor's shape. The contents must be the flattened, - // one-dimensional, row-major order of the tensor elements. - repeated double fp64_contents = 7; - - // Representation for BYTES data type. The size must match what is - // expected by the tensor's shape. The contents must be the flattened, - // one-dimensional, row-major order of the tensor elements. - repeated bytes bytes_contents = 8; -} - -message RepositoryModelLoadRequest -{ - // The name of the model to load, or reload. - string model_name = 1; -} - -message RepositoryModelLoadResponse -{ - // The name of the model trying to load or reload. - string model_name = 1; - - // boolean parameter to indicate whether model is loaded or not - bool isLoaded = 2; -} - -message RepositoryModelUnloadRequest -{ - // The name of the model to unload. - string model_name = 1; -} - -message RepositoryModelUnloadResponse -{ - // The name of the model trying to load or reload. - string model_name = 1; - - // boolean parameter to indicate whether model is unloaded or not - bool isUnloaded = 2; -} diff --git a/hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.py b/hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.py deleted file mode 100644 index 07af5f1c5..000000000 --- a/hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.py +++ /dev/null @@ -1,451 +0,0 @@ -# Copyright 2022 The KServe Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: grpc_predict_v2.proto -"""Generated protocol buffer code.""" - -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x15grpc_predict_v2.proto\x12\tinference"\x13\n\x11ServerLiveRequest""\n\x12ServerLiveResponse\x12\x0c\n\x04live\x18\x01 \x01(\x08"\x14\n\x12ServerReadyRequest"$\n\x13ServerReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08"2\n\x11ModelReadyRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t"#\n\x12ModelReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08"\x17\n\x15ServerMetadataRequest"K\n\x16ServerMetadataResponse\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x12\x12\n\nextensions\x18\x03 \x03(\t"5\n\x14ModelMetadataRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t"\x8d\x02\n\x15ModelMetadataResponse\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08versions\x18\x02 \x03(\t\x12\x10\n\x08platform\x18\x03 \x01(\t\x12?\n\x06inputs\x18\x04 \x03(\x0b\x32/.inference.ModelMetadataResponse.TensorMetadata\x12@\n\x07outputs\x18\x05 \x03(\x0b\x32/.inference.ModelMetadataResponse.TensorMetadata\x1a?\n\x0eTensorMetadata\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tatype\x18\x02 \x01(\t\x12\r\n\x05shape\x18\x03 \x03(\x03"\xee\x06\n\x11ModelInferRequest\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\t\x12\n\n\x02id\x18\x03 \x01(\t\x12@\n\nparameters\x18\x04 \x03(\x0b\x32,.inference.ModelInferRequest.ParametersEntry\x12=\n\x06inputs\x18\x05 \x03(\x0b\x32-.inference.ModelInferRequest.InferInputTensor\x12H\n\x07outputs\x18\x06 \x03(\x0b\x32\x37.inference.ModelInferRequest.InferRequestedOutputTensor\x12\x1a\n\x12raw_input_contents\x18\x07 \x03(\x0c\x1a\x94\x02\n\x10InferInputTensor\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tatype\x18\x02 \x01(\t\x12\r\n\x05shape\x18\x03 \x03(\x03\x12Q\n\nparameters\x18\x04 \x03(\x0b\x32=.inference.ModelInferRequest.InferInputTensor.ParametersEntry\x12\x30\n\x08\x63ontents\x18\x05 \x01(\x0b\x32\x1e.inference.InferTensorContents\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01\x1a\xd5\x01\n\x1aInferRequestedOutputTensor\x12\x0c\n\x04name\x18\x01 \x01(\t\x12[\n\nparameters\x18\x02 \x03(\x0b\x32G.inference.ModelInferRequest.InferRequestedOutputTensor.ParametersEntry\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 
\x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01"\xd5\x04\n\x12ModelInferResponse\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\t\x12\n\n\x02id\x18\x03 \x01(\t\x12\x41\n\nparameters\x18\x04 \x03(\x0b\x32-.inference.ModelInferResponse.ParametersEntry\x12@\n\x07outputs\x18\x05 \x03(\x0b\x32/.inference.ModelInferResponse.InferOutputTensor\x12\x1b\n\x13raw_output_contents\x18\x06 \x03(\x0c\x1a\x97\x02\n\x11InferOutputTensor\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tatype\x18\x02 \x01(\t\x12\r\n\x05shape\x18\x03 \x03(\x03\x12S\n\nparameters\x18\x04 \x03(\x0b\x32?.inference.ModelInferResponse.InferOutputTensor.ParametersEntry\x12\x30\n\x08\x63ontents\x18\x05 \x01(\x0b\x32\x1e.inference.InferTensorContents\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01"i\n\x0eInferParameter\x12\x14\n\nbool_param\x18\x01 \x01(\x08H\x00\x12\x15\n\x0bint64_param\x18\x02 \x01(\x03H\x00\x12\x16\n\x0cstring_param\x18\x03 \x01(\tH\x00\x42\x12\n\x10parameter_choice"\xd0\x01\n\x13InferTensorContents\x12\x15\n\rbool_contents\x18\x01 \x03(\x08\x12\x14\n\x0cint_contents\x18\x02 \x03(\x05\x12\x16\n\x0eint64_contents\x18\x03 \x03(\x03\x12\x15\n\ruint_contents\x18\x04 \x03(\r\x12\x17\n\x0fuint64_contents\x18\x05 \x03(\x04\x12\x15\n\rfp32_contents\x18\x06 \x03(\x02\x12\x15\n\rfp64_contents\x18\x07 \x03(\x01\x12\x16\n\x0e\x62ytes_contents\x18\x08 \x03(\x0c"0\n\x1aRepositoryModelLoadRequest\x12\x12\n\nmodel_name\x18\x01 \x01(\t"C\n\x1bRepositoryModelLoadResponse\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x10\n\x08isLoaded\x18\x02 \x01(\x08"2\n\x1cRepositoryModelUnloadRequest\x12\x12\n\nmodel_name\x18\x01 \x01(\t"G\n\x1dRepositoryModelUnloadResponse\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x12\n\nisUnloaded\x18\x02 \x01(\x08\x32\xd2\x05\n\x14GRPCInferenceService\x12K\n\nServerLive\x12\x1c.inference.ServerLiveRequest\x1a\x1d.inference.ServerLiveResponse"\x00\x12N\n\x0bServerReady\x12\x1d.inference.ServerReadyRequest\x1a\x1e.inference.ServerReadyResponse"\x00\x12K\n\nModelReady\x12\x1c.inference.ModelReadyRequest\x1a\x1d.inference.ModelReadyResponse"\x00\x12W\n\x0eServerMetadata\x12 .inference.ServerMetadataRequest\x1a!.inference.ServerMetadataResponse"\x00\x12T\n\rModelMetadata\x12\x1f.inference.ModelMetadataRequest\x1a .inference.ModelMetadataResponse"\x00\x12K\n\nModelInfer\x12\x1c.inference.ModelInferRequest\x1a\x1d.inference.ModelInferResponse"\x00\x12\x66\n\x13RepositoryModelLoad\x12%.inference.RepositoryModelLoadRequest\x1a&.inference.RepositoryModelLoadResponse"\x00\x12l\n\x15RepositoryModelUnload\x12\'.inference.RepositoryModelUnloadRequest\x1a(.inference.RepositoryModelUnloadResponse"\x00\x62\x06proto3' -) - - -_SERVERLIVEREQUEST = DESCRIPTOR.message_types_by_name["ServerLiveRequest"] -_SERVERLIVERESPONSE = DESCRIPTOR.message_types_by_name["ServerLiveResponse"] -_SERVERREADYREQUEST = DESCRIPTOR.message_types_by_name["ServerReadyRequest"] -_SERVERREADYRESPONSE = DESCRIPTOR.message_types_by_name["ServerReadyResponse"] -_MODELREADYREQUEST = DESCRIPTOR.message_types_by_name["ModelReadyRequest"] -_MODELREADYRESPONSE = DESCRIPTOR.message_types_by_name["ModelReadyResponse"] -_SERVERMETADATAREQUEST = DESCRIPTOR.message_types_by_name["ServerMetadataRequest"] -_SERVERMETADATARESPONSE = DESCRIPTOR.message_types_by_name["ServerMetadataResponse"] 
-_MODELMETADATAREQUEST = DESCRIPTOR.message_types_by_name["ModelMetadataRequest"] -_MODELMETADATARESPONSE = DESCRIPTOR.message_types_by_name["ModelMetadataResponse"] -_MODELMETADATARESPONSE_TENSORMETADATA = _MODELMETADATARESPONSE.nested_types_by_name[ - "TensorMetadata" -] -_MODELINFERREQUEST = DESCRIPTOR.message_types_by_name["ModelInferRequest"] -_MODELINFERREQUEST_INFERINPUTTENSOR = _MODELINFERREQUEST.nested_types_by_name[ - "InferInputTensor" -] -_MODELINFERREQUEST_INFERINPUTTENSOR_PARAMETERSENTRY = ( - _MODELINFERREQUEST_INFERINPUTTENSOR.nested_types_by_name["ParametersEntry"] -) -_MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR = _MODELINFERREQUEST.nested_types_by_name[ - "InferRequestedOutputTensor" -] -_MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR_PARAMETERSENTRY = ( - _MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR.nested_types_by_name[ - "ParametersEntry" - ] -) -_MODELINFERREQUEST_PARAMETERSENTRY = _MODELINFERREQUEST.nested_types_by_name[ - "ParametersEntry" -] -_MODELINFERRESPONSE = DESCRIPTOR.message_types_by_name["ModelInferResponse"] -_MODELINFERRESPONSE_INFEROUTPUTTENSOR = _MODELINFERRESPONSE.nested_types_by_name[ - "InferOutputTensor" -] -_MODELINFERRESPONSE_INFEROUTPUTTENSOR_PARAMETERSENTRY = ( - _MODELINFERRESPONSE_INFEROUTPUTTENSOR.nested_types_by_name["ParametersEntry"] -) -_MODELINFERRESPONSE_PARAMETERSENTRY = _MODELINFERRESPONSE.nested_types_by_name[ - "ParametersEntry" -] -_INFERPARAMETER = DESCRIPTOR.message_types_by_name["InferParameter"] -_INFERTENSORCONTENTS = DESCRIPTOR.message_types_by_name["InferTensorContents"] -_REPOSITORYMODELLOADREQUEST = DESCRIPTOR.message_types_by_name[ - "RepositoryModelLoadRequest" -] -_REPOSITORYMODELLOADRESPONSE = DESCRIPTOR.message_types_by_name[ - "RepositoryModelLoadResponse" -] -_REPOSITORYMODELUNLOADREQUEST = DESCRIPTOR.message_types_by_name[ - "RepositoryModelUnloadRequest" -] -_REPOSITORYMODELUNLOADRESPONSE = DESCRIPTOR.message_types_by_name[ - "RepositoryModelUnloadResponse" -] -ServerLiveRequest = _reflection.GeneratedProtocolMessageType( - "ServerLiveRequest", - (_message.Message,), - { - "DESCRIPTOR": _SERVERLIVEREQUEST, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ServerLiveRequest) - }, -) -_sym_db.RegisterMessage(ServerLiveRequest) - -ServerLiveResponse = _reflection.GeneratedProtocolMessageType( - "ServerLiveResponse", - (_message.Message,), - { - "DESCRIPTOR": _SERVERLIVERESPONSE, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ServerLiveResponse) - }, -) -_sym_db.RegisterMessage(ServerLiveResponse) - -ServerReadyRequest = _reflection.GeneratedProtocolMessageType( - "ServerReadyRequest", - (_message.Message,), - { - "DESCRIPTOR": _SERVERREADYREQUEST, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ServerReadyRequest) - }, -) -_sym_db.RegisterMessage(ServerReadyRequest) - -ServerReadyResponse = _reflection.GeneratedProtocolMessageType( - "ServerReadyResponse", - (_message.Message,), - { - "DESCRIPTOR": _SERVERREADYRESPONSE, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ServerReadyResponse) - }, -) -_sym_db.RegisterMessage(ServerReadyResponse) - -ModelReadyRequest = _reflection.GeneratedProtocolMessageType( - "ModelReadyRequest", - (_message.Message,), - { - "DESCRIPTOR": _MODELREADYREQUEST, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ModelReadyRequest) - }, -) -_sym_db.RegisterMessage(ModelReadyRequest) - 
-ModelReadyResponse = _reflection.GeneratedProtocolMessageType( - "ModelReadyResponse", - (_message.Message,), - { - "DESCRIPTOR": _MODELREADYRESPONSE, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ModelReadyResponse) - }, -) -_sym_db.RegisterMessage(ModelReadyResponse) - -ServerMetadataRequest = _reflection.GeneratedProtocolMessageType( - "ServerMetadataRequest", - (_message.Message,), - { - "DESCRIPTOR": _SERVERMETADATAREQUEST, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ServerMetadataRequest) - }, -) -_sym_db.RegisterMessage(ServerMetadataRequest) - -ServerMetadataResponse = _reflection.GeneratedProtocolMessageType( - "ServerMetadataResponse", - (_message.Message,), - { - "DESCRIPTOR": _SERVERMETADATARESPONSE, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ServerMetadataResponse) - }, -) -_sym_db.RegisterMessage(ServerMetadataResponse) - -ModelMetadataRequest = _reflection.GeneratedProtocolMessageType( - "ModelMetadataRequest", - (_message.Message,), - { - "DESCRIPTOR": _MODELMETADATAREQUEST, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ModelMetadataRequest) - }, -) -_sym_db.RegisterMessage(ModelMetadataRequest) - -ModelMetadataResponse = _reflection.GeneratedProtocolMessageType( - "ModelMetadataResponse", - (_message.Message,), - { - "TensorMetadata": _reflection.GeneratedProtocolMessageType( - "TensorMetadata", - (_message.Message,), - { - "DESCRIPTOR": _MODELMETADATARESPONSE_TENSORMETADATA, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ModelMetadataResponse.TensorMetadata) - }, - ), - "DESCRIPTOR": _MODELMETADATARESPONSE, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ModelMetadataResponse) - }, -) -_sym_db.RegisterMessage(ModelMetadataResponse) -_sym_db.RegisterMessage(ModelMetadataResponse.TensorMetadata) - -ModelInferRequest = _reflection.GeneratedProtocolMessageType( - "ModelInferRequest", - (_message.Message,), - { - "InferInputTensor": _reflection.GeneratedProtocolMessageType( - "InferInputTensor", - (_message.Message,), - { - "ParametersEntry": _reflection.GeneratedProtocolMessageType( - "ParametersEntry", - (_message.Message,), - { - "DESCRIPTOR": _MODELINFERREQUEST_INFERINPUTTENSOR_PARAMETERSENTRY, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ModelInferRequest.InferInputTensor.ParametersEntry) - }, - ), - "DESCRIPTOR": _MODELINFERREQUEST_INFERINPUTTENSOR, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ModelInferRequest.InferInputTensor) - }, - ), - "InferRequestedOutputTensor": _reflection.GeneratedProtocolMessageType( - "InferRequestedOutputTensor", - (_message.Message,), - { - "ParametersEntry": _reflection.GeneratedProtocolMessageType( - "ParametersEntry", - (_message.Message,), - { - "DESCRIPTOR": _MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR_PARAMETERSENTRY, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ModelInferRequest.InferRequestedOutputTensor.ParametersEntry) - }, - ), - "DESCRIPTOR": _MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ModelInferRequest.InferRequestedOutputTensor) - }, - ), - "ParametersEntry": _reflection.GeneratedProtocolMessageType( - "ParametersEntry", - 
(_message.Message,), - { - "DESCRIPTOR": _MODELINFERREQUEST_PARAMETERSENTRY, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ModelInferRequest.ParametersEntry) - }, - ), - "DESCRIPTOR": _MODELINFERREQUEST, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ModelInferRequest) - }, -) -_sym_db.RegisterMessage(ModelInferRequest) -_sym_db.RegisterMessage(ModelInferRequest.InferInputTensor) -_sym_db.RegisterMessage(ModelInferRequest.InferInputTensor.ParametersEntry) -_sym_db.RegisterMessage(ModelInferRequest.InferRequestedOutputTensor) -_sym_db.RegisterMessage(ModelInferRequest.InferRequestedOutputTensor.ParametersEntry) -_sym_db.RegisterMessage(ModelInferRequest.ParametersEntry) - -ModelInferResponse = _reflection.GeneratedProtocolMessageType( - "ModelInferResponse", - (_message.Message,), - { - "InferOutputTensor": _reflection.GeneratedProtocolMessageType( - "InferOutputTensor", - (_message.Message,), - { - "ParametersEntry": _reflection.GeneratedProtocolMessageType( - "ParametersEntry", - (_message.Message,), - { - "DESCRIPTOR": _MODELINFERRESPONSE_INFEROUTPUTTENSOR_PARAMETERSENTRY, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ModelInferResponse.InferOutputTensor.ParametersEntry) - }, - ), - "DESCRIPTOR": _MODELINFERRESPONSE_INFEROUTPUTTENSOR, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ModelInferResponse.InferOutputTensor) - }, - ), - "ParametersEntry": _reflection.GeneratedProtocolMessageType( - "ParametersEntry", - (_message.Message,), - { - "DESCRIPTOR": _MODELINFERRESPONSE_PARAMETERSENTRY, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ModelInferResponse.ParametersEntry) - }, - ), - "DESCRIPTOR": _MODELINFERRESPONSE, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.ModelInferResponse) - }, -) -_sym_db.RegisterMessage(ModelInferResponse) -_sym_db.RegisterMessage(ModelInferResponse.InferOutputTensor) -_sym_db.RegisterMessage(ModelInferResponse.InferOutputTensor.ParametersEntry) -_sym_db.RegisterMessage(ModelInferResponse.ParametersEntry) - -InferParameter = _reflection.GeneratedProtocolMessageType( - "InferParameter", - (_message.Message,), - { - "DESCRIPTOR": _INFERPARAMETER, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.InferParameter) - }, -) -_sym_db.RegisterMessage(InferParameter) - -InferTensorContents = _reflection.GeneratedProtocolMessageType( - "InferTensorContents", - (_message.Message,), - { - "DESCRIPTOR": _INFERTENSORCONTENTS, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.InferTensorContents) - }, -) -_sym_db.RegisterMessage(InferTensorContents) - -RepositoryModelLoadRequest = _reflection.GeneratedProtocolMessageType( - "RepositoryModelLoadRequest", - (_message.Message,), - { - "DESCRIPTOR": _REPOSITORYMODELLOADREQUEST, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.RepositoryModelLoadRequest) - }, -) -_sym_db.RegisterMessage(RepositoryModelLoadRequest) - -RepositoryModelLoadResponse = _reflection.GeneratedProtocolMessageType( - "RepositoryModelLoadResponse", - (_message.Message,), - { - "DESCRIPTOR": _REPOSITORYMODELLOADRESPONSE, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.RepositoryModelLoadResponse) - }, -) 
-_sym_db.RegisterMessage(RepositoryModelLoadResponse) - -RepositoryModelUnloadRequest = _reflection.GeneratedProtocolMessageType( - "RepositoryModelUnloadRequest", - (_message.Message,), - { - "DESCRIPTOR": _REPOSITORYMODELUNLOADREQUEST, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.RepositoryModelUnloadRequest) - }, -) -_sym_db.RegisterMessage(RepositoryModelUnloadRequest) - -RepositoryModelUnloadResponse = _reflection.GeneratedProtocolMessageType( - "RepositoryModelUnloadResponse", - (_message.Message,), - { - "DESCRIPTOR": _REPOSITORYMODELUNLOADRESPONSE, - "__module__": "grpc_predict_v2_pb2", - # @@protoc_insertion_point(class_scope:inference.RepositoryModelUnloadResponse) - }, -) -_sym_db.RegisterMessage(RepositoryModelUnloadResponse) - -_GRPCINFERENCESERVICE = DESCRIPTOR.services_by_name["GRPCInferenceService"] -if _descriptor._USE_C_DESCRIPTORS == False: # noqa: E712 - DESCRIPTOR._options = None - _MODELINFERREQUEST_INFERINPUTTENSOR_PARAMETERSENTRY._options = None - _MODELINFERREQUEST_INFERINPUTTENSOR_PARAMETERSENTRY._serialized_options = b"8\001" - _MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR_PARAMETERSENTRY._options = None - _MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR_PARAMETERSENTRY._serialized_options = b"8\001" - _MODELINFERREQUEST_PARAMETERSENTRY._options = None - _MODELINFERREQUEST_PARAMETERSENTRY._serialized_options = b"8\001" - _MODELINFERRESPONSE_INFEROUTPUTTENSOR_PARAMETERSENTRY._options = None - _MODELINFERRESPONSE_INFEROUTPUTTENSOR_PARAMETERSENTRY._serialized_options = b"8\001" - _MODELINFERRESPONSE_PARAMETERSENTRY._options = None - _MODELINFERRESPONSE_PARAMETERSENTRY._serialized_options = b"8\001" - _SERVERLIVEREQUEST._serialized_start = 36 - _SERVERLIVEREQUEST._serialized_end = 55 - _SERVERLIVERESPONSE._serialized_start = 57 - _SERVERLIVERESPONSE._serialized_end = 91 - _SERVERREADYREQUEST._serialized_start = 93 - _SERVERREADYREQUEST._serialized_end = 113 - _SERVERREADYRESPONSE._serialized_start = 115 - _SERVERREADYRESPONSE._serialized_end = 151 - _MODELREADYREQUEST._serialized_start = 153 - _MODELREADYREQUEST._serialized_end = 203 - _MODELREADYRESPONSE._serialized_start = 205 - _MODELREADYRESPONSE._serialized_end = 240 - _SERVERMETADATAREQUEST._serialized_start = 242 - _SERVERMETADATAREQUEST._serialized_end = 265 - _SERVERMETADATARESPONSE._serialized_start = 267 - _SERVERMETADATARESPONSE._serialized_end = 342 - _MODELMETADATAREQUEST._serialized_start = 344 - _MODELMETADATAREQUEST._serialized_end = 397 - _MODELMETADATARESPONSE._serialized_start = 400 - _MODELMETADATARESPONSE._serialized_end = 669 - _MODELMETADATARESPONSE_TENSORMETADATA._serialized_start = 606 - _MODELMETADATARESPONSE_TENSORMETADATA._serialized_end = 669 - _MODELINFERREQUEST._serialized_start = 672 - _MODELINFERREQUEST._serialized_end = 1550 - _MODELINFERREQUEST_INFERINPUTTENSOR._serialized_start = 980 - _MODELINFERREQUEST_INFERINPUTTENSOR._serialized_end = 1256 - _MODELINFERREQUEST_INFERINPUTTENSOR_PARAMETERSENTRY._serialized_start = 1180 - _MODELINFERREQUEST_INFERINPUTTENSOR_PARAMETERSENTRY._serialized_end = 1256 - _MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR._serialized_start = 1259 - _MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR._serialized_end = 1472 - _MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR_PARAMETERSENTRY._serialized_start = ( - 1180 - ) - _MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR_PARAMETERSENTRY._serialized_end = 1256 - _MODELINFERREQUEST_PARAMETERSENTRY._serialized_start = 1180 - _MODELINFERREQUEST_PARAMETERSENTRY._serialized_end = 1256 - 
_MODELINFERRESPONSE._serialized_start = 1553 - _MODELINFERRESPONSE._serialized_end = 2150 - _MODELINFERRESPONSE_INFEROUTPUTTENSOR._serialized_start = 1793 - _MODELINFERRESPONSE_INFEROUTPUTTENSOR._serialized_end = 2072 - _MODELINFERRESPONSE_INFEROUTPUTTENSOR_PARAMETERSENTRY._serialized_start = 1180 - _MODELINFERRESPONSE_INFEROUTPUTTENSOR_PARAMETERSENTRY._serialized_end = 1256 - _MODELINFERRESPONSE_PARAMETERSENTRY._serialized_start = 1180 - _MODELINFERRESPONSE_PARAMETERSENTRY._serialized_end = 1256 - _INFERPARAMETER._serialized_start = 2152 - _INFERPARAMETER._serialized_end = 2257 - _INFERTENSORCONTENTS._serialized_start = 2260 - _INFERTENSORCONTENTS._serialized_end = 2468 - _REPOSITORYMODELLOADREQUEST._serialized_start = 2470 - _REPOSITORYMODELLOADREQUEST._serialized_end = 2518 - _REPOSITORYMODELLOADRESPONSE._serialized_start = 2520 - _REPOSITORYMODELLOADRESPONSE._serialized_end = 2587 - _REPOSITORYMODELUNLOADREQUEST._serialized_start = 2589 - _REPOSITORYMODELUNLOADREQUEST._serialized_end = 2639 - _REPOSITORYMODELUNLOADRESPONSE._serialized_start = 2641 - _REPOSITORYMODELUNLOADRESPONSE._serialized_end = 2712 - _GRPCINFERENCESERVICE._serialized_start = 2715 - _GRPCINFERENCESERVICE._serialized_end = 3437 -# @@protoc_insertion_point(module_scope) diff --git a/hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.pyi b/hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.pyi deleted file mode 100644 index dcaac5eb4..000000000 --- a/hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.pyi +++ /dev/null @@ -1,414 +0,0 @@ -from typing import ( - ClassVar as _ClassVar, -) -from typing import ( - Iterable as _Iterable, -) -from typing import ( - Mapping as _Mapping, -) -from typing import ( - Optional as _Optional, -) -from typing import ( - Union as _Union, -) - -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf.internal import containers as _containers - -DESCRIPTOR: _descriptor.FileDescriptor - -class InferParameter(_message.Message): - __slots__ = ["bool_param", "int64_param", "string_param"] - BOOL_PARAM_FIELD_NUMBER: _ClassVar[int] - INT64_PARAM_FIELD_NUMBER: _ClassVar[int] - STRING_PARAM_FIELD_NUMBER: _ClassVar[int] - bool_param: bool - int64_param: int - string_param: str - def __init__( - self, - bool_param: bool = ..., - int64_param: _Optional[int] = ..., - string_param: _Optional[str] = ..., - ) -> None: ... 
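The module deleted above is protoc-generated from grpc_predict_v2.proto and exposes the KServe v2 inference messages whose fields are documented in the proto earlier in this patch. As a minimal sketch of how client code assembles a request from these messages — the field names come from the proto above, while the import path is the module being removed here and the model name and values are placeholders:

    import hsml.client.istio.grpc.proto.grpc_predict_v2_pb2 as pb2

    # One FP32 input tensor; per the proto comments, `contents` must hold
    # the flattened, row-major elements matching the declared shape.
    tensor = pb2.ModelInferRequest.InferInputTensor(
        name="input-0",
        datatype="FP32",
        shape=[1, 3],
        contents=pb2.InferTensorContents(fp32_contents=[0.1, 0.2, 0.3]),
    )
    request = pb2.ModelInferRequest(
        model_name="mymodel",   # hypothetical; any model served by the endpoint
        model_version="1",      # optional; the server picks a version if omitted
        inputs=[tensor],
    )

The type stubs in the .pyi file deleted below describe exactly these keyword constructors.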
- -class InferTensorContents(_message.Message): - __slots__ = [ - "bool_contents", - "bytes_contents", - "fp32_contents", - "fp64_contents", - "int64_contents", - "int_contents", - "uint64_contents", - "uint_contents", - ] - BOOL_CONTENTS_FIELD_NUMBER: _ClassVar[int] - BYTES_CONTENTS_FIELD_NUMBER: _ClassVar[int] - FP32_CONTENTS_FIELD_NUMBER: _ClassVar[int] - FP64_CONTENTS_FIELD_NUMBER: _ClassVar[int] - INT64_CONTENTS_FIELD_NUMBER: _ClassVar[int] - INT_CONTENTS_FIELD_NUMBER: _ClassVar[int] - UINT64_CONTENTS_FIELD_NUMBER: _ClassVar[int] - UINT_CONTENTS_FIELD_NUMBER: _ClassVar[int] - bool_contents: _containers.RepeatedScalarFieldContainer[bool] - bytes_contents: _containers.RepeatedScalarFieldContainer[bytes] - fp32_contents: _containers.RepeatedScalarFieldContainer[float] - fp64_contents: _containers.RepeatedScalarFieldContainer[float] - int64_contents: _containers.RepeatedScalarFieldContainer[int] - int_contents: _containers.RepeatedScalarFieldContainer[int] - uint64_contents: _containers.RepeatedScalarFieldContainer[int] - uint_contents: _containers.RepeatedScalarFieldContainer[int] - def __init__( - self, - bool_contents: _Optional[_Iterable[bool]] = ..., - int_contents: _Optional[_Iterable[int]] = ..., - int64_contents: _Optional[_Iterable[int]] = ..., - uint_contents: _Optional[_Iterable[int]] = ..., - uint64_contents: _Optional[_Iterable[int]] = ..., - fp32_contents: _Optional[_Iterable[float]] = ..., - fp64_contents: _Optional[_Iterable[float]] = ..., - bytes_contents: _Optional[_Iterable[bytes]] = ..., - ) -> None: ... - -class ModelInferRequest(_message.Message): - __slots__ = [ - "id", - "inputs", - "model_name", - "model_version", - "outputs", - "parameters", - "raw_input_contents", - ] - - class InferInputTensor(_message.Message): - __slots__ = ["contents", "datatype", "name", "parameters", "shape"] - - class ParametersEntry(_message.Message): - __slots__ = ["key", "value"] - KEY_FIELD_NUMBER: _ClassVar[int] - VALUE_FIELD_NUMBER: _ClassVar[int] - key: str - value: InferParameter - def __init__( - self, - key: _Optional[str] = ..., - value: _Optional[_Union[InferParameter, _Mapping]] = ..., - ) -> None: ... - - CONTENTS_FIELD_NUMBER: _ClassVar[int] - DATATYPE_FIELD_NUMBER: _ClassVar[int] - NAME_FIELD_NUMBER: _ClassVar[int] - PARAMETERS_FIELD_NUMBER: _ClassVar[int] - SHAPE_FIELD_NUMBER: _ClassVar[int] - contents: InferTensorContents - datatype: str - name: str - parameters: _containers.MessageMap[str, InferParameter] - shape: _containers.RepeatedScalarFieldContainer[int] - def __init__( - self, - name: _Optional[str] = ..., - datatype: _Optional[str] = ..., - shape: _Optional[_Iterable[int]] = ..., - parameters: _Optional[_Mapping[str, InferParameter]] = ..., - contents: _Optional[_Union[InferTensorContents, _Mapping]] = ..., - ) -> None: ... - - class InferRequestedOutputTensor(_message.Message): - __slots__ = ["name", "parameters"] - - class ParametersEntry(_message.Message): - __slots__ = ["key", "value"] - KEY_FIELD_NUMBER: _ClassVar[int] - VALUE_FIELD_NUMBER: _ClassVar[int] - key: str - value: InferParameter - def __init__( - self, - key: _Optional[str] = ..., - value: _Optional[_Union[InferParameter, _Mapping]] = ..., - ) -> None: ... - - NAME_FIELD_NUMBER: _ClassVar[int] - PARAMETERS_FIELD_NUMBER: _ClassVar[int] - name: str - parameters: _containers.MessageMap[str, InferParameter] - def __init__( - self, - name: _Optional[str] = ..., - parameters: _Optional[_Mapping[str, InferParameter]] = ..., - ) -> None: ... 
- - class ParametersEntry(_message.Message): - __slots__ = ["key", "value"] - KEY_FIELD_NUMBER: _ClassVar[int] - VALUE_FIELD_NUMBER: _ClassVar[int] - key: str - value: InferParameter - def __init__( - self, - key: _Optional[str] = ..., - value: _Optional[_Union[InferParameter, _Mapping]] = ..., - ) -> None: ... - - ID_FIELD_NUMBER: _ClassVar[int] - INPUTS_FIELD_NUMBER: _ClassVar[int] - MODEL_NAME_FIELD_NUMBER: _ClassVar[int] - MODEL_VERSION_FIELD_NUMBER: _ClassVar[int] - OUTPUTS_FIELD_NUMBER: _ClassVar[int] - PARAMETERS_FIELD_NUMBER: _ClassVar[int] - RAW_INPUT_CONTENTS_FIELD_NUMBER: _ClassVar[int] - id: str - inputs: _containers.RepeatedCompositeFieldContainer[ - ModelInferRequest.InferInputTensor - ] - model_name: str - model_version: str - outputs: _containers.RepeatedCompositeFieldContainer[ - ModelInferRequest.InferRequestedOutputTensor - ] - parameters: _containers.MessageMap[str, InferParameter] - raw_input_contents: _containers.RepeatedScalarFieldContainer[bytes] - def __init__( - self, - model_name: _Optional[str] = ..., - model_version: _Optional[str] = ..., - id: _Optional[str] = ..., - parameters: _Optional[_Mapping[str, InferParameter]] = ..., - inputs: _Optional[ - _Iterable[_Union[ModelInferRequest.InferInputTensor, _Mapping]] - ] = ..., - outputs: _Optional[ - _Iterable[_Union[ModelInferRequest.InferRequestedOutputTensor, _Mapping]] - ] = ..., - raw_input_contents: _Optional[_Iterable[bytes]] = ..., - ) -> None: ... - -class ModelInferResponse(_message.Message): - __slots__ = [ - "id", - "model_name", - "model_version", - "outputs", - "parameters", - "raw_output_contents", - ] - - class InferOutputTensor(_message.Message): - __slots__ = ["contents", "datatype", "name", "parameters", "shape"] - - class ParametersEntry(_message.Message): - __slots__ = ["key", "value"] - KEY_FIELD_NUMBER: _ClassVar[int] - VALUE_FIELD_NUMBER: _ClassVar[int] - key: str - value: InferParameter - def __init__( - self, - key: _Optional[str] = ..., - value: _Optional[_Union[InferParameter, _Mapping]] = ..., - ) -> None: ... - - CONTENTS_FIELD_NUMBER: _ClassVar[int] - DATATYPE_FIELD_NUMBER: _ClassVar[int] - NAME_FIELD_NUMBER: _ClassVar[int] - PARAMETERS_FIELD_NUMBER: _ClassVar[int] - SHAPE_FIELD_NUMBER: _ClassVar[int] - contents: InferTensorContents - datatype: str - name: str - parameters: _containers.MessageMap[str, InferParameter] - shape: _containers.RepeatedScalarFieldContainer[int] - def __init__( - self, - name: _Optional[str] = ..., - datatype: _Optional[str] = ..., - shape: _Optional[_Iterable[int]] = ..., - parameters: _Optional[_Mapping[str, InferParameter]] = ..., - contents: _Optional[_Union[InferTensorContents, _Mapping]] = ..., - ) -> None: ... - - class ParametersEntry(_message.Message): - __slots__ = ["key", "value"] - KEY_FIELD_NUMBER: _ClassVar[int] - VALUE_FIELD_NUMBER: _ClassVar[int] - key: str - value: InferParameter - def __init__( - self, - key: _Optional[str] = ..., - value: _Optional[_Union[InferParameter, _Mapping]] = ..., - ) -> None: ... 
- - ID_FIELD_NUMBER: _ClassVar[int] - MODEL_NAME_FIELD_NUMBER: _ClassVar[int] - MODEL_VERSION_FIELD_NUMBER: _ClassVar[int] - OUTPUTS_FIELD_NUMBER: _ClassVar[int] - PARAMETERS_FIELD_NUMBER: _ClassVar[int] - RAW_OUTPUT_CONTENTS_FIELD_NUMBER: _ClassVar[int] - id: str - model_name: str - model_version: str - outputs: _containers.RepeatedCompositeFieldContainer[ - ModelInferResponse.InferOutputTensor - ] - parameters: _containers.MessageMap[str, InferParameter] - raw_output_contents: _containers.RepeatedScalarFieldContainer[bytes] - def __init__( - self, - model_name: _Optional[str] = ..., - model_version: _Optional[str] = ..., - id: _Optional[str] = ..., - parameters: _Optional[_Mapping[str, InferParameter]] = ..., - outputs: _Optional[ - _Iterable[_Union[ModelInferResponse.InferOutputTensor, _Mapping]] - ] = ..., - raw_output_contents: _Optional[_Iterable[bytes]] = ..., - ) -> None: ... - -class ModelMetadataRequest(_message.Message): - __slots__ = ["name", "version"] - NAME_FIELD_NUMBER: _ClassVar[int] - VERSION_FIELD_NUMBER: _ClassVar[int] - name: str - version: str - def __init__( - self, name: _Optional[str] = ..., version: _Optional[str] = ... - ) -> None: ... - -class ModelMetadataResponse(_message.Message): - __slots__ = ["inputs", "name", "outputs", "platform", "versions"] - - class TensorMetadata(_message.Message): - __slots__ = ["datatype", "name", "shape"] - DATATYPE_FIELD_NUMBER: _ClassVar[int] - NAME_FIELD_NUMBER: _ClassVar[int] - SHAPE_FIELD_NUMBER: _ClassVar[int] - datatype: str - name: str - shape: _containers.RepeatedScalarFieldContainer[int] - def __init__( - self, - name: _Optional[str] = ..., - datatype: _Optional[str] = ..., - shape: _Optional[_Iterable[int]] = ..., - ) -> None: ... - - INPUTS_FIELD_NUMBER: _ClassVar[int] - NAME_FIELD_NUMBER: _ClassVar[int] - OUTPUTS_FIELD_NUMBER: _ClassVar[int] - PLATFORM_FIELD_NUMBER: _ClassVar[int] - VERSIONS_FIELD_NUMBER: _ClassVar[int] - inputs: _containers.RepeatedCompositeFieldContainer[ - ModelMetadataResponse.TensorMetadata - ] - name: str - outputs: _containers.RepeatedCompositeFieldContainer[ - ModelMetadataResponse.TensorMetadata - ] - platform: str - versions: _containers.RepeatedScalarFieldContainer[str] - def __init__( - self, - name: _Optional[str] = ..., - versions: _Optional[_Iterable[str]] = ..., - platform: _Optional[str] = ..., - inputs: _Optional[ - _Iterable[_Union[ModelMetadataResponse.TensorMetadata, _Mapping]] - ] = ..., - outputs: _Optional[ - _Iterable[_Union[ModelMetadataResponse.TensorMetadata, _Mapping]] - ] = ..., - ) -> None: ... - -class ModelReadyRequest(_message.Message): - __slots__ = ["name", "version"] - NAME_FIELD_NUMBER: _ClassVar[int] - VERSION_FIELD_NUMBER: _ClassVar[int] - name: str - version: str - def __init__( - self, name: _Optional[str] = ..., version: _Optional[str] = ... - ) -> None: ... - -class ModelReadyResponse(_message.Message): - __slots__ = ["ready"] - READY_FIELD_NUMBER: _ClassVar[int] - ready: bool - def __init__(self, ready: bool = ...) -> None: ... - -class RepositoryModelLoadRequest(_message.Message): - __slots__ = ["model_name"] - MODEL_NAME_FIELD_NUMBER: _ClassVar[int] - model_name: str - def __init__(self, model_name: _Optional[str] = ...) -> None: ... - -class RepositoryModelLoadResponse(_message.Message): - __slots__ = ["isLoaded", "model_name"] - ISLOADED_FIELD_NUMBER: _ClassVar[int] - MODEL_NAME_FIELD_NUMBER: _ClassVar[int] - isLoaded: bool - model_name: str - def __init__( - self, model_name: _Optional[str] = ..., isLoaded: bool = ... - ) -> None: ... 
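On the response side, the proto comments above note that output tensor data arrives either in the typed `contents` field or in `raw_output_contents`. A small illustrative helper for the typed FP32 case, assuming the same generated module (the function name is hypothetical):

    import numpy as np

    import hsml.client.istio.grpc.proto.grpc_predict_v2_pb2 as pb2

    def fp32_output_to_numpy(response: pb2.ModelInferResponse, index: int = 0) -> np.ndarray:
        # Reads one output tensor; assumes the server populated the typed
        # `fp32_contents` field rather than `raw_output_contents`.
        out = response.outputs[index]
        return np.asarray(out.contents.fp32_contents, dtype=np.float32).reshape(tuple(out.shape))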
- -class RepositoryModelUnloadRequest(_message.Message): - __slots__ = ["model_name"] - MODEL_NAME_FIELD_NUMBER: _ClassVar[int] - model_name: str - def __init__(self, model_name: _Optional[str] = ...) -> None: ... - -class RepositoryModelUnloadResponse(_message.Message): - __slots__ = ["isUnloaded", "model_name"] - ISUNLOADED_FIELD_NUMBER: _ClassVar[int] - MODEL_NAME_FIELD_NUMBER: _ClassVar[int] - isUnloaded: bool - model_name: str - def __init__( - self, model_name: _Optional[str] = ..., isUnloaded: bool = ... - ) -> None: ... - -class ServerLiveRequest(_message.Message): - __slots__ = [] - def __init__(self) -> None: ... - -class ServerLiveResponse(_message.Message): - __slots__ = ["live"] - LIVE_FIELD_NUMBER: _ClassVar[int] - live: bool - def __init__(self, live: bool = ...) -> None: ... - -class ServerMetadataRequest(_message.Message): - __slots__ = [] - def __init__(self) -> None: ... - -class ServerMetadataResponse(_message.Message): - __slots__ = ["extensions", "name", "version"] - EXTENSIONS_FIELD_NUMBER: _ClassVar[int] - NAME_FIELD_NUMBER: _ClassVar[int] - VERSION_FIELD_NUMBER: _ClassVar[int] - extensions: _containers.RepeatedScalarFieldContainer[str] - name: str - version: str - def __init__( - self, - name: _Optional[str] = ..., - version: _Optional[str] = ..., - extensions: _Optional[_Iterable[str]] = ..., - ) -> None: ... - -class ServerReadyRequest(_message.Message): - __slots__ = [] - def __init__(self) -> None: ... - -class ServerReadyResponse(_message.Message): - __slots__ = ["ready"] - READY_FIELD_NUMBER: _ClassVar[int] - ready: bool - def __init__(self, ready: bool = ...) -> None: ... diff --git a/hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2_grpc.py b/hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2_grpc.py deleted file mode 100644 index a5f986c20..000000000 --- a/hsml/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2_grpc.py +++ /dev/null @@ -1,419 +0,0 @@ -# Copyright 2022 The KServe Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" - -import hsml.client.istio.grpc.inference_client as inference_client -import hsml.client.istio.grpc.proto.grpc_predict_v2_pb2 as grpc__predict__v2__pb2 - - -class GRPCInferenceServiceStub(object): - """Inference Server GRPC endpoints.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. 
- """ - self.ServerLive = channel.unary_unary( - "/inference.GRPCInferenceService/ServerLive", - request_serializer=grpc__predict__v2__pb2.ServerLiveRequest.SerializeToString, - response_deserializer=grpc__predict__v2__pb2.ServerLiveResponse.FromString, - ) - self.ServerReady = channel.unary_unary( - "/inference.GRPCInferenceService/ServerReady", - request_serializer=grpc__predict__v2__pb2.ServerReadyRequest.SerializeToString, - response_deserializer=grpc__predict__v2__pb2.ServerReadyResponse.FromString, - ) - self.ModelReady = channel.unary_unary( - "/inference.GRPCInferenceService/ModelReady", - request_serializer=grpc__predict__v2__pb2.ModelReadyRequest.SerializeToString, - response_deserializer=grpc__predict__v2__pb2.ModelReadyResponse.FromString, - ) - self.ServerMetadata = channel.unary_unary( - "/inference.GRPCInferenceService/ServerMetadata", - request_serializer=grpc__predict__v2__pb2.ServerMetadataRequest.SerializeToString, - response_deserializer=grpc__predict__v2__pb2.ServerMetadataResponse.FromString, - ) - self.ModelMetadata = channel.unary_unary( - "/inference.GRPCInferenceService/ModelMetadata", - request_serializer=grpc__predict__v2__pb2.ModelMetadataRequest.SerializeToString, - response_deserializer=grpc__predict__v2__pb2.ModelMetadataResponse.FromString, - ) - self.ModelInfer = channel.unary_unary( - "/inference.GRPCInferenceService/ModelInfer", - request_serializer=grpc__predict__v2__pb2.ModelInferRequest.SerializeToString, - response_deserializer=grpc__predict__v2__pb2.ModelInferResponse.FromString, - ) - self.RepositoryModelLoad = channel.unary_unary( - "/inference.GRPCInferenceService/RepositoryModelLoad", - request_serializer=grpc__predict__v2__pb2.RepositoryModelLoadRequest.SerializeToString, - response_deserializer=grpc__predict__v2__pb2.RepositoryModelLoadResponse.FromString, - ) - self.RepositoryModelUnload = channel.unary_unary( - "/inference.GRPCInferenceService/RepositoryModelUnload", - request_serializer=grpc__predict__v2__pb2.RepositoryModelUnloadRequest.SerializeToString, - response_deserializer=grpc__predict__v2__pb2.RepositoryModelUnloadResponse.FromString, - ) - - -class GRPCInferenceServiceServicer(object): - """Inference Server GRPC endpoints.""" - - def ServerLive(self, request, context): - """The ServerLive API indicates if the inference server is able to receive - and respond to metadata and inference requests. - """ - context.set_code(inference_client.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def ServerReady(self, request, context): - """The ServerReady API indicates if the server is ready for inferencing.""" - context.set_code(inference_client.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def ModelReady(self, request, context): - """The ModelReady API indicates if a specific model is ready for inferencing.""" - context.set_code(inference_client.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def ServerMetadata(self, request, context): - """The ServerMetadata API provides information about the server. Errors are - indicated by the google.rpc.Status returned for the request. The OK code - indicates success and other codes indicate failure. 
- """ - context.set_code(inference_client.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def ModelMetadata(self, request, context): - """The per-model metadata API provides information about a model. Errors are - indicated by the google.rpc.Status returned for the request. The OK code - indicates success and other codes indicate failure. - """ - context.set_code(inference_client.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def ModelInfer(self, request, context): - """The ModelInfer API performs inference using the specified model. Errors are - indicated by the google.rpc.Status returned for the request. The OK code - indicates success and other codes indicate failure. - """ - context.set_code(inference_client.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def RepositoryModelLoad(self, request, context): - """Load or reload a model from a repository.""" - context.set_code(inference_client.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def RepositoryModelUnload(self, request, context): - """Unload a model.""" - context.set_code(inference_client.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - -def add_GRPCInferenceServiceServicer_to_server(servicer, server): - rpc_method_handlers = { - "ServerLive": inference_client.unary_unary_rpc_method_handler( - servicer.ServerLive, - request_deserializer=grpc__predict__v2__pb2.ServerLiveRequest.FromString, - response_serializer=grpc__predict__v2__pb2.ServerLiveResponse.SerializeToString, - ), - "ServerReady": inference_client.unary_unary_rpc_method_handler( - servicer.ServerReady, - request_deserializer=grpc__predict__v2__pb2.ServerReadyRequest.FromString, - response_serializer=grpc__predict__v2__pb2.ServerReadyResponse.SerializeToString, - ), - "ModelReady": inference_client.unary_unary_rpc_method_handler( - servicer.ModelReady, - request_deserializer=grpc__predict__v2__pb2.ModelReadyRequest.FromString, - response_serializer=grpc__predict__v2__pb2.ModelReadyResponse.SerializeToString, - ), - "ServerMetadata": inference_client.unary_unary_rpc_method_handler( - servicer.ServerMetadata, - request_deserializer=grpc__predict__v2__pb2.ServerMetadataRequest.FromString, - response_serializer=grpc__predict__v2__pb2.ServerMetadataResponse.SerializeToString, - ), - "ModelMetadata": inference_client.unary_unary_rpc_method_handler( - servicer.ModelMetadata, - request_deserializer=grpc__predict__v2__pb2.ModelMetadataRequest.FromString, - response_serializer=grpc__predict__v2__pb2.ModelMetadataResponse.SerializeToString, - ), - "ModelInfer": inference_client.unary_unary_rpc_method_handler( - servicer.ModelInfer, - request_deserializer=grpc__predict__v2__pb2.ModelInferRequest.FromString, - response_serializer=grpc__predict__v2__pb2.ModelInferResponse.SerializeToString, - ), - "RepositoryModelLoad": inference_client.unary_unary_rpc_method_handler( - servicer.RepositoryModelLoad, - request_deserializer=grpc__predict__v2__pb2.RepositoryModelLoadRequest.FromString, - response_serializer=grpc__predict__v2__pb2.RepositoryModelLoadResponse.SerializeToString, - ), - "RepositoryModelUnload": inference_client.unary_unary_rpc_method_handler( - 
servicer.RepositoryModelUnload, - request_deserializer=grpc__predict__v2__pb2.RepositoryModelUnloadRequest.FromString, - response_serializer=grpc__predict__v2__pb2.RepositoryModelUnloadResponse.SerializeToString, - ), - } - generic_handler = inference_client.method_handlers_generic_handler( - "inference.GRPCInferenceService", rpc_method_handlers - ) - server.add_generic_rpc_handlers((generic_handler,)) - - -# This class is part of an EXPERIMENTAL API. -class GRPCInferenceService(object): - """Inference Server GRPC endpoints.""" - - @staticmethod - def ServerLive( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return inference_client.experimental.unary_unary( - request, - target, - "/inference.GRPCInferenceService/ServerLive", - grpc__predict__v2__pb2.ServerLiveRequest.SerializeToString, - grpc__predict__v2__pb2.ServerLiveResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def ServerReady( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return inference_client.experimental.unary_unary( - request, - target, - "/inference.GRPCInferenceService/ServerReady", - grpc__predict__v2__pb2.ServerReadyRequest.SerializeToString, - grpc__predict__v2__pb2.ServerReadyResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def ModelReady( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return inference_client.experimental.unary_unary( - request, - target, - "/inference.GRPCInferenceService/ModelReady", - grpc__predict__v2__pb2.ModelReadyRequest.SerializeToString, - grpc__predict__v2__pb2.ModelReadyResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def ServerMetadata( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return inference_client.experimental.unary_unary( - request, - target, - "/inference.GRPCInferenceService/ServerMetadata", - grpc__predict__v2__pb2.ServerMetadataRequest.SerializeToString, - grpc__predict__v2__pb2.ServerMetadataResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def ModelMetadata( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return inference_client.experimental.unary_unary( - request, - target, - "/inference.GRPCInferenceService/ModelMetadata", - grpc__predict__v2__pb2.ModelMetadataRequest.SerializeToString, - grpc__predict__v2__pb2.ModelMetadataResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def ModelInfer( - request, - 
target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return inference_client.experimental.unary_unary( - request, - target, - "/inference.GRPCInferenceService/ModelInfer", - grpc__predict__v2__pb2.ModelInferRequest.SerializeToString, - grpc__predict__v2__pb2.ModelInferResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def RepositoryModelLoad( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return inference_client.experimental.unary_unary( - request, - target, - "/inference.GRPCInferenceService/RepositoryModelLoad", - grpc__predict__v2__pb2.RepositoryModelLoadRequest.SerializeToString, - grpc__predict__v2__pb2.RepositoryModelLoadResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def RepositoryModelUnload( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return inference_client.experimental.unary_unary( - request, - target, - "/inference.GRPCInferenceService/RepositoryModelUnload", - grpc__predict__v2__pb2.RepositoryModelUnloadRequest.SerializeToString, - grpc__predict__v2__pb2.RepositoryModelUnloadResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) diff --git a/hsml/python/hsml/client/istio/internal.py b/hsml/python/hsml/client/istio/internal.py deleted file mode 100644 index b1befd39d..000000000 --- a/hsml/python/hsml/client/istio/internal.py +++ /dev/null @@ -1,206 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import base64 -import os -import textwrap -from pathlib import Path - -import requests -from hsml.client import auth, exceptions -from hsml.client.istio import base as istio - - -try: - import jks -except ImportError: - pass - - -class Client(istio.Client): - REQUESTS_VERIFY = "REQUESTS_VERIFY" - PROJECT_ID = "HOPSWORKS_PROJECT_ID" - PROJECT_NAME = "HOPSWORKS_PROJECT_NAME" - HADOOP_USER_NAME = "HADOOP_USER_NAME" - HDFS_USER = "HDFS_USER" - - DOMAIN_CA_TRUSTSTORE_PEM = "DOMAIN_CA_TRUSTSTORE_PEM" - MATERIAL_DIRECTORY = "MATERIAL_DIRECTORY" - T_CERTIFICATE = "t_certificate" - K_CERTIFICATE = "k_certificate" - TRUSTSTORE_SUFFIX = "__tstore.jks" - KEYSTORE_SUFFIX = "__kstore.jks" - PEM_CA_CHAIN = "ca_chain.pem" - CERT_KEY_SUFFIX = "__cert.key" - MATERIAL_PWD = "material_passwd" - SECRETS_DIR = "SECRETS_DIR" - - def __init__(self, host, port): - """Initializes a client being run from a job/notebook directly on Hopsworks.""" - self._host = host - self._port = port - self._base_url = "http://" + self._host + ":" + str(self._port) - - trust_store_path = self._get_trust_store_path() - hostname_verification = ( - os.environ[self.REQUESTS_VERIFY] - if self.REQUESTS_VERIFY in os.environ - else "true" - ) - self._project_id = os.environ[self.PROJECT_ID] - self._project_name = self._project_name() - self._auth = auth.ApiKeyAuth(self._get_serving_api_key()) - self._verify = self._get_verify(hostname_verification, trust_store_path) - self._session = requests.session() - - self._connected = True - - def _project_name(self): - try: - return os.environ[self.PROJECT_NAME] - except KeyError: - pass - - hops_user = self._project_user() - hops_user_split = hops_user.split( - "__" - ) # project users have username project__user - project = hops_user_split[0] - return project - - def _project_user(self): - try: - hops_user = os.environ[self.HADOOP_USER_NAME] - except KeyError: - hops_user = os.environ[self.HDFS_USER] - return hops_user - - def _get_trust_store_path(self): - """Convert truststore from jks to pem and return the location""" - ca_chain_path = Path(self.PEM_CA_CHAIN) - if not ca_chain_path.exists(): - self._write_ca_chain(ca_chain_path) - return str(ca_chain_path) - - def _write_ca_chain(self, ca_chain_path): - """ - Converts JKS trustore file into PEM to be compatible with Python libraries - """ - keystore_pw = self._cert_key - keystore_ca_cert = self._convert_jks_to_pem( - self._get_jks_key_store_path(), keystore_pw - ) - truststore_ca_cert = self._convert_jks_to_pem( - self._get_jks_trust_store_path(), keystore_pw - ) - - with ca_chain_path.open("w") as f: - f.write(keystore_ca_cert + truststore_ca_cert) - - def _convert_jks_to_pem(self, jks_path, keystore_pw): - """ - Converts a keystore JKS that contains client private key, - client certificate and CA certificate that was used to - sign the certificate to PEM format and returns the CA certificate. 
- Args: - :jks_path: path to the JKS file - :pw: password for decrypting the JKS file - Returns: - strings: (ca_cert) - """ - # load the keystore and decrypt it with password - ks = jks.KeyStore.load(jks_path, keystore_pw, try_decrypt_keys=True) - ca_certs = "" - - # Convert CA Certificates into PEM format and append to string - for _alias, c in ks.certs.items(): - ca_certs = ca_certs + self._bytes_to_pem_str(c.cert, "CERTIFICATE") - return ca_certs - - def _bytes_to_pem_str(self, der_bytes, pem_type): - """ - Utility function for creating PEM files - - Args: - der_bytes: DER encoded bytes - pem_type: type of PEM, e.g Certificate, Private key, or RSA private key - - Returns: - PEM String for a DER-encoded certificate or private key - """ - pem_str = "" - pem_str = pem_str + "-----BEGIN {}-----".format(pem_type) + "\n" - pem_str = ( - pem_str - + "\r\n".join( - textwrap.wrap(base64.b64encode(der_bytes).decode("ascii"), 64) - ) - + "\n" - ) - pem_str = pem_str + "-----END {}-----".format(pem_type) + "\n" - return pem_str - - def _get_jks_trust_store_path(self): - """ - Get truststore location - - Returns: - truststore location - """ - t_certificate = Path(self.T_CERTIFICATE) - if t_certificate.exists(): - return str(t_certificate) - else: - username = os.environ[self.HADOOP_USER_NAME] - material_directory = Path(os.environ[self.MATERIAL_DIRECTORY]) - return str(material_directory.joinpath(username + self.TRUSTSTORE_SUFFIX)) - - def _get_jks_key_store_path(self): - """ - Get keystore location - - Returns: - keystore location - """ - k_certificate = Path(self.K_CERTIFICATE) - if k_certificate.exists(): - return str(k_certificate) - else: - username = os.environ[self.HADOOP_USER_NAME] - material_directory = Path(os.environ[self.MATERIAL_DIRECTORY]) - return str(material_directory.joinpath(username + self.KEYSTORE_SUFFIX)) - - def _get_cert_pw(self): - """ - Get keystore password from local container - - Returns: - Certificate password - """ - pwd_path = Path(self.MATERIAL_PWD) - if not pwd_path.exists(): - username = os.environ[self.HADOOP_USER_NAME] - material_directory = Path(os.environ[self.MATERIAL_DIRECTORY]) - pwd_path = material_directory.joinpath(username + self.CERT_KEY_SUFFIX) - - with pwd_path.open() as f: - return f.read() - - def _get_serving_api_key(self): - """Retrieve serving API key from environment variable.""" - if self.SERVING_API_KEY not in os.environ: - raise exceptions.InternalClientError("Serving API key not found") - return os.environ[self.SERVING_API_KEY] diff --git a/hsml/python/hsml/client/istio/utils/__init__.py b/hsml/python/hsml/client/istio/utils/__init__.py deleted file mode 100644 index ff8055b9b..000000000 --- a/hsml/python/hsml/client/istio/utils/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# diff --git a/hsml/python/hsml/client/istio/utils/infer_type.py b/hsml/python/hsml/client/istio/utils/infer_type.py deleted file mode 100644 index e1dd2ab92..000000000 --- a/hsml/python/hsml/client/istio/utils/infer_type.py +++ /dev/null @@ -1,812 +0,0 @@ -# Copyright 2023 The KServe Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This implementation has been borrowed from kserve/kserve repository -# https://github.com/kserve/kserve/blob/release-0.11/python/kserve/kserve/protocol/infer_type.py - -import struct -from typing import Dict, List, Optional - -import numpy -import numpy as np -import pandas as pd -from hsml.client.istio.grpc.errors import InvalidInput -from hsml.client.istio.grpc.proto.grpc_predict_v2_pb2 import ( - InferTensorContents, - ModelInferRequest, - ModelInferResponse, -) -from hsml.client.istio.utils.numpy_codec import from_np_dtype, to_np_dtype - - -GRPC_CONTENT_DATATYPE_MAPPINGS = { - "BOOL": "bool_contents", - "INT8": "int_contents", - "INT16": "int_contents", - "INT32": "int_contents", - "INT64": "int64_contents", - "UINT8": "uint_contents", - "UINT16": "uint_contents", - "UINT32": "uint_contents", - "UINT64": "uint64_contents", - "FP32": "fp32_contents", - "FP64": "fp64_contents", - "BYTES": "bytes_contents", -} - - -def raise_error(msg): - """ - Raise error with the provided message - """ - raise InferenceServerException(msg=msg) from None - - -def serialize_byte_tensor(input_tensor): - """ - Serializes a bytes tensor into a flat numpy array of length prepended - bytes. The numpy array should use dtype of np.object. For np.bytes, - numpy will remove trailing zeros at the end of byte sequence and because - of this it should be avoided. - - Parameters - ---------- - input_tensor : np.array - The bytes tensor to serialize. - - Returns - ------- - serialized_bytes_tensor : np.array - The 1-D numpy array of type uint8 containing the serialized bytes in row-major form. - - Raises - ------ - InferenceServerException - If unable to serialize the given tensor. - """ - - if input_tensor.size == 0: - return np.empty([0], dtype=np.object_) - - # If the input is a tensor of string/bytes objects, then must flatten those into - # a 1-dimensional array containing the 4-byte byte size followed by the - # actual element bytes. All elements are concatenated together in row-major - # order. - - if (input_tensor.dtype != np.object_) and (input_tensor.dtype.type != np.bytes_): - raise_error("cannot serialize bytes tensor: invalid datatype") - - flattened_ls = [] - # 'C' order is row-major. 
-    for obj in np.nditer(input_tensor, flags=["refs_ok"], order="C"):
-        # If directly passing bytes to BYTES type,
-        # don't convert it to str as Python will encode the
-        # bytes which may distort the meaning
-        if input_tensor.dtype == np.object_:
-            if isinstance(obj.item(), bytes):
-                s = obj.item()
-            else:
-                s = str(obj.item()).encode("utf-8")
-        else:
-            s = obj.item()
-        flattened_ls.append(struct.pack("<I", len(s)))
-        flattened_ls.append(s)
-    flattened = b"".join(flattened_ls)
-    flattened_array = np.asarray(flattened, dtype=np.object_)
-    if not flattened_array.flags["C_CONTIGUOUS"]:
-        flattened_array = np.ascontiguousarray(flattened_array, dtype=np.object_)
-    return flattened_array
-
-
-class InferInput:
-    def __init__(self, name, shape, datatype, data=None, parameters=None):
-        """An object of InferInput class is used to describe
-        input tensor for an inference request.
-        Parameters
-        ----------
-        name : str
-            The name of input whose data will be described by this object
-        shape : list
-            The shape of the associated input.
-        datatype : str
-            The datatype of the associated input.
-        data : Union[List, InferTensorContents]
-            The data of the REST/gRPC input. When data is not set, raw_data is used for gRPC for numpy array bytes.
-        parameters : dict
-            The additional server-specific parameters.
-        """
-        if parameters is None:
-            parameters = {}
-        self._name = name
-        self._shape = shape
-        self._datatype = datatype
-        self._parameters = parameters
-        self._data = data
-        self._raw_data = None
-
-    @property
-    def name(self):
-        """Get the name of input associated with this object.
-        Returns
-        -------
-        str
-            The name of input
-        """
-        return self._name
-
-    @property
-    def datatype(self):
-        """Get the datatype of input associated with this object.
-        Returns
-        -------
-        str
-            The datatype of input
-        """
-        return self._datatype
-
-    @property
-    def data(self):
-        """Get the data of InferInput"""
-        return self._data
-
-    @property
-    def shape(self):
-        """Get the shape of input associated with this object.
-        Returns
-        -------
-        list
-            The shape of input
-        """
-        return self._shape
-
-    @property
-    def parameters(self):
-        """Get the parameters of input associated with this object.
-        Returns
-        -------
-        dict
-            The key, value pair of string and InferParameter
-        """
-        return self._parameters
-
-    def set_shape(self, shape):
-        """Set the shape of input.
-        Parameters
-        ----------
-        shape : list
-            The shape of the associated input.
-        """
-        self._shape = shape
-
-    def as_numpy(self) -> np.ndarray:
-        dtype = to_np_dtype(self.datatype)
-        if dtype is None:
-            raise InvalidInput("invalid datatype in the input")
-        if self._raw_data is not None:
-            np_array = np.frombuffer(self._raw_data, dtype=dtype)
-            return np_array.reshape(self._shape)
-        else:
-            np_array = np.array(self._data, dtype=dtype)
-            return np_array.reshape(self._shape)
-
-    def set_data_from_numpy(self, input_tensor, binary_data=True):
-        """Set the tensor data from the specified numpy array for
-        input associated with this object.
-        Parameters
-        ----------
-        input_tensor : numpy array
-            The tensor data in numpy array format
-        binary_data : bool
-            Indicates whether to set data for the input in binary format
-            or explicit tensor within JSON. The default value is True,
-            which means the data will be delivered as binary data in the
-            HTTP body after the JSON object.
-        Raises
-        ------
-        InferenceServerException
-            If failed to set data for the tensor.
-        """
-        if not isinstance(input_tensor, (np.ndarray,)):
-            raise_error("input_tensor must be a numpy array")
-
-        dtype = from_np_dtype(input_tensor.dtype)
-        if self._datatype != dtype:
-            raise_error(
-                "got unexpected datatype {} from numpy array, expected {}".format(
-                    dtype, self._datatype
-                )
-            )
-        valid_shape = True
-        if len(self._shape) != len(input_tensor.shape):
-            valid_shape = False
-        else:
-            for i in range(len(self._shape)):
-                if self._shape[i] != input_tensor.shape[i]:
-                    valid_shape = False
-        if not valid_shape:
-            raise_error(
-                "got unexpected numpy array shape [{}], expected [{}]".format(
-                    str(input_tensor.shape)[1:-1], str(self._shape)[1:-1]
-                )
-            )
-
-        if not binary_data:
-            self._parameters.pop("binary_data_size", None)
-            self._raw_data = None
-            if self._datatype == "BYTES":
-                self._data = []
-                try:
-                    if input_tensor.size > 0:
-                        for obj in np.nditer(
-                            input_tensor, flags=["refs_ok"], order="C"
-                        ):
-                            # We need to convert the object to string using utf-8,
-                            # if we want to use the binary_data=False. JSON requires
-                            # the input to be a UTF-8 string.
-                            if input_tensor.dtype == np.object_:
-                                if isinstance(obj.item(), bytes):
-                                    self._data.append(str(obj.item(), encoding="utf-8"))
-                                else:
-                                    self._data.append(str(obj.item()))
-                            else:
-                                self._data.append(str(obj.item(), encoding="utf-8"))
-                except UnicodeDecodeError:
-                    raise_error(
-                        f'Failed to encode "{obj.item()}" using UTF-8. Please use binary_data=True, if'
-                        " you want to pass a byte array."
- ) - else: - self._data = [val.item() for val in input_tensor.flatten()] - else: - self._data = None - if self._datatype == "BYTES": - serialized_output = serialize_byte_tensor(input_tensor) - if serialized_output.size > 0: - self._raw_data = serialized_output.item() - else: - self._raw_data = b"" - else: - self._raw_data = input_tensor.tobytes() - self._parameters["binary_data_size"] = len(self._raw_data) - - -def get_content(datatype: str, data: InferTensorContents): - if datatype == "BOOL": - return list(data.bool_contents) - elif datatype in ["UINT8", "UINT16", "UINT32"]: - return list(data.uint_contents) - elif datatype == "UINT64": - return list(data.uint64_contents) - elif datatype in ["INT8", "INT16", "INT32"]: - return list(data.int_contents) - elif datatype == "INT64": - return list(data.int64_contents) - elif datatype == "FP32": - return list(data.fp32_contents) - elif datatype == "FP64": - return list(data.fp64_contents) - elif datatype == "BYTES": - return list(data.bytes_contents) - else: - raise InvalidInput("invalid content type") - - -class InferRequest: - """InferenceRequest Model - - $inference_request = - { - "id" : $string #optional, - "parameters" : $parameters #optional, - "inputs" : [ $request_input, ... ], - "outputs" : [ $request_output, ... ] #optional - } - """ - - id: Optional[str] - model_name: str - parameters: Optional[Dict] - inputs: List[InferInput] - from_grpc: bool - - def __init__( - self, - model_name: str, - infer_inputs: List[InferInput], - request_id=None, - raw_inputs=None, - from_grpc=False, - parameters=None, - ): - if parameters is None: - parameters = {} - self.id = request_id - self.model_name = model_name - self.inputs = infer_inputs - self.parameters = parameters - self.from_grpc = from_grpc - if raw_inputs: - for i, raw_input in enumerate(raw_inputs): - self.inputs[i]._raw_data = raw_input - - @classmethod - def from_grpc(cls, request: ModelInferRequest): - infer_inputs = [ - InferInput( - name=input_tensor.name, - shape=list(input_tensor.shape), - datatype=input_tensor.datatype, - data=get_content(input_tensor.datatype, input_tensor.contents), - parameters=input_tensor.parameters, - ) - for input_tensor in request.inputs - ] - return cls( - request_id=request.id, - model_name=request.model_name, - infer_inputs=infer_inputs, - raw_inputs=request.raw_input_contents, - from_grpc=True, - parameters=request.parameters, - ) - - def to_rest(self) -> Dict: - """Converts the InferRequest object to v2 REST InferenceRequest message""" - infer_inputs = [] - for infer_input in self.inputs: - infer_input_dict = { - "name": infer_input.name, - "shape": infer_input.shape, - "datatype": infer_input.datatype, - } - if isinstance(infer_input.data, numpy.ndarray): - infer_input.set_data_from_numpy(infer_input.data, binary_data=False) - infer_input_dict["data"] = infer_input.data - else: - infer_input_dict["data"] = infer_input.data - infer_inputs.append(infer_input_dict) - return {"id": self.id, "inputs": infer_inputs} - - def to_grpc(self) -> ModelInferRequest: - """Converts the InferRequest object to gRPC ModelInferRequest message""" - infer_inputs = [] - raw_input_contents = [] - for infer_input in self.inputs: - if isinstance(infer_input.data, numpy.ndarray): - infer_input.set_data_from_numpy(infer_input.data, binary_data=True) - infer_input_dict = { - "name": infer_input.name, - "shape": infer_input.shape, - "datatype": infer_input.datatype, - } - if infer_input._raw_data is not None: - raw_input_contents.append(infer_input._raw_data) - else: - if not 
isinstance(infer_input.data, List): - raise InvalidInput("input data is not a List") - infer_input_dict["contents"] = {} - data_key = GRPC_CONTENT_DATATYPE_MAPPINGS.get( - infer_input.datatype, None - ) - if data_key is not None: - infer_input._data = [ - bytes(val, "utf-8") if isinstance(val, str) else val - for val in infer_input.data - ] # str to byte conversion for grpc proto - infer_input_dict["contents"][data_key] = infer_input.data - else: - raise InvalidInput("invalid input datatype") - infer_inputs.append(infer_input_dict) - - return ModelInferRequest( - id=self.id, - model_name=self.model_name, - inputs=infer_inputs, - raw_input_contents=raw_input_contents, - ) - - def as_dataframe(self) -> pd.DataFrame: - """ - Decode the tensor inputs as pandas dataframe - """ - dfs = [] - for input in self.inputs: - input_data = input.data - if input.datatype == "BYTES": - input_data = [ - str(val, "utf-8") if isinstance(val, bytes) else val - for val in input.data - ] - dfs.append(pd.DataFrame(input_data, columns=[input.name])) - return pd.concat(dfs, axis=1) - - -class InferOutput: - def __init__(self, name, shape, datatype, data=None, parameters=None): - """An object of InferOutput class is used to describe - input tensor for an inference request. - Parameters - ---------- - name : str - The name of input whose data will be described by this object - shape : list - The shape of the associated input. - datatype : str - The datatype of the associated input. - data : Union[List, InferTensorContents] - The data of the REST/gRPC input. When data is not set, raw_data is used for gRPC for numpy array bytes. - parameters : dict - The additional server-specific parameters. - """ - if parameters is None: - parameters = {} - self._name = name - self._shape = shape - self._datatype = datatype - self._parameters = parameters - self._data = data - self._raw_data = None - - @property - def name(self): - """Get the name of input associated with this object. - Returns - ------- - str - The name of input - """ - return self._name - - @property - def datatype(self): - """Get the datatype of input associated with this object. - Returns - ------- - str - The datatype of input - """ - return self._datatype - - @property - def data(self): - """Get the data of InferOutput""" - return self._data - - @property - def shape(self): - """Get the shape of input associated with this object. - Returns - ------- - list - The shape of input - """ - return self._shape - - @property - def parameters(self): - """Get the parameters of input associated with this object. - Returns - ------- - dict - The key, value pair of string and InferParameter - """ - return self._parameters - - def set_shape(self, shape): - """Set the shape of input. - Parameters - ---------- - shape : list - The shape of the associated input. - """ - self._shape = shape - - def as_numpy(self) -> numpy.ndarray: - """ - Decode the tensor data as numpy array - """ - dtype = to_np_dtype(self.datatype) - if dtype is None: - raise InvalidInput("invalid datatype in the input") - if self._raw_data is not None: - np_array = np.frombuffer(self._raw_data, dtype=dtype) - return np_array.reshape(self._shape) - else: - np_array = np.array(self._data, dtype=dtype) - return np_array.reshape(self._shape) - - def set_data_from_numpy(self, input_tensor, binary_data=True): - """Set the tensor data from the specified numpy array for - input associated with this object. 
- Parameters - ---------- - input_tensor : numpy array - The tensor data in numpy array format - binary_data : bool - Indicates whether to set data for the input in binary format - or explicit tensor within JSON. The default value is True, - which means the data will be delivered as binary data in the - HTTP body after the JSON object. - Raises - ------ - InferenceServerException - If failed to set data for the tensor. - """ - if not isinstance(input_tensor, (np.ndarray,)): - raise_error("input_tensor must be a numpy array") - - dtype = from_np_dtype(input_tensor.dtype) - if self._datatype != dtype: - raise_error( - "got unexpected datatype {} from numpy array, expected {}".format( - dtype, self._datatype - ) - ) - valid_shape = True - if len(self._shape) != len(input_tensor.shape): - valid_shape = False - else: - for i in range(len(self._shape)): - if self._shape[i] != input_tensor.shape[i]: - valid_shape = False - if not valid_shape: - raise_error( - "got unexpected numpy array shape [{}], expected [{}]".format( - str(input_tensor.shape)[1:-1], str(self._shape)[1:-1] - ) - ) - - if not binary_data: - self._parameters.pop("binary_data_size", None) - self._raw_data = None - if self._datatype == "BYTES": - self._data = [] - try: - if input_tensor.size > 0: - for obj in np.nditer( - input_tensor, flags=["refs_ok"], order="C" - ): - # We need to convert the object to string using utf-8, - # if we want to use the binary_data=False. JSON requires - # the input to be a UTF-8 string. - if input_tensor.dtype == np.object_: - if isinstance(obj.item(), bytes): - self._data.append(str(obj.item(), encoding="utf-8")) - else: - self._data.append(str(obj.item())) - else: - self._data.append(str(obj.item(), encoding="utf-8")) - except UnicodeDecodeError: - raise_error( - f'Failed to encode "{obj.item()}" using UTF-8. Please use binary_data=True, if' - " you want to pass a byte array." - ) - else: - self._data = [val.item() for val in input_tensor.flatten()] - else: - self._data = None - if self._datatype == "BYTES": - serialized_output = serialize_byte_tensor(input_tensor) - if serialized_output.size > 0: - self._raw_data = serialized_output.item() - else: - self._raw_data = b"" - else: - self._raw_data = input_tensor.tobytes() - self._parameters["binary_data_size"] = len(self._raw_data) - - -class InferResponse: - """InferenceResponse - - $inference_response = - { - "model_name" : $string, - "model_version" : $string #optional, - "id" : $string, - "parameters" : $parameters #optional, - "outputs" : [ $response_output, ... 
] - } - """ - - id: str - model_name: str - parameters: Optional[Dict] - outputs: List[InferOutput] - from_grpc: bool - - def __init__( - self, - response_id: str, - model_name: str, - infer_outputs: List[InferOutput], - raw_outputs=None, - from_grpc=False, - parameters=None, - ): - if parameters is None: - parameters = {} - self.id = response_id - self.model_name = model_name - self.outputs = infer_outputs - self.parameters = parameters - self.from_grpc = from_grpc - if raw_outputs: - for i, raw_output in enumerate(raw_outputs): - self.outputs[i]._raw_data = raw_output - - @classmethod - def from_grpc(cls, response: ModelInferResponse) -> "InferResponse": - infer_outputs = [ - InferOutput( - name=output.name, - shape=list(output.shape), - datatype=output.datatype, - data=get_content(output.datatype, output.contents), - parameters=output.parameters, - ) - for output in response.outputs - ] - return cls( - model_name=response.model_name, - response_id=response.id, - parameters=response.parameters, - infer_outputs=infer_outputs, - raw_outputs=response.raw_output_contents, - from_grpc=True, - ) - - @classmethod - def from_rest(cls, model_name: str, response: Dict) -> "InferResponse": - infer_outputs = [ - InferOutput( - name=output["name"], - shape=list(output["shape"]), - datatype=output["datatype"], - data=output["data"], - parameters=output.get("parameters", {}), - ) - for output in response["outputs"] - ] - return cls( - model_name=model_name, - response_id=response.get("id", None), - parameters=response.get("parameters", {}), - infer_outputs=infer_outputs, - ) - - def to_rest(self) -> Dict: - """Converts the InferResponse object to v2 REST InferenceRequest message""" - infer_outputs = [] - for infer_output in self.outputs: - infer_output_dict = { - "name": infer_output.name, - "shape": infer_output.shape, - "datatype": infer_output.datatype, - } - if isinstance(infer_output.data, numpy.ndarray): - infer_output.set_data_from_numpy(infer_output.data, binary_data=False) - infer_output_dict["data"] = infer_output.data - elif isinstance(infer_output._raw_data, bytes): - infer_output_dict["data"] = infer_output.as_numpy().tolist() - else: - infer_output_dict["data"] = infer_output.data - infer_outputs.append(infer_output_dict) - res = {"id": self.id, "model_name": self.model_name, "outputs": infer_outputs} - return res - - def to_grpc(self) -> ModelInferResponse: - """Converts the InferResponse object to gRPC ModelInferRequest message""" - infer_outputs = [] - raw_output_contents = [] - for infer_output in self.outputs: - if isinstance(infer_output.data, numpy.ndarray): - infer_output.set_data_from_numpy(infer_output.data, binary_data=True) - infer_output_dict = { - "name": infer_output.name, - "shape": infer_output.shape, - "datatype": infer_output.datatype, - } - if infer_output._raw_data is not None: - raw_output_contents.append(infer_output._raw_data) - else: - if not isinstance(infer_output.data, List): - raise InvalidInput("output data is not a List") - infer_output_dict["contents"] = {} - data_key = GRPC_CONTENT_DATATYPE_MAPPINGS.get( - infer_output.datatype, None - ) - if data_key is not None: - infer_output._data = [ - bytes(val, "utf-8") if isinstance(val, str) else val - for val in infer_output.data - ] # str to byte conversion for grpc proto - infer_output_dict["contents"][data_key] = infer_output.data - else: - raise InvalidInput("to_grpc: invalid output datatype") - infer_outputs.append(infer_output_dict) - - return ModelInferResponse( - id=self.id, - model_name=self.model_name, 
- outputs=infer_outputs, - raw_output_contents=raw_output_contents, - ) diff --git a/hsml/python/hsml/client/istio/utils/numpy_codec.py b/hsml/python/hsml/client/istio/utils/numpy_codec.py deleted file mode 100644 index 3c6ecb606..000000000 --- a/hsml/python/hsml/client/istio/utils/numpy_codec.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright 2021 The KServe Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This implementation has been borrowed from kserve/kserve repository -# https://github.com/kserve/kserve/blob/release-0.11/python/kserve/kserve/utils/numpy_codec.py - -import numpy as np - - -def to_np_dtype(dtype): - dtype_map = { - "BOOL": bool, - "INT8": np.int8, - "INT16": np.int16, - "INT32": np.int32, - "INT64": np.int64, - "UINT8": np.uint8, - "UINT16": np.uint16, - "UINT32": np.uint32, - "UINT64": np.uint64, - "FP16": np.float16, - "FP32": np.float32, - "FP64": np.float64, - "BYTES": np.object_, - } - return dtype_map.get(dtype, None) - - -def from_np_dtype(np_dtype): - if np_dtype == bool: - return "BOOL" - elif np_dtype == np.int8: - return "INT8" - elif np_dtype == np.int16: - return "INT16" - elif np_dtype == np.int32: - return "INT32" - elif np_dtype == np.int64: - return "INT64" - elif np_dtype == np.uint8: - return "UINT8" - elif np_dtype == np.uint16: - return "UINT16" - elif np_dtype == np.uint32: - return "UINT32" - elif np_dtype == np.uint64: - return "UINT64" - elif np_dtype == np.float16: - return "FP16" - elif np_dtype == np.float32: - return "FP32" - elif np_dtype == np.float64: - return "FP64" - elif np_dtype == np.object_ or np_dtype.type == np.bytes_: - return "BYTES" - return None diff --git a/hsml/python/hsml/connection.py b/hsml/python/hsml/connection.py deleted file mode 100644 index d9d61b9e8..000000000 --- a/hsml/python/hsml/connection.py +++ /dev/null @@ -1,294 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os - -from hsml import client -from hsml.core import model_api, model_registry_api, model_serving_api -from hsml.decorators import connected, not_connected -from requests.exceptions import ConnectionError - - -CONNECTION_SAAS_HOSTNAME = "c.app.hopsworks.ai" - -HOPSWORKS_PORT_DEFAULT = 443 -HOSTNAME_VERIFICATION_DEFAULT = True - - -class Connection: - """A Hopsworks Model Management connection object. - - The connection is project specific, so you can access the project's own Model Registry and Model Serving. 
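As an aside, the deleted numpy_codec module above is the single source of truth for the KServe v2 wire datatype names used throughout infer_type.py. A minimal round-trip sketch, assuming the pre-revert hsml.client.istio.utils.numpy_codec import path is still available:

import numpy as np

from hsml.client.istio.utils.numpy_codec import from_np_dtype, to_np_dtype

arr = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
wire = from_np_dtype(arr.dtype)  # "FP32"
assert to_np_dtype(wire) is np.float32
assert to_np_dtype("COMPLEX128") is None  # unknown datatypes map to None, no raise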
- - This class provides convenience classmethods accessible from the `hsml`-module: - - !!! example "Connection factory" - For convenience, `hsml` provides a factory method, accessible from the top level - module, so you don't have to import the `Connection` class manually: - - ```python - import hsml - conn = hsml.connection() - ``` - - !!! hint "Save API Key as File" - To get started quickly, you can simply create a file with the previously - created Hopsworks API Key and place it on the environment from which you - wish to connect to Hopsworks. - - You can then connect by simply passing the path to the key file when - instantiating a connection: - - ```python hl_lines="6" - import hsml - conn = hsml.connection( - 'my_instance', # DNS of your Hopsworks instance - 443, # Port to reach your Hopsworks instance, defaults to 443 - 'my_project', # Name of your Hopsworks project - api_key_file='modelregistry.key', # The file containing the API key generated above - hostname_verification=True) # Disable for self-signed certificates - ) - mr = conn.get_model_registry() # Get the project's default model registry - ms = conn.get_model_serving() # Uses the previous model registry - ``` - - Clients in external clusters need to connect to the Hopsworks Model Registry and Model Serving using an - API key. The API key is generated inside the Hopsworks platform, and requires at - least the "project", "modelregistry", "dataset.create", "dataset.view", "dataset.delete", "serving" and "kafka" scopes - to be able to access a model registry and its model serving. - For more information, see the [integration guides](../../integrations/overview.md). - - # Arguments - host: The hostname of the Hopsworks instance, defaults to `None`. - port: The port on which the Hopsworks instance can be reached, - defaults to `443`. - project: The name of the project to connect to. When running on Hopsworks, this - defaults to the project from where the client is run from. - Defaults to `None`. - hostname_verification: Whether or not to verify Hopsworks certificate, defaults - to `True`. - trust_store_path: Path on the file system containing the Hopsworks certificates, - defaults to `None`. - api_key_file: Path to a file containing the API Key. - api_key_value: API Key as string, if provided, however, this should be used with care, - especially if the used notebook or job script is accessible by multiple parties. Defaults to `None`. - - # Returns - `Connection`. Connection handle to perform operations on a Hopsworks project. - """ - - def __init__( - self, - host: str = None, - port: int = HOPSWORKS_PORT_DEFAULT, - project: str = None, - hostname_verification: bool = HOSTNAME_VERIFICATION_DEFAULT, - trust_store_path: str = None, - api_key_file: str = None, - api_key_value: str = None, - ): - self._host = host - self._port = port - self._project = project - self._hostname_verification = hostname_verification - self._trust_store_path = trust_store_path - self._api_key_file = api_key_file - self._api_key_value = api_key_value - self._connected = False - self._model_api = model_api.ModelApi() - self._model_registry_api = model_registry_api.ModelRegistryApi() - self._model_serving_api = model_serving_api.ModelServingApi() - - self.connect() - - @connected - def get_model_registry(self, project: str = None): - """Get a reference to a model registry to perform operations on, defaulting to the project's default model registry. - Shared model registries can be retrieved by passing the `project` argument. 
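A short sketch of registry retrieval as documented here; "shared_project" is a hypothetical project that has shared its model registry with ours, and the key file name is illustrative:

import hsml

conn = hsml.connection(api_key_file="modelregistry.key")
mr = conn.get_model_registry()                                 # project's default registry
shared_mr = conn.get_model_registry(project="shared_project")  # registry shared with us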
- - # Arguments - project: The name of the project that owns the shared model registry, - the model registry must be shared with the project the connection was established for, defaults to `None`. - # Returns - `ModelRegistry`. A model registry handle object to perform operations on. - """ - return self._model_registry_api.get(project) - - @connected - def get_model_serving(self): - """Get a reference to model serving to perform operations on. Model serving operates on top of a model registry, defaulting to the project's default model registry. - - !!! example - ```python - - import hopsworks - - project = hopsworks.login() - - ms = project.get_model_serving() - ``` - - # Returns - `ModelServing`. A model serving handle object to perform operations on. - """ - return self._model_serving_api.get() - - @not_connected - def connect(self): - """Instantiate the connection. - - Creating a `Connection` object implicitly calls this method for you to - instantiate the connection. However, it is possible to close the connection - gracefully with the `close()` method, in order to clean up materialized - certificates. This might be desired when working on external environments. - Subsequently you can call `connect()` again to reopen the connection. - - !!! example - ```python - import hsml - conn = hsml.connection() - conn.close() - conn.connect() - ``` - """ - self._connected = True - try: - # init client - if client.hopsworks.base.Client.REST_ENDPOINT not in os.environ: - client.init( - "external", - self._host, - self._port, - self._project, - self._hostname_verification, - self._trust_store_path, - self._api_key_file, - self._api_key_value, - ) - else: - client.init("internal") - - self._model_api = model_api.ModelApi() - self._model_serving_api.load_default_configuration() # istio client, default resources,... - except (TypeError, ConnectionError): - self._connected = False - raise - print("Connected. Call `.close()` to terminate connection gracefully.") - - def close(self): - """Close a connection gracefully. - - This will clean up any materialized certificates on the local file system of - external environments. - - Usage is recommended but optional. 
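A compact sketch of the close/reconnect cycle described above, assuming an external cluster, an illustrative host name, and an API key file:

import hsml

conn = hsml.connection(
    host="my_instance",
    project="my_project",
    api_key_file="modelregistry.key",
)  # the factory connects eagerly
ms = conn.get_model_serving()
conn.close()    # removes certificates materialized on the local file system
conn.connect()  # the same handle can be reopened later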
- """ - client.stop() - self._model_api = None - self._connected = False - print("Connection closed.") - - @classmethod - def connection( - cls, - host: str = None, - port: int = HOPSWORKS_PORT_DEFAULT, - project: str = None, - hostname_verification: bool = HOSTNAME_VERIFICATION_DEFAULT, - trust_store_path: str = None, - api_key_file: str = None, - api_key_value: str = None, - ): - """Connection factory method, accessible through `hsml.connection()`.""" - return cls( - host, - port, - project, - hostname_verification, - trust_store_path, - api_key_file, - api_key_value, - ) - - @property - def host(self): - return self._host - - @host.setter - @not_connected - def host(self, host): - self._host = host - - @property - def port(self): - return self._port - - @port.setter - @not_connected - def port(self, port): - self._port = port - - @property - def project(self): - return self._project - - @project.setter - @not_connected - def project(self, project): - self._project = project - - @property - def hostname_verification(self): - return self._hostname_verification - - @hostname_verification.setter - @not_connected - def hostname_verification(self, hostname_verification): - self._hostname_verification = hostname_verification - - @property - def trust_store_path(self): - return self._trust_store_path - - @trust_store_path.setter - @not_connected - def trust_store_path(self, trust_store_path): - self._trust_store_path = trust_store_path - - @property - def api_key_file(self): - return self._api_key_file - - @property - def api_key_value(self): - return self._api_key_value - - @api_key_file.setter - @not_connected - def api_key_file(self, api_key_file): - self._api_key_file = api_key_file - - @api_key_value.setter - @not_connected - def api_key_value(self, api_key_value): - self._api_key_value = api_key_value - - def __enter__(self): - self.connect() - return self - - def __exit__(self, type, value, traceback): - self.close() diff --git a/hsml/python/hsml/constants.py b/hsml/python/hsml/constants.py deleted file mode 100644 index 6ec99ff3c..000000000 --- a/hsml/python/hsml/constants.py +++ /dev/null @@ -1,119 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - - -DEFAULT = dict() # used as default parameter for a class object - - -class MODEL: - FRAMEWORK_TENSORFLOW = "TENSORFLOW" - FRAMEWORK_TORCH = "TORCH" - FRAMEWORK_PYTHON = "PYTHON" - FRAMEWORK_SKLEARN = "SKLEARN" - - -class MODEL_REGISTRY: - HOPSFS_MOUNT_PREFIX = "/hopsfs/" - - -class MODEL_SERVING: - MODELS_DATASET = "Models" - - -class ARTIFACT_VERSION: - CREATE = "CREATE" - - -class RESOURCES: - MIN_NUM_INSTANCES = 1 # disable scale-to-zero by default - # default values, not hard limits - MIN_CORES = 0.2 - MIN_MEMORY = 32 - MIN_GPUS = 0 - MAX_CORES = 2 - MAX_MEMORY = 1024 - MAX_GPUS = 0 - - -class KAFKA_TOPIC: - NONE = "NONE" - CREATE = "CREATE" - NUM_REPLICAS = 1 - NUM_PARTITIONS = 1 - - -class INFERENCE_LOGGER: - MODE_NONE = "NONE" - MODE_ALL = "ALL" - MODE_MODEL_INPUTS = "MODEL_INPUTS" - MODE_PREDICTIONS = "PREDICTIONS" - - -class INFERENCE_BATCHER: - ENABLED = False - - -class DEPLOYMENT: - ACTION_START = "START" - ACTION_STOP = "STOP" - - -class PREDICTOR: - # model server - MODEL_SERVER_PYTHON = "PYTHON" - MODEL_SERVER_TF_SERVING = "TENSORFLOW_SERVING" - # serving tool - SERVING_TOOL_DEFAULT = "DEFAULT" - SERVING_TOOL_KSERVE = "KSERVE" - - -class PREDICTOR_STATE: - # status - STATUS_CREATING = "Creating" - STATUS_CREATED = "Created" - STATUS_STARTING = "Starting" - STATUS_FAILED = "Failed" - STATUS_RUNNING = "Running" - STATUS_IDLE = "Idle" - STATUS_UPDATING = "Updating" - STATUS_STOPPING = "Stopping" - STATUS_STOPPED = "Stopped" - # condition type - CONDITION_TYPE_STOPPED = "STOPPED" - CONDITION_TYPE_SCHEDULED = "SCHEDULED" - CONDITION_TYPE_INITIALIZED = "INITIALIZED" - CONDITION_TYPE_STARTED = "STARTED" - CONDITION_TYPE_READY = "READY" - - -class INFERENCE_ENDPOINTS: - # endpoint type - ENDPOINT_TYPE_NODE = "NODE" - ENDPOINT_TYPE_KUBE_CLUSTER = "KUBE_CLUSTER" - ENDPOINT_TYPE_LOAD_BALANCER = "LOAD_BALANCER" - # port name - PORT_NAME_HTTP = "HTTP" - PORT_NAME_HTTPS = "HTTPS" - PORT_NAME_STATUS_PORT = "STATUS" - PORT_NAME_TLS = "TLS" - # protocol - API_PROTOCOL_REST = "REST" - API_PROTOCOL_GRPC = "GRPC" - - -class DEPLOYABLE_COMPONENT: - PREDICTOR = "predictor" - TRANSFORMER = "transformer" diff --git a/hsml/python/hsml/core/__init__.py b/hsml/python/hsml/core/__init__.py deleted file mode 100644 index ff0a6f046..000000000 --- a/hsml/python/hsml/core/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/hsml/python/hsml/core/dataset_api.py b/hsml/python/hsml/core/dataset_api.py deleted file mode 100644 index dc6301b48..000000000 --- a/hsml/python/hsml/core/dataset_api.py +++ /dev/null @@ -1,582 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import copy -import json -import math -import os -import time -from concurrent.futures import ThreadPoolExecutor, wait - -from hsml import client, tag -from hsml.client.exceptions import RestAPIError -from tqdm.auto import tqdm - - -class Chunk: - def __init__(self, content, number, status): - self.content = content - self.number = number - self.status = status - self.retries = 0 - - -class DatasetApi: - def __init__(self): - pass - - DEFAULT_UPLOAD_FLOW_CHUNK_SIZE = 10 - DEFAULT_UPLOAD_SIMULTANEOUS_UPLOADS = 3 - DEFAULT_UPLOAD_MAX_CHUNK_RETRIES = 1 - - DEFAULT_DOWNLOAD_FLOW_CHUNK_SIZE = 1_048_576 - FLOW_PERMANENT_ERRORS = [404, 413, 415, 500, 501] - - def upload( - self, - local_path: str, - upload_path: str, - overwrite: bool = False, - chunk_size=DEFAULT_UPLOAD_FLOW_CHUNK_SIZE, - simultaneous_uploads=DEFAULT_UPLOAD_SIMULTANEOUS_UPLOADS, - max_chunk_retries=DEFAULT_UPLOAD_MAX_CHUNK_RETRIES, - chunk_retry_interval=1, - ): - """Upload a file to the Hopsworks filesystem. - - ```python - - conn = hsml.connection(project="my-project") - - dataset_api = conn.get_dataset_api() - - uploaded_file_path = dataset_api.upload("my_local_file.txt", "Resources") - - ``` - # Arguments - local_path: local path to file to upload - upload_path: path to directory where to upload the file in Hopsworks Filesystem - overwrite: overwrite file if exists - chunk_size: upload chunk size in megabytes. Default 10 MB - simultaneous_uploads: number of simultaneous chunks to upload. Default 3 - max_chunk_retries: maximum retry for a chunk. Default is 1 - chunk_retry_interval: chunk retry interval in seconds. 
Default is 1sec - # Returns - `str`: Path to uploaded file - # Raises - `RestAPIError`: If unable to upload the file - """ - # local path could be absolute or relative, - if not os.path.isabs(local_path) and os.path.exists( - os.path.join(os.getcwd(), local_path) - ): - local_path = os.path.join(os.getcwd(), local_path) - - file_size = os.path.getsize(local_path) - - _, file_name = os.path.split(local_path) - - destination_path = upload_path + "/" + file_name - chunk_size_bytes = chunk_size * 1024 * 1024 - - if self.path_exists(destination_path): - if overwrite: - self.rm(destination_path) - else: - raise Exception( - "{} already exists, set overwrite=True to overwrite it".format( - local_path - ) - ) - - num_chunks = math.ceil(file_size / chunk_size_bytes) - - base_params = self._get_flow_base_params( - file_name, num_chunks, file_size, chunk_size_bytes - ) - - chunk_number = 1 - with open(local_path, "rb") as f: - pbar = None - try: - pbar = tqdm( - total=file_size, - bar_format="{desc}: {percentage:.3f}%|{bar}| {n_fmt}/{total_fmt} elapsed<{elapsed} remaining<{remaining}", - desc="Uploading", - ) - except Exception: - self._log.exception("Failed to initialize progress bar.") - self._log.info("Starting upload") - with ThreadPoolExecutor(simultaneous_uploads) as executor: - while True: - chunks = [] - for _ in range(simultaneous_uploads): - chunk = f.read(chunk_size_bytes) - if not chunk: - break - chunks.append(Chunk(chunk, chunk_number, "pending")) - chunk_number += 1 - - if len(chunks) == 0: - break - - # upload each chunk and update pbar - futures = [ - executor.submit( - self._upload_chunk, - base_params, - upload_path, - file_name, - chunk, - pbar, - max_chunk_retries, - chunk_retry_interval, - ) - for chunk in chunks - ] - # wait for all upload tasks to complete - _, _ = wait(futures) - try: - _ = [future.result() for future in futures] - except Exception as e: - if pbar is not None: - pbar.close() - raise e - - if pbar is not None: - pbar.close() - else: - self._log.info("Upload finished") - - return upload_path + "/" + os.path.basename(local_path) - - def _upload_chunk( - self, - base_params, - upload_path, - file_name, - chunk: Chunk, - pbar, - max_chunk_retries, - chunk_retry_interval, - ): - query_params = copy.copy(base_params) - query_params["flowCurrentChunkSize"] = len(chunk.content) - query_params["flowChunkNumber"] = chunk.number - - chunk.status = "uploading" - while True: - try: - self._upload_request( - query_params, upload_path, file_name, chunk.content - ) - break - except RestAPIError as re: - chunk.retries += 1 - if ( - re.response.status_code in DatasetApi.FLOW_PERMANENT_ERRORS - or chunk.retries > max_chunk_retries - ): - chunk.status = "failed" - raise re - time.sleep(chunk_retry_interval) - continue - - chunk.status = "uploaded" - - if pbar is not None: - pbar.update(query_params["flowCurrentChunkSize"]) - - def _get_flow_base_params(self, file_name, num_chunks, size, chunk_size): - return { - "templateId": -1, - "flowChunkSize": chunk_size, - "flowTotalSize": size, - "flowIdentifier": str(size) + "_" + file_name, - "flowFilename": file_name, - "flowRelativePath": file_name, - "flowTotalChunks": num_chunks, - } - - def _upload_request(self, params, path, file_name, chunk): - _client = client.get_instance() - path_params = ["project", _client._project_id, "dataset", "upload", path] - - # Flow configuration params are sent as form data - _client._send_request( - "POST", path_params, data=params, files={"file": (file_name, chunk)} - ) - - def download(self, path, 
local_path): - """Download file/directory on a path in datasets. - :param path: path to download - :type path: str - :param local_path: path to download in datasets - :type local_path: str - """ - - _client = client.get_instance() - path_params = [ - "project", - _client._project_id, - "dataset", - "download", - "with_auth", - path, - ] - query_params = {"type": "DATASET"} - - with _client._send_request( - "GET", path_params, query_params=query_params, stream=True - ) as response: - with open(local_path, "wb") as f: - downloaded = 0 - # if not response.headers.get("Content-Length"), file is still downloading - for chunk in response.iter_content( - chunk_size=self.DEFAULT_DOWNLOAD_FLOW_CHUNK_SIZE - ): - f.write(chunk) - downloaded += len(chunk) - - def get(self, remote_path): - """Get metadata about a path in datasets. - - :param remote_path: path to check - :type remote_path: str - :return: dataset metadata - :rtype: dict - """ - _client = client.get_instance() - path_params = ["project", _client._project_id, "dataset", remote_path] - headers = {"content-type": "application/json"} - return _client._send_request("GET", path_params, headers=headers) - - def path_exists(self, remote_path): - """Check if a path exists in datasets. - - :param remote_path: path to check - :type remote_path: str - :return: boolean whether path exists - :rtype: bool - """ - try: - self.get(remote_path) - return True - except RestAPIError: - return False - - def list(self, remote_path, sort_by=None, limit=1000): - """List all files in a directory in datasets. - - :param remote_path: path to list - :type remote_path: str - :param sort_by: sort string - :type sort_by: str - :param limit: max number of returned files - :type limit: int - """ - _client = client.get_instance() - path_params = ["project", _client._project_id, "dataset", remote_path] - query_params = {"action": "listing", "sort_by": sort_by, "limit": limit} - headers = {"content-type": "application/json"} - return _client._send_request( - "GET", path_params, headers=headers, query_params=query_params - ) - - def chmod(self, remote_path, permissions): - """Chmod operation on file or directory in datasets. - - :param remote_path: path to chmod - :type remote_path: str - :param permissions: permissions string, for example u+x - :type permissions: str - """ - _client = client.get_instance() - path_params = ["project", _client._project_id, "dataset", remote_path] - headers = {"content-type": "application/json"} - query_params = {"action": "PERMISSION", "permissions": permissions} - return _client._send_request( - "PUT", path_params, headers=headers, query_params=query_params - ) - - def mkdir(self, remote_path): - """Path to create in datasets. - - :param remote_path: path to create - :type remote_path: str - """ - _client = client.get_instance() - path_params = ["project", _client._project_id, "dataset", remote_path] - query_params = { - "action": "create", - "searchable": "true", - "generate_readme": "false", - "type": "DATASET", - } - headers = {"content-type": "application/json"} - return _client._send_request( - "POST", path_params, headers=headers, query_params=query_params - ) - - def rm(self, remote_path): - """Remove a path in datasets. 
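Taken together, the methods above give a full filesystem round trip. A usage sketch, assuming an hsml connection has already been established (so client.get_instance() resolves) and that the paths are illustrative:

from hsml.core.dataset_api import DatasetApi

dataset_api = DatasetApi()
if not dataset_api.path_exists("Resources/my_dir"):
    dataset_api.mkdir("Resources/my_dir")
uploaded = dataset_api.upload("my_local_file.txt", "Resources/my_dir")
dataset_api.download(uploaded, "my_local_copy.txt")
print(dataset_api.list("Resources/my_dir"))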
- - :param remote_path: path to remove - :type remote_path: str - """ - _client = client.get_instance() - path_params = ["project", _client._project_id, "dataset", remote_path] - return _client._send_request("DELETE", path_params) - - def _archive( - self, - remote_path, - destination_path=None, - block=False, - timeout=120, - action="unzip", - ): - """Internal (de)compression logic. - - :param remote_path: path to file or directory to unzip - :type remote_path: str - :param destination_path: path to upload the zip - :type destination_path: str - :param block: if the operation should be blocking until complete - :type block: bool - :param timeout: timeout if the operation is blocking - :type timeout: int - :param action: zip or unzip - :type action: str - """ - - _client = client.get_instance() - path_params = ["project", _client._project_id, "dataset", remote_path] - - query_params = {"action": action} - - if destination_path is not None: - query_params["destination_path"] = destination_path - query_params["destination_type"] = "DATASET" - - headers = {"content-type": "application/json"} - - _client._send_request( - "POST", path_params, headers=headers, query_params=query_params - ) - - if block is True: - # Wait for zip file to appear. When it does, check that parent dir zipState is not set to CHOWNING - count = 0 - while count < timeout: - if action == "zip": - zip_path = remote_path + ".zip" - # Get the status of the zipped file - if destination_path is None: - zip_exists = self.path_exists(zip_path) - else: - zip_exists = self.path_exists( - destination_path + "/" + os.path.split(zip_path)[1] - ) - # Get the zipState of the directory being zipped - dir_status = self.get(remote_path) - zip_state = ( - dir_status["zipState"] if "zipState" in dir_status else None - ) - if zip_exists and zip_state == "NONE": - return - else: - time.sleep(1) - elif action == "unzip": - # Get the status of the unzipped dir - unzipped_dir_exists = self.path_exists( - remote_path[: remote_path.index(".")] - ) - # Get the zipState of the zip being extracted - dir_status = self.get(remote_path) - zip_state = ( - dir_status["zipState"] if "zipState" in dir_status else None - ) - if unzipped_dir_exists and zip_state == "NONE": - return - else: - time.sleep(1) - count += 1 - raise Exception( - "Timeout of {} seconds exceeded while {} {}.".format( - timeout, action, remote_path - ) - ) - - def unzip(self, remote_path, block=False, timeout=120): - """Unzip an archive in the dataset. - - :param remote_path: path to file or directory to unzip - :type remote_path: str - :param block: if the operation should be blocking until complete - :type block: bool - :param timeout: timeout if the operation is blocking - :type timeout: int - """ - self._archive(remote_path, block=block, timeout=timeout, action="unzip") - - def zip(self, remote_path, destination_path=None, block=False, timeout=120): - """Zip a file or directory in the dataset. - - :param remote_path: path to file or directory to zip - :type remote_path: str - :param destination_path: path to upload the zip - :type destination_path: str - :param block: if the operation should be blocking until complete - :type block: bool - :param timeout: timeout if the operation is blocking - :type timeout: int - """ - self._archive( - remote_path, - destination_path=destination_path, - block=block, - timeout=timeout, - action="zip", - ) - - def move(self, source_path, destination_path): - """Move a file or directory in the dataset. - - A tag consists of a name/value pair. 
Tag names are unique identifiers. - The value of a tag can be any valid json - primitives, arrays or json objects. - - :param source_path: path to file or directory to move - :type source_path: str - :param destination_path: destination path - :type destination_path: str - """ - - _client = client.get_instance() - path_params = ["project", _client._project_id, "dataset", source_path] - - query_params = {"action": "move", "destination_path": destination_path} - headers = {"content-type": "application/json"} - - _client._send_request( - "POST", path_params, headers=headers, query_params=query_params - ) - - def copy(self, source_path, destination_path): - """Copy a file or directory in the dataset. - - A tag consists of a name/value pair. Tag names are unique identifiers. - The value of a tag can be any valid json - primitives, arrays or json objects. - - :param source_path: path to file or directory to copy - :type source_path: str - :param destination_path: destination path - :type destination_path: str - """ - - _client = client.get_instance() - path_params = ["project", _client._project_id, "dataset", source_path] - - query_params = {"action": "copy", "destination_path": destination_path} - headers = {"content-type": "application/json"} - - _client._send_request( - "POST", path_params, headers=headers, query_params=query_params - ) - - def add(self, path, name, value): - """Attach a name/value tag to a model. - - A tag consists of a name/value pair. Tag names are unique identifiers. - The value of a tag can be any valid json - primitives, arrays or json objects. - - :param path: path to add the tag - :type path: str - :param name: name of the tag to be added - :type name: str - :param value: value of the tag to be added - :type value: str - """ - _client = client.get_instance() - path_params = [ - "project", - _client._project_id, - "dataset", - "tags", - "schema", - name, - path, - ] - headers = {"content-type": "application/json"} - json_value = json.dumps(value) - _client._send_request("PUT", path_params, headers=headers, data=json_value) - - def delete(self, path, name): - """Delete a tag. - - Tag names are unique identifiers. - - :param path: path to delete the tags - :type path: str - :param name: name of the tag to be removed - :type name: str - """ - _client = client.get_instance() - path_params = [ - "project", - _client._project_id, - "dataset", - "tags", - "schema", - name, - path, - ] - _client._send_request("DELETE", path_params) - - def get_tags(self, path, name: str = None): - """Get the tags. - - Gets all tags if no tag name is specified. 
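The tag helpers above store arbitrary JSON values against a dataset path. A sketch, assuming a connected client, an existing model path, and a tag schema named "quality" already registered in the project:

from hsml.core.dataset_api import DatasetApi

dataset_api = DatasetApi()
dataset_api.add("Models/mnist/1", "quality", {"stage": "production", "auc": 0.94})
print(dataset_api.get_tags("Models/mnist/1"))  # {"quality": {"stage": ..., "auc": ...}}
dataset_api.delete("Models/mnist/1", "quality")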
- - :param path: path to get the tags - :type path: str - :param name: tag name - :type name: str - :return: dict of tag name/values - :rtype: dict - """ - _client = client.get_instance() - path_params = [ - "project", - _client._project_id, - "dataset", - "tags", - ] - - if name is not None: - path_params.append("schema") - path_params.append(name) - else: - path_params.append("all") - - path_params.append(path) - - return { - tag._name: json.loads(tag._value) - for tag in tag.Tag.from_response_json( - _client._send_request("GET", path_params) - ) - } diff --git a/hsml/python/hsml/core/explicit_provenance.py b/hsml/python/hsml/core/explicit_provenance.py deleted file mode 100644 index ea6ce9bd8..000000000 --- a/hsml/python/hsml/core/explicit_provenance.py +++ /dev/null @@ -1,368 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import json -import logging -from enum import Enum -from typing import Set - -import humps - - -_logger = logging.getLogger(__name__) - - -class Artifact: - class MetaType(Enum): - DELETED = 1 - INACCESSIBLE = 2 - FAULTY = 3 - NOT_SUPPORTED = 4 - - def __init__( - self, - model_registry_id, - name, - version, - type, - meta_type, - href=None, - exception_cause=None, - **kwargs, - ): - self._model_registry_id = model_registry_id - self._name = name - self._version = version - self._type = type - self._meta_type = meta_type - self._href = href - self._exception_cause = exception_cause - - @property - def model_registry_id(self): - """Id of the model registry in which the artifact is located.""" - return self._model_registry_id - - @property - def name(self): - """Name of the artifact.""" - return self._name - - @property - def version(self): - """Version of the artifact""" - return self._version - - def __str__(self): - return { - "model_registry_id": self._model_registry_id, - "name": self._name, - "version": self._version, - } - - def __repr__(self): - return ( - f"Artifact({self._model_registry_id!r}, {self._name!r}, " - f"{self._version!r}, {self._type!r}, {self._meta_type!r}, " - f"{self._href!r}, {self._exception_cause!r})" - ) - - @staticmethod - def from_response_json(json_dict: dict): - link_json = humps.decamelize(json_dict) - href = None - exception_cause = None - if link_json.get("exception_cause") is not None: - meta_type = Artifact.MetaType.FAULTY - exception_cause = link_json.get("exception_cause") - elif bool(link_json["deleted"]): - meta_type = Artifact.MetaType.DELETED - elif not bool(link_json["accessible"]): - meta_type = Artifact.MetaType.INACCESSIBLE - href = link_json["artifact"]["href"] - else: - meta_type = Artifact.MetaType.NOT_SUPPORTED - href = link_json["artifact"]["href"] - return Artifact( - link_json["artifact"]["project"], - link_json["artifact"]["name"], - link_json["artifact"]["version"], - link_json["artifact_type"], - meta_type, - href=href, - exception_cause=exception_cause, - ) - - -class Links: - def __init__(self, accessible=None, deleted=None, inaccessible=None, faulty=None): - if 
accessible is None: - self._accessible = [] - else: - self._accessible = accessible - if deleted is None: - self._deleted = [] - else: - self._deleted = deleted - if inaccessible is None: - self._inaccessible = [] - else: - self._inaccessible = inaccessible - if faulty is None: - self._faulty = [] - else: - self._faulty = faulty - - @property - def deleted(self): - """List of [Artifact objects] which contains - minimal information (name, version) about the entities - (feature views, training datasets) they represent. - These entities have been removed from the feature store. - """ - return self._deleted - - @property - def inaccessible(self): - """List of [Artifact objects] which contains - minimal information (name, version) about the entities - (feature views, training datasets) they represent. - These entities exist in the feature store, however the user - does not have access to them anymore. - """ - return self._inaccessible - - @property - def accessible(self): - """List of [FeatureView|TrainingDataset objects] objects - which are part of the provenance graph requested. These entities - exist in the feature store and the user has access to them. - """ - return self._accessible - - @property - def faulty(self): - """List of [Artifact objects] which contains - minimal information (name, version) about the entities - (feature views, training datasets) they represent. - These entities exist in the feature store, however they are corrupted. - """ - return self._faulty - - class Direction(Enum): - UPSTREAM = 1 - DOWNSTREAM = 2 - - class Type(Enum): - FEATURE_VIEW = 1 - TRAINING_DATASET = 2 - - def __str__(self, indent=None): - return json.dumps(self, cls=ProvenanceEncoder, indent=indent) - - def __repr__(self): - return ( - f"Links({self._accessible!r}, {self._deleted!r}" - f", {self._inaccessible!r}, {self._faulty!r})" - ) - - @staticmethod - def get_one_accessible_parent(links): - if links is None: - _logger.info("There is no parent information") - return - elif links.inaccessible or links.deleted: - _logger.info( - "The parent is deleted or inaccessible. For more details get the full provenance from `_provenance` method" - ) - return None - elif links.accessible: - if len(links.accessible) > 1: - msg = "Backend inconsistency - provenance returned more than one parent" - raise Exception(msg) - parent = links.accessible[0] - if isinstance(parent, Artifact): - msg = "The returned object is not a valid object. 
For more details get the full provenance from `_provenance` method" - raise Exception(msg) - return parent - else: - _logger.info("There is no parent information") - return None - - @staticmethod - def __parse_feature_views(links_json: dict, artifacts: Set[str]): - from hsfs import feature_view - from hsfs.core import explicit_provenance as hsfs_explicit_provenance - - links = Links() - for link_json in links_json: - if link_json["node"]["artifact_type"] in artifacts: - if link_json["node"].get("exception_cause") is not None: - links._faulty.append( - hsfs_explicit_provenance.Artifact.from_response_json( - link_json["node"] - ) - ) - elif bool(link_json["node"]["accessible"]): - fv = feature_view.FeatureView.from_response_json( - link_json["node"]["artifact"] - ) - links.accessible.append(fv) - elif bool(link_json["node"]["deleted"]): - links.deleted.append( - hsfs_explicit_provenance.Artifact.from_response_json( - link_json["node"] - ) - ) - else: - links.inaccessible.append( - hsfs_explicit_provenance.Artifact.from_response_json( - link_json["node"] - ) - ) - else: - new_links = Links.__parse_feature_views( - link_json["upstream"], artifacts - ) - links.faulty.extend(new_links.faulty) - links.accessible.extend(new_links.accessible) - links.inaccessible.extend(new_links.inaccessible) - links.deleted.extend(new_links.deleted) - return links - - @staticmethod - def __parse_training_datasets(links_json: dict, artifacts: Set[str]): - from hsfs import training_dataset - from hsfs.core import explicit_provenance as hsfs_explicit_provenance - - links = Links() - for link_json in links_json: - if link_json["node"]["artifact_type"] in artifacts: - if link_json["node"].get("exception_cause") is not None: - links._faulty.append( - hsfs_explicit_provenance.Artifact.from_response_json( - link_json["node"] - ) - ) - elif bool(link_json["node"]["accessible"]): - td = training_dataset.TrainingDataset.from_response_json_single( - link_json["node"]["artifact"] - ) - links.accessible.append(td) - elif bool(link_json["node"]["deleted"]): - links.deleted.append( - hsfs_explicit_provenance.Artifact.from_response_json( - link_json["node"] - ) - ) - else: - links.inaccessible.append( - hsfs_explicit_provenance.Artifact.from_response_json( - link_json["node"] - ) - ) - return links - - @staticmethod - def from_response_json(json_dict: dict, direction: Direction, artifact: Type): - """Parse explicit links from json response. There are three types of - Links: UpstreamFeatureGroups, DownstreamFeatureGroups, DownstreamFeatureViews - - # Arguments - links_json: json response from the explicit provenance endpoint - direction: subset of links to parse - UPSTREAM/DOWNSTREAM - type: subset of links to parse - FEATURE_VIEW/TRAINING_DATASET/MODEL - - # Returns - A ProvenanceLink object for the selected parse type. 
- """ - - import importlib.util - - if not importlib.util.find_spec("hsfs"): - raise ValueError( - "hsfs is not installed in the environment - cannot parse feature store artifacts" - ) - if not importlib.util.find_spec("hopsworks"): - raise ValueError( - "hopsworks is not installed in the environment - cannot switch from hsml connection to hsfs connection" - ) - - # make sure the hsfs connection is initialized so that the feature view/training dataset can actually be used after being returned - import hopsworks - - if not hopsworks._connected_project: - raise Exception( - "hopsworks connection is not initialized - use hopsworks.login to connect if you want the ability to use provenance with connections between hsfs and hsml" - ) - - hopsworks._connected_project.get_feature_store() - - links = Links.__from_response_json_feature_store_artifacts( - json_dict, direction, artifact - ) - return links - - @staticmethod - def __from_response_json_feature_store_artifacts( - json_dict: dict, direction: Direction, artifact: Type - ): - links_json = humps.decamelize(json_dict) - if direction == Links.Direction.UPSTREAM: - if artifact == Links.Type.FEATURE_VIEW: - return Links.__parse_feature_views( - links_json["upstream"], - { - "FEATURE_VIEW", - }, - ) - elif artifact == Links.Type.TRAINING_DATASET: - return Links.__parse_training_datasets( - links_json["upstream"], {"TRAINING_DATASET"} - ) - else: - return Links() - - -class ProvenanceEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, Links): - return { - "accessible": obj.accessible, - "inaccessible": obj.inaccessible, - "deleted": obj.deleted, - "faulty": obj.faulty, - } - else: - import importlib.util - - if importlib.util.find_spec("hsfs"): - from hsfs import feature_view - from hsfs.core import explicit_provenance as hsfs_explicit_provenance - - if isinstance( - obj, - ( - feature_view.FeatureView, - hsfs_explicit_provenance.Artifact, - ), - ): - return { - "feature_store_name": obj.feature_store_name, - "name": obj.name, - "version": obj.version, - } - return json.JSONEncoder.default(self, obj) diff --git a/hsml/python/hsml/core/model_api.py b/hsml/python/hsml/core/model_api.py deleted file mode 100644 index 190a0aca8..000000000 --- a/hsml/python/hsml/core/model_api.py +++ /dev/null @@ -1,301 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import json -from typing import Union - -from hsml import client, model, tag -from hsml.core import explicit_provenance - - -class ModelApi: - def __init__(self): - pass - - def put(self, model_instance, query_params): - """Save model metadata to the model registry. 
- - :param model_instance: metadata object of model to be saved - :type model_instance: Model - :return: updated metadata object of the model - :rtype: Model - """ - _client = client.get_instance() - path_params = [ - "project", - _client._project_id, - "modelregistries", - str(model_instance.model_registry_id), - "models", - model_instance.name + "_" + str(model_instance.version), - ] - headers = {"content-type": "application/json"} - return model_instance.update_from_response_json( - _client._send_request( - "PUT", - path_params, - headers=headers, - query_params=query_params, - data=model_instance.json(), - ) - ) - - def get(self, name, version, model_registry_id, shared_registry_project_name=None): - """Get the metadata of a model with a certain name and version. - - :param name: name of the model - :type name: str - :param version: version of the model - :type version: int - :return: model metadata object - :rtype: Model - """ - _client = client.get_instance() - path_params = [ - "project", - _client._project_id, - "modelregistries", - model_registry_id, - "models", - name + "_" + str(version), - ] - query_params = {"expand": "trainingdatasets"} - - model_json = _client._send_request("GET", path_params, query_params) - model_meta = model.Model.from_response_json(model_json) - - model_meta.shared_registry_project_name = shared_registry_project_name - - return model_meta - - def get_models( - self, - name, - model_registry_id, - shared_registry_project_name=None, - metric=None, - direction=None, - ): - """Get the metadata of models based on the name or optionally the best model given a metric and direction. - - :param name: name of the model - :type name: str - :param metric: Name of the metric to maximize or minimize - :type metric: str - :param direction: Whether to maximize or minimize the metric, allowed values are 'max' or 'min' - :type direction: str - :return: model metadata object - :rtype: Model - """ - - _client = client.get_instance() - path_params = [ - "project", - _client._project_id, - "modelregistries", - model_registry_id, - "models", - ] - query_params = { - "expand": "trainingdatasets", - "filter_by": ["name_eq:" + name], - } - - if metric is not None and direction is not None: - if direction.lower() == "max": - direction = "desc" - elif direction.lower() == "min": - direction = "asc" - - query_params["sort_by"] = metric + ":" + direction - query_params["limit"] = "1" - - model_json = _client._send_request("GET", path_params, query_params) - models_meta = model.Model.from_response_json(model_json) - - for model_meta in models_meta: - model_meta.shared_registry_project_name = shared_registry_project_name - - return models_meta - - def delete(self, model_instance): - """Delete the model and metadata. - - :param model_instance: metadata object of model to delete - :type model_instance: Model - """ - _client = client.get_instance() - path_params = [ - "project", - _client._project_id, - "modelregistries", - str(model_instance.model_registry_id), - "models", - model_instance.id, - ] - _client._send_request("DELETE", path_params) - - def set_tag(self, model_instance, name, value: Union[str, dict]): - """Attach a name/value tag to a model. - - A tag consists of a name/value pair. Tag names are unique identifiers. - The value of a tag can be any valid json - primitives, arrays or json objects. 
-
-        :param model_instance: model instance to attach tag
-        :type model_instance: Model
-        :param name: name of the tag to be added
-        :type name: str
-        :param value: value of the tag to be added
-        :type value: str or dict
-        """
-        _client = client.get_instance()
-        path_params = [
-            "project",
-            _client._project_id,
-            "modelregistries",
-            str(model_instance.model_registry_id),
-            "models",
-            model_instance.id,
-            "tags",
-            name,
-        ]
-        headers = {"content-type": "application/json"}
-        json_value = json.dumps(value)
-        _client._send_request("PUT", path_params, headers=headers, data=json_value)
-
-    def delete_tag(self, model_instance, name):
-        """Delete a tag.
-
-        Tag names are unique identifiers.
-
-        :param model_instance: model instance to delete tag from
-        :type model_instance: Model
-        :param name: name of the tag to be removed
-        :type name: str
-        """
-        _client = client.get_instance()
-        path_params = [
-            "project",
-            _client._project_id,
-            "modelregistries",
-            str(model_instance.model_registry_id),
-            "models",
-            model_instance.id,
-            "tags",
-            name,
-        ]
-        _client._send_request("DELETE", path_params)
-
-    def get_tags(self, model_instance, name: str = None):
-        """Get the tags.
-
-        Gets all tags if no tag name is specified.
-
-        :param model_instance: model instance to get the tags from
-        :type model_instance: Model
-        :param name: tag name
-        :type name: str
-        :return: dict of tag name/values
-        :rtype: dict
-        """
-        _client = client.get_instance()
-        path_params = [
-            "project",
-            _client._project_id,
-            "modelregistries",
-            str(model_instance.model_registry_id),
-            "models",
-            model_instance.id,
-            "tags",
-        ]
-
-        if name is not None:
-            path_params.append(name)
-
-        return {
-            tag._name: json.loads(tag._value)
-            for tag in tag.Tag.from_response_json(
-                _client._send_request("GET", path_params)
-            )
-        }
-
-    def get_feature_view_provenance(self, model_instance):
-        """Get the parent feature view of this model, based on explicit provenance.
-        These feature views can be accessible, deleted or inaccessible.
-        For deleted and inaccessible feature views, only minimal information is returned.
-
-        # Arguments
-            model_instance: Metadata object of model.
-
-        # Returns
-            `ExplicitProvenance.Links`: the feature view used to generate this model
-        """
-        _client = client.get_instance()
-        path_params = [
-            "project",
-            _client._project_id,
-            "modelregistries",
-            str(model_instance.model_registry_id),
-            "models",
-            model_instance.id,
-            "provenance",
-            "links",
-        ]
-        query_params = {
-            "expand": "provenance_artifacts",
-            "upstreamLvls": 2,
-            "downstreamLvls": 0,
-        }
-        links_json = _client._send_request("GET", path_params, query_params)
-        return explicit_provenance.Links.from_response_json(
-            links_json,
-            explicit_provenance.Links.Direction.UPSTREAM,
-            explicit_provenance.Links.Type.FEATURE_VIEW,
-        )
-
-    def get_training_dataset_provenance(self, model_instance):
-        """Get the parent training dataset of this model, based on explicit provenance.
-        These training datasets can be accessible, deleted or inaccessible.
-        For deleted and inaccessible training datasets, only minimal information is returned.
-
-        # Arguments
-            model_instance: Metadata object of model.
-
-        # Returns
-            `ExplicitProvenance.Links`: the training dataset used to generate this model
-        """
-        _client = client.get_instance()
-        path_params = [
-            "project",
-            _client._project_id,
-            "modelregistries",
-            str(model_instance.model_registry_id),
-            "models",
-            model_instance.id,
-            "provenance",
-            "links",
-        ]
-        query_params = {
-            "expand": "provenance_artifacts",
-            "upstreamLvls": 1,
-            "downstreamLvls": 0,
-        }
-        links_json = _client._send_request("GET", path_params, query_params)
-        return explicit_provenance.Links.from_response_json(
-            links_json,
-            explicit_provenance.Links.Direction.UPSTREAM,
-            explicit_provenance.Links.Type.TRAINING_DATASET,
-        )
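
The tag and provenance endpoints above are normally reached through the public Model object rather than by instantiating ModelApi directly. A minimal sketch, assuming a reachable Hopsworks cluster and an already registered model; the model name and tag are illustrative:

    import hopsworks

    project = hopsworks.login()
    mr = project.get_model_registry()

    my_model = mr.get_model("my_model", version=1)  # ModelApi.get() under the hood
    my_model.set_tag("stage", {"env": "prod"})      # PUT .../models/{id}/tags/stage
    print(my_model.get_tags())                      # GET .../models/{id}/tags
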
diff --git a/hsml/python/hsml/core/model_registry_api.py b/hsml/python/hsml/core/model_registry_api.py
deleted file mode 100644
index 693136e36..000000000
--- a/hsml/python/hsml/core/model_registry_api.py
+++ /dev/null
@@ -1,67 +0,0 @@
-#
-# Copyright 2021 Logical Clocks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from hsml import client
-from hsml.client.exceptions import ModelRegistryException
-from hsml.core import dataset_api
-from hsml.model_registry import ModelRegistry
-
-
-class ModelRegistryApi:
-    def __init__(self):
-        self._dataset_api = dataset_api.DatasetApi()
-
-    def get(self, project=None):
-        """Get model registry for specific project.
-
-        :param project: project of the model registry
-        :type project: str
-        :return: the model registry metadata
-        :rtype: ModelRegistry
-        """
-        _client = client.get_instance()
-
-        model_registry_id = _client._project_id
-        shared_registry_project_name = None
-
-        # In the case of a shared model registry, validate that there is a Models dataset shared to the connected project from the given project name
-        if project is not None:
-            path_params = ["project", _client._project_id, "modelregistries"]
-            model_registries = _client._send_request("GET", path_params)
-            for registry in model_registries["items"]:
-                if registry["name"] == project:
-                    model_registry_id = registry["id"]
-                    shared_registry_project_name = project
-
-            if shared_registry_project_name is None:
-                raise ModelRegistryException(
-                    "No model registry shared with current project {}, from project {}".format(
-                        _client._project_name, project
-                    )
-                )
-        # In the case of the default model registry, validate that there is a Models dataset in the connected project
-        elif project is None and not self._dataset_api.path_exists("Models"):
-            raise ModelRegistryException(
-                "No Models dataset exists in project {}. Please enable the Serving service or create the dataset manually.".format(
-                    _client._project_name
-                )
-            )
-
-        return ModelRegistry(
-            _client._project_name,
-            _client._project_id,
-            model_registry_id,
-            shared_registry_project_name=shared_registry_project_name,
-        )
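
get(project=...) above resolves a registry that another project has shared with the current one by listing /modelregistries. A minimal sketch of both code paths, assuming an hsml connection to the cluster; "other_project" is illustrative:

    import hsml

    conn = hsml.connection()        # connect to Hopsworks
    mr = conn.get_model_registry()  # default registry of the connected project

    # registry shared from another project; raises ModelRegistryException
    # if that project has not shared its "Models" dataset with this one
    shared_mr = conn.get_model_registry(project="other_project")
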
diff --git a/hsml/python/hsml/core/model_serving_api.py b/hsml/python/hsml/core/model_serving_api.py
deleted file mode 100644
index 437327742..000000000
--- a/hsml/python/hsml/core/model_serving_api.py
+++ /dev/null
@@ -1,148 +0,0 @@
-#
-# Copyright 2022 Logical Clocks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import socket
-
-from hsml import client
-from hsml.client.exceptions import ModelRegistryException
-from hsml.constants import INFERENCE_ENDPOINTS
-from hsml.core import dataset_api, serving_api
-from hsml.inference_endpoint import get_endpoint_by_type
-from hsml.model_serving import ModelServing
-
-
-class ModelServingApi:
-    def __init__(self):
-        self._dataset_api = dataset_api.DatasetApi()
-        self._serving_api = serving_api.ServingApi()
-
-    def get(self):
-        """Get model serving for specific project.
-
-        :return: the model serving metadata
-        :rtype: ModelServing
-        """
-
-        _client = client.get_instance()
-
-        # Validate that there is a Models dataset in the connected project
-        if not self._dataset_api.path_exists("Models"):
-            raise ModelRegistryException(
-                "No Models dataset exists in project {}. Please enable the Serving service or create the dataset manually.".format(
-                    _client._project_name
-                )
-            )
-
-        return ModelServing(_client._project_name, _client._project_id)
-
-    def load_default_configuration(self):
-        """Load the default configuration and set the istio client for model serving."""
-
-        # kserve installed
-        is_kserve_installed = self._serving_api.is_kserve_installed()
-        client.set_kserve_installed(is_kserve_installed)
-
-        # istio client
-        self._set_istio_client_if_available()
-
-        # resource limits
-        max_resources = self._serving_api.get_resource_limits()
-        client.set_serving_resource_limits(max_resources)
-
-        # num instances limits
-        num_instances_range = self._serving_api.get_num_instances_limits()
-        client.set_serving_num_instances_limits(num_instances_range)
-
-        # Knative domain
-        knative_domain = self._serving_api.get_knative_domain()
-        client.set_knative_domain(knative_domain)
-
-    def _set_istio_client_if_available(self):
-        """Set the istio client if available."""
-
-        if client.is_kserve_installed():
-            # check existing istio client
-            try:
-                if client.get_istio_instance() is not None:
-                    return  # istio client already set
-            except Exception:
-                pass
-
-            # setup istio client
-            inference_endpoints = self._serving_api.get_inference_endpoints()
-            if client.get_client_type() == "internal":
-                # if internal, get node port
-                endpoint = get_endpoint_by_type(
-                    inference_endpoints, INFERENCE_ENDPOINTS.ENDPOINT_TYPE_NODE
-                )
-                if endpoint is not None:
-                    client.set_istio_client(
-                        endpoint.get_any_host(),
-                        endpoint.get_port(INFERENCE_ENDPOINTS.PORT_NAME_HTTP).number,
-                    )
-                else:
-                    raise ValueError(
-                        "Istio ingress endpoint of type '"
-                        + INFERENCE_ENDPOINTS.ENDPOINT_TYPE_NODE
-                        + "' not found"
-                    )
-            else:  # if external
-                endpoint = get_endpoint_by_type(
-                    inference_endpoints, INFERENCE_ENDPOINTS.ENDPOINT_TYPE_LOAD_BALANCER
-                )
-                if endpoint is not None:
-                    # if load balancer (external ip) available
-                    _client = client.get_instance()
-                    client.set_istio_client(
-                        endpoint.get_any_host(),
-                        endpoint.get_port(INFERENCE_ENDPOINTS.PORT_NAME_HTTP).number,
-                        _client._project_name,
-                        _client._auth._token,  # reuse hopsworks client token
-                    )
-                    return
-                # in case there's no load balancer, check if the node port is open
-                endpoint = get_endpoint_by_type(
-                    inference_endpoints, INFERENCE_ENDPOINTS.ENDPOINT_TYPE_NODE
-                )
-                if endpoint is not None:
-                    # if node port available
-                    _client = client.get_instance()
-                    host = _client.host
-                    port = endpoint.get_port(INFERENCE_ENDPOINTS.PORT_NAME_HTTP).number
-                    if self._is_host_port_open(host, port):
-                        # and it is open
-                        client.set_istio_client(
-                            host,
-                            port,
-                            _client._project_name,
-                            _client._auth._token,  # reuse hopsworks client token
-                        )
-                        return
-                # otherwise, fall back to the hopsworks client
-                print(
-                    "External IP not configured for the Istio ingress gateway, the Hopsworks client will be used for model inference instead"
                )
-
-    def _is_host_port_open(self, host, port):
-        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-        sock.settimeout(1)
-        try:
-            result = sock.connect_ex((host, port))
-        finally:
-            sock.settimeout(None)
-            sock.close()
-        return result == 0
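
The istio fallback above reduces to a plain TCP reachability probe. The same check in isolation, standard library only (a restatement of _is_host_port_open for clarity, not a new API):

    import socket

    def is_host_port_open(host: str, port: int, timeout: float = 1.0) -> bool:
        # True if a TCP connection to host:port succeeds within the timeout
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(timeout)
        try:
            return sock.connect_ex((host, port)) == 0
        finally:
            sock.close()
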
diff --git a/hsml/python/hsml/core/native_hdfs_api.py b/hsml/python/hsml/core/native_hdfs_api.py
deleted file mode 100644
index fadd856ea..000000000
--- a/hsml/python/hsml/core/native_hdfs_api.py
+++ /dev/null
@@ -1,59 +0,0 @@
-#
-# Copyright 2021 Logical Clocks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-try:
-    import pydoop.hdfs as hdfs
-except ImportError:
-    pass
-
-from hsml import client
-
-
-class NativeHdfsApi:
-    def __init__(self):
-        pass
-
-    def exists(self, hdfs_path):
-        return hdfs.path.exists(hdfs_path)
-
-    def project_path(self):
-        _client = client.get_instance()
-        return hdfs.path.abspath("/Projects/" + _client._project_name + "/")
-
-    def chmod(self, hdfs_path, mode):
-        return hdfs.chmod(hdfs_path, mode)
-
-    def mkdir(self, path):
-        return hdfs.mkdir(path)
-
-    def rm(self, path, recursive=True):
-        hdfs.rm(path, recursive=recursive)
-
-    def upload(self, local_path: str, remote_path: str):
-        # copy from local fs to hdfs
-        hdfs.put(local_path, remote_path)
-
-    def download(self, remote_path: str, local_path: str):
-        # copy from hdfs to local fs
-        hdfs.get(remote_path, local_path)
-
-    def copy(self, source_path: str, destination_path: str):
-        # both paths are hdfs paths
-        hdfs.cp(source_path, destination_path)
-
-    def move(self, source_path: str, destination_path: str):
-        # both paths are hdfs paths
-        hdfs.rename(source_path, destination_path)
diff --git a/hsml/python/hsml/core/serving_api.py b/hsml/python/hsml/core/serving_api.py
deleted file mode 100644
index 45f4e7fcc..000000000
--- a/hsml/python/hsml/core/serving_api.py
+++ /dev/null
@@ -1,417 +0,0 @@
-#
-# Copyright 2022 Logical Clocks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import json
-from typing import Dict, List, Union
-
-from hsml import (
-    client,
-    deployable_component_logs,
-    deployment,
-    inference_endpoint,
-    predictor_state,
-)
-from hsml.client.istio.utils.infer_type import (
-    InferInput,
-    InferOutput,
-    InferRequest,
-)
-from hsml.constants import ARTIFACT_VERSION
-from hsml.constants import INFERENCE_ENDPOINTS as IE
-
-
-class ServingApi:
-    def __init__(self):
-        pass
-
-    def get_by_id(self, id: int):
-        """Get the metadata of a deployment with a certain id.
- - :param id: id of the deployment - :type id: int - :return: deployment metadata object - :rtype: Deployment - """ - - _client = client.get_instance() - path_params = [ - "project", - _client._project_id, - "serving", - str(id), - ] - deployment_json = _client._send_request("GET", path_params) - deployment_instance = deployment.Deployment.from_response_json(deployment_json) - deployment_instance.model_registry_id = _client._project_id - return deployment_instance - - def get(self, name: str): - """Get the metadata of a deployment with a certain name. - - :param name: name of the deployment - :type name: str - :return: deployment metadata object - :rtype: Deployment - """ - - _client = client.get_instance() - path_params = ["project", _client._project_id, "serving"] - query_params = {"name": name} - deployment_json = _client._send_request( - "GET", path_params, query_params=query_params - ) - deployment_instance = deployment.Deployment.from_response_json(deployment_json) - deployment_instance.model_registry_id = _client._project_id - return deployment_instance - - def get_all(self, model_name: str = None, status: str = None): - """Get the metadata of all deployments. - - :return: model metadata objects - :rtype: List[Deployment] - """ - - _client = client.get_instance() - path_params = ["project", _client._project_id, "serving"] - query_params = { - "model": model_name, - "status": status.capitalize() if status is not None else None, - } - deployments_json = _client._send_request( - "GET", path_params, query_params=query_params - ) - deployment_instances = deployment.Deployment.from_response_json( - deployments_json - ) - for deployment_instance in deployment_instances: - deployment_instance.model_registry_id = _client._project_id - return deployment_instances - - def get_inference_endpoints(self): - """Get inference endpoints. - - :return: inference endpoints for the current project. - :rtype: List[InferenceEndpoint] - """ - - _client = client.get_instance() - path_params = ["project", _client._project_id, "inference", "endpoints"] - endpoints_json = _client._send_request("GET", path_params) - return inference_endpoint.InferenceEndpoint.from_response_json(endpoints_json) - - def put(self, deployment_instance): - """Save deployment metadata to model serving. 
- - :param deployment_instance: metadata object of deployment to be saved - :type deployment_instance: Deployment - :return: updated metadata object of the deployment - :rtype: Deployment - """ - - _client = client.get_instance() - path_params = ["project", _client._project_id, "serving"] - headers = {"content-type": "application/json"} - - if deployment_instance.artifact_version == ARTIFACT_VERSION.CREATE: - deployment_instance.artifact_version = -1 - - deployment_instance = deployment_instance.update_from_response_json( - _client._send_request( - "PUT", - path_params, - headers=headers, - data=deployment_instance.json(), - ) - ) - deployment_instance.model_registry_id = _client._project_id - return deployment_instance - - def post(self, deployment_instance, action: str): - """Perform an action on the deployment - - :param action: action to perform on the deployment (i.e., START or STOP) - :type action: str - """ - - _client = client.get_instance() - path_params = [ - "project", - _client._project_id, - "serving", - deployment_instance.id, - ] - query_params = {"action": action} - return _client._send_request("POST", path_params, query_params=query_params) - - def delete(self, deployment_instance): - """Delete the deployment and metadata. - - :param deployment_instance: metadata object of the deployment to delete - :type deployment_instance: Deployment - """ - - _client = client.get_instance() - path_params = [ - "project", - _client._project_id, - "serving", - deployment_instance.id, - ] - return _client._send_request("DELETE", path_params) - - def get_state(self, deployment_instance): - """Get the state of a given deployment - - :param deployment_instance: metadata object of the deployment to get state of - :type deployment_instance: Deployment - :return: predictor state - :rtype: PredictorState - """ - - _client = client.get_instance() - path_params = [ - "project", - _client._project_id, - "serving", - str(deployment_instance.id), - ] - deployment_json = _client._send_request("GET", path_params) - return predictor_state.PredictorState.from_response_json(deployment_json) - - def reset_changes(self, deployment_instance): - """Reset a given deployment to the original values in the Hopsworks instance - - :param deployment_instance: metadata object of the deployment to reset - :type deployment_instance: Deployment - :return: deployment with reset values - :rtype: Deployment - """ - - _client = client.get_instance() - path_params = ["project", _client._project_id, "serving"] - query_params = {"name": deployment_instance.name} - deployment_json = _client._send_request( - "GET", path_params, query_params=query_params - ) - deployment_aux = deployment_instance.update_from_response_json(deployment_json) - # TODO: remove when model_registry_id is added properly to deployments in backend - deployment_aux.model_registry_id = _client._project_id - return deployment_aux - - def send_inference_request( - self, - deployment_instance, - data: Union[Dict, List[InferInput]], - through_hopsworks: bool = False, - ) -> Union[Dict, List[InferOutput]]: - """Send inference requests to a deployment with a certain id - - :param deployment_instance: metadata object of the deployment to be used for the prediction - :type deployment_instance: Deployment - :param data: payload of the inference request - :type data: Union[Dict, List[InferInput]] - :param through_hopsworks: whether to send the inference request through the Hopsworks REST API or not - :type through_hopsworks: bool - :return: inference response - 
:rtype: Union[Dict, List[InferOutput]] - """ - if deployment_instance.api_protocol == IE.API_PROTOCOL_REST: - # REST protocol, use hopsworks or istio client - return self._send_inference_request_via_rest_protocol( - deployment_instance, data, through_hopsworks - ) - else: - # gRPC protocol, use the deployment grpc channel - return self._send_inference_request_via_grpc_protocol( - deployment_instance, data - ) - - def _send_inference_request_via_rest_protocol( - self, - deployment_instance, - data: Dict, - through_hopsworks: bool = False, - ) -> Dict: - headers = {"content-type": "application/json"} - if through_hopsworks: - # use Hopsworks client - _client = client.get_instance() - path_params = self._get_hopsworks_inference_path( - _client._project_id, deployment_instance - ) - else: - _client = client.get_istio_instance() - if _client is not None: - # use istio client - path_params = self._get_istio_inference_path(deployment_instance) - # - add host header - headers["host"] = self._get_inference_request_host_header( - _client._project_name, - deployment_instance.name, - client.get_knative_domain(), - ) - else: - # fallback to Hopsworks client - _client = client.get_instance() - path_params = self._get_hopsworks_inference_path( - _client._project_id, deployment_instance - ) - - # send inference request - return _client._send_request( - "POST", path_params, headers=headers, data=json.dumps(data) - ) - - def _send_inference_request_via_grpc_protocol( - self, deployment_instance, data: List[InferInput] - ) -> List[InferOutput]: - # get grpc channel - if deployment_instance._grpc_channel is None: - # The gRPC channel is lazily initialized. The first call to deployment.predict() will initialize - # the channel, which will be reused in all following calls on the same deployment object. 
- # The gRPC channel is freed when calling deployment.stop() - print("Initializing gRPC channel...") - deployment_instance._grpc_channel = self._create_grpc_channel( - deployment_instance.name - ) - # build an infer request - request = InferRequest( - infer_inputs=data, - model_name=deployment_instance.name, - ) - - # send infer request - infer_response = deployment_instance._grpc_channel.infer( - infer_request=request, headers=None - ) - - # extract infer outputs - return infer_response.outputs - - def _create_grpc_channel(self, deployment_name: str): - _client = client.get_istio_instance() - service_hostname = self._get_inference_request_host_header( - _client._project_name, - deployment_name, - client.get_knative_domain(), - ) - return _client._create_grpc_channel(service_hostname) - - def is_kserve_installed(self): - """Check if kserve is installed - - :return: whether kserve is installed - :rtype: bool - """ - - _client = client.get_instance() - path_params = ["variables", "kube_kserve_installed"] - kserve_installed = _client._send_request("GET", path_params) - return ( - "successMessage" in kserve_installed - and kserve_installed["successMessage"] == "true" - ) - - def get_resource_limits(self): - """Get resource limits for model serving""" - - _client = client.get_instance() - - path_params = ["variables", "kube_serving_max_cores_allocation"] - max_cores = _client._send_request("GET", path_params) - - path_params = ["variables", "kube_serving_max_memory_allocation"] - max_memory = _client._send_request("GET", path_params) - - path_params = ["variables", "kube_serving_max_gpus_allocation"] - max_gpus = _client._send_request("GET", path_params) - - return { - "cores": float(max_cores["successMessage"]), - "memory": int(max_memory["successMessage"]), - "gpus": int(max_gpus["successMessage"]), - } - - def get_num_instances_limits(self): - """Get number of instances limits for model serving""" - - _client = client.get_instance() - - path_params = ["variables", "kube_serving_min_num_instances"] - min_instances = _client._send_request("GET", path_params) - - path_params = ["variables", "kube_serving_max_num_instances"] - max_instances = _client._send_request("GET", path_params) - - return [ - int(min_instances["successMessage"]), - int(max_instances["successMessage"]), - ] - - def get_knative_domain(self): - """Get the domain used by knative""" - - _client = client.get_instance() - - path_params = ["variables", "kube_knative_domain_name"] - domain = _client._send_request("GET", path_params) - - return domain["successMessage"] - - def get_logs(self, deployment_instance, component, tail): - """Get the logs of a deployment - - :param deployment_instance: metadata object of the deployment to get logs from - :type deployment_instance: Deployment - :param component: deployment component (e.g., predictor or transformer) - :type component: str - :param tail: number of tailing lines to retrieve - :type tail: int - :return: deployment logs - :rtype: DeployableComponentLogs - """ - - _client = client.get_instance() - path_params = [ - "project", - _client._project_id, - "serving", - deployment_instance.id, - "logs", - ] - query_params = {"component": component, "tail": tail} - return deployable_component_logs.DeployableComponentLogs.from_response_json( - _client._send_request("GET", path_params, query_params=query_params) - ) - - def _get_inference_request_host_header( - self, project_name: str, deployment_name: str, domain: str - ): - return "{}.{}.{}".format( - deployment_name, 
project_name.replace("_", "-"), domain - ).lower() - - def _get_hopsworks_inference_path(self, project_id: int, deployment_instance): - return [ - "project", - project_id, - "inference", - "models", - deployment_instance.name + ":predict", - ] - - def _get_istio_inference_path(self, deployment_instance): - return ["v1", "models", deployment_instance.name + ":predict"] diff --git a/hsml/python/hsml/decorators.py b/hsml/python/hsml/decorators.py deleted file mode 100644 index 826fd5aa2..000000000 --- a/hsml/python/hsml/decorators.py +++ /dev/null @@ -1,55 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import functools - - -def not_connected(fn): - @functools.wraps(fn) - def if_not_connected(inst, *args, **kwargs): - if inst._connected: - raise HopsworksConnectionError - return fn(inst, *args, **kwargs) - - return if_not_connected - - -def connected(fn): - @functools.wraps(fn) - def if_connected(inst, *args, **kwargs): - if not inst._connected: - raise NoHopsworksConnectionError - return fn(inst, *args, **kwargs) - - return if_connected - - -class HopsworksConnectionError(Exception): - """Thrown when attempted to change connection attributes while connected.""" - - def __init__(self): - super().__init__( - "Connection is currently in use. Needs to be closed for modification." - ) - - -class NoHopsworksConnectionError(Exception): - """Thrown when attempted to perform operation on connection while not connected.""" - - def __init__(self): - super().__init__( - "Connection is not active. Needs to be connected for model registry operations." - ) diff --git a/hsml/python/hsml/deployable_component.py b/hsml/python/hsml/deployable_component.py deleted file mode 100644 index adabff5b8..000000000 --- a/hsml/python/hsml/deployable_component.py +++ /dev/null @@ -1,92 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-import json
-from abc import ABC, abstractmethod
-from typing import Optional, Union
-
-import humps
-from hsml import util
-from hsml.inference_batcher import InferenceBatcher
-from hsml.resources import Resources
-
-
-class DeployableComponent(ABC):
-    """Configuration of a deployable component (predictor or transformer)."""
-
-    def __init__(
-        self,
-        script_file: Optional[str] = None,
-        resources: Optional[Resources] = None,
-        inference_batcher: Optional[Union[InferenceBatcher, dict]] = None,
-        **kwargs,
-    ):
-        self._script_file = script_file
-        self._resources = resources
-        self._inference_batcher = (
-            util.get_obj_from_json(inference_batcher, InferenceBatcher)
-            or InferenceBatcher()
-        )
-
-    @classmethod
-    @abstractmethod
-    def from_json(cls, json_decamelized):
-        "To be implemented by the component type"
-        pass
-
-    @classmethod
-    def from_response_json(cls, json_dict):
-        json_decamelized = humps.decamelize(json_dict)
-        return cls.from_json(json_decamelized)
-
-    def json(self):
-        return json.dumps(self, cls=util.MLEncoder)
-
-    @abstractmethod
-    def update_from_response_json(self, json_dict):
-        "To be implemented by the component type"
-        pass
-
-    @abstractmethod
-    def to_dict(self):
-        "To be implemented by the component type"
-        pass
-
-    @property
-    def script_file(self):
-        """Script file run by the deployment component (i.e., predictor or transformer)."""
-        return self._script_file
-
-    @script_file.setter
-    def script_file(self, script_file: str):
-        self._script_file = script_file
-
-    @property
-    def resources(self):
-        """Resource configuration for the deployment component (i.e., predictor or transformer)."""
-        return self._resources
-
-    @resources.setter
-    def resources(self, resources: Resources):
-        self._resources = resources
-
-    @property
-    def inference_batcher(self):
-        """Configuration of the inference batcher attached to the deployment component (i.e., predictor or transformer)."""
-        return self._inference_batcher
-
-    @inference_batcher.setter
-    def inference_batcher(self, inference_batcher: InferenceBatcher):
-        self._inference_batcher = inference_batcher
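
from_json, update_from_response_json and to_dict are left to the concrete component types; in hsml these are Predictor and Transformer. A hypothetical minimal subclass, only to illustrate the abstract contract (EchoComponent is not part of the library):

    class EchoComponent(DeployableComponent):
        @classmethod
        def from_json(cls, json_decamelized):
            # build the component from a decamelized response dict
            return cls(script_file=json_decamelized.get("script_file"))

        def update_from_response_json(self, json_dict):
            self.script_file = json_dict.get("scriptFile", self.script_file)
            return self

        def to_dict(self):
            return {"scriptFile": self._script_file}
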
diff --git a/hsml/python/hsml/deployable_component_logs.py b/hsml/python/hsml/deployable_component_logs.py
deleted file mode 100644
index 7035030a4..000000000
--- a/hsml/python/hsml/deployable_component_logs.py
+++ /dev/null
@@ -1,91 +0,0 @@
-#
-# Copyright 2022 Logical Clocks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from datetime import datetime
-
-import humps
-from hsml import util
-
-
-class DeployableComponentLogs:
-    """Server logs of a deployable component (predictor or transformer).
-
-    # Arguments
-        instance_name: Deployment instance name.
-        content: actual logs
-    # Returns
-        `DeployableComponentLogs`. Server logs of a deployable component
-    """
-
-    def __init__(self, instance_name: str, content: str, **kwargs):
-        self._instance_name = instance_name
-        self._content = content
-        self._created_at = datetime.now()
-
-    @classmethod
-    def from_response_json(cls, json_dict):
-        json_decamelized = humps.decamelize(json_dict)
-        if len(json_decamelized) == 0:
-            return []
-        return [cls.from_json(logs) for logs in json_decamelized]
-
-    @classmethod
-    def from_json(cls, json_decamelized):
-        return DeployableComponentLogs(*cls.extract_fields_from_json(json_decamelized))
-
-    @classmethod
-    def extract_fields_from_json(cls, json_decamelized):
-        instance_name = util.extract_field_from_json(json_decamelized, "instance_name")
-        content = util.extract_field_from_json(json_decamelized, "content")
-        return instance_name, content
-
-    def to_dict(self):
-        return {"instance_name": self._instance_name, "content": self._content}
-
-    @property
-    def instance_name(self):
-        """Name of the deployment instance containing these server logs."""
-        return self._instance_name
-
-    @property
-    def content(self):
-        """Content of the server logs of the current deployment instance."""
-        return self._content
-
-    @property
-    def created_at(self):
-        """Datetime when the current server logs chunk was retrieved."""
-        return self._created_at
-
-    @property
-    def component(self):
-        """Component of the deployment containing these server logs."""
-        return self._component
-
-    @component.setter
-    def component(self, component: str):
-        self._component = component
-
-    @property
-    def tail(self):
-        """Number of lines of server logs."""
-        return self._tail
-
-    @tail.setter
-    def tail(self, tail: int):
-        self._tail = tail
-
-    def __repr__(self):
-        return f"DeployableComponentLogs(instance_name: {self._instance_name!r}, date: {self._created_at!r}) \n{self._content!s}"
diff --git a/hsml/python/hsml/deployment.py b/hsml/python/hsml/deployment.py
deleted file mode 100644
index 0336d1209..000000000
--- a/hsml/python/hsml/deployment.py
+++ /dev/null
@@ -1,479 +0,0 @@
-#
-# Copyright 2022 Logical Clocks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Dict, List, Optional, Union
-
-from hsml import client, util
-from hsml import predictor as predictor_mod
-from hsml.client.exceptions import ModelServingException
-from hsml.client.istio.utils.infer_type import InferInput
-from hsml.constants import DEPLOYABLE_COMPONENT, PREDICTOR_STATE
-from hsml.core import model_api, serving_api
-from hsml.engine import serving_engine
-from hsml.inference_batcher import InferenceBatcher
-from hsml.inference_logger import InferenceLogger
-from hsml.predictor_state import PredictorState
-from hsml.resources import Resources
-from hsml.transformer import Transformer
-
-
-class Deployment:
-    """Metadata object representing a deployment in Model Serving."""
-
-    def __init__(
-        self,
-        predictor,
-        name: Optional[str] = None,
-        description: Optional[str] = None,
-        **kwargs,
-    ):
-        self._predictor = predictor
-        self._description = description
-
-        if self._predictor is None:
-            raise ModelServingException("A predictor is required")
-        elif not isinstance(self._predictor, predictor_mod.Predictor):
-            raise ValueError(
-                "The predictor provided is not an instance of the Predictor class"
-            )
-
-        if name is not None:
-            self._predictor.name = name
-
-        if self._description is None:
-            self._description = self._predictor.description
-        else:
-            self._description = self._predictor.description = description
-
-        self._serving_api = serving_api.ServingApi()
-        self._serving_engine = serving_engine.ServingEngine()
-        self._model_api = model_api.ModelApi()
-        self._grpc_channel = None
-        self._model_registry_id = None
-
-    def save(self, await_update: Optional[int] = 60):
-        """Persist this deployment including the predictor and metadata to Model Serving.
-
-        # Arguments
-            await_update: If the deployment is running, awaiting time (seconds) for the running instances to be updated.
-                If the running instances are not updated within this timespan, the call to this method returns while
-                the update continues in the background.
-        """
-
-        self._serving_engine.save(self, await_update)
-
-    def start(self, await_running: Optional[int] = 60):
-        """Start the deployment.
-
-        # Arguments
-            await_running: Awaiting time (seconds) for the deployment to start.
-                If the deployment has not started within this timespan, the call to this method returns while
-                it deploys in the background.
-        """
-
-        self._serving_engine.start(self, await_status=await_running)
-
-    def stop(self, await_stopped: Optional[int] = 60):
-        """Stop the deployment.
-
-        # Arguments
-            await_stopped: Awaiting time (seconds) for the deployment to stop.
-                If the deployment has not stopped within this timespan, the call to this method returns while
-                it continues stopping in the background.
-        """
-
-        self._serving_engine.stop(self, await_status=await_stopped)
-
-    def delete(self, force=False):
-        """Delete the deployment.
-
-        # Arguments
-            force: Force the deletion of the deployment.
-                If the deployment is running, it will be stopped and deleted automatically.
-                !!! warn A call to this method does not ask for a second confirmation.
-        """
-
-        self._serving_engine.delete(self, force)
-
-    def get_state(self) -> PredictorState:
-        """Get the current state of the deployment.
-
-        # Returns
-            `PredictorState`. The state of the deployment.
-        """
-
-        return self._serving_engine.get_state(self)
-
-    def is_created(self) -> bool:
-        """Check whether the deployment is created.
-
-        # Returns
-            `bool`. Whether the deployment is created or not.
- """ - - return ( - self._serving_engine.get_state(self).status - != PREDICTOR_STATE.STATUS_CREATING - ) - - def is_running(self, or_idle=True, or_updating=True) -> bool: - """Check whether the deployment is ready to handle inference requests - - # Arguments - or_idle: Whether the idle state is considered as running (default is True) - or_updating: Whether the updating state is considered as running (default is True) - - # Returns - `bool`. Whether the deployment is ready or not. - """ - - status = self._serving_engine.get_state(self).status - return ( - status == PREDICTOR_STATE.STATUS_RUNNING - or (or_idle and status == PREDICTOR_STATE.STATUS_IDLE) - or (or_updating and status == PREDICTOR_STATE.STATUS_UPDATING) - ) - - def is_stopped(self, or_created=True) -> bool: - """Check whether the deployment is stopped - - # Arguments - or_created: Whether the creating and created state is considered as stopped (default is True) - - # Returns - `bool`. Whether the deployment is stopped or not. - """ - - status = self._serving_engine.get_state(self).status - return status == PREDICTOR_STATE.STATUS_STOPPED or ( - or_created - and ( - status == PREDICTOR_STATE.STATUS_CREATING - or status == PREDICTOR_STATE.STATUS_CREATED - ) - ) - - def predict( - self, - data: Union[Dict, InferInput] = None, - inputs: Union[List, Dict] = None, - ): - """Send inference requests to the deployment. - One of data or inputs parameters must be set. If both are set, inputs will be ignored. - - !!! example - ```python - # login into Hopsworks using hopsworks.login() - - # get Hopsworks Model Serving handle - ms = project.get_model_serving() - - # retrieve deployment by name - my_deployment = ms.get_deployment("my_deployment") - - # (optional) retrieve model input example - my_model = project.get_model_registry() \ - .get_model(my_deployment.model_name, my_deployment.model_version) - - # make predictions using model inputs (single or batch) - predictions = my_deployment.predict(inputs=my_model.input_example) - - # or using more sophisticated inference request payloads - data = { "instances": [ my_model.input_example ], "key2": "value2" } - predictions = my_deployment.predict(data) - ``` - - # Arguments - data: Payload dictionary for the inference request including the model input(s) - inputs: Model inputs used in the inference requests - - # Returns - `dict`. Inference response. - """ - - return self._serving_engine.predict(self, data, inputs) - - def get_model(self): - """Retrieve the metadata object for the model being used by this deployment""" - return self._model_api.get( - self.model_name, self.model_version, self.model_registry_id - ) - - def download_artifact(self): - """Download the model artifact served by the deployment""" - - return self._serving_engine.download_artifact(self) - - def get_logs(self, component="predictor", tail=10): - """Prints the deployment logs of the predictor or transformer. - - # Arguments - component: Deployment component to get the logs from (e.g., predictor or transformer) - tail: Number of most recent lines to retrieve from the logs. - """ - - # validate component - components = list(util.get_members(DEPLOYABLE_COMPONENT)) - if component not in components: - raise ValueError( - "Component '{}' is not valid. 
Possible values are '{}'".format( - component, ", ".join(components) - ) - ) - - logs = self._serving_engine.get_logs(self, component, tail) - if logs is not None: - for log in logs: - print(log, end="\n\n") - - def get_url(self): - """Get url to the deployment in Hopsworks""" - - path = ( - "/p/" - + str(client.get_instance()._project_id) - + "/deployments/" - + str(self.id) - ) - return util.get_hostname_replaced_url(path) - - def describe(self): - """Print a description of the deployment""" - - util.pretty_print(self) - - @classmethod - def from_response_json(cls, json_dict): - predictors = predictor_mod.Predictor.from_response_json(json_dict) - if isinstance(predictors, list): - return [ - cls.from_predictor(predictor_instance) - for predictor_instance in predictors - ] - else: - return cls.from_predictor(predictors) - - @classmethod - def from_predictor(cls, predictor_instance): - return Deployment( - predictor=predictor_instance, - name=predictor_instance._name, - description=predictor_instance._description, - ) - - def update_from_response_json(self, json_dict): - self._predictor.update_from_response_json(json_dict) - self.__init__( - predictor=self._predictor, - name=self._predictor._name, - description=self._predictor._description, - ) - return self - - def json(self): - return self._predictor.json() - - def to_dict(self): - return self._predictor.to_dict() - - # Deployment - - @property - def id(self): - """Id of the deployment.""" - return self._predictor.id - - @property - def name(self): - """Name of the deployment.""" - return self._predictor.name - - @name.setter - def name(self, name: str): - self._predictor.name = name - - @property - def description(self): - """Description of the deployment.""" - return self._description - - @description.setter - def description(self, description: str): - self._description = description - - @property - def predictor(self): - """Predictor used in the deployment.""" - return self._predictor - - @predictor.setter - def predictor(self, predictor): - self._predictor = predictor - - @property - def requested_instances(self): - """Total number of requested instances in the deployment.""" - return self._predictor.requested_instances - - # Single predictor - - @property - def model_name(self): - """Name of the model deployed by the predictor""" - return self._predictor.model_name - - @model_name.setter - def model_name(self, model_name: str): - self._predictor.model_name = model_name - - @property - def model_path(self): - """Model path deployed by the predictor.""" - return self._predictor.model_path - - @model_path.setter - def model_path(self, model_path: str): - self._predictor.model_path = model_path - - @property - def model_version(self): - """Model version deployed by the predictor.""" - return self._predictor.model_version - - @model_version.setter - def model_version(self, model_version: int): - self._predictor.model_version = model_version - - @property - def artifact_version(self): - """Artifact version deployed by the predictor.""" - return self._predictor.artifact_version - - @artifact_version.setter - def artifact_version(self, artifact_version: Union[int, str]): - self._predictor.artifact_version = artifact_version - - @property - def artifact_path(self): - """Path of the model artifact deployed by the predictor.""" - return self._predictor.artifact_path - - @property - def model_server(self): - """Model server ran by the predictor.""" - return self._predictor.model_server - - @model_server.setter - def model_server(self, 
model_server: str): - self._predictor.model_server = model_server - - @property - def serving_tool(self): - """Serving tool used to run the model server.""" - return self._predictor.serving_tool - - @serving_tool.setter - def serving_tool(self, serving_tool: str): - self._predictor.serving_tool = serving_tool - - @property - def script_file(self): - """Script file used by the predictor.""" - return self._predictor.script_file - - @script_file.setter - def script_file(self, script_file: str): - self._predictor.script_file = script_file - - @property - def resources(self): - """Resource configuration for the predictor.""" - return self._predictor.resources - - @resources.setter - def resources(self, resources: Resources): - self._predictor.resources = resources - - @property - def inference_logger(self): - """Configuration of the inference logger attached to this predictor.""" - return self._predictor.inference_logger - - @inference_logger.setter - def inference_logger(self, inference_logger: InferenceLogger): - self._predictor.inference_logger = inference_logger - - @property - def inference_batcher(self): - """Configuration of the inference batcher attached to this predictor.""" - return self._predictor.inference_batcher - - @inference_batcher.setter - def inference_batcher(self, inference_batcher: InferenceBatcher): - self._predictor.inference_batcher = inference_batcher - - @property - def transformer(self): - """Transformer configured in the predictor.""" - return self._predictor.transformer - - @transformer.setter - def transformer(self, transformer: Transformer): - self._predictor.transformer = transformer - - @property - def model_registry_id(self): - """Model Registry Id of the deployment.""" - return self._model_registry_id - - @model_registry_id.setter - def model_registry_id(self, model_registry_id: int): - self._model_registry_id = model_registry_id - - @property - def created_at(self): - """Created at date of the predictor.""" - return self._predictor.created_at - - @property - def creator(self): - """Creator of the predictor.""" - return self._predictor.creator - - @property - def api_protocol(self): - """API protocol enabled in the deployment (e.g., HTTP or GRPC).""" - return self._predictor.api_protocol - - @api_protocol.setter - def api_protocol(self, api_protocol: str): - self._predictor.api_protocol = api_protocol - - @property - def environment(self): - """Name of inference environment""" - return self._predictor.environment - - @environment.setter - def environment(self, environment: str): - self._predictor.environment = environment - - def __repr__(self): - desc = ( - f", description: {self._description!r}" - if self._description is not None - else "" - ) - return f"Deployment(name: {self._predictor._name!r}" + desc + ")" diff --git a/hsml/python/hsml/engine/__init__.py b/hsml/python/hsml/engine/__init__.py deleted file mode 100644 index ff0a6f046..000000000 --- a/hsml/python/hsml/engine/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/hsml/python/hsml/engine/hopsworks_engine.py b/hsml/python/hsml/engine/hopsworks_engine.py deleted file mode 100644 index 79537fa48..000000000 --- a/hsml/python/hsml/engine/hopsworks_engine.py +++ /dev/null @@ -1,65 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os - -from hsml import client -from hsml.core import model_api, native_hdfs_api - - -class HopsworksEngine: - def __init__(self): - self._native_hdfs_api = native_hdfs_api.NativeHdfsApi() - self._model_api = model_api.ModelApi() - - def mkdir(self, remote_path: str): - remote_path = self._prepend_project_path(remote_path) - self._native_hdfs_api.mkdir(remote_path) - self._native_hdfs_api.chmod(remote_path, "ug+rwx") - - def delete(self, model_instance): - self._model_api.delete(model_instance) - - def upload(self, local_path: str, remote_path: str, upload_configuration=None): - local_path = self._get_abs_path(local_path) - remote_path = self._prepend_project_path(remote_path) - self._native_hdfs_api.upload(local_path, remote_path) - self._native_hdfs_api.chmod(remote_path, "ug+rwx") - - def download(self, remote_path: str, local_path: str): - local_path = self._get_abs_path(local_path) - remote_path = self._prepend_project_path(remote_path) - self._native_hdfs_api.download(remote_path, local_path) - - def copy(self, source_path: str, destination_path: str): - # both paths are hdfs paths - source_path = self._prepend_project_path(source_path) - destination_path = self._prepend_project_path(destination_path) - self._native_hdfs_api.copy(source_path, destination_path) - - def move(self, source_path: str, destination_path: str): - source_path = self._prepend_project_path(source_path) - destination_path = self._prepend_project_path(destination_path) - self._native_hdfs_api.move(source_path, destination_path) - - def _get_abs_path(self, local_path: str): - return local_path if os.path.isabs(local_path) else os.path.abspath(local_path) - - def _prepend_project_path(self, remote_path: str): - if not remote_path.startswith("/Projects/"): - _client = client.get_instance() - remote_path = "/Projects/{}/{}".format(_client._project_name, remote_path) - return remote_path diff --git a/hsml/python/hsml/engine/local_engine.py b/hsml/python/hsml/engine/local_engine.py deleted file mode 100644 index 7b669a249..000000000 --- a/hsml/python/hsml/engine/local_engine.py +++ /dev/null @@ -1,79 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os - -from hsml import client -from hsml.core import dataset_api, model_api - - -class LocalEngine: - def __init__(self): - self._dataset_api = dataset_api.DatasetApi() - self._model_api = model_api.ModelApi() - - def mkdir(self, remote_path: str): - remote_path = self._prepend_project_path(remote_path) - self._dataset_api.mkdir(remote_path) - - def delete(self, model_instance): - self._model_api.delete(model_instance) - - def upload(self, local_path: str, remote_path: str, upload_configuration=None): - local_path = self._get_abs_path(local_path) - remote_path = self._prepend_project_path(remote_path) - - # Initialize the upload configuration to empty dictionary if is None - upload_configuration = upload_configuration if upload_configuration else {} - self._dataset_api.upload( - local_path, - remote_path, - chunk_size=upload_configuration.get( - "chunk_size", self._dataset_api.DEFAULT_UPLOAD_FLOW_CHUNK_SIZE - ), - simultaneous_uploads=upload_configuration.get( - "simultaneous_uploads", - self._dataset_api.DEFAULT_UPLOAD_SIMULTANEOUS_UPLOADS, - ), - max_chunk_retries=upload_configuration.get( - "max_chunk_retries", - self._dataset_api.DEFAULT_UPLOAD_MAX_CHUNK_RETRIES, - ), - ) - - def download(self, remote_path: str, local_path: str): - local_path = self._get_abs_path(local_path) - remote_path = self._prepend_project_path(remote_path) - self._dataset_api.download(remote_path, local_path) - - def copy(self, source_path, destination_path): - source_path = self._prepend_project_path(source_path) - destination_path = self._prepend_project_path(destination_path) - self._dataset_api.copy(source_path, destination_path) - - def move(self, source_path, destination_path): - source_path = self._prepend_project_path(source_path) - destination_path = self._prepend_project_path(destination_path) - self._dataset_api.move(source_path, destination_path) - - def _get_abs_path(self, local_path: str): - return local_path if os.path.isabs(local_path) else os.path.abspath(local_path) - - def _prepend_project_path(self, remote_path: str): - if not remote_path.startswith("/Projects/"): - _client = client.get_instance() - remote_path = "/Projects/{}/{}".format(_client._project_name, remote_path) - return remote_path diff --git a/hsml/python/hsml/engine/model_engine.py b/hsml/python/hsml/engine/model_engine.py deleted file mode 100644 index b4b4090a8..000000000 --- a/hsml/python/hsml/engine/model_engine.py +++ /dev/null @@ -1,549 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import json -import os -import tempfile -import time -import uuid - -from hsml import client, constants, util -from hsml.client.exceptions import ModelRegistryException, RestAPIError -from hsml.core import dataset_api, model_api -from hsml.engine import local_engine -from tqdm.auto import tqdm - - -class ModelEngine: - def __init__(self): - self._model_api = model_api.ModelApi() - self._dataset_api = dataset_api.DatasetApi() - - self._engine = local_engine.LocalEngine() - - def _poll_model_available(self, model_instance, await_registration): - if await_registration > 0: - model_registry_id = model_instance.model_registry_id - sleep_seconds = 5 - for _ in range(int(await_registration / sleep_seconds)): - try: - time.sleep(sleep_seconds) - model_meta = self._model_api.get( - model_instance.name, - model_instance.version, - model_registry_id, - model_instance.shared_registry_project_name, - ) - if model_meta is not None: - return model_meta - except RestAPIError as e: - if e.response.status_code != 404: - raise e - print( - "Model not available during polling, set a higher value for await_registration to wait longer." - ) - - def _upload_additional_resources(self, model_instance): - if model_instance._input_example is not None: - input_example_path = os.path.join(os.getcwd(), "input_example.json") - input_example = util.input_example_to_json(model_instance._input_example) - - with open(input_example_path, "w+") as out: - json.dump(input_example, out, cls=util.NumpyEncoder) - - self._engine.upload(input_example_path, model_instance.version_path) - os.remove(input_example_path) - model_instance.input_example = None - if model_instance._model_schema is not None: - model_schema_path = os.path.join(os.getcwd(), "model_schema.json") - model_schema = model_instance._model_schema - - with open(model_schema_path, "w+") as out: - out.write(model_schema.json()) - - self._engine.upload(model_schema_path, model_instance.version_path) - os.remove(model_schema_path) - model_instance.model_schema = None - return model_instance - - def _copy_or_move_hopsfs_model_item( - self, item_attr, to_model_version_path, keep_original_files - ): - """Copy or move model item from a hdfs path to the model version folder in the Models dataset. It works with files and folders.""" - path = item_attr["path"] - to_hdfs_path = os.path.join(to_model_version_path, os.path.basename(path)) - if keep_original_files: - self._engine.copy(path, to_hdfs_path) - else: - self._engine.move(path, to_hdfs_path) - - def _copy_or_move_hopsfs_model( - self, - from_hdfs_model_path, - to_model_version_path, - keep_original_files, - update_upload_progress, - ): - """Copy or move model files from a hdfs path to the model version folder in the Models dataset.""" - # Strip hdfs prefix - if from_hdfs_model_path.startswith("hdfs:/"): - projects_index = from_hdfs_model_path.find("/Projects", 0) - from_hdfs_model_path = from_hdfs_model_path[projects_index:] - - n_dirs, n_files = 0, 0 - - model_path_resp = self._dataset_api.get(from_hdfs_model_path) - model_path_attr = model_path_resp["attributes"] - if ( - "datasetType" in model_path_resp - and model_path_resp["datasetType"] == "DATASET" - ): # This is needed to avoid a user exporting for example "Resources" from wiping the dataset - raise AssertionError( - "It is disallowed to export a root dataset path." - " Move the model to a sub-folder and try again." 
- ) - elif model_path_attr.get("dir", False): - # if path is a directory, iterate of the directory content - for entry in self._dataset_api.list( - from_hdfs_model_path, sort_by="NAME:desc" - )["items"]: - path_attr = entry["attributes"] - self._copy_or_move_hopsfs_model_item( - path_attr, to_model_version_path, keep_original_files - ) - if path_attr.get("dir", False): - n_dirs += 1 - else: - n_files += 1 - update_upload_progress(n_dirs=n_dirs, n_files=n_files) - else: - # if path is a file, copy/move it - self._copy_or_move_hopsfs_model_item( - model_path_attr, to_model_version_path, keep_original_files - ) - n_files += 1 - update_upload_progress(n_dirs=n_dirs, n_files=n_files) - - def _download_model_from_hopsfs_recursive( - self, - from_hdfs_model_path: str, - to_local_path: str, - update_download_progress, - n_dirs, - n_files, - ): - """Download model files from a model path in hdfs, recursively""" - - for entry in self._dataset_api.list(from_hdfs_model_path, sort_by="NAME:desc")[ - "items" - ]: - path_attr = entry["attributes"] - path = path_attr["path"] - basename = os.path.basename(path) - - if path_attr.get("dir", False): - # otherwise, make a recursive call for the folder - if basename == "Artifacts": - continue # skip Artifacts subfolder - local_folder_path = os.path.join(to_local_path, basename) - os.mkdir(local_folder_path) - n_dirs, n_files = self._download_model_from_hopsfs_recursive( - from_hdfs_model_path=path, - to_local_path=local_folder_path, - update_download_progress=update_download_progress, - n_dirs=n_dirs, - n_files=n_files, - ) - n_dirs += 1 - update_download_progress(n_dirs=n_dirs, n_files=n_files) - else: - # if it's a file, download it - local_file_path = os.path.join(to_local_path, basename) - self._engine.download(path, local_file_path) - n_files += 1 - update_download_progress(n_dirs=n_dirs, n_files=n_files) - - return n_dirs, n_files - - def _download_model_from_hopsfs( - self, from_hdfs_model_path: str, to_local_path: str, update_download_progress - ): - """Download model files from a model path in hdfs.""" - - n_dirs, n_files = self._download_model_from_hopsfs_recursive( - from_hdfs_model_path=from_hdfs_model_path, - to_local_path=to_local_path, - update_download_progress=update_download_progress, - n_dirs=0, - n_files=0, - ) - update_download_progress(n_dirs=n_dirs, n_files=n_files, done=True) - - def _upload_local_model( - self, - from_local_model_path, - to_model_version_path, - update_upload_progress, - upload_configuration=None, - ): - """Copy or upload model files from a local path to the model version folder in the Models dataset.""" - n_dirs, n_files = 0, 0 - if os.path.isdir(from_local_model_path): - # if path is a dir, upload files and folders iteratively - for root, dirs, files in os.walk(from_local_model_path): - # os.walk(local_model_path), where local_model_path is expected to be an absolute path - # - root is the absolute path of the directory being walked - # - dirs is the list of directory names present in the root dir - # - files is the list of file names present in the root dir - # we need to replace the local path prefix with the hdfs path prefix (i.e., /srv/hops/....../root with /Projects/.../) - remote_base_path = root.replace( - from_local_model_path, to_model_version_path - ) - for d_name in dirs: - self._engine.mkdir(remote_base_path + "/" + d_name) - n_dirs += 1 - update_upload_progress(n_dirs, n_files) - for f_name in files: - self._engine.upload( - root + "/" + f_name, - remote_base_path, - 
upload_configuration=upload_configuration, - ) - n_files += 1 - update_upload_progress(n_dirs, n_files) - else: - # if path is a file, upload file - self._engine.upload( - from_local_model_path, - to_model_version_path, - upload_configuration=upload_configuration, - ) - n_files += 1 - update_upload_progress(n_dirs, n_files) - - def _save_model_from_local_or_hopsfs_mount( - self, - model_instance, - model_path, - keep_original_files, - update_upload_progress, - upload_configuration=None, - ): - """Save model files from a local path. The local path can be on hopsfs mount""" - # check hopsfs mount - if model_path.startswith(constants.MODEL_REGISTRY.HOPSFS_MOUNT_PREFIX): - self._copy_or_move_hopsfs_model( - from_hdfs_model_path=model_path.replace( - constants.MODEL_REGISTRY.HOPSFS_MOUNT_PREFIX, "" - ), - to_model_version_path=model_instance.version_path, - keep_original_files=keep_original_files, - update_upload_progress=update_upload_progress, - ) - else: - self._upload_local_model( - from_local_model_path=model_path, - to_model_version_path=model_instance.version_path, - update_upload_progress=update_upload_progress, - upload_configuration=upload_configuration, - ) - - def _set_model_version( - self, model_instance, dataset_models_root_path, dataset_model_path - ): - # Set model version if not defined - if model_instance._version is None: - current_highest_version = 0 - for item in self._dataset_api.list(dataset_model_path, sort_by="NAME:desc")[ - "items" - ]: - _, file_name = os.path.split(item["attributes"]["path"]) - try: - try: - current_version = int(file_name) - except ValueError: - continue - if current_version > current_highest_version: - current_highest_version = current_version - except RestAPIError: - pass - model_instance._version = current_highest_version + 1 - - elif self._dataset_api.path_exists( - dataset_models_root_path - + "/" - + model_instance._name - + "/" - + str(model_instance._version) - ): - raise ModelRegistryException( - "Model with name {} and version {} already exists".format( - model_instance._name, model_instance._version - ) - ) - return model_instance - - def _build_resource_path(self, model_instance, artifact): - artifact_path = "{}/{}".format(model_instance.version_path, artifact) - return artifact_path - - def save( - self, - model_instance, - model_path, - await_registration=480, - keep_original_files=False, - upload_configuration=None, - ): - _client = client.get_instance() - - is_shared_registry = model_instance.shared_registry_project_name is not None - - if is_shared_registry: - dataset_models_root_path = "{}::{}".format( - model_instance.shared_registry_project_name, - constants.MODEL_SERVING.MODELS_DATASET, - ) - model_instance._project_name = model_instance.shared_registry_project_name - else: - dataset_models_root_path = constants.MODEL_SERVING.MODELS_DATASET - model_instance._project_name = _client._project_name - - util.validate_metrics(model_instance.training_metrics) - - if not self._dataset_api.path_exists(dataset_models_root_path): - raise AssertionError( - "{} dataset does not exist in this project. 
Please enable the Serving service or create it manually.".format( - dataset_models_root_path - ) - ) - - # Create /Models/{model_instance._name} folder - dataset_model_name_path = dataset_models_root_path + "/" + model_instance._name - if not self._dataset_api.path_exists(dataset_model_name_path): - self._engine.mkdir(dataset_model_name_path) - - model_instance = self._set_model_version( - model_instance, dataset_models_root_path, dataset_model_name_path - ) - - # Attach model summary xattr to /Models/{model_instance._name}/{model_instance._version} - model_query_params = {} - - if "ML_ID" in os.environ: - model_instance._experiment_id = os.environ["ML_ID"] - - model_instance._experiment_project_name = _client._project_name - - if "HOPSWORKS_JOB_NAME" in os.environ: - model_query_params["jobName"] = os.environ["HOPSWORKS_JOB_NAME"] - elif "HOPSWORKS_KERNEL_ID" in os.environ: - model_query_params["kernelId"] = os.environ["HOPSWORKS_KERNEL_ID"] - - pbar = tqdm( - [ - {"id": 0, "desc": "Creating model folder"}, - {"id": 1, "desc": "Uploading model files"}, - {"id": 2, "desc": "Uploading input_example and model_schema"}, - {"id": 3, "desc": "Registering model"}, - {"id": 4, "desc": "Waiting for model registration"}, - {"id": 5, "desc": "Model export complete"}, - ] - ) - - for step in pbar: - try: - pbar.set_description("%s" % step["desc"]) - if step["id"] == 0: - # Create folders - self._engine.mkdir(model_instance.version_path) - if step["id"] == 1: - - def update_upload_progress(n_dirs=0, n_files=0, step=step): - pbar.set_description( - "%s (%s dirs, %s files)" % (step["desc"], n_dirs, n_files) - ) - - update_upload_progress(n_dirs=0, n_files=0) - - # Upload Model files from local path to /Models/{model_instance._name}/{model_instance._version} - # check local absolute - if os.path.isabs(model_path) and os.path.exists(model_path): - self._save_model_from_local_or_hopsfs_mount( - model_instance=model_instance, - model_path=model_path, - keep_original_files=keep_original_files, - update_upload_progress=update_upload_progress, - upload_configuration=upload_configuration, - ) - # check local relative - elif os.path.exists( - os.path.join(os.getcwd(), model_path) - ): # check local relative - self._save_model_from_local_or_hopsfs_mount( - model_instance=model_instance, - model_path=os.path.join(os.getcwd(), model_path), - keep_original_files=keep_original_files, - update_upload_progress=update_upload_progress, - upload_configuration=upload_configuration, - ) - # check project relative - elif self._dataset_api.path_exists( - model_path - ): # check hdfs relative and absolute - self._copy_or_move_hopsfs_model( - from_hdfs_model_path=model_path, - to_model_version_path=model_instance.version_path, - keep_original_files=keep_original_files, - update_upload_progress=update_upload_progress, - ) - else: - raise IOError( - "Could not find path {} in the local filesystem or in Hopsworks File System".format( - model_path - ) - ) - if step["id"] == 2: - model_instance = self._upload_additional_resources(model_instance) - if step["id"] == 3: - model_instance = self._model_api.put( - model_instance, model_query_params - ) - if step["id"] == 4: - model_instance = self._poll_model_available( - model_instance, await_registration - ) - if step["id"] == 5: - pass - except BaseException as be: - self._dataset_api.rm(model_instance.version_path) - raise be - - print("Model created, explore it at " + model_instance.get_url()) - - return model_instance - - def download(self, model_instance): - model_name_path = 
os.path.join(
-            tempfile.gettempdir(), str(uuid.uuid4()), model_instance._name
-        )
-        model_version_path = model_name_path + "/" + str(model_instance._version)
-        os.makedirs(model_version_path)
-
-        def update_download_progress(n_dirs, n_files, done=False):
-            print(
-                "Downloading model artifact (%s dirs, %s files)... %s"
-                % (n_dirs, n_files, "DONE" if done else ""),
-                end="\r",
-            )
-
-        try:
-            from_hdfs_model_path = model_instance.version_path
-            if from_hdfs_model_path.startswith("hdfs:/"):
-                projects_index = from_hdfs_model_path.find("/Projects", 0)
-                from_hdfs_model_path = from_hdfs_model_path[projects_index:]
-
-            self._download_model_from_hopsfs(
-                from_hdfs_model_path=from_hdfs_model_path,
-                to_local_path=model_version_path,
-                update_download_progress=update_download_progress,
-            )
-        except BaseException as be:
-            raise be
-
-        return model_version_path
-
-    def read_file(self, model_instance, resource):
-        hdfs_resource_path = self._build_resource_path(
-            model_instance, os.path.basename(resource)
-        )
-        if self._dataset_api.path_exists(hdfs_resource_path):
-            try:
-                resource = os.path.basename(resource)
-                tmp_dir = tempfile.TemporaryDirectory(dir=os.getcwd())
-                local_resource_path = os.path.join(tmp_dir.name, resource)
-                self._engine.download(
-                    hdfs_resource_path,
-                    local_resource_path,
-                )
-                with open(local_resource_path, "r") as f:
-                    return f.read()
-            finally:
-                if tmp_dir is not None and os.path.exists(tmp_dir.name):
-                    tmp_dir.cleanup()
-
-    def read_json(self, model_instance, resource):
-        hdfs_resource_path = self._build_resource_path(model_instance, resource)
-        if self._dataset_api.path_exists(hdfs_resource_path):
-            try:
-                tmp_dir = tempfile.TemporaryDirectory(dir=os.getcwd())
-                local_resource_path = os.path.join(tmp_dir.name, resource)
-                self._engine.download(
-                    hdfs_resource_path,
-                    local_resource_path,
-                )
-                with open(local_resource_path, "rb") as f:
-                    return json.loads(f.read())
-            finally:
-                if tmp_dir is not None and os.path.exists(tmp_dir.name):
-                    tmp_dir.cleanup()
-
-    def delete(self, model_instance):
-        self._engine.delete(model_instance)
-
-    def set_tag(self, model_instance, name, value):
-        """Attach a name/value tag to a model."""
-        self._model_api.set_tag(model_instance, name, value)
-
-    def delete_tag(self, model_instance, name):
-        """Remove a tag from a model."""
-        self._model_api.delete_tag(model_instance, name)
-
-    def get_tag(self, model_instance, name):
-        """Get tag with a certain name."""
-        return self._model_api.get_tags(model_instance, name)[name]
-
-    def get_tags(self, model_instance):
-        """Get all tags for a model."""
-        return self._model_api.get_tags(model_instance)
-
-    def get_feature_view_provenance(self, model_instance):
-        """Get the parent feature view of this model, based on explicit provenance.
-        These feature views can be accessible, deleted or inaccessible.
-        For deleted and inaccessible feature views, only minimal information is
-        returned.
-
-        # Arguments
-            model_instance: Metadata object of model.
-
-        # Returns
-            `ProvenanceLinks`: the feature view used to generate this model
-        """
-        return self._model_api.get_feature_view_provenance(model_instance)
-
-    def get_training_dataset_provenance(self, model_instance):
-        """Get the parent training dataset of this model, based on explicit provenance.
-        These training datasets can be accessible, deleted or inaccessible.
-        For deleted and inaccessible training datasets, only minimal information is
-        returned.
-
-        # Arguments
-            model_instance: Metadata object of model.
- - # Returns - `ProvenanceLinks`: the training dataset used to generate this model - """ - return self._model_api.get_training_dataset_provenance(model_instance) diff --git a/hsml/python/hsml/engine/serving_engine.py b/hsml/python/hsml/engine/serving_engine.py deleted file mode 100644 index 15e2b3fa6..000000000 --- a/hsml/python/hsml/engine/serving_engine.py +++ /dev/null @@ -1,690 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os -import time -import uuid -from typing import Dict, List, Union - -from hsml import util -from hsml.client.exceptions import ModelServingException, RestAPIError -from hsml.client.istio.utils.infer_type import InferInput -from hsml.constants import ( - DEPLOYMENT, - PREDICTOR, - PREDICTOR_STATE, -) -from hsml.constants import ( - INFERENCE_ENDPOINTS as IE, -) -from hsml.core import dataset_api, serving_api -from tqdm.auto import tqdm - - -class ServingEngine: - START_STEPS = [ - PREDICTOR_STATE.CONDITION_TYPE_STOPPED, - PREDICTOR_STATE.CONDITION_TYPE_SCHEDULED, - PREDICTOR_STATE.CONDITION_TYPE_INITIALIZED, - PREDICTOR_STATE.CONDITION_TYPE_STARTED, - PREDICTOR_STATE.CONDITION_TYPE_READY, - ] - STOP_STEPS = [ - PREDICTOR_STATE.CONDITION_TYPE_SCHEDULED, - PREDICTOR_STATE.CONDITION_TYPE_STOPPED, - ] - - def __init__(self): - self._serving_api = serving_api.ServingApi() - self._dataset_api = dataset_api.DatasetApi() - - def _poll_deployment_status( - self, deployment_instance, status: str, await_status: int, update_progress=None - ): - if await_status > 0: - sleep_seconds = 5 - for _ in range(int(await_status / sleep_seconds)): - time.sleep(sleep_seconds) - state = deployment_instance.get_state() - num_instances = self._get_available_instances(state) - if update_progress is not None: - update_progress(state, num_instances) - if state.status == status: - return state # deployment reached desired status - elif ( - status == PREDICTOR_STATE.STATUS_RUNNING - and state.status == PREDICTOR_STATE.STATUS_FAILED - ): - error_msg = state.condition.reason - if ( - state.condition.type - == PREDICTOR_STATE.CONDITION_TYPE_INITIALIZED - or state.condition.type - == PREDICTOR_STATE.CONDITION_TYPE_STARTED - ): - component = ( - "transformer" - if "transformer" in state.condition.reason - else "predictor" - ) - error_msg += ( - ". Please, check the server logs using `.get_logs(component='" - + component - + "')`" - ) - raise ModelServingException(error_msg) - raise ModelServingException( - "Deployment has not reached the desired status within the expected awaiting time. 
Check the current status by using `.get_state()`, "
-            + "explore the server logs using `.get_logs()` or set a higher value for await_"
-            + status.lower()
-        )
-
-    def start(self, deployment_instance, await_status: int) -> bool:
-        (done, state) = self._check_status(
-            deployment_instance, PREDICTOR_STATE.STATUS_RUNNING
-        )
-
-        if not done:
-            min_instances = self._get_min_starting_instances(deployment_instance)
-            num_steps = (len(self.START_STEPS) - 1) + min_instances
-            if deployment_instance._predictor._state.condition is None:
-                num_steps = min_instances  # backward compatibility
-            pbar = tqdm(total=num_steps)
-            pbar.set_description("Creating deployment")
-
-            # set progress function
-            def update_progress(state, num_instances):
-                (progress, desc) = self._get_starting_progress(
-                    pbar.n, state, num_instances
-                )
-                pbar.update(progress)
-                if desc is not None:
-                    pbar.set_description(desc)
-
-            try:
-                update_progress(state, num_instances=0)
-
-                if state.status == PREDICTOR_STATE.STATUS_CREATING:
-                    state = self._poll_deployment_status(  # wait for preparation
-                        deployment_instance,
-                        PREDICTOR_STATE.STATUS_CREATED,
-                        await_status,
-                        update_progress,
-                    )
-
-                self._serving_api.post(
-                    deployment_instance, DEPLOYMENT.ACTION_START
-                )  # start deployment
-
-                state = self._poll_deployment_status(  # wait for status
-                    deployment_instance,
-                    PREDICTOR_STATE.STATUS_RUNNING,
-                    await_status,
-                    update_progress,
-                )
-            except RestAPIError as re:
-                self.stop(deployment_instance, await_status=0)
-                raise re
-
-        if state.status == PREDICTOR_STATE.STATUS_RUNNING:
-            print("Start making predictions by using `.predict()`")
-
-    def stop(self, deployment_instance, await_status: int) -> bool:
-        (done, state) = self._check_status(
-            deployment_instance, PREDICTOR_STATE.STATUS_STOPPED
-        )
-        if not done:
-            num_instances = self._get_available_instances(state)
-            num_steps = len(self.STOP_STEPS) + (
-                deployment_instance.requested_instances
-                if deployment_instance.requested_instances >= num_instances
-                else num_instances
-            )
-            if deployment_instance._predictor._state.condition is None:
-                # backward compatibility
-                num_steps = self._get_min_starting_instances(deployment_instance)
-            pbar = tqdm(total=num_steps)
-            pbar.set_description("Preparing to stop deployment")
-
-            # set progress function
-            def update_progress(state, num_instances):
-                (progress, desc) = self._get_stopping_progress(
-                    pbar.total, pbar.n, state, num_instances
-                )
-                pbar.update(progress)
-                if desc is not None:
-                    pbar.set_description(desc)
-
-            update_progress(state, num_instances)
-            self._serving_api.post(
-                deployment_instance, DEPLOYMENT.ACTION_STOP
-            )  # stop deployment
-
-            _ = self._poll_deployment_status(  # wait for status
-                deployment_instance,
-                PREDICTOR_STATE.STATUS_STOPPED,
-                await_status,
-                update_progress,
-            )
-
-        # free grpc channel
-        deployment_instance._grpc_channel = None
-
-    def _check_status(self, deployment_instance, desired_status):
-        state = deployment_instance.get_state()
-        if state is None:
-            return (True, None)
-
-        # desired status: running
-        if desired_status == PREDICTOR_STATE.STATUS_RUNNING:
-            if (
-                state.status == PREDICTOR_STATE.STATUS_RUNNING
-                or state.status == PREDICTOR_STATE.STATUS_IDLE
-            ):
-                print("Deployment is already running")
-                return (True, state)
-            if state.status == PREDICTOR_STATE.STATUS_STARTING:
-                print("Deployment is already starting")
-                return (True, state)
-            if state.status == PREDICTOR_STATE.STATUS_UPDATING:
-                print("Deployment is already running and updating")
-                return (True, state)
-            if state.status ==
PREDICTOR_STATE.STATUS_FAILED: - print("Deployment is in failed state. " + state.condition.reason) - return (True, state) - if state.status == PREDICTOR_STATE.STATUS_STOPPING: - raise ModelServingException( - "Deployment is stopping, please wait until it completely stops" - ) - - # desired status: stopped - if desired_status == PREDICTOR_STATE.STATUS_STOPPED: - if ( - state.status == PREDICTOR_STATE.STATUS_CREATING - or state.status == PREDICTOR_STATE.STATUS_CREATED - or state.status == PREDICTOR_STATE.STATUS_STOPPED - ): - print("Deployment is already stopped") - return (True, state) - if state.status == PREDICTOR_STATE.STATUS_STOPPING: - print("Deployment is already stopping") - return (True, state) - - return (False, state) - - def _get_starting_progress(self, current_step, state, num_instances): - if state.condition is None: # backward compatibility - progress = num_instances - current_step - if state.status == PREDICTOR_STATE.STATUS_RUNNING: - return (progress, "Deployment is ready") - return (progress, None if current_step == 0 else "Deployment is starting") - - step = self.START_STEPS.index(state.condition.type) - if ( - state.condition.type == PREDICTOR_STATE.CONDITION_TYPE_STARTED - or state.condition.type == PREDICTOR_STATE.CONDITION_TYPE_READY - ): - step += num_instances - progress = step - current_step - desc = None - if state.condition.type != PREDICTOR_STATE.CONDITION_TYPE_STOPPED: - desc = ( - state.condition.reason - if state.status != PREDICTOR_STATE.STATUS_FAILED - else "Deployment failed to start" - ) - return (progress, desc) - - def _get_stopping_progress(self, total_steps, current_step, state, num_instances): - if state.condition is None: # backward compatibility - progress = (total_steps - num_instances) - current_step - if state.status == PREDICTOR_STATE.STATUS_STOPPED: - return (progress, "Deployment is stopped") - return ( - progress, - None if total_steps == current_step else "Deployment is stopping", - ) - - step = 0 - if state.condition.type == PREDICTOR_STATE.CONDITION_TYPE_SCHEDULED: - step = 1 if state.condition.status is None else 0 - elif state.condition.type == PREDICTOR_STATE.CONDITION_TYPE_STOPPED: - num_instances = (total_steps - 2) - num_instances # num stopped instances - step = ( - (2 + num_instances) - if (state.condition.status is None or state.condition.status) - else 0 - ) - progress = step - current_step - desc = None - if ( - state.condition.type != PREDICTOR_STATE.CONDITION_TYPE_READY - and state.status != PREDICTOR_STATE.STATUS_FAILED - ): - desc = ( - "Deployment is stopped" - if state.status == PREDICTOR_STATE.STATUS_STOPPED - else state.condition.reason - ) - - return (progress, desc) - - def _get_min_starting_instances(self, deployment_instance): - min_start_instances = 1 # predictor - if deployment_instance.transformer is not None: - min_start_instances += 1 # transformer - return ( - deployment_instance.requested_instances - if deployment_instance.requested_instances >= min_start_instances - else min_start_instances - ) - - def _get_available_instances(self, state): - if state.status == PREDICTOR_STATE.STATUS_CREATING: - return 0 - num_instances = state.available_predictor_instances - if state.available_transformer_instances is not None: - num_instances += state.available_transformer_instances - return num_instances - - def _get_stopped_instances(self, available_instances, requested_instances): - num_instances = requested_instances - available_instances - return num_instances if num_instances >= 0 else 0 - - def 
download_artifact(self, deployment_instance):
-        if deployment_instance.id is None:
-            raise ModelServingException(
-                "Deployment is not created yet. To create the deployment use `.save()`"
-            )
-        if deployment_instance.artifact_version is None:
-            # model artifacts are not created in non-k8s installations
-            raise ModelServingException(
-                "Model artifacts not supported in non-k8s installations. \
-                 Download the model files by using `model.download()`"
-            )
-
-        from_artifact_zip_path = deployment_instance.artifact_path
-        to_artifacts_path = os.path.join(
-            os.getcwd(),
-            str(uuid.uuid4()),
-            deployment_instance.model_name,
-            str(deployment_instance.model_version),
-            "Artifacts",
-        )
-        to_artifact_version_path = (
-            to_artifacts_path + "/" + str(deployment_instance.artifact_version)
-        )
-        to_artifact_zip_path = to_artifact_version_path + ".zip"
-
-        os.makedirs(to_artifacts_path)
-
-        try:
-            self._dataset_api.download(from_artifact_zip_path, to_artifact_zip_path)
-            util.decompress(to_artifact_zip_path, extract_dir=to_artifacts_path)
-            os.remove(to_artifact_zip_path)
-        finally:
-            if os.path.exists(to_artifact_zip_path):
-                os.remove(to_artifact_zip_path)
-
-        return to_artifact_version_path
-
-    def create(self, deployment_instance):
-        try:
-            self._serving_api.put(deployment_instance)
-            print("Deployment created, explore it at " + deployment_instance.get_url())
-        except RestAPIError as re:
-            raise_err = True
-            if re.error_code == ModelServingException.ERROR_CODE_DUPLICATED_ENTRY:
-                msg = "Deployment with the same name already exists"
-                existing_deployment = self._serving_api.get(deployment_instance.name)
-                if (
-                    existing_deployment.model_name == deployment_instance.model_name
-                    and existing_deployment.model_version
-                    == deployment_instance.model_version
-                ):  # if same name and model version, retrieve existing deployment
-                    print(msg + ". Getting existing deployment...")
-                    print("To create a new deployment choose a different name.")
-                    deployment_instance.update_from_response_json(
-                        existing_deployment.to_dict()
-                    )
-                    raise_err = False
-                else:  # otherwise, raise an exception
-                    print(msg + ", but it is serving a different model version.")
-                    print("Please, choose a different name.")
-
-            if raise_err:
-                raise re
-
-        if deployment_instance.is_stopped():
-            print("Before making predictions, start the deployment by using `.start()`")
-
-    def update(self, deployment_instance, await_update):
-        state = deployment_instance.get_state()
-        if state is None:
-            return
-
-        if state.status == PREDICTOR_STATE.STATUS_STARTING:
-            # if starting, it cannot be updated yet
-            raise ModelServingException(
-                "Deployment is starting, please wait until it is running before applying changes. 
\n" - + "Check the current status by using `.get_state()` or explore the server logs using `.get_logs()`" - ) - if ( - state.status == PREDICTOR_STATE.STATUS_RUNNING - or state.status == PREDICTOR_STATE.STATUS_IDLE - or state.status == PREDICTOR_STATE.STATUS_FAILED - ): - # if running, it's fine - self._serving_api.put(deployment_instance) - print("Deployment updated, applying changes to running instances...") - state = self._poll_deployment_status( # wait for status - deployment_instance, PREDICTOR_STATE.STATUS_RUNNING, await_update - ) - if state is not None: - if state.status == PREDICTOR_STATE.STATUS_RUNNING: - print("Running instances updated successfully") - return - if state.status == PREDICTOR_STATE.STATUS_UPDATING: - # if updating, it cannot be updated yet - raise ModelServingException( - "Deployment is updating, please wait until it is running before applying changes. \n" - + "Check the current status by using `.get_state()` or explore the server logs using `.get_logs()`" - ) - if state.status == PREDICTOR_STATE.STATUS_STOPPING: - # if stopping, it cannot be updated yet - raise ModelServingException( - "Deployment is stopping, please wait until it is stopped before applying changes" - ) - if ( - state.status == PREDICTOR_STATE.STATUS_CREATING - or state.status == PREDICTOR_STATE.STATUS_CREATED - or state.status == PREDICTOR_STATE.STATUS_STOPPED - ): - # if stopped, it's fine - self._serving_api.put(deployment_instance) - print("Deployment updated, explore it at " + deployment_instance.get_url()) - return - - raise ValueError("Unknown deployment status: " + state.status) - - def save(self, deployment_instance, await_update: int): - if deployment_instance.id is None: - # if new deployment - self.create(deployment_instance) - return - - # if existing deployment - self.update(deployment_instance, await_update) - - def delete(self, deployment_instance, force=False): - state = deployment_instance.get_state() - if state is None: - return - - if ( - not force - and state.status != PREDICTOR_STATE.STATUS_STOPPED - and state.status != PREDICTOR_STATE.STATUS_CREATED - ): - raise ModelServingException( - "Deployment not stopped, please stop it first by using `.stop()` or check its status with .get_state()" - ) - - self._serving_api.delete(deployment_instance) - print("Deployment deleted successfully") - - def get_state(self, deployment_instance): - try: - state = self._serving_api.get_state(deployment_instance) - except RestAPIError as re: - if re.error_code == ModelServingException.ERROR_CODE_SERVING_NOT_FOUND: - raise ModelServingException("Deployment not found") from re - raise re - deployment_instance._predictor._set_state(state) - return state - - def get_logs(self, deployment_instance, component, tail): - state = self.get_state(deployment_instance) - if state is None: - return - - if state.status == PREDICTOR_STATE.STATUS_STOPPING: - print( - "Deployment is stopping, explore historical logs at " - + deployment_instance.get_url() - ) - return - if state.status == PREDICTOR_STATE.STATUS_STOPPED: - print( - "Deployment not running, explore historical logs at " - + deployment_instance.get_url() - ) - return - if state.status == PREDICTOR_STATE.STATUS_STARTING: - print("Deployment is starting, server logs might not be ready yet") - - print( - "Explore all the logs and filters in the Kibana logs at " - + deployment_instance.get_url(), - end="\n\n", - ) - - return self._serving_api.get_logs(deployment_instance, component, tail) - - # Model inference - - def predict( - self, - 
deployment_instance, - data: Union[Dict, List[InferInput]], - inputs: Union[Dict, List[Dict]], - ): - # validate user-provided payload - self._validate_inference_payload(deployment_instance.api_protocol, data, inputs) - - # build inference payload based on API protocol - payload = self._build_inference_payload( - deployment_instance.api_protocol, data, inputs - ) - - # if not KServe, send request through Hopsworks - serving_tool = deployment_instance.predictor.serving_tool - through_hopsworks = serving_tool != PREDICTOR.SERVING_TOOL_KSERVE - try: - return self._serving_api.send_inference_request( - deployment_instance, payload, through_hopsworks - ) - except RestAPIError as re: - if ( - re.response.status_code == RestAPIError.STATUS_CODE_NOT_FOUND - or re.error_code - == ModelServingException.ERROR_CODE_DEPLOYMENT_NOT_RUNNING - ): - raise ModelServingException( - "Deployment not created or running. If it is already created, start it by using `.start()` or check its status with .get_state()" - ) from re - - re.args = ( - re.args[0] + "\n\n Check the model server logs by using `.get_logs()`", - ) - raise re - - def _validate_inference_payload( - self, - api_protocol, - data: Union[Dict, List[InferInput]], - inputs: Union[Dict, List[Dict]], - ): - """Validates the user-provided inference payload. Either data or inputs parameter is expected, but both cannot be provided together.""" - # check null inputs - if data is not None and inputs is not None: - raise ModelServingException( - "Inference data and inputs parameters cannot be provided together." - ) - # check data or inputs - if data is not None: - self._validate_inference_data(api_protocol, data) - else: - self._validate_inference_inputs(api_protocol, inputs) - - def _validate_inference_data( - self, api_protocol, data: Union[Dict, List[InferInput]] - ): - """Validates the inference payload when provided through the `data` parameter. The data parameter contains the raw payload to be sent - in the inference request and should have the corresponding type and format depending on the API protocol. - For the REST protocol, data should be a dictionary. For GRPC protocol, one or more InferInput objects is expected. - """ - if api_protocol == IE.API_PROTOCOL_REST: # REST protocol - if isinstance(data, Dict): - if "instances" not in data and "inputs" not in data: - raise ModelServingException( - "Inference data is missing 'instances' key." - ) - - payload = data["instances"] if "instances" in data else data["inputs"] - if not isinstance(payload, List): - raise ModelServingException( - "Instances field should contain a 2-dim list." - ) - elif len(payload) == 0: - raise ModelServingException( - "Inference data cannot contain an empty list." - ) - elif not isinstance(payload[0], List): - raise ModelServingException( - "Instances field should contain a 2-dim list." - ) - elif len(payload[0]) == 0: - raise ModelServingException( - "Inference data cannot contain an empty list." - ) - else: # not Dict - if isinstance(data, InferInput) or ( - isinstance(data, List) and isinstance(data[0], InferInput) - ): - raise ModelServingException( - "Inference data cannot contain `InferInput` for deployments with gRPC protocol disabled. Use a dictionary instead." - ) - raise ModelServingException( - "Inference data must be a dictionary. Otherwise, use the `inputs` parameter." - ) - - else: # gRPC protocol - if isinstance(data, Dict): - raise ModelServingException( - "Inference data cannot be a dictionary for deployments with gRPC protocol enabled. 
" - "Create a `InferInput` object or use the `inputs` parameter instead." - ) - elif isinstance(data, List): - if len(data) == 0: - raise ModelServingException( - "Inference data cannot contain an empty list." - ) - if not isinstance(data[0], InferInput): - raise ModelServingException( - "Inference data must contain a list of `InferInput` objects. Otherwise, use the `inputs` parameter." - ) - else: - raise ModelServingException( - "Inference data must contain a list of `InferInput` objects for deployments with gRPC protocol enabled." - ) - - def _validate_inference_inputs( - self, api_protocol, inputs: Union[Dict, List[Dict]], recursive_call=False - ): - """Validates the inference payload when provided through the `inputs` parameter. The inputs parameter contains only the payload values, - which will be parsed when building the request payload. It can be either a dictionary or a list. - """ - if isinstance(inputs, List): - if len(inputs) == 0: - raise ModelServingException("Inference inputs cannot be an empty list.") - else: - self._validate_inference_inputs( - api_protocol, inputs[0], recursive_call=True - ) - elif isinstance(inputs, InferInput): - raise ModelServingException( - "Inference inputs cannot be of type `InferInput`. Use the `data` parameter instead." - ) - elif isinstance(inputs, Dict): - required_keys = ("name", "shape", "datatype", "data") - if api_protocol == IE.API_PROTOCOL_GRPC and not all( - k in inputs for k in required_keys - ): - raise ModelServingException( - f"Inference inputs is missing one or more keys. Required keys are [{', '.join(required_keys)}]." - ) - elif not recursive_call or (api_protocol == IE.API_PROTOCOL_GRPC): - # if it is the first call to this method, inputs have an invalid type/format - # if GRPC protocol is used, only Dict type is valid for the input values - raise ModelServingException( - "Inference inputs type is not valid. Supported types are dictionary and list." - ) - - def _build_inference_payload( - self, - api_protocol, - data: Union[Dict, List[InferInput]], - inputs: Union[Dict, List[Dict]], - ): - """Build the inference payload for an inference request. If the 'data' parameter is provided, this method ensures - it has the correct format depending on the API protocol. Otherwise, if the 'inputs' parameter is provided, this method - builds the correct request payload depending on the API protocol. 
-
-        """
-        if data is not None:
-            # data contains the raw payload (dict or InferInput), nothing needs to be changed
-            return data
-        else:  # parse inputs
-            return self._parse_inference_inputs(api_protocol, inputs)
-
-    def _parse_inference_inputs(
-        self, api_protocol, inputs: Union[Dict, List[Dict]], recursive_call=False
-    ):
-        if api_protocol == IE.API_PROTOCOL_REST:  # REST protocol
-            if not isinstance(inputs, List):
-                data = {"instances": [[inputs]]}  # wrap inputs in a 2-dim list
-            else:
-                data = {"instances": inputs}  # use given inputs list by default
-                # check depth of the list: at least two levels are required for batch inference
-                # if the content is neither a list nor a dict, wrap it in an additional list
-                for i in inputs:
-                    if not isinstance(i, List) and not isinstance(i, Dict):
-                        # if there are no two levels, wrap inputs in a list
-                        data = {"instances": [inputs]}
-                        break
-        else:  # gRPC protocol
-            if isinstance(inputs, Dict):  # Dict
-                data = InferInput(
-                    name=inputs["name"],
-                    shape=inputs["shape"],
-                    datatype=inputs["datatype"],
-                    data=inputs["data"],
-                    parameters=(
-                        inputs["parameters"] if "parameters" in inputs else None
-                    ),
-                )
-                if not recursive_call:
-                    # if inputs is of type Dict, return a singleton
-                    data = [data]
-
-            else:  # List[Dict]
-                data = inputs
-                for index, inputs_item in enumerate(inputs):
-                    data[index] = self._parse_inference_inputs(
-                        api_protocol, inputs_item, recursive_call=True
-                    )
-
-        return data
diff --git a/hsml/python/hsml/inference_batcher.py b/hsml/python/hsml/inference_batcher.py
deleted file mode 100644
index 265615c56..000000000
--- a/hsml/python/hsml/inference_batcher.py
+++ /dev/null
@@ -1,136 +0,0 @@
-#
-# Copyright 2022 Logical Clocks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import json
-from typing import Optional
-
-import humps
-from hsml import util
-from hsml.constants import INFERENCE_BATCHER
-
-
-class InferenceBatcher:
-    """Configuration of an inference batcher for a predictor.
-
-    # Arguments
-        enabled: Whether the inference batcher is enabled or not. The default value is `false`.
-        max_batch_size: Maximum requests batch size.
-        max_latency: Maximum latency for request batching.
-        timeout: Maximum waiting time for request batching.
-    # Returns
-        `InferenceBatcher`. Configuration of an inference batcher.
- """ - - def __init__( - self, - enabled: Optional[bool] = None, - max_batch_size: Optional[int] = None, - max_latency: Optional[int] = None, - timeout: Optional[int] = None, - **kwargs, - ): - self._enabled = enabled if enabled is not None else INFERENCE_BATCHER.ENABLED - self._max_batch_size = max_batch_size if max_batch_size is not None else None - self._max_latency = max_latency if max_latency is not None else None - self._timeout = timeout if timeout is not None else None - - def describe(self): - """Print a description of the inference batcher""" - util.pretty_print(self) - - @classmethod - def from_response_json(cls, json_dict): - json_decamelized = humps.decamelize(json_dict) - return cls.from_json(json_decamelized) - - @classmethod - def from_json(cls, json_decamelized): - return InferenceBatcher(**cls.extract_fields_from_json(json_decamelized)) - - @classmethod - def extract_fields_from_json(cls, json_decamelized): - config = ( - json_decamelized.pop("batching_configuration") - if "batching_configuration" in json_decamelized - else json_decamelized - ) - kwargs = {} - kwargs["enabled"] = util.extract_field_from_json( - config, ["batching_enabled", "enabled"] - ) - kwargs["max_batch_size"] = util.extract_field_from_json( - config, "max_batch_size" - ) - kwargs["max_latency"] = util.extract_field_from_json(config, "max_latency") - kwargs["timeout"] = util.extract_field_from_json(config, "timeout") - - return kwargs - - def update_from_response_json(self, json_dict): - json_decamelized = humps.decamelize(json_dict) - self.__init__(**self.extract_fields_from_json(json_decamelized)) - return self - - def json(self): - return json.dumps(self, cls=util.MLEncoder) - - def to_dict(self): - json = {"batchingEnabled": self._enabled} - if self._max_batch_size is not None: - json["maxBatchSize"] = self._max_batch_size - if self._max_latency is not None: - json["maxLatency"] = self._max_latency - if self._timeout is not None: - json["timeout"] = self._timeout - return {"batchingConfiguration": json} - - @property - def enabled(self): - """Whether the inference batcher is enabled or not.""" - return self._enabled - - @enabled.setter - def enabled(self, enabled: bool): - self._enabled = enabled - - @property - def max_batch_size(self): - """Maximum requests batch size.""" - return self._max_batch_size - - @max_batch_size.setter - def max_batch_size(self, max_batch_size: int): - self._max_batch_size = max_batch_size - - @property - def max_latency(self): - """Maximum latency.""" - return self._max_latency - - @max_latency.setter - def max_latency(self, max_latency: int): - self._max_latency = max_latency - - @property - def timeout(self): - """Maximum timeout.""" - return self._timeout - - @timeout.setter - def timeout(self, timeout: int): - self._timeout = timeout - - def __repr__(self): - return f"InferenceBatcher(enabled: {self._enabled!r})" diff --git a/hsml/python/hsml/inference_endpoint.py b/hsml/python/hsml/inference_endpoint.py deleted file mode 100644 index af031dbf5..000000000 --- a/hsml/python/hsml/inference_endpoint.py +++ /dev/null @@ -1,163 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random -from typing import List, Optional - -import humps -from hsml import util - - -class InferenceEndpointPort: - """Port of an inference endpoint. - - # Arguments - name: Name of the port. It typically defines the purpose of the port (e.g., HTTP, HTTPS, STATUS-PORT, TLS) - number: Port number. - # Returns - `InferenceEndpointPort`. Port of an inference endpoint. - """ - - def __init__(self, name: str, number: int, **kwargs): - self._name = name - self._number = number - - @classmethod - def from_response_json(cls, json_dict): - json_decamelized = humps.decamelize(json_dict) - return cls.from_json(json_decamelized) - - @classmethod - def from_json(cls, json_decamelized): - return InferenceEndpointPort(**cls.extract_fields_from_json(json_decamelized)) - - @classmethod - def extract_fields_from_json(cls, json_decamelized): - kwargs = {} - kwargs["name"] = util.extract_field_from_json(json_decamelized, "name") - kwargs["number"] = util.extract_field_from_json(json_decamelized, "number") - return kwargs - - def to_dict(self): - return {"name": self._name, "number": self._number} - - @property - def name(self): - """Name of the inference endpoint port.""" - return self._name - - @property - def number(self): - """Port number of the inference endpoint port.""" - return self._number - - def __repr__(self): - return f"InferenceEndpointPort(name: {self._name!r})" - - -class InferenceEndpoint: - """Inference endpoint available in the current project for model inference. - - # Arguments - type: Type of inference endpoint (e.g., NODE, KUBE_CLUSTER, LOAD_BALANCER). - hosts: List of hosts of the inference endpoint. - ports: List of ports of the inference endpoint. - # Returns - `InferenceEndpoint`. Inference endpoint. 
- """ - - def __init__( - self, - type: str, - hosts: List[str], - ports: Optional[List[InferenceEndpointPort]], - ): - self._type = type - self._hosts = hosts - self._ports = ports - - def get_any_host(self): - """Get any host available""" - return random.choice(self._hosts) if self._hosts is not None else None - - def get_port(self, name): - """Get port by name""" - if self._ports is not None: - for port in self._ports: - if port.name == name: - return port - return None - - @classmethod - def from_response_json(cls, json_dict): - json_decamelized = humps.decamelize(json_dict) - if isinstance(json_decamelized, list): - if len(json_decamelized) == 0: - return [] - return [cls.from_json(endpoint) for endpoint in json_decamelized] - else: - if "count" in json_decamelized: - if json_decamelized["count"] == 0: - return [] - return [ - cls.from_json(endpoint) for endpoint in json_decamelized["items"] - ] - return cls.from_json(json_decamelized) - - @classmethod - def from_json(cls, json_decamelized): - return InferenceEndpoint(**cls.extract_fields_from_json(json_decamelized)) - - @classmethod - def extract_fields_from_json(cls, json_decamelized): - kwargs = {} - kwargs["type"] = util.extract_field_from_json(json_decamelized, "type") - kwargs["hosts"] = util.extract_field_from_json(json_decamelized, "hosts") - kwargs["ports"] = util.extract_field_from_json( - obj=json_decamelized, fields="ports", as_instance_of=InferenceEndpointPort - ) - return kwargs - - def to_dict(self): - return { - "type": self._type, - "hosts": self._hosts, - "ports": [port.to_dict() for port in self._ports], - } - - @property - def type(self): - """Type of inference endpoint.""" - return self._type - - @property - def hosts(self): - """Hosts of the inference endpoint.""" - return self._hosts - - @property - def ports(self): - """Ports of the inference endpoint.""" - return self._ports - - def __repr__(self): - return f"InferenceEndpoint(type: {self._type!r})" - - -def get_endpoint_by_type(endpoints, type) -> InferenceEndpoint: - for endpoint in endpoints: - if endpoint.type == type: - return endpoint - return None diff --git a/hsml/python/hsml/inference_logger.py b/hsml/python/hsml/inference_logger.py deleted file mode 100644 index ef2f5c9ab..000000000 --- a/hsml/python/hsml/inference_logger.py +++ /dev/null @@ -1,124 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -from typing import Optional, Union - -import humps -from hsml import util -from hsml.constants import DEFAULT, INFERENCE_LOGGER -from hsml.kafka_topic import KafkaTopic - - -class InferenceLogger: - """Configuration of an inference logger for a predictor. - - # Arguments - kafka_topic: Kafka topic to send the inference logs to. By default, a new Kafka topic is configured. - mode: Inference logging mode. (e.g., `NONE`, `ALL`, `PREDICTIONS`, or `MODEL_INPUTS`). By default, `ALL` inference logs are sent. - # Returns - `InferenceLogger`. Configuration of an inference logger. 
- """ - - def __init__( - self, - kafka_topic: Optional[Union[KafkaTopic, dict]] = DEFAULT, - mode: Optional[str] = INFERENCE_LOGGER.MODE_ALL, - **kwargs, - ): - self._kafka_topic = util.get_obj_from_json(kafka_topic, KafkaTopic) - self._mode = self._validate_mode(mode, self._kafka_topic) or ( - INFERENCE_LOGGER.MODE_ALL - if self._kafka_topic is not None - else INFERENCE_LOGGER.MODE_NONE - ) - - def describe(self): - """Print a description of the inference logger""" - util.pretty_print(self) - - @classmethod - def _validate_mode(cls, mode, kafka_topic): - if mode is not None: - modes = list(util.get_members(INFERENCE_LOGGER)) - if mode not in modes: - raise ValueError( - "Inference logging mode '{}' is not valid. Possible values are '{}'".format( - mode, ", ".join(modes) - ) - ) - - if kafka_topic is None and mode is not None: - mode = None - elif kafka_topic is not None and mode is None: - mode = INFERENCE_LOGGER.MODE_NONE - - return mode - - @classmethod - def from_response_json(cls, json_dict): - json_decamelized = humps.decamelize(json_dict) - return cls.from_json(json_decamelized) - - @classmethod - def from_json(cls, json_decamelized): - return InferenceLogger(**cls.extract_fields_from_json(json_decamelized)) - - @classmethod - def extract_fields_from_json(cls, json_decamelized): - kwargs = {} - kwargs["kafka_topic"] = util.extract_field_from_json( - json_decamelized, - ["kafka_topic_dto", "kafka_topic"], - ) - kwargs["mode"] = util.extract_field_from_json( - json_decamelized, ["inference_logging", "mode"] - ) - return kwargs - - def update_from_response_json(self, json_dict): - json_decamelized = humps.decamelize(json_dict) - self.__init__(**self.extract_fields_from_json(json_decamelized)) - return self - - def json(self): - return json.dumps(self, cls=util.MLEncoder) - - def to_dict(self): - json = {"inferenceLogging": self._mode} - if self._kafka_topic is not None: - return {**json, **self._kafka_topic.to_dict()} - return json - - @property - def kafka_topic(self): - """Kafka topic to send the inference logs to.""" - return self._kafka_topic - - @kafka_topic.setter - def kafka_topic(self, kafka_topic: KafkaTopic): - self._kafka_topic = kafka_topic - - @property - def mode(self): - """Inference logging mode ("NONE", "ALL", "PREDICTIONS", or "MODEL_INPUTS").""" - return self._mode - - @mode.setter - def mode(self, mode: str): - self._mode = mode - - def __repr__(self): - return f"InferenceLogger(mode: {self._mode!r})" diff --git a/hsml/python/hsml/kafka_topic.py b/hsml/python/hsml/kafka_topic.py deleted file mode 100644 index 9dce0bb56..000000000 --- a/hsml/python/hsml/kafka_topic.py +++ /dev/null @@ -1,137 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
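-
-# A minimal usage sketch (illustrative only; the constants come from
-# hsml.constants.KAFKA_TOPIC):
-#
-#     topic = KafkaTopic()                          # create a new topic with default settings
-#     topic = KafkaTopic(name="my-existing-topic")  # reuse an existing topic
-#     topic = KafkaTopic(name=KAFKA_TOPIC.NONE)     # do not use any topic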
-
-import json
-from typing import Optional
-
-import humps
-from hsml import util
-from hsml.constants import KAFKA_TOPIC
-
-
-class KafkaTopic:
-    """Configuration for a Kafka topic."""
-
-    def __init__(
-        self,
-        name: str = KAFKA_TOPIC.CREATE,
-        num_replicas: Optional[int] = None,
-        num_partitions: Optional[int] = None,
-        **kwargs,
-    ):
-        self._name = name
-        self._num_replicas, self._num_partitions = self._validate_topic_config(
-            self._name, num_replicas, num_partitions
-        )
-
-    def describe(self):
-        """Print a description of the Kafka topic."""
-        util.pretty_print(self)
-
-    @classmethod
-    def _validate_topic_config(cls, name, num_replicas, num_partitions):
-        if name is not None and name != KAFKA_TOPIC.NONE:
-            if name == KAFKA_TOPIC.CREATE:
-                if num_replicas is None:
-                    print(
-                        "Setting number of replicas to default value '{}'".format(
-                            KAFKA_TOPIC.NUM_REPLICAS
-                        )
-                    )
-                    num_replicas = KAFKA_TOPIC.NUM_REPLICAS
-                if num_partitions is None:
-                    print(
-                        "Setting number of partitions to default value '{}'".format(
-                            KAFKA_TOPIC.NUM_PARTITIONS
-                        )
-                    )
-                    num_partitions = KAFKA_TOPIC.NUM_PARTITIONS
-            else:
-                if num_replicas is not None or num_partitions is not None:
-                    raise ValueError(
-                        "Number of replicas or partitions cannot be changed in existing Kafka topics."
-                    )
-        elif name is None or name == KAFKA_TOPIC.NONE:
-            num_replicas = None
-            num_partitions = None
-
-        return num_replicas, num_partitions
-
-    @classmethod
-    def from_response_json(cls, json_dict):
-        json_decamelized = humps.decamelize(json_dict)
-        return cls.from_json(json_decamelized)
-
-    @classmethod
-    def from_json(cls, json_decamelized):
-        return KafkaTopic(**cls.extract_fields_from_json(json_decamelized))
-
-    @classmethod
-    def extract_fields_from_json(cls, json_decamelized):
-        kwargs = {}
-        kwargs["name"] = json_decamelized.pop("name")  # required
-        kwargs["num_replicas"] = util.extract_field_from_json(
-            json_decamelized, ["num_of_replicas", "num_replicas"]
-        )
-        kwargs["num_partitions"] = util.extract_field_from_json(
-            json_decamelized, ["num_of_partitions", "num_partitions"]
-        )
-        return kwargs
-
-    def update_from_response_json(self, json_dict):
-        json_decamelized = humps.decamelize(json_dict)
-        self.__init__(**self.extract_fields_from_json(json_decamelized))
-        return self
-
-    def json(self):
-        return json.dumps(self, cls=util.MLEncoder)
-
-    def to_dict(self):
-        return {
-            "kafkaTopicDTO": {
-                "name": self._name,
-                "numOfReplicas": self._num_replicas,
-                "numOfPartitions": self._num_partitions,
-            }
-        }
-
-    @property
-    def name(self):
-        """Name of the Kafka topic."""
-        return self._name
-
-    @name.setter
-    def name(self, name: str):
-        self._name = name
-
-    @property
-    def num_replicas(self):
-        """Number of replicas of the Kafka topic."""
-        return self._num_replicas
-
-    @num_replicas.setter
-    def num_replicas(self, num_replicas: int):
-        self._num_replicas = num_replicas
-
-    @property
-    def num_partitions(self):
-        """Number of partitions of the Kafka topic."""
-        return self._num_partitions
-
-    @num_partitions.setter
-    def num_partitions(self, num_partitions: int):
-        self._num_partitions = num_partitions
-
-    def __repr__(self):
-        return f"KafkaTopic({self._name!r})"
diff --git a/hsml/python/hsml/model.py b/hsml/python/hsml/model.py
deleted file mode 100644
index e6147d5fd..000000000
--- a/hsml/python/hsml/model.py
+++ /dev/null
@@ -1,575 +0,0 @@
-#
-# Copyright 2021 Logical Clocks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import json -import logging -import os -import warnings -from typing import Any, Dict, Optional, Union - -import humps -from hsml import client, util -from hsml.constants import ARTIFACT_VERSION -from hsml.constants import INFERENCE_ENDPOINTS as IE -from hsml.core import explicit_provenance -from hsml.engine import model_engine -from hsml.inference_batcher import InferenceBatcher -from hsml.inference_logger import InferenceLogger -from hsml.predictor import Predictor -from hsml.resources import PredictorResources -from hsml.transformer import Transformer - - -_logger = logging.getLogger(__name__) - - -class Model: - """Metadata object representing a model in the Model Registry.""" - - def __init__( - self, - id, - name, - version=None, - created=None, - creator=None, - environment=None, - description=None, - experiment_id=None, - project_name=None, - experiment_project_name=None, - metrics=None, - program=None, - user_full_name=None, - model_schema=None, - training_dataset=None, - input_example=None, - framework=None, - model_registry_id=None, - # unused, but needed since they come in the backend response - tags=None, - href=None, - feature_view=None, - training_dataset_version=None, - **kwargs, - ): - self._id = id - self._name = name - self._version = version - - if description is None: - self._description = "A collection of models for " + name - else: - self._description = description - - self._created = created - self._creator = creator - self._environment = environment - self._experiment_id = experiment_id - self._project_name = project_name - self._experiment_project_name = experiment_project_name - self._training_metrics = metrics - self._program = program - self._user_full_name = user_full_name - self._input_example = input_example - self._framework = framework - self._model_schema = model_schema - self._training_dataset = training_dataset - - # This is needed for update_from_response_json function to not overwrite name of the shared registry this model originates from - if not hasattr(self, "_shared_registry_project_name"): - self._shared_registry_project_name = None - - self._model_registry_id = model_registry_id - - self._model_engine = model_engine.ModelEngine() - self._feature_view = feature_view - self._training_dataset_version = training_dataset_version - if training_dataset_version is None and feature_view is not None: - if feature_view.get_last_accessed_training_dataset() is not None: - self._training_dataset_version = ( - feature_view.get_last_accessed_training_dataset() - ) - else: - warnings.warn( - "Provenance cached data - feature view provided, but training dataset version is missing", - util.ProvenanceWarning, - stacklevel=1, - ) - - def save( - self, - model_path, - await_registration=480, - keep_original_files=False, - upload_configuration: Optional[Dict[str, Any]] = None, - ): - """Persist this model including model files and metadata to the model registry. - - # Arguments - model_path: Local or remote (Hopsworks file system) path to the folder where the model files are located, or path to a specific model file. 
- await_registration: Awaiting time for the model to be registered in Hopsworks. - keep_original_files: If the model files are located in hopsfs, whether to move or copy those files into the Models dataset. Default is False (i.e., model files will be moved) - upload_configuration: When saving a model from outside Hopsworks, the model is uploaded to the model registry using the REST APIs. Each model artifact is divided into - chunks and each chunk uploaded independently. This parameter can be used to control the upload chunk size, the parallelism and the number of retries. - `upload_configuration` can contain the following keys: - * key `chunk_size`: size of each chunk in megabytes. Default 10. - * key `simultaneous_uploads`: number of chunks to upload in parallel. Default 3. - * key `max_chunk_retries`: number of times to retry the upload of a chunk in case of failure. Default 1. - - # Returns - `Model`: The model metadata object. - """ - return self._model_engine.save( - model_instance=self, - model_path=model_path, - await_registration=await_registration, - keep_original_files=keep_original_files, - upload_configuration=upload_configuration, - ) - - def download(self): - """Download the model files. - - # Returns - `str`: Absolute path to local folder containing the model files. - """ - return self._model_engine.download(model_instance=self) - - def delete(self): - """Delete the model - - !!! danger "Potentially dangerous operation" - This operation drops all metadata associated with **this version** of the - model **and** deletes the model files. - - # Raises - `RestAPIError`. - """ - self._model_engine.delete(model_instance=self) - - def deploy( - self, - name: Optional[str] = None, - description: Optional[str] = None, - artifact_version: Optional[str] = ARTIFACT_VERSION.CREATE, - serving_tool: Optional[str] = None, - script_file: Optional[str] = None, - resources: Optional[Union[PredictorResources, dict]] = None, - inference_logger: Optional[Union[InferenceLogger, dict]] = None, - inference_batcher: Optional[Union[InferenceBatcher, dict]] = None, - transformer: Optional[Union[Transformer, dict]] = None, - api_protocol: Optional[str] = IE.API_PROTOCOL_REST, - environment: Optional[str] = None, - ): - """Deploy the model. - - !!! example - ```python - - import hopsworks - - project = hopsworks.login() - - # get Hopsworks Model Registry handle - mr = project.get_model_registry() - - # retrieve the trained model you want to deploy - my_model = mr.get_model("my_model", version=1) - - my_deployment = my_model.deploy() - ``` - # Arguments - name: Name of the deployment. - description: Description of the deployment. - artifact_version: Version number of the model artifact to deploy, `CREATE` to create a new model artifact - or `MODEL-ONLY` to reuse the shared artifact containing only the model files. - serving_tool: Serving tool used to deploy the model server. - script_file: Path to a custom predictor script implementing the Predict class. - resources: Resources to be allocated for the predictor. - inference_logger: Inference logger configuration. - inference_batcher: Inference batcher configuration. - transformer: Transformer to be deployed together with the predictor. - api_protocol: API protocol to be enabled in the deployment (i.e., 'REST' or 'GRPC'). Defaults to 'REST'. - environment: The inference environment to use. - - # Returns - `Deployment`: The deployment metadata object of a new or existing deployment. 
-        """
-
-        if name is None:
-            name = self._name
-
-        predictor = Predictor.for_model(
-            self,
-            name=name,
-            description=description,
-            artifact_version=artifact_version,
-            serving_tool=serving_tool,
-            script_file=script_file,
-            resources=resources,
-            inference_logger=inference_logger,
-            inference_batcher=inference_batcher,
-            transformer=transformer,
-            api_protocol=api_protocol,
-            environment=environment,
-        )
-
-        return predictor.deploy()
-
-    def set_tag(self, name: str, value: Union[str, dict]):
-        """Attach a tag to a model.
-
-        A tag consists of a name/value pair. Tag names are unique identifiers across the whole cluster.
-        The value of a tag can be any valid JSON - primitives, arrays or JSON objects.
-
-        # Arguments
-            name: Name of the tag to be added.
-            value: Value of the tag to be added.
-        # Raises
-            `RestAPIError` in case the backend fails to add the tag.
-        """
-
-        self._model_engine.set_tag(model_instance=self, name=name, value=value)
-
-    def delete_tag(self, name: str):
-        """Delete a tag attached to a model.
-
-        # Arguments
-            name: Name of the tag to be removed.
-        # Raises
-            `RestAPIError` in case the backend fails to delete the tag.
-        """
-        self._model_engine.delete_tag(model_instance=self, name=name)
-
-    def get_tag(self, name: str):
-        """Get a tag attached to a model.
-
-        # Arguments
-            name: Name of the tag to get.
-        # Returns
-            The tag value.
-        # Raises
-            `RestAPIError` in case the backend fails to retrieve the tag.
-        """
-        return self._model_engine.get_tag(model_instance=self, name=name)
-
-    def get_tags(self):
-        """Retrieves all tags attached to a model.
-
-        # Returns
-            `Dict[str, obj]` of tags.
-        # Raises
-            `RestAPIError` in case the backend fails to retrieve the tags.
-        """
-        return self._model_engine.get_tags(model_instance=self)
-
-    def get_url(self):
-        """Get the URL of the model in Hopsworks."""
-        path = (
-            "/p/"
-            + str(client.get_instance()._project_id)
-            + "/models/"
-            + str(self.name)
-            + "/"
-            + str(self.version)
-        )
-        return util.get_hostname_replaced_url(sub_path=path)
-
-    def get_feature_view(self, init: bool = True, online: Optional[bool] = None):
-        """Get the parent feature view of this model, based on explicit provenance.
-        Only accessible, usable feature view objects are returned. Otherwise an Exception is raised.
-        For more details, call the base method `get_feature_view_provenance`.
-
-        # Returns
-            `FeatureView`: Feature View Object.
-        # Raises
-            `Exception` in case the backend fails to retrieve the feature view.
-        """
-        fv_prov = self.get_feature_view_provenance()
-        fv = explicit_provenance.Links.get_one_accessible_parent(fv_prov)
-        if fv is None:
-            return None
-        if init:
-            td_prov = self.get_training_dataset_provenance()
-            td = explicit_provenance.Links.get_one_accessible_parent(td_prov)
-            is_deployment = "DEPLOYMENT_NAME" in os.environ
-            if online or is_deployment:
-                _logger.info(
-                    "Initializing for batch and online retrieval of feature vectors"
-                    + (" - within a deployment" if is_deployment else "")
-                )
-                fv.init_serving(training_dataset_version=td.version)
-            elif online is False:
-                _logger.info("Initializing for batch retrieval of feature vectors")
-                fv.init_batch_scoring(training_dataset_version=td.version)
-        return fv
-
-    def get_feature_view_provenance(self):
-        """Get the parent feature view of this model, based on explicit provenance.
-        This feature view can be accessible, deleted or inaccessible.
-        For deleted and inaccessible feature views, only minimal information is
-        returned.
-
-        # Returns
-            `ProvenanceLinks`: Object containing the section of provenance graph requested.
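-
-        !!! example
-            ```python
-            # A minimal sketch with illustrative names: inspect the provenance
-            # links of a model retrieved from the registry.
-            my_model = mr.get_model("my_model", version=1)
-            fv_links = my_model.get_feature_view_provenance()
-            ```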
-        """
-        return self._model_engine.get_feature_view_provenance(model_instance=self)
-
-    def get_training_dataset_provenance(self):
-        """Get the parent training dataset of this model, based on explicit provenance.
-        This training dataset can be accessible, deleted or inaccessible.
-        For deleted and inaccessible training datasets, only minimal information is
-        returned.
-
-        # Returns
-            `ProvenanceLinks`: Object containing the section of provenance graph requested.
-        """
-        return self._model_engine.get_training_dataset_provenance(model_instance=self)
-
-    @classmethod
-    def from_response_json(cls, json_dict):
-        json_decamelized = humps.decamelize(json_dict)
-        if "count" in json_decamelized:
-            if json_decamelized["count"] == 0:
-                return []
-            return [util.set_model_class(model) for model in json_decamelized["items"]]
-        else:
-            return util.set_model_class(json_decamelized)
-
-    def update_from_response_json(self, json_dict):
-        json_decamelized = humps.decamelize(json_dict)
-        if "type" in json_decamelized:  # backwards compatibility
-            _ = json_decamelized.pop("type")
-        self.__init__(**json_decamelized)
-        return self
-
-    def json(self):
-        return json.dumps(self, cls=util.MLEncoder)
-
-    def to_dict(self):
-        return {
-            "id": self._name + "_" + str(self._version),
-            "experimentId": self._experiment_id,
-            "projectName": self._project_name,
-            "experimentProjectName": self._experiment_project_name,
-            "name": self._name,
-            "modelSchema": self._model_schema,
-            "version": self._version,
-            "description": self._description,
-            "inputExample": self._input_example,
-            "framework": self._framework,
-            "metrics": self._training_metrics,
-            "trainingDataset": self._training_dataset,
-            "environment": self._environment,
-            "program": self._program,
-            "featureView": util.feature_view_to_json(self._feature_view),
-            "trainingDatasetVersion": self._training_dataset_version,
-        }
-
-    @property
-    def id(self):
-        """Id of the model."""
-        return self._id
-
-    @id.setter
-    def id(self, id):
-        self._id = id
-
-    @property
-    def name(self):
-        """Name of the model."""
-        return self._name
-
-    @name.setter
-    def name(self, name):
-        self._name = name
-
-    @property
-    def version(self):
-        """Version of the model."""
-        return self._version
-
-    @version.setter
-    def version(self, version):
-        self._version = version
-
-    @property
-    def description(self):
-        """Description of the model."""
-        return self._description
-
-    @description.setter
-    def description(self, description):
-        self._description = description
-
-    @property
-    def created(self):
-        """Creation date of the model."""
-        return self._created
-
-    @created.setter
-    def created(self, created):
-        self._created = created
-
-    @property
-    def creator(self):
-        """Creator of the model."""
-        return self._creator
-
-    @creator.setter
-    def creator(self, creator):
-        self._creator = creator
-
-    @property
-    def environment(self):
-        """Environment of the model."""
-        if self._environment is not None:
-            return self._model_engine.read_file(
-                model_instance=self, resource="environment.yml"
-            )
-        return self._environment
-
-    @environment.setter
-    def environment(self, environment):
-        self._environment = environment
-
-    @property
-    def experiment_id(self):
-        """Experiment Id of the model."""
-        return self._experiment_id
-
-    @experiment_id.setter
-    def experiment_id(self, experiment_id):
-        self._experiment_id = experiment_id
-
-    @property
-    def training_metrics(self):
-        """Training metrics of the model."""
-        return self._training_metrics
-
-    @training_metrics.setter
-    def training_metrics(self,
training_metrics): - self._training_metrics = training_metrics - - @property - def program(self): - """Executable used to export the model.""" - if self._program is not None: - return self._model_engine.read_file( - model_instance=self, resource=self._program - ) - - @program.setter - def program(self, program): - self._program = program - - @property - def user(self): - """user of the model.""" - return self._user_full_name - - @user.setter - def user(self, user_full_name): - self._user_full_name = user_full_name - - @property - def input_example(self): - """input_example of the model.""" - return self._model_engine.read_json( - model_instance=self, resource="input_example.json" - ) - - @input_example.setter - def input_example(self, input_example): - self._input_example = input_example - - @property - def framework(self): - """framework of the model.""" - return self._framework - - @framework.setter - def framework(self, framework): - self._framework = framework - - @property - def model_schema(self): - """model schema of the model.""" - return self._model_engine.read_json( - model_instance=self, resource="model_schema.json" - ) - - @model_schema.setter - def model_schema(self, model_schema): - self._model_schema = model_schema - - @property - def training_dataset(self): - """training_dataset of the model.""" - return self._training_dataset - - @training_dataset.setter - def training_dataset(self, training_dataset): - self._training_dataset = training_dataset - - @property - def project_name(self): - """project_name of the model.""" - return self._project_name - - @project_name.setter - def project_name(self, project_name): - self._project_name = project_name - - @property - def model_registry_id(self): - """model_registry_id of the model.""" - return self._model_registry_id - - @model_registry_id.setter - def model_registry_id(self, model_registry_id): - self._model_registry_id = model_registry_id - - @property - def experiment_project_name(self): - """experiment_project_name of the model.""" - return self._experiment_project_name - - @experiment_project_name.setter - def experiment_project_name(self, experiment_project_name): - self._experiment_project_name = experiment_project_name - - @property - def model_path(self): - """path of the model with version folder omitted. Resolves to /Projects/{project_name}/Models/{name}""" - return "/Projects/{}/Models/{}".format(self.project_name, self.name) - - @property - def version_path(self): - """path of the model including version folder. Resolves to /Projects/{project_name}/Models/{name}/{version}""" - return "{}/{}".format(self.model_path, str(self.version)) - - @property - def shared_registry_project_name(self): - """shared_registry_project_name of the model.""" - return self._shared_registry_project_name - - @shared_registry_project_name.setter - def shared_registry_project_name(self, shared_registry_project_name): - self._shared_registry_project_name = shared_registry_project_name - - def __repr__(self): - return f"Model(name: {self._name!r}, version: {self._version!r})" diff --git a/hsml/python/hsml/model_registry.py b/hsml/python/hsml/model_registry.py deleted file mode 100644 index 4a7f3443b..000000000 --- a/hsml/python/hsml/model_registry.py +++ /dev/null @@ -1,196 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import warnings
-
-import humps
-from hsml import util
-from hsml.core import model_api
-from hsml.python import signature as python_signature  # noqa: F401
-from hsml.sklearn import signature as sklearn_signature  # noqa: F401
-from hsml.tensorflow import signature as tensorflow_signature  # noqa: F401
-from hsml.torch import signature as torch_signature  # noqa: F401
-
-
-class ModelRegistry:
-    DEFAULT_VERSION = 1
-
-    def __init__(
-        self,
-        project_name,
-        project_id,
-        model_registry_id,
-        shared_registry_project_name=None,
-        **kwargs,
-    ):
-        self._project_name = project_name
-        self._project_id = project_id
-
-        self._shared_registry_project_name = shared_registry_project_name
-        self._model_registry_id = model_registry_id
-
-        self._model_api = model_api.ModelApi()
-
-        self._tensorflow = tensorflow_signature
-        self._python = python_signature
-        self._sklearn = sklearn_signature
-        self._torch = torch_signature
-
-        tensorflow_signature._mr = self
-        python_signature._mr = self
-        sklearn_signature._mr = self
-        torch_signature._mr = self
-
-    @classmethod
-    def from_response_json(cls, json_dict):
-        json_decamelized = humps.decamelize(json_dict)
-        return cls(**json_decamelized)
-
-    def get_model(self, name: str, version: int = None):
-        """Get a model entity from the model registry.
-        Getting a model from the Model Registry means getting its metadata handle
-        so you can subsequently download the model directory.
-
-        # Arguments
-            name: Name of the model to get.
-            version: Version of the model to retrieve, defaults to `None`, in which
-                case version `1` is returned.
-        # Returns
-            `Model`: The model metadata object.
-        # Raises
-            `RestAPIError`: If unable to retrieve model from the model registry.
-        """
-
-        if version is None:
-            warnings.warn(
-                "No version provided for getting model `{}`, defaulting to `{}`.".format(
-                    name, self.DEFAULT_VERSION
-                ),
-                util.VersionWarning,
-                stacklevel=1,
-            )
-            version = self.DEFAULT_VERSION
-
-        return self._model_api.get(
-            name,
-            version,
-            self.model_registry_id,
-            shared_registry_project_name=self.shared_registry_project_name,
-        )
-
-    def get_models(self, name: str):
-        """Get all model entities from the model registry for a specified name.
-        Getting all models from the Model Registry for a given name returns a list of model entities, one for each version registered under
-        the specified model name.
-
-        # Arguments
-            name: Name of the model to get.
-        # Returns
-            `List[Model]`: A list of model metadata objects.
-        # Raises
-            `RestAPIError`: If unable to retrieve model versions from the model registry.
-        """
-
-        return self._model_api.get_models(
-            name,
-            self.model_registry_id,
-            shared_registry_project_name=self.shared_registry_project_name,
-        )
-
-    def get_best_model(self, name: str, metric: str, direction: str):
-        """Get the best performing model entity from the model registry.
-        Getting the best performing model from the Model Registry means specifying, in addition to the name, a metric
-        name corresponding to one of the keys in the training_metrics dict of the model and a direction.
For example to - get the model version with the highest accuracy, specify metric='accuracy' and direction='max'. - - # Arguments - name: Name of the model to get. - metric: Name of the key in the training metrics field to compare. - direction: 'max' to get the model entity with the highest value of the set metric, or 'min' for the lowest. - # Returns - `Model`: The model metadata object. - # Raises - `RestAPIError`: If unable to retrieve model from the model registry. - """ - - model = self._model_api.get_models( - name, - self.model_registry_id, - shared_registry_project_name=self.shared_registry_project_name, - metric=metric, - direction=direction, - ) - if isinstance(model, list) and len(model) > 0: - return model[0] - else: - return None - - @property - def project_name(self): - """Name of the project the registry is connected to.""" - return self._project_name - - @property - def project_path(self): - """Path of the project the registry is connected to.""" - return "/Projects/{}".format(self._project_name) - - @property - def project_id(self): - """Id of the project the registry is connected to.""" - return self._project_id - - @property - def shared_registry_project_name(self): - """Name of the project the shared model registry originates from.""" - return self._shared_registry_project_name - - @property - def model_registry_id(self): - """Id of the model registry.""" - return self._model_registry_id - - @property - def tensorflow(self): - """Module for exporting a TensorFlow model.""" - - return tensorflow_signature - - @property - def sklearn(self): - """Module for exporting a sklearn model.""" - - return sklearn_signature - - @property - def torch(self): - """Module for exporting a torch model.""" - - return torch_signature - - @property - def python(self): - """Module for exporting a generic Python model.""" - - return python_signature - - def __repr__(self): - project_name = ( - self._shared_registry_project_name - if self._shared_registry_project_name is not None - else self._project_name - ) - return f"ModelRegistry(project: {project_name!r})" diff --git a/hsml/python/hsml/model_schema.py b/hsml/python/hsml/model_schema.py deleted file mode 100644 index 7af3999ca..000000000 --- a/hsml/python/hsml/model_schema.py +++ /dev/null @@ -1,64 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import json -from typing import Optional - -from hsml.schema import Schema - - -class ModelSchema: - """Create a schema for a model. - - # Arguments - input_schema: Schema to describe the inputs. - output_schema: Schema to describe the outputs. - - # Returns - `ModelSchema`. The model schema object. 
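-
-    !!! example
-        ```python
-        # A minimal sketch, assuming `X_train` and `y_train` are the arrays the
-        # model was trained on (illustrative names).
-        from hsml.schema import Schema
-        from hsml.model_schema import ModelSchema
-
-        model_schema = ModelSchema(
-            input_schema=Schema(X_train), output_schema=Schema(y_train)
-        )
-        ```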
- """ - - def __init__( - self, - input_schema: Optional[Schema] = None, - output_schema: Optional[Schema] = None, - **kwargs, - ): - if input_schema is not None: - self.input_schema = input_schema - - if output_schema is not None: - self.output_schema = output_schema - - def json(self): - return json.dumps( - self, default=lambda o: getattr(o, "__dict__", o), sort_keys=True, indent=2 - ) - - def to_dict(self): - """ - Get dict representation of the ModelSchema. - """ - return json.loads(self.json()) - - def __repr__(self): - input_type = ( - self.input_schema._get_type() if hasattr(self, "input_schema") else None - ) - output_type = ( - self.output_schema._get_type() if hasattr(self, "output_schema") else None - ) - return f"ModelSchema(input: {input_type!r}, output: {output_type!r})" diff --git a/hsml/python/hsml/model_serving.py b/hsml/python/hsml/model_serving.py deleted file mode 100644 index 21d04b833..000000000 --- a/hsml/python/hsml/model_serving.py +++ /dev/null @@ -1,375 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os -from typing import Optional, Union - -from hsml import util -from hsml.constants import ARTIFACT_VERSION, PREDICTOR_STATE -from hsml.constants import INFERENCE_ENDPOINTS as IE -from hsml.core import serving_api -from hsml.deployment import Deployment -from hsml.inference_batcher import InferenceBatcher -from hsml.inference_logger import InferenceLogger -from hsml.model import Model -from hsml.predictor import Predictor -from hsml.resources import PredictorResources -from hsml.transformer import Transformer - - -class ModelServing: - DEFAULT_VERSION = 1 - - def __init__( - self, - project_name: str, - project_id: int, - **kwargs, - ): - self._project_name = project_name - self._project_id = project_id - - self._serving_api = serving_api.ServingApi() - - def get_deployment_by_id(self, id: int): - """Get a deployment by id from Model Serving. - Getting a deployment from Model Serving means getting its metadata handle - so you can subsequently operate on it (e.g., start or stop). - - !!! example - ```python - # login and get Hopsworks Model Serving handle using .login() and .get_model_serving() - - # get a deployment by id - my_deployment = ms.get_deployment_by_id(1) - ``` - - # Arguments - id: Id of the deployment to get. - # Returns - `Deployment`: The deployment metadata object. - # Raises - `RestAPIError`: If unable to retrieve deployment from model serving. - """ - - return self._serving_api.get_by_id(id) - - def get_deployment(self, name: str = None): - """Get a deployment by name from Model Serving. - - !!! example - ```python - # login and get Hopsworks Model Serving handle using .login() and .get_model_serving() - - # get a deployment by name - my_deployment = ms.get_deployment('deployment_name') - ``` - - Getting a deployment from Model Serving means getting its metadata handle - so you can subsequently operate on it (e.g., start or stop). - - # Arguments - name: Name of the deployment to get. 
-        # Returns
-            `Deployment`: The deployment metadata object.
-        # Raises
-            `RestAPIError`: If unable to retrieve deployment from model serving.
-        """
-
-        if name is None and ("DEPLOYMENT_NAME" in os.environ):
-            name = os.environ["DEPLOYMENT_NAME"]
-        return self._serving_api.get(name)
-
-    def get_deployments(self, model: Model = None, status: str = None):
-        """Get all deployments from model serving.
-        !!! example
-            ```python
-            # log in to Hopsworks using hopsworks.login()
-
-            # get Hopsworks Model Registry handle
-            mr = project.get_model_registry()
-
-            # get Hopsworks Model Serving handle
-            ms = project.get_model_serving()
-
-            # retrieve the trained model you want to deploy
-            my_model = mr.get_model("my_model", version=1)
-
-            list_deployments = ms.get_deployments(my_model)
-
-            for deployment in list_deployments:
-                print(deployment.get_state())
-            ```
-        # Arguments
-            model: Filter by model served in the deployments.
-            status: Filter by status of the deployments.
-        # Returns
-            `List[Deployment]`: A list of deployments.
-        # Raises
-            `RestAPIError`: If unable to retrieve deployments from model serving.
-        """
-
-        model_name = model.name if model is not None else None
-        if status is not None:
-            self._validate_deployment_status(status)
-
-        return self._serving_api.get_all(model_name, status)
-
-    def _validate_deployment_status(self, status):
-        statuses = list(util.get_members(PREDICTOR_STATE, prefix="STATUS"))
-        status = status.upper()
-        if status not in statuses:
-            raise ValueError(
-                "Deployment status '{}' is not valid. Possible values are '{}'".format(
-                    status, ", ".join(statuses)
-                )
-            )
-        return status
-
-    def get_inference_endpoints(self):
-        """Get all inference endpoints available in the current project.
-
-        # Returns
-            `List[InferenceEndpoint]`: Inference endpoints for model inference.
-        """
-
-        return self._serving_api.get_inference_endpoints()
-
-    def create_predictor(
-        self,
-        model: Model,
-        name: Optional[str] = None,
-        artifact_version: Optional[str] = ARTIFACT_VERSION.CREATE,
-        serving_tool: Optional[str] = None,
-        script_file: Optional[str] = None,
-        resources: Optional[Union[PredictorResources, dict]] = None,
-        inference_logger: Optional[Union[InferenceLogger, dict, str]] = None,
-        inference_batcher: Optional[Union[InferenceBatcher, dict]] = None,
-        transformer: Optional[Union[Transformer, dict]] = None,
-        api_protocol: Optional[str] = IE.API_PROTOCOL_REST,
-    ):
-        """Create a Predictor metadata object.
-
-        !!! example
-            ```python
-            # log in to Hopsworks using hopsworks.login()
-
-            # get Hopsworks Model Registry handle
-            mr = project.get_model_registry()
-
-            # retrieve the trained model you want to deploy
-            my_model = mr.get_model("my_model", version=1)
-
-            # get Hopsworks Model Serving handle
-            ms = project.get_model_serving()
-
-            my_predictor = ms.create_predictor(my_model)
-
-            my_deployment = my_predictor.deploy()
-            ```
-
-        !!! note "Lazy"
-            This method is lazy and does not persist any metadata or deploy any model on its own.
-            To create a deployment using this predictor, call the `deploy()` method.
-
-        # Arguments
-            model: Model to be deployed.
-            name: Name of the predictor.
-            artifact_version: Version number of the model artifact to deploy, `CREATE` to create a new model artifact
-                or `MODEL-ONLY` to reuse the shared artifact containing only the model files.
-            serving_tool: Serving tool used to deploy the model server.
-            script_file: Path to a custom predictor script implementing the Predict class.
-            resources: Resources to be allocated for the predictor.
-            inference_logger: Inference logger configuration.
-            inference_batcher: Inference batcher configuration.
-            transformer: Transformer to be deployed together with the predictor.
-            api_protocol: API protocol to be enabled in the deployment (i.e., 'REST' or 'GRPC'). Defaults to 'REST'.
-
-        # Returns
-            `Predictor`. The predictor metadata object.
-        """
-
-        if name is None:
-            name = model.name
-
-        return Predictor.for_model(
-            model,
-            name=name,
-            artifact_version=artifact_version,
-            serving_tool=serving_tool,
-            script_file=script_file,
-            resources=resources,
-            inference_logger=inference_logger,
-            inference_batcher=inference_batcher,
-            transformer=transformer,
-            api_protocol=api_protocol,
-        )
-
-    def create_transformer(
-        self,
-        script_file: Optional[str] = None,
-        resources: Optional[Union[PredictorResources, dict]] = None,
-    ):
-        """Create a Transformer metadata object.
-
-        !!! example
-            ```python
-            # log in to Hopsworks using hopsworks.login()
-
-            # get Dataset API instance
-            dataset_api = project.get_dataset_api()
-
-            # get Hopsworks Model Serving handle
-            ms = project.get_model_serving()
-
-            # create my_transformer.py Python script
-            class Transformer(object):
-                def __init__(self):
-                    ''' Initialization code goes here '''
-                    pass
-
-                def preprocess(self, inputs):
-                    ''' Transform the request inputs here. The object returned by this method will be used as model input to make predictions. '''
-                    return inputs
-
-                def postprocess(self, outputs):
-                    ''' Transform the predictions computed by the model before returning a response '''
-                    return outputs
-
-            uploaded_file_path = dataset_api.upload("my_transformer.py", "Resources", overwrite=True)
-            transformer_script_path = os.path.join("/Projects", project.name, uploaded_file_path)
-
-            my_transformer = ms.create_transformer(script_file=transformer_script_path)
-
-            # or
-
-            from hsml.transformer import Transformer
-
-            my_transformer = Transformer(script_file=transformer_script_path)
-            ```
-
-        !!! example "Create a deployment with the transformer"
-            ```python
-            my_predictor = ms.create_predictor(my_model, transformer=my_transformer)
-            my_deployment = my_predictor.deploy()
-
-            # or
-            my_deployment = ms.create_deployment(my_predictor)
-            my_deployment.save()
-            ```
-
-        !!! note "Lazy"
-            This method is lazy and does not persist any metadata or deploy any transformer. To create a deployment using this transformer, set it in the `predictor.transformer` property.
-
-        # Arguments
-            script_file: Path to a custom predictor script implementing the Transformer class.
-            resources: Resources to be allocated for the transformer.
-
-        # Returns
-            `Transformer`. The transformer metadata object.
-        """
-
-        return Transformer(script_file=script_file, resources=resources)
-
-    def create_deployment(self, predictor: Predictor, name: Optional[str] = None, environment: Optional[str] = None):
-        """Create a Deployment metadata object.
-
-        !!! example
-            ```python
-            # log in to Hopsworks using hopsworks.login()
-
-            # get Hopsworks Model Registry handle
-            mr = project.get_model_registry()
-
-            # retrieve the trained model you want to deploy
-            my_model = mr.get_model("my_model", version=1)
-
-            # get Hopsworks Model Serving handle
-            ms = project.get_model_serving()
-
-            my_predictor = ms.create_predictor(my_model)
-
-            my_deployment = ms.create_deployment(my_predictor)
-            my_deployment.save()
-            ```
-
-        !!! example "Using the model object"
-            ```python
-            # log in to Hopsworks using hopsworks.login()
-
-            # get Hopsworks Model Registry handle
-            mr = project.get_model_registry()
-
-            # retrieve the trained model you want to deploy
-            my_model = mr.get_model("my_model", version=1)
-
-            my_deployment = my_model.deploy()
-
-            my_deployment.get_state().describe()
-            ```
-
-        !!! example "Using the Model Serving handle"
-            ```python
-            # log in to Hopsworks using hopsworks.login()
-
-            # get Hopsworks Model Registry handle
-            mr = project.get_model_registry()
-
-            # retrieve the trained model you want to deploy
-            my_model = mr.get_model("my_model", version=1)
-
-            # get Hopsworks Model Serving handle
-            ms = project.get_model_serving()
-
-            my_predictor = ms.create_predictor(my_model)
-
-            my_deployment = my_predictor.deploy()
-
-            my_deployment.get_state().describe()
-            ```
-
-        !!! note "Lazy"
-            This method is lazy and does not persist any metadata or deploy any model. To create a deployment, call the `save()` method.
-
-        # Arguments
-            predictor: Predictor to be used in the deployment.
-            name: Name of the deployment.
-            environment: The inference environment to use.
-
-        # Returns
-            `Deployment`. The deployment metadata object.
-        """
-
-        return Deployment(predictor=predictor, name=name, environment=environment)
-
-    @property
-    def project_name(self):
-        """Name of the project in which Model Serving is located."""
-        return self._project_name
-
-    @property
-    def project_path(self):
-        """Path of the project in which Model Serving is located."""
-        return "/Projects/{}".format(self._project_name)
-
-    @property
-    def project_id(self):
-        """Id of the project in which Model Serving is located."""
-        return self._project_id
-
-    def __repr__(self):
-        return f"ModelServing(project: {self._project_name!r})"
diff --git a/hsml/python/hsml/predictor.py b/hsml/python/hsml/predictor.py
deleted file mode 100644
index 96f4dd966..000000000
--- a/hsml/python/hsml/predictor.py
+++ /dev/null
@@ -1,482 +0,0 @@
-#
-# Copyright 2022 Logical Clocks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
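-
-# A minimal usage sketch (illustrative; assumes `ms` is a ModelServing handle and
-# `my_model` a model retrieved from the Model Registry):
-#
-#     my_predictor = ms.create_predictor(my_model)
-#     my_deployment = my_predictor.deploy()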
-
-import json
-from typing import Optional, Union
-
-import humps
-from hsml import client, deployment, util
-from hsml.constants import (
-    ARTIFACT_VERSION,
-    INFERENCE_ENDPOINTS,
-    MODEL,
-    PREDICTOR,
-    RESOURCES,
-)
-from hsml.deployable_component import DeployableComponent
-from hsml.inference_batcher import InferenceBatcher
-from hsml.inference_logger import InferenceLogger
-from hsml.predictor_state import PredictorState
-from hsml.resources import PredictorResources
-from hsml.transformer import Transformer
-
-
-class Predictor(DeployableComponent):
-    """Metadata object representing a predictor in Model Serving."""
-
-    def __init__(
-        self,
-        name: str,
-        model_name: str,
-        model_path: str,
-        model_version: int,
-        model_framework: str,  # MODEL.FRAMEWORK
-        artifact_version: Union[int, str],
-        model_server: str,
-        serving_tool: Optional[str] = None,
-        script_file: Optional[str] = None,
-        resources: Optional[Union[PredictorResources, dict]] = None,  # base
-        inference_logger: Optional[Union[InferenceLogger, dict]] = None,  # base
-        inference_batcher: Optional[Union[InferenceBatcher, dict]] = None,  # base
-        transformer: Optional[Union[Transformer, dict]] = None,
-        id: Optional[int] = None,
-        description: Optional[str] = None,
-        created_at: Optional[str] = None,
-        creator: Optional[str] = None,
-        api_protocol: Optional[str] = INFERENCE_ENDPOINTS.API_PROTOCOL_REST,
-        environment: Optional[str] = None,
-        **kwargs,
-    ):
-        serving_tool = (
-            self._validate_serving_tool(serving_tool)
-            or self._get_default_serving_tool()
-        )
-        resources = self._validate_resources(
-            util.get_obj_from_json(resources, PredictorResources), serving_tool
-        ) or self._get_default_resources(serving_tool)
-
-        super().__init__(
-            script_file,
-            resources,
-            inference_batcher,
-        )
-
-        self._name = name
-        self._model_name = model_name
-        self._model_path = model_path
-        self._model_version = model_version
-        self._model_framework = model_framework
-        self._artifact_version = artifact_version
-        self._serving_tool = serving_tool
-        self._model_server = model_server
-        self._id = id
-        self._description = description
-        self._created_at = created_at
-        self._creator = creator
-
-        self._inference_logger = util.get_obj_from_json(
-            inference_logger, InferenceLogger
-        )
-        self._transformer = util.get_obj_from_json(transformer, Transformer)
-        self._validate_script_file(self._model_framework, self._script_file)
-        self._api_protocol = api_protocol
-        self._environment = environment
-
-    def deploy(self):
-        """Create a deployment for this predictor and persist it in Model Serving.
-
-        !!! example
-            ```python
-
-            import hopsworks
-
-            project = hopsworks.login()
-
-            # get Hopsworks Model Registry handle
-            mr = project.get_model_registry()
-
-            # retrieve the trained model you want to deploy
-            my_model = mr.get_model("my_model", version=1)
-
-            # get Hopsworks Model Serving handle
-            ms = project.get_model_serving()
-
-            my_predictor = ms.create_predictor(my_model)
-            my_deployment = my_predictor.deploy()
-
-            print(my_deployment.get_state())
-            ```
-
-        # Returns
-            `Deployment`. The deployment metadata object of a new or existing deployment.
-        """
-
-        _deployment = deployment.Deployment(
-            predictor=self, name=self._name, description=self._description
-        )
-        _deployment.save()
-
-        return _deployment
-
-    def describe(self):
-        """Print a description of the predictor."""
-        util.pretty_print(self)
-
-    def _set_state(self, state: PredictorState):
-        """Set the state of the predictor."""
-        self._state = state
-
-    @classmethod
-    def _validate_serving_tool(cls, serving_tool):
-        if serving_tool is not None:
-            if client.is_saas_connection():
-                # only KServe is supported in Serverless Hopsworks
-                if serving_tool != PREDICTOR.SERVING_TOOL_KSERVE:
-                    raise ValueError(
-                        "KServe deployments are the only ones supported in Serverless Hopsworks"
-                    )
-                return serving_tool
-            # if not saas, check valid serving_tool
-            serving_tools = list(util.get_members(PREDICTOR, prefix="SERVING_TOOL"))
-            if serving_tool not in serving_tools:
-                raise ValueError(
-                    "Serving tool '{}' is not valid. Possible values are '{}'".format(
-                        serving_tool, ", ".join(serving_tools)
-                    )
-                )
-        return serving_tool
-
-    @classmethod
-    def _validate_script_file(cls, model_framework, script_file):
-        if model_framework == MODEL.FRAMEWORK_PYTHON and script_file is None:
-            raise ValueError(
-                "Predictor scripts are required in deployments for custom Python models"
-            )
-
-    @classmethod
-    def _infer_model_server(cls, model_framework):
-        return (
-            PREDICTOR.MODEL_SERVER_TF_SERVING
-            if model_framework == MODEL.FRAMEWORK_TENSORFLOW
-            else PREDICTOR.MODEL_SERVER_PYTHON
-        )
-
-    @classmethod
-    def _get_default_serving_tool(cls):
-        # set kserve as default if it is available
-        return (
-            PREDICTOR.SERVING_TOOL_KSERVE
-            if client.is_kserve_installed()
-            else PREDICTOR.SERVING_TOOL_DEFAULT
-        )
-
-    @classmethod
-    def _validate_resources(cls, resources, serving_tool):
-        if resources is not None:
-            # ensure scale-to-zero for kserve deployments when required
-            if (
-                serving_tool == PREDICTOR.SERVING_TOOL_KSERVE
-                and resources.num_instances != 0
-                and client.is_scale_to_zero_required()
-            ):
-                raise ValueError(
-                    "Scale-to-zero is required for KServe deployments in this cluster. Please set the number of instances to 0."
- ) - return resources - - @classmethod - def _get_default_resources(cls, serving_tool): - num_instances = ( - 0 # enable scale-to-zero by default if required - if serving_tool == PREDICTOR.SERVING_TOOL_KSERVE - and client.is_scale_to_zero_required() - else RESOURCES.MIN_NUM_INSTANCES - ) - return PredictorResources(num_instances) - - @classmethod - def for_model(cls, model, **kwargs): - kwargs["model_name"] = model.name - kwargs["model_path"] = model.model_path - kwargs["model_version"] = model.version - - # get predictor for specific model, includes model type-related validations - return util.get_predictor_for_model(model=model, **kwargs) - - @classmethod - def from_response_json(cls, json_dict): - json_decamelized = humps.decamelize(json_dict) - if isinstance(json_decamelized, list): - if len(json_decamelized) == 0: - return [] - return [cls.from_json(predictor) for predictor in json_decamelized] - else: - if "count" in json_decamelized: - if json_decamelized["count"] == 0: - return [] - return [ - cls.from_json(predictor) for predictor in json_decamelized["items"] - ] - return cls.from_json(json_decamelized) - - @classmethod - def from_json(cls, json_decamelized): - predictor = Predictor(**cls.extract_fields_from_json(json_decamelized)) - predictor._set_state(PredictorState.from_response_json(json_decamelized)) - return predictor - - @classmethod - def extract_fields_from_json(cls, json_decamelized): - kwargs = {} - kwargs["name"] = json_decamelized.pop("name") - kwargs["description"] = util.extract_field_from_json( - json_decamelized, "description" - ) - kwargs["model_name"] = util.extract_field_from_json( - json_decamelized, "model_name", default=kwargs["name"] - ) - kwargs["model_path"] = json_decamelized.pop("model_path") - kwargs["model_version"] = json_decamelized.pop("model_version") - kwargs["model_framework"] = ( - json_decamelized.pop("model_framework") - if "model_framework" in json_decamelized - else MODEL.FRAMEWORK_SKLEARN # backward compatibility - ) - kwargs["artifact_version"] = util.extract_field_from_json( - json_decamelized, "artifact_version" - ) - kwargs["model_server"] = json_decamelized.pop("model_server") - kwargs["serving_tool"] = json_decamelized.pop("serving_tool") - kwargs["script_file"] = util.extract_field_from_json( - json_decamelized, "predictor" - ) - kwargs["resources"] = PredictorResources.from_json(json_decamelized) - kwargs["inference_logger"] = InferenceLogger.from_json(json_decamelized) - kwargs["inference_batcher"] = InferenceBatcher.from_json(json_decamelized) - kwargs["transformer"] = Transformer.from_json(json_decamelized) - kwargs["id"] = json_decamelized.pop("id") - kwargs["created_at"] = json_decamelized.pop("created") - kwargs["creator"] = json_decamelized.pop("creator") - kwargs["api_protocol"] = json_decamelized.pop("api_protocol") - if "environment_dto" in json_decamelized: - environment = json_decamelized.pop("environment_dto") - kwargs["environment"] = environment["name"] - return kwargs - - def update_from_response_json(self, json_dict): - json_decamelized = humps.decamelize(json_dict) - self.__init__(**self.extract_fields_from_json(json_decamelized)) - self._set_state(PredictorState.from_response_json(json_decamelized)) - return self - - def json(self): - return json.dumps(self, cls=util.MLEncoder) - - def to_dict(self): - json = { - "id": self._id, - "name": self._name, - "description": self._description, - "modelName": self._model_name, - "modelPath": self._model_path, - "modelVersion": self._model_version, - "modelFramework": 
self._model_framework, - "artifactVersion": self._artifact_version, - "created": self._created_at, - "creator": self._creator, - "modelServer": self._model_server, - "servingTool": self._serving_tool, - "predictor": self._script_file, - "apiProtocol": self._api_protocol, - } - if self.environment is not None: - json = {**json, **{"environmentDTO": {"name": self._environment}}} - if self._resources is not None: - json = {**json, **self._resources.to_dict()} - if self._inference_logger is not None: - json = {**json, **self._inference_logger.to_dict()} - if self._inference_batcher is not None: - json = {**json, **self._inference_batcher.to_dict()} - if self._transformer is not None: - json = {**json, **self._transformer.to_dict()} - return json - - @property - def id(self): - """Id of the predictor.""" - return self._id - - @property - def name(self): - """Name of the predictor.""" - return self._name - - @name.setter - def name(self, name: str): - self._name = name - - @property - def description(self): - """Description of the predictor.""" - return self._description - - @description.setter - def description(self, description: str): - self._description = description - - @property - def model_name(self): - """Name of the model deployed by the predictor.""" - return self._model_name - - @model_name.setter - def model_name(self, model_name: str): - self._model_name = model_name - - @property - def model_path(self): - """Model path deployed by the predictor.""" - return self._model_path - - @model_path.setter - def model_path(self, model_path: str): - self._model_path = model_path - - @property - def model_version(self): - """Model version deployed by the predictor.""" - return self._model_version - - @model_version.setter - def model_version(self, model_version: int): - self._model_version = model_version - - @property - def model_framework(self): - """Model framework of the model to be deployed by the predictor.""" - return self._model_framework - - @model_framework.setter - def model_framework(self, model_framework: str): - self._model_framework = model_framework - self._model_server = self._infer_model_server(model_framework) - - @property - def artifact_version(self): - """Artifact version deployed by the predictor.""" - return self._artifact_version - - @artifact_version.setter - def artifact_version(self, artifact_version: Union[int, str]): - self._artifact_version = artifact_version - - @property - def artifact_path(self): - """Path of the model artifact deployed by the predictor. 
Resolves to /Projects/{project_name}/Models/{name}/{version}/Artifacts/{artifact_version}/{name}_{version}_{artifact_version}.zip""" - artifact_name = "{}_{}_{}.zip".format( - self._model_name, str(self._model_version), str(self._artifact_version) - ) - return "{}/{}/Artifacts/{}/{}".format( - self._model_path, - str(self._model_version), - str(self._artifact_version), - artifact_name, - ) - - @property - def model_server(self): - """Model server used by the predictor.""" - return self._model_server - - @property - def serving_tool(self): - """Serving tool used to run the model server.""" - return self._serving_tool - - @serving_tool.setter - def serving_tool(self, serving_tool: str): - self._serving_tool = serving_tool - - @property - def script_file(self): - """Script file used to load and run the model.""" - return self._script_file - - @script_file.setter - def script_file(self, script_file: str): - self._script_file = script_file - self._artifact_version = ARTIFACT_VERSION.CREATE - - @property - def inference_logger(self): - """Configuration of the inference logger attached to this predictor.""" - return self._inference_logger - - @inference_logger.setter - def inference_logger(self, inference_logger: InferenceLogger): - self._inference_logger = inference_logger - - @property - def transformer(self): - """Transformer configuration attached to the predictor.""" - return self._transformer - - @transformer.setter - def transformer(self, transformer: Transformer): - self._transformer = transformer - - @property - def created_at(self): - """Created at date of the predictor.""" - return self._created_at - - @property - def creator(self): - """Creator of the predictor.""" - return self._creator - - @property - def requested_instances(self): - """Total number of requested instances in the predictor.""" - num_instances = self._resources.num_instances - if self._transformer is not None: - num_instances += self._transformer.resources.num_instances - return num_instances - - @property - def api_protocol(self): - """API protocol enabled in the predictor (e.g., HTTP or GRPC).""" - return self._api_protocol - - @api_protocol.setter - def api_protocol(self, api_protocol): - self._api_protocol = api_protocol - - @property - def environment(self): - """Name of the inference environment""" - return self._environment - - @environment.setter - def environment(self, environment): - self._environment = environment - - def __repr__(self): - desc = ( - f", description: {self._description!r}" - if self._description is not None - else "" - ) - return f"Predictor(name: {self._name!r}" + desc + ")" diff --git a/hsml/python/hsml/predictor_state.py b/hsml/python/hsml/predictor_state.py deleted file mode 100644 index b145993e1..000000000 --- a/hsml/python/hsml/predictor_state.py +++ /dev/null @@ -1,147 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
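-
-# A minimal usage sketch (illustrative; assumes `my_deployment` is an existing
-# Deployment object):
-#
-#     state = my_deployment.get_state()
-#     state.describe()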
-
-from typing import Optional
-
-import humps
-from hsml import util
-from hsml.predictor_state_condition import PredictorStateCondition
-
-
-class PredictorState:
-    """State of a predictor."""
-
-    def __init__(
-        self,
-        available_predictor_instances: int,
-        available_transformer_instances: Optional[int],
-        hopsworks_inference_path: str,
-        model_server_inference_path: str,
-        internal_port: Optional[int],
-        revision: Optional[int],
-        deployed: Optional[bool],
-        condition: Optional[PredictorStateCondition],
-        status: str,
-        **kwargs,
-    ):
-        self._available_predictor_instances = available_predictor_instances
-        self._available_transformer_instances = available_transformer_instances
-        self._hopsworks_inference_path = hopsworks_inference_path
-        self._model_server_inference_path = model_server_inference_path
-        self._internal_port = internal_port
-        self._revision = revision
-        self._deployed = deployed if deployed is not None else False
-        self._condition = condition
-        self._status = status
-
-    def describe(self):
-        """Print a description of the deployment state"""
-        util.pretty_print(self)
-
-    @classmethod
-    def from_response_json(cls, json_dict):
-        json_decamelized = humps.decamelize(json_dict)
-        return PredictorState(*cls.extract_fields_from_json(json_decamelized))
-
-    @classmethod
-    def extract_fields_from_json(cls, json_decamelized):
-        ai = util.extract_field_from_json(json_decamelized, "available_instances")
-        ati = util.extract_field_from_json(
-            json_decamelized, "available_transformer_instances"
-        )
-        hip = util.extract_field_from_json(json_decamelized, "hopsworks_inference_path")
-        msip = util.extract_field_from_json(
-            json_decamelized, "model_server_inference_path"
-        )
-        ipt = util.extract_field_from_json(json_decamelized, "internal_port")
-        r = util.extract_field_from_json(json_decamelized, "revision")
-        d = util.extract_field_from_json(json_decamelized, "deployed")
-        c = util.extract_field_from_json(
-            json_decamelized, "condition", as_instance_of=PredictorStateCondition
-        )
-        s = util.extract_field_from_json(json_decamelized, "status")
-
-        return ai, ati, hip, msip, ipt, r, d, c, s
-
-    def to_dict(self):
-        json = {
-            "availableInstances": self._available_predictor_instances,
-            "hopsworksInferencePath": self._hopsworks_inference_path,
-            "modelServerInferencePath": self._model_server_inference_path,
-            "status": self._status,
-        }
-
-        if self._available_transformer_instances is not None:
-            json["availableTransformerInstances"] = (
-                self._available_transformer_instances
-            )
-        if self._internal_port is not None:
-            json["internalPort"] = self._internal_port
-        if self._revision is not None:
-            json["revision"] = self._revision
-        if self._deployed is not None:
-            json["deployed"] = self._deployed
-        if self._condition is not None:
-            json = {**json, **self._condition.to_dict()}
-
-        return json
-
-    @property
-    def available_predictor_instances(self):
-        """Available predictor instances."""
-        return self._available_predictor_instances
-
-    @property
-    def available_transformer_instances(self):
-        """Available transformer instances."""
-        return self._available_transformer_instances
-
-    @property
-    def hopsworks_inference_path(self):
-        """Inference path in the Hopsworks REST API."""
-        return self._hopsworks_inference_path
-
-    @property
-    def model_server_inference_path(self):
-        """Inference path in the model server."""
-        return self._model_server_inference_path
-
-    @property
-    def internal_port(self):
-        """Internal port for the predictor."""
-        return self._internal_port
-
-    @property
-    def
revision(self): - """Last revision of the predictor.""" - return self._revision - - @property - def deployed(self): - """Whether the predictor is deployed or not.""" - return self._deployed - - @property - def condition(self): - """Condition of the current state of predictor.""" - return self._condition - - @property - def status(self): - """Status of the predictor.""" - return self._status - - def __repr__(self): - return f"PredictorState(status: {self.status.capitalize()!r})" diff --git a/hsml/python/hsml/predictor_state_condition.py b/hsml/python/hsml/predictor_state_condition.py deleted file mode 100644 index cf1c58934..000000000 --- a/hsml/python/hsml/predictor_state_condition.py +++ /dev/null @@ -1,90 +0,0 @@ -# -# Copyright 2022 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -from typing import Optional - -import humps -from hsml import util - - -class PredictorStateCondition: - """Condition of a predictor state.""" - - def __init__( - self, - type: str, - status: Optional[bool] = None, - reason: Optional[str] = None, - **kwargs, - ): - self._type = type - self._status = status - self._reason = reason - - def describe(self): - util.pretty_print(self) - - @classmethod - def from_response_json(cls, json_dict): - json_decamelized = humps.decamelize(json_dict) - return cls.from_json(json_decamelized) - - @classmethod - def from_json(cls, json_decamelized): - return PredictorStateCondition(**cls.extract_fields_from_json(json_decamelized)) - - @classmethod - def extract_fields_from_json(cls, json_decamelized): - kwargs = {} - kwargs["type"] = json_decamelized.pop("type") # required - kwargs["status"] = util.extract_field_from_json(json_decamelized, "status") - kwargs["reason"] = util.extract_field_from_json(json_decamelized, "reason") - return kwargs - - def update_from_response_json(self, json_dict): - json_decamelized = humps.decamelize(json_dict) - self.__init__(**self.extract_fields_from_json(json_decamelized)) - return self - - def json(self): - return json.dumps(self, cls=util.MLEncoder) - - def to_dict(self): - return { - "condition": { - "type": self._type, - "status": self._status, - "reason": self._reason, - } - } - - @property - def type(self): - """Condition type of the predictor state.""" - return self._type - - @property - def status(self): - """Condition status of the predictor state.""" - return self._status - - @property - def reason(self): - """Condition reason of the predictor state.""" - return self._reason - - def __repr__(self): - return f"PredictorStateCondition(type: {self.type.capitalize()!r}, status: {self.status!r})" diff --git a/hsml/python/hsml/python/__init__.py b/hsml/python/hsml/python/__init__.py deleted file mode 100644 index ff0a6f046..000000000 --- a/hsml/python/hsml/python/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/hsml/python/hsml/python/model.py b/hsml/python/hsml/python/model.py deleted file mode 100644 index ce2f0f984..000000000 --- a/hsml/python/hsml/python/model.py +++ /dev/null @@ -1,79 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import humps -from hsml.constants import MODEL -from hsml.model import Model - - -class Model(Model): - """Metadata object representing a generic python model in the Model Registry.""" - - def __init__( - self, - id, - name, - version=None, - created=None, - creator=None, - environment=None, - description=None, - experiment_id=None, - project_name=None, - experiment_project_name=None, - metrics=None, - program=None, - user_full_name=None, - model_schema=None, - training_dataset=None, - input_example=None, - model_registry_id=None, - tags=None, - href=None, - feature_view=None, - training_dataset_version=None, - **kwargs, - ): - super().__init__( - id, - name, - version=version, - created=created, - creator=creator, - environment=environment, - description=description, - experiment_id=experiment_id, - project_name=project_name, - experiment_project_name=experiment_project_name, - metrics=metrics, - program=program, - user_full_name=user_full_name, - model_schema=model_schema, - training_dataset=training_dataset, - input_example=input_example, - framework=MODEL.FRAMEWORK_PYTHON, - model_registry_id=model_registry_id, - feature_view=feature_view, - training_dataset_version=training_dataset_version, - ) - - def update_from_response_json(self, json_dict): - json_decamelized = humps.decamelize(json_dict) - json_decamelized.pop("framework") - if "type" in json_decamelized: # backwards compatibility - _ = json_decamelized.pop("type") - self.__init__(**json_decamelized) - return self diff --git a/hsml/python/hsml/python/predictor.py b/hsml/python/hsml/python/predictor.py deleted file mode 100644 index a3ca1643f..000000000 --- a/hsml/python/hsml/python/predictor.py +++ /dev/null @@ -1,33 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
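
The PyModel class above re-initializes itself from a decamelized backend response. A short sketch of that pattern with a hypothetical payload; it assumes only humps.decamelize, used exactly as in the code above.

import humps

payload = {"id": 1, "name": "demo", "framework": "PYTHON", "type": "modelDTO"}
json_decamelized = humps.decamelize(payload)
json_decamelized.pop("framework")  # the framework is fixed per subclass
if "type" in json_decamelized:  # backwards compatibility
    _ = json_decamelized.pop("type")
# update_from_response_json then calls self.__init__(**json_decamelized).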
-#
-
-from hsml.constants import MODEL, PREDICTOR
-from hsml.predictor import Predictor
-
-
-class Predictor(Predictor):
-    """Configuration for a predictor running a python model."""
-
-    def __init__(self, **kwargs):
-        kwargs["model_framework"] = MODEL.FRAMEWORK_PYTHON
-        kwargs["model_server"] = PREDICTOR.MODEL_SERVER_PYTHON
-
-        if kwargs["script_file"] is None:
-            raise ValueError(
-                "Predictor scripts are required in deployments for custom Python models"
-            )
-
-        super().__init__(**kwargs)
diff --git a/hsml/python/hsml/python/signature.py b/hsml/python/hsml/python/signature.py
deleted file mode 100644
index 94b154abf..000000000
--- a/hsml/python/hsml/python/signature.py
+++ /dev/null
@@ -1,75 +0,0 @@
-#
-# Copyright 2021 Logical Clocks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from typing import Optional, Union
-
-import numpy
-import pandas
-from hsml.model_schema import ModelSchema
-from hsml.python.model import Model
-
-
-_mr = None
-
-
-def create_model(
-    name: str,
-    version: Optional[int] = None,
-    metrics: Optional[dict] = None,
-    description: Optional[str] = None,
-    input_example: Optional[
-        Union[pandas.DataFrame, pandas.Series, numpy.ndarray, list]
-    ] = None,
-    model_schema: Optional[ModelSchema] = None,
-    feature_view=None,
-    training_dataset_version: Optional[int] = None,
-):
-    """Create a generic Python model metadata object.
-
-    !!! note "Lazy"
-        This method is lazy and does not persist any metadata or upload model artifacts to the
-        model registry on its own. To save the model object and the model artifacts, call the `save()` method with a
-        local file path to the directory containing the model artifacts.
-
-    # Arguments
-        name: Name of the model to create.
-        version: Optionally version of the model to create, defaults to `None` and
-            will create the model with incremented version from the last
-            version in the model registry.
-        metrics: Optionally a dictionary with model evaluation metrics (e.g., accuracy, MAE).
-        description: Optionally a string describing the model, defaults to empty string
-            `""`.
-        input_example: Optionally an input example that represents a single input for the model, defaults to `None`.
-        model_schema: Optionally a model schema for the model inputs and/or outputs.
-
-    # Returns
-        `Model`. The model metadata object.
- """ - model = Model( - id=None, - name=name, - version=version, - description=description, - metrics=metrics, - input_example=input_example, - model_schema=model_schema, - feature_view=feature_view, - training_dataset_version=training_dataset_version, - ) - model._shared_registry_project_name = _mr.shared_registry_project_name - model._model_registry_id = _mr.model_registry_id - - return model diff --git a/hsml/python/hsml/resources.py b/hsml/python/hsml/resources.py deleted file mode 100644 index 039aa263a..000000000 --- a/hsml/python/hsml/resources.py +++ /dev/null @@ -1,394 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -from abc import ABC, abstractmethod -from typing import Optional, Union - -import humps -from hsml import client, util -from hsml.constants import RESOURCES - - -class Resources: - """Resource configuration for a predictor or transformer. - - # Arguments - cores: Number of CPUs. - memory: Memory (MB) resources. - gpus: Number of GPUs. - # Returns - `Resources`. Resource configuration for a predictor or transformer. - """ - - def __init__( - self, - cores: int, - memory: int, - gpus: int, - **kwargs, - ): - self._cores = cores - self._memory = memory - self._gpus = gpus - - def describe(self): - """Print a description of the resource configuration""" - util.pretty_print(self) - - @classmethod - def from_response_json(cls, json_dict): - json_decamelized = humps.decamelize(json_dict) - return cls.from_json(json_decamelized) - - @classmethod - def from_json(cls, json_decamelized): - return Resources(**cls.extract_fields_from_json(json_decamelized)) - - @classmethod - def extract_fields_from_json(cls, json_decamelized): - kwargs = {} - kwargs["cores"] = util.extract_field_from_json(json_decamelized, "cores") - kwargs["memory"] = util.extract_field_from_json(json_decamelized, "memory") - kwargs["gpus"] = util.extract_field_from_json(json_decamelized, "gpus") - return kwargs - - def json(self): - return json.dumps(self, cls=util.MLEncoder) - - def to_dict(self): - return {"cores": self._cores, "memory": self._memory, "gpus": self._gpus} - - @property - def cores(self): - """Number of CPUs to be allocated per instance""" - return self._cores - - @cores.setter - def cores(self, cores: int): - self._cores = cores - - @property - def memory(self): - """Memory resources to be allocated per instance""" - return self._memory - - @memory.setter - def memory(self, memory: int): - self._memory = memory - - @property - def gpus(self): - """Number of GPUs to be allocated per instance""" - return self._gpus - - @gpus.setter - def gpus(self, gpus: int): - self._gpus = gpus - - def __repr__(self): - return f"Resources(cores: {self._cores!r}, memory: {self._memory!r}, gpus: {self._gpus!r})" - - -class ComponentResources(ABC): - """Resource configuration for a predictor or transformer. - - # Arguments - num_instances: Number of instances. 
-        requests: Minimum resources to allocate for a deployment.
-        limits: Maximum resources to allocate for a deployment.
-    # Returns
-        `ComponentResources`. Resource configuration for a predictor or transformer.
-    """
-
-    def __init__(
-        self,
-        num_instances: int,
-        requests: Optional[Union[Resources, dict]] = None,
-        limits: Optional[Union[Resources, dict]] = None,
-    ):
-        self._num_instances = num_instances
-        self._requests = util.get_obj_from_json(requests, Resources) or Resources(
-            RESOURCES.MIN_CORES, RESOURCES.MIN_MEMORY, RESOURCES.MIN_GPUS
-        )
-        self._fill_missing_resources(
-            self._requests,
-            RESOURCES.MIN_CORES,
-            RESOURCES.MIN_MEMORY,
-            RESOURCES.MIN_GPUS,
-        )
-        self._limits = util.get_obj_from_json(limits, Resources) or Resources(
-            *self._get_default_resource_limits()
-        )
-        self._fill_missing_resources(self._limits, *self._get_default_resource_limits())
-
-        # validate both requests and limits
-        self._validate_resources(self._requests, self._limits)
-
-    def describe(self):
-        """Print a description of the resource configuration"""
-        util.pretty_print(self)
-
-    def _get_default_resource_limits(self):
-        max_resources = client.get_serving_resource_limits()
-        # cores limit
-        if max_resources["cores"] == -1:  # no limit
-            max_cores = (
-                RESOURCES.MAX_CORES
-                if RESOURCES.MAX_CORES >= self._requests.cores
-                else self._requests.cores
-            )
-        else:
-            max_cores = (
-                RESOURCES.MAX_CORES
-                if RESOURCES.MAX_CORES <= max_resources["cores"]
-                and RESOURCES.MAX_CORES >= self._requests.cores
-                else max_resources["cores"]
-            )
-        # memory limit
-        if max_resources["memory"] == -1:  # no limit
-            max_memory = (
-                RESOURCES.MAX_MEMORY
-                if RESOURCES.MAX_MEMORY >= self._requests.memory
-                else self._requests.memory
-            )
-        else:
-            max_memory = (
-                RESOURCES.MAX_MEMORY
-                if RESOURCES.MAX_MEMORY <= max_resources["memory"]
-                and RESOURCES.MAX_MEMORY >= self._requests.memory
-                else max_resources["memory"]
-            )
-        # gpus limit
-        if max_resources["gpus"] == -1:  # no limit
-            max_gpus = (
-                RESOURCES.MAX_GPUS
-                if RESOURCES.MAX_GPUS >= self._requests.gpus
-                else self._requests.gpus
-            )
-        else:
-            max_gpus = (
-                RESOURCES.MAX_GPUS
-                if RESOURCES.MAX_GPUS <= max_resources["gpus"]
-                and RESOURCES.MAX_GPUS >= self._requests.gpus
-                else max_resources["gpus"]
-            )
-        return max_cores, max_memory, max_gpus
-
-    @classmethod
-    def _fill_missing_resources(cls, resources, cores, memory, gpus):
-        if resources.cores is None:
-            resources.cores = cores
-        if resources.memory is None:
-            resources.memory = memory
-        if resources.gpus is None:
-            resources.gpus = gpus
-
-    @classmethod
-    def _validate_resources(cls, requests, limits):
-        # limits
-        max_resources = client.get_serving_resource_limits()
-        if max_resources["cores"] > -1:
-            if limits.cores <= 0:
-                raise ValueError("Limit number of cores must be greater than 0 cores.")
-            if limits.cores > max_resources["cores"]:
-                raise ValueError(
-                    "Limit number of cores cannot exceed the maximum of "
-                    + str(max_resources["cores"])
-                    + " cores."
-                )
-        if max_resources["memory"] > -1:
-            if limits.memory <= 0:
-                raise ValueError("Limit memory resources must be greater than 0 MB.")
-            if limits.memory > max_resources["memory"]:
-                raise ValueError(
-                    "Limit memory resources cannot exceed the maximum of "
-                    + str(max_resources["memory"])
-                    + " MB."
-                )
-        if max_resources["gpus"] > -1:
-            if limits.gpus < 0:
-                raise ValueError(
-                    "Limit number of gpus must be greater than or equal to 0 gpus."
-                )
-            if limits.gpus > max_resources["gpus"]:
-                raise ValueError(
-                    "Limit number of gpus cannot exceed the maximum of "
-                    + str(max_resources["gpus"])
-                    + " gpus."
-                )
-
-        # requests
-        if requests.cores <= 0:
-            raise ValueError("Requested number of cores must be greater than 0 cores.")
-        if limits.cores > -1 and requests.cores > limits.cores:
-            raise ValueError(
-                "Requested number of cores cannot exceed the limit of "
-                + str(limits.cores)
-                + " cores."
-            )
-        if requests.memory <= 0:
-            raise ValueError("Requested memory resources must be greater than 0 MB.")
-        if limits.memory > -1 and requests.memory > limits.memory:
-            raise ValueError(
-                "Requested memory resources cannot exceed the limit of "
-                + str(limits.memory)
-                + " MB."
-            )
-        if requests.gpus < 0:
-            raise ValueError(
-                "Requested number of gpus must be greater than or equal to 0 gpus."
-            )
-        if limits.gpus > -1 and requests.gpus > limits.gpus:
-            raise ValueError(
-                "Requested number of gpus cannot exceed the limit of "
-                + str(limits.gpus)
-                + " gpus."
-            )
-
-    @classmethod
-    def from_response_json(cls, json_dict):
-        json_decamelized = humps.decamelize(json_dict)
-        return cls.from_json(json_decamelized)
-
-    @classmethod
-    @abstractmethod
-    def from_json(cls, json_decamelized):
-        pass
-
-    @classmethod
-    def extract_fields_from_json(cls, json_decamelized):
-        kwargs = {}
-
-        # extract resources
-        if cls.RESOURCES_CONFIG_KEY in json_decamelized:
-            resources = json_decamelized.pop(cls.RESOURCES_CONFIG_KEY)
-        elif "resources" in json_decamelized:
-            resources = json_decamelized.pop("resources")
-        else:
-            resources = json_decamelized
-
-        # extract resource fields
-        kwargs["requests"] = util.extract_field_from_json(
-            resources, "requests", as_instance_of=Resources
-        )
-        kwargs["limits"] = util.extract_field_from_json(
-            resources, "limits", as_instance_of=Resources
-        )
-
-        # extract num instances
-        if cls.NUM_INSTANCES_KEY in json_decamelized:
-            kwargs["num_instances"] = json_decamelized.pop(cls.NUM_INSTANCES_KEY)
-        elif "num_instances" in json_decamelized:
-            kwargs["num_instances"] = json_decamelized.pop("num_instances")
-        else:
-            kwargs["num_instances"] = util.extract_field_from_json(
-                resources, [cls.NUM_INSTANCES_KEY, "num_instances"]
-            )
-
-        return kwargs
-
-    def json(self):
-        return json.dumps(self, cls=util.MLEncoder)
-
-    @abstractmethod
-    def to_dict(self):
-        pass
-
-    @property
-    def num_instances(self):
-        """Number of instances"""
-        return self._num_instances
-
-    @num_instances.setter
-    def num_instances(self, num_instances: int):
-        self._num_instances = num_instances
-
-    @property
-    def requests(self):
-        """Minimum resources to allocate"""
-        return self._requests
-
-    @requests.setter
-    def requests(self, requests: Resources):
-        self._requests = requests
-
-    @property
-    def limits(self):
-        """Maximum resources to allocate"""
-        return self._limits
-
-    @limits.setter
-    def limits(self, limits: Resources):
-        self._limits = limits
-
-    def __repr__(self):
-        return f"ComponentResources(num_instances: {self._num_instances!r}, requests: {self._requests is not None!r}, limits: {self._limits is not None!r})"
-
-
-class PredictorResources(ComponentResources):
-    RESOURCES_CONFIG_KEY = "predictor_resources"
-    NUM_INSTANCES_KEY = "requested_instances"
-
-    def __init__(
-        self,
-        num_instances: int,
-        requests: Optional[Union[Resources, dict]] = None,
-        limits: Optional[Union[Resources, dict]] = None,
-    ):
-        super().__init__(num_instances, requests, limits)
-
-    @classmethod
-    def from_json(cls, json_decamelized):
-        return
PredictorResources(**cls.extract_fields_from_json(json_decamelized))
-
-    def to_dict(self):
-        return {
-            humps.camelize(self.NUM_INSTANCES_KEY): self._num_instances,
-            humps.camelize(self.RESOURCES_CONFIG_KEY): {
-                "requests": (
-                    self._requests.to_dict() if self._requests is not None else None
-                ),
-                "limits": self._limits.to_dict() if self._limits is not None else None,
-            },
-        }
-
-
-class TransformerResources(ComponentResources):
-    RESOURCES_CONFIG_KEY = "transformer_resources"
-    NUM_INSTANCES_KEY = "requested_transformer_instances"
-
-    def __init__(
-        self,
-        num_instances: int,
-        requests: Optional[Union[Resources, dict]] = None,
-        limits: Optional[Union[Resources, dict]] = None,
-    ):
-        super().__init__(num_instances, requests, limits)
-
-    @classmethod
-    def from_json(cls, json_decamelized):
-        return TransformerResources(**cls.extract_fields_from_json(json_decamelized))
-
-    def to_dict(self):
-        return {
-            humps.camelize(self.NUM_INSTANCES_KEY): self._num_instances,
-            humps.camelize(self.RESOURCES_CONFIG_KEY): {
-                "requests": (
-                    self._requests.to_dict() if self._requests is not None else None
-                ),
-                "limits": self._limits.to_dict() if self._limits is not None else None,
-            },
-        }
diff --git a/hsml/python/hsml/schema.py b/hsml/python/hsml/schema.py
deleted file mode 100644
index 22e46aed1..000000000
--- a/hsml/python/hsml/schema.py
+++ /dev/null
@@ -1,83 +0,0 @@
-#
-# Copyright 2021 Logical Clocks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import json
-from typing import Optional, TypeVar, Union
-
-import numpy
-import pandas
-from hsml.utils.schema.columnar_schema import ColumnarSchema
-from hsml.utils.schema.tensor_schema import TensorSchema
-
-
-class Schema:
-    """Create a schema for a model input or output.
-
-    # Arguments
-        object: The object to construct the schema from.
-
-    # Returns
-        `Schema`. The schema object.
-    """
-
-    def __init__(
-        self,
-        object: Optional[
-            Union[
-                pandas.DataFrame,
-                pandas.Series,
-                TypeVar("pyspark.sql.dataframe.DataFrame"),  # noqa: F821
-                TypeVar("hsfs.training_dataset.TrainingDataset"),  # noqa: F821
-                numpy.ndarray,
-                list,
-            ]
-        ] = None,
-        **kwargs,
-    ):
-        # A tensor schema is either an ndarray or a list containing name, type and shape dicts
-        if isinstance(object, numpy.ndarray) or (
-            isinstance(object, list) and all(["shape" in entry for entry in object])
-        ):
-            self.tensor_schema = self._convert_tensor_to_schema(object).tensors
-        else:
-            self.columnar_schema = self._convert_columnar_to_schema(object).columns
-
-    def _convert_columnar_to_schema(self, object):
-        return ColumnarSchema(object)
-
-    def _convert_tensor_to_schema(self, object):
-        return TensorSchema(object)
-
-    def _get_type(self):
-        if hasattr(self, "tensor_schema"):
-            return "tensor"
-        if hasattr(self, "columnar_schema"):
-            return "columnar"
-        return None
-
-    def json(self):
-        return json.dumps(
-            self, default=lambda o: getattr(o, "__dict__", o), sort_keys=True, indent=2
-        )
-
-    def to_dict(self):
-        """
-        Get dict representation of the Schema.
- """ - return json.loads(self.json()) - - def __repr__(self): - return f"Schema(type: {self._get_type()!r})" diff --git a/hsml/python/hsml/sklearn/__init__.py b/hsml/python/hsml/sklearn/__init__.py deleted file mode 100644 index ff0a6f046..000000000 --- a/hsml/python/hsml/sklearn/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/hsml/python/hsml/sklearn/model.py b/hsml/python/hsml/sklearn/model.py deleted file mode 100644 index 900a36204..000000000 --- a/hsml/python/hsml/sklearn/model.py +++ /dev/null @@ -1,79 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import humps -from hsml.constants import MODEL -from hsml.model import Model - - -class Model(Model): - """Metadata object representing an sklearn model in the Model Registry.""" - - def __init__( - self, - id, - name, - version=None, - created=None, - creator=None, - environment=None, - description=None, - experiment_id=None, - project_name=None, - experiment_project_name=None, - metrics=None, - program=None, - user_full_name=None, - model_schema=None, - training_dataset=None, - input_example=None, - model_registry_id=None, - tags=None, - href=None, - feature_view=None, - training_dataset_version=None, - **kwargs, - ): - super().__init__( - id, - name, - version=version, - created=created, - creator=creator, - environment=environment, - description=description, - experiment_id=experiment_id, - project_name=project_name, - experiment_project_name=experiment_project_name, - metrics=metrics, - program=program, - user_full_name=user_full_name, - model_schema=model_schema, - training_dataset=training_dataset, - input_example=input_example, - framework=MODEL.FRAMEWORK_SKLEARN, - model_registry_id=model_registry_id, - feature_view=feature_view, - training_dataset_version=training_dataset_version, - ) - - def update_from_response_json(self, json_dict): - json_decamelized = humps.decamelize(json_dict) - json_decamelized.pop("framework") - if "type" in json_decamelized: # backwards compatibility - _ = json_decamelized.pop("type") - self.__init__(**json_decamelized) - return self diff --git a/hsml/python/hsml/sklearn/predictor.py b/hsml/python/hsml/sklearn/predictor.py deleted file mode 100644 index 1d43c66f7..000000000 --- a/hsml/python/hsml/sklearn/predictor.py +++ /dev/null @@ -1,28 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you 
may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from hsml.constants import MODEL, PREDICTOR
-from hsml.predictor import Predictor
-
-
-class Predictor(Predictor):
-    """Configuration for a predictor running a sklearn model."""
-
-    def __init__(self, **kwargs):
-        kwargs["model_framework"] = MODEL.FRAMEWORK_SKLEARN
-        kwargs["model_server"] = PREDICTOR.MODEL_SERVER_PYTHON
-
-        super().__init__(**kwargs)
diff --git a/hsml/python/hsml/sklearn/signature.py b/hsml/python/hsml/sklearn/signature.py
deleted file mode 100644
index ef2ab74d2..000000000
--- a/hsml/python/hsml/sklearn/signature.py
+++ /dev/null
@@ -1,75 +0,0 @@
-#
-# Copyright 2021 Logical Clocks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from typing import Optional, Union
-
-import numpy
-import pandas
-from hsml.model_schema import ModelSchema
-from hsml.sklearn.model import Model
-
-
-_mr = None
-
-
-def create_model(
-    name: str,
-    version: Optional[int] = None,
-    metrics: Optional[dict] = None,
-    description: Optional[str] = None,
-    input_example: Optional[
-        Union[pandas.DataFrame, pandas.Series, numpy.ndarray, list]
-    ] = None,
-    model_schema: Optional[ModelSchema] = None,
-    feature_view=None,
-    training_dataset_version: Optional[int] = None,
-):
-    """Create an SkLearn model metadata object.
-
-    !!! note "Lazy"
-        This method is lazy and does not persist any metadata or upload model artifacts to the
-        model registry on its own. To save the model object and the model artifacts, call the `save()` method with a
-        local file path to the directory containing the model artifacts.
-
-    # Arguments
-        name: Name of the model to create.
-        version: Optionally version of the model to create, defaults to `None` and
-            will create the model with incremented version from the last
-            version in the model registry.
-        metrics: Optionally a dictionary with model evaluation metrics (e.g., accuracy, MAE).
-        description: Optionally a string describing the model, defaults to empty string
-            `""`.
-        input_example: Optionally an input example that represents a single input for the model, defaults to `None`.
-        model_schema: Optionally a model schema for the model inputs and/or outputs.
-
-    # Returns
-        `Model`. The model metadata object.
- """ - model = Model( - id=None, - name=name, - version=version, - description=description, - metrics=metrics, - input_example=input_example, - model_schema=model_schema, - feature_view=feature_view, - training_dataset_version=training_dataset_version, - ) - model._shared_registry_project_name = _mr.shared_registry_project_name - model._model_registry_id = _mr.model_registry_id - - return model diff --git a/hsml/python/hsml/tag.py b/hsml/python/hsml/tag.py deleted file mode 100644 index aecf2ed74..000000000 --- a/hsml/python/hsml/tag.py +++ /dev/null @@ -1,77 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import json - -import humps -from hsml import util - - -class Tag: - def __init__( - self, - name, - value, - schema=None, - href=None, - expand=None, - items=None, - count=None, - type=None, - **kwargs, - ): - self._name = name - self._value = value - - def to_dict(self): - return { - "name": self._name, - "value": self._value, - } - - def json(self): - return json.dumps(self, cls=util.MLEncoder) - - @classmethod - def from_response_json(cls, json_dict): - json_decamelized = humps.decamelize(json_dict) - if "count" not in json_decamelized or json_decamelized["count"] == 0: - return [] - return [cls(**tag) for tag in json_decamelized["items"]] - - @property - def name(self): - """Name of the tag.""" - return self._name - - @name.setter - def name(self, name): - self._name = name - - @property - def value(self): - """Value of tag.""" - return self._value - - @value.setter - def value(self, value): - self._value = value - - def __str__(self): - return self.json() - - def __repr__(self): - return f"Tag({self._name!r}, {self._value!r})" diff --git a/hsml/python/hsml/tensorflow/__init__.py b/hsml/python/hsml/tensorflow/__init__.py deleted file mode 100644 index ff0a6f046..000000000 --- a/hsml/python/hsml/tensorflow/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/hsml/python/hsml/tensorflow/model.py b/hsml/python/hsml/tensorflow/model.py deleted file mode 100644 index c09ccf10d..000000000 --- a/hsml/python/hsml/tensorflow/model.py +++ /dev/null @@ -1,79 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import humps -from hsml.constants import MODEL -from hsml.model import Model - - -class Model(Model): - """Metadata object representing a tensorflow model in the Model Registry.""" - - def __init__( - self, - id, - name, - version=None, - created=None, - creator=None, - environment=None, - description=None, - experiment_id=None, - project_name=None, - experiment_project_name=None, - metrics=None, - program=None, - user_full_name=None, - model_schema=None, - training_dataset=None, - input_example=None, - model_registry_id=None, - tags=None, - href=None, - feature_view=None, - training_dataset_version=None, - **kwargs, - ): - super().__init__( - id, - name, - version=version, - created=created, - creator=creator, - environment=environment, - description=description, - experiment_id=experiment_id, - project_name=project_name, - experiment_project_name=experiment_project_name, - metrics=metrics, - program=program, - user_full_name=user_full_name, - model_schema=model_schema, - training_dataset=training_dataset, - input_example=input_example, - framework=MODEL.FRAMEWORK_TENSORFLOW, - model_registry_id=model_registry_id, - feature_view=feature_view, - training_dataset_version=training_dataset_version, - ) - - def update_from_response_json(self, json_dict): - json_decamelized = humps.decamelize(json_dict) - json_decamelized.pop("framework") - if "type" in json_decamelized: # backwards compatibility - _ = json_decamelized.pop("type") - self.__init__(**json_decamelized) - return self diff --git a/hsml/python/hsml/tensorflow/predictor.py b/hsml/python/hsml/tensorflow/predictor.py deleted file mode 100644 index 045aadf3a..000000000 --- a/hsml/python/hsml/tensorflow/predictor.py +++ /dev/null @@ -1,33 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#
-
-from hsml.constants import MODEL, PREDICTOR
-from hsml.predictor import Predictor
-
-
-class Predictor(Predictor):
-    """Configuration for a predictor running a tensorflow model."""
-
-    def __init__(self, **kwargs):
-        kwargs["model_framework"] = MODEL.FRAMEWORK_TENSORFLOW
-        kwargs["model_server"] = PREDICTOR.MODEL_SERVER_TF_SERVING
-
-        if kwargs["script_file"] is not None:
-            raise ValueError(
-                "Predictor scripts are not supported in deployments for Tensorflow models"
-            )
-
-        super().__init__(**kwargs)
diff --git a/hsml/python/hsml/tensorflow/signature.py b/hsml/python/hsml/tensorflow/signature.py
deleted file mode 100644
index 88b0f0fc4..000000000
--- a/hsml/python/hsml/tensorflow/signature.py
+++ /dev/null
@@ -1,75 +0,0 @@
-#
-# Copyright 2021 Logical Clocks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from typing import Optional, Union
-
-import numpy
-import pandas
-from hsml.model_schema import ModelSchema
-from hsml.tensorflow.model import Model
-
-
-_mr = None
-
-
-def create_model(
-    name: str,
-    version: Optional[int] = None,
-    metrics: Optional[dict] = None,
-    description: Optional[str] = None,
-    input_example: Optional[
-        Union[pandas.DataFrame, pandas.Series, numpy.ndarray, list]
-    ] = None,
-    model_schema: Optional[ModelSchema] = None,
-    feature_view=None,
-    training_dataset_version: Optional[int] = None,
-):
-    """Create a TensorFlow model metadata object.
-
-    !!! note "Lazy"
-        This method is lazy and does not persist any metadata or upload model artifacts to the
-        model registry on its own. To save the model object and the model artifacts, call the `save()` method with a
-        local file path to the directory containing the model artifacts.
-
-    # Arguments
-        name: Name of the model to create.
-        version: Optionally version of the model to create, defaults to `None` and
-            will create the model with incremented version from the last
-            version in the model registry.
-        metrics: Optionally a dictionary with model evaluation metrics (e.g., accuracy, MAE).
-        description: Optionally a string describing the model, defaults to empty string
-            `""`.
-        input_example: Optionally an input example that represents a single input for the model, defaults to `None`.
-        model_schema: Optionally a model schema for the model inputs and/or outputs.
-
-    # Returns
-        `Model`. The model metadata object.
- """ - model = Model( - id=None, - name=name, - version=version, - description=description, - metrics=metrics, - input_example=input_example, - model_schema=model_schema, - feature_view=feature_view, - training_dataset_version=training_dataset_version, - ) - model._shared_registry_project_name = _mr.shared_registry_project_name - model._model_registry_id = _mr.model_registry_id - - return model diff --git a/hsml/python/hsml/torch/__init__.py b/hsml/python/hsml/torch/__init__.py deleted file mode 100644 index ff0a6f046..000000000 --- a/hsml/python/hsml/torch/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/hsml/python/hsml/torch/model.py b/hsml/python/hsml/torch/model.py deleted file mode 100644 index 59102119c..000000000 --- a/hsml/python/hsml/torch/model.py +++ /dev/null @@ -1,79 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import humps -from hsml.constants import MODEL -from hsml.model import Model - - -class Model(Model): - """Metadata object representing a torch model in the Model Registry.""" - - def __init__( - self, - id, - name, - version=None, - created=None, - creator=None, - environment=None, - description=None, - experiment_id=None, - project_name=None, - experiment_project_name=None, - metrics=None, - program=None, - user_full_name=None, - model_schema=None, - training_dataset=None, - input_example=None, - model_registry_id=None, - tags=None, - href=None, - feature_view=None, - training_dataset_version=None, - **kwargs, - ): - super().__init__( - id, - name, - version=version, - created=created, - creator=creator, - environment=environment, - description=description, - experiment_id=experiment_id, - project_name=project_name, - experiment_project_name=experiment_project_name, - metrics=metrics, - program=program, - user_full_name=user_full_name, - model_schema=model_schema, - training_dataset=training_dataset, - input_example=input_example, - framework=MODEL.FRAMEWORK_TORCH, - model_registry_id=model_registry_id, - feature_view=feature_view, - training_dataset_version=training_dataset_version, - ) - - def update_from_response_json(self, json_dict): - json_decamelized = humps.decamelize(json_dict) - json_decamelized.pop("framework") - if "type" in json_decamelized: # backwards compatibility - _ = json_decamelized.pop("type") - self.__init__(**json_decamelized) - return self diff --git a/hsml/python/hsml/torch/predictor.py b/hsml/python/hsml/torch/predictor.py deleted file mode 100644 index 5f7ed5d7a..000000000 --- a/hsml/python/hsml/torch/predictor.py +++ /dev/null @@ -1,33 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from hsml.constants import MODEL, PREDICTOR -from hsml.predictor import Predictor - - -class Predictor(Predictor): - """Configuration for a predictor running a torch model.""" - - def __init__(self, **kwargs): - kwargs["model_framework"] = MODEL.FRAMEWORK_PYTHON - kwargs["model_server"] = PREDICTOR.MODEL_SERVER_PYTHON - - if kwargs["script_file"] is None: - raise ValueError( - "Predictor scripts are required in deployments for Torch models" - ) - - super().__init__(**kwargs) diff --git a/hsml/python/hsml/torch/signature.py b/hsml/python/hsml/torch/signature.py deleted file mode 100644 index 32ab27d37..000000000 --- a/hsml/python/hsml/torch/signature.py +++ /dev/null @@ -1,75 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
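
With the torch predictor above, all four framework-specific predictors have now appeared; a comment-only summary of the script_file rules their constructors enforce:

# script_file rules enforced by the framework predictors removed above:
#   python:     MODEL_SERVER_PYTHON      - script_file required
#   sklearn:    MODEL_SERVER_PYTHON      - script_file optional
#   torch:      MODEL_SERVER_PYTHON      - script_file required
#   tensorflow: MODEL_SERVER_TF_SERVING  - script_file must be None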
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from typing import Optional, Union
-
-import numpy
-import pandas
-from hsml.model_schema import ModelSchema
-from hsml.torch.model import Model
-
-
-_mr = None
-
-
-def create_model(
-    name: str,
-    version: Optional[int] = None,
-    metrics: Optional[dict] = None,
-    description: Optional[str] = None,
-    input_example: Optional[
-        Union[pandas.DataFrame, pandas.Series, numpy.ndarray, list]
-    ] = None,
-    model_schema: Optional[ModelSchema] = None,
-    feature_view=None,
-    training_dataset_version: Optional[int] = None,
-):
-    """Create a Torch model metadata object.
-
-    !!! note "Lazy"
-        This method is lazy and does not persist any metadata or upload model artifacts to the
-        model registry on its own. To save the model object and the model artifacts, call the `save()` method with a
-        local file path to the directory containing the model artifacts.
-
-    # Arguments
-        name: Name of the model to create.
-        version: Optionally version of the model to create, defaults to `None` and
-            will create the model with incremented version from the last
-            version in the model registry.
-        metrics: Optionally a dictionary with model evaluation metrics (e.g., accuracy, MAE).
-        description: Optionally a string describing the model, defaults to empty string
-            `""`.
-        input_example: Optionally an input example that represents a single input for the model, defaults to `None`.
-        model_schema: Optionally a model schema for the model inputs and/or outputs.
-
-    # Returns
-        `Model`. The model metadata object.
-    """
-    model = Model(
-        id=None,
-        name=name,
-        version=version,
-        description=description,
-        metrics=metrics,
-        input_example=input_example,
-        model_schema=model_schema,
-        feature_view=feature_view,
-        training_dataset_version=training_dataset_version,
-    )
-    model._shared_registry_project_name = _mr.shared_registry_project_name
-    model._model_registry_id = _mr.model_registry_id
-
-    return model
diff --git a/hsml/python/hsml/transformer.py b/hsml/python/hsml/transformer.py
deleted file mode 100644
index 4121d106d..000000000
--- a/hsml/python/hsml/transformer.py
+++ /dev/null
@@ -1,93 +0,0 @@
-#
-# Copyright 2022 Logical Clocks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
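
A minimal usage sketch of the lazy create_model API above. The project handle and the model directory are hypothetical; mr.torch exposes the torch signature module, and save() is the call that actually persists metadata and artifacts.

mr = project.get_model_registry()  # hypothetical project handle
model = mr.torch.create_model("cnn", metrics={"accuracy": 0.95})
model.save("/path/to/model_dir")  # only now is anything persisted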
- -from typing import Optional, Union - -import humps -from hsml import client, util -from hsml.constants import RESOURCES -from hsml.deployable_component import DeployableComponent -from hsml.resources import TransformerResources - - -class Transformer(DeployableComponent): - """Metadata object representing a transformer to be used in a predictor.""" - - def __init__( - self, - script_file: str, - resources: Optional[Union[TransformerResources, dict]] = None, # base - **kwargs, - ): - resources = ( - self._validate_resources( - util.get_obj_from_json(resources, TransformerResources) - ) - or self._get_default_resources() - ) - if resources.num_instances is None: - resources.num_instances = self._get_default_num_instances() - - super().__init__(script_file, resources) - - def describe(self): - """Print a description of the transformer""" - util.pretty_print(self) - - @classmethod - def _validate_resources(cls, resources): - if resources is not None: - # ensure scale-to-zero for kserve deployments when required - if resources.num_instances != 0 and client.is_scale_to_zero_required(): - raise ValueError( - "Scale-to-zero is required for KServe deployments in this cluster. Please, set the number of transformer instances to 0." - ) - return resources - - @classmethod - def _get_default_num_instances(cls): - return ( - 0 # enable scale-to-zero by default if required - if client.is_scale_to_zero_required() - else RESOURCES.MIN_NUM_INSTANCES - ) - - @classmethod - def _get_default_resources(cls): - return TransformerResources(cls._get_default_num_instances()) - - @classmethod - def from_json(cls, json_decamelized): - sf, rc = cls.extract_fields_from_json(json_decamelized) - return Transformer(sf, rc) if sf is not None else None - - @classmethod - def extract_fields_from_json(cls, json_decamelized): - sf = util.extract_field_from_json( - json_decamelized, ["transformer", "script_file"] - ) - rc = TransformerResources.from_json(json_decamelized) - return sf, rc - - def update_from_response_json(self, json_dict): - json_decamelized = humps.decamelize(json_dict) - self.__init__(*self.extract_fields_from_json(json_decamelized)) - return self - - def to_dict(self): - return {"transformer": self._script_file, **self._resources.to_dict()} - - def __repr__(self): - return f"Transformer({self._script_file!r})" diff --git a/hsml/python/hsml/util.py b/hsml/python/hsml/util.py deleted file mode 100644 index 96380b6f4..000000000 --- a/hsml/python/hsml/util.py +++ /dev/null @@ -1,347 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
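
To make the scale-to-zero rule in Transformer above concrete, a small sketch; the script name is hypothetical.

from hsml.transformer import Transformer

# On clusters where client.is_scale_to_zero_required() is True, only
# num_instances == 0 passes validation; it is also the default there.
transformer = Transformer("transformer.py", resources={"num_instances": 0})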
-#
-
-from __future__ import annotations
-
-import datetime
-import inspect
-import os
-import shutil
-from json import JSONEncoder, dumps
-from urllib.parse import urljoin, urlparse
-
-import humps
-import numpy as np
-import pandas as pd
-from hsml import client
-from hsml.constants import DEFAULT, MODEL, PREDICTOR
-from hsml.model import Model as BaseModel
-from hsml.predictor import Predictor as BasePredictor
-from hsml.python.model import Model as PyModel
-from hsml.python.predictor import Predictor as PyPredictor
-from hsml.sklearn.model import Model as SkLearnModel
-from hsml.sklearn.predictor import Predictor as SkLearnPredictor
-from hsml.tensorflow.model import Model as TFModel
-from hsml.tensorflow.predictor import Predictor as TFPredictor
-from hsml.torch.model import Model as TorchModel
-from hsml.torch.predictor import Predictor as TorchPredictor
-from six import string_types
-
-
-class VersionWarning(Warning):
-    pass
-
-
-class ProvenanceWarning(Warning):
-    pass
-
-
-class MLEncoder(JSONEncoder):
-    def default(self, obj):
-        try:
-            return obj.to_dict()
-        except AttributeError:
-            return super().default(obj)
-
-
-class NumpyEncoder(JSONEncoder):
-    """Special json encoder for numpy types.
-    Note that some numpy types don't have a native python equivalent,
-    hence json.dumps will raise TypeError.
-    In this case, you'll need to convert your numpy types into their closest python equivalent.
-    """
-
-    def convert(self, obj):
-        import base64
-
-        import numpy as np
-        import pandas as pd
-
-        def encode_binary(x):
-            return base64.encodebytes(x).decode("ascii")
-
-        if isinstance(obj, np.ndarray):
-            if obj.dtype == object:
-                return [self.convert(x)[0] for x in obj.tolist()], True
-            elif obj.dtype == np.bytes_:
-                return np.vectorize(encode_binary)(obj), True
-            else:
-                return obj.tolist(), True
-
-        if isinstance(obj, (pd.Timestamp, datetime.date)):
-            return obj.isoformat(), True
-        if isinstance(obj, bytes) or isinstance(obj, bytearray):
-            return encode_binary(obj), True
-        if isinstance(obj, np.generic):
-            return obj.item(), True
-        if isinstance(obj, np.datetime64):
-            return np.datetime_as_string(obj), True
-        return obj, False
-
-    def default(self, obj):  # pylint: disable=E0202
-        res, converted = self.convert(obj)
-        if converted:
-            return res
-        else:
-            return super().default(obj)
-
-
-# Model registry
-
-# - schema and types
-
-
-def set_model_class(model):
-    if "href" in model:
-        _ = model.pop("href")
-    if "type" in model:  # backwards compatibility
-        _ = model.pop("type")
-    if "tags" in model:
-        _ = model.pop("tags")  # tags are always retrieved from backend
-
-    if "framework" not in model:
-        return BaseModel(**model)
-
-    framework = model.pop("framework")
-    if framework == MODEL.FRAMEWORK_TENSORFLOW:
-        return TFModel(**model)
-    if framework == MODEL.FRAMEWORK_TORCH:
-        return TorchModel(**model)
-    if framework == MODEL.FRAMEWORK_SKLEARN:
-        return SkLearnModel(**model)
-    elif framework == MODEL.FRAMEWORK_PYTHON:
-        return PyModel(**model)
-    else:
-        raise ValueError(
-            "framework {} is not a supported framework".format(str(framework))
-        )
-
-
-def input_example_to_json(input_example):
-    if isinstance(input_example, np.ndarray):
-        if input_example.size > 0:
-            return _handle_tensor_input(input_example)
-        else:
-            raise ValueError(
-                "input_example of type {} can not be empty".format(type(input_example))
-            )
-    elif isinstance(input_example, dict):
-        return _handle_dict_input(input_example)
-    else:
-        return _handle_dataframe_input(input_example)
-
-
-def _handle_tensor_input(input_tensor):
-    return
input_tensor.tolist() - - -def _handle_dataframe_input(input_ex): - if isinstance(input_ex, pd.DataFrame): - if not input_ex.empty: - return input_ex.iloc[0].tolist() - else: - raise ValueError( - "input_example of type {} can not be empty".format(type(input_ex)) - ) - elif isinstance(input_ex, pd.Series): - if not input_ex.empty: - return input_ex.tolist() - else: - raise ValueError( - "input_example of type {} can not be empty".format(type(input_ex)) - ) - elif isinstance(input_ex, list): - if len(input_ex) > 0: - return input_ex - else: - raise ValueError( - "input_example of type {} can not be empty".format(type(input_ex)) - ) - else: - raise TypeError( - "{} is not a supported input example type".format(type(input_ex)) - ) - - -def _handle_dict_input(input_ex): - return input_ex - - -# - artifacts - - -def compress(archive_out_path, archive_name, path_to_archive): - if os.path.isdir(path_to_archive): - return shutil.make_archive( - os.path.join(archive_out_path, archive_name), "gztar", path_to_archive - ) - else: - return shutil.make_archive( - os.path.join(archive_out_path, archive_name), - "gztar", - os.path.dirname(path_to_archive), - os.path.basename(path_to_archive), - ) - - -def decompress(archive_file_path, extract_dir=None): - return shutil.unpack_archive(archive_file_path, extract_dir=extract_dir) - - -# - export models - - -def validate_metrics(metrics): - if metrics is not None: - if not isinstance(metrics, dict): - raise TypeError( - "provided metrics is of instance {}, expected a dict".format( - type(metrics) - ) - ) - - for metric in metrics: - # Validate key is a string - if not isinstance(metric, string_types): - raise TypeError( - "provided metrics key is of instance {}, expected a string".format( - type(metric) - ) - ) - # Validate value is a number - try: - float(metrics[metric]) - except ValueError as err: - raise ValueError( - "{} is not a number, only numbers can be attached as metadata for models.".format( - str(metrics[metric]) - ) - ) from err - - -# Model serving - - -def get_predictor_for_model(model, **kwargs): - if not isinstance(model, BaseModel): - raise ValueError( - "model is of type {}, but an instance of {} class is expected".format( - type(model), BaseModel - ) - ) - - if type(model) == TFModel: - return TFPredictor(**kwargs) - if type(model) == TorchModel: - return TorchPredictor(**kwargs) - if type(model) == SkLearnModel: - return SkLearnPredictor(**kwargs) - if type(model) == PyModel: - return PyPredictor(**kwargs) - if type(model) == BaseModel: - return BasePredictor( # python as default framework and model server - model_framework=MODEL.FRAMEWORK_PYTHON, - model_server=PREDICTOR.MODEL_SERVER_PYTHON, - **kwargs, - ) - - -def get_hostname_replaced_url(sub_path: str): - """ - construct and return an url with public hopsworks hostname and sub path - :param self: - :param sub_path: url sub-path after base url - :return: href url - """ - href = urljoin(client.get_instance()._base_url, sub_path) - url_parsed = client.get_instance()._replace_public_host(urlparse(href)) - return url_parsed.geturl() - - -# General - - -def pretty_print(obj): - if isinstance(obj, list): - for logs in obj: - pretty_print(logs) - else: - json_decamelized = humps.decamelize(obj.to_dict()) - print(dumps(json_decamelized, indent=4, sort_keys=True)) - - -def get_members(cls, prefix=None): - for m in inspect.getmembers(cls, lambda m: not (inspect.isroutine(m))): - n = m[0] # name - if (prefix is not None and n.startswith(prefix)) or ( - prefix is None and not (n.startswith("__") 
and n.endswith("__")) - ): - yield m[1] # value - - -# - json - - -def extract_field_from_json(obj, fields, default=None, as_instance_of=None): - if isinstance(fields, list): - for field in fields: - value = extract_field_from_json(obj, field, default, as_instance_of) - if value is not None: - break - else: - value = obj.pop(fields) if fields in obj else default - if as_instance_of is not None: - if isinstance(value, list): - # if the field is a list, get all obj - value = [ - get_obj_from_json(obj=subvalue, cls=as_instance_of) - for subvalue in value - ] - else: - # otherwise, get single obj - value = get_obj_from_json(obj=value, cls=as_instance_of) - return value - - -def get_obj_from_json(obj, cls): - if obj is not None: - if isinstance(obj, cls): - return obj - if isinstance(obj, dict): - if obj is DEFAULT: - return cls() - return cls.from_json(obj) - raise ValueError( - "Object of type {} cannot be converted to class {}".format(type(obj), cls) - ) - return obj - - -def feature_view_to_json(obj): - if obj is None: - return None - import importlib.util - - if importlib.util.find_spec("hsfs"): - from hsfs import feature_view - - if isinstance(obj, feature_view.FeatureView): - import json - - import humps - - return humps.camelize(json.loads(obj.json())) - return None diff --git a/hsml/python/hsml/utils/__init__.py b/hsml/python/hsml/utils/__init__.py deleted file mode 100644 index 7fa8fd556..000000000 --- a/hsml/python/hsml/utils/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/hsml/python/hsml/utils/schema/__init__.py b/hsml/python/hsml/utils/schema/__init__.py deleted file mode 100644 index 7fa8fd556..000000000 --- a/hsml/python/hsml/utils/schema/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/hsml/python/hsml/utils/schema/column.py b/hsml/python/hsml/utils/schema/column.py deleted file mode 100644 index fa5fc3723..000000000 --- a/hsml/python/hsml/utils/schema/column.py +++ /dev/null @@ -1,28 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - - -class Column: - """Metadata object representing a column in the schema for a model.""" - - def __init__(self, type, name=None, description=None): - self.type = str(type) - - if name is not None: - self.name = str(name) - - if description is not None: - self.description = str(description) diff --git a/hsml/python/hsml/utils/schema/columnar_schema.py b/hsml/python/hsml/utils/schema/columnar_schema.py deleted file mode 100644 index 3aa5fde0e..000000000 --- a/hsml/python/hsml/utils/schema/columnar_schema.py +++ /dev/null @@ -1,109 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import importlib - -import pandas -from hsml.utils.schema.column import Column - - -try: - import hsfs -except ImportError: - pass - -try: - import pyspark -except ImportError: - pass - - -class ColumnarSchema: - """Metadata object representing a columnar schema for a model.""" - - def __init__(self, columnar_obj=None): - if isinstance(columnar_obj, list): - self.columns = self._convert_list_to_schema(columnar_obj) - elif isinstance(columnar_obj, pandas.DataFrame): - self.columns = self._convert_pandas_df_to_schema(columnar_obj) - elif isinstance(columnar_obj, pandas.Series): - self.columns = self._convert_pandas_series_to_schema(columnar_obj) - elif importlib.util.find_spec("pyspark") is not None and isinstance( - columnar_obj, pyspark.sql.dataframe.DataFrame - ): - self.columns = self._convert_spark_to_schema(columnar_obj) - elif importlib.util.find_spec("hsfs") is not None and isinstance( - columnar_obj, hsfs.training_dataset.TrainingDataset - ): - self.columns = self._convert_td_to_schema(columnar_obj) - else: - raise TypeError( - "{} is not supported in a columnar schema.".format(type(columnar_obj)) - ) - - def _convert_list_to_schema(self, columnar_obj): - columns = [] - for column in columnar_obj: - columns.append(self._build_column(column)) - return columns - - def _convert_pandas_df_to_schema(self, pandas_df): - pandas_columns = pandas_df.columns - pandas_data_types = pandas_df.dtypes - columns = [] - for name in pandas_columns: - columns.append(Column(pandas_data_types[name], name=name)) - return columns - - def _convert_pandas_series_to_schema(self, pandas_series): - columns = [] - columns.append(Column(pandas_series.dtype, name=pandas_series.name)) - return columns - - def _convert_spark_to_schema(self, spark_df): - columns = [] - types = spark_df.dtypes - for dtype in types: - name, dtype = dtype - columns.append(Column(dtype, name=name)) - return columns - - def _convert_td_to_schema(self, td): - columns = [] - features = td.schema - for 
feature in features: - columns.append(Column(feature.type, name=feature.name)) - return columns - - def _build_column(self, columnar_obj): - type = None - name = None - description = None - - if "description" in columnar_obj: - description = columnar_obj["description"] - - if "name" in columnar_obj: - name = columnar_obj["name"] - - if "type" in columnar_obj: - type = columnar_obj["type"] - else: - raise ValueError( - "Mandatory 'type' key missing from entry {}".format(columnar_obj) - ) - - return Column(type, name=name, description=description) diff --git a/hsml/python/hsml/utils/schema/tensor.py b/hsml/python/hsml/utils/schema/tensor.py deleted file mode 100644 index 2722776b9..000000000 --- a/hsml/python/hsml/utils/schema/tensor.py +++ /dev/null @@ -1,30 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - - -class Tensor: - """Metadata object representing a tensor in the schema for a model.""" - - def __init__(self, type, shape, name=None, description=None): - self.type = str(type) - - self.shape = str(shape) - - if name is not None: - self.name = str(name) - - if description is not None: - self.description = str(description) diff --git a/hsml/python/hsml/utils/schema/tensor_schema.py b/hsml/python/hsml/utils/schema/tensor_schema.py deleted file mode 100644 index da24ba836..000000000 --- a/hsml/python/hsml/utils/schema/tensor_schema.py +++ /dev/null @@ -1,73 +0,0 @@ -# -# Copyright 2022 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import numpy -from hsml.utils.schema.tensor import Tensor - - -class TensorSchema: - """Metadata object representing a tensor schema for a model.""" - - def __init__(self, tensor_obj=None): - if isinstance(tensor_obj, list): - self.tensors = self._convert_list_to_schema(tensor_obj) - elif isinstance(tensor_obj, numpy.ndarray): - self.tensors = self._convert_tensor_to_schema(tensor_obj) - else: - raise TypeError( - "{} is not supported in a tensor schema.".format(type(tensor_obj)) - ) - - def _convert_tensor_to_schema(self, tensor_obj): - return Tensor(tensor_obj.dtype, tensor_obj.shape) - - def _convert_list_to_schema(self, tensor_obj): - if len(tensor_obj) == 1: - return [self._build_tensor(tensor_obj[0])] - else: - tensors = [] - for tensor in tensor_obj: - tensors.append(self._build_tensor(tensor)) - return tensors - - def _build_tensor(self, tensor_obj): - name = None - type = None - shape = None - description = None - - # Name is optional - if "name" in tensor_obj: - name = tensor_obj["name"] - - if "description" in tensor_obj: - description = tensor_obj["description"] - - if "type" in tensor_obj: - type = tensor_obj["type"] - else: - raise ValueError( - "Mandatory 'type' key missing from entry {}".format(tensor_obj) - ) - - if "shape" in tensor_obj: - shape = tensor_obj["shape"] - else: - raise ValueError( - "Mandatory 'shape' key missing from entry {}".format(tensor_obj) - ) - - return Tensor(type, shape, name=name, description=description) diff --git a/hsml/python/hsml/version.py b/hsml/python/hsml/version.py deleted file mode 100644 index a7136ad06..000000000 --- a/hsml/python/hsml/version.py +++ /dev/null @@ -1,17 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#
-
-__version__ = "4.0.0.dev1"
diff --git a/hsml/python/pyproject.toml b/hsml/python/pyproject.toml
deleted file mode 100644
index e4770cd4a..000000000
--- a/hsml/python/pyproject.toml
+++ /dev/null
@@ -1,136 +0,0 @@
-[project]
-name="hsml"
-dynamic = ["version"]
-requires-python = ">=3.8,<3.13"
-readme = "README.md"
-description = "HSML Python SDK to interact with Hopsworks Model Registry"
-keywords = ["Hopsworks", "Model Registry", "hsml", "Models", "ML", "Machine Learning Models", "TensorFlow", "PyTorch", "Machine Learning", "MLOps", "DataOps"]
-authors = [{name = "Hopsworks AB", email = "robin@hopsworks.ai"}]
-license = { text = "Apache-2.0" }
-
-classifiers = [
-    "Development Status :: 5 - Production/Stable",
-    "Topic :: Utilities",
-    "License :: OSI Approved :: Apache Software License",
-    "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.8",
-    "Programming Language :: Python :: 3.9",
-    "Programming Language :: Python :: 3.10",
-    "Programming Language :: Python :: 3.11",
-    "Programming Language :: Python :: 3.12",
-    "Intended Audience :: Developers",
-]
-
-dependencies = [
-    "pyhumps==1.6.1",
-    "requests",
-    "furl",
-    "boto3",
-    "pandas",
-    "numpy",
-    "pyjks",
-    "mock",
-    "tqdm",
-    "grpcio>=1.49.1,<2.0.0",  # ^1.49.1
-    "protobuf>=3.19.0,<4.0.0",  # ^3.19.0
-]
-
-[project.optional-dependencies]
-dev = ["pytest==7.4.4", "pytest-mock==3.12.0", "ruff"]
-
-[build-system]
-requires = ["setuptools", "wheel"]
-build-backend = "setuptools.build_meta"
-
-
-[tool.setuptools.packages.find]
-exclude = ["tests*"]
-include = ["../Readme.md", "../LICENSE", "hsml", "hsml.*"]
-
-
-[tool.setuptools.dynamic]
-version = {attr = "hsml.version.__version__"}
-
-[project.urls]
-Documentation = "https://docs.hopsworks.ai/latest"
-Repository = "https://github.com/logicalclocks/machine-learning-api"
-Homepage = "https://www.hopsworks.ai"
-Community = "https://community.hopsworks.ai"
-
-
-[tool.ruff]
-# Exclude a variety of commonly ignored directories.
-exclude = [
-    ".bzr",
-    ".direnv",
-    ".eggs",
-    ".git",
-    ".git-rewrite",
-    ".hg",
-    ".ipynb_checkpoints",
-    ".mypy_cache",
-    ".nox",
-    ".pants.d",
-    ".pyenv",
-    ".pytest_cache",
-    ".pytype",
-    ".ruff_cache",
-    ".svn",
-    ".tox",
-    ".venv",
-    ".vscode",
-    "__pypackages__",
-    "_build",
-    "buck-out",
-    "build",
-    "dist",
-    "node_modules",
-    "site-packages",
-    "venv",
-    "java",
-]
-
-# Same as Black.
-line-length = 88
-indent-width = 4
-
-# Assume Python 3.8+ syntax.
-target-version = "py38"
-
-[tool.ruff.lint]
-# 1. Enable flake8-bugbear (`B`) rules, in addition to the defaults.
-select = ["E4", "E7", "E9", "F", "B", "I", "W"]#, "ANN"]
-ignore = [
-    "B905",  # zip has no strict kwarg until Python 3.10
-    "ANN101",  # Missing type annotation for self in method
-    "ANN102",  # Missing type annotation for cls in classmethod
-    "ANN003",  # Missing type annotation for **kwarg in function
-    "ANN002",  # Missing type annotation for *args in function
-    "ANN401",  # Allow Any in type annotations
-    "W505",  # Doc line too long
-]
-
-# Allow fixes for all enabled rules (when `--fix` is provided).
-fixable = ["ALL"]
-unfixable = []
-
-# Allow unused variables when underscore-prefixed.
-dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
-
-[tool.ruff.lint.isort]
-lines-after-imports = 2
-known-third-party = ["hopsworks", "hsfs", "hsml"]
-
-
-[tool.ruff.format]
-# Like Black, use double quotes for strings.
-quote-style = "double"
-
-# Like Black, indent with spaces, rather than tabs.
-indent-style = "space"
-
-# Like Black, respect magic trailing commas.
-skip-magic-trailing-comma = false - -# Like Black, automatically detect the appropriate line ending. -line-ending = "auto" diff --git a/hsml/python/setup.py b/hsml/python/setup.py deleted file mode 100644 index cb916d7e6..000000000 --- a/hsml/python/setup.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# Copyright 2021 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from setuptools import setup - - -setup() diff --git a/hsml/python/tests/__init__.py b/hsml/python/tests/__init__.py deleted file mode 100644 index 5b0cd48e7..000000000 --- a/hsml/python/tests/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2024 Logical Clocks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/hsml/python/tests/conftest.py b/hsml/python/tests/conftest.py deleted file mode 100644 index 00d23a9fc..000000000 --- a/hsml/python/tests/conftest.py +++ /dev/null @@ -1,20 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -pytest_plugins = [ - "tests.fixtures.backend_fixtures", - "tests.fixtures.model_fixtures", -] diff --git a/hsml/python/tests/fixtures/__init__.py b/hsml/python/tests/fixtures/__init__.py deleted file mode 100644 index ff8055b9b..000000000 --- a/hsml/python/tests/fixtures/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# diff --git a/hsml/python/tests/fixtures/backend_fixtures.py b/hsml/python/tests/fixtures/backend_fixtures.py deleted file mode 100644 index c79bc6ddb..000000000 --- a/hsml/python/tests/fixtures/backend_fixtures.py +++ /dev/null @@ -1,45 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import json -import os - -import pytest - - -FIXTURES_DIR = os.path.dirname(os.path.abspath(__file__)) - -FIXTURES = [ - "tag", - "model", - "resources", - "transformer", - "predictor", - "kafka_topic", - "inference_logger", - "inference_batcher", - "inference_endpoint", -] - -backend_fixtures_json = {} -for fixture in FIXTURES: - with open(os.path.join(FIXTURES_DIR, f"{fixture}_fixtures.json"), "r") as json_file: - backend_fixtures_json[fixture] = json.load(json_file) - - -@pytest.fixture -def backend_fixtures(): - return backend_fixtures_json diff --git a/hsml/python/tests/fixtures/inference_batcher_fixtures.json b/hsml/python/tests/fixtures/inference_batcher_fixtures.json deleted file mode 100644 index 0fc8bbc15..000000000 --- a/hsml/python/tests/fixtures/inference_batcher_fixtures.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "get_enabled": { - "response": { - "enabled": true - }, - "response_nested": { - "batching_configuration": { - "enabled": true - } - } - }, - "get_disabled": { - "response": { - "enabled": false - }, - "response_nested": { - "batching_configuration": { - "enabled": true - } - } - }, - "get_enabled_with_config": { - "response": { - "enabled": true, - "max_batch_size": 1, - "max_latency": 2, - "timeout": 3 - }, - "response_nested": { - "batching_configuration": { - "enabled": true, - "max_batch_size": 1, - "max_latency": 2, - "timeout": 3 - } - } - }, - "get_disabled_with_config": { - "response": { - "enabled": false, - "max_batch_size": 1, - "max_latency": 2, - "timeout": 3 - }, - "response_nested": { - "batching_configuration": { - "enabled": false, - "max_batch_size": 1, - "max_latency": 2, - "timeout": 3 - } - } - } -} diff --git a/hsml/python/tests/fixtures/inference_endpoint_fixtures.json b/hsml/python/tests/fixtures/inference_endpoint_fixtures.json deleted file mode 100644 index 7dce25daf..000000000 --- a/hsml/python/tests/fixtures/inference_endpoint_fixtures.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "get_port": { - "response": { - "name": "port_name", - "number": 12345 - } - }, - "get_empty": { - "response": { - "count": 0, - "items": [] - } - }, - "get_singleton": { - "response": { - "count": 1, - "items": [ - { - "type": "LOAD_BALANCER", - "hosts": ["host1", "host2", "host3"], - "ports": [ - { - "name": "port_name", - "number": 12345 - } - ] - } - ] - } - }, - "get_list": { - "response": { - "count": 2, - "items": [ - { - "type": "LOAD_BALANCER", - "hosts": ["host1", "host2"], - "ports": [ - { - "name": "port_name", - "number": 12345 - } - ] - }, - { - "type": "NODE", - "hosts": 54321, - "ports": [ - { - "name": "port_name", - "number": 12345 - }, - { - "name": "port_name_2", - "number": 54321 - }, - { - "name": "port_name_3", - 
"number": 15243 - } - ] - } - ] - } - } -} diff --git a/hsml/python/tests/fixtures/inference_logger_fixtures.json b/hsml/python/tests/fixtures/inference_logger_fixtures.json deleted file mode 100644 index ae22a8c44..000000000 --- a/hsml/python/tests/fixtures/inference_logger_fixtures.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "get_mode_all": { - "response": { - "inference_logging": "ALL" - }, - "init_args": { - "mode": "ALL" - } - }, - "get_mode_inputs": { - "response": { - "inference_logging": "MODEL_INPUTS" - }, - "init_args": { - "mode": "MODEL_INPUTS" - } - }, - "get_mode_outputs": { - "response": { - "inference_logging": "MODEL_OUTPUTS" - }, - "init_args": { - "mode": "MODEL_OUTPUTS" - } - }, - "get_mode_none": { - "response": { - "inference_logging": "NONE" - }, - "init_args": { - "mode": "NONE" - } - }, - "get_mode_all_with_kafka_topic": { - "response": { - "inference_logging": "ALL", - "kafka_topic_dto": { - "name": "topic" - } - }, - "init_args": { - "mode": "ALL", - "kafka_topic": { - "name": "topic" - } - } - }, - "get_mode_inputs_with_kafka_topic": { - "response": { - "inference_logging": "MODEL_INPUTS", - "kafka_topic_dto": { - "name": "topic" - } - }, - "init_args": { - "mode": "MODEL_INPUTS", - "kafka_topic": { - "name": "topic", - "num_replicas": 1, - "num_partitions": 2 - } - } - }, - "get_mode_outputs_with_kafka_topic": { - "response": { - "inference_logging": "MODEL_OUTPUTS", - "kafka_topic_dto": { - "name": "topic" - } - }, - "init_args": { - "mode": "MODEL_OUTPUTS", - "kafka_topic": { - "name": "topic", - "num_replicas": 1, - "num_partitions": 2 - } - } - }, - "get_mode_none_with_kafka_topic": { - "response": { - "inference_logging": "NONE", - "kafka_topic_dto": { - "name": "topic" - } - }, - "init_args": { - "mode": "NONE", - "kafka_topic": { - "name": "topic", - "num_replicas": 1, - "num_partitions": 2 - } - } - } -} diff --git a/hsml/python/tests/fixtures/kafka_topic_fixtures.json b/hsml/python/tests/fixtures/kafka_topic_fixtures.json deleted file mode 100644 index f69d1e567..000000000 --- a/hsml/python/tests/fixtures/kafka_topic_fixtures.json +++ /dev/null @@ -1,59 +0,0 @@ -{ - "get_existing_with_name_only": { - "response": { - "kafka_topic_dto": { - "name": "topic" - } - } - }, - "get_existing_with_name_and_config": { - "response": { - "kafka_topic_dto": { - "name": "topic", - "num_replicas": 1, - "num_partitions": 2 - } - } - }, - "get_existing_with_name_and_config_alternative": { - "response": { - "kafka_topic_dto": { - "name": "topic", - "num_of_replicas": 1, - "num_of_partitions": 2 - } - } - }, - "get_none": { - "response": { - "kafka_topic_dto": { - "name": "NONE" - } - } - }, - "get_none_with_config": { - "response": { - "kafka_topic_dto": { - "name": "NONE", - "num_replicas": 1, - "num_partitions": 2 - } - } - }, - "get_create_with_name_only": { - "response": { - "kafka_topic_dto": { - "name": "CREATE" - } - } - }, - "get_create_with_name_and_config": { - "response": { - "kafka_topic_dto": { - "name": "CREATE", - "num_replicas": 1, - "num_partitions": 2 - } - } - } -} diff --git a/hsml/python/tests/fixtures/model_fixtures.json b/hsml/python/tests/fixtures/model_fixtures.json deleted file mode 100644 index 79f31ed72..000000000 --- a/hsml/python/tests/fixtures/model_fixtures.json +++ /dev/null @@ -1,203 +0,0 @@ -{ - "get_base": { - "response": { - "count": 1, - "items": [ - { - "id": "0", - "name": "basemodel", - "version": 0, - "created": "created", - "creator": "creator", - "environment": "environment.yml", - "description": "description", - "experiment_id": 
1, - "project_name": "myproject", - "experiment_project_name": "myexperimentproject", - "metrics": { "acc": 0.7 }, - "program": "program", - "user_full_name": "Full Name", - "model_schema": "model_schema.json", - "training_dataset": "training_dataset", - "input_example": "input_example.json", - "model_registry_id": 1, - "tags": [], - "href": "test_href" - } - ] - } - }, - "get_python": { - "response": { - "count": 1, - "items": [ - { - "id": "1", - "name": "pythonmodel", - "version": 0, - "created": "created", - "creator": "creator", - "environment": "environment.yml", - "description": "description", - "experiment_id": 1, - "project_name": "myproject", - "experiment_project_name": "myexperimentproject", - "metrics": { "acc": 0.7 }, - "program": "program", - "user_full_name": "Full Name", - "model_schema": "model_schema.json", - "training_dataset": "training_dataset", - "input_example": "input_example.json", - "model_registry_id": 1, - "tags": [], - "framework": "PYTHON", - "href": "test_href" - } - ] - } - }, - "get_sklearn": { - "response": { - "count": 1, - "items": [ - { - "id": "2", - "name": "sklearnmodel", - "version": 0, - "created": "created", - "creator": "creator", - "environment": "environment.yml", - "description": "description", - "experiment_id": 1, - "project_name": "myproject", - "experiment_project_name": "myexperimentproject", - "metrics": { "acc": 0.7 }, - "program": "program", - "user_full_name": "Full Name", - "model_schema": "model_schema.json", - "training_dataset": "training_dataset", - "input_example": "input_example.json", - "model_registry_id": 1, - "tags": [], - "framework": "SKLEARN", - "href": "test_href" - } - ] - } - }, - "get_tensorflow": { - "response": { - "count": 1, - "items": [ - { - "id": "3", - "name": "tensorflowmodel", - "version": 0, - "created": "created", - "creator": "creator", - "environment": "environment.yml", - "description": "description", - "experiment_id": 1, - "project_name": "myproject", - "experiment_project_name": "myexperimentproject", - "metrics": { "acc": 0.7 }, - "program": "program", - "user_full_name": "Full Name", - "model_schema": "model_schema.json", - "training_dataset": "training_dataset", - "input_example": "input_example.json", - "model_registry_id": 1, - "tags": [], - "framework": "TENSORFLOW", - "href": "test_href" - } - ] - } - }, - "get_torch": { - "response": { - "count": 1, - "items": [ - { - "id": "4", - "name": "torchmodel", - "version": 0, - "created": "created", - "creator": "creator", - "environment": "environment.yml", - "description": "description", - "experiment_id": 1, - "project_name": "myproject", - "experiment_project_name": "myexperimentproject", - "metrics": { "acc": 0.7 }, - "program": "program", - "user_full_name": "Full Name", - "model_schema": "model_schema.json", - "training_dataset": "training_dataset", - "input_example": "input_example.json", - "model_registry_id": 1, - "tags": [], - "framework": "TORCH", - "href": "test_href" - } - ] - } - }, - "get_list": { - "response": { - "count": 2, - "items": [ - { - "id": "1", - "name": "pythonmodel", - "version": 0, - "created": "created", - "creator": "creator", - "environment": "environment.yml", - "description": "description", - "experiment_id": 1, - "project_name": "myproject", - "experiment_project_name": "myexperimentproject", - "metrics": { "acc": 0.7 }, - "program": "program", - "user_full_name": "Full Name", - "model_schema": "model_schema.json", - "training_dataset": "training_dataset", - "input_example": "input_example.json", - 
"model_registry_id": 1, - "tags": [], - "framework": "PYTHON", - "href": "test_href" - }, - { - "id": "2", - "name": "pythonmodel", - "version": 0, - "created": "created", - "creator": "creator", - "environment": "environment.yml", - "description": "description", - "experiment_id": 2, - "project_name": "myproject", - "experiment_project_name": "myexperimentproject", - "metrics": { "acc": 0.7 }, - "program": "program", - "user_full_name": "Full Name", - "model_schema": "model_schema.json", - "training_dataset": "training_dataset", - "input_example": "input_example.json", - "model_registry_id": 1, - "tags": [], - "framework": "PYTHON", - "href": "test_href" - } - ] - } - }, - "get_empty": { - "response": { - "count": 0, - "items": [] - } - } -} diff --git a/hsml/python/tests/fixtures/model_fixtures.py b/hsml/python/tests/fixtures/model_fixtures.py deleted file mode 100644 index 32fe396de..000000000 --- a/hsml/python/tests/fixtures/model_fixtures.py +++ /dev/null @@ -1,125 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import pandas as pd -import pytest -from hsml.model import Model as BaseModel -from hsml.python.model import Model as PythonModel -from hsml.sklearn.model import Model as SklearnModel -from hsml.tensorflow.model import Model as TensorflowModel -from hsml.torch.model import Model as TorchModel - - -MODEL_BASE_ID = 0 -MODEL_PYTHON_ID = 1 -MODEL_SKLEARN_ID = 2 -MODEL_TENSORFLOW_ID = 3 -MODEL_TORCH_ID = 4 - -MODEL_BASE_NAME = "basemodel" -MODEL_PYTHON_NAME = "pythonmodel" -MODEL_SKLEARN_NAME = "sklearnmodel" -MODEL_TENSORFLOW_NAME = "tensorflowmodel" -MODEL_TORCH_NAME = "torchmodel" - -# models - - -@pytest.fixture -def model_base(): - return BaseModel(MODEL_BASE_ID, MODEL_BASE_NAME) - - -@pytest.fixture -def model_python(): - return PythonModel(MODEL_PYTHON_ID, MODEL_PYTHON_NAME) - - -@pytest.fixture -def model_sklearn(): - return SklearnModel(MODEL_SKLEARN_ID, MODEL_SKLEARN_NAME) - - -@pytest.fixture -def model_tensorflow(): - return TensorflowModel(MODEL_TENSORFLOW_ID, MODEL_TENSORFLOW_NAME) - - -@pytest.fixture -def model_torch(): - return TorchModel(MODEL_TORCH_ID, MODEL_TORCH_NAME) - - -# input example - - -@pytest.fixture -def input_example_numpy(): - return np.array([1, 2, 3, 4]) - - -@pytest.fixture -def input_example_dict(): - return {"instances": [[1, 2, 3, 4]]} - - -@pytest.fixture -def input_example_dataframe_pandas_dataframe(): - return pd.DataFrame({"a": [1], "b": [2], "c": [3], "d": [4]}) - - -@pytest.fixture -def input_example_dataframe_pandas_dataframe_empty(): - return pd.DataFrame() - - -@pytest.fixture -def input_example_dataframe_pandas_series(): - return pd.Series([1, 2, 3, 4]) - - -@pytest.fixture -def input_example_dataframe_pandas_series_empty(): - return pd.Series() - - -@pytest.fixture -def input_example_dataframe_list(): - return [1, 2, 3, 4] - - -# metrics - - -@pytest.fixture -def model_metrics(): - return {"accuracy": 0.4, "rmse": 0.6} - - -@pytest.fixture -def 
model_metrics_wrong_type(): - return [0.4, 0.6] - - -@pytest.fixture -def model_metrics_wrong_metric_type(): - return {1: 0.4, 2: 0.6} - - -@pytest.fixture -def model_metrics_wrong_metric_value(): - return {"accuracy": "non-number", "rmse": 0.4} diff --git a/hsml/python/tests/fixtures/predictor_fixtures.json b/hsml/python/tests/fixtures/predictor_fixtures.json deleted file mode 100644 index 76adeebe3..000000000 --- a/hsml/python/tests/fixtures/predictor_fixtures.json +++ /dev/null @@ -1,427 +0,0 @@ -{ - "get_deployments_singleton": { - "response": { - "count": 1, - "items": [ - { - "id": 1, - "name": "test", - "description": "test_desc", - "created": "", - "creator": "", - "model_path": "test_model_path", - "model_name": "test_model_name", - "model_version": 1, - "model_framework": "PYTHON", - "model_server": "PYTHON", - "serving_tool": "KSERVE", - "api_protocol": "REST", - "artifact_version": 2, - "predictor": "predictor_file", - "transformer": "transformer_file", - "requested_instances": 1, - "requested_transformer_instances": 1, - "predictor_resources": { - "requested_instances": 1, - "requests": { "cores": 0.2, "memory": 16, "gpus": 1 }, - "limits": { "cores": 0.3, "memory": 17, "gpus": 2 } - }, - "transformer_resources": { - "requested_instances": 1, - "requests": { "cores": 0.2, "memory": 16, "gpus": 1 }, - "limits": { "cores": 0.3, "memory": 17, "gpus": 2 } - }, - "batching_configuration": { - "batching_enabled": true, - "max_batch_size": 1000, - "max_latency": 1000, - "timeout": 1000 - }, - "inference_logging": "ALL", - "kafka_topic_dto": { - "name": "topic" - }, - "environment_dto": { - "name": "misc-inference-pipeline" - } - } - ] - } - }, - "get_deployments_empty": { - "response": { - "count": 0, - "items": [] - } - }, - "get_deployments_list": { - "response": { - "count": 2, - "items": [ - { - "id": 1, - "name": "test", - "description": "test_desc", - "created": "", - "creator": "", - "model_path": "test_model_path", - "model_name": "test_model_name", - "model_version": 1, - "model_framework": "PYTHON", - "model_server": "PYTHON", - "serving_tool": "KSERVE", - "api_protocol": "REST", - "artifact_version": 2, - "predictor": "predictor_file", - "transformer": "transformer_file", - "requested_instances": 1, - "requested_transformer_instances": 1, - "predictor_resources": { - "requested_instances": 1, - "requests": { "cores": 0.2, "memory": 16, "gpus": 1 }, - "limits": { "cores": 0.3, "memory": 17, "gpus": 2 } - }, - "transformer_resources": { - "requested_instances": 1, - "requests": { "cores": 0.2, "memory": 16, "gpus": 1 }, - "limits": { "cores": 0.3, "memory": 17, "gpus": 2 } - }, - "batching_configuration": { - "batching_enabled": true, - "max_batch_size": 1000, - "max_latency": 1000, - "timeout": 1000 - }, - "inference_logging": "ALL", - "kafka_topic_dto": { - "name": "topic" - }, - "environment_dto": { - "name": "misc-inference-pipeline" - } - }, - { - "id": 2, - "name": "test_2", - "description": "test_desc_2", - "created": "", - "creator": "", - "model_path": "test_model_path", - "model_name": "test_model_name", - "model_version": 2, - "model_framework": "PYTHON", - "model_server": "PYTHON", - "serving_tool": "KSERVE", - "api_protocol": "REST", - "artifact_version": 3, - "predictor": "predictor_file", - "transformer": "transformer_file", - "requested_instances": 1, - "requested_transformer_instances": 1, - "predictor_resources": { - "requested_instances": 1, - "requests": { "cores": 0.2, "memory": 16, "gpus": 1 }, - "limits": { "cores": 0.3, "memory": 17, "gpus": 2 } - }, 
- "transformer_resources": { - "requested_instances": 1, - "requests": { "cores": 0.2, "memory": 16, "gpus": 1 }, - "limits": { "cores": 0.3, "memory": 17, "gpus": 2 } - }, - "batching_configuration": { - "batching_enabled": true, - "max_batch_size": 1000, - "max_latency": 1000, - "timeout": 1000 - }, - "inference_logging": "ALL", - "kafka_topic_dto": { - "name": "topic" - }, - "environment_dto": { - "name": "misc-inference-pipeline" - } - } - ] - } - }, - "get_deployment_tf_kserve_rest": { - "response": { - "id": 1, - "name": "test", - "description": "test_desc", - "created": "", - "creator": "", - "model_path": "test_model_path", - "model_name": "test_model_name", - "model_version": 1, - "model_framework": "TENSORFLOW", - "model_server": "TENSORFLOW_SERVING", - "serving_tool": "KSERVE", - "api_protocol": "REST", - "artifact_version": 2, - "requested_instances": 1, - "predictor_resources": { - "requested_instances": 1, - "requests": { "cores": 0.2, "memory": 16, "gpus": 1 }, - "limits": { "cores": 0.3, "memory": 17, "gpus": 2 } - }, - "inference_logging": "ALL", - "kafka_topic_dto": { - "name": "topic" - }, - "environment_dto": { - "name": "tensorflow-inference-pipeline" - } - } - }, - "get_deployment_tf_kserve_rest_trans": { - "response": { - "id": 1, - "name": "test", - "description": "test_desc", - "created": "", - "creator": "", - "model_path": "test_model_path", - "model_name": "test_model_name", - "model_version": 1, - "model_framework": "TENSORFLOW", - "model_server": "TENSORFLOW_SERVING", - "serving_tool": "KSERVE", - "api_protocol": "REST", - "artifact_version": 2, - "transformer": "transformer_file", - "requested_instances": 1, - "requested_transformer_instances": 1, - "predictor_resources": { - "requested_instances": 1, - "requests": { "cores": 0.2, "memory": 16, "gpus": 1 }, - "limits": { "cores": 0.3, "memory": 17, "gpus": 2 } - }, - "transformer_resources": { - "requested_instances": 1, - "requests": { "cores": 0.2, "memory": 16, "gpus": 1 }, - "limits": { "cores": 0.3, "memory": 17, "gpus": 2 } - }, - "batching_configuration": { - "batching_enabled": true, - "max_batch_size": 1000, - "max_latency": 1000, - "timeout": 1000 - }, - "inference_logging": "ALL", - "kafka_topic_dto": { - "name": "topic" - }, - "environment_dto": { - "name": "tensorflow-inference-pipeline" - } - } - }, - - "get_deployment_py_kserve_rest_pred": { - "response": { - "id": 1, - "name": "test", - "description": "test_desc", - "created": "", - "creator": "", - "model_path": "test_model_path", - "model_name": "test_model_name", - "model_version": 1, - "model_framework": "PYTHON", - "model_server": "PYTHON", - "serving_tool": "KSERVE", - "api_protocol": "REST", - "artifact_version": 2, - "predictor": "predictor_file", - "requested_instances": 1, - "predictor_resources": { - "requested_instances": 1, - "requests": { "cores": 0.2, "memory": 16, "gpus": 1 }, - "limits": { "cores": 0.3, "memory": 17, "gpus": 2 } - }, - "batching_configuration": { - "batching_enabled": true, - "max_batch_size": 1000, - "max_latency": 1000, - "timeout": 1000 - }, - "inference_logging": "ALL", - "kafka_topic_dto": { - "name": "topic" - }, - "environment_dto": { - "name": "misc-inference-pipeline" - } - } - }, - - "get_deployment_py_kserve_rest_pred_trans": { - "response": { - "id": 1, - "name": "test", - "description": "test_desc", - "created": "", - "creator": "", - "model_path": "test_model_path", - "model_name": "test_model_name", - "model_version": 1, - "model_framework": "PYTHON", - "model_server": "PYTHON", - 
"serving_tool": "KSERVE", - "api_protocol": "REST", - "artifact_version": 2, - "predictor": "predictor_file", - "transformer": "transformer_file", - "requested_instances": 1, - "requested_transformer_instances": 1, - "predictor_resources": { - "requested_instances": 1, - "requests": { "cores": 0.2, "memory": 16, "gpus": 1 }, - "limits": { "cores": 0.3, "memory": 17, "gpus": 2 } - }, - "transformer_resources": { - "requested_instances": 1, - "requests": { "cores": 0.2, "memory": 16, "gpus": 1 }, - "limits": { "cores": 0.3, "memory": 17, "gpus": 2 } - }, - "inference_logging": "ALL", - "batching_configuration": { - "batching_enabled": true, - "max_batch_size": 1000, - "max_latency": 1000, - "timeout": 1000 - }, - "kafka_topic_dto": { - "name": "topic" - }, - "environment_dto": { - "name": "misc-inference-pipeline" - } - } - }, - - "get_deployment_py_kserve_grpc_pred": { - "response": { - "id": 1, - "name": "test", - "description": "test_desc", - "created": "", - "creator": "", - "model_path": "test_model_path", - "model_name": "test_model_name", - "model_version": 1, - "model_framework": "PYTHON", - "model_server": "PYTHON", - "serving_tool": "KSERVE", - "api_protocol": "GRPC", - "artifact_version": 2, - "predictor": "predictor_file", - "requested_instances": 1, - "predictor_resources": { - "requested_instances": 1, - "requests": { "cores": 0.2, "memory": 16, "gpus": 1 }, - "limits": { "cores": 0.3, "memory": 17, "gpus": 2 } - }, - "inference_logging": "ALL", - "batching_configuration": { - "batching_enabled": true, - "max_batch_size": 1000, - "max_latency": 1000, - "timeout": 1000 - }, - "kafka_topic_dto": { - "name": "topic" - }, - "environment_dto": { - "name": "misc-inference-pipeline" - } - } - }, - - "get_deployment_py_kserve_grpc_pred_trans": { - "response": { - "id": 1, - "name": "test", - "description": "test_desc", - "created": "", - "creator": "", - "model_path": "test_model_path", - "model_name": "test_model_name", - "model_version": 1, - "model_framework": "PYTHON", - "model_server": "PYTHON", - "serving_tool": "KSERVE", - "api_protocol": "GRPC", - "artifact_version": 2, - "predictor": "predictor_file", - "transformer": "transformer_file", - "requested_instances": 1, - "requested_transformer_instances": 1, - "predictor_resources": { - "requested_instances": 1, - "requests": { "cores": 0.2, "memory": 16, "gpus": 1 }, - "limits": { "cores": 0.3, "memory": 17, "gpus": 2 } - }, - "transformer_resources": { - "requested_instances": 1, - "requests": { "cores": 0.2, "memory": 16, "gpus": 1 }, - "limits": { "cores": 0.3, "memory": 17, "gpus": 2 } - }, - "inference_logging": "ALL", - "batching_configuration": { - "batching_enabled": true, - "max_batch_size": 1000, - "max_latency": 1000, - "timeout": 1000 - }, - "kafka_topic_dto": { - "name": "topic" - }, - "environment_dto": { - "name": "misc-inference-pipeline" - } - } - }, - "get_deployment_predictor_state": { - "response": { - "available_instances": 1, - "available_transformer_instances": 1, - "hopsworks_inference_path": "hopsworks/api/path", - "model_server_inference_path": "model-server/path", - "internal_port": 1234, - "revision": 1234, - "deployed": "1234", - "condition": { - "type": "TYPE", - "status": true, - "reason": "REASON" - }, - "status": "RUNNING" - } - }, - "get_deployment_component_logs_empty": { - "response": [] - }, - "get_deployment_component_logs_single": { - "response": [ - { - "instance_name": "instance_name", - "content": "content" - } - ] - }, - "get_deployment_component_logs_list": { - "response": [ - { - 
"instance_name": "instance_name_2", - "content": "content_2" - }, - { - "instance_name": "instance_name_2", - "content": "content_2" - } - ] - } -} diff --git a/hsml/python/tests/fixtures/resources_fixtures.json b/hsml/python/tests/fixtures/resources_fixtures.json deleted file mode 100644 index 874daf0bf..000000000 --- a/hsml/python/tests/fixtures/resources_fixtures.json +++ /dev/null @@ -1,155 +0,0 @@ -{ - "get_only_cores": { - "response": { - "cores": 0.2 - } - }, - "get_only_memory": { - "response": { - "memory": 16 - } - }, - "get_only_gpus": { - "response": { - "gpus": 1 - } - }, - - "get_cores_and_memory": { - "response": { - "cores": 0.2, - "memory": 16 - } - }, - - "get_cores_and_gpus": { - "response": { - "cores": 0.2, - "gpus": 1 - } - }, - - "get_memory_and_gpus": { - "response": { - "memory": 16, - "gpus": 1 - } - }, - - "get_cores_memory_and_gpus": { - "response": { - "cores": 0.2, - "memory": 16, - "gpus": 1 - } - }, - "get_component_resources_num_instances": { - "response": { - "num_instances": 1 - } - }, - "get_component_resources_requests": { - "response": { - "requests": { - "cores": 0.2, - "memory": 16, - "gpus": 1 - } - } - }, - "get_component_resources_limits": { - "response": { - "limits": { - "cores": 0.3, - "memory": 17, - "gpus": 2 - } - } - }, - "get_component_resources_num_instances_and_requests": { - "response": { - "num_instances": 1, - "requests": { - "cores": 0.2, - "memory": 16, - "gpus": 1 - } - } - }, - "get_component_resources_num_instances_and_limits": { - "response": { - "num_instances": 1, - "limits": { - "cores": 0.3, - "memory": 17, - "gpus": 2 - } - } - }, - "get_component_resources_num_instances_requests_and_limits": { - "response": { - "num_instances": 1, - "requests": { - "cores": 0.2, - "memory": 16, - "gpus": 1 - }, - "limits": { - "cores": 0.3, - "memory": 17, - "gpus": 2 - } - } - }, - "get_component_resources_requested_instances_and_predictor_resources": { - "response": { - "requested_instances": 1, - "predictor_resources": { - "requests": { - "cores": 0.2, - "memory": 16, - "gpus": 1 - }, - "limits": { - "cores": 0.3, - "memory": 17, - "gpus": 2 - } - } - } - }, - "get_component_resources_requested_instances_and_predictor_resources_alternative": { - "response": { - "num_instances": 1, - "resources": { - "requests": { - "cores": 0.2, - "memory": 16, - "gpus": 1 - }, - "limits": { - "cores": 0.3, - "memory": 17, - "gpus": 2 - } - } - } - }, - "get_component_resources_requested_instances_and_transformer_resources": { - "response": { - "requested_transformer_instances": 1, - "transformer_resources": { - "requests": { - "cores": 0.2, - "memory": 16, - "gpus": 1 - }, - "limits": { - "cores": 0.3, - "memory": 17, - "gpus": 2 - } - } - } - } -} diff --git a/hsml/python/tests/fixtures/tag_fixtures.json b/hsml/python/tests/fixtures/tag_fixtures.json deleted file mode 100644 index a2516562f..000000000 --- a/hsml/python/tests/fixtures/tag_fixtures.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "get": { - "response": { - "count": 1, - "items": [ - { - "name": "test_name", - "value": "test_value", - "schema": "test_schema", - "href": "test_href", - "expand": "test_expand", - "items": [], - "count": 0, - "type": "tagDTO" - } - ] - } - }, - "get_empty": { - "response": { - "count": 0, - "items": [] - } - } -} diff --git a/hsml/python/tests/fixtures/transformer_fixtures.json b/hsml/python/tests/fixtures/transformer_fixtures.json deleted file mode 100644 index 269e525de..000000000 --- a/hsml/python/tests/fixtures/transformer_fixtures.json +++ /dev/null @@ -1,63 
+0,0 @@
-{
-  "get_deployment_with_transformer": {
-    "response": {
-      "name": "test",
-      "transformer": "transformer_file_name",
-      "transformer_resources": {
-        "requested_transformer_instances": 1,
-        "requests": {
-          "cores": 0.2,
-          "memory": 16,
-          "gpus": 1
-        },
-        "limits": {
-          "cores": 0.3,
-          "memory": 17,
-          "gpus": 2
-        }
-      }
-    }
-  },
-  "get_deployment_without_transformer": {
-    "response": {
-      "name": "test",
-      "predictor": "predictor_file_name",
-      "predictor_resources": {
-        "num_instances": 1,
-        "requests": {
-          "cores": 0.2,
-          "memory": 16,
-          "gpus": 1
-        },
-        "limits": {
-          "cores": 0.3,
-          "memory": 17,
-          "gpus": 2
-        }
-      }
-    }
-  },
-  "get_transformer_with_resources": {
-    "response": {
-      "script_file": "transformer_file_name",
-      "resources": {
-        "num_instances": 1,
-        "requests": {
-          "cores": 0.2,
-          "memory": 16,
-          "gpus": 1
-        },
-        "limits": {
-          "cores": 0.3,
-          "memory": 17,
-          "gpus": 2
-        }
-      }
-    }
-  },
-  "get_transformer_without_resources": {
-    "response": {
-      "script_file": "transformer_file_name"
-    }
-  }
-}
diff --git a/hsml/python/tests/test_connection.py b/hsml/python/tests/test_connection.py
deleted file mode 100644
index c8d100279..000000000
--- a/hsml/python/tests/test_connection.py
+++ /dev/null
@@ -1,173 +0,0 @@
-#
-# Copyright 2024 Hopsworks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from hsml.connection import (
-    CONNECTION_SAAS_HOSTNAME,
-    HOPSWORKS_PORT_DEFAULT,
-    HOSTNAME_VERIFICATION_DEFAULT,
-    Connection,
-)
-from hsml.core import model_api, model_registry_api, model_serving_api
-
-
-class TestConnection:
-    # constants
-
-    def test_constants(self):
-        # The purpose of this test is to ensure that (1) we don't make undesired changes to constant values
-        # that might break things somewhere else, and (2) we remember to update the pytests accordingly by
-        # adding / removing / updating tests, if necessary.
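-        # For illustration only (not part of the original test): the same guard
-        # pattern reduces to pinning a constant in a pytest test, e.g. with a
-        # hypothetical constant name:
-        #
-        #     DEFAULT_PORT = 443  # stand-in for a library constant
-        #
-        #     def test_default_port_unchanged():
-        #         # fails loudly if the constant drifts without updating the test
-        #         assert DEFAULT_PORT == 443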
- assert CONNECTION_SAAS_HOSTNAME == "c.app.hopsworks.ai" - assert HOPSWORKS_PORT_DEFAULT == 443 - assert HOSTNAME_VERIFICATION_DEFAULT - - # constructor - - def test_constructor_default(self, mocker): - # Arrange - class MockConnection: - pass - - mock_connection = MockConnection() - mock_connection.connect = mocker.MagicMock() - mock_connection.init = Connection.__init__ - mock_model_api_init = mocker.patch( - "hsml.core.model_api.ModelApi.__init__", return_value=None - ) - mock_model_registry_api = mocker.patch( - "hsml.core.model_registry_api.ModelRegistryApi.__init__", return_value=None - ) - mock_model_serving_api = mocker.patch( - "hsml.core.model_serving_api.ModelServingApi.__init__", return_value=None - ) - - # Act - mock_connection.init(mock_connection) - - # Assert - assert mock_connection._host is None - assert mock_connection._port == HOPSWORKS_PORT_DEFAULT - assert mock_connection._project is None - assert mock_connection._hostname_verification == HOSTNAME_VERIFICATION_DEFAULT - assert mock_connection._trust_store_path is None - assert mock_connection._api_key_file is None - assert mock_connection._api_key_value is None - assert isinstance(mock_connection._model_api, model_api.ModelApi) - assert isinstance( - mock_connection._model_registry_api, model_registry_api.ModelRegistryApi - ) - assert isinstance( - mock_connection._model_serving_api, model_serving_api.ModelServingApi - ) - assert not mock_connection._connected - mock_model_api_init.assert_called_once() - mock_model_registry_api.assert_called_once() - mock_model_serving_api.assert_called_once() - mock_connection.connect.assert_called_once() - - def test_constructor(self, mocker): - # Arrange - class MockConnection: - pass - - mock_connection = MockConnection() - mock_connection.connect = mocker.MagicMock() - mock_connection.init = Connection.__init__ - mock_model_api_init = mocker.patch( - "hsml.core.model_api.ModelApi.__init__", return_value=None - ) - mock_model_registry_api = mocker.patch( - "hsml.core.model_registry_api.ModelRegistryApi.__init__", return_value=None - ) - mock_model_serving_api = mocker.patch( - "hsml.core.model_serving_api.ModelServingApi.__init__", return_value=None - ) - - # Act - mock_connection.init( - mock_connection, - host="host", - port=1234, - project="project", - hostname_verification=False, - trust_store_path="ts_path", - api_key_file="ak_file", - api_key_value="ak_value", - ) - - # Assert - assert mock_connection._host == "host" - assert mock_connection._port == 1234 - assert mock_connection._project == "project" - assert not mock_connection._hostname_verification - assert mock_connection._trust_store_path == "ts_path" - assert mock_connection._api_key_file == "ak_file" - assert mock_connection._api_key_value == "ak_value" - assert isinstance(mock_connection._model_api, model_api.ModelApi) - assert isinstance( - mock_connection._model_registry_api, model_registry_api.ModelRegistryApi - ) - assert isinstance( - mock_connection._model_serving_api, model_serving_api.ModelServingApi - ) - assert not mock_connection._connected - mock_model_api_init.assert_called_once() - mock_model_registry_api.assert_called_once() - mock_model_serving_api.assert_called_once() - mock_connection.connect.assert_called_once() - - # handlers - - def test_get_model_registry(self, mocker): - # Arrange - mock_connection = mocker.MagicMock() - mock_connection.get_model_registry = Connection.get_model_registry - mock_connection._model_registry_api = mocker.MagicMock() - mock_connection._model_registry_api.get = 
mocker.MagicMock(return_value="mr")
-
-        # Act
-        mr = mock_connection.get_model_registry(mock_connection)
-
-        # Assert
-        assert mr == "mr"
-        mock_connection._model_registry_api.get.assert_called_once()
-
-    def test_get_model_serving(self, mocker):
-        # Arrange
-        mock_connection = mocker.MagicMock()
-        mock_connection.get_model_serving = Connection.get_model_serving
-        mock_connection._model_serving_api = mocker.MagicMock()
-        mock_connection._model_serving_api.get = mocker.MagicMock(return_value="ms")
-
-        # Act
-        ms = mock_connection.get_model_serving(mock_connection)
-
-        # Assert
-        assert ms == "ms"
-        mock_connection._model_serving_api.get.assert_called_once()
-
-    # connection
-
-    # TODO: Add tests for connection-related methods
-
-    def test_connect(self, mocker):
-        pass
-
-    def test_close(self, mocker):
-        pass
-
-    def test_connection(self, mocker):
-        pass
diff --git a/hsml/python/tests/test_constants.py b/hsml/python/tests/test_constants.py
deleted file mode 100644
index 7a923d8d8..000000000
--- a/hsml/python/tests/test_constants.py
+++ /dev/null
@@ -1,383 +0,0 @@
-#
-# Copyright 2024 Hopsworks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import inspect
-
-from hsml import constants
-
-
-class TestConstants:
-    # NOTE
-    # This class contains validations for constants and enum values.
-    # The purpose of this class is to ensure that (1) we don't make undesired changes to constant values
-    # that might break things somewhere else, and (2) we remember to update the pytests accordingly by
-    # adding / removing / updating tests.
- - # This class includes the following validations: - # - Number of possible values of an Enum (to check for added/removed values) - # - Exact values of constants (to check for modified values) - - # MODEL - - def test_model_framework_constants(self): - # Arrange - model_frameworks = { - "FRAMEWORK_TENSORFLOW": "TENSORFLOW", - "FRAMEWORK_TORCH": "TORCH", - "FRAMEWORK_PYTHON": "PYTHON", - "FRAMEWORK_SKLEARN": "SKLEARN", - } - - # Assert - self._check_added_modified_or_removed_values( - constants.MODEL, - num_values=len(model_frameworks), - expected_constants=model_frameworks, - prefix="FRAMEWORK", - ) - - # MODEL_REGISTRY - - def test_model_registry_constants(self): - # Arrange - hopsfs_mount_prefix = {"HOPSFS_MOUNT_PREFIX": "/hopsfs/"} - - # Assert - self._check_added_modified_or_removed_values( - constants.MODEL_REGISTRY, - num_values=len(hopsfs_mount_prefix), - expected_constants=hopsfs_mount_prefix, - ) - - # MODEL_SERVING - - def test_model_serving_constants(self): - # Arrange - models_dataset = {"MODELS_DATASET": "Models"} - - # Assert - self._check_added_modified_or_removed_values( - constants.MODEL_SERVING, - num_values=len(models_dataset), - expected_constants=models_dataset, - ) - - # ARTIFACT_VERSION - - def test_artifact_version_constants(self): - # Arrange - artifact_versions = {"CREATE": "CREATE"} - - # Assert - self._check_added_modified_or_removed_values( - constants.ARTIFACT_VERSION, - num_values=len(artifact_versions), - expected_constants=artifact_versions, - ) - - # RESOURCES - - def test_resources_min_constants(self): - # Arrange - min_resources = { - "MIN_NUM_INSTANCES": 1, - "MIN_CORES": 0.2, - "MIN_MEMORY": 32, - "MIN_GPUS": 0, - } - - # Assert - self._check_added_modified_or_removed_values( - constants.RESOURCES, - num_values=len(min_resources), - expected_constants=min_resources, - prefix="MIN", - ) - - def test_resources_max_constants(self): - # Arrange - max_resources = { - "MAX_CORES": 2, - "MAX_MEMORY": 1024, - "MAX_GPUS": 0, - } - - # Assert - self._check_added_modified_or_removed_values( - constants.RESOURCES, - num_values=len(max_resources), - expected_constants=max_resources, - prefix="MAX", - ) - - # KAFKA_TOPIC - - def test_kafka_topic_names_constants(self): - # Arrange - kafka_topic_cons = { - "NONE": "NONE", - "CREATE": "CREATE", - "NUM_REPLICAS": 1, - "NUM_PARTITIONS": 1, - } - - # Assert - self._check_added_modified_or_removed_values( - constants.KAFKA_TOPIC, - num_values=len(kafka_topic_cons), - expected_constants=kafka_topic_cons, - ) - - # INFERENCE_LOGGER - - def test_inference_logger_constants(self): - # Arrange - if_modes = { - "MODE_NONE": "NONE", - "MODE_ALL": "ALL", - "MODE_MODEL_INPUTS": "MODEL_INPUTS", - "MODE_PREDICTIONS": "PREDICTIONS", - } - - # Assert - self._check_added_modified_or_removed_values( - constants.INFERENCE_LOGGER, - num_values=len(if_modes), - expected_constants=if_modes, - prefix="MODE", - ) - - # INFERENCE_BATCHER - - def test_inference_batcher_constants(self): - # Arrange - if_batcher = {"ENABLED": False} - - # Assert - self._check_added_modified_or_removed_values( - constants.INFERENCE_BATCHER, - num_values=len(if_batcher), - expected_constants=if_batcher, - ) - - # DEPLOYMENT - - def test_deployment_constants(self): - # Arrange - depl_actions = {"ACTION_START": "START", "ACTION_STOP": "STOP"} - - # Assert - self._check_added_modified_or_removed_values( - constants.DEPLOYMENT, - num_values=len(depl_actions), - expected_constants=depl_actions, - prefix="ACTION", - ) - - # PREDICTOR - - def
test_predictor_model_server_constants(self): - # Arrange - model_servers = { - "MODEL_SERVER_PYTHON": "PYTHON", - "MODEL_SERVER_TF_SERVING": "TENSORFLOW_SERVING", - } - - # Assert - self._check_added_modified_or_removed_values( - constants.PREDICTOR, - num_values=len(model_servers), - expected_constants=model_servers, - prefix="MODEL_SERVER", - ) - - def test_predictor_serving_tool_constants(self): - # Arrange - serving_tools = { - "SERVING_TOOL_DEFAULT": "DEFAULT", - "SERVING_TOOL_KSERVE": "KSERVE", - } - - # Assert - self._check_added_modified_or_removed_values( - constants.PREDICTOR, - num_values=len(serving_tools), - expected_constants=serving_tools, - prefix="SERVING_TOOL", - ) - - # PREDICTOR_STATE - - def test_predictor_state_status_constants(self): - # Arrange - predictor_states = { - "STATUS_CREATING": "Creating", - "STATUS_CREATED": "Created", - "STATUS_STARTING": "Starting", - "STATUS_FAILED": "Failed", - "STATUS_RUNNING": "Running", - "STATUS_IDLE": "Idle", - "STATUS_UPDATING": "Updating", - "STATUS_STOPPING": "Stopping", - "STATUS_STOPPED": "Stopped", - } - - # Assert - self._check_added_modified_or_removed_values( - constants.PREDICTOR_STATE, - num_values=len(predictor_states), - expected_constants=predictor_states, - prefix="STATUS", - ) - - def test_predictor_state_condition_constants(self): - # Arrange - predictor_states = { - "CONDITION_TYPE_STOPPED": "STOPPED", - "CONDITION_TYPE_SCHEDULED": "SCHEDULED", - "CONDITION_TYPE_INITIALIZED": "INITIALIZED", - "CONDITION_TYPE_STARTED": "STARTED", - "CONDITION_TYPE_READY": "READY", - } - - # Assert - self._check_added_modified_or_removed_values( - constants.PREDICTOR_STATE, - num_values=len(predictor_states), - expected_constants=predictor_states, - prefix="CONDITION", - ) - - # INFERENCE_ENDPOINTS - - def test_inference_endpoints_type_constants(self): - # Arrange - ie_types = { - "ENDPOINT_TYPE_NODE": "NODE", - "ENDPOINT_TYPE_KUBE_CLUSTER": "KUBE_CLUSTER", - "ENDPOINT_TYPE_LOAD_BALANCER": "LOAD_BALANCER", - } - - # Assert - self._check_added_modified_or_removed_values( - constants.INFERENCE_ENDPOINTS, - num_values=len(ie_types), - expected_constants=ie_types, - prefix="ENDPOINT_TYPE", - ) - - def test_inference_endpoints_port_constants(self): - # Arrange - ie_ports = { - "PORT_NAME_HTTP": "HTTP", - "PORT_NAME_HTTPS": "HTTPS", - "PORT_NAME_STATUS_PORT": "STATUS", - "PORT_NAME_TLS": "TLS", - } - - # Assert - self._check_added_modified_or_removed_values( - constants.INFERENCE_ENDPOINTS, - num_values=len(ie_ports), - expected_constants=ie_ports, - prefix="PORT_NAME", - ) - - def test_inference_endpoints_api_protocol_constants(self): - # Arrange - ie_api_protocols = { - "API_PROTOCOL_REST": "REST", - "API_PROTOCOL_GRPC": "GRPC", - } - - # Assert - self._check_added_modified_or_removed_values( - constants.INFERENCE_ENDPOINTS, - num_values=len(ie_api_protocols), - expected_constants=ie_api_protocols, - prefix="API_PROTOCOL", - ) - - # DEPLOYABLE_COMPONENT - - def test_inference_endpoints_deployable_component_constants(self): - # Arrange - depl_components = { - "PREDICTOR": "predictor", - "TRANSFORMER": "transformer", - } - - # Assert - self._check_added_modified_or_removed_values( - constants.DEPLOYABLE_COMPONENT, - num_values=len(depl_components), - expected_constants=depl_components, - ) - - # Auxiliary methods - - def _check_added_modified_or_removed_values( - self, cls, num_values, expected_constants=None, prefix=None - ): - cname = cls.__name__ + ("." 
+ prefix if prefix is not None else "") - const_dict = self._get_contants_name_value_dict(cls, prefix=prefix) - # exact constants - if expected_constants is not None: - # constant names - added_cnames = const_dict.keys() - expected_constants.keys() - removed_cnames = expected_constants.keys() - const_dict.keys() - - assert len(added_cnames) == 0, ( - f"One or more constants were added under {cname} with names {added_cnames}. " - + "If it was intentional, please add/remove/update tests accordingly (not only in this file, " - + "but wherever it corresponds)." - ) - - assert len(removed_cnames) == 0, ( - f"One or more constants were removed under {cname} with names {removed_cnames}. " - + "If it was intentional, please add/remove/update tests accordingly (not only in this file, " - + "but wherever it corresponds)." - ) - - assert const_dict.keys() == expected_constants.keys(), ( - f"One or more constants under {cname} were modified from {removed_cnames} to {added_cnames}. " - + "If it was intentional, please add/remove/update tests accordingly (not only in this file, " - + "but wherever it corresponds)." - ) - - # constant values - for cname, cvalue in expected_constants.items(): - full_cname = f"{cls.__name__}.{cname}" - assert cvalue == const_dict[cname], ( - f"The constant {full_cname} was modified from {cvalue} to {const_dict[cname]}. " - + "If it was intentional, please add/remove/update tests accordingly (not only in this file, " - + "but wherever it corresponds)." - ) - else: - # number of values - assert len(const_dict) == num_values, ( - f"A constant was added/removed under {cname}. If it was intentional, please " - + "add/remove/update tests accordingly (not only in this file, but wherever it corresponds)." - ) - - def _get_contants_name_value_dict(self, cls, prefix=None) -> dict: - const_dict = dict() - for m in inspect.getmembers(cls, lambda m: not (inspect.isroutine(m))): - n = m[0] # name - if (prefix is not None and n.startswith(prefix)) or ( - prefix is None and not (n.startswith("__") and n.endswith("__")) - ): - const_dict[n] = m[1] # value - return const_dict diff --git a/hsml/python/tests/test_decorators.py b/hsml/python/tests/test_decorators.py deleted file mode 100644 index 7d17e18ea..000000000 --- a/hsml/python/tests/test_decorators.py +++ /dev/null @@ -1,82 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
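For readers following this revert: the removed TestConstants helpers above reduce to one introspection idiom, enumerating a class's non-callable attributes and filtering them by dunder or by name prefix. Below is a minimal, self-contained sketch of that idiom; the COLORS class and the constants_of helper are made-up stand-ins for illustration, not part of hsml.constants.

```python
# Sketch of the introspection idiom behind the removed helpers.
import inspect


class COLORS:  # hypothetical stand-in for an hsml constants class
    DEFAULT = "default"
    MODE_DARK = "dark"
    MODE_LIGHT = "light"


def constants_of(cls, prefix=None) -> dict:
    # Non-routine members are the class-level constants; keep only a
    # given prefix, or drop dunder attributes when no prefix is given.
    members = inspect.getmembers(cls, lambda m: not inspect.isroutine(m))
    return {
        name: value
        for name, value in members
        if (prefix is not None and name.startswith(prefix))
        or (prefix is None and not (name.startswith("__") and name.endswith("__")))
    }


assert constants_of(COLORS, prefix="MODE") == {
    "MODE_DARK": "dark",
    "MODE_LIGHT": "light",
}
```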
- -import pytest -from hsml.decorators import ( - HopsworksConnectionError, - NoHopsworksConnectionError, - connected, - not_connected, -) - - -class TestDecorators: - # test not connected - - def test_not_connected_valid(self, mocker): - # Arrange - mock_instance = mocker.MagicMock() - mock_instance._connected = False - - @not_connected - def assert_not_connected(inst, arg, key_arg): - assert not inst._connected - assert arg == "arg" - assert key_arg == "key_arg" - - # Act - assert_not_connected(mock_instance, "arg", key_arg="key_arg") - - def test_not_connected_invalid(self, mocker): - # Arrange - mock_instance = mocker.MagicMock() - mock_instance._connected = True - - @not_connected - def assert_not_connected(inst, arg, key_arg): - pass - - # Act - with pytest.raises(HopsworksConnectionError): - assert_not_connected(mock_instance, "arg", key_arg="key_arg") - - # test connected - - def test_connected_valid(self, mocker): - # Arrange - mock_instance = mocker.MagicMock() - mock_instance._connected = True - - @connected - def assert_connected(inst, arg, key_arg): - assert inst._connected - assert arg == "arg" - assert key_arg == "key_arg" - - # Act - assert_connected(mock_instance, "arg", key_arg="key_arg") - - def test_connected_invalid(self, mocker): - # Arrange - mock_instance = mocker.MagicMock() - mock_instance._connected = False - - @connected - def assert_connected(inst, arg, key_arg): - pass - - # Act - with pytest.raises(NoHopsworksConnectionError): - assert_connected(mock_instance, "arg", key_arg="key_arg") diff --git a/hsml/python/tests/test_deployable_component.py b/hsml/python/tests/test_deployable_component.py deleted file mode 100644 index 97ec67018..000000000 --- a/hsml/python/tests/test_deployable_component.py +++ /dev/null @@ -1,106 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
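The decorator tests removed above pin down a small contract: a method wrapped with @connected must raise NoHopsworksConnectionError when the instance's _connected flag is False, and a method wrapped with @not_connected must raise HopsworksConnectionError when the flag is True. The deleted hsml.decorators source is not part of this hunk, so the following is only a minimal sketch consistent with those assertions, not the verbatim implementation.

```python
# Sketch of the connected/not_connected contract exercised by the
# removed tests; not the verbatim hsml.decorators implementation.
import functools


class HopsworksConnectionError(Exception):
    """The operation requires the connection to be closed."""


class NoHopsworksConnectionError(Exception):
    """The operation requires an open connection."""


def connected(fn):
    @functools.wraps(fn)
    def wrapper(inst, *args, **kwargs):
        if not inst._connected:
            raise NoHopsworksConnectionError("Connect to Hopsworks first.")
        return fn(inst, *args, **kwargs)

    return wrapper


def not_connected(fn):
    @functools.wraps(fn)
    def wrapper(inst, *args, **kwargs):
        if inst._connected:
            raise HopsworksConnectionError("Close the existing connection first.")
        return fn(inst, *args, **kwargs)

    return wrapper
```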
-# - -from hsml import deployable_component, inference_batcher - - -class TestDeployableComponent: - # from response json - - def test_from_response_json(self, mocker): - # Arrange - json = {"test": "test"} - mock_from_json = mocker.patch( - "hsml.deployable_component.DeployableComponent.from_json", - return_value="from_json_result", - ) - - # Act - result = deployable_component.DeployableComponent.from_response_json(json) - - # Assert - assert result == "from_json_result" - mock_from_json.assert_called_once_with(json) - - # constructor - - def test_constructor_default(self, mocker): - # Arrange - mock_get_obj_from_json = mocker.patch( - "hsml.util.get_obj_from_json", return_value=None - ) - mock_ib_init = mocker.patch( - "hsml.inference_batcher.InferenceBatcher.__init__", return_value=None - ) - - class DeployableComponentChild(deployable_component.DeployableComponent): - def from_json(): - pass - - def update_from_response_json(): - pass - - def to_dict(): - pass - - # Act - dc = DeployableComponentChild() - - # Assert - assert dc.script_file is None - assert dc.resources is None - mock_get_obj_from_json.assert_called_once_with( - None, inference_batcher.InferenceBatcher - ) - mock_ib_init.assert_called_once() - - def test_constructor_with_params(self, mocker): - # Arrange - script_file = "script_file" - resources = {} - inf_batcher = inference_batcher.InferenceBatcher() - mock_get_obj_from_json = mocker.patch( - "hsml.util.get_obj_from_json", return_value=inf_batcher - ) - mock_ib_init = mocker.patch( - "hsml.inference_batcher.InferenceBatcher.__init__", return_value=None - ) - - class DeployableComponentChild(deployable_component.DeployableComponent): - def from_json(): - pass - - def update_from_response_json(): - pass - - def to_dict(): - pass - - # Act - dc = DeployableComponentChild( - script_file=script_file, - resources=resources, - inference_batcher=inf_batcher, - ) - - # Assert - assert dc.script_file == script_file - assert dc.resources == resources - mock_get_obj_from_json.assert_called_once_with( - inf_batcher, inference_batcher.InferenceBatcher - ) - assert dc.inference_batcher == inf_batcher - mock_ib_init.assert_not_called() diff --git a/hsml/python/tests/test_deployable_component_logs.py b/hsml/python/tests/test_deployable_component_logs.py deleted file mode 100644 index 3c61aabb0..000000000 --- a/hsml/python/tests/test_deployable_component_logs.py +++ /dev/null @@ -1,110 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
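One idiom that recurs in the hunk above (and throughout this test suite) is patching a collaborator's __init__ with return_value=None, so the object under test can be constructed without running the collaborator's real initialization while the test still asserts that construction happened. The same idiom with plain unittest.mock and a made-up Batcher class:

```python
# The patch-__init__ idiom from the removed tests, in miniature.
from unittest import mock


class Batcher:  # hypothetical collaborator
    def __init__(self):
        raise RuntimeError("real __init__ should not run inside the test")


with mock.patch.object(Batcher, "__init__", return_value=None) as mock_init:
    b = Batcher()  # no RuntimeError: __init__ is replaced by the mock
    mock_init.assert_called_once()
```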
-# - -import datetime - -import humps -from hsml import deployable_component_logs - - -class TestDeployableComponentLogs: - # from response json - - def test_from_response_json(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["predictor"]["get_deployment_component_logs_single"][ - "response" - ] - json_camelized = humps.camelize(json) - mocker_from_json = mocker.patch( - "hsml.deployable_component_logs.DeployableComponentLogs.from_json", - return_value=None, - ) - - # Act - dc_logs = deployable_component_logs.DeployableComponentLogs.from_response_json( - json_camelized - ) - - # Assert - assert isinstance(dc_logs, list) - assert len(dc_logs) == 1 - mocker_from_json.assert_called_once_with(json[0]) - - def test_from_response_json_list(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["predictor"]["get_deployment_component_logs_list"][ - "response" - ] - json_camelized = humps.camelize(json) - mocker_from_json = mocker.patch( - "hsml.deployable_component_logs.DeployableComponentLogs.from_json", - return_value=None, - ) - - # Act - dc_logs = deployable_component_logs.DeployableComponentLogs.from_response_json( - json_camelized - ) - - # Assert - assert isinstance(dc_logs, list) - assert len(dc_logs) == len(json_camelized) - assert mocker_from_json.call_count == len(json_camelized) - - def test_from_response_json_empty(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["predictor"]["get_deployment_component_logs_empty"][ - "response" - ] - json_camelized = humps.camelize(json) - mocker_from_json = mocker.patch( - "hsml.deployable_component_logs.DeployableComponentLogs.from_json", - return_value=None, - ) - - # Act - dc_logs = deployable_component_logs.DeployableComponentLogs.from_response_json( - json_camelized - ) - - # Assert - assert isinstance(dc_logs, list) - assert len(dc_logs) == 0 - mocker_from_json.assert_not_called() - - # constructor - - def test_constructor(self, backend_fixtures): - # Arrange - json = backend_fixtures["predictor"]["get_deployment_component_logs_single"][ - "response" - ] - instance_name = json[0]["instance_name"] - content = json[0]["content"] - now = datetime.datetime.now() - - # Act - dcl = deployable_component_logs.DeployableComponentLogs( - instance_name=instance_name, content=content - ) - - # Assert - assert dcl.instance_name == instance_name - assert dcl.content == content - assert (dcl.created_at >= now) and ( - dcl.created_at < (now + datetime.timedelta(seconds=1)) - ) diff --git a/hsml/python/tests/test_deployment.py b/hsml/python/tests/test_deployment.py deleted file mode 100644 index 7e3d7e4a5..000000000 --- a/hsml/python/tests/test_deployment.py +++ /dev/null @@ -1,795 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
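The DeployableComponentLogs constructor test above validates a timestamp assigned inside __init__ by bounding it: capture now before construction, then assert now <= created_at < now + 1 second. The same bounding technique on a hypothetical Logs class:

```python
# Bounding a constructor-assigned timestamp, as in the removed
# DeployableComponentLogs constructor test; Logs is hypothetical.
import datetime


class Logs:
    def __init__(self, content):
        self.content = content
        self.created_at = datetime.datetime.now()


now = datetime.datetime.now()
logs = Logs("some log output")
assert now <= logs.created_at < now + datetime.timedelta(seconds=1)
```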
-# - -import pytest -from hsml import deployment, predictor -from hsml.client.exceptions import ModelServingException -from hsml.constants import PREDICTOR_STATE -from hsml.core import serving_api -from hsml.engine import serving_engine - - -class TestDeployment: - # from response json - - def test_from_response_json_list(self, mocker, backend_fixtures): - # Arrange - preds = [{"name": "pred_name"}] - mock_pred_from_response_json = mocker.patch( - "hsml.predictor.Predictor.from_response_json", - return_value=preds, - ) - mock_from_predictor = mocker.patch( - "hsml.deployment.Deployment.from_predictor", return_value=preds[0] - ) - - # Act - depl = deployment.Deployment.from_response_json(preds) - - # Assert - assert isinstance(depl, list) - assert depl[0] == preds[0] - mock_pred_from_response_json.assert_called_once_with(preds) - mock_from_predictor.assert_called_once_with(preds[0]) - - def test_from_response_json_single(self, mocker, backend_fixtures): - # Arrange - pred = {"name": "pred_name"} - mock_pred_from_response_json = mocker.patch( - "hsml.predictor.Predictor.from_response_json", - return_value=pred, - ) - mock_from_predictor = mocker.patch( - "hsml.deployment.Deployment.from_predictor", return_value=pred - ) - - # Act - depl = deployment.Deployment.from_response_json(pred) - - # Assert - assert depl == pred - mock_pred_from_response_json.assert_called_once_with(pred) - mock_from_predictor.assert_called_once_with(pred) - - # constructor - - def test_constructor_default(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - - # Act - d = deployment.Deployment(predictor=p) - - # Assert - assert d.name == p.name - assert d.description == p.description - assert d.predictor == p - assert isinstance(d._serving_api, serving_api.ServingApi) - assert isinstance(d._serving_engine, serving_engine.ServingEngine) - - def test_constructor(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - - # Act - d = deployment.Deployment(predictor=p, name=p.name, description=p.description) - - # Assert - assert d.name == p.name - assert d.description == p.description - assert d.predictor == p - assert isinstance(d._serving_api, serving_api.ServingApi) - assert isinstance(d._serving_engine, serving_engine.ServingEngine) - - def test_constructor_no_predictor(self): - # Act - with pytest.raises(ModelServingException) as e_info: - _ = deployment.Deployment(predictor=None) - - # Assert - assert "A predictor is required" in str(e_info.value) - - def test_constructor_wrong_predictor(self): - # Act - with pytest.raises(ValueError) as e_info: - _ = deployment.Deployment(predictor={"wrong": "type"}) - - # Assert - assert "not an instance of the Predictor class" in str(e_info.value) - - # from predictor - - def test_from_predictor(self, mocker): - # Arrange - class MockPredictor: - _name = "name" - _description = "description" - - p = MockPredictor() - mock_deployment_init = mocker.patch( - "hsml.deployment.Deployment.__init__", return_value=None - ) - - # Act - deployment.Deployment.from_predictor(p) - - # Assert - mock_deployment_init.assert_called_once_with( - predictor=p, name=p._name, description=p._description - ) - - # save - - def test_save_default(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - mock_serving_engine_save = mocker.patch( - "hsml.engine.serving_engine.ServingEngine.save" - ) - - # Act - d.save() - - 
# Assert - mock_serving_engine_save.assert_called_once_with(d, 60) - - def test_save(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - mock_serving_engine_save = mocker.patch( - "hsml.engine.serving_engine.ServingEngine.save" - ) - - # Act - await_update = 120 - d.save(await_update=await_update) - - # Assert - mock_serving_engine_save.assert_called_once_with(d, await_update) - - # start - - def test_start_default(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - mock_serving_engine_start = mocker.patch( - "hsml.engine.serving_engine.ServingEngine.start" - ) - - # Act - d.start() - - # Assert - mock_serving_engine_start.assert_called_once_with(d, await_status=60) - - def test_start(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - mock_serving_engine_start = mocker.patch( - "hsml.engine.serving_engine.ServingEngine.start" - ) - - # Act - await_running = 120 - d.start(await_running=await_running) - - # Assert - mock_serving_engine_start.assert_called_once_with(d, await_status=await_running) - - # stop - - def test_stop_default(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - mock_serving_engine_stop = mocker.patch( - "hsml.engine.serving_engine.ServingEngine.stop" - ) - - # Act - d.stop() - - # Assert - mock_serving_engine_stop.assert_called_once_with(d, await_status=60) - - def test_stop(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - mock_serving_engine_start = mocker.patch( - "hsml.engine.serving_engine.ServingEngine.stop" - ) - - # Act - await_stopped = 120 - d.stop(await_stopped=await_stopped) - - # Assert - mock_serving_engine_start.assert_called_once_with(d, await_status=await_stopped) - - # delete - - def test_delete_default(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - mock_serving_engine_delete = mocker.patch( - "hsml.engine.serving_engine.ServingEngine.delete" - ) - - # Act - d.delete() - - # Assert - mock_serving_engine_delete.assert_called_once_with(d, False) - - def test_delete(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - mock_serving_engine_delete = mocker.patch( - "hsml.engine.serving_engine.ServingEngine.delete" - ) - - # Act - force = True - d.delete(force=force) - - # Assert - mock_serving_engine_delete.assert_called_once_with(d, force) - - # get state - - def test_get_state(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - mock_serving_engine_get_state = mocker.patch( - "hsml.engine.serving_engine.ServingEngine.get_state" - ) - - # Act - d.get_state() - - # Assert - mock_serving_engine_get_state.assert_called_once_with(d) - - # status - - # - is created - - def test_is_created_false(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - - class MockPredictorState: - def __init__(self, status): - self.status = status - 
- mocker.patch( - "hsml.engine.serving_engine.ServingEngine.get_state", - return_value=MockPredictorState(PREDICTOR_STATE.STATUS_CREATING), - ) - - # Act - is_created = d.is_created() - - # Assert - assert not is_created - - def test_is_created_true(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - - class MockPredictorState: - def __init__(self, status): - self.status = status - - valid_statuses = [ - PREDICTOR_STATE.STATUS_CREATED, - PREDICTOR_STATE.STATUS_FAILED, - PREDICTOR_STATE.STATUS_IDLE, - PREDICTOR_STATE.STATUS_RUNNING, - PREDICTOR_STATE.STATUS_STARTING, - PREDICTOR_STATE.STATUS_STOPPED, - PREDICTOR_STATE.STATUS_STOPPING, - PREDICTOR_STATE.STATUS_UPDATING, - ] - - for valid_status in valid_statuses: - mocker.patch( - "hsml.engine.serving_engine.ServingEngine.get_state", - return_value=MockPredictorState(valid_status), - ) - - # Act and Assert - assert d.is_created() - - # is running - - def test_is_running_true(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - - class MockPredictorState: - def __init__(self, status): - self.status = status - - mocker.patch( - "hsml.engine.serving_engine.ServingEngine.get_state", - return_value=MockPredictorState(PREDICTOR_STATE.STATUS_RUNNING), - ) - - # Act and Assert - assert d.is_running() - - def test_is_running_false(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - - class MockPredictorState: - def __init__(self, status): - self.status = status - - valid_statuses = [ - PREDICTOR_STATE.STATUS_CREATED, - PREDICTOR_STATE.STATUS_CREATING, - PREDICTOR_STATE.STATUS_FAILED, - PREDICTOR_STATE.STATUS_IDLE, - PREDICTOR_STATE.STATUS_STARTING, - # PREDICTOR_STATE.STATUS_RUNNING, - PREDICTOR_STATE.STATUS_STOPPED, - PREDICTOR_STATE.STATUS_STOPPING, - PREDICTOR_STATE.STATUS_UPDATING, - ] - - for valid_status in valid_statuses: - mocker.patch( - "hsml.engine.serving_engine.ServingEngine.get_state", - return_value=MockPredictorState(valid_status), - ) - - # Act and Assert - assert not d.is_running(or_idle=False, or_updating=False) - - def test_is_running_or_idle_true(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - - class MockPredictorState: - def __init__(self, status): - self.status = status - - valid_statuses = [ - PREDICTOR_STATE.STATUS_IDLE, - PREDICTOR_STATE.STATUS_RUNNING, - ] - - for valid_status in valid_statuses: - mocker.patch( - "hsml.engine.serving_engine.ServingEngine.get_state", - return_value=MockPredictorState(valid_status), - ) - - # Act and Assert - assert d.is_running(or_idle=True) - - def test_is_running_or_idle_false(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - - class MockPredictorState: - def __init__(self, status): - self.status = status - - valid_statuses = [ - PREDICTOR_STATE.STATUS_CREATED, - PREDICTOR_STATE.STATUS_CREATING, - PREDICTOR_STATE.STATUS_FAILED, - # PREDICTOR_STATE.STATUS_IDLE, - PREDICTOR_STATE.STATUS_STARTING, - # PREDICTOR_STATE.STATUS_RUNNING, - PREDICTOR_STATE.STATUS_STOPPED, - PREDICTOR_STATE.STATUS_STOPPING, - PREDICTOR_STATE.STATUS_UPDATING, - ] - - for valid_status in valid_statuses: - mocker.patch( - 
"hsml.engine.serving_engine.ServingEngine.get_state", - return_value=MockPredictorState(valid_status), - ) - - # Act and Assert - assert not d.is_running(or_idle=True, or_updating=False) - - def test_is_running_or_updating_true(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - - class MockPredictorState: - def __init__(self, status): - self.status = status - - valid_statuses = [ - # PREDICTOR_STATE.STATUS_CREATED, - # PREDICTOR_STATE.STATUS_CREATING, - # PREDICTOR_STATE.STATUS_FAILED, - # PREDICTOR_STATE.STATUS_IDLE, - # PREDICTOR_STATE.STATUS_STARTING, - PREDICTOR_STATE.STATUS_RUNNING, - # PREDICTOR_STATE.STATUS_STOPPED, - # PREDICTOR_STATE.STATUS_STOPPING, - PREDICTOR_STATE.STATUS_UPDATING, - ] - - for valid_status in valid_statuses: - mocker.patch( - "hsml.engine.serving_engine.ServingEngine.get_state", - return_value=MockPredictorState(valid_status), - ) - - # Act and Assert - assert d.is_running(or_updating=True) - - def test_is_running_or_updating_false(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - - class MockPredictorState: - def __init__(self, status): - self.status = status - - valid_statuses = [ - PREDICTOR_STATE.STATUS_CREATED, - PREDICTOR_STATE.STATUS_CREATING, - PREDICTOR_STATE.STATUS_FAILED, - PREDICTOR_STATE.STATUS_IDLE, - PREDICTOR_STATE.STATUS_STARTING, - # PREDICTOR_STATE.STATUS_RUNNING, - PREDICTOR_STATE.STATUS_STOPPED, - PREDICTOR_STATE.STATUS_STOPPING, - # PREDICTOR_STATE.STATUS_UPDATING, - ] - - for valid_status in valid_statuses: - mocker.patch( - "hsml.engine.serving_engine.ServingEngine.get_state", - return_value=MockPredictorState(valid_status), - ) - - # Act and Assert - assert not d.is_running(or_idle=False, or_updating=True) - - # - is stopped - - def test_is_stopped_true(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - - class MockPredictorState: - def __init__(self, status): - self.status = status - - mocker.patch( - "hsml.engine.serving_engine.ServingEngine.get_state", - return_value=MockPredictorState(PREDICTOR_STATE.STATUS_STOPPED), - ) - - # Act and Assert - assert d.is_stopped() - - def test_is_stopped_false(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - - class MockPredictorState: - def __init__(self, status): - self.status = status - - valid_statuses = [ - PREDICTOR_STATE.STATUS_CREATED, - PREDICTOR_STATE.STATUS_CREATING, - PREDICTOR_STATE.STATUS_FAILED, - PREDICTOR_STATE.STATUS_IDLE, - PREDICTOR_STATE.STATUS_STARTING, - PREDICTOR_STATE.STATUS_RUNNING, - # PREDICTOR_STATE.STATUS_STOPPED, - PREDICTOR_STATE.STATUS_STOPPING, - PREDICTOR_STATE.STATUS_UPDATING, - ] - - for valid_status in valid_statuses: - mocker.patch( - "hsml.engine.serving_engine.ServingEngine.get_state", - return_value=MockPredictorState(valid_status), - ) - - # Act and Assert - assert not d.is_stopped(or_created=False) - - def test_is_stopped_or_created_true(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - - class MockPredictorState: - def __init__(self, status): - self.status = status - - valid_statuses = [ - PREDICTOR_STATE.STATUS_CREATED, - PREDICTOR_STATE.STATUS_CREATING, - # 
PREDICTOR_STATE.STATUS_FAILED, - # PREDICTOR_STATE.STATUS_IDLE, - # PREDICTOR_STATE.STATUS_STARTING, - # PREDICTOR_STATE.STATUS_RUNNING, - PREDICTOR_STATE.STATUS_STOPPED, - # PREDICTOR_STATE.STATUS_STOPPING, - # PREDICTOR_STATE.STATUS_UPDATING, - ] - - for valid_status in valid_statuses: - mocker.patch( - "hsml.engine.serving_engine.ServingEngine.get_state", - return_value=MockPredictorState(valid_status), - ) - - # Act and Assert - assert d.is_stopped(or_created=True) - - def test_is_stopped_or_created_false(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - - class MockPredictorState: - def __init__(self, status): - self.status = status - - valid_statuses = [ - # PREDICTOR_STATE.STATUS_CREATED, - # PREDICTOR_STATE.STATUS_CREATING, - PREDICTOR_STATE.STATUS_FAILED, - PREDICTOR_STATE.STATUS_IDLE, - PREDICTOR_STATE.STATUS_STARTING, - PREDICTOR_STATE.STATUS_RUNNING, - # PREDICTOR_STATE.STATUS_STOPPED, - PREDICTOR_STATE.STATUS_STOPPING, - PREDICTOR_STATE.STATUS_UPDATING, - ] - - for valid_status in valid_statuses: - mocker.patch( - "hsml.engine.serving_engine.ServingEngine.get_state", - return_value=MockPredictorState(valid_status), - ) - - # Act and Assert - assert not d.is_stopped(or_created=True) - - # predict - - def test_predict(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - mock_serving_engine_predict = mocker.patch( - "hsml.engine.serving_engine.ServingEngine.predict" - ) - - # Act - d.predict("data", "inputs") - - # Assert - mock_serving_engine_predict.assert_called_once_with(d, "data", "inputs") - - # download artifact - - def test_download_artifact(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - mock_serving_engine_download_artifact = mocker.patch( - "hsml.engine.serving_engine.ServingEngine.download_artifact" - ) - - # Act - d.download_artifact() - - # Assert - mock_serving_engine_download_artifact.assert_called_once_with(d) - - # get logs - - def test_get_logs_default(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - mock_util_get_members = mocker.patch( - "hsml.util.get_members", return_value=["predictor"] - ) - mock_print = mocker.patch("builtins.print") - - class MockLogs: - instance_name = "instance_name" - content = "content" - - mock_logs = [MockLogs()] - mock_serving_get_logs = mocker.patch( - "hsml.engine.serving_engine.ServingEngine.get_logs", - return_value=mock_logs, - ) - - # Act - d.get_logs() - - # Assert - mock_util_get_members.assert_called_once() - mock_serving_get_logs.assert_called_once_with(d, "predictor", 10) - assert mock_print.call_count == len(mock_logs) - - def test_get_logs_component_valid(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - mock_util_get_members = mocker.patch( - "hsml.util.get_members", return_value=["valid"] - ) - mock_print = mocker.patch("builtins.print") - - class MockLogs: - instance_name = "instance_name" - content = "content" - - mock_logs = [MockLogs()] - mock_serving_get_logs = mocker.patch( - "hsml.engine.serving_engine.ServingEngine.get_logs", - return_value=mock_logs, - ) - - # Act - d.get_logs(component="valid") - - # Assert - 
mock_util_get_members.assert_called_once() - mock_serving_get_logs.assert_called_once_with(d, "valid", 10) - assert mock_print.call_count == len(mock_logs) - - def test_get_logs_component_invalid(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - - # Act - with pytest.raises(ValueError) as e_info: - d.get_logs(component="invalid") - - # Assert - assert "is not valid" in str(e_info.value) - - def test_get_logs_tail(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - mock_util_get_members = mocker.patch( - "hsml.util.get_members", return_value=["predictor"] - ) - mock_print = mocker.patch("builtins.print") - - class MockLogs: - instance_name = "instance_name" - content = "content" - - mock_logs = [MockLogs()] - mock_serving_get_logs = mocker.patch( - "hsml.engine.serving_engine.ServingEngine.get_logs", - return_value=mock_logs, - ) - - # Act - d.get_logs(tail=40) - - # Assert - mock_util_get_members.assert_called_once() - mock_serving_get_logs.assert_called_once_with(d, "predictor", 40) - assert mock_print.call_count == len(mock_logs) - - def test_get_logs_no_logs(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - mock_util_get_members = mocker.patch( - "hsml.util.get_members", return_value=["predictor"] - ) - mock_print = mocker.patch("builtins.print") - - mock_serving_get_logs = mocker.patch( - "hsml.engine.serving_engine.ServingEngine.get_logs", - return_value=None, - ) - - # Act - d.get_logs() - - # Assert - mock_util_get_members.assert_called_once() - mock_serving_get_logs.assert_called_once_with(d, "predictor", 10) - assert mock_print.call_count == 0 - - # get url - - def test_get_url(self, mocker, backend_fixtures): - # Arrange - p = self._get_dummy_predictor(mocker, backend_fixtures) - d = deployment.Deployment(predictor=p) - - class MockClient: - _project_id = "project_id" - - mock_client = MockClient() - path = "/p/" + str(mock_client._project_id) + "/deployments/" + str(d.id) - - mock_util_get_hostname_replaced_url = mocker.patch( - "hsml.util.get_hostname_replaced_url", return_value="url" - ) - mock_client_get_instance = mocker.patch( - "hsml.client.get_instance", return_value=mock_client - ) - - # Act - url = d.get_url() - - # Assert - assert url == "url" - mock_util_get_hostname_replaced_url.assert_called_once_with(path) - mock_client_get_instance.assert_called_once() - - # auxiliary methods - - def _get_dummy_predictor(self, mocker, backend_fixtures): - p_json = backend_fixtures["predictor"]["get_deployments_singleton"]["response"][ - "items" - ][0] - mocker.patch("hsml.predictor.Predictor._validate_serving_tool") - mocker.patch("hsml.predictor.Predictor._validate_resources") - mocker.patch("hsml.predictor.Predictor._validate_script_file") - mocker.patch("hsml.util.get_obj_from_json") - return predictor.Predictor( - id=p_json["id"], - name=p_json["name"], - description=p_json["description"], - model_name=p_json["model_name"], - model_path=p_json["model_path"], - model_version=p_json["model_version"], - model_framework=p_json["model_framework"], - model_server=p_json["model_server"], - artifact_version=p_json["artifact_version"], - ) diff --git a/hsml/python/tests/test_explicit_provenance.py b/hsml/python/tests/test_explicit_provenance.py deleted file mode 100644 index eb396b364..000000000 --- 
a/hsml/python/tests/test_explicit_provenance.py +++ /dev/null @@ -1,78 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import unittest -from unittest import mock - -from hsml.core import explicit_provenance - - -class TestExplicitProvenance(unittest.TestCase): - def test_one_accessible_parent(self): - artifact = {"id": 1} - links = explicit_provenance.Links(accessible=[artifact]) - parent = explicit_provenance.Links.get_one_accessible_parent(links) - self.assertEqual(artifact["id"], parent["id"]) - - def test_one_accessible_parent_none(self): - links = explicit_provenance.Links() - with mock.patch.object(explicit_provenance._logger, "info") as mock_logger: - parent = explicit_provenance.Links.get_one_accessible_parent(links) - mock_logger.assert_called_once_with("There is no parent information") - self.assertIsNone(parent) - - def test_one_accessible_parent_inaccessible(self): - artifact = {"id": 1} - links = explicit_provenance.Links(inaccessible=[artifact]) - with mock.patch.object(explicit_provenance._logger, "info") as mock_logger: - parent = explicit_provenance.Links.get_one_accessible_parent(links) - mock_logger.assert_called_once_with( - "The parent is deleted or inaccessible. For more details get the full provenance from `_provenance` method" - ) - self.assertIsNone(parent) - - def test_one_accessible_parent_deleted(self): - artifact = {"id": 1} - links = explicit_provenance.Links(deleted=[artifact]) - with mock.patch.object(explicit_provenance._logger, "info") as mock_logger: - parent = explicit_provenance.Links.get_one_accessible_parent(links) - mock_logger.assert_called_once_with( - "The parent is deleted or inaccessible. For more details get the full provenance from `_provenance` method" - ) - self.assertIsNone(parent) - - def test_one_accessible_parent_too_many(self): - artifact1 = {"id": 1} - artifact2 = {"id": 2} - links = explicit_provenance.Links(accessible=[artifact1, artifact2]) - with self.assertRaises(Exception) as context: - explicit_provenance.Links.get_one_accessible_parent(links) - self.assertTrue( - "Backend inconsistency - provenance returned more than one parent" - in str(context.exception) - ) - - def test_one_accessible_parent_should_not_be_artifact(self): - artifact = explicit_provenance.Artifact( - 1, "test", 1, None, explicit_provenance.Artifact.MetaType.NOT_SUPPORTED - ) - links = explicit_provenance.Links(accessible=[artifact]) - with self.assertRaises(Exception) as context: - explicit_provenance.Links.get_one_accessible_parent(links) - self.assertTrue( - "The returned object is not a valid object. For more details get the full provenance from `_provenance` method" - in str(context.exception) - ) diff --git a/hsml/python/tests/test_inference_batcher.py b/hsml/python/tests/test_inference_batcher.py deleted file mode 100644 index 441fbff7e..000000000 --- a/hsml/python/tests/test_inference_batcher.py +++ /dev/null @@ -1,234 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import copy - -import humps -from hsml import inference_batcher -from hsml.constants import INFERENCE_BATCHER - - -class TestInferenceBatcher: - # from response json - - def test_from_response_json_enabled(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_batcher"]["get_enabled"]["response"] - json_camelized = humps.camelize(json) # as returned by the backend - mock_ib_from_json = mocker.patch( - "hsml.inference_batcher.InferenceBatcher.from_json" - ) - - # Act - _ = inference_batcher.InferenceBatcher.from_response_json(json_camelized) - - # Assert - mock_ib_from_json.assert_called_once_with(json) - - def test_from_response_json_enabled_with_config(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_batcher"]["get_enabled_with_config"][ - "response" - ] - json_camelized = humps.camelize(json) # as returned by the backend - mock_ib_from_json = mocker.patch( - "hsml.inference_batcher.InferenceBatcher.from_json" - ) - - # Act - _ = inference_batcher.InferenceBatcher.from_response_json(json_camelized) - - # Assert - mock_ib_from_json.assert_called_once_with(json) - - # from json - - def test_from_json_enabled(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_batcher"]["get_enabled"]["response"] - mock_ib_extract_fields = mocker.patch( - "hsml.inference_batcher.InferenceBatcher.extract_fields_from_json", - return_value=json, - ) - mock_ib_init = mocker.patch( - "hsml.inference_batcher.InferenceBatcher.__init__", return_value=None - ) - - # Act - _ = inference_batcher.InferenceBatcher.from_json(json) - - # Assert - mock_ib_extract_fields.assert_called_once_with(json) - mock_ib_init.assert_called_once_with(**json) - - def test_from_json_enabled_with_config(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_batcher"]["get_enabled_with_config"][ - "response" - ] - mock_ib_extract_fields = mocker.patch( - "hsml.inference_batcher.InferenceBatcher.extract_fields_from_json", - return_value=json, - ) - mock_ib_init = mocker.patch( - "hsml.inference_batcher.InferenceBatcher.__init__", return_value=None - ) - - # Act - _ = inference_batcher.InferenceBatcher.from_json(json) - - # Assert - mock_ib_extract_fields.assert_called_once_with(json) - mock_ib_init.assert_called_once_with(**json) - - # constructor - - def test_constructor_default(self): - # Act - ib = inference_batcher.InferenceBatcher() - - # Assert - assert isinstance(ib, inference_batcher.InferenceBatcher) - assert ib.enabled == INFERENCE_BATCHER.ENABLED - assert ib.max_batch_size is None - assert ib.max_latency is
None - assert ib.timeout is None - - def test_constructor_enabled(self, backend_fixtures): - # Arrange - json = backend_fixtures["inference_batcher"]["get_enabled"]["response"] - - # Act - ib = inference_batcher.InferenceBatcher(**json) - - # Assert - assert isinstance(ib, inference_batcher.InferenceBatcher) - assert ib.enabled == json["enabled"] - assert ib.max_batch_size is None - assert ib.max_latency is None - assert ib.timeout is None - - def test_constructor_enabled_with_config(self, backend_fixtures): - # Arrange - json = backend_fixtures["inference_batcher"]["get_enabled_with_config"][ - "response" - ] - - # Act - ib = inference_batcher.InferenceBatcher(**json) - - # Assert - assert isinstance(ib, inference_batcher.InferenceBatcher) - assert ib.enabled == json["enabled"] - assert ib.max_batch_size == json["max_batch_size"] - assert ib.max_latency == json["max_latency"] - assert ib.timeout == json["timeout"] - - def test_constructor_disabled(self, backend_fixtures): - # Arrange - json = backend_fixtures["inference_batcher"]["get_disabled"]["response"] - - # Act - ib = inference_batcher.InferenceBatcher(**json) - - # Assert - assert isinstance(ib, inference_batcher.InferenceBatcher) - assert ib.enabled == json["enabled"] - assert ib.max_batch_size is None - assert ib.max_latency is None - assert ib.timeout is None - - def test_constructor_disabled_with_config(self, backend_fixtures): - # Arrange - json = backend_fixtures["inference_batcher"]["get_disabled_with_config"][ - "response" - ] - - # Act - ib = inference_batcher.InferenceBatcher(**json) - - # Assert - assert isinstance(ib, inference_batcher.InferenceBatcher) - assert ib.enabled == json["enabled"] - assert ib.max_batch_size == json["max_batch_size"] - assert ib.max_latency == json["max_latency"] - assert ib.timeout == json["timeout"] - - # # extract fields from json - - def test_extract_fields_from_json_enabled(self, backend_fixtures): - # Arrange - json = backend_fixtures["inference_batcher"]["get_enabled"]["response"] - json_copy = copy.deepcopy(json) - - # Act - kwargs = inference_batcher.InferenceBatcher.extract_fields_from_json(json_copy) - - # Assert - assert kwargs["enabled"] == json["enabled"] - assert kwargs["max_batch_size"] is None - assert kwargs["max_latency"] is None - assert kwargs["timeout"] is None - - def test_extract_fields_from_json_enabled_with_config(self, backend_fixtures): - # Arrange - json = backend_fixtures["inference_batcher"]["get_enabled_with_config"][ - "response" - ] - json_copy = copy.deepcopy(json) - - # Act - kwargs = inference_batcher.InferenceBatcher.extract_fields_from_json(json_copy) - - # Assert - assert kwargs["enabled"] == json["enabled"] - assert kwargs["max_batch_size"] == json["max_batch_size"] - assert kwargs["max_latency"] == json["max_latency"] - assert kwargs["timeout"] == json["timeout"] - - def test_extract_fields_from_json_enabled_nested(self, backend_fixtures): - # Arrange - json = backend_fixtures["inference_batcher"]["get_enabled"]["response_nested"] - json_copy = copy.deepcopy(json) - - # Act - kwargs = inference_batcher.InferenceBatcher.extract_fields_from_json(json_copy) - - # Assert - assert kwargs["enabled"] == json["batching_configuration"]["enabled"] - assert kwargs["max_batch_size"] is None - assert kwargs["max_latency"] is None - assert kwargs["timeout"] is None - - def test_extract_fields_from_json_enabled_with_config_nested( - self, backend_fixtures - ): - # Arrange - json = backend_fixtures["inference_batcher"]["get_enabled_with_config"][ - "response_nested" - 
] - json_copy = copy.deepcopy(json) - - # Act - kwargs = inference_batcher.InferenceBatcher.extract_fields_from_json(json_copy) - - # Assert - assert kwargs["enabled"] == json["batching_configuration"]["enabled"] - assert ( - kwargs["max_batch_size"] == json["batching_configuration"]["max_batch_size"] - ) - assert kwargs["max_latency"] == json["batching_configuration"]["max_latency"] - assert kwargs["timeout"] == json["batching_configuration"]["timeout"] diff --git a/hsml/python/tests/test_inference_endpoint.py b/hsml/python/tests/test_inference_endpoint.py deleted file mode 100644 index 0f79a6ff3..000000000 --- a/hsml/python/tests/test_inference_endpoint.py +++ /dev/null @@ -1,298 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import copy - -import humps -from hsml import inference_endpoint - - -class TestInferenceEndpoint: - # InferenceEndpointPort - - # from response json - - def test_from_response_json_port(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_endpoint"]["get_port"]["response"] - json_camelized = humps.camelize(json) # as returned by the backend - mock_ie_from_json = mocker.patch( - "hsml.inference_endpoint.InferenceEndpointPort.from_json" - ) - - # Act - _ = inference_endpoint.InferenceEndpointPort.from_response_json(json_camelized) - - # Assert - mock_ie_from_json.assert_called_once_with(json) - - # from json - - def test_from_json_port(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_endpoint"]["get_port"]["response"] - mock_ie_extract_fields = mocker.patch( - "hsml.inference_endpoint.InferenceEndpointPort.extract_fields_from_json", - return_value=json, - ) - mock_ie_init = mocker.patch( - "hsml.inference_endpoint.InferenceEndpointPort.__init__", return_value=None - ) - - # Act - _ = inference_endpoint.InferenceEndpointPort.from_json(json) - - # Assert - mock_ie_extract_fields.assert_called_once_with(json) - mock_ie_init.assert_called_once_with(**json) - - # constructor - - def test_constructor_port(self, backend_fixtures): - # Arrange - json = backend_fixtures["inference_endpoint"]["get_port"]["response"] - - # Act - ie_port = inference_endpoint.InferenceEndpointPort( - name=json["name"], number=json["number"] - ) - - # Assert - assert ie_port.name == json["name"] - assert ie_port.number == json["number"] - - # extract fields from json - - def test_extract_fields_from_json_port(self, backend_fixtures): - # Arrange - json = backend_fixtures["inference_endpoint"]["get_port"]["response"] - json_copy = copy.deepcopy(json) - - # Act - kwargs = inference_endpoint.InferenceEndpointPort.extract_fields_from_json( - json_copy - ) - - # Assert - assert kwargs["name"] == json["name"] - assert kwargs["number"] == json["number"] - - # InferenceEndpoint - - # from response json - - def test_from_response_json_empty(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_endpoint"]["get_empty"]["response"] - json_camelized = humps.camelize(json) # as 
returned by the backend - mock_ie_from_json = mocker.patch( - "hsml.inference_endpoint.InferenceEndpoint.from_json" - ) - - # Act - ie = inference_endpoint.InferenceEndpoint.from_response_json(json_camelized) - - # Assert - assert isinstance(ie, list) - assert len(ie) == 0 - mock_ie_from_json.assert_not_called() - - def test_from_response_json_singleton(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_endpoint"]["get_singleton"]["response"] - json_camelized = humps.camelize(json) # as returned by the backend - mock_ie_from_json = mocker.patch( - "hsml.inference_endpoint.InferenceEndpoint.from_json" - ) - - # Act - ie = inference_endpoint.InferenceEndpoint.from_response_json(json_camelized) - - # Assert - assert isinstance(ie, list) - assert len(ie) == 1 - mock_ie_from_json.assert_called_once_with(json["items"][0]) - - def test_from_response_json_list(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_endpoint"]["get_list"]["response"] - json_camelized = humps.camelize(json) # as returned by the backend - mock_ie_from_json = mocker.patch( - "hsml.inference_endpoint.InferenceEndpoint.from_json" - ) - - # Act - ie = inference_endpoint.InferenceEndpoint.from_response_json(json_camelized) - - # Assert - assert isinstance(ie, list) - assert len(ie) == json["count"] - assert mock_ie_from_json.call_count == json["count"] - - def test_from_response_json_single(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_endpoint"]["get_singleton"]["response"][ - "items" - ][0] - json_camelized = humps.camelize(json) # as returned by the backend - mock_ie_from_json = mocker.patch( - "hsml.inference_endpoint.InferenceEndpoint.from_json" - ) - - # Act - _ = inference_endpoint.InferenceEndpoint.from_response_json(json_camelized) - - # Assert - mock_ie_from_json.assert_called_once_with(json) - - # from json - - def test_from_json(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_endpoint"]["get_singleton"]["response"][ - "items" - ][0] - mock_ie_extract_fields = mocker.patch( - "hsml.inference_endpoint.InferenceEndpoint.extract_fields_from_json", - return_value=json, - ) - mock_ie_init = mocker.patch( - "hsml.inference_endpoint.InferenceEndpoint.__init__", return_value=None - ) - - # Act - _ = inference_endpoint.InferenceEndpoint.from_json(json) - - # Assert - mock_ie_extract_fields.assert_called_once_with(json) - mock_ie_init.assert_called_once_with(**json) - - # constructor - - def test_constructor(self, backend_fixtures): - # Arrange - json = backend_fixtures["inference_endpoint"]["get_singleton"]["response"][ - "items" - ][0] - - # Act - ie = inference_endpoint.InferenceEndpoint( - type=json["type"], hosts=json["hosts"], ports=json["ports"] - ) - - # Assert - assert isinstance(ie, inference_endpoint.InferenceEndpoint) - assert ie.type == json["type"] - assert ie.hosts == json["hosts"] - assert ie.ports == json["ports"] - - # extract fields from json - - def test_extract_fields_from_json(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_endpoint"]["get_singleton"]["response"][ - "items" - ][0] - json_copy = copy.deepcopy(json) - mock_ie_port_from_json = mocker.patch( - "hsml.inference_endpoint.InferenceEndpointPort.from_json", return_value=None - ) - - # Act - kwargs = inference_endpoint.InferenceEndpoint.extract_fields_from_json( - json_copy - ) - - # Assert - assert kwargs["type"] == json["type"] - assert kwargs["hosts"] == json["hosts"] - 
mock_ie_port_from_json.assert_called_once_with(json["ports"][0]) - - # get any host - - def test_get_any_host(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_endpoint"]["get_singleton"]["response"][ - "items" - ][0] - ie = inference_endpoint.InferenceEndpoint( - type=None, hosts=json["hosts"], ports=None - ) - mocker_random_choice = mocker.patch("random.choice", return_value=None) - - # Act - _ = ie.get_any_host() - - # Assert - mocker_random_choice.assert_called_once_with(ie.hosts) - - def test_get_any_host_none(self, mocker, backend_fixtures): - # Arrange - ie = inference_endpoint.InferenceEndpoint(type=None, hosts=None, ports=None) - mocker_random_choice = mocker.patch("random.choice", return_value=None) - - # Act - host = ie.get_any_host() - - # Assert - assert host is None - mocker_random_choice.assert_not_called() - - # get port - - def test_get_port_existing(self, backend_fixtures): - # Arrange - json = backend_fixtures["inference_endpoint"]["get_list"]["response"]["items"][ - 1 - ] - ports = [ - inference_endpoint.InferenceEndpointPort(p["name"], p["number"]) - for p in json["ports"] - ] - ie = inference_endpoint.InferenceEndpoint(type=None, hosts=None, ports=ports) - - # Act - port = ie.get_port(ports[0].name) - - # Assert - assert port == ports[0] - - def test_get_port_not_found(self, backend_fixtures): - # Arrange - json = backend_fixtures["inference_endpoint"]["get_list"]["response"]["items"][ - 1 - ] - ports = [ - inference_endpoint.InferenceEndpointPort(p["name"], p["number"]) - for p in json["ports"] - ] - ie = inference_endpoint.InferenceEndpoint(type=None, hosts=None, ports=ports) - - # Act - port = ie.get_port("not_found") - - # Assert - assert port is None - - def test_get_port_none(self): - # Arrange - ie = inference_endpoint.InferenceEndpoint(type=None, hosts=None, ports=None) - - # Act - port = ie.get_port("not_found") - - # Assert - assert port is None diff --git a/hsml/python/tests/test_inference_logger.py b/hsml/python/tests/test_inference_logger.py deleted file mode 100644 index 1f137cefa..000000000 --- a/hsml/python/tests/test_inference_logger.py +++ /dev/null @@ -1,413 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import copy - -import humps -import pytest -from hsml import inference_logger, kafka_topic -from hsml.constants import DEFAULT, INFERENCE_LOGGER - - -class TestInferenceLogger: - # from response json - - def test_from_response_json_with_mode_only(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_logger"]["get_mode_all"]["response"] - json_camelized = humps.camelize(json) # as returned by the backend - mock_il_from_json = mocker.patch( - "hsml.inference_logger.InferenceLogger.from_json" - ) - - # Act - _ = inference_logger.InferenceLogger.from_response_json(json_camelized) - - # Assert - mock_il_from_json.assert_called_once_with(json) - - def test_from_response_json_with_mode_and_kafka_topic( - self, mocker, backend_fixtures - ): - # Arrange - json = backend_fixtures["inference_logger"]["get_mode_all_with_kafka_topic"][ - "response" - ] - json_camelized = humps.camelize(json) # as returned by the backend - mock_il_from_json = mocker.patch( - "hsml.inference_logger.InferenceLogger.from_json" - ) - - # Act - _ = inference_logger.InferenceLogger.from_response_json(json_camelized) - - # Assert - mock_il_from_json.assert_called_once_with(json) - - # from json - - def test_from_json_with_mode_all(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_logger"]["get_mode_all"]["response"] - mock_il_extract_fields = mocker.patch( - "hsml.inference_logger.InferenceLogger.extract_fields_from_json", - return_value=json, - ) - mock_il_init = mocker.patch( - "hsml.inference_logger.InferenceLogger.__init__", return_value=None - ) - - # Act - _ = inference_logger.InferenceLogger.from_json(json) - - # Assert - mock_il_extract_fields.assert_called_once_with(json) - mock_il_init.assert_called_once_with(**json) - - def test_from_json_with_mode_all_and_kafka_topic(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_logger"]["get_mode_all_with_kafka_topic"][ - "response" - ] - mock_il_extract_fields = mocker.patch( - "hsml.inference_logger.InferenceLogger.extract_fields_from_json", - return_value=json, - ) - mock_il_init = mocker.patch( - "hsml.inference_logger.InferenceLogger.__init__", return_value=None - ) - - # Act - _ = inference_logger.InferenceLogger.from_json(json) - - # Assert - mock_il_extract_fields.assert_called_once_with(json) - mock_il_init.assert_called_once_with(**json) - - # constructor - - def test_constructor_default(self, mocker): - # Arrange - mock_il_validate_mode = mocker.patch( - "hsml.inference_logger.InferenceLogger._validate_mode", - return_value=INFERENCE_LOGGER.MODE_ALL, - ) - default_kt = kafka_topic.KafkaTopic() - mock_util_get_obj_from_json = mocker.patch( - "hsml.util.get_obj_from_json", return_value=default_kt - ) - - # Act - il = inference_logger.InferenceLogger() - - # Assert - assert isinstance(il, inference_logger.InferenceLogger) - assert il.mode == INFERENCE_LOGGER.MODE_ALL - assert isinstance(il.kafka_topic, kafka_topic.KafkaTopic) - assert il.kafka_topic.name == default_kt.name - assert il.kafka_topic.num_replicas == default_kt.num_replicas - assert il.kafka_topic.num_partitions == default_kt.num_partitions - mock_util_get_obj_from_json.assert_called_once_with( - DEFAULT, kafka_topic.KafkaTopic - ) - mock_il_validate_mode.assert_called_once_with( - INFERENCE_LOGGER.MODE_ALL, default_kt - ) - - def test_constructor_mode_all(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_logger"]["get_mode_all"]["init_args"] - mock_il_validate_mode = 
mocker.patch( - "hsml.inference_logger.InferenceLogger._validate_mode", - return_value=json["mode"], - ) - default_kt = kafka_topic.KafkaTopic() - mock_util_get_obj_from_json = mocker.patch( - "hsml.util.get_obj_from_json", return_value=default_kt - ) - - # Act - il = inference_logger.InferenceLogger(**json) - - # Assert - assert isinstance(il, inference_logger.InferenceLogger) - assert il.mode == json["mode"] - assert isinstance(il.kafka_topic, kafka_topic.KafkaTopic) - assert il.kafka_topic.name == default_kt.name - assert il.kafka_topic.num_replicas == default_kt.num_replicas - assert il.kafka_topic.num_partitions == default_kt.num_partitions - mock_util_get_obj_from_json.assert_called_once_with( - DEFAULT, kafka_topic.KafkaTopic - ) - mock_il_validate_mode.assert_called_once_with(json["mode"], default_kt) - - def test_constructor_mode_inputs(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_logger"]["get_mode_inputs"]["init_args"] - mock_il_validate_mode = mocker.patch( - "hsml.inference_logger.InferenceLogger._validate_mode", - return_value=json["mode"], - ) - default_kt = kafka_topic.KafkaTopic() - mock_util_get_obj_from_json = mocker.patch( - "hsml.util.get_obj_from_json", return_value=default_kt - ) - - # Act - il = inference_logger.InferenceLogger(**json) - - # Assert - assert isinstance(il, inference_logger.InferenceLogger) - assert il.mode == json["mode"] - assert isinstance(il.kafka_topic, kafka_topic.KafkaTopic) - assert il.kafka_topic.name == default_kt.name - assert il.kafka_topic.num_replicas == default_kt.num_replicas - assert il.kafka_topic.num_partitions == default_kt.num_partitions - mock_util_get_obj_from_json.assert_called_once_with( - DEFAULT, kafka_topic.KafkaTopic - ) - mock_il_validate_mode.assert_called_once_with(json["mode"], default_kt) - - def test_constructor_mode_outputs(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_logger"]["get_mode_outputs"]["init_args"] - mock_il_validate_mode = mocker.patch( - "hsml.inference_logger.InferenceLogger._validate_mode", - return_value=json["mode"], - ) - default_kt = kafka_topic.KafkaTopic() - mock_util_get_obj_from_json = mocker.patch( - "hsml.util.get_obj_from_json", return_value=default_kt - ) - - # Act - il = inference_logger.InferenceLogger(**json) - - # Assert - assert isinstance(il, inference_logger.InferenceLogger) - assert il.mode == json["mode"] - assert isinstance(il.kafka_topic, kafka_topic.KafkaTopic) - assert il.kafka_topic.name == default_kt.name - assert il.kafka_topic.num_replicas == default_kt.num_replicas - assert il.kafka_topic.num_partitions == default_kt.num_partitions - mock_util_get_obj_from_json.assert_called_once_with( - DEFAULT, kafka_topic.KafkaTopic - ) - mock_il_validate_mode.assert_called_once_with(json["mode"], default_kt) - - def test_constructor_mode_all_and_kafka_topic(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_logger"]["get_mode_all_with_kafka_topic"][ - "init_args" - ] - json_copy = copy.deepcopy(json) - mock_il_validate_mode = mocker.patch( - "hsml.inference_logger.InferenceLogger._validate_mode", - return_value=json["mode"], - ) - kt = kafka_topic.KafkaTopic(json["kafka_topic"]["name"]) - mock_util_get_obj_from_json = mocker.patch( - "hsml.util.get_obj_from_json", return_value=kt - ) - - # Act - il = inference_logger.InferenceLogger(**json_copy) - - # Assert - assert isinstance(il, inference_logger.InferenceLogger) - assert il.mode == json["mode"] - assert 
isinstance(il.kafka_topic, kafka_topic.KafkaTopic) - assert il.kafka_topic.name == kt.name - assert il.kafka_topic.num_replicas is None - assert il.kafka_topic.num_partitions is None - mock_util_get_obj_from_json.assert_called_once_with( - json["kafka_topic"], kafka_topic.KafkaTopic - ) - mock_il_validate_mode.assert_called_once_with(json["mode"], kt) - - def test_constructor_mode_inputs_and_kafka_topic(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_logger"]["get_mode_inputs_with_kafka_topic"][ - "init_args" - ] - json_copy = copy.deepcopy(json) - mock_il_validate_mode = mocker.patch( - "hsml.inference_logger.InferenceLogger._validate_mode", - return_value=json["mode"], - ) - kt = kafka_topic.KafkaTopic(json["kafka_topic"]["name"]) - mock_util_get_obj_from_json = mocker.patch( - "hsml.util.get_obj_from_json", return_value=kt - ) - - # Act - il = inference_logger.InferenceLogger(**json_copy) - - # Assert - assert isinstance(il, inference_logger.InferenceLogger) - assert il.mode == json["mode"] - assert isinstance(il.kafka_topic, kafka_topic.KafkaTopic) - assert il.kafka_topic.name == kt.name - assert il.kafka_topic.num_replicas is None - assert il.kafka_topic.num_partitions is None - mock_util_get_obj_from_json.assert_called_once_with( - json["kafka_topic"], kafka_topic.KafkaTopic - ) - mock_il_validate_mode.assert_called_once_with(json["mode"], kt) - - def test_constructor_mode_outputs_and_kafka_topic(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_logger"]["get_mode_all_with_kafka_topic"][ - "init_args" - ] - json_copy = copy.deepcopy(json) - mock_il_validate_mode = mocker.patch( - "hsml.inference_logger.InferenceLogger._validate_mode", - return_value=json["mode"], - ) - kt = kafka_topic.KafkaTopic(json["kafka_topic"]["name"]) - mock_util_get_obj_from_json = mocker.patch( - "hsml.util.get_obj_from_json", return_value=kt - ) - - # Act - il = inference_logger.InferenceLogger(**json_copy) - - # Assert - assert isinstance(il, inference_logger.InferenceLogger) - assert il.mode == json["mode"] - assert isinstance(il.kafka_topic, kafka_topic.KafkaTopic) - assert il.kafka_topic.name == kt.name - assert il.kafka_topic.num_replicas is None - assert il.kafka_topic.num_partitions is None - mock_util_get_obj_from_json.assert_called_once_with( - json["kafka_topic"], kafka_topic.KafkaTopic - ) - mock_il_validate_mode.assert_called_once_with(json["mode"], kt) - - def test_constructor_mode_none_and_kafka_topic(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["inference_logger"]["get_mode_none_with_kafka_topic"][ - "init_args" - ] - json_copy = copy.deepcopy(json) - mock_il_validate_mode = mocker.patch( - "hsml.inference_logger.InferenceLogger._validate_mode", - return_value=json["mode"], - ) - kt = kafka_topic.KafkaTopic(json["kafka_topic"]["name"]) - mock_util_get_obj_from_json = mocker.patch( - "hsml.util.get_obj_from_json", return_value=kt - ) - - # Act - il = inference_logger.InferenceLogger(**json_copy) - - # Assert - assert isinstance(il, inference_logger.InferenceLogger) - assert il.mode == json["mode"] - assert isinstance(il.kafka_topic, kafka_topic.KafkaTopic) - assert il.kafka_topic.name == kt.name - assert il.kafka_topic.num_replicas is None - assert il.kafka_topic.num_partitions is None - mock_util_get_obj_from_json.assert_called_once_with( - json["kafka_topic"], kafka_topic.KafkaTopic - ) - mock_il_validate_mode.assert_called_once_with(json["mode"], kt) - - # validate mode - - def 
test_validate_mode_none_and_kafka_topic_none(self): - # Act - mode = inference_logger.InferenceLogger._validate_mode(None, None) - - # Assert - assert mode is None - - def test_validate_mode_all_and_kafka_topic_none(self): - # Act - mode = inference_logger.InferenceLogger._validate_mode( - INFERENCE_LOGGER.MODE_ALL, None - ) - - # Assert - assert mode is None - - def test_validate_mode_invalid_and_kafka_topic_none(self): - # Act - with pytest.raises(ValueError) as e_info: - _ = inference_logger.InferenceLogger._validate_mode("invalid", None) - - # Assert - assert "is not valid" in str(e_info.value) - - def test_validate_mode_none_and_kafka_topic(self): - # Act - mode = inference_logger.InferenceLogger._validate_mode( - None, kafka_topic.KafkaTopic() - ) - - # Assert - assert mode == INFERENCE_LOGGER.MODE_NONE - - def test_validate_mode_all_and_kafka_topic(self): - # Act - mode = inference_logger.InferenceLogger._validate_mode( - INFERENCE_LOGGER.MODE_ALL, kafka_topic.KafkaTopic() - ) - - # Assert - assert mode == INFERENCE_LOGGER.MODE_ALL - - def test_validate_mode_invalid_and_kafka_topic(self): - # Act - with pytest.raises(ValueError) as e_info: - _ = inference_logger.InferenceLogger._validate_mode( - "invalid", kafka_topic.KafkaTopic() - ) - - # Assert - assert "is not valid" in str(e_info.value) - - # extract fields from json - - def test_extract_fields_from_json(self, backend_fixtures): - # Arrange - json = backend_fixtures["inference_logger"]["get_mode_all_with_kafka_topic"][ - "response" - ] - json_copy = copy.deepcopy(json) - - # Act - kwargs = inference_logger.InferenceLogger.extract_fields_from_json(json_copy) - - # Assert - assert kwargs["kafka_topic"] == json["kafka_topic_dto"] - assert kwargs["mode"] == json["inference_logging"] - - def test_extract_fields_from_json_alternative(self, backend_fixtures): - # Arrange - json = backend_fixtures["inference_logger"]["get_mode_all_with_kafka_topic"][ - "init_args" - ] - json_copy = copy.deepcopy(json) - - # Act - kwargs = inference_logger.InferenceLogger.extract_fields_from_json(json_copy) - - # Assert - assert kwargs["kafka_topic"] == json["kafka_topic"] - assert kwargs["mode"] == json["mode"] diff --git a/hsml/python/tests/test_kafka_topic.py b/hsml/python/tests/test_kafka_topic.py deleted file mode 100644 index b9ada2a91..000000000 --- a/hsml/python/tests/test_kafka_topic.py +++ /dev/null @@ -1,289 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import copy - -import humps -import pytest -from hsml import kafka_topic -from hsml.constants import KAFKA_TOPIC - - -class TestKafkaTopic: - # from response json - - def test_from_response_json_with_name_only(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["kafka_topic"]["get_existing_with_name_only"][ - "response" - ]["kafka_topic_dto"] - json_camelized = humps.camelize(json) # as returned by the backend - mock_kt_from_json = mocker.patch("hsml.kafka_topic.KafkaTopic.from_json") - - # Act - _ = kafka_topic.KafkaTopic.from_response_json(json_camelized) - - # Assert - mock_kt_from_json.assert_called_once_with(json) - - def test_from_response_json_with_name_and_config(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["kafka_topic"]["get_existing_with_name_and_config"][ - "response" - ]["kafka_topic_dto"] - json_camelized = humps.camelize(json) # as returned by the backend - mock_kt_from_json = mocker.patch("hsml.kafka_topic.KafkaTopic.from_json") - - # Act - _ = kafka_topic.KafkaTopic.from_response_json(json_camelized) - - # Assert - mock_kt_from_json.assert_called_once_with(json) - - # from json - - def test_from_json_with_name_only(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["kafka_topic"]["get_existing_with_name_only"][ - "response" - ]["kafka_topic_dto"] - mock_kt_extract_fields = mocker.patch( - "hsml.kafka_topic.KafkaTopic.extract_fields_from_json", return_value=json - ) - mock_kt_init = mocker.patch( - "hsml.kafka_topic.KafkaTopic.__init__", return_value=None - ) - - # Act - _ = kafka_topic.KafkaTopic.from_response_json(json) - - # Assert - mock_kt_extract_fields.assert_called_once_with(json) - mock_kt_init.assert_called_once_with(**json) - - def test_from_json_with_name_and_config(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["kafka_topic"]["get_existing_with_name_and_config"][ - "response" - ]["kafka_topic_dto"] - mock_kt_extract_fields = mocker.patch( - "hsml.kafka_topic.KafkaTopic.extract_fields_from_json", return_value=json - ) - mock_kt_init = mocker.patch( - "hsml.kafka_topic.KafkaTopic.__init__", return_value=None - ) - - # Act - _ = kafka_topic.KafkaTopic.from_response_json(json) - - # Assert - mock_kt_extract_fields.assert_called_once_with(json) - mock_kt_init.assert_called_once_with(**json) - - # constructor - - def test_constructor_existing_with_name_only(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["kafka_topic"]["get_existing_with_name_only"][ - "response" - ]["kafka_topic_dto"] - mock_kt_validate_topic_config = mocker.patch( - "hsml.kafka_topic.KafkaTopic._validate_topic_config", - return_value=(KAFKA_TOPIC.NUM_REPLICAS, KAFKA_TOPIC.NUM_PARTITIONS), - ) - - # Act - kt = kafka_topic.KafkaTopic(**json) - - # Assert - assert isinstance(kt, kafka_topic.KafkaTopic) - assert kt.name == json["name"] - assert kt.num_replicas == KAFKA_TOPIC.NUM_REPLICAS - assert kt.num_partitions == KAFKA_TOPIC.NUM_PARTITIONS - mock_kt_validate_topic_config.assert_called_once_with(json["name"], None, None) - - def test_constructor_existing_with_name_and_config(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["kafka_topic"]["get_existing_with_name_and_config"][ - "response" - ]["kafka_topic_dto"] - mock_kt_validate_topic_config = mocker.patch( - "hsml.kafka_topic.KafkaTopic._validate_topic_config", - return_value=(json["num_replicas"], json["num_partitions"]), - ) - - # Act - kt = kafka_topic.KafkaTopic(**json) - - # Assert - assert isinstance(kt, 
kafka_topic.KafkaTopic) - assert kt.name == json["name"] - assert kt.num_replicas == json["num_replicas"] - assert kt.num_partitions == json["num_partitions"] - mock_kt_validate_topic_config.assert_called_once_with( - json["name"], json["num_replicas"], json["num_partitions"] - ) - - # validate topic config - - def test_validate_topic_config_existing_with_name_only(self, backend_fixtures): - # Arrange - json = backend_fixtures["kafka_topic"]["get_existing_with_name_only"][ - "response" - ]["kafka_topic_dto"] - - # Act - num_repl, num_part = kafka_topic.KafkaTopic._validate_topic_config( - json["name"], None, None - ) - - # Assert - assert num_repl is None - assert num_part is None - - def test_validate_topic_config_existing_with_name_and_config( - self, backend_fixtures - ): - # Arrange - json = backend_fixtures["kafka_topic"]["get_existing_with_name_and_config"][ - "response" - ]["kafka_topic_dto"] - - # Act - with pytest.raises(ValueError) as e_info: - num_repl, num_part = kafka_topic.KafkaTopic._validate_topic_config( - json["name"], json["num_replicas"], json["num_partitions"] - ) - - # Assert - assert "Number of replicas or partitions cannot be changed" in str(e_info.value) - - def test_validate_topic_config_none(self, backend_fixtures): - # Arrange - json = backend_fixtures["kafka_topic"]["get_none"]["response"][ - "kafka_topic_dto" - ] - - # Act - num_repl, num_part = kafka_topic.KafkaTopic._validate_topic_config( - json["name"], None, None - ) - - # Assert - assert num_repl is None - assert num_part is None - - def test_validate_topic_config_none_with_config(self, backend_fixtures): - # Arrange - json = backend_fixtures["kafka_topic"]["get_none_with_config"]["response"][ - "kafka_topic_dto" - ] - - # Act - num_repl, num_part = kafka_topic.KafkaTopic._validate_topic_config( - json["name"], json["num_replicas"], json["num_partitions"] - ) - - # Assert - assert num_repl is None - assert num_part is None - - def test_validate_topic_config_none_value(self): - # Act - num_repl, num_part = kafka_topic.KafkaTopic._validate_topic_config( - None, None, None - ) - - # Assert - assert num_repl is None - assert num_part is None - - def test_validate_topic_config_none_value_with_config(self, backend_fixtures): - # Arrange - json = backend_fixtures["kafka_topic"]["get_none_with_config"]["response"][ - "kafka_topic_dto" - ] - - # Act - num_repl, num_part = kafka_topic.KafkaTopic._validate_topic_config( - None, json["num_replicas"], json["num_partitions"] - ) - - # Assert - assert num_repl is None - assert num_part is None - - def test_validate_topic_config_create(self, backend_fixtures): - # Arrange - json = backend_fixtures["kafka_topic"]["get_create_with_name_only"]["response"][ - "kafka_topic_dto" - ] - - # Act - num_repl, num_part = kafka_topic.KafkaTopic._validate_topic_config( - json["name"], None, None - ) - - # Assert - assert num_repl == KAFKA_TOPIC.NUM_REPLICAS - assert num_part == KAFKA_TOPIC.NUM_PARTITIONS - - def test_validate_topic_config_create_with_config(self, backend_fixtures): - # Arrange - json = backend_fixtures["kafka_topic"]["get_create_with_name_and_config"][ - "response" - ]["kafka_topic_dto"] - - # Act - num_repl, num_part = kafka_topic.KafkaTopic._validate_topic_config( - json["name"], json["num_replicas"], json["num_partitions"] - ) - - # Assert - assert num_repl == json["num_replicas"] - assert num_part == json["num_partitions"] - - # extract fields from json - - def test_extract_fields_from_json(self, backend_fixtures): - # Arrange - json = 
backend_fixtures["kafka_topic"]["get_existing_with_name_and_config"][ - "response" - ]["kafka_topic_dto"] - json_copy = copy.deepcopy(json) - - # Act - kwargs = kafka_topic.KafkaTopic.extract_fields_from_json(json_copy) - - # Assert - assert kwargs["name"] == json["name"] - assert kwargs["num_replicas"] == json["num_replicas"] - assert kwargs["num_partitions"] == json["num_partitions"] - - def test_extract_fields_from_json_alternative(self, backend_fixtures): - # Arrange - json = backend_fixtures["kafka_topic"][ - "get_existing_with_name_and_config_alternative" - ]["response"]["kafka_topic_dto"] - json_copy = copy.deepcopy(json) - - # Act - kwargs = kafka_topic.KafkaTopic.extract_fields_from_json(json_copy) - - # Assert - assert kwargs["name"] == json["name"] - assert kwargs["num_replicas"] == json["num_of_replicas"] - assert kwargs["num_partitions"] == json["num_of_partitions"] diff --git a/hsml/python/tests/test_model.py b/hsml/python/tests/test_model.py deleted file mode 100644 index c7fac9a44..000000000 --- a/hsml/python/tests/test_model.py +++ /dev/null @@ -1,472 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import copy -import os - -import humps -from hsml import model -from hsml.constants import MODEL -from hsml.core import explicit_provenance - - -class TestModel: - # from response json - - def test_from_response_json_empty(self, backend_fixtures): - # Arrange - json = backend_fixtures["model"]["get_empty"]["response"] - - # Act - m_lst = model.Model.from_response_json(json) - - # Assert - assert isinstance(m_lst, list) - assert len(m_lst) == 0 - - def test_from_response_json_singleton(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["model"]["get_python"]["response"] - json_camelized = humps.camelize(json) # as returned by the backend - - # Act - m = model.Model.from_response_json(copy.deepcopy(json_camelized)) - - # Assert - assert isinstance(m, list) - assert len(m) == 1 - - m = m[0] - m_json = json["items"][0] - - self.assert_model(mocker, m, m_json, MODEL.FRAMEWORK_PYTHON) - - def test_from_response_json_list(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["model"]["get_list"]["response"] - json_camelized = humps.camelize(json) # as returned by the backend - - # Act - m_lst = model.Model.from_response_json(copy.deepcopy(json_camelized)) - - # Assert - assert isinstance(m_lst, list) - assert len(m_lst) == 2 - - for i in range(len(m_lst)): - m = m_lst[i] - m_json = json["items"][i] - self.assert_model(mocker, m, m_json, MODEL.FRAMEWORK_PYTHON) - - # constructor - - def test_constructor_base(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["model"]["get_base"]["response"]["items"][0] - m_json = copy.deepcopy(json) - id = m_json.pop("id") - name = m_json.pop("name") - - # Act - m = model.Model(id=id, name=name, **m_json) - - # Assert - self.assert_model(mocker, m, json, None) - - def test_constructor_python(self, mocker, backend_fixtures): - # Arrange - json = 
backend_fixtures["model"]["get_python"]["response"]["items"][0] - m_json = copy.deepcopy(json) - id = m_json.pop("id") - name = m_json.pop("name") - - # Act - m = model.Model(id=id, name=name, **m_json) - - # Assert - self.assert_model(mocker, m, json, MODEL.FRAMEWORK_PYTHON) - - def test_constructor_sklearn(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["model"]["get_sklearn"]["response"]["items"][0] - m_json = copy.deepcopy(json) - id = m_json.pop("id") - name = m_json.pop("name") - - # Act - m = model.Model(id=id, name=name, **m_json) - - # Assert - self.assert_model(mocker, m, json, MODEL.FRAMEWORK_SKLEARN) - - def test_constructor_tensorflow(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["model"]["get_tensorflow"]["response"]["items"][0] - m_json = copy.deepcopy(json) - id = m_json.pop("id") - name = m_json.pop("name") - - # Act - m = model.Model(id=id, name=name, **m_json) - - # Assert - self.assert_model(mocker, m, json, MODEL.FRAMEWORK_TENSORFLOW) - - def test_constructor_torch(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["model"]["get_torch"]["response"]["items"][0] - m_json = copy.deepcopy(json) - id = m_json.pop("id") - name = m_json.pop("name") - - # Act - m = model.Model(id=id, name=name, **m_json) - - # Assert - self.assert_model(mocker, m, json, MODEL.FRAMEWORK_TORCH) - - # save - - def test_save(self, mocker, backend_fixtures): - # Arrange - m_json = backend_fixtures["model"]["get_python"]["response"]["items"][0] - mock_model_engine_save = mocker.patch( - "hsml.engine.model_engine.ModelEngine.save" - ) - upload_configuration = {"config": "value"} - - # Act - m = model.Model.from_response_json(m_json) - m.save( - model_path="model_path", - await_registration=1234, - keep_original_files=True, - upload_configuration=upload_configuration, - ) - - # Assert - mock_model_engine_save.assert_called_once_with( - model_instance=m, - model_path="model_path", - await_registration=1234, - keep_original_files=True, - upload_configuration=upload_configuration, - ) - - # deploy - - def test_deploy(self, mocker, backend_fixtures): - # Arrange - m_json = backend_fixtures["model"]["get_python"]["response"]["items"][0] - p_json = backend_fixtures["predictor"]["get_deployments_singleton"]["response"][ - "items" - ][0] - mock_predictor = mocker.Mock() - mock_predictor_for_model = mocker.patch( - "hsml.predictor.Predictor.for_model", return_value=mock_predictor - ) - # params - resources = copy.deepcopy(p_json["predictor_resources"]) - inference_logger = { - "mode": p_json["inference_logging"], - "kafka_topic": copy.deepcopy(p_json["kafka_topic_dto"]), - } - inference_batcher = copy.deepcopy(p_json["batching_configuration"]) - transformer = { - "script_file": p_json["transformer"], - "resources": copy.deepcopy(p_json["transformer_resources"]), - } - - # Act - m = model.Model.from_response_json(m_json) - m.deploy( - name=p_json["name"], - description=p_json["description"], - artifact_version=p_json["artifact_version"], - serving_tool=p_json["serving_tool"], - script_file=p_json["predictor"], - resources=resources, - inference_logger=inference_logger, - inference_batcher=inference_batcher, - transformer=transformer, - api_protocol=p_json["api_protocol"], - environment=p_json["environment_dto"]["name"], - ) - - # Assert - mock_predictor_for_model.assert_called_once_with( - m, - name=p_json["name"], - description=p_json["description"], - artifact_version=p_json["artifact_version"], - serving_tool=p_json["serving_tool"], - 
script_file=p_json["predictor"], - resources=resources, - inference_logger=inference_logger, - inference_batcher=inference_batcher, - transformer=transformer, - api_protocol=p_json["api_protocol"], - environment=p_json["environment_dto"]["name"], - ) - mock_predictor.deploy.assert_called_once() - - # delete - - def test_delete(self, mocker, backend_fixtures): - # Arrange - m_json = backend_fixtures["model"]["get_python"]["response"]["items"][0] - mock_model_engine_delete = mocker.patch( - "hsml.engine.model_engine.ModelEngine.delete" - ) - - # Act - m = model.Model.from_response_json(m_json) - m.delete() - - # Assert - mock_model_engine_delete.assert_called_once_with(model_instance=m) - - # download - - def test_download(self, mocker, backend_fixtures): - # Arrange - m_json = backend_fixtures["model"]["get_python"]["response"]["items"][0] - mock_model_engine_download = mocker.patch( - "hsml.engine.model_engine.ModelEngine.download" - ) - - # Act - m = model.Model.from_response_json(m_json) - m.download() - - # Assert - mock_model_engine_download.assert_called_once_with(model_instance=m) - - # tags - - def test_get_tag(self, mocker, backend_fixtures): - # Arrange - m_json = backend_fixtures["model"]["get_python"]["response"]["items"][0] - mock_model_engine_get_tag = mocker.patch( - "hsml.engine.model_engine.ModelEngine.get_tag" - ) - - # Act - m = model.Model.from_response_json(m_json) - m.get_tag("tag_name") - - # Assert - mock_model_engine_get_tag.assert_called_once_with( - model_instance=m, name="tag_name" - ) - - def test_get_tags(self, mocker, backend_fixtures): - # Arrange - m_json = backend_fixtures["model"]["get_python"]["response"]["items"][0] - mock_model_engine_get_tags = mocker.patch( - "hsml.engine.model_engine.ModelEngine.get_tags" - ) - - # Act - m = model.Model.from_response_json(m_json) - m.get_tags() - - # Assert - mock_model_engine_get_tags.assert_called_once_with(model_instance=m) - - def test_set_tag(self, mocker, backend_fixtures): - # Arrange - m_json = backend_fixtures["model"]["get_python"]["response"]["items"][0] - mock_model_engine_set_tag = mocker.patch( - "hsml.engine.model_engine.ModelEngine.set_tag" - ) - - # Act - m = model.Model.from_response_json(m_json) - m.set_tag("tag_name", "tag_value") - - # Assert - mock_model_engine_set_tag.assert_called_once_with( - model_instance=m, name="tag_name", value="tag_value" - ) - - def test_delete_tag(self, mocker, backend_fixtures): - # Arrange - m_json = backend_fixtures["model"]["get_python"]["response"]["items"][0] - mock_model_engine_delete_tag = mocker.patch( - "hsml.engine.model_engine.ModelEngine.delete_tag" - ) - - # Act - m = model.Model.from_response_json(m_json) - m.delete_tag("tag_name") - - # Assert - mock_model_engine_delete_tag.assert_called_once_with( - model_instance=m, name="tag_name" - ) - - # get url - - def test_get_url(self, mocker, backend_fixtures): - # Arrange - m_json = backend_fixtures["model"]["get_python"]["response"]["items"][0] - - class ClientMock: - _project_id = 1 - - mock_client_get_instance = mocker.patch( - "hsml.client.get_instance", return_value=ClientMock() - ) - mock_util_get_hostname_replaced_url = mocker.patch( - "hsml.util.get_hostname_replaced_url", return_value="full_path" - ) - path_arg = "/p/1/models/" + m_json["name"] + "/" + str(m_json["version"]) - - # Act - m = model.Model.from_response_json(m_json) - url = m.get_url() - - # Assert - assert url == "full_path" - mock_client_get_instance.assert_called_once() - 
mock_util_get_hostname_replaced_url.assert_called_once_with(sub_path=path_arg) - - # auxiliary methods - def assert_model(self, mocker, m, m_json, model_framework): - assert isinstance(m, model.Model) - assert m.id == m_json["id"] - assert m.name == m_json["name"] - assert m.version == m_json["version"] - assert m.created == m_json["created"] - assert m.creator == m_json["creator"] - assert m.description == m_json["description"] - assert m.experiment_id == m_json["experiment_id"] - assert m.project_name == m_json["project_name"] - assert m.experiment_project_name == m_json["experiment_project_name"] - assert m.training_metrics == m_json["metrics"] - assert m._user_full_name == m_json["user_full_name"] - assert m.training_dataset == m_json["training_dataset"] - assert m.model_registry_id == m_json["model_registry_id"] - - if model_framework is None: - assert m.framework is None - else: - assert m.framework == model_framework - - mock_read_json = mocker.patch( - "hsml.engine.model_engine.ModelEngine.read_json", - return_value="input_example_content", - ) - assert m.input_example == "input_example_content" - mock_read_json.assert_called_once_with( - model_instance=m, resource=m_json["input_example"] - ) - - mock_read_json = mocker.patch( - "hsml.engine.model_engine.ModelEngine.read_json", - return_value="model_schema_content", - ) - assert m.model_schema == "model_schema_content" - mock_read_json.assert_called_once_with( - model_instance=m, resource=m_json["model_schema"] - ) - - mock_read_file = mocker.patch( - "hsml.engine.model_engine.ModelEngine.read_file", - return_value="program_file_content", - ) - assert m.program == "program_file_content" - mock_read_file.assert_called_once_with( - model_instance=m, resource=m_json["program"] - ) - - mock_read_file = mocker.patch( - "hsml.engine.model_engine.ModelEngine.read_file", - return_value="env_file_content", - ) - assert m.environment == "env_file_content" - mock_read_file.assert_called_once_with( - model_instance=m, resource=m_json["environment"] - ) - - def test_get_feature_view(self, mocker): - mock_fv = mocker.Mock() - links = explicit_provenance.Links(accessible=[mock_fv]) - mock_fv_provenance = mocker.patch( - "hsml.model.Model.get_feature_view_provenance", return_value=links - ) - mock_td_provenance = mocker.patch( - "hsml.model.Model.get_training_dataset_provenance", return_value=links - ) - mocker.patch("os.environ", return_value={}) - m = model.Model(1, "test") - m.get_feature_view() - mock_fv_provenance.assert_called_once() - mock_td_provenance.assert_called_once() - assert not mock_fv.init_serving.called - assert not mock_fv.init_batch_scoring.called - - def test_get_feature_view_online(self, mocker): - mock_fv = mocker.Mock() - links = explicit_provenance.Links(accessible=[mock_fv]) - mock_fv_provenance = mocker.patch( - "hsml.model.Model.get_feature_view_provenance", return_value=links - ) - mock_td_provenance = mocker.patch( - "hsml.model.Model.get_training_dataset_provenance", return_value=links - ) - mocker.patch("os.environ", return_value={}) - m = model.Model(1, "test") - m.get_feature_view(online=True) - mock_fv_provenance.assert_called_once() - mock_td_provenance.assert_called_once() - assert mock_fv.init_serving.called - assert not mock_fv.init_batch_scoring.called - - def test_get_feature_view_batch(self, mocker): - mock_fv = mocker.Mock() - links = explicit_provenance.Links(accessible=[mock_fv]) - mock_fv_provenance = mocker.patch( - "hsml.model.Model.get_feature_view_provenance", return_value=links - ) - 
mock_td_provenance = mocker.patch( - "hsml.model.Model.get_training_dataset_provenance", return_value=links - ) - mocker.patch("os.environ", return_value={}) - m = model.Model(1, "test") - m.get_feature_view(online=False) - mock_fv_provenance.assert_called_once() - mock_td_provenance.assert_called_once() - assert not mock_fv.init_serving.called - assert mock_fv.init_batch_scoring.called - - def test_get_feature_view_deployment(self, mocker): - mock_fv = mocker.Mock() - links = explicit_provenance.Links(accessible=[mock_fv]) - mock_fv_provenance = mocker.patch( - "hsml.model.Model.get_feature_view_provenance", return_value=links - ) - mock_td_provenance = mocker.patch( - "hsml.model.Model.get_training_dataset_provenance", return_value=links - ) - mocker.patch.dict(os.environ, {"DEPLOYMENT_NAME": "test"}) - m = model.Model(1, "test") - m.get_feature_view() - mock_fv_provenance.assert_called_once() - mock_td_provenance.assert_called_once() - assert mock_fv.init_serving.called - assert not mock_fv.init_batch_scoring.called diff --git a/hsml/python/tests/test_model_schema.py b/hsml/python/tests/test_model_schema.py deleted file mode 100644 index 826975f9e..000000000 --- a/hsml/python/tests/test_model_schema.py +++ /dev/null @@ -1,30 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - - -from hsml import model_schema - - -class TestModelSchema: - # constructor - - def test_constructor(self): - # Act - msch = model_schema.ModelSchema(input_schema="1234", output_schema="4321") - - # Assert - assert msch.input_schema == "1234" - assert msch.output_schema == "4321" diff --git a/hsml/python/tests/test_predictor.py b/hsml/python/tests/test_predictor.py deleted file mode 100644 index e2e5485fc..000000000 --- a/hsml/python/tests/test_predictor.py +++ /dev/null @@ -1,709 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import copy - -import pytest -from hsml import ( - inference_batcher, - inference_logger, - predictor, - resources, - transformer, - util, -) -from hsml.constants import MODEL, PREDICTOR, RESOURCES - - -SERVING_RESOURCE_LIMITS = {"cores": 2, "memory": 1024, "gpus": 2} -SERVING_NUM_INSTANCES_NO_LIMIT = [-1] -SERVING_NUM_INSTANCES_SCALE_TO_ZERO = [0] -SERVING_NUM_INSTANCES_ONE = [0] - - -class TestPredictor: - # from response json - - def test_from_response_json_empty(self, mocker, backend_fixtures): - # Arrange - self._mock_serving_variables(mocker, SERVING_NUM_INSTANCES_NO_LIMIT) - json = backend_fixtures["predictor"]["get_deployments_empty"]["response"] - - # Act - pred = predictor.Predictor.from_response_json(json) - - # Assert - assert isinstance(pred, list) - assert len(pred) == 0 - - def test_from_response_json_singleton(self, mocker, backend_fixtures): - # Arrange - self._mock_serving_variables(mocker, SERVING_NUM_INSTANCES_NO_LIMIT) - json = backend_fixtures["predictor"]["get_deployments_singleton"]["response"] - - # Act - pred = predictor.Predictor.from_response_json(json) - - # Assert - assert isinstance(pred, list) - assert len(pred) == 1 - - p = pred[0] - p_json = json["items"][0] - - assert isinstance(p, predictor.Predictor) - assert p.id == p_json["id"] - assert p.name == p_json["name"] - assert p.description == p_json["description"] - assert p.created_at == p_json["created"] - assert p.creator == p_json["creator"] - assert p.model_path == p_json["model_path"] - assert p.model_name == p_json["model_name"] - assert p.model_version == p_json["model_version"] - assert p.model_framework == p_json["model_framework"] - assert p.model_server == p_json["model_server"] - assert p.serving_tool == p_json["serving_tool"] - assert p.api_protocol == p_json["api_protocol"] - assert p.artifact_version == p_json["artifact_version"] - assert p.environment == p_json["environment_dto"]["name"] - assert p.script_file == p_json["predictor"] - assert isinstance(p.resources, resources.PredictorResources) - assert isinstance(p.transformer, transformer.Transformer) - assert p.transformer.script_file == p_json["transformer"] - assert isinstance(p.transformer.resources, resources.TransformerResources) - assert isinstance(p.inference_logger, inference_logger.InferenceLogger) - assert p.inference_logger.mode == p_json["inference_logging"] - assert isinstance(p.inference_batcher, inference_batcher.InferenceBatcher) - assert p.inference_batcher.enabled == bool( - p_json["batching_configuration"]["batching_enabled"] - ) - - def test_from_response_json_list(self, mocker, backend_fixtures): - # Arrange - self._mock_serving_variables(mocker, SERVING_NUM_INSTANCES_NO_LIMIT) - json = backend_fixtures["predictor"]["get_deployments_list"]["response"] - - # Act - pred = predictor.Predictor.from_response_json(json) - - # Assert - assert isinstance(pred, list) - assert len(pred) == 2 - - for i in range(len(pred)): - p = pred[i] - p_json = json["items"][i] - - assert isinstance(p, predictor.Predictor) - assert p.id == p_json["id"] - assert p.name == p_json["name"] - assert p.description == p_json["description"] - assert p.created_at == p_json["created"] - assert p.creator == p_json["creator"] - assert p.model_path == p_json["model_path"] - assert p.model_name == p_json["model_name"] - assert p.model_version == p_json["model_version"] - assert p.model_framework == p_json["model_framework"] - assert p.model_server == p_json["model_server"] - assert p.serving_tool == p_json["serving_tool"] - assert p.api_protocol == 
p_json["api_protocol"] - assert p.environment == p_json["environment_dto"]["name"] - assert p.artifact_version == p_json["artifact_version"] - assert p.script_file == p_json["predictor"] - assert isinstance(p.resources, resources.PredictorResources) - assert isinstance(p.transformer, transformer.Transformer) - assert p.transformer.script_file == p_json["transformer"] - assert isinstance(p.transformer.resources, resources.TransformerResources) - assert isinstance(p.inference_logger, inference_logger.InferenceLogger) - assert p.inference_logger.mode == p_json["inference_logging"] - assert isinstance(p.inference_batcher, inference_batcher.InferenceBatcher) - assert p.inference_batcher.enabled == bool( - p_json["batching_configuration"]["batching_enabled"] - ) - - def test_from_response_json_single(self, mocker, backend_fixtures): - # Arrange - self._mock_serving_variables(mocker, SERVING_NUM_INSTANCES_NO_LIMIT) - p_json = backend_fixtures["predictor"]["get_deployments_singleton"]["response"][ - "items" - ][0] - - # Act - p = predictor.Predictor.from_response_json(p_json) - - # Assert - assert isinstance(p, predictor.Predictor) - assert p.id == p_json["id"] - assert p.name == p_json["name"] - assert p.description == p_json["description"] - assert p.created_at == p_json["created"] - assert p.creator == p_json["creator"] - assert p.model_path == p_json["model_path"] - assert p.model_version == p_json["model_version"] - assert p.model_name == p_json["model_name"] - assert p.model_framework == p_json["model_framework"] - assert p.model_server == p_json["model_server"] - assert p.serving_tool == p_json["serving_tool"] - assert p.api_protocol == p_json["api_protocol"] - assert p.environment == p_json["environment_dto"]["name"] - assert p.artifact_version == p_json["artifact_version"] - assert p.script_file == p_json["predictor"] - assert isinstance(p.resources, resources.PredictorResources) - assert isinstance(p.transformer, transformer.Transformer) - assert p.transformer.script_file == p_json["transformer"] - assert isinstance(p.transformer.resources, resources.TransformerResources) - assert isinstance(p.inference_logger, inference_logger.InferenceLogger) - assert p.inference_logger.mode == p_json["inference_logging"] - assert isinstance(p.inference_batcher, inference_batcher.InferenceBatcher) - assert p.inference_batcher.enabled == bool( - p_json["batching_configuration"]["batching_enabled"] - ) - - # constructor - - def test_constructor(self, mocker, backend_fixtures): - # Arrange - self._mock_serving_variables(mocker, SERVING_NUM_INSTANCES_NO_LIMIT) - p_json = backend_fixtures["predictor"]["get_deployments_singleton"]["response"][ - "items" - ][0] - mock_validate_serving_tool = mocker.patch( - "hsml.predictor.Predictor._validate_serving_tool", - return_value=p_json["serving_tool"], - ) - mock_resources = util.get_obj_from_json( - copy.deepcopy(p_json["predictor_resources"]), resources.PredictorResources - ) - mock_validate_resources = mocker.patch( - "hsml.predictor.Predictor._validate_resources", - return_value=mock_resources, - ) - mock_validate_script_file = mocker.patch( - "hsml.predictor.Predictor._validate_script_file", - return_value=p_json["predictor"], - ) - - # Act - p = predictor.Predictor( - id=p_json["id"], - name=p_json["name"], - description=p_json["description"], - created_at=p_json["created"], - creator=p_json["creator"], - model_path=p_json["model_path"], - model_version=p_json["model_version"], - model_name=p_json["model_name"], - model_framework=p_json["model_framework"], - 
model_server=p_json["model_server"], - serving_tool=p_json["serving_tool"], - api_protocol=p_json["api_protocol"], - environment=p_json["environment_dto"]["name"], - artifact_version=p_json["artifact_version"], - script_file=p_json["predictor"], - resources=p_json["predictor_resources"], - transformer={ - "script_file": p_json["transformer"], - "resources": copy.deepcopy(p_json["transformer_resources"]), - }, - inference_logger={ - "mode": p_json["inference_logging"], - "kafka_topic": copy.deepcopy(p_json["kafka_topic_dto"]), - }, - inference_batcher=copy.deepcopy(p_json["batching_configuration"]), - ) - - # Assert - assert p.id == p_json["id"] - assert p.name == p_json["name"] - assert p.description == p_json["description"] - assert p.created_at == p_json["created"] - assert p.creator == p_json["creator"] - assert p.model_path == p_json["model_path"] - assert p.model_name == p_json["model_name"] - assert p.model_version == p_json["model_version"] - assert p.model_framework == p_json["model_framework"] - assert p.model_server == p_json["model_server"] - assert p.serving_tool == p_json["serving_tool"] - assert p.api_protocol == p_json["api_protocol"] - assert p.environment == p_json["environment_dto"]["name"] - assert p.artifact_version == p_json["artifact_version"] - assert p.script_file == p_json["predictor"] - assert isinstance(p.resources, resources.PredictorResources) - assert isinstance(p.transformer, transformer.Transformer) - assert p.transformer.script_file == p_json["transformer"] - assert isinstance(p.transformer.resources, resources.TransformerResources) - assert isinstance(p.inference_logger, inference_logger.InferenceLogger) - assert p.inference_logger.mode == p_json["inference_logging"] - assert isinstance(p.inference_batcher, inference_batcher.InferenceBatcher) - assert p.inference_batcher.enabled == bool( - p_json["batching_configuration"]["batching_enabled"] - ) - mock_validate_serving_tool.assert_called_once_with(p_json["serving_tool"]) - assert mock_validate_resources.call_count == 1 - mock_validate_script_file.assert_called_once_with( - p_json["model_framework"], p_json["predictor"] - ) - - # validate serving tool - - def test_validate_serving_tool_none(self): - # Act - st = predictor.Predictor._validate_serving_tool(None) - - # Assert - assert st is None - - def test_validate_serving_tool_valid(self, mocker): - # Arrange - self._mock_serving_variables( - mocker, SERVING_NUM_INSTANCES_NO_LIMIT, is_saas_connection=False - ) - - # Act - st = predictor.Predictor._validate_serving_tool(PREDICTOR.SERVING_TOOL_DEFAULT) - - # Assert - assert st == PREDICTOR.SERVING_TOOL_DEFAULT - - def test_validate_serving_tool_invalid(self, mocker): - # Arrange - self._mock_serving_variables( - mocker, SERVING_NUM_INSTANCES_NO_LIMIT, is_saas_connection=False - ) - - # Act - with pytest.raises(ValueError) as e_info: - _ = predictor.Predictor._validate_serving_tool("INVALID_NAME") - - # Assert - assert "is not valid" in str(e_info.value) - - def test_validate_serving_tool_valid_saas_connection(self, mocker): - # Arrange - self._mock_serving_variables( - mocker, SERVING_NUM_INSTANCES_NO_LIMIT, is_saas_connection=True - ) - - # Act - st = predictor.Predictor._validate_serving_tool(PREDICTOR.SERVING_TOOL_KSERVE) - - # Assert - assert st == PREDICTOR.SERVING_TOOL_KSERVE - - def test_validate_serving_tool_invalid_saas_connection(self, mocker): - # Arrange - self._mock_serving_variables( - mocker, SERVING_NUM_INSTANCES_NO_LIMIT, is_saas_connection=True - ) - - # Act - with pytest.raises(ValueError) 
as e_info: - _ = predictor.Predictor._validate_serving_tool( - PREDICTOR.SERVING_TOOL_DEFAULT - ) - - # Assert - assert "KServe deployments are the only supported" in str(e_info.value) - - # validate script file - - def test_validate_script_file_tf_none(self): - # Act - predictor.Predictor._validate_script_file(MODEL.FRAMEWORK_TENSORFLOW, None) - - def test_validate_script_file_sk_none(self): - # Act - predictor.Predictor._validate_script_file(MODEL.FRAMEWORK_SKLEARN, None) - - def test_validate_script_file_th_none(self): - # Act - predictor.Predictor._validate_script_file(MODEL.FRAMEWORK_TORCH, None) - - def test_validate_script_file_py_none(self): - # Act - with pytest.raises(ValueError) as e_info: - _ = predictor.Predictor._validate_script_file(MODEL.FRAMEWORK_PYTHON, None) - - # Assert - assert "Predictor scripts are required" in str(e_info.value) - - def test_validate_script_file_tf_script_file(self): - # Act - predictor.Predictor._validate_script_file( - MODEL.FRAMEWORK_TENSORFLOW, "script_file" - ) - - def test_validate_script_file_sk_script_file(self): - # Act - predictor.Predictor._validate_script_file( - MODEL.FRAMEWORK_SKLEARN, "script_file" - ) - - def test_validate_script_file_th_script_file(self): - # Act - predictor.Predictor._validate_script_file(MODEL.FRAMEWORK_TORCH, "script_file") - - def test_validate_script_file_py_script_file(self): - # Act - predictor.Predictor._validate_script_file(MODEL.FRAMEWORK_PYTHON, "script_file") - - # infer model server - - def test_infer_model_server_tf(self): - # Act - ms = predictor.Predictor._infer_model_server(MODEL.FRAMEWORK_TENSORFLOW) - - # Assert - assert ms == PREDICTOR.MODEL_SERVER_TF_SERVING - - def test_infer_model_server_sk(self): - # Act - ms = predictor.Predictor._infer_model_server(MODEL.FRAMEWORK_SKLEARN) - - # Assert - assert ms == PREDICTOR.MODEL_SERVER_PYTHON - - def test_infer_model_server_th(self): - # Act - ms = predictor.Predictor._infer_model_server(MODEL.FRAMEWORK_TORCH) - - # Assert - assert ms == PREDICTOR.MODEL_SERVER_PYTHON - - def test_infer_model_server_py(self): - # Act - ms = predictor.Predictor._infer_model_server(MODEL.FRAMEWORK_PYTHON) - - # Assert - assert ms == PREDICTOR.MODEL_SERVER_PYTHON - - # default serving tool - - def test_get_default_serving_tool_kserve_installed(self, mocker): - # Arrange - self._mock_serving_variables( - mocker, SERVING_NUM_INSTANCES_NO_LIMIT, is_kserve_installed=True - ) - - # Act - st = predictor.Predictor._get_default_serving_tool() - - # Assert - assert st == PREDICTOR.SERVING_TOOL_KSERVE - - def test_get_default_serving_tool_kserve_not_installed(self, mocker): - # Arrange - self._mock_serving_variables( - mocker, SERVING_NUM_INSTANCES_NO_LIMIT, is_kserve_installed=False - ) - - # Act - st = predictor.Predictor._get_default_serving_tool() - - # Assert - assert st == PREDICTOR.SERVING_TOOL_DEFAULT - - # validate resources - - def test_validate_resources_none_non_kserve(self): - # Act - res = predictor.Predictor._validate_resources( - None, PREDICTOR.SERVING_TOOL_DEFAULT - ) - - # Assert - assert res is None - - def test_validate_resources_none_kserve(self): - # Act - res = predictor.Predictor._validate_resources( - None, PREDICTOR.SERVING_TOOL_KSERVE - ) - - # Assert - assert res is None - - def test_validate_resources_num_instances_zero_non_kserve(self, mocker): - # Arrange - self._mock_serving_variables( - mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=False - ) - pr = resources.PredictorResources(num_instances=0) - - # Act - res = 
predictor.Predictor._validate_resources( - pr, PREDICTOR.SERVING_TOOL_DEFAULT - ) - - # Assert - assert res == pr - - def test_validate_resources_num_instances_zero_kserve(self, mocker): - # Arrange - self._mock_serving_variables( - mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=False - ) - pr = resources.PredictorResources(num_instances=0) - - # Act - res = predictor.Predictor._validate_resources(pr, PREDICTOR.SERVING_TOOL_KSERVE) - - # Assert - assert res == pr - - def test_validate_resources_num_instances_one_without_scale_to_zero_non_kserve( - self, mocker - ): - # Arrange - self._mock_serving_variables( - mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=False - ) - pr = resources.PredictorResources(num_instances=1) - - # Act - res = predictor.Predictor._validate_resources( - pr, PREDICTOR.SERVING_TOOL_DEFAULT - ) - - # Assert - assert res == pr - - def test_validate_resources_num_instances_one_without_scale_to_zero_kserve( - self, mocker - ): - # Arrange - self._mock_serving_variables( - mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=False - ) - pr = resources.PredictorResources(num_instances=1) - - # Act - res = predictor.Predictor._validate_resources(pr, PREDICTOR.SERVING_TOOL_KSERVE) - - # Assert - assert res == pr - - def test_validate_resources_num_instances_one_with_scale_to_zero_non_kserve( - self, mocker - ): - # Arrange - self._mock_serving_variables( - mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=True - ) - pr = resources.PredictorResources(num_instances=1) - - # Act - res = predictor.Predictor._validate_resources( - pr, PREDICTOR.SERVING_TOOL_DEFAULT - ) - - # Assert - assert res == pr - - def test_validate_resources_num_instances_one_with_scale_to_zero_kserve( - self, mocker - ): - # Arrange - self._mock_serving_variables( - mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=True - ) - pr = resources.PredictorResources(num_instances=1) - - # Act - with pytest.raises(ValueError) as e_info: - _ = predictor.Predictor._validate_resources( - pr, PREDICTOR.SERVING_TOOL_KSERVE - ) - - # Assert - assert "Scale-to-zero is required" in str(e_info.value) - - # default resources - - def test_get_default_resources_non_kserve_without_scale_to_zero(self, mocker): - # Arrange - self._mock_serving_variables( - mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=False - ) - - # Act - res = predictor.Predictor._get_default_resources(PREDICTOR.SERVING_TOOL_DEFAULT) - - # Assert - assert isinstance(res, resources.PredictorResources) - assert res.num_instances == RESOURCES.MIN_NUM_INSTANCES - - def test_get_default_resources_non_kserve_with_scale_to_zero(self, mocker): - # Arrange - self._mock_serving_variables( - mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=True - ) - - # Act - res = predictor.Predictor._get_default_resources(PREDICTOR.SERVING_TOOL_DEFAULT) - - # Assert - assert isinstance(res, resources.PredictorResources) - assert res.num_instances == RESOURCES.MIN_NUM_INSTANCES - - def test_get_default_resources_kserve_without_scale_to_zero(self, mocker): - # Arrange - self._mock_serving_variables( - mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=False - ) - - # Act - res = predictor.Predictor._get_default_resources(PREDICTOR.SERVING_TOOL_KSERVE) - - # Assert - assert isinstance(res, resources.PredictorResources) - assert res.num_instances == RESOURCES.MIN_NUM_INSTANCES - - def test_get_default_resources_kserve_with_scale_to_zero(self, mocker): - # Arrange - self._mock_serving_variables( - 
mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=True - ) - - # Act - res = predictor.Predictor._get_default_resources(PREDICTOR.SERVING_TOOL_KSERVE) - - # Assert - assert isinstance(res, resources.PredictorResources) - assert res.num_instances == 0 - - # for model - - def test_for_model(self, mocker): - # Arrange - def spec(model, model_name, model_version, model_path): - pass - - mock_get_predictor_for_model = mocker.patch( - "hsml.util.get_predictor_for_model", return_value=True, spec=spec - ) - - class MockModel: - name = "model_name" - version = "model_version" - model_path = "model_path" - - mock_model = MockModel() - - # Act - predictor.Predictor.for_model(mock_model) - - # Assert - mock_get_predictor_for_model.assert_called_once_with( - model=mock_model, - model_name=mock_model.name, - model_version=mock_model.version, - model_path=mock_model.model_path, - ) - - # extract fields from json - - def extract_fields_from_json(self, mocker, backend_fixtures): - # Arrange - self._mock_serving_variables(mocker, SERVING_NUM_INSTANCES_NO_LIMIT) - p_json = backend_fixtures["predictor"]["get_deployments_singleton"]["response"][ - "items" - ][0] - - # Act - kwargs = predictor.Predictor.extract_fields_from_json(p_json) - - # Assert - assert kwargs["id"] == p_json["id"] - assert kwargs["name"] == p_json["name"] - assert kwargs["description"] == p_json["description"] - assert kwargs["created_at"] == p_json["created"] - assert kwargs["creator"] == p_json["creator"] - assert kwargs["model_name"] == p_json["model_name"] - assert kwargs["model_path"] == p_json["model_path"] - assert kwargs["model_version"] == p_json["model_version"] - assert kwargs["model_framework"] == p_json["model_framework"] - assert kwargs["artifact_version"] == p_json["artifact_version"] - assert kwargs["model_server"] == p_json["model_server"] - assert kwargs["serving_tool"] == p_json["serving_tool"] - assert kwargs["script_file"] == p_json["predictor"] - assert isinstance(kwargs["resources"], resources.PredictorResources) - assert isinstance(kwargs["inference_logger"], inference_logger.InferenceLogger) - assert kwargs["inference_logger"].mode == p_json["inference_logging"] - assert isinstance( - kwargs["inference_batcher"], inference_batcher.InferenceBatcher - ) - assert kwargs["inference_batcher"].enabled == bool( - p_json["batching_configuration"]["batching_enabled"] - ) - assert kwargs["api_protocol"] == p_json["api_protocol"] - assert kwargs["environment"] == p_json["environment_dto"]["name"] - assert isinstance(kwargs["transformer"], transformer.Transformer) - assert kwargs["transformer"].script_file == p_json["transformer"] - assert isinstance( - kwargs["transformer"].resources, resources.TransformerResources - ) - - # deploy - - def test_deploy(self, mocker, backend_fixtures): - # Arrange - self._mock_serving_variables(mocker, SERVING_NUM_INSTANCES_NO_LIMIT) - p_json = backend_fixtures["predictor"]["get_deployments_singleton"]["response"][ - "items" - ][0] - mock_deployment_init = mocker.patch( - "hsml.deployment.Deployment.__init__", return_value=None - ) - mock_deployment_save = mocker.patch("hsml.deployment.Deployment.save") - - # Act - - p = predictor.Predictor.from_response_json(p_json) - p.deploy() - - # Assert - mock_deployment_init.assert_called_once_with( - predictor=p, - name=p.name, - description=p.description, - ) - mock_deployment_save.assert_called_once() - - # auxiliary methods - - def _mock_serving_variables( - self, - mocker, - num_instances, - force_scale_to_zero=False, - 
is_saas_connection=False, - is_kserve_installed=True, - ): - mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=SERVING_RESOURCE_LIMITS, - ) - mocker.patch( - "hsml.client.get_serving_num_instances_limits", return_value=num_instances - ) - mocker.patch( - "hsml.client.is_scale_to_zero_required", return_value=force_scale_to_zero - ) - mocker.patch("hsml.client.is_saas_connection", return_value=is_saas_connection) - mocker.patch( - "hsml.client.is_kserve_installed", return_value=is_kserve_installed - ) diff --git a/hsml/python/tests/test_predictor_state.py b/hsml/python/tests/test_predictor_state.py deleted file mode 100644 index c9feabcc5..000000000 --- a/hsml/python/tests/test_predictor_state.py +++ /dev/null @@ -1,126 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import copy - -import humps -from hsml import predictor_state, predictor_state_condition - - -class TestPredictorState: - # from response json - - def test_from_response_json(self, backend_fixtures): - # Arrange - json = backend_fixtures["predictor"]["get_deployment_predictor_state"][ - "response" - ] - json_camelized = humps.camelize(json) # as returned by the backend - - # Act - ps = predictor_state.PredictorState.from_response_json(json_camelized) - - # Assert - assert isinstance(ps, predictor_state.PredictorState) - assert ps.available_predictor_instances == json["available_instances"] - assert ( - ps.available_transformer_instances - == json["available_transformer_instances"] - ) - assert ps.hopsworks_inference_path == json["hopsworks_inference_path"] - assert ps.model_server_inference_path == json["model_server_inference_path"] - assert ps.internal_port == json["internal_port"] - assert ps.revision == json["revision"] - assert ps.deployed == json["deployed"] - assert isinstance( - ps.condition, predictor_state_condition.PredictorStateCondition - ) - assert ps.condition.status == json["condition"]["status"] - assert ps.status == json["status"] - - # constructor - - def test_constructor(self, backend_fixtures): - # Arrange - json = backend_fixtures["predictor"]["get_deployment_predictor_state"][ - "response" - ] - - # Act - ps = predictor_state.PredictorState( - available_predictor_instances=json["available_instances"], - available_transformer_instances=json["available_transformer_instances"], - hopsworks_inference_path=json["hopsworks_inference_path"], - model_server_inference_path=json["model_server_inference_path"], - internal_port=json["internal_port"], - revision=json["revision"], - deployed=json["deployed"], - condition=predictor_state_condition.PredictorStateCondition( - **copy.deepcopy(json["condition"]) - ), - status=json["status"], - ) - - # Assert - assert isinstance(ps, predictor_state.PredictorState) - assert ps.available_predictor_instances == json["available_instances"] - assert ( - ps.available_transformer_instances - == json["available_transformer_instances"] - ) - assert ps.hopsworks_inference_path == json["hopsworks_inference_path"] - 
assert ps.model_server_inference_path == json["model_server_inference_path"] - assert ps.internal_port == json["internal_port"] - assert ps.revision == json["revision"] - assert ps.deployed == json["deployed"] - assert isinstance( - ps.condition, predictor_state_condition.PredictorStateCondition - ) - assert ps.condition.status == json["condition"]["status"] - assert ps.status == json["status"] - - # extract fields from json - - def test_extract_fields_from_json(self, backend_fixtures): - # Arrange - json = backend_fixtures["predictor"]["get_deployment_predictor_state"][ - "response" - ] - - # Act - ( - ai, - ati, - hip, - msip, - ipt, - r, - d, - c, - s, - ) = predictor_state.PredictorState.extract_fields_from_json(copy.deepcopy(json)) - - # Assert - assert ai == json["available_instances"] - assert ati == json["available_transformer_instances"] - assert hip == json["hopsworks_inference_path"] - assert msip == json["model_server_inference_path"] - assert ipt == json["internal_port"] - assert r == json["revision"] - assert d == json["deployed"] - assert isinstance(c, predictor_state_condition.PredictorStateCondition) - assert c.status == json["condition"]["status"] - assert s == json["status"] diff --git a/hsml/python/tests/test_predictor_state_condition.py b/hsml/python/tests/test_predictor_state_condition.py deleted file mode 100644 index 5b0387f97..000000000 --- a/hsml/python/tests/test_predictor_state_condition.py +++ /dev/null @@ -1,81 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#
-
-import copy
-
-import humps
-from hsml import predictor_state_condition
-
-
-class TestPredictorStateCondition:
-    # from response json
-
-    def test_from_response_json(self, backend_fixtures):
-        # Arrange
-        json = backend_fixtures["predictor"]["get_deployment_predictor_state"][
-            "response"
-        ]["condition"]
-        json_camelized = humps.camelize(json)  # as returned by the backend
-
-        # Act
-        psc = predictor_state_condition.PredictorStateCondition.from_response_json(
-            json_camelized
-        )
-
-        # Assert
-        assert isinstance(psc, predictor_state_condition.PredictorStateCondition)
-        assert psc.type == json["type"]
-        assert psc.status == json["status"]
-        assert psc.reason == json["reason"]
-
-    # constructor
-
-    def test_constructor(self, backend_fixtures):
-        # Arrange
-        json = backend_fixtures["predictor"]["get_deployment_predictor_state"][
-            "response"
-        ]["condition"]
-
-        # Act
-        psc = predictor_state_condition.PredictorStateCondition(
-            type=json["type"], status=json["status"], reason=json["reason"]
-        )
-
-        # Assert
-        assert isinstance(psc, predictor_state_condition.PredictorStateCondition)
-        assert psc.type == json["type"]
-        assert psc.status == json["status"]
-        assert psc.reason == json["reason"]
-
-    # extract fields from json
-
-    def test_extract_fields_from_json(self, backend_fixtures):
-        # Arrange
-        json = backend_fixtures["predictor"]["get_deployment_predictor_state"][
-            "response"
-        ]["condition"]
-
-        # Act
-        kwargs = (
-            predictor_state_condition.PredictorStateCondition.extract_fields_from_json(
-                copy.deepcopy(json)
-            )
-        )
-
-        # Assert
-        assert kwargs["type"] == json["type"]
-        assert kwargs["status"] == json["status"]
-        assert kwargs["reason"] == json["reason"]
diff --git a/hsml/python/tests/test_resources.py b/hsml/python/tests/test_resources.py
deleted file mode 100644
index f77863b38..000000000
--- a/hsml/python/tests/test_resources.py
+++ /dev/null
@@ -1,928 +0,0 @@
-#
-# Copyright 2024 Hopsworks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# - -import copy - -import pytest -from hsml import resources -from hsml.constants import RESOURCES -from mock import call - - -SERVING_RESOURCE_LIMITS = {"cores": 2, "memory": 516, "gpus": 2} - - -class TestResources: - # Resources - - def test_from_response_json_cpus(self, backend_fixtures): - # Arrange - json = backend_fixtures["resources"]["get_only_cores"]["response"] - - # Act - r = resources.Resources.from_response_json(json) - - # Assert - assert r.cores == json["cores"] - assert r.memory is None - assert r.gpus is None - - def test_from_response_json_memory(self, backend_fixtures): - # Arrange - json = backend_fixtures["resources"]["get_only_memory"]["response"] - - # Act - r = resources.Resources.from_response_json(json) - - # Assert - assert r.cores is None - assert r.memory is json["memory"] - assert r.gpus is None - - def test_from_response_json_gpus(self, backend_fixtures): - # Arrange - json = backend_fixtures["resources"]["get_only_gpus"]["response"] - - # Act - r = resources.Resources.from_response_json(json) - - # Assert - assert r.cores is None - assert r.memory is None - assert r.gpus == json["gpus"] - - def test_from_response_json_cores_and_memory(self, backend_fixtures): - # Arrange - json = backend_fixtures["resources"]["get_cores_and_memory"]["response"] - - # Act - r = resources.Resources.from_response_json(json) - - # Assert - assert r.cores == json["cores"] - assert r.memory == json["memory"] - assert r.gpus is None - - def test_from_response_json_cores_and_gpus(self, backend_fixtures): - # Arrange - json = backend_fixtures["resources"]["get_cores_and_gpus"]["response"] - - # Act - r = resources.Resources.from_response_json(json) - - # Assert - assert r.cores == json["cores"] - assert r.memory is None - assert r.gpus == json["gpus"] - - def test_from_response_json_memory_and_gpus(self, backend_fixtures): - # Arrange - json = backend_fixtures["resources"]["get_memory_and_gpus"]["response"] - - # Act - r = resources.Resources.from_response_json(json) - - # Assert - assert r.cores is None - assert r.memory == json["memory"] - assert r.gpus == json["gpus"] - - def test_from_response_json_cores_memory_and_gpus(self, backend_fixtures): - # Arrange - json = backend_fixtures["resources"]["get_cores_memory_and_gpus"]["response"] - - # Act - r = resources.Resources.from_response_json(json) - - # Assert - assert r.cores == json["cores"] - assert r.memory == json["memory"] - assert r.gpus == json["gpus"] - - # ComponentResources - - # - from response json - - def test_from_response_json_component_resources(self, mocker): - # Arrange - res = {"something": "here"} - json_decamelized = {"key": "value"} - mock_humps_decamelize = mocker.patch( - "humps.decamelize", return_value=json_decamelized - ) - mock_from_json = mocker.patch( - "hsml.resources.ComponentResources.from_json", - return_value="from_json_result", - ) - - # Act - result = resources.ComponentResources.from_response_json(res) - - # Assert - assert result == "from_json_result" - mock_humps_decamelize.assert_called_once_with(res) - mock_from_json.assert_called_once_with(json_decamelized) - - # - constructor - - def test_constructor_component_resources_default(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["resources"][ - "get_component_resources_num_instances_requests_and_limits" - ]["response"] - mock_default_resource_limits = mocker.patch( - "hsml.resources.ComponentResources._get_default_resource_limits", - return_value=(0, 1, 2), - ) - mock_fill_missing_resources = mocker.patch( - 
"hsml.resources.ComponentResources._fill_missing_resources" - ) - mock_validate_resources = mocker.patch( - "hsml.resources.ComponentResources._validate_resources" - ) - mock_resources_init = mocker.patch( - "hsml.resources.Resources.__init__", return_value=None - ) - - # Act - pr = resources.PredictorResources(num_instances=json["num_instances"]) - - # Assert - assert pr.num_instances == json["num_instances"] - assert mock_default_resource_limits.call_count == 2 - assert mock_fill_missing_resources.call_count == 2 - assert ( - mock_fill_missing_resources.call_args_list[0][0][1] == RESOURCES.MIN_CORES - ) - assert ( - mock_fill_missing_resources.call_args_list[0][0][2] == RESOURCES.MIN_MEMORY - ) - assert mock_fill_missing_resources.call_args_list[0][0][3] == RESOURCES.MIN_GPUS - assert mock_fill_missing_resources.call_args_list[1][0][1] == 0 - assert mock_fill_missing_resources.call_args_list[1][0][2] == 1 - assert mock_fill_missing_resources.call_args_list[1][0][3] == 2 - mock_validate_resources.assert_called_once_with(pr._requests, pr._limits) - expected_calls = [ - call(RESOURCES.MIN_CORES, RESOURCES.MIN_MEMORY, RESOURCES.MIN_GPUS), - call(0, 1, 2), - ] - mock_resources_init.assert_has_calls(expected_calls) - - def test_constructor_component_resources(self, mocker, backend_fixtures): - # Arrange - json = backend_fixtures["resources"][ - "get_component_resources_num_instances_requests_and_limits" - ]["response"] - mock_util_get_obj_from_json = mocker.patch( - "hsml.util.get_obj_from_json", - side_effect=[json["requests"], json["limits"]], - ) - mock_default_resource_limits = mocker.patch( - "hsml.resources.ComponentResources._get_default_resource_limits", - return_value=(0, 1, 2), - ) - mock_fill_missing_resources = mocker.patch( - "hsml.resources.ComponentResources._fill_missing_resources" - ) - mock_validate_resources = mocker.patch( - "hsml.resources.ComponentResources._validate_resources" - ) - - # Act - pr = resources.PredictorResources( - num_instances=json["num_instances"], - requests=json["requests"], - limits=json["limits"], - ) - - # Assert - assert pr.num_instances == json["num_instances"] - assert pr.requests == json["requests"] - assert pr.limits == json["limits"] - mock_default_resource_limits.assert_called_once() - assert mock_fill_missing_resources.call_count == 2 - assert ( - mock_fill_missing_resources.call_args_list[0][0][1] == RESOURCES.MIN_CORES - ) - assert ( - mock_fill_missing_resources.call_args_list[0][0][2] == RESOURCES.MIN_MEMORY - ) - assert mock_fill_missing_resources.call_args_list[0][0][3] == RESOURCES.MIN_GPUS - assert mock_fill_missing_resources.call_args_list[1][0][1] == 0 - assert mock_fill_missing_resources.call_args_list[1][0][2] == 1 - assert mock_fill_missing_resources.call_args_list[1][0][3] == 2 - mock_validate_resources.assert_called_once_with(pr._requests, pr._limits) - assert mock_util_get_obj_from_json.call_count == 2 - expected_calls = [ - call(json["requests"], resources.Resources), - call(json["limits"], resources.Resources), - ] - mock_util_get_obj_from_json.assert_has_calls(expected_calls) - - # - extract fields from json - - def test_extract_fields_from_json_component_resources_with_key( - self, backend_fixtures - ): - # Arrange - json = backend_fixtures["resources"][ - "get_component_resources_requested_instances_and_predictor_resources" - ]["response"] - copy_json = copy.deepcopy(json) - resources.ComponentResources.RESOURCES_CONFIG_KEY = "predictor_resources" - resources.ComponentResources.NUM_INSTANCES_KEY = "requested_instances" - 
- # Act - kwargs = resources.ComponentResources.extract_fields_from_json(copy_json) - - # Assert - assert kwargs["num_instances"] == json["requested_instances"] - assert isinstance(kwargs["requests"], resources.Resources) - assert ( - kwargs["requests"].cores == json["predictor_resources"]["requests"]["cores"] - ) - assert ( - kwargs["requests"].memory - == json["predictor_resources"]["requests"]["memory"] - ) - assert ( - kwargs["requests"].gpus == json["predictor_resources"]["requests"]["gpus"] - ) - assert isinstance(kwargs["limits"], resources.Resources) - assert kwargs["limits"].cores == json["predictor_resources"]["limits"]["cores"] - assert ( - kwargs["limits"].memory == json["predictor_resources"]["limits"]["memory"] - ) - assert kwargs["limits"].gpus == json["predictor_resources"]["limits"]["gpus"] - - def test_extract_fields_from_json_component_resources( - self, mocker, backend_fixtures - ): - # Arrange - json = backend_fixtures["resources"][ - "get_component_resources_requested_instances_and_predictor_resources_alternative" - ]["response"] - copy_json = copy.deepcopy(json) - resources.ComponentResources.RESOURCES_CONFIG_KEY = "predictor_resources" - resources.ComponentResources.NUM_INSTANCES_KEY = "requested_instances" - - # Act - kwargs = resources.ComponentResources.extract_fields_from_json(copy_json) - - # Assert - assert kwargs["num_instances"] == json["num_instances"] - assert isinstance(kwargs["requests"], resources.Resources) - assert kwargs["requests"].cores == json["resources"]["requests"]["cores"] - assert kwargs["requests"].memory == json["resources"]["requests"]["memory"] - assert kwargs["requests"].gpus == json["resources"]["requests"]["gpus"] - assert isinstance(kwargs["limits"], resources.Resources) - assert kwargs["limits"].cores == json["resources"]["limits"]["cores"] - assert kwargs["limits"].memory == json["resources"]["limits"]["memory"] - assert kwargs["limits"].gpus == json["resources"]["limits"]["gpus"] - - def test_extract_fields_from_json_component_resources_flatten( - self, backend_fixtures - ): - # Arrange - json = backend_fixtures["resources"][ - "get_component_resources_num_instances_requests_and_limits" - ]["response"] - copy_json = copy.deepcopy(json) - resources.ComponentResources.RESOURCES_CONFIG_KEY = "predictor_resources" - resources.ComponentResources.NUM_INSTANCES_KEY = "requested_instances" - - # Act - kwargs = resources.ComponentResources.extract_fields_from_json(copy_json) - - # Assert - assert kwargs["num_instances"] == json["num_instances"] - assert isinstance(kwargs["requests"], resources.Resources) - assert kwargs["requests"].cores == json["requests"]["cores"] - assert kwargs["requests"].memory == json["requests"]["memory"] - assert kwargs["requests"].gpus == json["requests"]["gpus"] - assert isinstance(kwargs["limits"], resources.Resources) - assert kwargs["limits"].cores == json["limits"]["cores"] - assert kwargs["limits"].memory == json["limits"]["memory"] - assert kwargs["limits"].gpus == json["limits"]["gpus"] - - # - fill missing dependencies - - def test_fill_missing_dependencies_none(self, mocker): - # Arrange - class MockResources: - cores = None - memory = None - gpus = None - - mock_resource = MockResources() - - # Act - resources.ComponentResources._fill_missing_resources(mock_resource, 10, 11, 12) - - # Assert - assert mock_resource.cores == 10 - assert mock_resource.memory == 11 - assert mock_resource.gpus == 12 - - def test_fill_missing_dependencies_all(self, mocker): - # Arrange - class MockResources: - cores = 1 - 
memory = 2 - gpus = 3 - - mock_resource = MockResources() - - # Act - resources.ComponentResources._fill_missing_resources(mock_resource, 10, 11, 12) - - # Assert - assert mock_resource.cores == 1 - assert mock_resource.memory == 2 - assert mock_resource.gpus == 3 - - def test_fill_missing_dependencies_some(self, mocker): - # Arrange - class MockResources: - cores = 1 - memory = None - gpus = None - - mock_resource = MockResources() - - # Act - resources.ComponentResources._fill_missing_resources(mock_resource, 10, 11, 12) - - # Assert - assert mock_resource.cores == 1 - assert mock_resource.memory == 11 - assert mock_resource.gpus == 12 - - # - get default resource limits - - def test_get_default_resource_limits_no_hard_limit_and_lower_than_default( - self, mocker - ): - # Arrange - no_limit_res = {"cores": -1, "memory": -1, "gpus": -1} - mock_comp_res = mocker.MagicMock() - mock_comp_res._requests = resources.Resources(cores=0.2, memory=516, gpus=0) - mock_comp_res._default_resource_limits = ( - resources.ComponentResources._get_default_resource_limits - ) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=no_limit_res, # no upper limit - ) - - # Act - cores, memory, gpus = mock_comp_res._default_resource_limits(mock_comp_res) - - # Assert - assert cores == RESOURCES.MAX_CORES - assert memory == RESOURCES.MAX_MEMORY - assert gpus == RESOURCES.MAX_GPUS - mock_get_serving_res_limits.assert_called_once() - - def test_get_default_resource_limits_no_hard_limit_and_higher_than_default( - self, mocker - ): - # Arrange - no_limit_res = {"cores": -1, "memory": -1, "gpus": -1} - mock_comp_res = mocker.MagicMock() - mock_comp_res._requests = resources.Resources(cores=4, memory=2048, gpus=2) - mock_comp_res._default_resource_limits = ( - resources.ComponentResources._get_default_resource_limits - ) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=no_limit_res, # no upper limit - ) - - # Act - cores, memory, gpus = mock_comp_res._default_resource_limits(mock_comp_res) - - # Assert - assert cores == mock_comp_res._requests.cores - assert memory == mock_comp_res._requests.memory - assert gpus == mock_comp_res._requests.gpus - mock_get_serving_res_limits.assert_called_once() - - def test_get_default_resource_limits_with_higher_hard_limit_and_lower_than_default( - self, mocker - ): - # Arrange - hard_limit_res = {"cores": 3, "memory": 3072, "gpus": 3} - mock_comp_res = mocker.MagicMock() - mock_comp_res._requests = resources.Resources(cores=1, memory=516, gpus=0) - mock_comp_res._default_resource_limits = ( - resources.ComponentResources._get_default_resource_limits - ) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=hard_limit_res, # upper limit - ) - - # Act - cores, memory, gpus = mock_comp_res._default_resource_limits(mock_comp_res) - - # Assert - assert cores == RESOURCES.MAX_CORES - assert memory == RESOURCES.MAX_MEMORY - assert gpus == RESOURCES.MAX_GPUS - mock_get_serving_res_limits.assert_called_once() - - def test_get_default_resource_limits_with_higher_hard_limit_and_higher_than_default( - self, mocker - ): - # Arrange - hard_limit_res = {"cores": 3, "memory": 3072, "gpus": 3} - mock_comp_res = mocker.MagicMock() - mock_comp_res._requests = resources.Resources(cores=3, memory=2048, gpus=1) - mock_comp_res._default_resource_limits = ( - resources.ComponentResources._get_default_resource_limits - ) - mock_get_serving_res_limits 
= mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=hard_limit_res, # upper limit - ) - - # Act - cores, memory, gpus = mock_comp_res._default_resource_limits(mock_comp_res) - - # Assert - assert cores == hard_limit_res["cores"] - assert memory == hard_limit_res["memory"] - assert gpus == hard_limit_res["gpus"] - mock_get_serving_res_limits.assert_called_once() - - def test_get_default_resource_limits_with_lower_hard_limit_and_lower_than_default( - self, mocker - ): - # Arrange - RESOURCES.MAX_GPUS = 1 # override default - hard_limit_res = {"cores": 1, "memory": 516, "gpus": 0} - mock_comp_res = mocker.MagicMock() - mock_comp_res._requests = resources.Resources(cores=0.5, memory=256, gpus=0.5) - mock_comp_res._default_resource_limits = ( - resources.ComponentResources._get_default_resource_limits - ) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=hard_limit_res, # upper limit - ) - - # Act - cores, memory, gpus = mock_comp_res._default_resource_limits(mock_comp_res) - - # Assert - assert cores == hard_limit_res["cores"] - assert memory == hard_limit_res["memory"] - assert gpus == hard_limit_res["gpus"] - mock_get_serving_res_limits.assert_called_once() - - def test_get_default_resource_limits_with_lower_hard_limit_and_higher_than_default( - self, mocker - ): - # Arrange - RESOURCES.MAX_GPUS = 1 # override default - hard_limit_res = {"cores": 1, "memory": 516, "gpus": 0} - mock_comp_res = mocker.MagicMock() - mock_comp_res._requests = resources.Resources(cores=4, memory=4080, gpus=4) - mock_comp_res._default_resource_limits = ( - resources.ComponentResources._get_default_resource_limits - ) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=hard_limit_res, # upper limit - ) - - # Act - cores, memory, gpus = mock_comp_res._default_resource_limits(mock_comp_res) - - # Assert - assert cores == hard_limit_res["cores"] - assert memory == hard_limit_res["memory"] - assert gpus == hard_limit_res["gpus"] - mock_get_serving_res_limits.assert_called_once() - - # - validate resources - - def test_validate_resources_no_hard_limits_valid_resources(self, mocker): - # Arrange - no_limit_res = {"cores": -1, "memory": -1, "gpus": -1} - requests = resources.Resources(cores=1, memory=1024, gpus=0) - limits = resources.Resources(cores=2, memory=2048, gpus=1) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=no_limit_res, # upper limit - ) - - # Act - resources.ComponentResources._validate_resources(requests, limits) - - # Assert - mock_get_serving_res_limits.assert_called_once() - - def test_validate_resources_no_hard_limit_invalid_cores_request(self, mocker): - # Arrange - no_limit_res = {"cores": -1, "memory": -1, "gpus": -1} - requests = resources.Resources(cores=0, memory=1024, gpus=0) - limits = resources.Resources(cores=2, memory=2048, gpus=1) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=no_limit_res, # upper limit - ) - - # Act - with pytest.raises(ValueError) as e_info: - resources.ComponentResources._validate_resources(requests, limits) - - # Assert - mock_get_serving_res_limits.assert_called_once() - assert "Requested number of cores must be greater than 0 cores." 
in str( - e_info.value - ) - - def test_validate_resources_no_hard_limit_invalid_memory_request(self, mocker): - # Arrange - no_limit_res = {"cores": -1, "memory": -1, "gpus": -1} - requests = resources.Resources(cores=1, memory=0, gpus=0) - limits = resources.Resources(cores=2, memory=2048, gpus=1) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=no_limit_res, # upper limit - ) - - # Act - with pytest.raises(ValueError) as e_info: - resources.ComponentResources._validate_resources(requests, limits) - - # Assert - mock_get_serving_res_limits.assert_called_once() - assert "Requested memory resources must be greater than 0 MB." in str( - e_info.value - ) - - def test_validate_resources_no_hard_limit_invalid_gpus_request(self, mocker): - # Arrange - no_limit_res = {"cores": -1, "memory": -1, "gpus": -1} - requests = resources.Resources( - cores=1, memory=1024, gpus=-1 - ) # 0 gpus is accepted - limits = resources.Resources(cores=2, memory=2048, gpus=1) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=no_limit_res, # upper limit - ) - - # Act - with pytest.raises(ValueError) as e_info: - resources.ComponentResources._validate_resources(requests, limits) - - # Assert - mock_get_serving_res_limits.assert_called_once() - assert ( - "Requested number of gpus must be greater than or equal to 0 gpus." - in str(e_info.value) - ) - - def test_validate_resources_no_hard_limit_cores_request_out_of_range(self, mocker): - # Arrange - no_limit_res = {"cores": -1, "memory": -1, "gpus": -1} - requests = resources.Resources(cores=2, memory=1024, gpus=0) - limits = resources.Resources(cores=1, memory=2048, gpus=1) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=no_limit_res, # upper limit - ) - - # Act - with pytest.raises(ValueError) as e_info: - resources.ComponentResources._validate_resources(requests, limits) - - # Assert - mock_get_serving_res_limits.assert_called_once() - assert ( - f"Requested number of cores cannot exceed the limit of {str(limits.cores)} cores." - in str(e_info.value) - ) - - def test_validate_resources_no_hard_limit_invalid_memory_request_out_of_range( - self, mocker - ): - # Arrange - no_limit_res = {"cores": -1, "memory": -1, "gpus": -1} - requests = resources.Resources(cores=1, memory=2048, gpus=0) - limits = resources.Resources(cores=2, memory=1024, gpus=1) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=no_limit_res, # upper limit - ) - - # Act - with pytest.raises(ValueError) as e_info: - resources.ComponentResources._validate_resources(requests, limits) - - # Assert - mock_get_serving_res_limits.assert_called_once() - assert ( - f"Requested memory resources cannot exceed the limit of {str(limits.memory)} MB." 
- in str(e_info.value) - ) - - def test_validate_resources_no_hard_limit_invalid_gpus_request_out_of_range( - self, mocker - ): - # Arrange - no_limit_res = {"cores": -1, "memory": -1, "gpus": -1} - requests = resources.Resources(cores=1, memory=1024, gpus=2) - limits = resources.Resources(cores=2, memory=2048, gpus=1) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=no_limit_res, # upper limit - ) - - # Act - with pytest.raises(ValueError) as e_info: - resources.ComponentResources._validate_resources(requests, limits) - - # Assert - mock_get_serving_res_limits.assert_called_once() - assert ( - f"Requested number of gpus cannot exceed the limit of {str(limits.gpus)} gpus." - in str(e_info.value) - ) - - def test_validate_resources_with_hard_limit_valid_resources(self, mocker): - # Arrange - hard_limit_res = {"cores": 3, "memory": 3072, "gpus": 3} - requests = resources.Resources(cores=1, memory=1024, gpus=0) - limits = resources.Resources(cores=2, memory=2048, gpus=1) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=hard_limit_res, # upper limit - ) - - # Act - resources.ComponentResources._validate_resources(requests, limits) - - # Assert - mock_get_serving_res_limits.assert_called_once() - - def test_validate_resources_with_hard_limit_invalid_cores_limit(self, mocker): - # Arrange - hard_limit_res = {"cores": 3, "memory": 3072, "gpus": 3} - requests = resources.Resources(cores=2, memory=1024, gpus=0) - limits = resources.Resources(cores=0, memory=2048, gpus=1) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=hard_limit_res, # upper limit - ) - - # Act - with pytest.raises(ValueError) as e_info: - resources.ComponentResources._validate_resources(requests, limits) - - # Assert - mock_get_serving_res_limits.assert_called_once() - assert "Limit number of cores must be greater than 0 cores." in str( - e_info.value - ) - - def test_validate_resources_with_hard_limit_invalid_memory_limit(self, mocker): - # Arrange - hard_limit_res = {"cores": 3, "memory": 3072, "gpus": 3} - requests = resources.Resources(cores=2, memory=1024, gpus=0) - limits = resources.Resources(cores=1, memory=0, gpus=1) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=hard_limit_res, # upper limit - ) - - # Act - with pytest.raises(ValueError) as e_info: - resources.ComponentResources._validate_resources(requests, limits) - - # Assert - mock_get_serving_res_limits.assert_called_once() - assert "Limit memory resources must be greater than 0 MB." in str(e_info.value) - - def test_validate_resources_with_hard_limit_invalid_gpus_limit(self, mocker): - # Arrange - hard_limit_res = {"cores": 3, "memory": 3072, "gpus": 3} - requests = resources.Resources(cores=2, memory=1024, gpus=0) - limits = resources.Resources(cores=1, memory=2048, gpus=-1) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=hard_limit_res, # upper limit - ) - - # Act - with pytest.raises(ValueError) as e_info: - resources.ComponentResources._validate_resources(requests, limits) - - # Assert - mock_get_serving_res_limits.assert_called_once() - assert "Limit number of gpus must be greater than or equal to 0 gpus." 
in str( - e_info.value - ) - - def test_validate_resources_with_hard_limit_invalid_cores_request(self, mocker): - # Arrange - hard_limit_res = {"cores": 3, "memory": 3072, "gpus": 3} - requests = resources.Resources(cores=2, memory=1024, gpus=0) - limits = resources.Resources(cores=4, memory=2048, gpus=1) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=hard_limit_res, # upper limit - ) - - # Act - with pytest.raises(ValueError) as e_info: - resources.ComponentResources._validate_resources(requests, limits) - - # Assert - mock_get_serving_res_limits.assert_called_once() - assert ( - f"Limit number of cores cannot exceed the maximum of {hard_limit_res['cores']} cores." - in str(e_info.value) - ) - - def test_validate_resources_with_hard_limit_invalid_memory_request(self, mocker): - # Arrange - hard_limit_res = {"cores": 3, "memory": 3072, "gpus": 3} - requests = resources.Resources(cores=2, memory=1024, gpus=0) - limits = resources.Resources(cores=3, memory=4076, gpus=1) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=hard_limit_res, # upper limit - ) - - # Act - with pytest.raises(ValueError) as e_info: - resources.ComponentResources._validate_resources(requests, limits) - - # Assert - mock_get_serving_res_limits.assert_called_once() - assert ( - f"Limit memory resources cannot exceed the maximum of {hard_limit_res['memory']} MB." - in str(e_info.value) - ) - - def test_validate_resources_with_hard_limit_invalid_gpus_request(self, mocker): - # Arrange - hard_limit_res = {"cores": 3, "memory": 3072, "gpus": 3} - requests = resources.Resources(cores=2, memory=1024, gpus=0) - limits = resources.Resources(cores=3, memory=2048, gpus=4) - mock_get_serving_res_limits = mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=hard_limit_res, # upper limit - ) - - # Act - with pytest.raises(ValueError) as e_info: - resources.ComponentResources._validate_resources(requests, limits) - - # Assert - mock_get_serving_res_limits.assert_called_once() - assert ( - f"Limit number of gpus cannot exceed the maximum of {hard_limit_res['gpus']} gpus." 
- in str(e_info.value) - ) - - # PredictorResources - - def test_from_response_json_predictor_resources(self, mocker, backend_fixtures): - mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=SERVING_RESOURCE_LIMITS, - ) - json = backend_fixtures["resources"][ - "get_component_resources_num_instances_requests_and_limits" - ]["response"] - - # Act - r = resources.PredictorResources.from_response_json(json) - - # Assert - assert r.num_instances == json["num_instances"] - assert r.requests.cores == json["requests"]["cores"] - assert r.requests.memory == json["requests"]["memory"] - assert r.requests.gpus == json["requests"]["gpus"] - assert r.limits.cores == json["limits"]["cores"] - assert r.limits.memory == json["limits"]["memory"] - assert r.limits.gpus == json["limits"]["gpus"] - - def test_from_response_json_predictor_resources_specific_keys( - self, mocker, backend_fixtures - ): - mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=SERVING_RESOURCE_LIMITS, - ) - json = backend_fixtures["resources"][ - "get_component_resources_requested_instances_and_predictor_resources" - ]["response"] - - # Act - r = resources.PredictorResources.from_response_json(json) - - # Assert - assert r.num_instances == json["requested_instances"] - assert r.requests.cores == json["predictor_resources"]["requests"]["cores"] - assert r.requests.memory == json["predictor_resources"]["requests"]["memory"] - assert r.requests.gpus == json["predictor_resources"]["requests"]["gpus"] - assert r.limits.cores == json["predictor_resources"]["limits"]["cores"] - assert r.limits.memory == json["predictor_resources"]["limits"]["memory"] - assert r.limits.gpus == json["predictor_resources"]["limits"]["gpus"] - - # TransformerResources - - def test_from_response_json_transformer_resources(self, mocker, backend_fixtures): - mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=SERVING_RESOURCE_LIMITS, - ) - json = backend_fixtures["resources"][ - "get_component_resources_num_instances_requests_and_limits" - ]["response"] - - # Act - r = resources.TransformerResources.from_response_json(json) - - # Assert - assert r.num_instances == json["num_instances"] - assert r.requests.cores == json["requests"]["cores"] - assert r.requests.memory == json["requests"]["memory"] - assert r.requests.gpus == json["requests"]["gpus"] - assert r.limits.cores == json["limits"]["cores"] - assert r.limits.memory == json["limits"]["memory"] - assert r.limits.gpus == json["limits"]["gpus"] - - def test_from_response_json_transformer_resources_specific_keys( - self, mocker, backend_fixtures - ): - mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=SERVING_RESOURCE_LIMITS, - ) - json = backend_fixtures["resources"][ - "get_component_resources_requested_instances_and_transformer_resources" - ]["response"] - - # Act - r = resources.TransformerResources.from_response_json(json) - - # Assert - assert r.num_instances == json["requested_transformer_instances"] - assert r.requests.cores == json["transformer_resources"]["requests"]["cores"] - assert r.requests.memory == json["transformer_resources"]["requests"]["memory"] - assert r.requests.gpus == json["transformer_resources"]["requests"]["gpus"] - assert r.limits.cores == json["transformer_resources"]["limits"]["cores"] - assert r.limits.memory == json["transformer_resources"]["limits"]["memory"] - assert r.limits.gpus == json["transformer_resources"]["limits"]["gpus"] - - def 
test_from_response_json_transformer_resources_default_limits( - self, mocker, backend_fixtures - ): - mocker.patch( - "hsml.client.get_serving_resource_limits", - return_value=SERVING_RESOURCE_LIMITS, - ) - mocker.patch( - "hsml.resources.ComponentResources._get_default_resource_limits", - return_value=( - SERVING_RESOURCE_LIMITS["cores"], - SERVING_RESOURCE_LIMITS["memory"], - SERVING_RESOURCE_LIMITS["gpus"], - ), - ) - json = backend_fixtures["resources"][ - "get_component_resources_num_instances_and_requests" - ]["response"] - - # Act - r = resources.TransformerResources.from_response_json(json) - - # Assert - assert r.num_instances == json["num_instances"] - assert r.requests.cores == json["requests"]["cores"] - assert r.requests.memory == json["requests"]["memory"] - assert r.requests.gpus == json["requests"]["gpus"] - assert r.limits.cores == SERVING_RESOURCE_LIMITS["cores"] - assert r.limits.memory == SERVING_RESOURCE_LIMITS["memory"] - assert r.limits.gpus == SERVING_RESOURCE_LIMITS["gpus"] diff --git a/hsml/python/tests/test_schema.py b/hsml/python/tests/test_schema.py deleted file mode 100644 index 69ddd0782..000000000 --- a/hsml/python/tests/test_schema.py +++ /dev/null @@ -1,199 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import numpy as np -from hsml import schema - - -class TestSchema: - # constructor - - def test_constructor_default(self, mocker): - # Arrange - mock_tensor = mocker.MagicMock() - mock_tensor.tensors = mocker.MagicMock(return_value="tensor_schema") - mock_columnar = mocker.MagicMock() - mock_columnar.columns = mocker.MagicMock(return_value="columnar_schema") - mock_convert_tensor_to_schema = mocker.patch( - "hsml.schema.Schema._convert_tensor_to_schema", return_value=mock_tensor - ) - mock_convert_columnar_to_schema = mocker.patch( - "hsml.schema.Schema._convert_columnar_to_schema", - return_value=mock_columnar, - ) - - # Act - s = schema.Schema() - - # Assert - assert s.columnar_schema == mock_columnar.columns - assert not hasattr(s, "tensor_schema") - mock_convert_tensor_to_schema.assert_not_called() - mock_convert_columnar_to_schema.assert_called_once_with(None) - - def test_constructor_numpy(self, mocker): - # Arrange - obj = np.array([]) - mock_tensor = mocker.MagicMock() - mock_tensor.tensors = mocker.MagicMock(return_value="tensor_schema") - mock_columnar = mocker.MagicMock() - mock_columnar.columns = mocker.MagicMock(return_value="columnar_schema") - mock_convert_tensor_to_schema = mocker.patch( - "hsml.schema.Schema._convert_tensor_to_schema", return_value=mock_tensor - ) - mock_convert_columnar_to_schema = mocker.patch( - "hsml.schema.Schema._convert_columnar_to_schema", - return_value=mock_columnar, - ) - - # Act - s = schema.Schema(obj) - - # Assert - assert s.tensor_schema == mock_tensor.tensors - assert not hasattr(s, "columnar_schema") - mock_convert_columnar_to_schema.assert_not_called() - mock_convert_tensor_to_schema.assert_called_once_with(obj) - - def test_constructor_tensor_list(self, mocker): - # Arrange - obj = [{"shape": "some_shape"}] - mock_tensor = mocker.MagicMock() - mock_tensor.tensors = mocker.MagicMock(return_value="tensor_schema") - mock_columnar = mocker.MagicMock() - mock_columnar.columns = mocker.MagicMock(return_value="columnar_schema") - mock_convert_tensor_to_schema = mocker.patch( - "hsml.schema.Schema._convert_tensor_to_schema", return_value=mock_tensor - ) - mock_convert_columnar_to_schema = mocker.patch( - "hsml.schema.Schema._convert_columnar_to_schema", - return_value=mock_columnar, - ) - - # Act - s = schema.Schema(obj) - - # Assert - assert s.tensor_schema == mock_tensor.tensors - assert not hasattr(s, "columnar_schema") - mock_convert_columnar_to_schema.assert_not_called() - mock_convert_tensor_to_schema.assert_called_once_with(obj) - - def test_constructor_column_list(self, mocker): - # Arrange - obj = [{"no_shape": "nothing"}] - mock_tensor = mocker.MagicMock() - mock_tensor.tensors = mocker.MagicMock(return_value="tensor_schema") - mock_columnar = mocker.MagicMock() - mock_columnar.columns = mocker.MagicMock(return_value="columnar_schema") - mock_convert_tensor_to_schema = mocker.patch( - "hsml.schema.Schema._convert_tensor_to_schema", return_value=mock_tensor - ) - mock_convert_columnar_to_schema = mocker.patch( - "hsml.schema.Schema._convert_columnar_to_schema", - return_value=mock_columnar, - ) - - # Act - s = schema.Schema(obj) - - # Assert - assert s.columnar_schema == mock_columnar.columns - assert not hasattr(s, "tensor_schema") - mock_convert_tensor_to_schema.assert_not_called() - mock_convert_columnar_to_schema.assert_called_once_with(obj) - - # convert to schema - - def test_convert_columnar_to_schema(self, mocker): - # Arrange - obj = {"key": "value"} - mock_columnar_schema_init = mocker.patch( - 
"hsml.utils.schema.columnar_schema.ColumnarSchema.__init__", - return_value=None, - ) - mock_schema = mocker.MagicMock() - mock_schema._convert_columnar_to_schema = ( - schema.Schema._convert_columnar_to_schema - ) - - # Act - ret = mock_schema._convert_columnar_to_schema(mock_schema, obj) - - # Assert - assert isinstance(ret, schema.ColumnarSchema) - mock_columnar_schema_init.assert_called_once_with(obj) - - def test_convert_tensor_to_schema(self, mocker): - # Arrange - obj = {"key": "value"} - mock_tensor_schema_init = mocker.patch( - "hsml.utils.schema.tensor_schema.TensorSchema.__init__", - return_value=None, - ) - mock_schema = mocker.MagicMock() - mock_schema._convert_tensor_to_schema = schema.Schema._convert_tensor_to_schema - - # Act - ret = mock_schema._convert_tensor_to_schema(mock_schema, obj) - - # Assert - assert isinstance(ret, schema.TensorSchema) - mock_tensor_schema_init.assert_called_once_with(obj) - - # get type - - def test_get_type_none(self, mocker): - # Arrange - class MockSchema: - pass - - mock_schema = MockSchema() - mock_schema._get_type = schema.Schema._get_type - - # Act - t = mock_schema._get_type(mock_schema) - - # Assert - assert t is None - - def test_get_type_tensor(self, mocker): - # Arrange - class MockSchema: - tensor_schema = {} - - mock_schema = MockSchema() - mock_schema._get_type = schema.Schema._get_type - - # Act - t = mock_schema._get_type(mock_schema) - - # Assert - assert t == "tensor" - - def test_get_type_columnar(self, mocker): - # Arrange - class MockSchema: - columnar_schema = {} - - mock_schema = MockSchema() - mock_schema._get_type = schema.Schema._get_type - - # Act - t = mock_schema._get_type(mock_schema) - - # Assert - assert t == "columnar" diff --git a/hsml/python/tests/test_tag.py b/hsml/python/tests/test_tag.py deleted file mode 100644 index 7a12955ac..000000000 --- a/hsml/python/tests/test_tag.py +++ /dev/null @@ -1,62 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#
-
-import humps
-from hsml import tag
-
-
-class TestTag:
-    # from response json
-
-    def test_from_response_json(self, backend_fixtures):
-        # Arrange
-        json = backend_fixtures["tag"]["get"]["response"]
-        json_camelized = humps.camelize(json)
-
-        # Act
-        t_list = tag.Tag.from_response_json(json_camelized)
-
-        # Assert
-        assert len(t_list) == 1
-        t = t_list[0]
-        assert t.name == "test_name"
-        assert t.value == "test_value"
-
-    def test_from_response_json_empty(self, backend_fixtures):
-        # Arrange
-        json = backend_fixtures["tag"]["get_empty"]["response"]
-        json_camelized = humps.camelize(json)
-
-        # Act
-        t_list = tag.Tag.from_response_json(json_camelized)
-
-        # Assert
-        assert len(t_list) == 0
-
-    # constructor
-
-    def test_constructor(self, backend_fixtures):
-        # Arrange
-        json = backend_fixtures["tag"]["get"]["response"]["items"][0]
-        tag_name = json.pop("name")
-        tag_value = json.pop("value")
-
-        # Act
-        t = tag.Tag(name=tag_name, value=tag_value, **json)
-
-        # Assert
-        assert t.name == "test_name"
-        assert t.value == "test_value"
diff --git a/hsml/python/tests/test_transformer.py b/hsml/python/tests/test_transformer.py
deleted file mode 100644
index 7df302bd6..000000000
--- a/hsml/python/tests/test_transformer.py
+++ /dev/null
@@ -1,309 +0,0 @@
-#
-# Copyright 2024 Hopsworks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import copy
-
-import pytest
-from hsml import resources, transformer
-from hsml.constants import RESOURCES
-
-
-SERVING_RESOURCE_LIMITS = {"cores": 2, "memory": 1024, "gpus": 2}
-SERVING_NUM_INSTANCES_NO_LIMIT = [-1]
-SERVING_NUM_INSTANCES_SCALE_TO_ZERO = [0]
-SERVING_NUM_INSTANCES_ONE = [1]
-
-
-class TestTransformer:
-    # from response json
-
-    def test_from_response_json_with_transformer_field(self, mocker, backend_fixtures):
-        # Arrange
-        self._mock_serving_variables(mocker, SERVING_NUM_INSTANCES_NO_LIMIT)
-        json = backend_fixtures["transformer"]["get_deployment_with_transformer"][
-            "response"
-        ]
-
-        # Act
-        t = transformer.Transformer.from_response_json(json)
-
-        # Assert
-        assert isinstance(t, transformer.Transformer)
-        assert t.script_file == json["transformer"]
-
-        tr_resources = json["transformer_resources"]
-        assert (
-            t.resources.num_instances == tr_resources["requested_transformer_instances"]
-        )
-        assert t.resources.requests.cores == tr_resources["requests"]["cores"]
-        assert t.resources.requests.memory == tr_resources["requests"]["memory"]
-        assert t.resources.requests.gpus == tr_resources["requests"]["gpus"]
-        assert t.resources.limits.cores == tr_resources["limits"]["cores"]
-        assert t.resources.limits.memory == tr_resources["limits"]["memory"]
-        assert t.resources.limits.gpus == tr_resources["limits"]["gpus"]
-
-    def test_from_response_json_with_script_file_field(self, mocker, backend_fixtures):
-        # Arrange
-        self._mock_serving_variables(mocker, SERVING_NUM_INSTANCES_NO_LIMIT)
-        json = backend_fixtures["transformer"]["get_transformer_with_resources"][
-            "response"
-        ]
-
-        # Act
-        t = transformer.Transformer.from_response_json(json)
-
-        # Assert
-        assert isinstance(t, transformer.Transformer)
-        assert t.script_file == json["script_file"]
-
-        tr_resources = json["resources"]
-        assert t.resources.num_instances == tr_resources["num_instances"]
-        assert t.resources.requests.cores == tr_resources["requests"]["cores"]
-        assert t.resources.requests.memory == tr_resources["requests"]["memory"]
-        assert t.resources.requests.gpus == tr_resources["requests"]["gpus"]
-        assert t.resources.limits.cores == tr_resources["limits"]["cores"]
-        assert t.resources.limits.memory == tr_resources["limits"]["memory"]
-        assert t.resources.limits.gpus == tr_resources["limits"]["gpus"]
-
-    def test_from_response_json_empty(self, mocker, backend_fixtures):
-        # Arrange
-        self._mock_serving_variables(mocker, SERVING_NUM_INSTANCES_NO_LIMIT)
-        json = backend_fixtures["transformer"]["get_deployment_without_transformer"][
-            "response"
-        ]
-
-        # Act
-        t = transformer.Transformer.from_response_json(json)
-
-        # Assert
-        assert t is None
-
-    def test_from_response_json_default_resources(self, mocker, backend_fixtures):
-        # Arrange
-        self._mock_serving_variables(
-            mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=False
-        )
-        json = backend_fixtures["transformer"]["get_transformer_without_resources"][
-            "response"
-        ]
-
-        # Act
-        t = transformer.Transformer.from_response_json(json)
-
-        # Assert
-        assert isinstance(t, transformer.Transformer)
-        assert t.script_file == json["script_file"]
-
-        assert t.resources.num_instances == RESOURCES.MIN_NUM_INSTANCES
-        assert t.resources.requests.cores == RESOURCES.MIN_CORES
-        assert t.resources.requests.memory == RESOURCES.MIN_MEMORY
-        assert t.resources.requests.gpus == RESOURCES.MIN_GPUS
-        assert t.resources.limits.cores == RESOURCES.MAX_CORES
-        assert t.resources.limits.memory == RESOURCES.MAX_MEMORY
-        assert t.resources.limits.gpus == RESOURCES.MAX_GPUS
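# ---------------------------------------------------------------------------
# Editor's note: the block below is an illustrative sketch, not part of the
# reverted patch. The "# default num instances" and "# default resources"
# tests further down mock `hsml.client.is_scale_to_zero_required` and pin
# down the behaviour of `Transformer._get_default_num_instances()` and
# `Transformer._get_default_resources()`. Minimal logic consistent with those
# assertions could look like this; the real implementation (helper methods on
# hsml.transformer.Transformer, sketched here as free functions) is not shown
# anywhere in this patch.

from hsml import client, resources
from hsml.constants import RESOURCES


def _get_default_num_instances() -> int:
    # Scale-to-zero deployments must start at 0 instances; otherwise fall
    # back to the library-wide minimum instance count.
    return 0 if client.is_scale_to_zero_required() else RESOURCES.MIN_NUM_INSTANCES


def _get_default_resources() -> resources.TransformerResources:
    # Only the instance count depends on the scale-to-zero setting; requests
    # and limits are filled in by TransformerResources itself.
    return resources.TransformerResources(
        num_instances=_get_default_num_instances()
    )
# ---------------------------------------------------------------------------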
-
-    # constructor
-
-    def test_constructor_default_resources(self, mocker, backend_fixtures):
-        # Arrange
-        self._mock_serving_variables(
-            mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=False
-        )
-        json = backend_fixtures["transformer"]["get_transformer_without_resources"][
-            "response"
-        ]
-
-        # Act
-        t = transformer.Transformer(json["script_file"], resources=None)
-
-        # Assert
-        assert t.script_file == json["script_file"]
-
-        assert t.resources.num_instances == RESOURCES.MIN_NUM_INSTANCES
-        assert t.resources.requests.cores == RESOURCES.MIN_CORES
-        assert t.resources.requests.memory == RESOURCES.MIN_MEMORY
-        assert t.resources.requests.gpus == RESOURCES.MIN_GPUS
-        assert t.resources.limits.cores == RESOURCES.MAX_CORES
-        assert t.resources.limits.memory == RESOURCES.MAX_MEMORY
-        assert t.resources.limits.gpus == RESOURCES.MAX_GPUS
-
-    def test_constructor_default_resources_when_scale_to_zero_is_required(
-        self, mocker, backend_fixtures
-    ):
-        # Arrange
-        self._mock_serving_variables(
-            mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=True
-        )
-        json = backend_fixtures["transformer"]["get_transformer_without_resources"][
-            "response"
-        ]
-
-        # Act
-        t = transformer.Transformer(json["script_file"], resources=None)
-
-        # Assert
-        assert t.script_file == json["script_file"]
-
-        assert t.resources.num_instances == 0
-        assert t.resources.requests.cores == RESOURCES.MIN_CORES
-        assert t.resources.requests.memory == RESOURCES.MIN_MEMORY
-        assert t.resources.requests.gpus == RESOURCES.MIN_GPUS
-        assert t.resources.limits.cores == RESOURCES.MAX_CORES
-        assert t.resources.limits.memory == RESOURCES.MAX_MEMORY
-        assert t.resources.limits.gpus == RESOURCES.MAX_GPUS
-
-    # validate resources
-
-    def test_validate_resources_none(self):
-        # Act
-        res = transformer.Transformer._validate_resources(None)
-
-        # Assert
-        assert res is None
-
-    def test_validate_resources_num_instances_zero(self, mocker):
-        # Arrange
-        self._mock_serving_variables(
-            mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=False
-        )
-        tr = resources.TransformerResources(num_instances=0)
-
-        # Act
-        res = transformer.Transformer._validate_resources(tr)
-
-        # Assert
-        assert res == tr
-
-    def test_validate_resources_num_instances_one_without_scale_to_zero(self, mocker):
-        # Arrange
-        self._mock_serving_variables(
-            mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=False
-        )
-        tr = resources.TransformerResources(num_instances=1)
-
-        # Act
-        res = transformer.Transformer._validate_resources(tr)
-
-        # Assert
-        assert res == tr
-
-    def test_validate_resources_num_instances_one_with_scale_to_zero(self, mocker):
-        # Arrange
-        self._mock_serving_variables(
-            mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=True
-        )
-        tr = resources.TransformerResources(num_instances=1)
-
-        # Act
-        with pytest.raises(ValueError) as e_info:
-            _ = transformer.Transformer._validate_resources(tr)
-
-        # Assert
-        assert "Scale-to-zero is required" in str(e_info.value)
-
-    # default num instances
-
-    def test_get_default_num_instances_without_scale_to_zero(self, mocker):
-        # Arrange
-        self._mock_serving_variables(
-            mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=False
-        )
-
-        # Act
-        num_instances = transformer.Transformer._get_default_num_instances()
-
-        # Assert
-        assert num_instances == RESOURCES.MIN_NUM_INSTANCES
-
-    def test_get_default_num_instances_with_scale_to_zero(self, mocker):
-        # Arrange
-        self._mock_serving_variables(
-            mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=True
-        )
-
-        # Act
-        num_instances = transformer.Transformer._get_default_num_instances()
-
-        # Assert
-        assert num_instances == 0
-
-    # default resources
-
-    def test_get_default_resources_without_scale_to_zero(self, mocker):
-        # Arrange
-        self._mock_serving_variables(
-            mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=False
-        )
-
-        # Act
-        res = transformer.Transformer._get_default_resources()
-
-        # Assert
-        assert isinstance(res, resources.TransformerResources)
-        assert res.num_instances == RESOURCES.MIN_NUM_INSTANCES
-
-    def test_get_default_resources_with_scale_to_zero(self, mocker):
-        # Arrange
-        self._mock_serving_variables(
-            mocker, SERVING_NUM_INSTANCES_NO_LIMIT, force_scale_to_zero=True
-        )
-
-        # Act
-        res = transformer.Transformer._get_default_resources()
-
-        # Assert
-        assert isinstance(res, resources.TransformerResources)
-        assert res.num_instances == 0
-
-    # extract fields from json
-
-    def test_extract_fields_from_json(self, mocker, backend_fixtures):
-        # Arrange
-        self._mock_serving_variables(mocker, SERVING_NUM_INSTANCES_NO_LIMIT)
-        json = backend_fixtures["transformer"]["get_deployment_with_transformer"][
-            "response"
-        ]
-        json_copy = copy.deepcopy(json)
-
-        # Act
-        sf, rc = transformer.Transformer.extract_fields_from_json(json_copy)
-
-        # Assert
-        assert sf == json["transformer"]
-        assert isinstance(rc, resources.TransformerResources)
-
-        tr_resources = json["transformer_resources"]
-        assert rc.num_instances == tr_resources["requested_transformer_instances"]
-        assert rc.requests.cores == tr_resources["requests"]["cores"]
-        assert rc.requests.memory == tr_resources["requests"]["memory"]
-        assert rc.requests.gpus == tr_resources["requests"]["gpus"]
-        assert rc.limits.cores == tr_resources["limits"]["cores"]
-        assert rc.limits.memory == tr_resources["limits"]["memory"]
-        assert rc.limits.gpus == tr_resources["limits"]["gpus"]
-
-    # auxiliary methods
-
-    def _mock_serving_variables(self, mocker, num_instances, force_scale_to_zero=False):
-        mocker.patch(
-            "hsml.client.get_serving_resource_limits",
-            return_value=SERVING_RESOURCE_LIMITS,
-        )
-        mocker.patch(
-            "hsml.client.get_serving_num_instances_limits", return_value=num_instances
-        )
-        mocker.patch(
-            "hsml.client.is_scale_to_zero_required", return_value=force_scale_to_zero
-        )
diff --git a/hsml/python/tests/test_util.py b/hsml/python/tests/test_util.py
deleted file mode 100644
index 3e7d18166..000000000
--- a/hsml/python/tests/test_util.py
+++ /dev/null
@@ -1,645 +0,0 @@
-#
-# Copyright 2024 Hopsworks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import os
-from urllib.parse import ParseResult
-
-import pytest
-from hsml import util
-from hsml.constants import MODEL
-from hsml.model import Model as BaseModel
-from hsml.predictor import Predictor as BasePredictor
-from hsml.python.model import Model as PythonModel
-from hsml.python.predictor import Predictor as PyPredictor
-from hsml.sklearn.model import Model as SklearnModel
-from hsml.sklearn.predictor import Predictor as SkLearnPredictor
-from hsml.tensorflow.model import Model as TensorflowModel
-from hsml.tensorflow.predictor import Predictor as TFPredictor
-from hsml.torch.model import Model as TorchModel
-from hsml.torch.predictor import Predictor as TorchPredictor
-
-
-class TestUtil:
-    # schema and types
-
-    # - set_model_class
-
-    def test_set_model_class_base(self, backend_fixtures):
-        # Arrange
-        json = backend_fixtures["model"]["get_base"]["response"]["items"][0]
-
-        # Act
-        model = util.set_model_class(json)
-
-        # Assert
-        assert isinstance(model, BaseModel)
-        assert model.framework is None
-
-    def test_set_model_class_python(self, backend_fixtures):
-        # Arrange
-        json = backend_fixtures["model"]["get_python"]["response"]["items"][0]
-
-        # Act
-        model = util.set_model_class(json)
-
-        # Assert
-        assert isinstance(model, PythonModel)
-        assert model.framework == MODEL.FRAMEWORK_PYTHON
-
-    def test_set_model_class_sklearn(self, backend_fixtures):
-        # Arrange
-        json = backend_fixtures["model"]["get_sklearn"]["response"]["items"][0]
-
-        # Act
-        model = util.set_model_class(json)
-
-        # Assert
-        assert isinstance(model, SklearnModel)
-        assert model.framework == MODEL.FRAMEWORK_SKLEARN
-
-    def test_set_model_class_tensorflow(self, backend_fixtures):
-        # Arrange
-        json = backend_fixtures["model"]["get_tensorflow"]["response"]["items"][0]
-
-        # Act
-        model = util.set_model_class(json)
-
-        # Assert
-        assert isinstance(model, TensorflowModel)
-        assert model.framework == MODEL.FRAMEWORK_TENSORFLOW
-
-    def test_set_model_class_torch(self, backend_fixtures):
-        # Arrange
-        json = backend_fixtures["model"]["get_torch"]["response"]["items"][0]
-
-        # Act
-        model = util.set_model_class(json)
-
-        # Assert
-        assert isinstance(model, TorchModel)
-        assert model.framework == MODEL.FRAMEWORK_TORCH
-
-    def test_set_model_class_unsupported(self, backend_fixtures):
-        # Arrange
-        json = backend_fixtures["model"]["get_base"]["response"]["items"][0]
-        json["framework"] = "UNSUPPORTED"
-
-        # Act
-        with pytest.raises(ValueError) as e_info:
-            util.set_model_class(json)
-
-        # Assert
-        assert "is not a supported framework" in str(e_info.value)
-
-    # - input_example_to_json
-
-    def test_input_example_to_json_from_numpy(self, mocker, input_example_numpy):
-        # Arrange
-        mock_handle_tensor_input = mocker.patch("hsml.util._handle_tensor_input")
-        mock_handle_dataframe_input = mocker.patch("hsml.util._handle_dataframe_input")
-        mock_handle_dict_input = mocker.patch("hsml.util._handle_dict_input")
-
-        # Act
-        util.input_example_to_json(input_example_numpy)
-
-        # Assert
-        mock_handle_tensor_input.assert_called_once()
-        mock_handle_dict_input.assert_not_called()
-        mock_handle_dataframe_input.assert_not_called()
-
-    def test_input_example_to_json_from_dict(self, mocker, input_example_dict):
-        # Arrange
-        mock_handle_tensor_input = mocker.patch("hsml.util._handle_tensor_input")
-        mock_handle_dataframe_input = mocker.patch("hsml.util._handle_dataframe_input")
-        mock_handle_dict_input = mocker.patch("hsml.util._handle_dict_input")
-
-        # Act
-        util.input_example_to_json(input_example_dict)
-
-        # Assert
-        mock_handle_tensor_input.assert_not_called()
-        mock_handle_dict_input.assert_called_once()
-        mock_handle_dataframe_input.assert_not_called()
-
-    def test_input_example_to_json_from_dataframe(
-        self, mocker, input_example_dataframe_pandas_dataframe
-    ):
-        # Arrange
-        mock_handle_tensor_input = mocker.patch("hsml.util._handle_tensor_input")
-        mock_handle_dataframe_input = mocker.patch("hsml.util._handle_dataframe_input")
-        mock_handle_dict_input = mocker.patch("hsml.util._handle_dict_input")
-
-        # Act
-        util.input_example_to_json(input_example_dataframe_pandas_dataframe)
-
-        # Assert
-        mock_handle_tensor_input.assert_not_called()
-        mock_handle_dict_input.assert_not_called()
-        mock_handle_dataframe_input.assert_called_once()  # default
-
-    def test_input_example_to_json_unsupported(self, mocker):
-        # Arrange
-        mock_handle_tensor_input = mocker.patch("hsml.util._handle_tensor_input")
-        mock_handle_dataframe_input = mocker.patch("hsml.util._handle_dataframe_input")
-        mock_handle_dict_input = mocker.patch("hsml.util._handle_dict_input")
-
-        # Act
-        util.input_example_to_json(lambda unsupported_type: None)
-
-        # Assert
-        mock_handle_tensor_input.assert_not_called()
-        mock_handle_dict_input.assert_not_called()
-        mock_handle_dataframe_input.assert_called_once()  # default
-
-    # - handle input examples
-
-    def test_handle_dataframe_input_pandas_dataframe(
-        self,
-        input_example_dataframe_pandas_dataframe,
-        input_example_dataframe_pandas_dataframe_empty,
-        input_example_dataframe_list,
-    ):
-        # Act
-        json = util._handle_dataframe_input(input_example_dataframe_pandas_dataframe)
-        with pytest.raises(ValueError) as e_info:
-            util._handle_dataframe_input(input_example_dataframe_pandas_dataframe_empty)
-
-        # Assert
-        assert isinstance(json, list)
-        assert json == input_example_dataframe_list
-        assert "can not be empty" in str(e_info.value)
-
-    def test_handle_dataframe_input_pandas_dataframe_series(
-        self,
-        input_example_dataframe_pandas_series,
-        input_example_dataframe_pandas_series_empty,
-        input_example_dataframe_list,
-    ):
-        # Act
-        json = util._handle_dataframe_input(input_example_dataframe_pandas_series)
-        with pytest.raises(ValueError) as e_info:
-            util._handle_dataframe_input(input_example_dataframe_pandas_series_empty)
-
-        # Assert
-        assert isinstance(json, list)
-        assert json == input_example_dataframe_list
-        assert "can not be empty" in str(e_info.value)
-
-    def test_handle_dataframe_input_list(self, input_example_dataframe_list):
-        # Act
-        json = util._handle_dataframe_input(input_example_dataframe_list)
-
-        # Assert
-        assert isinstance(json, list)
-        assert json == input_example_dataframe_list
-
-    def test_handle_dataframe_input_unsupported(self):
-        # Act
-        with pytest.raises(TypeError) as e_info:
-            util._handle_dataframe_input(lambda unsupported: None)
-
-        # Assert
-        assert "is not a supported input example type" in str(e_info.value)
-
-    def test_handle_tensor_input(
-        self, input_example_numpy, input_example_dataframe_list
-    ):
-        # Act
-        json = util._handle_tensor_input(input_example_numpy)
-
-        # Assert
-        assert isinstance(json, list)
-        assert json == input_example_dataframe_list
-
-    def test_handle_dict_input(self, input_example_dict):
-        # Act
-        json = util._handle_dict_input(input_example_dict)
-
-        # Assert
-        assert isinstance(json, dict)
-        assert json == input_example_dict
-
-    # artifacts
-
-    def test_compress_dir(self, mocker):
-        # Arrange
-        archive_name = "archive_name"
-        path_to_archive = os.path.join("this", "is", "the", "path", "to", "archive")
-        archive_out_path = os.path.join(
- "this", "is", "the", "output", "path", "to", "archive" - ) - full_archive_out_path = os.path.join(archive_out_path, archive_name) - mock_isdir = mocker.patch("os.path.isdir", return_value=True) - mock_shutil_make_archive = mocker.patch( - "shutil.make_archive", return_value="resulting_path" - ) - - # Act - path = util.compress(archive_out_path, archive_name, path_to_archive) - - # Assert - assert path == "resulting_path" - mock_isdir.assert_called_once_with(path_to_archive) - mock_shutil_make_archive.assert_called_once_with( - full_archive_out_path, "gztar", path_to_archive - ) - - def test_compress_file(self, mocker): - # Arrange - archive_name = "archive_name" - path_to_archive = os.path.join("path", "to", "archive") - archive_out_path = os.path.join("output", "path", "to", "archive") - full_archive_out_path = os.path.join(archive_out_path, archive_name) - archive_path_dirname = os.path.join("path", "to") - archive_path_basename = "archive" - mock_isdir = mocker.patch("os.path.isdir", return_value=False) - mock_shutil_make_archive = mocker.patch( - "shutil.make_archive", return_value="resulting_path" - ) - - # Act - path = util.compress(archive_out_path, archive_name, path_to_archive) - - # Assert - assert path == "resulting_path" - mock_isdir.assert_called_once_with(path_to_archive) - mock_shutil_make_archive.assert_called_once_with( - full_archive_out_path, "gztar", archive_path_dirname, archive_path_basename - ) - - def test_decompress(self, mocker): - # Arrange - archive_file_path = os.path.join("path", "to", "archive", "file") - extract_dir = False - mock_shutil_unpack_archive = mocker.patch( - "shutil.unpack_archive", return_value="resulting_path" - ) - - # Act - path = util.decompress(archive_file_path, extract_dir) - - # Assert - assert path == "resulting_path" - mock_shutil_unpack_archive.assert_called_once_with( - archive_file_path, extract_dir=extract_dir - ) - - # export models - - def test_validate_metrics(self, model_metrics): - # Act - util.validate_metrics(model_metrics) - - # Assert - # noop - - def test_validate_metrics_unsupported_type(self, model_metrics_wrong_type): - # Act - with pytest.raises(TypeError) as e_info: - util.validate_metrics(model_metrics_wrong_type) - - # Assert - assert "expected a dict" in str(e_info.value) - - def test_validate_metrics_unsupported_metric_type( - self, model_metrics_wrong_metric_type - ): - # Act - with pytest.raises(TypeError) as e_info: - util.validate_metrics(model_metrics_wrong_metric_type) - - # Assert - assert "expected a string" in str(e_info.value) - - def test_validate_metrics_unsupported_metric_value( - self, model_metrics_wrong_metric_value - ): - # Act - with pytest.raises(ValueError) as e_info: - util.validate_metrics(model_metrics_wrong_metric_value) - - # Assert - assert "is not a number" in str(e_info.value) - - # model serving - - def test_get_predictor_for_model_base(self, mocker, model_base): - # Arrange - def pred_base_spec(model_framework, model_server): - pass - - pred_base = mocker.patch( - "hsml.predictor.Predictor.__init__", return_value=None, spec=pred_base_spec - ) - pred_python = mocker.patch("hsml.python.predictor.Predictor.__init__") - pred_sklearn = mocker.patch("hsml.sklearn.predictor.Predictor.__init__") - pred_tensorflow = mocker.patch("hsml.tensorflow.predictor.Predictor.__init__") - pred_torch = mocker.patch("hsml.torch.predictor.Predictor.__init__") - - # Act - predictor = util.get_predictor_for_model(model_base) - - # Assert - assert isinstance(predictor, BasePredictor) - 
pred_base.assert_called_once_with( - model_framework=MODEL.FRAMEWORK_PYTHON, model_server=MODEL.FRAMEWORK_PYTHON - ) - pred_python.assert_not_called() - pred_sklearn.assert_not_called() - pred_tensorflow.assert_not_called() - pred_torch.assert_not_called() - - def test_get_predictor_for_model_python(self, mocker, model_python): - # Arrange - pred_base = mocker.patch("hsml.predictor.Predictor.__init__") - pred_python = mocker.patch( - "hsml.python.predictor.Predictor.__init__", return_value=None - ) - pred_sklearn = mocker.patch("hsml.sklearn.predictor.Predictor.__init__") - pred_tensorflow = mocker.patch("hsml.tensorflow.predictor.Predictor.__init__") - pred_torch = mocker.patch("hsml.torch.predictor.Predictor.__init__") - - # Act - predictor = util.get_predictor_for_model(model_python) - - # Assert - assert isinstance(predictor, PyPredictor) - pred_base.assert_not_called() - pred_python.assert_called_once() - pred_sklearn.assert_not_called() - pred_tensorflow.assert_not_called() - pred_torch.assert_not_called() - - def test_get_predictor_for_model_sklearn(self, mocker, model_sklearn): - # Arrange - pred_base = mocker.patch("hsml.predictor.Predictor.__init__") - pred_python = mocker.patch("hsml.python.predictor.Predictor.__init__") - pred_sklearn = mocker.patch( - "hsml.sklearn.predictor.Predictor.__init__", return_value=None - ) - pred_tensorflow = mocker.patch("hsml.tensorflow.predictor.Predictor.__init__") - pred_torch = mocker.patch("hsml.torch.predictor.Predictor.__init__") - - # Act - predictor = util.get_predictor_for_model(model_sklearn) - - # Assert - assert isinstance(predictor, SkLearnPredictor) - pred_base.assert_not_called() - pred_python.assert_not_called() - pred_sklearn.assert_called_once() - pred_tensorflow.assert_not_called() - pred_torch.assert_not_called() - - def test_get_predictor_for_model_tensorflow(self, mocker, model_tensorflow): - # Arrange - pred_base = mocker.patch("hsml.predictor.Predictor.__init__") - pred_python = mocker.patch("hsml.python.predictor.Predictor.__init__") - pred_sklearn = mocker.patch("hsml.sklearn.predictor.Predictor.__init__") - pred_tensorflow = mocker.patch( - "hsml.tensorflow.predictor.Predictor.__init__", return_value=None - ) - pred_torch = mocker.patch("hsml.torch.predictor.Predictor.__init__") - - # Act - predictor = util.get_predictor_for_model(model_tensorflow) - - # Assert - assert isinstance(predictor, TFPredictor) - pred_base.assert_not_called() - pred_python.assert_not_called() - pred_sklearn.assert_not_called() - pred_tensorflow.assert_called_once() - pred_torch.assert_not_called() - - def test_get_predictor_for_model_torch(self, mocker, model_torch): - # Arrange - pred_base = mocker.patch("hsml.predictor.Predictor.__init__") - pred_python = mocker.patch("hsml.python.predictor.Predictor.__init__") - pred_sklearn = mocker.patch("hsml.sklearn.predictor.Predictor.__init__") - pred_tensorflow = mocker.patch("hsml.tensorflow.predictor.Predictor.__init__") - pred_torch = mocker.patch( - "hsml.torch.predictor.Predictor.__init__", return_value=None - ) - - # Act - predictor = util.get_predictor_for_model(model_torch) - - # Assert - assert isinstance(predictor, TorchPredictor) - pred_base.assert_not_called() - pred_python.assert_not_called() - pred_sklearn.assert_not_called() - pred_tensorflow.assert_not_called() - pred_torch.assert_called_once() - - def test_get_predictor_for_model_non_base(self, mocker): - # Arrange - pred_base = mocker.patch("hsml.predictor.Predictor.__init__") - pred_python = 
mocker.patch("hsml.python.predictor.Predictor.__init__") - pred_sklearn = mocker.patch("hsml.sklearn.predictor.Predictor.__init__") - pred_tensorflow = mocker.patch("hsml.tensorflow.predictor.Predictor.__init__") - pred_torch = mocker.patch("hsml.torch.predictor.Predictor.__init__") - - class NonBaseModel: - pass - - # Act - with pytest.raises(ValueError) as e_info: - util.get_predictor_for_model(NonBaseModel()) - - assert "an instance of {} class is expected".format(BaseModel) in str( - e_info.value - ) - pred_base.assert_not_called() - pred_python.assert_not_called() - pred_sklearn.assert_not_called() - pred_tensorflow.assert_not_called() - pred_torch.assert_not_called() - - def test_get_hostname_replaced_url(self, mocker): - # Arrange - sub_path = "this/is/a/sub_path" - base_url = "/hopsworks/api/base/" - urlparse_href_arg = ParseResult( - scheme="", - netloc="", - path=base_url + sub_path, - params="", - query="", - fragment="", - ) - geturl_return = "final_url" - mock_url_parsed = mocker.MagicMock() - mock_url_parsed.geturl = mocker.MagicMock(return_value=geturl_return) - mock_client = mocker.MagicMock() - mock_client._base_url = base_url + "url" - mock_client._replace_public_host = mocker.MagicMock( - return_value=mock_url_parsed - ) - mocker.patch("hsml.client.get_instance", return_value=mock_client) - - # Act - url = util.get_hostname_replaced_url(sub_path) - - # Assert - mock_client._replace_public_host.assert_called_once_with(urlparse_href_arg) - mock_url_parsed.geturl.assert_called_once() - assert url == geturl_return - - # general - - def test_get_members(self): - # Arrange - class TEST: - TEST_1 = 1 - TEST_2 = "two" - TEST_3 = "3" - - # Act - members = list(util.get_members(TEST)) - - # Assert - assert members == [1, "two", "3"] - - def test_get_members_with_prefix(self): - # Arrange - class TEST: - TEST_1 = 1 - TEST_2 = "two" - RES_3 = "3" - NONE = None - - # Act - members = list(util.get_members(TEST, prefix="TEST")) - - # Assert - assert members == [1, "two"] - - # json - - def test_extract_field_from_json(self, mocker): - # Arrange - json = {"a": "1", "b": "2"} - get_obj_from_json = mocker.patch("hsml.util.get_obj_from_json") - - # Act - b = util.extract_field_from_json(json, "b") - - # Assert - assert b == "2" - assert get_obj_from_json.call_count == 0 - - def test_extract_field_from_json_fields(self, mocker): - # Arrange - json = {"a": "1", "b": "2"} - get_obj_from_json = mocker.patch("hsml.util.get_obj_from_json") - - # Act - b = util.extract_field_from_json(json, ["B", "b"]) # alternative fields - - # Assert - assert b == "2" - assert get_obj_from_json.call_count == 0 - - def test_extract_field_from_json_as_instance_of_str(self, mocker): - # Arrange - json = {"a": "1", "b": "2"} - get_obj_from_json = mocker.patch( - "hsml.util.get_obj_from_json", return_value="2" - ) - - # Act - b = util.extract_field_from_json(json, "b", as_instance_of=str) - - # Assert - assert b == "2" - get_obj_from_json.assert_called_once_with(obj="2", cls=str) - - def test_extract_field_from_json_as_instance_of_list_str(self, mocker): - # Arrange - json = {"a": "1", "b": ["2", "2", "2"]} - get_obj_from_json = mocker.patch( - "hsml.util.get_obj_from_json", return_value="2" - ) - - # Act - b = util.extract_field_from_json(json, "b", as_instance_of=str) - - # Assert - assert b == ["2", "2", "2"] - assert get_obj_from_json.call_count == 3 - assert get_obj_from_json.call_args[1]["obj"] == "2" - assert get_obj_from_json.call_args[1]["cls"] == str - - def test_get_obj_from_json_cls(self, mocker): - # 
Arrange - class Test: - def __init__(self): - self.a = "1" - - # Act - obj = util.get_obj_from_json(Test(), Test) - - # Assert - assert isinstance(obj, Test) - assert obj.a == "1" - - def test_get_obj_from_json_dict(self, mocker): - # Arrange - class Test: - def __init__(self, a): - self.a = a - - @classmethod - def from_json(cls, json): - return cls(**json) - - # Act - obj = util.get_obj_from_json({"a": "1"}, Test) - - # Assert - assert isinstance(obj, Test) - assert obj.a == "1" - - def test_get_obj_from_json_dict_default(self, mocker): - # Arrange - class Test: - def __init__(self, a="11"): - self.a = "11" - - @classmethod - def from_json(cls, json): - return cls(**json) - - # Act - obj = util.get_obj_from_json({}, Test) - - # Assert - assert isinstance(obj, Test) - assert obj.a == "11" - - def test_get_obj_from_json_unsupported(self, mocker): - # Arrange - class Test: - pass - - # Act - with pytest.raises(ValueError) as e_info: - util.get_obj_from_json("UNSUPPORTED", Test) - - # Assert - assert "cannot be converted to class" in str(e_info.value) diff --git a/hsml/python/tests/utils/__init__.py b/hsml/python/tests/utils/__init__.py deleted file mode 100644 index ff8055b9b..000000000 --- a/hsml/python/tests/utils/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/hsml/python/tests/utils/schema/test_column.py b/hsml/python/tests/utils/schema/test_column.py deleted file mode 100644 index 0a41ef205..000000000 --- a/hsml/python/tests/utils/schema/test_column.py +++ /dev/null @@ -1,44 +0,0 @@ -# -# Copyright 2024 Hopsworks AB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-from hsml.utils.schema import column
-
-
-class TestColumn:
-    def test_constructor_default(self):
-        # Arrange
-        _type = 1234
-
-        # Act
-        t = column.Column(_type)
-
-        # Assert
-        assert t.type == str(_type)
-        assert not hasattr(t, "name")
-        assert not hasattr(t, "description")
-
-    def test_constructor(self):
-        # Arrange
-        _type = 1234
-        name = 1111111
-        description = 2222222
-
-        # Act
-        t = column.Column(_type, name, description)
-
-        # Assert
-        assert t.type == str(_type)
-        assert t.name == str(name)
-        assert t.description == str(description)
diff --git a/hsml/python/tests/utils/schema/test_columnar_schema.py b/hsml/python/tests/utils/schema/test_columnar_schema.py
deleted file mode 100644
index c01c3c33d..000000000
--- a/hsml/python/tests/utils/schema/test_columnar_schema.py
+++ /dev/null
@@ -1,461 +0,0 @@
-#
-# Copyright 2024 Hopsworks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pandas as pd
-import pytest
-from hsml.utils.schema import column, columnar_schema
-from mock import call
-
-
-class TestColumnarSchema:
-    # constructor
-
-    def test_constructor_default(self, mocker):
-        # Arrange
-        mock_convert_list_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_list_to_schema",
-            return_value="convert_list_to_schema",
-        )
-        mock_convert_pandas_df_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_pandas_df_to_schema",
-            return_value="convert_pandas_df_to_schema",
-        )
-        mock_convert_pandas_series_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_pandas_series_to_schema",
-            return_value="convert_pandas_series_to_schema",
-        )
-        mock_convert_spark_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_spark_to_schema",
-            return_value="convert_spark_to_schema",
-        )
-        mock_convert_td_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_td_to_schema",
-            return_value="convert_td_to_schema",
-        )
-        mock_find_spec = mocker.patch("importlib.util.find_spec", return_value=None)
-
-        # Act
-        with pytest.raises(TypeError) as e_info:
-            _ = columnar_schema.ColumnarSchema()
-
-        # Assert
-        assert "is not supported in a columnar schema" in str(e_info.value)
-        mock_convert_list_to_schema.assert_not_called()
-        mock_convert_pandas_df_to_schema.assert_not_called()
-        mock_convert_pandas_series_to_schema.assert_not_called()
-        mock_convert_spark_to_schema.assert_not_called()
-        mock_convert_td_to_schema.assert_not_called()
-        assert mock_find_spec.call_count == 2
-
-    def test_constructor_list(self, mocker):
-        # Arrange
-        columnar_obj = [1, 2, 3, 4]
-        mock_convert_list_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_list_to_schema",
-            return_value="convert_list_to_schema",
-        )
-        mock_convert_pandas_df_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_pandas_df_to_schema",
-            return_value="convert_pandas_df_to_schema",
-        )
-        mock_convert_pandas_series_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_pandas_series_to_schema",
-            return_value="convert_pandas_series_to_schema",
-        )
-        mock_convert_spark_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_spark_to_schema",
-            return_value="convert_spark_to_schema",
-        )
-        mock_convert_td_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_td_to_schema",
-            return_value="convert_td_to_schema",
-        )
-        mock_find_spec = mocker.patch("importlib.util.find_spec", return_value=None)
-
-        # Act
-        cs = columnar_schema.ColumnarSchema(columnar_obj)
-
-        # Assert
-        assert cs.columns == "convert_list_to_schema"
-        mock_convert_list_to_schema.assert_called_once_with(columnar_obj)
-        mock_convert_pandas_df_to_schema.assert_not_called()
-        mock_convert_pandas_series_to_schema.assert_not_called()
-        mock_convert_spark_to_schema.assert_not_called()
-        mock_convert_td_to_schema.assert_not_called()
-        mock_find_spec.assert_not_called()
-
-    def test_constructor_pd_dataframe(self, mocker):
-        # Arrange
-        columnar_obj = pd.DataFrame([1, 2, 3, 4])
-        mock_convert_list_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_list_to_schema",
-            return_value="convert_list_to_schema",
-        )
-        mock_convert_pandas_df_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_pandas_df_to_schema",
-            return_value="convert_pandas_df_to_schema",
-        )
-        mock_convert_pandas_series_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_pandas_series_to_schema",
-            return_value="convert_pandas_series_to_schema",
-        )
-        mock_convert_spark_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_spark_to_schema",
-            return_value="convert_spark_to_schema",
-        )
-        mock_convert_td_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_td_to_schema",
-            return_value="convert_td_to_schema",
-        )
-        mock_find_spec = mocker.patch("importlib.util.find_spec", return_value=None)
-
-        # Act
-        cs = columnar_schema.ColumnarSchema(columnar_obj)
-
-        # Assert
-        assert cs.columns == "convert_pandas_df_to_schema"
-        mock_convert_list_to_schema.assert_not_called()
-        mock_convert_pandas_df_to_schema.assert_called_once_with(columnar_obj)
-        mock_convert_pandas_series_to_schema.assert_not_called()
-        mock_convert_spark_to_schema.assert_not_called()
-        mock_convert_td_to_schema.assert_not_called()
-        mock_find_spec.assert_not_called()
-
-    def test_constructor_pd_series(self, mocker):
-        # Arrange
-        columnar_obj = pd.Series([1, 2, 3, 4])
-        mock_convert_list_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_list_to_schema",
-            return_value="convert_list_to_schema",
-        )
-        mock_convert_pandas_df_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_pandas_df_to_schema",
-            return_value="convert_pandas_df_to_schema",
-        )
-        mock_convert_pandas_series_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_pandas_series_to_schema",
-            return_value="convert_pandas_series_to_schema",
-        )
-        mock_convert_spark_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_spark_to_schema",
-            return_value="convert_spark_to_schema",
-        )
-        mock_convert_td_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_td_to_schema",
-            return_value="convert_td_to_schema",
-        )
-        mock_find_spec = mocker.patch("importlib.util.find_spec", return_value=None)
-
-        # Act
-        cs = columnar_schema.ColumnarSchema(columnar_obj)
-
-        # Assert
-        assert cs.columns == "convert_pandas_series_to_schema"
-        mock_convert_list_to_schema.assert_not_called()
-        mock_convert_pandas_df_to_schema.assert_not_called()
-        mock_convert_pandas_series_to_schema.assert_called_once_with(columnar_obj)
-        mock_convert_spark_to_schema.assert_not_called()
-        mock_convert_td_to_schema.assert_not_called()
-        mock_find_spec.assert_not_called()
-
-    def test_constructor_pyspark_dataframe(self, mocker):
-        try:
-            import pyspark
-        except ImportError:
-            pytest.skip("pyspark not available")
-
-        # Arrange
-        columnar_obj = mocker.MagicMock(spec=pyspark.sql.dataframe.DataFrame)
-        mock_convert_list_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_list_to_schema",
-            return_value="convert_list_to_schema",
-        )
-        mock_convert_pandas_df_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_pandas_df_to_schema",
-            return_value="convert_pandas_df_to_schema",
-        )
-        mock_convert_pandas_series_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_pandas_series_to_schema",
-            return_value="convert_pandas_series_to_schema",
-        )
-        mock_convert_spark_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_spark_to_schema",
-            return_value="convert_spark_to_schema",
-        )
-        mock_convert_td_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_td_to_schema",
-            return_value="convert_td_to_schema",
-        )
-        mock_find_spec = mocker.patch(
-            "importlib.util.find_spec", return_value="Not None"
-        )
-
-        # Act
-        cs = columnar_schema.ColumnarSchema(columnar_obj)
-
-        # Assert
-        assert cs.columns == "convert_spark_to_schema"
-        mock_convert_list_to_schema.assert_not_called()
-        mock_convert_pandas_df_to_schema.assert_not_called()
-        mock_convert_pandas_series_to_schema.assert_not_called()
-        mock_convert_spark_to_schema.assert_called_once_with(columnar_obj)
-        mock_convert_td_to_schema.assert_not_called()
-        mock_find_spec.assert_called_once_with("pyspark")
-
-    def test_constructor_hsfs_td(self, mocker):
-        # Arrange
-        try:
-            import hsfs
-        except ImportError:
-            pytest.skip("hsfs not available")
-
-        # Arrange
-        columnar_obj = mocker.MagicMock(spec=hsfs.training_dataset.TrainingDataset)
-        mock_convert_list_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_list_to_schema",
-            return_value="convert_list_to_schema",
-        )
-        mock_convert_pandas_df_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_pandas_df_to_schema",
-            return_value="convert_pandas_df_to_schema",
-        )
-        mock_convert_pandas_series_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_pandas_series_to_schema",
-            return_value="convert_pandas_series_to_schema",
-        )
-        mock_convert_spark_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_spark_to_schema",
-            return_value="convert_spark_to_schema",
-        )
-        mock_convert_td_to_schema = mocker.patch(
-            "hsml.utils.schema.columnar_schema.ColumnarSchema._convert_td_to_schema",
-            return_value="convert_td_to_schema",
-        )
-        mock_find_spec = mocker.patch(
-            "importlib.util.find_spec", return_value="Not None"
-        )
-
-        # Act
-        cs = columnar_schema.ColumnarSchema(columnar_obj)
-
-        # Assert
-        assert cs.columns == "convert_td_to_schema"
-        mock_convert_list_to_schema.assert_not_called()
-        mock_convert_pandas_df_to_schema.assert_not_called()
-        mock_convert_pandas_series_to_schema.assert_not_called()
-        mock_convert_spark_to_schema.assert_not_called()
-        mock_convert_td_to_schema.assert_called_once_with(columnar_obj)
-        assert mock_find_spec.call_count == 2
-
-    # convert list to schema
-
-    def test_convert_list_to_schema(self, mocker):
-        # Arrange
-        columnar_obj = [1, 2, 3, 4]
-        mock_columnar_schema = mocker.MagicMock()
-        mock_columnar_schema._convert_list_to_schema = (
-            columnar_schema.ColumnarSchema._convert_list_to_schema
-        )
-        mock_columnar_schema._build_column.side_effect = columnar_obj
-
-        # Act
-        c = mock_columnar_schema._convert_list_to_schema(
-            mock_columnar_schema, columnar_obj
-        )
-
-        # Assert
-        expected_calls = [call(cv) for cv in columnar_obj]
-        mock_columnar_schema._build_column.assert_has_calls(expected_calls)
-        assert mock_columnar_schema._build_column.call_count == len(columnar_obj)
-        assert c == columnar_obj
-
-    # convert pandas df to schema
-
-    def test_convert_pd_dataframe_to_schema(self, mocker):
-        # Arrange
-        columnar_obj = pd.DataFrame([[1, 2], [3, 4], [1, 2], [3, 4]])
-        mock_column_init = mocker.patch(
-            "hsml.utils.schema.column.Column.__init__", return_value=None
-        )
-        mock_columnar_schema = mocker.MagicMock()
-        mock_columnar_schema._convert_pandas_df_to_schema = (
-            columnar_schema.ColumnarSchema._convert_pandas_df_to_schema
-        )
-
-        # Act
-        c = mock_columnar_schema._convert_pandas_df_to_schema(
-            mock_columnar_schema, columnar_obj
-        )
-
-        # Assert
-        cols = columnar_obj.columns
-        dtypes = columnar_obj.dtypes
-        expected_calls = [call(dtypes[col], name=col) for col in cols]
-        mock_column_init.assert_has_calls(expected_calls)
-        assert mock_column_init.call_count == 2
-        assert len(c) == 2
-
-    # convert pandas series to schema
-
-    def test_convert_pd_series_to_schema(self, mocker):
-        # Arrange
-        columnar_obj = pd.Series([1, 2, 3, 4])
-        mock_column_init = mocker.patch(
-            "hsml.utils.schema.column.Column.__init__", return_value=None
-        )
-        mock_columnar_schema = mocker.MagicMock()
-        mock_columnar_schema._convert_pandas_series_to_schema = (
-            columnar_schema.ColumnarSchema._convert_pandas_series_to_schema
-        )
-
-        # Act
-        c = mock_columnar_schema._convert_pandas_series_to_schema(
-            mock_columnar_schema, columnar_obj
-        )
-
-        # Assert
-        expected_call = call(columnar_obj.dtype, name=columnar_obj.name)
-        mock_column_init.assert_has_calls([expected_call])
-        assert mock_column_init.call_count == 1
-        assert len(c) == 1
-
-    # convert spark to schema
-
-    def test_convert_spark_to_schema(self, mocker):
-        # Arrange
-        try:
-            import pyspark
-        except ImportError:
-            pytest.skip("pyspark not available")
-
-        # Arrange
-        columnar_obj = mocker.MagicMock(spec=pyspark.sql.dataframe.DataFrame)
-        columnar_obj.dtypes = [("name_1", "type_1"), ("name_2", "type_2")]
-        mock_column_init = mocker.patch(
-            "hsml.utils.schema.column.Column.__init__", return_value=None
-        )
-        mock_columnar_schema = mocker.MagicMock()
-        mock_columnar_schema._convert_spark_to_schema = (
-            columnar_schema.ColumnarSchema._convert_spark_to_schema
-        )
-
-        # Act
-        c = mock_columnar_schema._convert_spark_to_schema(
-            mock_columnar_schema, columnar_obj
-        )
-
-        # Assert
-        expected_calls = [call(dtype, name=name) for name, dtype in columnar_obj.dtypes]
-        mock_column_init.assert_has_calls(expected_calls)
-        assert mock_column_init.call_count == len(columnar_obj.dtypes)
-        assert len(c) == len(columnar_obj.dtypes)
-
-    # convert td to schema
-
-    def test_convert_td_to_schema(self, mocker):
-        # Arrange
-        class MockFeature:
-            def __init__(self, fname, ftype):
-                self.name = fname
-                self.type = ftype
-
-        columnar_obj = mocker.MagicMock()
-        columnar_obj.schema = [
-            MockFeature("name_1", "type_1"),
-            MockFeature("name_2", "type_2"),
-        ]
-        mock_column_init = mocker.patch(
-            "hsml.utils.schema.column.Column.__init__", return_value=None
-        )
-        mock_columnar_schema = mocker.MagicMock()
-        mock_columnar_schema._convert_td_to_schema = (
-            columnar_schema.ColumnarSchema._convert_td_to_schema
-        )
-
-        # Act
-        c = mock_columnar_schema._convert_td_to_schema(
-            mock_columnar_schema, columnar_obj
-        )
-
-        # Assert
-        expected_calls = [
-            call(feat.type, name=feat.name) for feat in columnar_obj.schema
-        ]
-        mock_column_init.assert_has_calls(expected_calls)
-        assert mock_column_init.call_count == len(columnar_obj.schema)
-        assert len(c) == len(columnar_obj.schema)
-
-    # build column
-
-    def test_build_column_type_only(self, mocker):
-        # Arrange
-        columnar_obj = {"type": "tensor_type"}
-        mock_column_init = mocker.patch(
-            "hsml.utils.schema.column.Column.__init__", return_value=None
-        )
-        mock_columnar_schema = mocker.MagicMock()
-        mock_columnar_schema._build_column = (
-            columnar_schema.ColumnarSchema._build_column
-        )
-
-        # Act
-        c = mock_columnar_schema._build_column(mock_columnar_schema, columnar_obj)
-
-        # Assert
-        assert isinstance(c, column.Column)
-        mock_column_init.assert_called_once_with(
-            columnar_obj["type"], name=None, description=None
-        )
-
-    def test_build_tensor_invalid_missing_type(self, mocker):
-        # Arrange
-        columnar_obj = {}
-        mock_columnar_schema = mocker.MagicMock()
-        mock_columnar_schema._build_column = (
-            columnar_schema.ColumnarSchema._build_column
-        )
-
-        # Act
-        with pytest.raises(ValueError) as e_info:
-            _ = mock_columnar_schema._build_column(mock_columnar_schema, columnar_obj)
-
-        # Assert
-        assert "Mandatory 'type' key missing from entry" in str(e_info.value)
-
-    def test_build_tensor_type_name_and_description(self, mocker):
-        # Arrange
-        columnar_obj = {
-            "type": "tensor_type",
-            "name": "tensor_name",
-            "description": "tensor_description",
-        }
-        mock_column_init = mocker.patch(
-            "hsml.utils.schema.column.Column.__init__", return_value=None
-        )
-        mock_columnar_schema = mocker.MagicMock()
-        mock_columnar_schema._build_column = (
-            columnar_schema.ColumnarSchema._build_column
-        )
-
-        # Act
-        c = mock_columnar_schema._build_column(mock_columnar_schema, columnar_obj)
-
-        # Assert
-        assert isinstance(c, column.Column)
-        mock_column_init.assert_called_once_with(
-            columnar_obj["type"],
-            name=columnar_obj["name"],
-            description=columnar_obj["description"],
-        )
diff --git a/hsml/python/tests/utils/schema/test_tensor.py b/hsml/python/tests/utils/schema/test_tensor.py
deleted file mode 100644
index 22c2ab360..000000000
--- a/hsml/python/tests/utils/schema/test_tensor.py
+++ /dev/null
@@ -1,48 +0,0 @@
-#
-# Copyright 2024 Hopsworks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from hsml.utils.schema import tensor
-
-
-class TestTensor:
-    def test_constructor_default(self):
-        # Arrange
-        _type = 1234
-        shape = 4321
-
-        # Act
-        t = tensor.Tensor(_type, shape)
-
-        # Assert
-        assert t.type == str(_type)
-        assert t.shape == str(shape)
-        assert not hasattr(t, "name")
-        assert not hasattr(t, "description")
-
-    def test_constructor(self):
-        # Arrange
-        _type = 1234
-        shape = 4321
-        name = 1111111
-        description = 2222222
-
-        # Act
-        t = tensor.Tensor(_type, shape, name, description)
-
-        # Assert
-        assert t.type == str(_type)
-        assert t.shape == str(shape)
-        assert t.name == str(name)
-        assert t.description == str(description)
diff --git a/hsml/python/tests/utils/schema/test_tensor_schema.py b/hsml/python/tests/utils/schema/test_tensor_schema.py
deleted file mode 100644
index 18afb3fdc..000000000
--- a/hsml/python/tests/utils/schema/test_tensor_schema.py
+++ /dev/null
@@ -1,204 +0,0 @@
-#
-# Copyright 2024 Hopsworks AB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-import pytest
-from hsml.utils.schema import tensor, tensor_schema
-
-
-class TestTensorSchema:
-    # constructor
-
-    def test_constructor_default(self):
-        # Act
-        with pytest.raises(TypeError) as e_info:
-            _ = tensor_schema.TensorSchema()
-
-        # Assert
-        assert "is not supported in a tensor schema" in str(e_info.value)
-
-    def test_constructor_invalid(self):
-        # Act
-        with pytest.raises(TypeError) as e_info:
-            _ = tensor_schema.TensorSchema("invalid")
-
-        # Assert
-        assert "is not supported in a tensor schema" in str(e_info.value)
-
-    def test_constructor_list(self, mocker):
-        # Arrange
-        tensor_obj = [1234, 4321, 1111111, 2222222]
-        mock_convert_list_to_schema = mocker.patch(
-            "hsml.utils.schema.tensor_schema.TensorSchema._convert_list_to_schema",
-            return_value="list_to_schema",
-        )
-        mock_convert_tensor_to_schema = mocker.patch(
-            "hsml.utils.schema.tensor_schema.TensorSchema._convert_tensor_to_schema",
-            return_value="tensor_to_schema",
-        )
-
-        # Act
-        ts = tensor_schema.TensorSchema(tensor_obj)
-
-        # Assert
-        assert ts.tensors == "list_to_schema"
-        mock_convert_list_to_schema.assert_called_once_with(tensor_obj)
-        mock_convert_tensor_to_schema.assert_not_called()
-
-    def test_constructor_ndarray(self, mocker):
-        # Arrange
-        tensor_obj = np.array([1234, 4321, 1111111, 2222222])
-        mock_convert_list_to_schema = mocker.patch(
-            "hsml.utils.schema.tensor_schema.TensorSchema._convert_list_to_schema",
-            return_value="list_to_schema",
-        )
-        mock_convert_tensor_to_schema = mocker.patch(
-            "hsml.utils.schema.tensor_schema.TensorSchema._convert_tensor_to_schema",
-            return_value="tensor_to_schema",
-        )
-
-        # Act
-        ts = tensor_schema.TensorSchema(tensor_obj)
-
-        # Assert
-        assert ts.tensors == "tensor_to_schema"
-        mock_convert_tensor_to_schema.assert_called_once_with(tensor_obj)
-        mock_convert_list_to_schema.assert_not_called()
-
-    # convert tensor to schema
-
-    def test_convert_tensor_to_schema(self, mocker):
-        # Arrange
-        tensor_obj = mocker.MagicMock()
-        mock_tensor_schema = mocker.MagicMock()
-        mock_tensor_schema._convert_tensor_to_schema = (
-            tensor_schema.TensorSchema._convert_tensor_to_schema
-        )
-        mock_tensor_init = mocker.patch(
-            "hsml.utils.schema.tensor.Tensor.__init__", return_value=None
-        )
-
-        # Act
-        t = mock_tensor_schema._convert_tensor_to_schema(mock_tensor_schema, tensor_obj)
-
-        # Assert
-        assert isinstance(t, tensor.Tensor)
-        mock_tensor_init.assert_called_once_with(tensor_obj.dtype, tensor_obj.shape)
-
-    # convert list to schema
-
-    def test_convert_list_to_schema_singleton(self, mocker):
-        # Arrange
-        tensor_obj = [1234]
-        mock_tensor_schema = mocker.MagicMock()
-        mock_tensor_schema._convert_list_to_schema = (
-            tensor_schema.TensorSchema._convert_list_to_schema
-        )
-
-        # Act
-        t = mock_tensor_schema._convert_list_to_schema(mock_tensor_schema, tensor_obj)
-
-        # Assert
-        assert isinstance(t, list)
-        assert len(t) == len(tensor_obj)
-        mock_tensor_schema._build_tensor.assert_called_once_with(1234)
-
-    def test_convert_list_to_schema_list(self, mocker):
-        # Arrange
-        tensor_obj = np.array([1234, 4321, 1111111, 2222222])
-        mock_tensor_schema = mocker.MagicMock()
-        mock_tensor_schema._convert_list_to_schema = (
-            tensor_schema.TensorSchema._convert_list_to_schema
-        )
-
-        # Act
-        t = mock_tensor_schema._convert_list_to_schema(mock_tensor_schema, tensor_obj)
-
-        # Assert
-        assert isinstance(t, list)
-        assert len(t) == len(tensor_obj)
-        assert mock_tensor_schema._build_tensor.call_count == len(tensor_obj)
-
-    # build tensor
-
-    def test_build_tensor_type_and_shape_only(self, mocker):
-        # Arrange
-        tensor_obj = {"type": "tensor_type", "shape": "tensor_shape"}
-        mock_tensor_init = mocker.patch(
-            "hsml.utils.schema.tensor.Tensor.__init__", return_value=None
-        )
-        mock_tensor_schema = mocker.MagicMock()
-        mock_tensor_schema._build_tensor = tensor_schema.TensorSchema._build_tensor
-
-        # Act
-        t = mock_tensor_schema._build_tensor(mock_tensor_schema, tensor_obj)
-
-        # Assert
-        assert isinstance(t, tensor.Tensor)
-        mock_tensor_init.assert_called_once_with(
-            tensor_obj["type"], tensor_obj["shape"], name=None, description=None
-        )
-
-    def test_build_tensor_invalid_missing_type(self, mocker):
-        # Arrange
-        tensor_obj = {"shape": "tensor_shape"}
-        mock_tensor_schema = mocker.MagicMock()
-        mock_tensor_schema._build_tensor = tensor_schema.TensorSchema._build_tensor
-
-        # Act
-        with pytest.raises(ValueError) as e_info:
-            _ = mock_tensor_schema._build_tensor(mock_tensor_schema, tensor_obj)
-
-        # Assert
-        assert "Mandatory 'type' key missing from entry" in str(e_info.value)
-
-    def test_build_tensor_invalid_missing_shape(self, mocker):
-        # Arrange
-        tensor_obj = {"type": "tensor_type"}
-        mock_tensor_schema = mocker.MagicMock()
-        mock_tensor_schema._build_tensor = tensor_schema.TensorSchema._build_tensor
-
-        # Act
-        with pytest.raises(ValueError) as e_info:
-            _ = mock_tensor_schema._build_tensor(mock_tensor_schema, tensor_obj)
-
-        # Assert
-        assert "Mandatory 'shape' key missing from entry" in str(e_info.value)
-
-    def test_build_tensor_type_shape_name_and_description(self, mocker):
-        # Arrange
-        tensor_obj = {
-            "type": "tensor_type",
-            "shape": "tensor_shape",
-            "name": "tensor_name",
-            "description": "tensor_description",
-        }
-        mock_tensor_init = mocker.patch(
-            "hsml.utils.schema.tensor.Tensor.__init__", return_value=None
-        )
-        mock_tensor_schema = mocker.MagicMock()
-        mock_tensor_schema._build_tensor = tensor_schema.TensorSchema._build_tensor
-
-        # Act
-        t = mock_tensor_schema._build_tensor(mock_tensor_schema, tensor_obj)
-
-        # Assert
-        assert isinstance(t, tensor.Tensor)
-        mock_tensor_init.assert_called_once_with(
-            tensor_obj["type"],
-            tensor_obj["shape"],
-            name=tensor_obj["name"],
-            description=tensor_obj["description"],
-        )
diff --git a/hsml/requirements-docs.txt b/hsml/requirements-docs.txt
deleted file mode 100644
index d1499a262..000000000
--- a/hsml/requirements-docs.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-mkdocs==1.5.3
-mkdocs-material==9.5.17
-mike==2.0.0
-sphinx==7.2.6
-keras_autodoc @ git+https://git@github.com/logicalclocks/keras-autodoc
-markdown-include==0.8.1
-mkdocs-jupyter==0.24.3
-markdown==3.6
-pymdown-extensions==10.7.1
-mkdocs-macros-plugin==1.0.4
-mkdocs-minify-plugin>=0.2.0