Skip to content

Commit

Permalink
feat(database): added function to fetch table sizes and index statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
tanmoysrt committed Dec 12, 2024
1 parent 41913f3 commit 560076b
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 83 deletions.
119 changes: 112 additions & 7 deletions agent/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,20 +153,125 @@ def modify_user_permissions(self, username: str, mode: str, permissions: dict |

self._run_sql(queries_str, commit=True, allow_all_stmt_types=True)

def fetch_database_table_sizes(self) -> list[dict]:
"""
SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES
"""
def fetch_database_table_sizes(self) -> dict:
data = self._run_sql(
"SELECT table_name, data_length, index_length FROM INFORMATION_SCHEMA.TABLES", as_dict=True
f"SELECT table_name, data_length, index_length FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA"
f"='{self.database_name}'",
as_dict=True,
)
if len(data) == 0:
return []
for d in data[0]["output"]:
data = data[0]["output"]
tables = {}
for d in data:
tables[d["table_name"]] = {
"data_length": int(d["data_length"]),
"index_length": int(d["index_length"]),
"total_size": int(d["data_length"]) + int(d["index_length"]),
}
d["data_length"] = int(d["data_length"])
d["index_length"] = int(d["index_length"])
d["total_size"] = d["data_length"] + d["index_length"]
return data
return tables

def fetch_database_table_schema(self, include_index_info: bool = True):
    """
    Return the column layout of every table in ``self.database_name``.

    Columns are read from ``INFORMATION_SCHEMA.COLUMNS`` through
    ``self._run_sql`` and grouped by table name.

    Args:
        include_index_info: When true, each column is annotated with the
            indexes that cover it (``self.fetch_database_table_indexes``)
            and the rows-read counter of each of those indexes
            (``self.fetch_database_table_index_usage``). When false, no
            index queries are issued and every column is reported as
            not indexed.

    Returns:
        ``{<table_name>: [<column_info>, ...]}`` where each column info is a
        dict with the keys ``column``, ``data_type``, ``is_nullable`` (bool),
        ``default`` and ``index_info`` (``is_indexed``, ``indexes``,
        ``index_usage``). An empty dict is returned when the query yields
        no result set.
    """
    # Both lookups must be mappings: they are queried with `.get()` below even
    # when index info was not requested. (They used to default to lists, which
    # made `include_index_info=False` fail with AttributeError.)
    index_info = {}  # <table_name>: { <column_name>: [<index1>, ...] }
    index_usage_info = {}  # <table_name>: { <index_name>: <rows_read> }

    data = self._run_sql(
        f"""SELECT
TABLE_NAME AS `table`,
COLUMN_NAME AS `column`,
DATA_TYPE AS `data_type`,
IS_NULLABLE AS `is_nullable`,
COLUMN_DEFAULT AS `default`
FROM
INFORMATION_SCHEMA.COLUMNS
WHERE
TABLE_SCHEMA='{self.database_name}';
""",
        as_dict=True,
    )
    if not data:
        return {}
    data = data[0]["output"]

    if include_index_info:
        index_info = self.fetch_database_table_indexes()
        index_usage_info = self.fetch_database_table_index_usage()

    tables = {}  # <table_name>: [<column_1_info>, <column_2_info>, ...]
    for record in data:
        table_name = record["table"]
        indexes = index_info.get(table_name, {}).get(record["column"], [])
        # An index without a usage record is reported with 0 rows read.
        table_usage = index_usage_info.get(table_name, {})
        column_index_usage = {index: table_usage.get(index, 0) for index in indexes}

        tables.setdefault(table_name, []).append(
            {
                "column": record["column"],
                "data_type": record["data_type"],
                "is_nullable": record["is_nullable"] == "YES",
                "default": record["default"],
                "index_info": {
                    "is_indexed": len(indexes) > 0,
                    "indexes": indexes,
                    "index_usage": column_index_usage,
                },
            }
        )
    return tables

def fetch_database_table_indexes(self):
    """
    Map every indexed column of ``self.database_name`` to its index names.

    Rows are read from ``INFORMATION_SCHEMA.STATISTICS`` through
    ``self._run_sql``.

    Returns:
        ``{<table_name>: {<column_name>: [<index1>, <index2>, ...]}}``, or an
        empty dict when the query yields no result set.
    """
    result_sets = self._run_sql(
        f"""
SELECT
TABLE_NAME AS `table`,
COLUMN_NAME AS `column`,
INDEX_NAME AS `index`
FROM
INFORMATION_SCHEMA.STATISTICS
WHERE
TABLE_SCHEMA='{self.database_name}'
""",
        as_dict=True,
    )
    if not len(result_sets):
        return {}

    indexes_by_table = {}
    for row in result_sets[0]["output"]:
        # Create the per-table and per-column containers on first sight.
        columns = indexes_by_table.setdefault(row["table"], {})
        columns.setdefault(row["column"], []).append(row["index"])
    return indexes_by_table

def fetch_database_table_index_usage(self):
    """
    Collect the rows-read counter of every index in ``self.database_name``.

    Rows are read from ``INFORMATION_SCHEMA.INDEX_STATISTICS`` through
    ``self._run_sql``; ``rows_read`` is converted to ``int``.

    Returns:
        ``{<table_name>: {<index_name>: <rows_read>}}``, or an empty dict when
        the query yields no result set.
    """
    result_sets = self._run_sql(
        f"""
SELECT
TABLE_NAME AS `table`,
INDEX_NAME AS `index`,
ROWS_READ AS `rows_read`
FROM
INFORMATION_SCHEMA.INDEX_STATISTICS
WHERE
TABLE_SCHEMA='{self.database_name}'
""",
        as_dict=True,
    )
    if not len(result_sets):
        return {}

    usage_by_table = {}
    for row in result_sets[0]["output"]:
        per_index = usage_by_table.setdefault(row["table"], {})
        per_index[row["index"]] = int(row["rows_read"])
    return usage_by_table

# Private helper methods
def _run_sql( # noqa C901
Expand Down
91 changes: 22 additions & 69 deletions agent/site.py
Original file line number Diff line number Diff line change
Expand Up @@ -852,78 +852,31 @@ def get_database_free_tables(self):
return []

@job("Fetch Database Table Schema")
def fetch_database_table_schema(self):
return self._fetch_database_table_schema()
def fetch_database_table_schema(self, include_table_size: bool = True, include_index_info: bool = True):
database = Database(self.host, 3306, self.user, self.password, self.database)
tables = {}
table_schemas = self._fetch_database_table_schema(database, include_index_info=include_index_info)
for table_name in table_schemas:
tables[table_name] = {
"columns": table_schemas[table_name],
}

if include_table_size:
table_sizes = self._fetch_database_table_sizes(database)
for table_name in table_sizes:
if table_name not in tables:
continue
tables[table_name]["size"] = table_sizes[table_name]

@step("Fetch Database Table Schema")
def _fetch_database_table_schema(self):
index_info = self.get_database_table_indexes()
command = f"""SELECT
TABLE_NAME AS `table`,
COLUMN_NAME AS `column`,
DATA_TYPE AS `data_type`,
IS_NULLABLE AS `is_nullable`,
COLUMN_DEFAULT AS `default`
FROM
INFORMATION_SCHEMA.COLUMNS
WHERE
TABLE_SCHEMA='{self.database}';
"""
command = quote(command)
data = self.execute(
f"mysql -sN -h {self.host} -u{self.user} -p{self.password} -e {command} --batch"
).get("output")
data = data.split("\n")
data = [line.split("\t") for line in data]
tables = {} # <table_name>: [<column_1_info>, <column_2_info>, ...]
for row in data:
if len(row) != 5:
continue
table = row[0]
if table not in tables:
tables[table] = []
tables[table].append(
{
"column": row[1],
"data_type": row[2],
"is_nullable": row[3] == "YES",
"default": row[4],
"indexes": index_info.get(table, {}).get(row[1], []),
}
)
return tables

def fetch_database_table_sizes(self, root_password: str):
return Database(self.host, 3306, "root", root_password, self.database).fetch_database_table_sizes()

def get_database_table_indexes(self):
command = f"""
SELECT
TABLE_NAME AS `table`,
COLUMN_NAME AS `column`,
INDEX_NAME AS `index`
FROM
INFORMATION_SCHEMA.STATISTICS
WHERE
TABLE_SCHEMA='{self.database}'
"""
command = quote(command)
data = self.execute(
f"mysql -sN -h {self.host} -u{self.user} -p{self.password} -e {command} --batch"
).get("output")
data = data.split("\n")
data = [line.split("\t") for line in data]
tables = {} # <table_name>: { <column_name> : [<index1>, <index2>, ...] }
for row in data:
if len(row) != 3:
continue
table = row[0]
if table not in tables:
tables[table] = {}
if row[1] not in tables[table]:
tables[table][row[1]] = []
tables[table][row[1]].append(row[2])
return tables
# Step wrapper around `Database.fetch_database_table_schema`.
# Forwards `include_index_info` unchanged to the given `database` object and
# returns whatever that call returns.
# NOTE(review): `@step` is defined outside this hunk; presumably it records the
# call as a named step of the enclosing job — confirm against agent/job.py.
@step("Fetch Database Table Schema")
def _fetch_database_table_schema(self, database: Database, include_index_info: bool = True):
return database.fetch_database_table_schema(include_index_info=include_index_info)

# Step wrapper around `Database.fetch_database_table_sizes`.
# Calls it on the given `database` object and returns the result unchanged.
# NOTE(review): `@step` is defined outside this hunk; presumably it records the
# call as a named step of the enclosing job — confirm against agent/job.py.
@step("Fetch Database Table Sizes")
def _fetch_database_table_sizes(self, database: Database):
return database.fetch_database_table_sizes()

def run_sql_query(self, query: str, commit: bool = False, as_dict: bool = False):
db = self.db_instance()
Expand Down
20 changes: 13 additions & 7 deletions agent/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from playhouse.shortcuts import model_to_dict

from agent.builder import ImageBuilder, get_image_build_context_directory
from agent.database import Database, JSONEncoderForSQLQueryResult
from agent.database import JSONEncoderForSQLQueryResult
from agent.database_server import DatabaseServer
from agent.exceptions import BenchNotExistsException, SiteNotExistsException
from agent.job import JobModel, connection
Expand Down Expand Up @@ -553,15 +553,21 @@ def backup_site(bench, site):
@application.route("/benches/<string:bench>/sites/<string:site>/database/schema", methods=["POST"])
@validate_bench_and_site
def fetch_database_table_schema(bench, site):
job = Server().benches[bench].sites[site].fetch_database_table_schema()
data = request.json or {}
include_table_size = data.get("include_table_size", False)
include_index_info = data.get("include_index_info", False)
job = (
Server()
.benches[bench]
.sites[site]
.fetch_database_table_schema(
include_table_size=include_table_size,
include_index_info=include_index_info,
)
)
return {"job": job}


@application.route("/benches/<string:bench>/sites/<string:site>/database/size", methods=["POST"])
@validate_bench_and_site
def fetch_database_table_sizes(bench, site):
return Response(json.dumps(Server().benches[bench].sites[site].fetch_database_table_sizes()))

@application.route("/benches/<string:bench>/sites/<string:site>/database/query/execute", methods=["POST"])
@validate_bench_and_site
def run_sql(bench, site):
Expand Down

0 comments on commit 560076b

Please sign in to comment.