From 8bf6eca5a75c4ca44dedd6979770ae4985c0ca10 Mon Sep 17 00:00:00 2001 From: James Greenhill Date: Tue, 30 Jan 2024 22:47:38 -0800 Subject: [PATCH] add model for is_sharded and only backup one shard if the table is not sharded --- frontend/src/pages/Backups/Backups.tsx | 13 ++++- .../src/pages/Backups/ScheduledBackups.tsx | 50 ++++++++++++------- housewatch/clickhouse/backups.py | 15 +++--- housewatch/clickhouse/table.py | 17 ++++++- ...011_scheduledbackup_is_sharded_and_more.py | 23 +++++++++ housewatch/models/backup.py | 1 + 6 files changed, 93 insertions(+), 26 deletions(-) create mode 100644 housewatch/migrations/0011_scheduledbackup_is_sharded_and_more.py diff --git a/frontend/src/pages/Backups/Backups.tsx b/frontend/src/pages/Backups/Backups.tsx index 7ea1aed..36b662b 100644 --- a/frontend/src/pages/Backups/Backups.tsx +++ b/frontend/src/pages/Backups/Backups.tsx @@ -1,7 +1,7 @@ import React, { useEffect, useState } from 'react' import { usePollingEffect } from '../../utils/usePollingEffect' import { ColumnType } from 'antd/es/table' -import { Table, Button, Form, Input, Modal, Tag, Col, Progress, Row, Tooltip, notification } from 'antd' +import { Table, Button, Form, Input, Checkbox, Modal, Tag, Col, Progress, Row, Tooltip, notification } from 'antd' import useSWR, { mutate } from 'swr' interface BackupRow { @@ -29,6 +29,7 @@ type FieldType = { table?: string bucket?: string path?: string + is_sharded?: boolean aws_access_key_id?: string aws_secret_access_key?: string } @@ -165,6 +166,16 @@ export default function Backups() { + + label="Is Sharded" + name="is_sharded" + initialValue="false" + valuePropName="checked" + rules={[{ required: true, message: 'Is this table sharded?' }]} + > + is sharded + + label="S3 Bucket" name="bucket" diff --git a/frontend/src/pages/Backups/ScheduledBackups.tsx b/frontend/src/pages/Backups/ScheduledBackups.tsx index 275ce3e..d09e9e5 100644 --- a/frontend/src/pages/Backups/ScheduledBackups.tsx +++ b/frontend/src/pages/Backups/ScheduledBackups.tsx @@ -1,9 +1,23 @@ import React, { useEffect, useState } from 'react' import { usePollingEffect } from '../../utils/usePollingEffect' import { ColumnType } from 'antd/es/table' -import { Switch, Select, Table, Button, Form, Input, Modal, Tag, Col, Progress, Row, Tooltip, notification } from 'antd' -import DeleteOutlined from '@ant-design/icons' -import EditOutlined from '@ant-design/icons' +import { + Switch, + Select, + Table, + Button, + Form, + Input, + Checkbox, + Modal, + Tag, + Col, + Progress, + Row, + Tooltip, + notification, +} from 'antd' +import { DeleteOutlined, EditOutlined } from '@ant-design/icons' import { Clusters } from '../Clusters/Clusters' import useSWR, { mutate } from 'swr' @@ -32,6 +46,7 @@ type FieldType = { incremental_schedule?: string database?: string table?: string + is_sharded?: boolean bucket?: string path?: string aws_access_key_id?: string @@ -153,12 +168,12 @@ export default function ScheduledBackups() { { title: 'Last Run Time', dataIndex: 'last_run_time' }, { title: 'Database', dataIndex: 'database' }, { title: 'Table', dataIndex: 'table' }, + { title: 'Is Sharded', dataIndex: 'is_sharded', render: (_, sched) => (sched.is_sharded ? 'Yes' : 'No') }, { title: 'S3 Location', dataIndex: 'bucket', render: (_, sched) => 's3://' + sched.bucket + '/' + sched.path }, - { title: 'Created At', dataIndex: 'created_at' }, { - title: '', + title: 'Actions', dataIndex: 'id', - render: id => { + render: (id: string, rowData: ScheduleRow) => { const deleteBackup = async () => { try { const res = await fetch(`/api/scheduled_backups/${id}`, { @@ -173,21 +188,10 @@ export default function ScheduledBackups() { } } - return ( - - - - ) - }, - }, - { - title: 'Actions', - dataIndex: 'id', - render: (id: string, rowData: ScheduleRow) => { return ( <> handleEdit(rowData)} /> - {/* handleDelete(id)} /> */} + deleteBackup()} style={{ marginLeft: '15px' }} /> ) }, @@ -285,6 +289,16 @@ export default function ScheduledBackups() { + + label="Is Sharded" + name="is_sharded" + initialValue="false" + valuePropName="checked" + rules={[{ required: true, message: 'Is this table sharded?' }]} + > + is sharded + + label="S3 Bucket" name="bucket" diff --git a/housewatch/clickhouse/backups.py b/housewatch/clickhouse/backups.py index a353639..063fdc3 100644 --- a/housewatch/clickhouse/backups.py +++ b/housewatch/clickhouse/backups.py @@ -4,8 +4,8 @@ from typing import Dict, Optional from uuid import uuid4 from housewatch.clickhouse.client import run_query -from housewatch.clickhouse.table import is_replicated_table from housewatch.models.backup import ScheduledBackup, ScheduledBackupRun +from housewatch.clickhouse.table import table_engine_full from housewatch.clickhouse.clusters import get_node_per_shard from django.conf import settings @@ -26,7 +26,7 @@ def execute_backup( aws_key: Optional[str] = None, aws_secret: Optional[str] = None, base_backup: Optional[str] = None, - is_replicated: bool = False, + is_sharded: bool = False, ): """ This function will execute a backup on each shard in a cluster @@ -59,8 +59,8 @@ def execute_backup( item[key[0]] = res[index] response.append(item) responses.append((shard, response)) - if is_replicated: - break + if not is_sharded: + return response return response @@ -82,7 +82,9 @@ def get_backup(backup, cluster=None): return run_query(QUERY, {"uuid": backup}, use_cache=False) -def create_table_backup(database, table, bucket, path, cluster=None, aws_key=None, aws_secret=None, base_backup=None): +def create_table_backup( + database, table, bucket, path, cluster=None, aws_key=None, aws_secret=None, base_backup=None, is_sharded=False +): if aws_key is None or aws_secret is None: aws_key = settings.AWS_ACCESS_KEY_ID aws_secret = settings.AWS_SECRET_ACCESS_KEY @@ -106,7 +108,7 @@ def create_table_backup(database, table, bucket, path, cluster=None, aws_key=Non aws_key=aws_key, aws_secret=aws_secret, base_backup=base_backup, - is_replicated=is_replicated_table(database, table), + is_sharded=is_sharded, ) QUERY = """BACKUP TABLE %(database)s.%(table)s TO S3('https://%(bucket)s.s3.amazonaws.com/%(path)s', '%(aws_key)s', '%(aws_secret)s') @@ -204,6 +206,7 @@ def run_backup(backup_id, incremental=False): backup.aws_access_key_id, backup.aws_secret_access_key, base_backup=base_backup, + is_sharded=backup.is_sharded, ) uuid = str(uuid4()) br = ScheduledBackupRun.objects.create( diff --git a/housewatch/clickhouse/table.py b/housewatch/clickhouse/table.py index e977173..cb89af2 100644 --- a/housewatch/clickhouse/table.py +++ b/housewatch/clickhouse/table.py @@ -2,5 +2,20 @@ def is_replicated_table(database, table): - QUERY = """SELECT is_replicated FROM system.tables WHERE database = '%(database)s' AND name = '%(table)s'""" + QUERY = """SELECT engine FROM system.tables WHERE database = '%(database)s' AND name = '%(table)s'""" return "replicated" in run_query(QUERY, {"database": database, "table": table})[0]["engine"].lower() + + +def table_engine_full(database, table): + QUERY = """SELECT engine_full FROM system.tables WHERE database = '%(database)s' AND name = '%(table)s'""" + return run_query(QUERY, {"database": database, "table": table})[0]["engine_full"] + + +def parse_engine(engine_full): + engine = engine_full.split("(")[0].strip() + params = engine_full.split("(")[1].split(")")[0].split(",") + return engine, params + + +def is_sharded_table(database, table): + return "sharded" in table_engine_full(database, table).lower() diff --git a/housewatch/migrations/0011_scheduledbackup_is_sharded_and_more.py b/housewatch/migrations/0011_scheduledbackup_is_sharded_and_more.py new file mode 100644 index 0000000..ae32402 --- /dev/null +++ b/housewatch/migrations/0011_scheduledbackup_is_sharded_and_more.py @@ -0,0 +1,23 @@ +# Generated by Django 4.1.1 on 2024-01-31 06:36 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('housewatch', '0010_scheduledbackup_incremental_schedule_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='scheduledbackup', + name='is_sharded', + field=models.BooleanField(default=False), + ), + migrations.AlterField( + model_name='scheduledbackup', + name='table', + field=models.CharField(blank=True, max_length=255, null=True), + ), + ] diff --git a/housewatch/models/backup.py b/housewatch/models/backup.py index b512b00..c19b61e 100644 --- a/housewatch/models/backup.py +++ b/housewatch/models/backup.py @@ -19,6 +19,7 @@ class ScheduledBackup(models.Model): incremental_schedule: models.CharField = models.CharField(max_length=255, null=True) table: models.CharField = models.CharField(max_length=255, null=True, blank=True) database: models.CharField = models.CharField(max_length=255) + is_sharded: models.BooleanField = models.BooleanField(default=False) cluster: models.CharField = models.CharField(max_length=255, null=True) bucket: models.CharField = models.CharField(max_length=255) path: models.CharField = models.CharField(max_length=255)