Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make gui for split zip script #6

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
138 changes: 138 additions & 0 deletions datamanager/cloud/split_large_zip/gui.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import sys
import threading

from typing import Callable
from pathlib import Path

import tkinter as tk
from tkinter import messagebox

from utils import ZipUtils


DEFAULT_ZIP_SIZE_IN_KB = 100


class Window:
    """Tkinter front end for the split-zip script.

    The window collects a zip path, a maximum archive size (number plus unit)
    and a file limit, stores the limits on ``ZipUtils`` and then runs
    ``zip_process_func`` on a daemon thread so the GUI stays responsive.

    ``is_split_finished`` is never set to True inside this class; the code
    doing the split is expected to set it before the final call to
    ``update_zip_status`` so that the window re-enables the button.
    """

    # Units offered in the size drop-down, in display order.
    zip_size_options: list = [
        "KB",
        "MB",
        "GB",
    ]
    # Number of bytes represented by one unit of each drop-down option.
    size_in_bytes_map: dict = {
        "KB": 2**10,
        "MB": 2**20,
        "GB": 2**30,
    }

    def __init__(self, zip_process_func: Callable):
        """Build all widgets; the caller starts the loop via ``self.gui.mainloop()``.

        Args:
            zip_process_func: Callable invoked on a worker thread with a single
                ``pathlib.Path`` argument (the path typed by the user).
        """
        self.zip_process_func = zip_process_func

        self.is_split_in_progress = False
        self.is_split_finished = False

        self.gui = tk.Tk(className="Split ZIP")
        self.gui.geometry("640x360")
        self.gui.minsize(width=300, height=300)

        # Intercept the window-close button so a running split is not lost silently.
        self.gui.protocol("WM_DELETE_WINDOW", self.on_close_gui)

        self.path_label = tk.Label(self.gui, text="ZIP Path:")
        self.path_label.place(rely=0.1)

        self.path_field = tk.Entry(self.gui, bd=3)
        self.path_field.place(x=55, rely=0.1, relwidth=0.7)

        self.zip_max_size_label = tk.Label(self.gui, text="ZIP Size:")
        self.zip_max_size_label.place(rely=0.2)

        self.zip_max_size_field = tk.Entry(self.gui, bd=3)
        self.zip_max_size_field.insert(0, DEFAULT_ZIP_SIZE_IN_KB)
        self.zip_max_size_field.place(x=55, rely=0.2)

        self.selected_zip_size = tk.StringVar()
        self.selected_zip_size.set(Window.zip_size_options[0])

        self.size_options = tk.OptionMenu(self.gui, self.selected_zip_size, *Window.zip_size_options)
        self.size_options.place(rely=0.195, x=190, height=25)

        self.page_limit_label = tk.Label(self.gui, text="File limit:")
        self.page_limit_label.place(rely=0.3)

        self.page_limit_field = tk.Entry(self.gui, bd=3)
        self.page_limit_field.place(x=55, rely=0.3)
        # NOTE(review): the file-limit default re-uses the size constant (100);
        # confirm whether a dedicated default was intended.
        self.page_limit_field.insert(0, DEFAULT_ZIP_SIZE_IN_KB)

        self.button = tk.Button(self.gui, command=self.perform_split, text="Split zip")
        self.button.place(rely=0.45, x=5)

        # Status labels are created here but only placed once a split starts.
        self.split_zip_started_label = tk.Label(self.gui, text="Split zip started", fg="green")

        self.current_zip_no_label = tk.Label(self.gui, fg="green")

        self.error_label = tk.Label(self.gui, text="")
        self.error_label.place(rely=0.7, x=10)

    @staticmethod
    def _parse_positive_int(text: str):
        """Return ``text`` as a positive int, or None when it is not one.

        ``str.isdecimal`` is used instead of ``str.isdigit`` because the latter
        also accepts characters such as superscript digits that ``int()``
        rejects with ValueError.
        """
        if not text.isdecimal():
            return None
        value = int(text)
        return value if value > 0 else None

    def perform_split(self):
        """Validate the form, publish the limits on ``ZipUtils`` and start the worker thread."""
        if not self.is_input_valid():
            return

        unit_in_bytes = Window.size_in_bytes_map[self.selected_zip_size.get()]
        ZipUtils.size_limit_in_bytes = int(self.zip_max_size_field.get()) * unit_in_bytes
        ZipUtils.page_limit = int(self.page_limit_field.get())

        self.is_split_in_progress = True

        self.current_zip_no_label.config(text="")
        self.split_zip_started_label.place(rely=0.8, x=10)
        self.current_zip_no_label.place(rely=0.9, x=10)

        # Disabled until the split finishes or fails, so only one split runs at a time.
        self.button["state"] = "disabled"
        self.error_label.place_forget()
        # Daemon thread: it must not keep the process alive after the window is closed.
        split_zip_thread = threading.Thread(
            target=self.zip_process_func,
            args=[Path(self.path_field.get())],
            daemon=True,
        )
        split_zip_thread.start()

    def update_zip_status(self, message):
        """Show ``message`` as the current progress text.

        When ``is_split_finished`` has been set, the window is also returned to
        its idle state (button enabled, "started" label hidden, flags reset).
        """
        if self.is_split_finished:
            self.is_split_in_progress = False
            self.split_zip_started_label.place_forget()
            # "normal" is the idle, clickable state; "active" is the transient
            # pressed/hover appearance and would leave the button highlighted.
            self.button["state"] = "normal"
            self.is_split_finished = False
        self.current_zip_no_label.config(text=message, fg="green")

    def handle_exception(self, args):
        """Reset the window after a failed split and show the failure reason.

        Args:
            args: Object exposing ``exc_value`` (the shape passed to
                ``threading.excepthook``).
        """
        self.is_split_in_progress = False
        self.split_zip_started_label.place_forget()
        self.current_zip_no_label.place_forget()
        self.button["state"] = "normal"
        if not args.exc_value:
            self.error_label.config(text="Split failed.", fg="red")
        else:
            self.error_label.config(text=f"Split failed.\n Reason: {args.exc_value}", fg="red")
        self.error_label.place(rely=0.65, x=0)

    def is_input_valid(self) -> bool:
        """Check the three form fields, showing an error dialog for the first invalid one.

        Returns:
            True when the path is non-empty and both numeric fields hold a
            positive integer; False otherwise.
        """
        zip_max_size = self.zip_max_size_field.get()
        page_limit = self.page_limit_field.get()

        if not self.path_field.get():
            messagebox.showerror("Error", "Zip path must not be empty")
            return False

        if self._parse_positive_int(zip_max_size) is None:
            messagebox.showerror("Error", "Zip size must be a positive integer")
            return False

        if self._parse_positive_int(page_limit) is None:
            messagebox.showerror("Error", "File limit must be a positive integer")
            return False

        return True

    def on_close_gui(self):
        """Close the window, asking for confirmation first if a split is running."""
        if not self.is_split_in_progress:
            self.gui.destroy()
            return

        if messagebox.askyesno(title="", message="Closing the window will end the zip split. Do you want to continue?", icon=messagebox.WARNING):
            self.gui.destroy()
            # Exit explicitly so the process ends together with the window.
            sys.exit()
50 changes: 40 additions & 10 deletions datamanager/cloud/split_large_zip/split_zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,25 @@
This script is used to split zip files (DM exports) that are above 1GB in size or over 1500 files.

Usage:
split_zip.py --path C:\\path\\to\\import\\archive.zip

User interface -> python split_zip.py --terminal no
Use the terminal -> python split_zip.py --path C:\\path\\to\\import\\archive.zip
Use the terminal -> python split_zip.py --terminal yes --path C:\\path\\to\\import\\archive.zip
The result of running the script is multiple zip files that are all below 1GB and have less than 1500 files.
"""
import argparse
import csv
import os
import re
import tempfile
import threading
from typing import Callable
import zipfile
from dataclasses import dataclass
from io import TextIOWrapper
from pathlib import Path

SIZE_LIMIT_IN_BYTES = 300000000
PAGE_LIMIT = 1500

from gui import Window
from utils import ZipUtils

@dataclass
class Configs:
Expand All @@ -29,12 +31,35 @@ class Configs:
zip_name: str = ""


# GUI window; stays None in terminal mode and is assigned by main() when the GUI is launched.
window: Window = None
# Progress reporter: main() sets it to print (terminal mode) or window.update_zip_status (GUI mode).
print_function: Callable = None


def main():
    """Parse the command line and run the split in terminal or GUI mode.

    Terminal mode is used when ``--terminal`` is omitted or equals "yes"
    (case-insensitive); it requires ``--path`` and reports progress with
    ``print``. Any other ``--terminal`` value opens the GUI, which reports
    progress through the window and routes uncaught worker-thread exceptions
    to ``Window.handle_exception``.

    Exceptions raised while splitting in terminal mode (or while building the
    GUI) are reported on stdout instead of propagating.
    """
    # Declared up front: both module-level names are rebound below.
    global print_function, window

    try:
        # A single parser that knows both options; a leftover parser that only
        # accepted --path would reject --terminal before this point is reached.
        parser = argparse.ArgumentParser(description="Split a dataset into multiple archives.")
        parser.add_argument("--terminal", help="Use terminal or user interface (yes/no)", default=None)
        parser.add_argument("--path", default=None, help="the absolute path to the zip file")
        args = parser.parse_args()

        if not args.terminal:
            print("To use the user interface run the script file in the following way\npython split_zip.py --terminal n")

        if not args.terminal or args.terminal.lower() == "yes":
            if not args.path:
                print("--path option is required")
                return
            print_function = print
            process_zip(Path(args.path))
        else:
            window = Window(process_zip)
            print_function = window.update_zip_status

            # The split runs on a worker thread; surface its uncaught exceptions in the GUI.
            threading.excepthook = window.handle_exception

            window.gui.mainloop()
    except Exception as e:
        print(f"Split failed. Reason: \n {e}")

Expand Down Expand Up @@ -72,14 +97,19 @@ def split_files(images_path: Path, latest_path: Path, folder_contents_path: str)
size = image_paths[image_name].stat().st_size + latest_paths[image_name].stat().st_size
size += sum([x.stat().st_size for x in pages_by_documents[image_name]])
current_size += size
if current_size < SIZE_LIMIT_IN_BYTES and len(image_names) < PAGE_LIMIT:
if current_size < ZipUtils.size_limit_in_bytes and len(image_names) < ZipUtils.page_limit:
image_names.append(image_name)
else:
create_archive(folder_contents_path, image_names, archive_count)
print_function(f"Processed zip number {archive_count}")

archive_count += 1
current_size = Configs.start_size + size
image_names = [image_name]
create_archive(folder_contents_path, image_names, archive_count) # for the last zip
if window:
window.is_split_finished = True
print_function(f"Split zip finished with {archive_count} zip files.")


def get_pages_by_documents(document_names: dict, image_names: dict):
Expand Down
7 changes: 7 additions & 0 deletions datamanager/cloud/split_large_zip/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from dataclasses import dataclass


@dataclass
class ZipUtils:
    """Limits consulted when deciding where one output archive ends.

    Callers read (and may overwrite) these values directly on the class, so
    the defaults below act as the settings used when nothing overrides them.
    """

    # Upper bound on the accumulated size of one archive, in bytes.
    size_limit_in_bytes: int = 300 * 10**6
    # Upper bound on the number of image names grouped into one archive.
    page_limit: int = 1500