From 0cf2ea453f9ce028a22a07f2ce29693367d9fcf1 Mon Sep 17 00:00:00 2001 From: Kosuke Fujimoto Date: Sun, 4 Nov 2018 20:44:57 +0900 Subject: [PATCH 1/5] Added tqdm for inference part and replaced os call with python native functions. Changed image file's name from 5 digits to 6 digits for generating long video --- classify.py | 41 ++++++++++------- dataset.py | 11 +++-- .../generate_result_video.py | 44 ++++++++++++------- main.py | 24 +++++----- 4 files changed, 73 insertions(+), 47 deletions(-) diff --git a/classify.py b/classify.py index 78550f8..f4e98c9 100644 --- a/classify.py +++ b/classify.py @@ -2,38 +2,47 @@ from torch.autograd import Variable from dataset import Video -from spatial_transforms import (Compose, Normalize, Scale, CenterCrop, ToTensor) +from spatial_transforms import (Compose, Normalize, Scale, CenterCrop, + ToTensor) from temporal_transforms import LoopPadding +from tqdm import tqdm + def classify_video(video_dir, video_name, class_names, model, opt): assert opt.mode in ['score', 'feature'] - spatial_transform = Compose([Scale(opt.sample_size), - CenterCrop(opt.sample_size), - ToTensor(), - Normalize(opt.mean, [1, 1, 1])]) + spatial_transform = Compose([ + Scale(opt.sample_size), + CenterCrop(opt.sample_size), + ToTensor(), + Normalize(opt.mean, [1, 1, 1]) + ]) temporal_transform = LoopPadding(opt.sample_duration) - data = Video(video_dir, spatial_transform=spatial_transform, - temporal_transform=temporal_transform, - sample_duration=opt.sample_duration) - data_loader = torch.utils.data.DataLoader(data, batch_size=opt.batch_size, - shuffle=False, num_workers=opt.n_threads, pin_memory=True) + data = Video( + video_dir, + spatial_transform=spatial_transform, + temporal_transform=temporal_transform, + sample_duration=opt.sample_duration) + data_loader = torch.utils.data.DataLoader( + data, + batch_size=opt.batch_size, + shuffle=False, + num_workers=opt.n_threads, + pin_memory=True) video_outputs = [] video_segments = [] - for i, (inputs, segments) in enumerate(data_loader): + print('start inference') + for i, (inputs, segments) in enumerate(tqdm(data_loader)): inputs = Variable(inputs, volatile=True) outputs = model(inputs) video_outputs.append(outputs.cpu().data) video_segments.append(segments) - + print('end inference') video_outputs = torch.cat(video_outputs) video_segments = torch.cat(video_segments) - results = { - 'video': video_name, - 'clips': [] - } + results = {'video': video_name, 'clips': []} _, max_indices = video_outputs.max(dim=1) for i in range(video_outputs.size(0)): diff --git a/dataset.py b/dataset.py index 71595c2..3816209 100644 --- a/dataset.py +++ b/dataset.py @@ -34,7 +34,7 @@ def get_default_image_loader(): def video_loader(video_dir_path, frame_indices, image_loader): video = [] for i in frame_indices: - image_path = os.path.join(video_dir_path, 'image_{:05d}.jpg'.format(i)) + image_path = os.path.join(video_dir_path, 'image_{:06d}.jpg'.format(i)) if os.path.exists(image_path): video.append(image_loader(image_path)) else: @@ -103,9 +103,12 @@ def make_dataset(video_path, sample_duration): class Video(data.Dataset): - def __init__(self, video_path, - spatial_transform=None, temporal_transform=None, - sample_duration=16, get_loader=get_default_video_loader): + def __init__(self, + video_path, + spatial_transform=None, + temporal_transform=None, + sample_duration=16, + get_loader=get_default_video_loader): self.data = make_dataset(video_path, sample_duration) self.spatial_transform = spatial_transform diff --git 
a/generate_result_video/generate_result_video.py b/generate_result_video/generate_result_video.py index 5a2c4ce..13ae6b3 100644 --- a/generate_result_video/generate_result_video.py +++ b/generate_result_video/generate_result_video.py @@ -7,8 +7,11 @@ def get_fps(video_file_path, frames_directory_path): - p = subprocess.Popen('ffprobe {}'.format(video_file_path), - shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + p = subprocess.Popen( + 'ffprobe {}'.format(video_file_path), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) _, res = p.communicate() res = res.decode('utf-8') @@ -59,25 +62,29 @@ def get_fps(video_file_path, frames_directory_path): scores += np.array(clips[i]['scores']) scores /= n_elements unit_classes.append(class_names[np.argmax(scores)]) - unit_segments.append([clips[i]['segment'][0], - clips[i + n_elements - 1]['segment'][1]]) + unit_segments.append([ + clips[i]['segment'][0], clips[i + n_elements - 1]['segment'][1] + ]) if os.path.exists('tmp'): subprocess.call('rm -rf tmp', shell=True) subprocess.call('mkdir tmp', shell=True) - subprocess.call('ffmpeg -i {} tmp/image_%05d.jpg'.format(video_path), shell=True) + subprocess.call( + 'ffmpeg -i {} tmp/image_%06d.jpg'.format(video_path), shell=True) fps = get_fps(video_path, 'tmp') for i in range(len(unit_classes)): for j in range(unit_segments[i][0], unit_segments[i][1] + 1): - image = Image.open('tmp/image_{:05}.jpg'.format(j)).convert('RGB') + image = Image.open( + 'tmp/image_{:06}.jpg'.format(j)).convert('RGB') min_length = min(image.size) font_size = int(min_length * 0.05) - font = ImageFont.truetype(os.path.join(os.path.dirname(__file__), - 'SourceSansPro-Regular.ttf'), - font_size) + font = ImageFont.truetype( + os.path.join( + os.path.dirname(__file__), + 'SourceSansPro-Regular.ttf'), font_size) d = ImageDraw.Draw(image) textsize = d.textsize(unit_classes[i], font=font) x = int(font_size * 0.5) @@ -87,13 +94,18 @@ def get_fps(video_file_path, frames_directory_path): rect_position = (x, y, x + textsize[0] + x_offset * 2, y + textsize[1] + y_offset * 2) d.rectangle(rect_position, fill=(30, 30, 30)) - d.text((x + x_offset, y + y_offset), unit_classes[i], - font=font, fill=(235, 235, 235)) - image.save('tmp/image_{:05}_pred.jpg'.format(j)) - - dst_file_path = os.path.join(dst_directory_path, video_path.split('/')[-1]) - subprocess.call('ffmpeg -y -r {} -i tmp/image_%05d_pred.jpg -b:v 1000k {}'.format(fps, dst_file_path), - shell=True) + d.text((x + x_offset, y + y_offset), + unit_classes[i], + font=font, + fill=(235, 235, 235)) + image.save('tmp/image_{:06}_pred.jpg'.format(j)) + + dst_file_path = os.path.join(dst_directory_path, + video_path.split('/')[-1]) + subprocess.call( + 'ffmpeg -y -r {} -i tmp/image_%06d_pred.jpg -b:v 1000k {}'.format( + fps, dst_file_path), + shell=True) if os.path.exists('tmp'): subprocess.call('rm -rf tmp', shell=True) diff --git a/main.py b/main.py index b6d6f0a..df563a6 100644 --- a/main.py +++ b/main.py @@ -10,13 +10,16 @@ from model import generate_model from mean import get_mean from classify import classify_video +from tqdm import tqdm +from pathlib import Path +import shutil -if __name__=="__main__": +if __name__ == "__main__": opt = parse_opts() opt.mean = get_mean() opt.arch = '{}-{}'.format(opt.model_name, opt.model_depth) opt.sample_size = 112 - opt.sample_duration = 16 + opt.sample_duration = 64 opt.n_classes = 400 model = generate_model(opt) @@ -42,27 +45,26 @@ if opt.verbose: ffmpeg_loglevel = 'info' - if os.path.exists('tmp'): - subprocess.call('rm 
-rf tmp', shell=True) + tmp_path = Path('tmp') + if tmp_path.exists(): + shutil.rmtree(tmp_path) outputs = [] for input_file in input_files: video_path = os.path.join(opt.video_root, input_file) if os.path.exists(video_path): print(video_path) - subprocess.call('mkdir tmp', shell=True) - subprocess.call('ffmpeg -i {} tmp/image_%05d.jpg'.format(video_path), - shell=True) + tmp_path.mkdir() + subprocess.call( + 'ffmpeg -i {} tmp/image_%06d.jpg'.format(video_path), + shell=True) result = classify_video('tmp', input_file, class_names, model, opt) outputs.append(result) - subprocess.call('rm -rf tmp', shell=True) + shutil.rmtree(tmp_path) else: print('{} does not exist'.format(input_file)) - if os.path.exists('tmp'): - subprocess.call('rm -rf tmp', shell=True) - with open(opt.output, 'w') as f: json.dump(outputs, f) From e0f456a3bfb432a2b222a703352bd8b2424bfa1d Mon Sep 17 00:00:00 2001 From: Kosuke Fujimoto Date: Mon, 5 Nov 2018 00:08:55 +0900 Subject: [PATCH 2/5] Added tqdm on putting label on images --- generate_result_video/generate_result_video.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/generate_result_video/generate_result_video.py b/generate_result_video/generate_result_video.py index 13ae6b3..36e084d 100644 --- a/generate_result_video/generate_result_video.py +++ b/generate_result_video/generate_result_video.py @@ -4,6 +4,7 @@ import subprocess import numpy as np from PIL import Image, ImageDraw, ImageFont +from tqdm import tqdm def get_fps(video_file_path, frames_directory_path): @@ -75,8 +76,8 @@ def get_fps(video_file_path, frames_directory_path): fps = get_fps(video_path, 'tmp') - for i in range(len(unit_classes)): - for j in range(unit_segments[i][0], unit_segments[i][1] + 1): + for i in tqdm(range(len(unit_classes))): + for j in tqdm(range(unit_segments[i][0], unit_segments[i][1] + 1)): image = Image.open( 'tmp/image_{:06}.jpg'.format(j)).convert('RGB') min_length = min(image.size) From a59d3a51511ec7592c65054e3e5019a898c3aab2 Mon Sep 17 00:00:00 2001 From: Kosuke Fujimoto Date: Sun, 11 Nov 2018 15:39:37 +0900 Subject: [PATCH 3/5] Added time calculation and made generate image part as function --- .../generate_result_video.py | 69 ++++++++++++------- 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/generate_result_video/generate_result_video.py b/generate_result_video/generate_result_video.py index 36e084d..290b8a7 100644 --- a/generate_result_video/generate_result_video.py +++ b/generate_result_video/generate_result_video.py @@ -5,6 +5,8 @@ import numpy as np from PIL import Image, ImageDraw, ImageFont from tqdm import tqdm +import time +import multiprocessing def get_fps(video_file_path, frames_directory_path): @@ -28,6 +30,29 @@ def get_fps(video_file_path, frames_directory_path): return fps +def generate_result_image(frame_num, unit_class_num): + image = Image.open('tmp/image_{:06}.jpg'.format(frame_num)).convert('RGB') + min_length = min(image.size) + font_size = int(min_length * 0.05) + font = ImageFont.truetype( + os.path.join(os.path.dirname(__file__), 'SourceSansPro-Regular.ttf'), + font_size) + d = ImageDraw.Draw(image) + textsize = d.textsize(unit_classes[unit_class_num], font=font) + x = int(font_size * 0.5) + y = int(font_size * 0.25) + x_offset = x + y_offset = y + rect_position = (x, y, x + textsize[0] + x_offset * 2, + y + textsize[1] + y_offset * 2) + d.rectangle(rect_position, fill=(30, 30, 30)) + d.text((x + x_offset, y + y_offset), + unit_classes[i], + font=font, + fill=(235, 235, 235)) + return 
image.save('tmp/image_{:06}_pred.jpg'.format(frame_num)) + + if __name__ == '__main__': result_json_path = sys.argv[1] video_root_path = sys.argv[2] @@ -49,6 +74,7 @@ def get_fps(video_file_path, frames_directory_path): video_path = os.path.join(video_root_path, results[index]['video']) print(video_path) + #execute per video clips = results[index]['clips'] unit_classes = [] unit_segments = [] @@ -71,42 +97,37 @@ def get_fps(video_file_path, frames_directory_path): subprocess.call('rm -rf tmp', shell=True) subprocess.call('mkdir tmp', shell=True) + since = time.time() subprocess.call( 'ffmpeg -i {} tmp/image_%06d.jpg'.format(video_path), shell=True) + time_elapsed = time.time() - since + print('Extracting images complete in {:.0f}m {:.0f}s'.format( + time_elapsed // 60, time_elapsed % 60)) fps = get_fps(video_path, 'tmp') - for i in tqdm(range(len(unit_classes))): - for j in tqdm(range(unit_segments[i][0], unit_segments[i][1] + 1)): - image = Image.open( - 'tmp/image_{:06}.jpg'.format(j)).convert('RGB') - min_length = min(image.size) - font_size = int(min_length * 0.05) - font = ImageFont.truetype( - os.path.join( - os.path.dirname(__file__), - 'SourceSansPro-Regular.ttf'), font_size) - d = ImageDraw.Draw(image) - textsize = d.textsize(unit_classes[i], font=font) - x = int(font_size * 0.5) - y = int(font_size * 0.25) - x_offset = x - y_offset = y - rect_position = (x, y, x + textsize[0] + x_offset * 2, - y + textsize[1] + y_offset * 2) - d.rectangle(rect_position, fill=(30, 30, 30)) - d.text((x + x_offset, y + y_offset), - unit_classes[i], - font=font, - fill=(235, 235, 235)) - image.save('tmp/image_{:06}_pred.jpg'.format(j)) + since = time.time() + NUM_WORKERS = multiprocessing.cpu_count() * 2 + for unit_class_num in tqdm(range(len(unit_classes))): + for frame_num in tqdm( + range(unit_segments[unit_class_num][0], + unit_segments[unit_class_num][1] + 1)): + generate_result_image(frame_num, unit_class_num) + time_elapsed = time.time() - since + print('Generating images complete in {:.0f}m {:.0f}s'.format( + time_elapsed // 60, time_elapsed % 60)) dst_file_path = os.path.join(dst_directory_path, video_path.split('/')[-1]) + + since = time.time() subprocess.call( 'ffmpeg -y -r {} -i tmp/image_%06d_pred.jpg -b:v 1000k {}'.format( fps, dst_file_path), shell=True) + time_elapsed = time.time() - since + print('Creating video from images complete in {:.0f}m {:.0f}s'.format( + time_elapsed // 60, time_elapsed % 60)) if os.path.exists('tmp'): subprocess.call('rm -rf tmp', shell=True) From 5b82972b35bac53c057ca4ea7d3c5a734c53ddb2 Mon Sep 17 00:00:00 2001 From: Kosuke Fujimoto Date: Mon, 12 Nov 2018 11:42:11 +0900 Subject: [PATCH 4/5] Added parallel image processing (putting predicted class on images) --- .../generate_result_video.py | 67 ++++++++++++------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/generate_result_video/generate_result_video.py b/generate_result_video/generate_result_video.py index 290b8a7..88cffa5 100644 --- a/generate_result_video/generate_result_video.py +++ b/generate_result_video/generate_result_video.py @@ -7,6 +7,7 @@ from tqdm import tqdm import time import multiprocessing +from itertools import repeat def get_fps(video_file_path, frames_directory_path): @@ -30,27 +31,30 @@ def get_fps(video_file_path, frames_directory_path): return fps -def generate_result_image(frame_num, unit_class_num): - image = Image.open('tmp/image_{:06}.jpg'.format(frame_num)).convert('RGB') - min_length = min(image.size) - font_size = int(min_length * 0.05) - font = 
ImageFont.truetype( - os.path.join(os.path.dirname(__file__), 'SourceSansPro-Regular.ttf'), - font_size) - d = ImageDraw.Draw(image) - textsize = d.textsize(unit_classes[unit_class_num], font=font) - x = int(font_size * 0.5) - y = int(font_size * 0.25) - x_offset = x - y_offset = y - rect_position = (x, y, x + textsize[0] + x_offset * 2, - y + textsize[1] + y_offset * 2) - d.rectangle(rect_position, fill=(30, 30, 30)) - d.text((x + x_offset, y + y_offset), - unit_classes[i], - font=font, - fill=(235, 235, 235)) - return image.save('tmp/image_{:06}_pred.jpg'.format(frame_num)) +def generate_result_images(frame_nums, predicted_class): + for frame_num in frame_nums: + image = Image.open( + 'tmp/image_{:06}.jpg'.format(frame_num)).convert('RGB') + min_length = min(image.size) + font_size = int(min_length * 0.05) + font = ImageFont.truetype( + os.path.join( + os.path.dirname(__file__), 'SourceSansPro-Regular.ttf'), + font_size) + d = ImageDraw.Draw(image) + textsize = d.textsize(predicted_class, font=font) + x = int(font_size * 0.5) + y = int(font_size * 0.25) + x_offset = x + y_offset = y + rect_position = (x, y, x + textsize[0] + x_offset * 2, + y + textsize[1] + y_offset * 2) + d.rectangle(rect_position, fill=(30, 30, 30)) + d.text((x + x_offset, y + y_offset), + predicted_class, + font=font, + fill=(235, 235, 235)) + image.save('tmp/image_{:06}_pred.jpg'.format(frame_num)) if __name__ == '__main__': @@ -107,12 +111,23 @@ def generate_result_image(frame_num, unit_class_num): fps = get_fps(video_path, 'tmp') since = time.time() - NUM_WORKERS = multiprocessing.cpu_count() * 2 + num_workers = multiprocessing.cpu_count() for unit_class_num in tqdm(range(len(unit_classes))): - for frame_num in tqdm( - range(unit_segments[unit_class_num][0], - unit_segments[unit_class_num][1] + 1)): - generate_result_image(frame_num, unit_class_num) + frame_nums = range(unit_segments[unit_class_num][0], + unit_segments[unit_class_num][1] + 1) + # split frame numbers into multiple sub-arrays to process them in parallel + frame_nums_list = [ + list(i) for i in np.array_split(frame_nums, num_workers) + ] + unit_predicted_class = unit_classes[unit_class_num] + + # overlay predicted class name on images in parallel by multiprocessing + pool = multiprocessing.Pool(num_workers) + pool.starmap(generate_result_images, + zip(frame_nums_list, repeat(unit_predicted_class))) + pool.close() + pool.join() + time_elapsed = time.time() - since print('Generating images complete in {:.0f}m {:.0f}s'.format( time_elapsed // 60, time_elapsed % 60)) From 6740f1222ec6191948e795590b858fd858de9e71 Mon Sep 17 00:00:00 2001 From: Kosuke Fujimoto Date: Fri, 23 Nov 2018 22:22:42 +0900 Subject: [PATCH 5/5] Performed reformatting to align with original repo --- classify.py | 34 ++++++++----------- dataset.py | 9 ++--- .../generate_result_video.py | 21 ++++-------- main.py | 9 ++--- 4 files changed, 29 insertions(+), 44 deletions(-) diff --git a/classify.py b/classify.py index f4e98c9..ba473a4 100644 --- a/classify.py +++ b/classify.py @@ -2,8 +2,7 @@ from torch.autograd import Variable from dataset import Video -from spatial_transforms import (Compose, Normalize, Scale, CenterCrop, - ToTensor) +from spatial_transforms import (Compose, Normalize, Scale, CenterCrop, ToTensor) from temporal_transforms import LoopPadding from tqdm import tqdm @@ -11,24 +10,16 @@ def classify_video(video_dir, video_name, class_names, model, opt): assert opt.mode in ['score', 'feature'] - spatial_transform = Compose([ - Scale(opt.sample_size), - 
CenterCrop(opt.sample_size), - ToTensor(), - Normalize(opt.mean, [1, 1, 1]) - ]) + spatial_transform = Compose([Scale(opt.sample_size), + CenterCrop(opt.sample_size), + ToTensor(), + Normalize(opt.mean, [1, 1, 1])]) temporal_transform = LoopPadding(opt.sample_duration) - data = Video( - video_dir, - spatial_transform=spatial_transform, - temporal_transform=temporal_transform, - sample_duration=opt.sample_duration) - data_loader = torch.utils.data.DataLoader( - data, - batch_size=opt.batch_size, - shuffle=False, - num_workers=opt.n_threads, - pin_memory=True) + data = Video(video_dir, spatial_transform=spatial_transform, + temporal_transform=temporal_transform, + sample_duration=opt.sample_duration) + data_loader = torch.utils.data.DataLoader(data, batch_size=opt.batch_size, + shuffle=False, num_workers=opt.n_threads, pin_memory=True) video_outputs = [] video_segments = [] @@ -42,7 +33,10 @@ def classify_video(video_dir, video_name, class_names, model, opt): print('end inference') video_outputs = torch.cat(video_outputs) video_segments = torch.cat(video_segments) - results = {'video': video_name, 'clips': []} + results = { + 'video': video_name, + 'clips': [] + } _, max_indices = video_outputs.max(dim=1) for i in range(video_outputs.size(0)): diff --git a/dataset.py b/dataset.py index 3816209..b82db1f 100644 --- a/dataset.py +++ b/dataset.py @@ -103,12 +103,9 @@ def make_dataset(video_path, sample_duration): class Video(data.Dataset): - def __init__(self, - video_path, - spatial_transform=None, - temporal_transform=None, - sample_duration=16, - get_loader=get_default_video_loader): + def __init__(self, video_path, + spatial_transform=None, temporal_transform=None, + sample_duration=16, get_loader=get_default_video_loader): self.data = make_dataset(video_path, sample_duration) self.spatial_transform = spatial_transform diff --git a/generate_result_video/generate_result_video.py b/generate_result_video/generate_result_video.py index 88cffa5..3040530 100644 --- a/generate_result_video/generate_result_video.py +++ b/generate_result_video/generate_result_video.py @@ -11,11 +11,8 @@ def get_fps(video_file_path, frames_directory_path): - p = subprocess.Popen( - 'ffprobe {}'.format(video_file_path), - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + p = subprocess.Popen('ffprobe {}'.format(video_file_path), + shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) _, res = p.communicate() res = res.decode('utf-8') @@ -93,17 +90,15 @@ def generate_result_images(frame_nums, predicted_class): scores += np.array(clips[i]['scores']) scores /= n_elements unit_classes.append(class_names[np.argmax(scores)]) - unit_segments.append([ - clips[i]['segment'][0], clips[i + n_elements - 1]['segment'][1] - ]) + unit_segments.append([clips[i]['segment'][0], + clips[i + n_elements - 1]['segment'][1]]) if os.path.exists('tmp'): subprocess.call('rm -rf tmp', shell=True) subprocess.call('mkdir tmp', shell=True) since = time.time() - subprocess.call( - 'ffmpeg -i {} tmp/image_%06d.jpg'.format(video_path), shell=True) + subprocess.call('ffmpeg -i {} tmp/image_%06d.jpg'.format(video_path), shell=True) time_elapsed = time.time() - since print('Extracting images complete in {:.0f}m {:.0f}s'.format( time_elapsed // 60, time_elapsed % 60)) @@ -136,10 +131,8 @@ def generate_result_images(frame_nums, predicted_class): video_path.split('/')[-1]) since = time.time() - subprocess.call( - 'ffmpeg -y -r {} -i tmp/image_%06d_pred.jpg -b:v 1000k {}'.format( - fps, dst_file_path), - shell=True) + 
subprocess.call('ffmpeg -y -r {} -i tmp/image_%06d_pred.jpg -b:v 1000k {}'.format(fps, dst_file_path), + shell=True) time_elapsed = time.time() - since print('Creating video from images complete in {:.0f}m {:.0f}s'.format( time_elapsed // 60, time_elapsed % 60)) diff --git a/main.py b/main.py index df563a6..bf95b29 100644 --- a/main.py +++ b/main.py @@ -19,7 +19,7 @@ opt.mean = get_mean() opt.arch = '{}-{}'.format(opt.model_name, opt.model_depth) opt.sample_size = 112 - opt.sample_duration = 64 + opt.sample_duration = 16 opt.n_classes = 400 model = generate_model(opt) @@ -55,9 +55,8 @@ if os.path.exists(video_path): print(video_path) tmp_path.mkdir() - subprocess.call( - 'ffmpeg -i {} tmp/image_%06d.jpg'.format(video_path), - shell=True) + subprocess.call('ffmpeg -i {} tmp/image_%06d.jpg'.format(video_path), + shell=True) result = classify_video('tmp', input_file, class_names, model, opt) outputs.append(result) @@ -66,5 +65,7 @@ else: print('{} does not exist'.format(input_file)) + if tmp_path.exists(): + shutil.rmtree(tmp_path) with open(opt.output, 'w') as f: json.dump(outputs, f)
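
classify.py in this series wraps each batch in Variable(inputs, volatile=True), the pre-0.4 PyTorch idiom for inference without gradient tracking. A minimal sketch of the equivalent on PyTorch >= 0.4, where the volatile flag is ignored and torch.no_grad() is used instead; run_inference, model, and data_loader are illustrative names, not this repo's API:

import torch


def run_inference(model, data_loader):
    # Evaluation mode disables dropout and batch-norm statistic updates.
    model.eval()
    outputs = []
    # no_grad() replaces Variable(inputs, volatile=True) for inference.
    with torch.no_grad():
        for inputs, segments in data_loader:
            outputs.append(model(inputs).cpu())
    return torch.cat(outputs)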
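
The per-unit aggregation in generate_result_video.py averages the score vectors of the consecutive clips falling inside one output unit, then takes the argmax class over the averaged scores and spans the unit segment from the first clip's start to the last clip's end. A self-contained sketch of that step, with a made-up two-class clips list mimicking the result-JSON structure:

import numpy as np

class_names = ['walking', 'running']
# Two 16-frame clips as they appear in the result JSON (scores are made up).
clips = [
    {'segment': [1, 16], 'scores': [0.2, 0.8]},
    {'segment': [17, 32], 'scores': [0.4, 0.6]},
]

n_elements = len(clips)  # clips per output unit
scores = np.zeros(len(class_names))
for clip in clips:
    scores += np.array(clip['scores'])
scores /= n_elements
unit_class = class_names[int(np.argmax(scores))]
unit_segment = [clips[0]['segment'][0], clips[-1]['segment'][1]]
print(unit_class, unit_segment)  # running [1, 32]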
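
PATCH 4/5 parallelizes the frame labeling by splitting each unit's frame numbers into num_workers chunks with np.array_split and fanning the chunks out via Pool.starmap, pairing every chunk with the same class label through itertools.repeat. A standalone sketch of that pattern; label_frames stands in for generate_result_images and prints instead of editing JPEGs:

import multiprocessing
from itertools import repeat

import numpy as np


def label_frames(frame_nums, predicted_class):
    # Each worker receives one chunk of frame numbers plus the shared label.
    for n in frame_nums:
        print('frame {:06d} -> {}'.format(n, predicted_class))


if __name__ == '__main__':
    num_workers = multiprocessing.cpu_count()
    frame_nums = list(range(1, 101))  # e.g. one 100-frame unit segment
    # array_split tolerates lengths that do not divide evenly by num_workers.
    chunks = [list(a) for a in np.array_split(frame_nums, num_workers)]
    with multiprocessing.Pool(num_workers) as pool:
        # repeat() pairs every chunk with the same predicted class; zip()
        # stops at the end of the finite chunks list.
        pool.starmap(label_frames, zip(chunks, repeat('running')))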