From 0cf2ea453f9ce028a22a07f2ce29693367d9fcf1 Mon Sep 17 00:00:00 2001 From: Kosuke Fujimoto Date: Sun, 4 Nov 2018 20:44:57 +0900 Subject: [PATCH 1/5] Added tqdm for inference part and replaced os call with python native functions. Changed image file's name from 5 digits to 6 digits for generating long video --- classify.py | 41 ++++++++++------- dataset.py | 11 +++-- .../generate_result_video.py | 44 ++++++++++++------- main.py | 24 +++++----- 4 files changed, 73 insertions(+), 47 deletions(-) diff --git a/classify.py b/classify.py index 78550f8..f4e98c9 100644 --- a/classify.py +++ b/classify.py @@ -2,38 +2,47 @@ from torch.autograd import Variable from dataset import Video -from spatial_transforms import (Compose, Normalize, Scale, CenterCrop, ToTensor) +from spatial_transforms import (Compose, Normalize, Scale, CenterCrop, + ToTensor) from temporal_transforms import LoopPadding +from tqdm import tqdm + def classify_video(video_dir, video_name, class_names, model, opt): assert opt.mode in ['score', 'feature'] - spatial_transform = Compose([Scale(opt.sample_size), - CenterCrop(opt.sample_size), - ToTensor(), - Normalize(opt.mean, [1, 1, 1])]) + spatial_transform = Compose([ + Scale(opt.sample_size), + CenterCrop(opt.sample_size), + ToTensor(), + Normalize(opt.mean, [1, 1, 1]) + ]) temporal_transform = LoopPadding(opt.sample_duration) - data = Video(video_dir, spatial_transform=spatial_transform, - temporal_transform=temporal_transform, - sample_duration=opt.sample_duration) - data_loader = torch.utils.data.DataLoader(data, batch_size=opt.batch_size, - shuffle=False, num_workers=opt.n_threads, pin_memory=True) + data = Video( + video_dir, + spatial_transform=spatial_transform, + temporal_transform=temporal_transform, + sample_duration=opt.sample_duration) + data_loader = torch.utils.data.DataLoader( + data, + batch_size=opt.batch_size, + shuffle=False, + num_workers=opt.n_threads, + pin_memory=True) video_outputs = [] video_segments = [] - for i, (inputs, segments) in enumerate(data_loader): + print('start inference') + for i, (inputs, segments) in enumerate(tqdm(data_loader)): inputs = Variable(inputs, volatile=True) outputs = model(inputs) video_outputs.append(outputs.cpu().data) video_segments.append(segments) - + print('end inference') video_outputs = torch.cat(video_outputs) video_segments = torch.cat(video_segments) - results = { - 'video': video_name, - 'clips': [] - } + results = {'video': video_name, 'clips': []} _, max_indices = video_outputs.max(dim=1) for i in range(video_outputs.size(0)): diff --git a/dataset.py b/dataset.py index 71595c2..3816209 100644 --- a/dataset.py +++ b/dataset.py @@ -34,7 +34,7 @@ def get_default_image_loader(): def video_loader(video_dir_path, frame_indices, image_loader): video = [] for i in frame_indices: - image_path = os.path.join(video_dir_path, 'image_{:05d}.jpg'.format(i)) + image_path = os.path.join(video_dir_path, 'image_{:06d}.jpg'.format(i)) if os.path.exists(image_path): video.append(image_loader(image_path)) else: @@ -103,9 +103,12 @@ def make_dataset(video_path, sample_duration): class Video(data.Dataset): - def __init__(self, video_path, - spatial_transform=None, temporal_transform=None, - sample_duration=16, get_loader=get_default_video_loader): + def __init__(self, + video_path, + spatial_transform=None, + temporal_transform=None, + sample_duration=16, + get_loader=get_default_video_loader): self.data = make_dataset(video_path, sample_duration) self.spatial_transform = spatial_transform diff --git 
a/generate_result_video/generate_result_video.py b/generate_result_video/generate_result_video.py index 5a2c4ce..13ae6b3 100644 --- a/generate_result_video/generate_result_video.py +++ b/generate_result_video/generate_result_video.py @@ -7,8 +7,11 @@ def get_fps(video_file_path, frames_directory_path): - p = subprocess.Popen('ffprobe {}'.format(video_file_path), - shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + p = subprocess.Popen( + 'ffprobe {}'.format(video_file_path), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) _, res = p.communicate() res = res.decode('utf-8') @@ -59,25 +62,29 @@ def get_fps(video_file_path, frames_directory_path): scores += np.array(clips[i]['scores']) scores /= n_elements unit_classes.append(class_names[np.argmax(scores)]) - unit_segments.append([clips[i]['segment'][0], - clips[i + n_elements - 1]['segment'][1]]) + unit_segments.append([ + clips[i]['segment'][0], clips[i + n_elements - 1]['segment'][1] + ]) if os.path.exists('tmp'): subprocess.call('rm -rf tmp', shell=True) subprocess.call('mkdir tmp', shell=True) - subprocess.call('ffmpeg -i {} tmp/image_%05d.jpg'.format(video_path), shell=True) + subprocess.call( + 'ffmpeg -i {} tmp/image_%06d.jpg'.format(video_path), shell=True) fps = get_fps(video_path, 'tmp') for i in range(len(unit_classes)): for j in range(unit_segments[i][0], unit_segments[i][1] + 1): - image = Image.open('tmp/image_{:05}.jpg'.format(j)).convert('RGB') + image = Image.open( + 'tmp/image_{:06}.jpg'.format(j)).convert('RGB') min_length = min(image.size) font_size = int(min_length * 0.05) - font = ImageFont.truetype(os.path.join(os.path.dirname(__file__), - 'SourceSansPro-Regular.ttf'), - font_size) + font = ImageFont.truetype( + os.path.join( + os.path.dirname(__file__), + 'SourceSansPro-Regular.ttf'), font_size) d = ImageDraw.Draw(image) textsize = d.textsize(unit_classes[i], font=font) x = int(font_size * 0.5) @@ -87,13 +94,18 @@ def get_fps(video_file_path, frames_directory_path): rect_position = (x, y, x + textsize[0] + x_offset * 2, y + textsize[1] + y_offset * 2) d.rectangle(rect_position, fill=(30, 30, 30)) - d.text((x + x_offset, y + y_offset), unit_classes[i], - font=font, fill=(235, 235, 235)) - image.save('tmp/image_{:05}_pred.jpg'.format(j)) - - dst_file_path = os.path.join(dst_directory_path, video_path.split('/')[-1]) - subprocess.call('ffmpeg -y -r {} -i tmp/image_%05d_pred.jpg -b:v 1000k {}'.format(fps, dst_file_path), - shell=True) + d.text((x + x_offset, y + y_offset), + unit_classes[i], + font=font, + fill=(235, 235, 235)) + image.save('tmp/image_{:06}_pred.jpg'.format(j)) + + dst_file_path = os.path.join(dst_directory_path, + video_path.split('/')[-1]) + subprocess.call( + 'ffmpeg -y -r {} -i tmp/image_%06d_pred.jpg -b:v 1000k {}'.format( + fps, dst_file_path), + shell=True) if os.path.exists('tmp'): subprocess.call('rm -rf tmp', shell=True) diff --git a/main.py b/main.py index b6d6f0a..df563a6 100644 --- a/main.py +++ b/main.py @@ -10,13 +10,16 @@ from model import generate_model from mean import get_mean from classify import classify_video +from tqdm import tqdm +from pathlib import Path +import shutil -if __name__=="__main__": +if __name__ == "__main__": opt = parse_opts() opt.mean = get_mean() opt.arch = '{}-{}'.format(opt.model_name, opt.model_depth) opt.sample_size = 112 - opt.sample_duration = 16 + opt.sample_duration = 64 opt.n_classes = 400 model = generate_model(opt) @@ -42,27 +45,26 @@ if opt.verbose: ffmpeg_loglevel = 'info' - if os.path.exists('tmp'): - subprocess.call('rm 
-rf tmp', shell=True) + tmp_path = Path('tmp') + if tmp_path.exists(): + shutil.rmtree(tmp_path) outputs = [] for input_file in input_files: video_path = os.path.join(opt.video_root, input_file) if os.path.exists(video_path): print(video_path) - subprocess.call('mkdir tmp', shell=True) - subprocess.call('ffmpeg -i {} tmp/image_%05d.jpg'.format(video_path), - shell=True) + tmp_path.mkdir() + subprocess.call( + 'ffmpeg -i {} tmp/image_%06d.jpg'.format(video_path), + shell=True) result = classify_video('tmp', input_file, class_names, model, opt) outputs.append(result) - subprocess.call('rm -rf tmp', shell=True) + shutil.rmtree(tmp_path) else: print('{} does not exist'.format(input_file)) - if os.path.exists('tmp'): - subprocess.call('rm -rf tmp', shell=True) - with open(opt.output, 'w') as f: json.dump(outputs, f) From e0f456a3bfb432a2b222a703352bd8b2424bfa1d Mon Sep 17 00:00:00 2001 From: Kosuke Fujimoto Date: Mon, 5 Nov 2018 00:08:55 +0900 Subject: [PATCH 2/5] Added tqdm on putting label on images --- generate_result_video/generate_result_video.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/generate_result_video/generate_result_video.py b/generate_result_video/generate_result_video.py index 13ae6b3..36e084d 100644 --- a/generate_result_video/generate_result_video.py +++ b/generate_result_video/generate_result_video.py @@ -4,6 +4,7 @@ import subprocess import numpy as np from PIL import Image, ImageDraw, ImageFont +from tqdm import tqdm def get_fps(video_file_path, frames_directory_path): @@ -75,8 +76,8 @@ def get_fps(video_file_path, frames_directory_path): fps = get_fps(video_path, 'tmp') - for i in range(len(unit_classes)): - for j in range(unit_segments[i][0], unit_segments[i][1] + 1): + for i in tqdm(range(len(unit_classes))): + for j in tqdm(range(unit_segments[i][0], unit_segments[i][1] + 1)): image = Image.open( 'tmp/image_{:06}.jpg'.format(j)).convert('RGB') min_length = min(image.size) From a59d3a51511ec7592c65054e3e5019a898c3aab2 Mon Sep 17 00:00:00 2001 From: Kosuke Fujimoto Date: Sun, 11 Nov 2018 15:39:37 +0900 Subject: [PATCH 3/5] Added time calculation and made generate image part as function --- .../generate_result_video.py | 69 ++++++++++++------- 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/generate_result_video/generate_result_video.py b/generate_result_video/generate_result_video.py index 36e084d..290b8a7 100644 --- a/generate_result_video/generate_result_video.py +++ b/generate_result_video/generate_result_video.py @@ -5,6 +5,8 @@ import numpy as np from PIL import Image, ImageDraw, ImageFont from tqdm import tqdm +import time +import multiprocessing def get_fps(video_file_path, frames_directory_path): @@ -28,6 +30,29 @@ def get_fps(video_file_path, frames_directory_path): return fps +def generate_result_image(frame_num, unit_class_num): + image = Image.open('tmp/image_{:06}.jpg'.format(frame_num)).convert('RGB') + min_length = min(image.size) + font_size = int(min_length * 0.05) + font = ImageFont.truetype( + os.path.join(os.path.dirname(__file__), 'SourceSansPro-Regular.ttf'), + font_size) + d = ImageDraw.Draw(image) + textsize = d.textsize(unit_classes[unit_class_num], font=font) + x = int(font_size * 0.5) + y = int(font_size * 0.25) + x_offset = x + y_offset = y + rect_position = (x, y, x + textsize[0] + x_offset * 2, + y + textsize[1] + y_offset * 2) + d.rectangle(rect_position, fill=(30, 30, 30)) + d.text((x + x_offset, y + y_offset), + unit_classes[i], + font=font, + fill=(235, 235, 235)) + return 
image.save('tmp/image_{:06}_pred.jpg'.format(frame_num)) + + if __name__ == '__main__': result_json_path = sys.argv[1] video_root_path = sys.argv[2] @@ -49,6 +74,7 @@ def get_fps(video_file_path, frames_directory_path): video_path = os.path.join(video_root_path, results[index]['video']) print(video_path) + #execute per video clips = results[index]['clips'] unit_classes = [] unit_segments = [] @@ -71,42 +97,37 @@ def get_fps(video_file_path, frames_directory_path): subprocess.call('rm -rf tmp', shell=True) subprocess.call('mkdir tmp', shell=True) + since = time.time() subprocess.call( 'ffmpeg -i {} tmp/image_%06d.jpg'.format(video_path), shell=True) + time_elapsed = time.time() - since + print('Extracting images complete in {:.0f}m {:.0f}s'.format( + time_elapsed // 60, time_elapsed % 60)) fps = get_fps(video_path, 'tmp') - for i in tqdm(range(len(unit_classes))): - for j in tqdm(range(unit_segments[i][0], unit_segments[i][1] + 1)): - image = Image.open( - 'tmp/image_{:06}.jpg'.format(j)).convert('RGB') - min_length = min(image.size) - font_size = int(min_length * 0.05) - font = ImageFont.truetype( - os.path.join( - os.path.dirname(__file__), - 'SourceSansPro-Regular.ttf'), font_size) - d = ImageDraw.Draw(image) - textsize = d.textsize(unit_classes[i], font=font) - x = int(font_size * 0.5) - y = int(font_size * 0.25) - x_offset = x - y_offset = y - rect_position = (x, y, x + textsize[0] + x_offset * 2, - y + textsize[1] + y_offset * 2) - d.rectangle(rect_position, fill=(30, 30, 30)) - d.text((x + x_offset, y + y_offset), - unit_classes[i], - font=font, - fill=(235, 235, 235)) - image.save('tmp/image_{:06}_pred.jpg'.format(j)) + since = time.time() + NUM_WORKERS = multiprocessing.cpu_count() * 2 + for unit_class_num in tqdm(range(len(unit_classes))): + for frame_num in tqdm( + range(unit_segments[unit_class_num][0], + unit_segments[unit_class_num][1] + 1)): + generate_result_image(frame_num, unit_class_num) + time_elapsed = time.time() - since + print('Generating images complete in {:.0f}m {:.0f}s'.format( + time_elapsed // 60, time_elapsed % 60)) dst_file_path = os.path.join(dst_directory_path, video_path.split('/')[-1]) + + since = time.time() subprocess.call( 'ffmpeg -y -r {} -i tmp/image_%06d_pred.jpg -b:v 1000k {}'.format( fps, dst_file_path), shell=True) + time_elapsed = time.time() - since + print('Creating video from images complete in {:.0f}m {:.0f}s'.format( + time_elapsed // 60, time_elapsed % 60)) if os.path.exists('tmp'): subprocess.call('rm -rf tmp', shell=True) From 5b82972b35bac53c057ca4ea7d3c5a734c53ddb2 Mon Sep 17 00:00:00 2001 From: Kosuke Fujimoto Date: Mon, 12 Nov 2018 11:42:11 +0900 Subject: [PATCH 4/5] Added parallel image processing (putting predicted class on images) --- .../generate_result_video.py | 67 ++++++++++++------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/generate_result_video/generate_result_video.py b/generate_result_video/generate_result_video.py index 290b8a7..88cffa5 100644 --- a/generate_result_video/generate_result_video.py +++ b/generate_result_video/generate_result_video.py @@ -7,6 +7,7 @@ from tqdm import tqdm import time import multiprocessing +from itertools import repeat def get_fps(video_file_path, frames_directory_path): @@ -30,27 +31,30 @@ def get_fps(video_file_path, frames_directory_path): return fps -def generate_result_image(frame_num, unit_class_num): - image = Image.open('tmp/image_{:06}.jpg'.format(frame_num)).convert('RGB') - min_length = min(image.size) - font_size = int(min_length * 0.05) - font = 
ImageFont.truetype( - os.path.join(os.path.dirname(__file__), 'SourceSansPro-Regular.ttf'), - font_size) - d = ImageDraw.Draw(image) - textsize = d.textsize(unit_classes[unit_class_num], font=font) - x = int(font_size * 0.5) - y = int(font_size * 0.25) - x_offset = x - y_offset = y - rect_position = (x, y, x + textsize[0] + x_offset * 2, - y + textsize[1] + y_offset * 2) - d.rectangle(rect_position, fill=(30, 30, 30)) - d.text((x + x_offset, y + y_offset), - unit_classes[i], - font=font, - fill=(235, 235, 235)) - return image.save('tmp/image_{:06}_pred.jpg'.format(frame_num)) +def generate_result_images(frame_nums, predicted_class): + for frame_num in frame_nums: + image = Image.open( + 'tmp/image_{:06}.jpg'.format(frame_num)).convert('RGB') + min_length = min(image.size) + font_size = int(min_length * 0.05) + font = ImageFont.truetype( + os.path.join( + os.path.dirname(__file__), 'SourceSansPro-Regular.ttf'), + font_size) + d = ImageDraw.Draw(image) + textsize = d.textsize(predicted_class, font=font) + x = int(font_size * 0.5) + y = int(font_size * 0.25) + x_offset = x + y_offset = y + rect_position = (x, y, x + textsize[0] + x_offset * 2, + y + textsize[1] + y_offset * 2) + d.rectangle(rect_position, fill=(30, 30, 30)) + d.text((x + x_offset, y + y_offset), + predicted_class, + font=font, + fill=(235, 235, 235)) + image.save('tmp/image_{:06}_pred.jpg'.format(frame_num)) if __name__ == '__main__': @@ -107,12 +111,23 @@ def generate_result_image(frame_num, unit_class_num): fps = get_fps(video_path, 'tmp') since = time.time() - NUM_WORKERS = multiprocessing.cpu_count() * 2 + num_workers = multiprocessing.cpu_count() for unit_class_num in tqdm(range(len(unit_classes))): - for frame_num in tqdm( - range(unit_segments[unit_class_num][0], - unit_segments[unit_class_num][1] + 1)): - generate_result_image(frame_num, unit_class_num) + frame_nums = range(unit_segments[unit_class_num][0], + unit_segments[unit_class_num][1] + 1) + # split frame numbers into multiple sub-arrays to process them in parallel + frame_nums_list = [ + list(i) for i in np.array_split(frame_nums, num_workers) + ] + unit_predicted_class = unit_classes[unit_class_num] + + # overlay predicted class name on images in parallel by multiprocessing + pool = multiprocessing.Pool(num_workers) + pool.starmap(generate_result_images, + zip(frame_nums_list, repeat(unit_predicted_class))) + pool.close() + pool.join() + time_elapsed = time.time() - since print('Generating images complete in {:.0f}m {:.0f}s'.format( time_elapsed // 60, time_elapsed % 60)) From 6740f1222ec6191948e795590b858fd858de9e71 Mon Sep 17 00:00:00 2001 From: Kosuke Fujimoto Date: Fri, 23 Nov 2018 22:22:42 +0900 Subject: [PATCH 5/5] Performed reformatting to align with original repo --- classify.py | 34 ++++++++----------- dataset.py | 9 ++--- .../generate_result_video.py | 21 ++++-------- main.py | 9 ++--- 4 files changed, 29 insertions(+), 44 deletions(-) diff --git a/classify.py b/classify.py index f4e98c9..ba473a4 100644 --- a/classify.py +++ b/classify.py @@ -2,8 +2,7 @@ from torch.autograd import Variable from dataset import Video -from spatial_transforms import (Compose, Normalize, Scale, CenterCrop, - ToTensor) +from spatial_transforms import (Compose, Normalize, Scale, CenterCrop, ToTensor) from temporal_transforms import LoopPadding from tqdm import tqdm @@ -11,24 +10,16 @@ def classify_video(video_dir, video_name, class_names, model, opt): assert opt.mode in ['score', 'feature'] - spatial_transform = Compose([ - Scale(opt.sample_size), - 
CenterCrop(opt.sample_size), - ToTensor(), - Normalize(opt.mean, [1, 1, 1]) - ]) + spatial_transform = Compose([Scale(opt.sample_size), + CenterCrop(opt.sample_size), + ToTensor(), + Normalize(opt.mean, [1, 1, 1])]) temporal_transform = LoopPadding(opt.sample_duration) - data = Video( - video_dir, - spatial_transform=spatial_transform, - temporal_transform=temporal_transform, - sample_duration=opt.sample_duration) - data_loader = torch.utils.data.DataLoader( - data, - batch_size=opt.batch_size, - shuffle=False, - num_workers=opt.n_threads, - pin_memory=True) + data = Video(video_dir, spatial_transform=spatial_transform, + temporal_transform=temporal_transform, + sample_duration=opt.sample_duration) + data_loader = torch.utils.data.DataLoader(data, batch_size=opt.batch_size, + shuffle=False, num_workers=opt.n_threads, pin_memory=True) video_outputs = [] video_segments = [] @@ -42,7 +33,10 @@ def classify_video(video_dir, video_name, class_names, model, opt): print('end inference') video_outputs = torch.cat(video_outputs) video_segments = torch.cat(video_segments) - results = {'video': video_name, 'clips': []} + results = { + 'video': video_name, + 'clips': [] + } _, max_indices = video_outputs.max(dim=1) for i in range(video_outputs.size(0)): diff --git a/dataset.py b/dataset.py index 3816209..b82db1f 100644 --- a/dataset.py +++ b/dataset.py @@ -103,12 +103,9 @@ def make_dataset(video_path, sample_duration): class Video(data.Dataset): - def __init__(self, - video_path, - spatial_transform=None, - temporal_transform=None, - sample_duration=16, - get_loader=get_default_video_loader): + def __init__(self, video_path, + spatial_transform=None, temporal_transform=None, + sample_duration=16, get_loader=get_default_video_loader): self.data = make_dataset(video_path, sample_duration) self.spatial_transform = spatial_transform diff --git a/generate_result_video/generate_result_video.py b/generate_result_video/generate_result_video.py index 88cffa5..3040530 100644 --- a/generate_result_video/generate_result_video.py +++ b/generate_result_video/generate_result_video.py @@ -11,11 +11,8 @@ def get_fps(video_file_path, frames_directory_path): - p = subprocess.Popen( - 'ffprobe {}'.format(video_file_path), - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + p = subprocess.Popen('ffprobe {}'.format(video_file_path), + shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) _, res = p.communicate() res = res.decode('utf-8') @@ -93,17 +90,15 @@ def generate_result_images(frame_nums, predicted_class): scores += np.array(clips[i]['scores']) scores /= n_elements unit_classes.append(class_names[np.argmax(scores)]) - unit_segments.append([ - clips[i]['segment'][0], clips[i + n_elements - 1]['segment'][1] - ]) + unit_segments.append([clips[i]['segment'][0], + clips[i + n_elements - 1]['segment'][1]]) if os.path.exists('tmp'): subprocess.call('rm -rf tmp', shell=True) subprocess.call('mkdir tmp', shell=True) since = time.time() - subprocess.call( - 'ffmpeg -i {} tmp/image_%06d.jpg'.format(video_path), shell=True) + subprocess.call('ffmpeg -i {} tmp/image_%06d.jpg'.format(video_path), shell=True) time_elapsed = time.time() - since print('Extracting images complete in {:.0f}m {:.0f}s'.format( time_elapsed // 60, time_elapsed % 60)) @@ -136,10 +131,8 @@ def generate_result_images(frame_nums, predicted_class): video_path.split('/')[-1]) since = time.time() - subprocess.call( - 'ffmpeg -y -r {} -i tmp/image_%06d_pred.jpg -b:v 1000k {}'.format( - fps, dst_file_path), - shell=True) + 
subprocess.call('ffmpeg -y -r {} -i tmp/image_%06d_pred.jpg -b:v 1000k {}'.format(fps, dst_file_path), + shell=True) time_elapsed = time.time() - since print('Creating video from images complete in {:.0f}m {:.0f}s'.format( time_elapsed // 60, time_elapsed % 60)) diff --git a/main.py b/main.py index df563a6..bf95b29 100644 --- a/main.py +++ b/main.py @@ -19,7 +19,7 @@ opt.mean = get_mean() opt.arch = '{}-{}'.format(opt.model_name, opt.model_depth) opt.sample_size = 112 - opt.sample_duration = 64 + opt.sample_duration = 16 opt.n_classes = 400 model = generate_model(opt) @@ -55,9 +55,8 @@ if os.path.exists(video_path): print(video_path) tmp_path.mkdir() - subprocess.call( - 'ffmpeg -i {} tmp/image_%06d.jpg'.format(video_path), - shell=True) + subprocess.call('ffmpeg -i {} tmp/image_%06d.jpg'.format(video_path), + shell=True) result = classify_video('tmp', input_file, class_names, model, opt) outputs.append(result) @@ -66,5 +65,7 @@ else: print('{} does not exist'.format(input_file)) + if tmp_path.exists(): + shutil.rmtree(tmp_path) with open(opt.output, 'w') as f: json.dump(outputs, f)
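
classify.py in this series wraps each batch in Variable(inputs, volatile=True), the pre-0.4 PyTorch idiom for inference without gradient tracking. A minimal sketch of the equivalent on PyTorch >= 0.4, where the volatile flag is ignored and torch.no_grad() is used instead; run_inference, model, and data_loader are illustrative names, not this repo's API:

import torch


def run_inference(model, data_loader):
    # Evaluation mode disables dropout and batch-norm statistic updates.
    model.eval()
    outputs = []
    # no_grad() replaces Variable(inputs, volatile=True) for inference.
    with torch.no_grad():
        for inputs, segments in data_loader:
            outputs.append(model(inputs).cpu())
    return torch.cat(outputs)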
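
The per-unit aggregation in generate_result_video.py averages the score vectors of the consecutive clips falling inside one output unit, then takes the argmax class over the averaged scores and spans the unit segment from the first clip's start to the last clip's end. A self-contained sketch of that step, with a made-up two-class clips list mimicking the result-JSON structure:

import numpy as np

class_names = ['walking', 'running']
# Two 16-frame clips as they appear in the result JSON (scores are made up).
clips = [
    {'segment': [1, 16], 'scores': [0.2, 0.8]},
    {'segment': [17, 32], 'scores': [0.4, 0.6]},
]

n_elements = len(clips)  # clips per output unit
scores = np.zeros(len(class_names))
for clip in clips:
    scores += np.array(clip['scores'])
scores /= n_elements
unit_class = class_names[int(np.argmax(scores))]
unit_segment = [clips[0]['segment'][0], clips[-1]['segment'][1]]
print(unit_class, unit_segment)  # running [1, 32]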
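
PATCH 4/5 parallelizes the frame labeling by splitting each unit's frame numbers into num_workers chunks with np.array_split and fanning the chunks out via Pool.starmap, pairing every chunk with the same class label through itertools.repeat. A standalone sketch of that pattern; label_frames stands in for generate_result_images and prints instead of editing JPEGs:

import multiprocessing
from itertools import repeat

import numpy as np


def label_frames(frame_nums, predicted_class):
    # Each worker receives one chunk of frame numbers plus the shared label.
    for n in frame_nums:
        print('frame {:06d} -> {}'.format(n, predicted_class))


if __name__ == '__main__':
    num_workers = multiprocessing.cpu_count()
    frame_nums = list(range(1, 101))  # e.g. one 100-frame unit segment
    # array_split tolerates lengths that do not divide evenly by num_workers.
    chunks = [list(a) for a in np.array_split(frame_nums, num_workers)]
    with multiprocessing.Pool(num_workers) as pool:
        # repeat() pairs every chunk with the same predicted class; zip()
        # stops at the end of the finite chunks list.
        pool.starmap(label_frames, zip(chunks, repeat('running')))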