From 07cf6c2ba22634377affa1471e41c3a9b570b472 Mon Sep 17 00:00:00 2001
From: Oscar <54097108+oscar-davids@users.noreply.github.com>
Date: Tue, 26 May 2020 20:48:16 +0800
Subject: [PATCH 01/12] Create work-diary.me

---
 work-diary.me | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 work-diary.me

diff --git a/work-diary.me b/work-diary.me
new file mode 100644
index 00000000..f151f404
--- /dev/null
+++ b/work-diary.me
@@ -0,0 +1,3 @@
+https://www.kaggle.com/c/youtube8m-2019/overview/getting-started
+https://github.com/google/youtube-8m#running-on-googles-cloud-machine-learning-platform
+https://research.google.com/youtube8m/download.html

From fb17ba9aa713f25cae76acd5f0b17b7eda0280e8 Mon Sep 17 00:00:00 2001
From: Oscar <54097108+oscar-davids@users.noreply.github.com>
Date: Wed, 27 May 2020 10:20:31 +0800
Subject: [PATCH 02/12] Update work-diary.me

---
 work-diary.me | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/work-diary.me b/work-diary.me
index f151f404..b6300aae 100644
--- a/work-diary.me
+++ b/work-diary.me
@@ -1,3 +1,16 @@
 https://www.kaggle.com/c/youtube8m-2019/overview/getting-started
 https://github.com/google/youtube-8m#running-on-googles-cloud-machine-learning-platform
 https://research.google.com/youtube8m/download.html
+
+Supervised Model
+  Keras
+  Random Forest
+  AdaBoost
+  SVM
+  XGBoost
+Unsupervised Model
+  OC SVM
+  Isolation Forest
+  Autoencoder
+  
+  

From 5b893c6dd0742f5804c88ae9bac5530f52158b6c Mon Sep 17 00:00:00 2001
From: oscar-davids <oscar_davids@outlook.com>
Date: Wed, 27 May 2020 16:01:05 +0800
Subject: [PATCH 03/12] some note

---
 work-diary.me | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/work-diary.me b/work-diary.me
index b6300aae..0f66060c 100644
--- a/work-diary.me
+++ b/work-diary.me
@@ -3,14 +3,28 @@ https://github.com/google/youtube-8m#running-on-googles-cloud-machine-learning-p
 https://research.google.com/youtube8m/download.html
 
 Supervised Model
-  Keras
-  Random Forest
-  AdaBoost
-  SVM
-  XGBoost
+    Keras
+    Random Forest
+    AdaBoost
+    SVM
+    XGBoost
 Unsupervised Model
-  OC SVM
-  Isolation Forest
-  Autoencoder
+    OC SVM
+    Isolation Forest
+    Autoencoder
+
+Attack methods
+    1.Scaling and bitrate reduction
+    2.Watermarks
+    3.Flips / rotations
+    4.Vertical flip
+    5.Horizontal flip
+    6.Rotate 90 degrees clockwise
+    7.Rotate 90 degrees counterclockwise
+    8.Black and white
+    9.Chroma subsampling
+    10.low bitrate
+    11.vignette
+
   
   

From 01ec5526dfcd8c789f1b91adb994741e56eca322 Mon Sep 17 00:00:00 2001
From: oscar-davids <oscar_davids@outlook.com>
Date: Wed, 27 May 2020 21:26:32 +0800
Subject: [PATCH 04/12] added utf-8 encoding

---
 YT8M_downloader/downloader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/YT8M_downloader/downloader.py b/YT8M_downloader/downloader.py
index 7b86db38..8fc95bba 100644
--- a/YT8M_downloader/downloader.py
+++ b/YT8M_downloader/downloader.py
@@ -81,7 +81,7 @@ def get_renditions(renditions: str) -> Dict:
 def read_data():
     read_rows = []
     read_ids = []
-    with open(CURRENT_PATH + '/' + 'yt8m_data.csv', newline='') as csv_file:
+    with open(CURRENT_PATH + '/' + 'yt8m_data.csv', encoding='utf-8', newline='') as csv_file:
         reader = csv.reader(csv_file, delimiter=',', quotechar='"')
         next(reader)
         for row in reader:

From ead3924b2c24c0857aa22b3688f8fffef664acca Mon Sep 17 00:00:00 2001
From: oscar-davids <oscar_davids@outlook.com>
Date: Fri, 14 Aug 2020 09:14:48 +0800
Subject: [PATCH 05/12] add parallel verifier

---
 parallel/parallelcompare.py  | 400 +++++++++++++++++++++++++++++++++++
 parallel/parallelgraber.py   | 200 ++++++++++++++++++
 parallel/parallelverifier.py | 337 +++++++++++++++++++++++++++++
 parallel/test_parallel.py    |  30 +++
 verifier/file_locker.py      |   6 +-
 5 files changed, 970 insertions(+), 3 deletions(-)
 create mode 100644 parallel/parallelcompare.py
 create mode 100644 parallel/parallelgraber.py
 create mode 100644 parallel/parallelverifier.py
 create mode 100644 parallel/test_parallel.py

diff --git a/parallel/parallelcompare.py b/parallel/parallelcompare.py
new file mode 100644
index 00000000..29d37380
--- /dev/null
+++ b/parallel/parallelcompare.py
@@ -0,0 +1,400 @@
+import os
+import shutil
+import timeit
+
+import cv2
+import numpy as np
+import pandas as pd
+import logging
+
+
+from parallel.parallelgraber import Sample
+
+from concurrent.futures.thread import ThreadPoolExecutor
+from concurrent.futures import ProcessPoolExecutor
+
+import multiprocessing
+from scipy.spatial import distance
+from scripts.asset_processor.video_metrics import VideoMetrics
+
+from scripts.asset_processor.video_asset_processor import VideoAssetProcessor
+from scripts.asset_processor.video_capture import VideoCapture
+
+class SampleCompare:
+    def __init__(self, metrics_list, features_list, do_profiling=False):
+        """
+        @param use_gpu:
+        @param original:
+        @param renditions:
+        @param metrics_list:
+        @param do_profiling:
+        @param max_samples: Max number of matched master-rendition frames to calculate metrics against. -1 = all
+        @param features_list:
+        @param debug_frames: dump frames selected for metric extraction on disk, decreases performance
+        @param channel: which HSV channel (0-3) to use for metric computation, -1 = all
+        @param image_pair_callback: function to call when image pair is created
+        """
+        # ************************************************************************
+        # Initialize global variables
+        # ************************************************************************
+        self.hash_size = 16
+        self.metrics_list = metrics_list
+        self.height = 0
+        self.features_list = features_list
+
+        self.metrics = {}
+
+        if do_profiling:
+            import line_profiler
+            self.cpu_profiler = line_profiler.LineProfiler()
+        else:
+            self.cpu_profiler = None
+        self.do_profiling = do_profiling
+
+        # Check if HD list is necessary
+        if 'temporal_ssim' in self.metrics_list or 'temporal_psnr' in self.metrics_list:
+            self.make_hd_list = True
+        else:
+            self.make_hd_list = False
+
+        self.video_metrics = VideoMetrics(self.metrics_list,
+                                          self.hash_size,
+                                          int(self.height),
+                                          self.cpu_profiler,
+                                          self.do_profiling)
+
+    def process(self, sampledata, renditions_list, source_video):
+
+        self.renditions_list = renditions_list
+        self.original_path = source_video
+        self.fps = sampledata[0].fps
+
+        original_metrics = {}
+        for i in range(len(sampledata[0].samples) - 1):
+            key = f'{i}'
+            result_metrics = {}
+            for metric in self.metrics_list:
+                result_metrics[metric] = 0.0
+            result_metrics['dimensions'] = '{}:{}'.format(int(sampledata[0].width), int(sampledata[0].height))
+            result_metrics['pixels'] = sampledata[0].pixels
+            result_metrics['ID'] = self.original_path
+            original_metrics[key] = result_metrics
+
+        self.metrics[self.original_path] = original_metrics
+
+        i = -1
+        for rendition in renditions_list:
+            path = rendition['path']
+            i += 1
+            try:
+                if os.path.exists(path):
+                    # Compute the metrics for the rendition
+                    self.metrics[path] = self.compute(sampledata[0], sampledata[i+1], path)
+                else:
+                    #logger.error(f'Unable to find rendition file: {path}')
+                    print(f'Unable to find rendition file: {path}')
+            except Exception as err:
+                #logger.exception('Unable to compute metrics for {}'.format(path))
+                print('Unable to compute metrics for {}'.format(path))
+            finally:
+                print('Finish to compute metrics for {}'.format(path))
+
+        return self.aggregate(self.metrics)
+
+    def aggregate(self, metrics):
+        """
+        Function to aggregate computed values of metrics and renditions into a
+        pandas DataFrame.
+        """
+
+        # Dictionary for containing all metrics
+        metrics_dict = {}
+        # Dictionary for containing all renditions
+        renditions_dict = {}
+
+        # Aggregate dictionary with all values for all renditions into a Pandas DataFrame
+        # All values are stored and obtained in a per-frame basis, then in a per-rendition
+        # fashion. They need to be rearranged.
+
+        # First, we combine the frames
+        dict_of_df = {k: pd.DataFrame(v) for k, v in metrics.items()}
+        metrics_df = pd.concat(dict_of_df, axis=1, sort=True).transpose().reset_index(inplace=False)
+
+        # Pandas concat function creates a level_0 and level_1 extra columns.
+        # They need to be renamed
+        metrics_df = metrics_df.rename(index=str,
+                                       columns={"level_1": "frame_num", "level_0": "path"})
+
+        # Then we can combine each rendition
+        for rendition in self.renditions_list:
+            # For the current rendition, we need an empty dictionary
+            rendition_dict = {}
+
+            # We have a number of different metrics that have been computed.
+            # These are an input for the constructor of the class an vary according to
+            # what metrics are of interest in the research
+            for metric in self.metrics_list:
+                # Obtain a Pandas DataFrame from the original and build the original time series
+                original_df = metrics_df[metrics_df['path'] == self.original_path][metric]
+                original_df = original_df.reset_index(drop=True).transpose().dropna().astype(float)
+                # Obtain a Pandas DataFrame from the current rendition and build its time series
+                rendition_df = metrics_df[metrics_df['path'] == rendition['path']][metric]
+                rendition_df = rendition_df.reset_index(drop=True)
+                rendition_df = rendition_df.transpose().dropna().astype(float)
+
+                # For those metrics that have a temporal character,
+                # we need to make a further aggregation
+                # We are basically using the Manhattan and euclidean distances,
+                # and statistically meaningful
+                # values such as mean, max and standard deviation.
+                # The whole time series is also provided for later exploration
+                #  in the analysis part.
+                if 'temporal' in metric:
+                    x_original = np.array(original_df[rendition_df.index].values)
+                    x_rendition = np.array(rendition_df.values)
+
+                    [[manhattan]] = distance.cdist(x_original.reshape(1, -1),
+                                                   x_rendition.reshape(1, -1),
+                                                   metric='cityblock')
+
+                    rendition_dict['{}-euclidean'.format(metric)] = distance.euclidean(x_original,
+                                                                                       x_rendition)
+                    rendition_dict['{}-manhattan'.format(metric)] = manhattan
+                    rendition_dict['{}-mean'.format(metric)] = np.mean(x_rendition)
+                    rendition_dict['{}-max'.format(metric)] = np.max(x_rendition)
+                    rendition_dict['{}-std'.format(metric)] = np.std(x_rendition)
+                    rendition_dict['{}-corr'.format(metric)] = np.correlate(x_original,
+                                                                            x_rendition,
+                                                                            mode='same').mean()
+                    rendition_dict['{}-series'.format(metric)] = x_rendition
+
+                # Other metrics do not need time evaluation
+                else:
+                    rendition_dict[metric] = rendition_df.mean()
+
+            # Size is an important feature of an asset, as it gives important information
+            # regarding the potential compression effect
+            rendition_dict['size'] = os.path.getsize(rendition['path'])
+            rendition_dict['fps'] = self.fps
+            rendition_dict['path'] = rendition['path']
+
+            # Extract the dimensions of the rendition
+            dimensions_df = metrics_df[metrics_df['path'] == rendition['path']]['dimensions']
+            rendition_dict['dimension_x'] = int(dimensions_df.unique()[0].split(':')[1])
+            rendition_dict['dimension_y'] = int(dimensions_df.unique()[0].split(':')[0])
+
+            # Extract the pixels for this rendition
+            pixels_df = metrics_df[metrics_df['path'] == rendition['path']]['pixels']
+            rendition_dict['pixels'] = int(pixels_df.unique())
+
+            # Store the rendition values in the dictionary of renditions for the present asset
+            renditions_dict[rendition['path']] = rendition_dict
+
+        # Add the current asset values to the global metrics_dict
+        metrics_dict[self.original_path] = renditions_dict
+
+        dict_of_df = {k: pd.DataFrame(v) for k, v in metrics_dict.items()}
+        metrics_df = pd.concat(dict_of_df, axis=1).transpose().reset_index(inplace=False)
+
+        pixels_df = metrics_df['pixels']
+
+        # Compute a size/dimension ratio column for better accuracy
+        metrics_df['size_dimension_ratio'] = metrics_df['size'] / (metrics_df['dimension_y'] * metrics_df['dimension_x'])
+
+        metrics_df = self.cleanup_dataframe(metrics_df, self.features_list)
+
+        return metrics_df, pixels_df, dimensions_df
+
+    def cleanup_dataframe(self, metrics_df, features):
+        """
+        Cleanup the resulting pandas dataframe and convert it to a numpy array
+        to pass to the prediction model
+        """
+
+        metrics_df = metrics_df.rename(columns={'level_0': 'title', 'level_1': 'attack'})
+
+        if features is not None:
+            if 'attack_ID' in features:
+                features.remove('attack_ID')
+            # Filter out features from metrics dataframe
+
+            # Scale measured metrics according to their resolution for better accuracy
+            metrics_df = self.rescale_to_resolution(metrics_df, features)
+            metrics_df = metrics_df[features]
+
+        return metrics_df
+
+    @staticmethod
+    def rescale_to_resolution(data, features):
+        """
+        Function to rescale features to improve accuracy
+        """
+
+        df_features = pd.DataFrame(data)
+        downscale_features = ['temporal_psnr',
+                              'temporal_ssim',
+                              'temporal_cross_correlation'
+                              ]
+
+        upscale_features = ['temporal_difference',
+                            'temporal_dct',
+                            'temporal_canny',
+                            'temporal_gaussian_mse',
+                            'temporal_gaussian_difference',
+                            'temporal_histogram_distance',
+                            'temporal_entropy',
+                            'temporal_lbp',
+                            'temporal_texture',
+                            'temporal_match',
+                            ]
+
+        for label in downscale_features:
+            downscale_feature = [feature for feature in features if label in feature]
+            if downscale_feature:
+                for feature in downscale_feature:
+                    print('Downscaling', label, feature)
+                    df_features[feature] = df_features[feature] / (df_features['dimension_y'] * df_features['dimension_x'])
+
+        for label in upscale_features:
+            upscale_feature = [feature for feature in features if label in feature]
+            if upscale_feature:
+                for feature in upscale_feature:
+                    print('Upscaling', label, feature)
+                    df_features[feature] = df_features[feature] * df_features['dimension_y'] * df_features['dimension_x']
+
+        return df_features
+
+    def compare_renditions_instant(self, idx, mastersample, renditionsample, path):
+        """
+        Function to compare pairs of numpy arrays extracting their corresponding metrics.
+        It basically takes the global original frame at frame_pos and its subsequent to
+        compare them against the corresponding ones in frame_list (a rendition).
+        It then extracts the metrics defined in the constructor under the metrics_list.
+        Methods of comparison are implemented in the video_metrics class
+        @param master_sample_idx_map: Mapping from rendition sample index to master sample index. If Nframes is different between master and rendition, the index mapping is not 1:1
+        @param rendition_sample_idx: Index of master sample we compare rendition against
+        @param frame_list:
+        @param frame_list_hd:
+        @param dimensions:
+        @param pixels:
+        @param path:
+        @return:
+        """
+
+        # Dictionary of metrics
+        frame_metrics = {}
+        # Original frame to compare against (downscaled for performance)
+        reference_frame = mastersample.samples[idx]
+        # Original's subsequent frame (downscaled for performance)
+        next_reference_frame = mastersample.samples[idx+1]
+        # Rendition frame (downscaled for performance)
+        rendition_frame = renditionsample.samples[idx]
+        # Rendition's subsequent frame (downscaled for performance)
+        next_rendition_frame = renditionsample.samples[idx + 1]
+
+        '''
+        if self.debug_frames:
+            cv2.imwrite(f'{self.frame_dir_name}/CRI_{idx:04}_ref.png', self._convert_debug_frame(reference_frame))
+            cv2.imwrite(f'{self.frame_dir_name}/CRI_{idx:04}_next_ref.png', self._convert_debug_frame(next_reference_frame))
+            cv2.imwrite(f'{self.frame_dir_name}/CRI_{idx:04}_rend.png', self._convert_debug_frame(rendition_frame))
+            cv2.imwrite(f'{self.frame_dir_name}/CRI_{idx:04}_next_rend.png', self._convert_debug_frame(next_rendition_frame))
+        '''
+
+        if self.make_hd_list:
+            # Original frame to compare against (HD for QoE metrics)
+            reference_frame_hd = mastersample.samples_hd[idx]
+            # Rendition frame (HD for QoE metrics)
+            rendition_frame_hd = renditionsample.samples_hd[idx]
+
+            # Compute the metrics defined in the global metrics_list.
+            # Uses the global instance of video_metrics
+            # Some metrics use a frame-to-frame comparison,
+            # but other require current and forward frames to extract
+            # their comparative values.
+            rendition_metrics = self.video_metrics.compute_metrics(rendition_frame,
+                                                                   next_rendition_frame,
+                                                                   reference_frame,
+                                                                   next_reference_frame,
+                                                                   rendition_frame_hd,
+                                                                   reference_frame_hd)
+        else:
+            rendition_metrics = self.video_metrics.compute_metrics(rendition_frame,
+                                                                   next_rendition_frame,
+                                                                   reference_frame,
+                                                                   next_reference_frame)
+
+        print(rendition_metrics)
+        # Retrieve rendition dimensions for further evaluation
+        dimensions = '{}:{}'.format(int(renditionsample.width), int(renditionsample.height))
+        rendition_metrics['dimensions'] = dimensions
+
+        # Retrieve rendition number of pixels for further verification
+        rendition_metrics['pixels'] = renditionsample.pixels
+
+        # Retrieve rendition path for further identification
+        #rendition_metrics['ID'] = self.original_path
+        rendition_metrics['ID'] = "original_path"
+
+        # Identify rendition uniquely by its path and store metric data in frame_metrics dict
+        frame_metrics[path] = rendition_metrics
+
+        # Return the metrics, together with the position of the frame
+        # frame_pos is needed for the ThreadPoolExecutor optimizations
+        return rendition_metrics, idx
+
+    def compute(self, mastersample, renditionsample, path):
+        """
+        Function to compare lists of numpy arrays extracting their corresponding metrics.
+        It basically takes the global original list of frames and the input frame_list
+        of numpy arrrays to extract the metrics defined in the constructor.
+        frame_pos establishes the index of the frames to be compared.
+        It is optimized by means of the ThreadPoolExecutor of Python's concurrent package
+        for better parallel performance.
+        @param master_sample_idx_map: Mapping from rendition sample index to master sample index. If Nframes is different between master and rendition, the index mapping is not 1:1
+        @param frame_list:
+        @param frame_list_hd:
+        @param path:
+        @param dimensions:
+        @param pixels:
+        @return:
+        """
+        start = timeit.default_timer()
+
+        # Dictionary of metrics
+        rendition_metrics = {}
+        future_list = []
+
+        # Execute computations in parallel using as many processors as possible
+        # future_list is a dictionary storing all computed values from each thread
+        with ThreadPoolExecutor(max_workers=multiprocessing.cpu_count()) as executor:
+            # Compare the original asset against its renditions
+            for i in range(len(mastersample.samples) - 1):
+                key = f'{i}'
+                future = executor.submit(self.compare_renditions_instant,
+                                         i,
+                                         mastersample,
+                                         renditionsample,
+                                         path)
+                
+                future_list.append((key, future))
+
+        # Once all frames in frame_list have been iterated, we can retrieve their values
+        for key, future in future_list:
+            # Values are retrieved in a dict, as a result of the executor's process
+            result_rendition_metrics, frame_pos = future.result()
+            # The computed values at a given frame
+            rendition_metrics[key] = result_rendition_metrics
+
+        time_spent = timeit.default_timer() - start
+        #logger.info(f'Metrics compute took: {time_spent}')
+        #print(f'Metrics compute took: {time_spent}')
+        '''
+        for i in range(len(mastersample.samples) - 1):
+            key = f'{i}'
+            result_rendition_metrics, frame_pos = self.compare_renditions_instant( i, mastersample, renditionsample, path)
+            # The computed values at a given frame
+            rendition_metrics[key] = result_rendition_metrics
+        '''
+
+        # Return the metrics for the currently processed rendition
+        return rendition_metrics
\ No newline at end of file
diff --git a/parallel/parallelgraber.py b/parallel/parallelgraber.py
new file mode 100644
index 00000000..fc56b166
--- /dev/null
+++ b/parallel/parallelgraber.py
@@ -0,0 +1,200 @@
+
+
+import os
+import shutil
+import timeit
+import numpy as np
+import cv2
+from concurrent.futures.thread import ThreadPoolExecutor
+from concurrent.futures import ProcessPoolExecutor
+
+from collections import deque
+import multiprocessing
+
+from scripts.asset_processor.video_asset_processor import VideoAssetProcessor
+from scripts.asset_processor.video_capture import VideoCapture
+
+class Sample:
+    def __init__(self, idx_map, samples, samples_hd, pixels, height, width, fps):
+        self.indexes = []
+        self.timestamps = []
+        self.idx_map = idx_map
+        self.samples = samples
+        self.samples_hd = samples_hd
+        self.pixels = pixels
+        self.height = height
+        self.width = width
+        self.fps = fps
+
+class SampleGraber:
+    def __init__(self, id, videopath, use_gpu, do_profiling, debug):
+        self.videopath = videopath
+        self.use_gpu = use_gpu
+        self.do_profiling = do_profiling
+        self.debug = debug
+        self.debug_frames = False
+
+        self.capture = VideoCapture(videopath)
+        self.id = id
+        self.fps = self.capture.fps
+        self.width = self.capture.width
+        self.height = self.capture.height
+
+
+        if self.debug_frames:
+            self.frame_dir_name = type(self).__name__
+            shutil.rmtree(self.frame_dir_name, ignore_errors=True)
+            os.makedirs(self.frame_dir_name, exist_ok=True)
+
+    @staticmethod
+    def _convert_debug_frame(frame):
+        return cv2.resize(frame, (1920, 1080), cv2.INTER_CUBIC)
+
+    def grabsamples(self, sampleIdx, timestamps):
+
+        self.indexes = sampleIdx
+        self.timestamps = timestamps
+
+        # Create list of random timestamps in video file to calculate metrics at
+        # difference between master timestamp and best matching frame timestamp of current video
+        timestamp_diffs = [np.inf] * len(self.indexes)
+        # currently selected frames
+        candidate_frames = [None] * len(self.indexes)
+        # maps selected rendition sample to master sample
+        debug_index_mapping = {}
+        idx_map = []
+        frame_list = []
+        frame_list_hd = []
+        frames_read = 0
+        pixels = 0
+        height = 0
+        width = 0
+        timestamps_selected = []
+        fps = self.capture.fps
+        # Iterate through each frame in the video
+        while True:
+            # Read the frame from the capture
+            frame_data = self.capture.read(grab=True)
+            if frame_data is not None:
+                frames_read += 1
+
+                # update candidate frames
+                ts_diffs = [abs(frame_data.timestamp - mts) for mts in self.timestamps]
+                best_match_idx = int(np.argmin(ts_diffs))
+                best_match = np.min(ts_diffs)
+                # max theoretical timestamp difference between 'matching' frames would be 1/(2*fps) + max(jitter)
+                # don't consider frames that are too far, otherwise the algorithm will be linear on memory vs video length
+                if best_match < 1 / (2 * self.capture.fps) and timestamp_diffs[best_match_idx] > best_match:
+                    timestamp_diffs[best_match_idx] = best_match
+                    frame_data = self.capture.retrieve()
+                    candidate_frames[best_match_idx] = frame_data
+            # Break the loop when frames cannot be taken from original
+            else:
+                break
+
+        # process picked frames
+        for i in range(len(candidate_frames)):
+            frame_data = candidate_frames[i]
+            ts_diff = timestamp_diffs[i]
+            if frame_data is None or ts_diff > 1 / (2 * self.fps):
+                # no good matching candidate frame
+                continue
+
+            if self.debug_frames:
+                cv2.imwrite(
+                    f'{self.frame_dir_name}/{i:04}_{"s" if self.self.id  else ""}_{frame_data.index}_{frame_data.timestamp:.4}.png',
+                    self._convert_debug_frame(frame_data.frame))
+
+
+            timestamps_selected.append(frame_data.timestamp)
+            idx_map.append(i)
+            debug_index_mapping[self.indexes[i]] = frame_data.index
+            # Count the number of pixels
+            height = frame_data.frame.shape[1]
+            width = frame_data.frame.shape[0]
+            pixels += height * width
+
+            #if not self.markup_master_frames and self.image_pair_callback is not None:
+            #    self.image_pair_callback(self.master_samples_hd[i], frame_data.frame, len(frame_list), ts_diff,
+            #                             self.original_path, capture.filename)
+
+            frame_list_hd.append(frame_data.frame)
+            # Change color space to have only luminance
+            frame = cv2.resize(frame_data.frame, (480, 270), interpolation=cv2.INTER_LINEAR)
+            frame_list.append(frame)
+
+        # Clean up memory
+        self.capture.release()
+
+        #logger.info(f'Mean master-rendition timestamp diff, sec: {np.mean(
+        #    list(filter(lambda x: not np.isinf(x), timestamp_diffs)))} SD: {np.std(
+        #    list(filter(lambda x: not np.isinf(x), timestamp_diffs)))}')
+        #logger.info(f'Master frame index mapping for {capture.filename}: \n {debug_index_mapping}')
+
+        #return idx_map, np.array(frame_list), np.array(frame_list_hd), pixels, height, width
+
+        sample = Sample(idx_map, np.array(frame_list), np.array(frame_list_hd), pixels, height, width, fps)
+
+        return sample
+
+class ParallelGraber:
+    def __init__(self, max_samples, use_gpu, do_profiling, debug):
+        """
+        Initialize verifier instance
+        @param max_samples: Max number of samples to take for a video
+        @param model: Either URI of the archive with model files, or local folder path
+        @param use_gpu: Use GPU for video decoding and computations
+        @param do_profiling: Output execution times to logs
+        @param debug: Enable debug image output, greatly reduces performance
+        """
+        self.max_samples = max_samples
+        self.use_gpu = use_gpu
+        self.do_profiling = do_profiling
+        self.debug = debug
+
+        self.total_frames = 0
+        self.sample_indexes = []
+        self.sample_timestamps = []
+        self.grabers = []
+        self.samples = []
+
+    def addgraber(self, videopath):
+        id = len(self.grabers)
+        graber = SampleGraber(id, videopath, self.use_gpu, self.do_profiling, self.debug)
+        self.grabers.append(graber)
+
+        if len(self.grabers) == 1: #master
+            self.total_frames = graber.capture.frame_count
+            self.sample_indexes = np.sort(np.random.choice(self.total_frames, self.max_samples, False))
+            # setting time stamp
+            self.sample_timestamps = self.sample_indexes * 1.0 / graber.capture.fps
+
+
+    def capturesingle(self, id):
+        return self.grabers[id].grabsamples(self.sample_indexes, self.sample_timestamps)
+
+    def captureall(self):
+
+        future_result = []
+        future_list = []
+        with ThreadPoolExecutor(max_workers=multiprocessing.cpu_count()) as executor:
+            # Compare the original asset against its renditions
+            for i in range(len(self.grabers)):
+                key = f'{i}'
+                future = executor.submit(self.capturesingle, i)
+                future_list.append((key, future))
+
+        # Once all frames in frame_list have been iterated, we can retrieve their values
+        for key, future in future_list:
+            # Values are retrieved in a dict, as a result of the executor's process
+            frame_pos = future.result()
+            future_result.append(frame_pos)
+
+        '''
+        for i in range(len(self.grabers)):
+            frame_pos = self.grabers[i].grabsamples(self.sample_indexes,self.sample_timestamps)
+            future_result.append(frame_pos)
+        '''
+
+        # Return the metrics for the currently processed rendition
+        return future_result
diff --git a/parallel/parallelverifier.py b/parallel/parallelverifier.py
new file mode 100644
index 00000000..24f0c6f5
--- /dev/null
+++ b/parallel/parallelverifier.py
@@ -0,0 +1,337 @@
+
+import uuid
+import timeit
+import json
+import tarfile
+import os
+import sys
+import urllib
+import subprocess
+import logging
+from joblib import load
+import numpy as np
+import pandas as pd
+import cv2
+from scipy.io import wavfile
+from catboost import CatBoostClassifier
+from catboost import CatBoostRegressor
+from verifier import file_locker
+
+from scripts.asset_processor.video_asset_processor import VideoAssetProcessor
+from parallel.parallelgraber import ParallelGraber
+from parallel.parallelcompare import SampleCompare
+
+
+logger = logging.getLogger()
+
+
+class ParallelVerifier:
+    def __init__(self, max_samples, model, use_gpu, do_profiling, debug):
+        """
+        Initialize verifier instance
+        @param max_samples: Max number of samples to take for a video
+        @param model: Either URI of the archive with model files, or local folder path
+        @param use_gpu: Use GPU for video decoding and computations
+        @param do_profiling: Output execution times to logs
+        @param debug: Enable debug image output, greatly reduces performance
+        """
+        self.use_gpu = use_gpu
+        self.debug = debug
+        self.model_dir = '/tmp/model'
+        if os.path.isdir(model):
+            self.model_dir = model
+        else:
+            self.retrieve_models(model, self.model_dir)
+        self.max_samples = max_samples
+        self.do_profiling = do_profiling
+        self.tmp_files = []
+        self.load_models()
+
+    @staticmethod
+    def read_video_metadata(filename):
+        try:
+            res = {}
+            cap = cv2.VideoCapture(filename)
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            height = float(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            width = float(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            res['frame_rate'] = fps
+            res['resolution'] = {'width': width, 'height': height}
+            res['pixels'] = width * height * frame_count
+            res['duration'] = frame_count / fps
+            res['bitrate'] = os.path.getsize(filename) / res['duration']
+        finally:
+            cap.release()
+        return res
+
+    def pre_verify(self, source, rendition):
+        """
+        Function to verify that rendition conditions and specifications
+        are met as prescribed by the Broadcaster
+        """
+        # Extract data from video capture
+        video_file, audio_file = self.get_video_audio(rendition['uri'])
+        rendition['video_available'] = video_file is not None
+        rendition['audio_available'] = audio_file is not None
+
+        if video_file:
+            # Check that the audio exists
+            if audio_file:
+                _, source_file_series = wavfile.read(source['audio_path'])
+                _, rendition_file_series = wavfile.read(audio_file)
+
+                try:
+                    # Compute the Euclidean distance between source's and rendition's signals
+                    rendition['audio_dist'] = np.linalg.norm(source_file_series - rendition_file_series)
+                except:
+                    # Set to negative to indicate an error during audio comparison
+                    # (matching floating-point datatype of Euclidean distance)
+                    rendition['audio_dist'] = -1.0
+                finally:
+                    # Cleanup the audio file generated to avoid cluttering
+                    os.remove(audio_file)
+
+            metadata = self.read_video_metadata(video_file)
+
+            rendition['path'] = video_file
+
+            # Create dictionary with passed / failed verification parameters
+            if rendition.get('resolution'):
+                rendition['resolution']['height_pre_verification'] = metadata['height'] / float(rendition['resolution']['height'])
+                rendition['resolution']['width_pre_verification'] = metadata['width'] / float(rendition['resolution']['width'])
+
+            if 'frame_rate' in rendition:
+                rend_exp_fps = float(rendition['frame_rate']) or source['frame_rate']
+                rendition['frame_rate'] = bool(np.isclose(rend_exp_fps, metadata['frame_rate'], atol=0.1))
+
+            if rendition.get('bitrate'):
+                rendition['bitrate'] = metadata['bitrate'] == rendition['bitrate']
+
+            if rendition.get('pixels'):
+                rendition['pixels_pre_verification'] = float(rendition['pixels']) / metadata['pixels']
+
+
+        return rendition
+
+    def meta_model(self, row):
+        """
+        The goal is to reduce the number of False Positives (tamper) to prevent wrongfully penalizing transcoder nodes. OCSVM model is expected to have higher precision (low FP) on novel data.
+        If OCSVM predicts the observation is an inlier (not tampered), we'll go with it, otherwise we'll use supervised model output.
+        """
+        if row['ul_pred_tamper'] == 0:
+            return 0
+        return row['sl_pred_tamper']
+
+    def verify(self, source_uri, renditions):
+        """
+        Function that returns the predicted compliance of a list of renditions
+        with respect to a given source file using a specified model.
+        """
+        total_start = timeit.default_timer()
+        source_video, source_audio = self.get_video_audio(source_uri)
+        if not source_video and not source_audio:
+            raise ValueError('Couldn\'t retrieve source files')
+        try:
+            if source_video:
+                # Prepare source and renditions for verification
+                source = {'path': source_video,
+                          'audio_path': source_audio,
+                          'video_available': True,
+                          'audio_available': source_audio is not None,
+                          'uri': source_uri}
+                # read source metadata
+                metadata = self.read_video_metadata(source_video)
+                source.update(metadata)
+
+                # Create a list of preverified renditions
+                pre_verified_renditions = []
+                for rendition in renditions:
+                    pre_verification = self.pre_verify(source, rendition)
+                    if rendition['video_available']:
+                        pre_verified_renditions.append(pre_verification)
+
+                # Remove non numeric features from feature list
+                non_temporal_features = ['attack_ID', 'title', 'attack', 'dimension', 'size', 'size_dimension_ratio']
+                metrics_list = []
+                features = list(np.unique(self.features_ul + self.features_sl))
+
+                for metric in features:
+                    if metric not in non_temporal_features:
+                        metrics_list.append(metric.split('-')[0])
+
+                #----- Frame grabber -----
+                # Frame grabber times
+                start = timeit.default_timer()
+
+                # Instantiate Frame Grabber class
+                samplegraber = ParallelGraber(self.max_samples, self.use_gpu, self.do_profiling, self.debug)
+
+                samplegraber.addgraber(source_video)
+                for rendition in renditions:
+                    samplegraber.addgraber(rendition['path'])
+                sampledata = samplegraber.captureall()
+
+                initialize_time = timeit.default_timer() - start
+
+                # ----- Feature calculation -----
+                # Register times for asset processing
+                start = timeit.default_timer()
+
+                featureCompare = SampleCompare(metrics_list, features, self.do_profiling)
+
+                # Assemble output dataframe with processed metrics
+                metrics_df, pixels_df, dimensions_df = featureCompare.process(sampledata, renditions, source_video)
+
+                # Record time for processing of assets metrics
+                process_time = timeit.default_timer() - start
+
+                # ----- Inference part -----
+                x_renditions_sl = np.asarray(metrics_df[self.features_sl])
+                x_renditions_ul = np.asarray(metrics_df[self.features_ul])
+                x_renditions_ul = self.loaded_scaler.transform(x_renditions_ul)
+
+                np.set_printoptions(precision=6, suppress=True)
+                logger.debug(f'INPUT SL ARRAY: {x_renditions_sl}')
+                logger.debug(f'Unscaled INPUT UL ARRAY: {np.asarray(metrics_df[self.features_ul])}')
+                logger.debug(f'SCALED INPUT UL ARRAY: {x_renditions_ul}')
+                # Make predictions for given data
+                start = timeit.default_timer()
+                predictions_df = pd.DataFrame()
+                predictions_df['sl_pred_tamper'] = self.loaded_model_sl.predict(x_renditions_sl)
+                predictions_df['ocsvm_dist'] = self.loaded_model_ul.decision_function(x_renditions_ul)
+                predictions_df['ul_pred_tamper'] = (-self.loaded_model_ul.predict(x_renditions_ul)+1)/2
+                predictions_df['meta_pred_tamper'] = predictions_df.apply(self.meta_model, axis=1)
+                prediction_time = timeit.default_timer() - start
+
+                # Add predictions to rendition dictionary
+                i = 0
+                for _, rendition in enumerate(renditions):
+                    if rendition['video_available']:
+                        rendition.pop('path', None)
+                        rendition['ocsvm_dist'] = float(predictions_df['ocsvm_dist'].iloc[i])
+                        rendition['tamper_ul'] = int(predictions_df['ul_pred_tamper'].iloc[i])
+                        rendition['tamper_sl'] = int(predictions_df['sl_pred_tamper'].iloc[i])
+                        rendition['tamper'] = int(predictions_df['meta_pred_tamper'].iloc[i])
+                        # Append the post-verification of resolution and pixel count
+                        if 'pixels' in rendition:
+                            rendition['pixels_post_verification'] = float(rendition['pixels']) / pixels_df[i]
+                        if 'resolution' in rendition:
+                            rendition['resolution']['height_post_verification'] = float(rendition['resolution']['height']) / int(dimensions_df[i].split(':')[0])
+                            rendition['resolution']['width_post_verification'] = float(rendition['resolution']['width']) / int(dimensions_df[i].split(':')[1])
+                        i += 1
+
+                if self.do_profiling:
+                    logger.info(f'Features used: {features}')
+                    logger.info(f'Total time: {timeit.default_timer() - total_start}')
+                    logger.info(f'Initialization time: {initialize_time}')
+                    logger.info(f'Process time: {process_time}')
+                    logger.info(f'Prediction time: {prediction_time}')
+
+            return renditions
+        finally:
+            for f in self.tmp_files:
+                if os.path.exists(f):
+                    os.remove(f)
+            self.tmp_files.clear()
+
+    def retrieve_models(self, uri, model_dir):
+        """
+        Function to obtain pre-trained model for verification predictions
+        """
+        with file_locker.FileLocker('model_op.lock'):
+            model_file = uri.split('/')[-1]
+            model_file_sl = f'{model_file}_cb_sl'
+            # Create target Directory if don't exist
+            if not os.path.exists(model_dir):
+                try:
+                    os.mkdir(model_dir)
+                    logger.info(f'Directory created: {model_dir}')
+                    logger.info('Model download started')
+                    filename, _ = urllib.request.urlretrieve(uri,
+                                                             filename='{}/{}'.format(model_dir, model_file))
+                    logger.info(f'Model {filename} downloaded')
+                    with tarfile.open(filename) as tar_f:
+                        tar_f.extractall(model_dir)
+
+                    return model_dir, model_file, model_file_sl
+                except Exception as exc:
+                    if os.path.exists(model_dir):
+                        os.rmdir(model_dir)
+                    logger.exception('Unable to untar model')
+                    raise exc
+            else:
+                logger.debug(f'Directory {model_dir} already exists, skipping download')
+
+    def get_video_audio(self, uri):
+        """
+        Function to obtain a path to a video and audio files from url or local path
+        """
+        video_file = None
+        audio_file = None
+        if uri.lower().startswith('http'):
+            try:
+                file_name = '/tmp/{}'.format(uuid.uuid4())
+                logger.info(f'File download started: {file_name}')
+                video_file, _ = urllib.request.urlretrieve(uri, filename=file_name)
+                self.tmp_files.append(video_file)
+                logger.info(f'File {file_name} downloaded to {video_file}')
+            except Exception as e:
+                logger.exception('Unable to download HTTP video file')
+        else:
+            if os.path.isfile(uri):
+                video_file = uri
+                logger.info(f'Video file {video_file} available in file system')
+            else:
+                logger.info(f'Video file {video_file} NOT available in file system')
+
+        if video_file:
+            audio_file = '{}_audio.wav'.format(video_file)
+            logger.info('Extracting audio track')
+            ffmpeg = subprocess.Popen(' '.join(['ffmpeg',
+                             '-i',
+                             video_file,
+                             '-vn',
+                             '-acodec',
+                             'pcm_s16le',
+                             '-loglevel',
+                             'quiet',
+                             audio_file]), stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
+            stdout, stderr = ffmpeg.communicate()
+            if ffmpeg.returncode:
+                logger.error(f'Could not extract audio from video file {stderr}')
+            if os.path.isfile(audio_file):
+                logger.info(f'Audio file {audio_file} available in file system')
+                self.tmp_files.append(audio_file)
+            else:
+                logger.info(f'Audio file {audio_file} NOT available in file system')
+                audio_file = None
+        return video_file, audio_file
+
+    def load_models(self):
+        """
+        Cache models to memory
+        @return:
+        """
+        # Configure UL model for inference
+        model_name_ul = 'OCSVM'
+        scaler_type = 'StandardScaler'
+        learning_type = 'UL'
+        self.loaded_model_ul = load(open('{}/{}.joblib'.format(self.model_dir,
+                                                               model_name_ul), 'rb'))
+
+        self.loaded_scaler = load(open('{}/{}_{}.joblib'.format(self.model_dir,
+                                                                learning_type,
+                                                                scaler_type), 'rb'))
+        # Configure SL model for inference
+        model_name_sl = 'CB_Binary'
+        self.loaded_model_sl = CatBoostClassifier().load_model('{}/{}.cbm'.format(self.model_dir,
+                                                                                  model_name_sl))
+
+        # Open model configuration files
+        with open('{}/param_{}.json'.format(self.model_dir, model_name_ul)) as json_file:
+            params = json.load(json_file)
+            self.features_ul = params['features']
+        with open('{}/param_{}.json'.format(self.model_dir, model_name_sl)) as json_file:
+            params = json.load(json_file)
+            self.features_sl = params['features']
\ No newline at end of file
diff --git a/parallel/test_parallel.py b/parallel/test_parallel.py
new file mode 100644
index 00000000..d905e334
--- /dev/null
+++ b/parallel/test_parallel.py
@@ -0,0 +1,30 @@
+import requests
+import json
+import random
+import numpy as np
+import pandas as pd
+import os
+import tqdm
+import glob
+from parallel.parallelverifier import ParallelVerifier
+import logging
+from scripts.asset_processor import VideoAssetProcessor, VideoCapture
+import timeit
+import pytest
+import cProfile
+
+pd.options.display.width = 0
+pd.set_option('display.max_columns', None)
+
+class TestParallelDecode:
+    
+    def test_parallel_decodetamper(self):
+        debug = False
+        src = 'testing/tests/data/master2_4s_1080.mp4'
+        rend_path = 'testing/tests/data/rend2_4s_1080_adv_attack.mp4'
+        verifier = ParallelVerifier(10, 'http://storage.googleapis.com/verification-models/verification-metamodel-2020-07-06.tar.xz', False, False, debug)
+        verification_result = verifier.verify(src, [{'uri': rend_path}])
+        print(verification_result)
+        assert verification_result[0]['tamper'] == 1
+
+ 
\ No newline at end of file
diff --git a/verifier/file_locker.py b/verifier/file_locker.py
index ab06214f..05f4bba3 100644
--- a/verifier/file_locker.py
+++ b/verifier/file_locker.py
@@ -1,7 +1,7 @@
 """
 Provides file-based interprocess lock
 """
-import fcntl
+#import fcntl
 
 
 class FileLocker:
@@ -10,8 +10,8 @@ def __init__(self, lock_file) -> None:
 
     def __enter__(self):
         self.fp = open(self.lock_file, "wb+")
-        fcntl.flock(self.fp.fileno(), fcntl.LOCK_EX)
+        #fcntl.flock(self.fp.fileno(), fcntl.LOCK_EX)
 
     def __exit__(self, _type, value, tb):
-        fcntl.flock(self.fp.fileno(), fcntl.LOCK_UN)
+        #fcntl.flock(self.fp.fileno(), fcntl.LOCK_UN)
         self.fp.close()

From cda6b95042c48788f39f21c776ab0a3d8f892efc Mon Sep 17 00:00:00 2001
From: Oscar <54097108+oscar-davids@users.noreply.github.com>
Date: Fri, 14 Aug 2020 09:21:51 +0800
Subject: [PATCH 06/12] Delete work-diary.me

---
 work-diary.me | 30 ------------------------------
 1 file changed, 30 deletions(-)
 delete mode 100644 work-diary.me

diff --git a/work-diary.me b/work-diary.me
deleted file mode 100644
index 0f66060c..00000000
--- a/work-diary.me
+++ /dev/null
@@ -1,30 +0,0 @@
-https://www.kaggle.com/c/youtube8m-2019/overview/getting-started
-https://github.com/google/youtube-8m#running-on-googles-cloud-machine-learning-platform
-https://research.google.com/youtube8m/download.html
-
-Supervised Model
-    Keras
-    Random Forest
-    AdaBoost
-    SVM
-    XGBoost
-Unsupervised Model
-    OC SVM
-    Isolation Forest
-    Autoencoder
-
-Attack methods
-    1.Scaling and bitrate reduction
-    2.Watermarks
-    3.Flips / rotations
-    4.Vertical flip
-    5.Horizontal flip
-    6.Rotate 90 degrees clockwise
-    7.Rotate 90 degrees counterclockwise
-    8.Black and white
-    9.Chroma subsampling
-    10.low bitrate
-    11.vignette
-
-  
-  

From 94de062031d94ea3f4e1e2c016766ec5523f9cae Mon Sep 17 00:00:00 2001
From: oscar-davids <oscar_davids@outlook.com>
Date: Fri, 14 Aug 2020 09:25:09 +0800
Subject: [PATCH 07/12] fix package

---
 parallel/parallelcompare.py  | 4 ----
 parallel/parallelgraber.py   | 3 ---
 parallel/parallelverifier.py | 1 -
 3 files changed, 8 deletions(-)

diff --git a/parallel/parallelcompare.py b/parallel/parallelcompare.py
index 29d37380..415a0cb9 100644
--- a/parallel/parallelcompare.py
+++ b/parallel/parallelcompare.py
@@ -11,15 +11,11 @@
 from parallel.parallelgraber import Sample
 
 from concurrent.futures.thread import ThreadPoolExecutor
-from concurrent.futures import ProcessPoolExecutor
 
 import multiprocessing
 from scipy.spatial import distance
 from scripts.asset_processor.video_metrics import VideoMetrics
 
-from scripts.asset_processor.video_asset_processor import VideoAssetProcessor
-from scripts.asset_processor.video_capture import VideoCapture
-
 class SampleCompare:
     def __init__(self, metrics_list, features_list, do_profiling=False):
         """
diff --git a/parallel/parallelgraber.py b/parallel/parallelgraber.py
index fc56b166..018a066e 100644
--- a/parallel/parallelgraber.py
+++ b/parallel/parallelgraber.py
@@ -1,17 +1,14 @@
 
-
 import os
 import shutil
 import timeit
 import numpy as np
 import cv2
 from concurrent.futures.thread import ThreadPoolExecutor
-from concurrent.futures import ProcessPoolExecutor
 
 from collections import deque
 import multiprocessing
 
-from scripts.asset_processor.video_asset_processor import VideoAssetProcessor
 from scripts.asset_processor.video_capture import VideoCapture
 
 class Sample:
diff --git a/parallel/parallelverifier.py b/parallel/parallelverifier.py
index 24f0c6f5..580496bc 100644
--- a/parallel/parallelverifier.py
+++ b/parallel/parallelverifier.py
@@ -17,7 +17,6 @@
 from catboost import CatBoostRegressor
 from verifier import file_locker
 
-from scripts.asset_processor.video_asset_processor import VideoAssetProcessor
 from parallel.parallelgraber import ParallelGraber
 from parallel.parallelcompare import SampleCompare
 

From 04431849acdf778c5db5bde31ee5800b8a68ca16 Mon Sep 17 00:00:00 2001
From: Oscar <54097108+oscar-davids@users.noreply.github.com>
Date: Fri, 14 Aug 2020 11:27:30 +0800
Subject: [PATCH 08/12] Update file_locker.py

---
 verifier/file_locker.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/verifier/file_locker.py b/verifier/file_locker.py
index 05f4bba3..ab06214f 100644
--- a/verifier/file_locker.py
+++ b/verifier/file_locker.py
@@ -1,7 +1,7 @@
 """
 Provides file-based interprocess lock
 """
-#import fcntl
+import fcntl
 
 
 class FileLocker:
@@ -10,8 +10,8 @@ def __init__(self, lock_file) -> None:
 
     def __enter__(self):
         self.fp = open(self.lock_file, "wb+")
-        #fcntl.flock(self.fp.fileno(), fcntl.LOCK_EX)
+        fcntl.flock(self.fp.fileno(), fcntl.LOCK_EX)
 
     def __exit__(self, _type, value, tb):
-        #fcntl.flock(self.fp.fileno(), fcntl.LOCK_UN)
+        fcntl.flock(self.fp.fileno(), fcntl.LOCK_UN)
         self.fp.close()

From 47f9146832e3ddd6eb5148ea25ebbd36220c4761 Mon Sep 17 00:00:00 2001
From: oscar-davids <oscar_davids@outlook.com>
Date: Fri, 14 Aug 2020 13:34:19 +0800
Subject: [PATCH 09/12] update upstream

---
 parallel/parallelverifier.py | 4 ++--
 verifier/verifier.py         | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/parallel/parallelverifier.py b/parallel/parallelverifier.py
index 580496bc..aa7b10d4 100644
--- a/parallel/parallelverifier.py
+++ b/parallel/parallelverifier.py
@@ -97,8 +97,8 @@ def pre_verify(self, source, rendition):
 
             # Create dictionary with passed / failed verification parameters
             if rendition.get('resolution'):
-                rendition['resolution']['height_pre_verification'] = metadata['height'] / float(rendition['resolution']['height'])
-                rendition['resolution']['width_pre_verification'] = metadata['width'] / float(rendition['resolution']['width'])
+				rendition['resolution']['height_pre_verification'] = metadata["resolution"]['height'] / float(rendition['resolution']['height'])
+                rendition['resolution']['width_pre_verification'] = metadata["resolution"]['width'] / float(rendition['resolution']['width'])
 
             if 'frame_rate' in rendition:
                 rend_exp_fps = float(rendition['frame_rate']) or source['frame_rate']
diff --git a/verifier/verifier.py b/verifier/verifier.py
index 5fe40c86..c37972bc 100644
--- a/verifier/verifier.py
+++ b/verifier/verifier.py
@@ -98,8 +98,8 @@ def pre_verify(self, source, rendition):
 
             # Create dictionary with passed / failed verification parameters
             if rendition.get('resolution'):
-                rendition['resolution']['height_pre_verification'] = metadata['height'] / float(rendition['resolution']['height'])
-                rendition['resolution']['width_pre_verification'] = metadata['width'] / float(rendition['resolution']['width'])
+                rendition['resolution']['height_pre_verification'] = metadata["resolution"]['height'] / float(rendition['resolution']['height'])
+                rendition['resolution']['width_pre_verification'] = metadata["resolution"]['width'] / float(rendition['resolution']['width'])
 
             if 'frame_rate' in rendition:
                 rend_exp_fps = float(rendition['frame_rate']) or source['frame_rate']

From ad4a43c9dec375afb8ac7c2a19c444eb8d0a21f7 Mon Sep 17 00:00:00 2001
From: oscar-davids <oscar_davids@outlook.com>
Date: Fri, 14 Aug 2020 13:46:29 +0800
Subject: [PATCH 10/12] fix tab string

---
 parallel/parallelverifier.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parallel/parallelverifier.py b/parallel/parallelverifier.py
index aa7b10d4..cd6d2c7f 100644
--- a/parallel/parallelverifier.py
+++ b/parallel/parallelverifier.py
@@ -97,7 +97,7 @@ def pre_verify(self, source, rendition):
 
             # Create dictionary with passed / failed verification parameters
             if rendition.get('resolution'):
-				rendition['resolution']['height_pre_verification'] = metadata["resolution"]['height'] / float(rendition['resolution']['height'])
+                rendition['resolution']['height_pre_verification'] = metadata["resolution"]['height'] / float(rendition['resolution']['height'])
                 rendition['resolution']['width_pre_verification'] = metadata["resolution"]['width'] / float(rendition['resolution']['width'])
 
             if 'frame_rate' in rendition:

From 1466eabcf12cea3e798c31defd102062d5d3652c Mon Sep 17 00:00:00 2001
From: Oscar <54097108+oscar-davids@users.noreply.github.com>
Date: Fri, 14 Aug 2020 16:43:55 +0800
Subject: [PATCH 11/12] Update test_parallel.py

---
 parallel/test_parallel.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/parallel/test_parallel.py b/parallel/test_parallel.py
index d905e334..e804944c 100644
--- a/parallel/test_parallel.py
+++ b/parallel/test_parallel.py
@@ -11,7 +11,6 @@
 from scripts.asset_processor import VideoAssetProcessor, VideoCapture
 import timeit
 import pytest
-import cProfile
 
 pd.options.display.width = 0
 pd.set_option('display.max_columns', None)
@@ -27,4 +26,4 @@ def test_parallel_decodetamper(self):
         print(verification_result)
         assert verification_result[0]['tamper'] == 1
 
- 
\ No newline at end of file
+ 

From d5ebe206980b542e5837016371b7204836b43076 Mon Sep 17 00:00:00 2001
From: Oscar <54097108+oscar-davids@users.noreply.github.com>
Date: Fri, 28 Aug 2020 11:10:45 +0800
Subject: [PATCH 12/12] Update verifier.py

---
 verifier/verifier.py | 49 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/verifier/verifier.py b/verifier/verifier.py
index c37972bc..5e75d67b 100644
--- a/verifier/verifier.py
+++ b/verifier/verifier.py
@@ -127,6 +127,55 @@ def verify(self, source_uri, renditions):
         Function that returns the predicted compliance of a list of renditions
         with respect to a given source file using a specified model.
         """
+        """
+            if usecengine:
+            renditionlist = ""
+            for rendition in renditions:
+                renditionlist = renditionlist + (rendition['uri'] + ',')
+
+            try:
+                #[metatamper, videoalive, audioalive, fps, width, height, audiodiff,
+                # sizeratio, dctdiff, gaussiamse, gaussiandiff, gaussianthreshold, histogramdiff]
+                diffmatrix = extractfts.calc_featurediff(source_uri, renditionlist, self.max_samples)
+
+                for i, rendition in enumerate(renditions):
+                    rendition['audio_dist'] = -1.0
+                    rendition['video_available'] = None
+                    rendition['audio_available'] = None
+                    if diffmatrix[i,1] == 1:
+                        rendition['video_available'] = True
+                    if diffmatrix[i,2] == 1:
+                        rendition['audio_available'] = True
+                        rendition['audio_dist'] = diffmatrix[i,6]
+
+                    rendition['fps'] = diffmatrix[i,3]
+                    rendition['pixels'] = diffmatrix[i,4] * diffmatrix[i,5]
+
+                x_renditions_sl = diffmatrix[:, 7:]
+                x_renditions_ul = diffmatrix[:, 7:]
+
+                x_renditions_ul = self.loaded_scaler.transform(x_renditions_ul)
+                np.set_printoptions(precision=6, suppress=True)
+                # Make predictions for given data
+                start = timeit.default_timer()
+                predictions_df = pd.DataFrame()
+                predictions_df['sl_pred_tamper'] = self.loaded_model_sl.predict(x_renditions_sl)
+                predictions_df['ocsvm_dist'] = self.loaded_model_ul.decision_function(x_renditions_ul)
+                predictions_df['ul_pred_tamper'] = (-self.loaded_model_ul.predict(x_renditions_ul) + 1) / 2
+                predictions_df['meta_pred_tamper'] = predictions_df.apply(self.meta_model, axis=1)
+                prediction_time = timeit.default_timer() - start
+                i = 0
+                for _, rendition in enumerate(renditions):
+                    if rendition['video_available']:
+                        rendition['ocsvm_dist'] = float(predictions_df['ocsvm_dist'].iloc[i])
+                        rendition['tamper_ul'] = int(predictions_df['ul_pred_tamper'].iloc[i])
+                        rendition['tamper_sl'] = int(predictions_df['sl_pred_tamper'].iloc[i])
+                        rendition['tamper'] = int(predictions_df['meta_pred_tamper'].iloc[i])                      
+                        i += 1
+                return renditions
+            except Exception as e:
+                print(e)
+        """
         total_start = timeit.default_timer()
         source_video, source_audio = self.get_video_audio(source_uri)
         if not source_video and not source_audio: