From 07cf6c2ba22634377affa1471e41c3a9b570b472 Mon Sep 17 00:00:00 2001 From: Oscar <54097108+oscar-davids@users.noreply.github.com> Date: Tue, 26 May 2020 20:48:16 +0800 Subject: [PATCH 01/12] Create work-diary.me --- work-diary.me | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 work-diary.me diff --git a/work-diary.me b/work-diary.me new file mode 100644 index 00000000..f151f404 --- /dev/null +++ b/work-diary.me @@ -0,0 +1,3 @@ +https://www.kaggle.com/c/youtube8m-2019/overview/getting-started +https://github.com/google/youtube-8m#running-on-googles-cloud-machine-learning-platform +https://research.google.com/youtube8m/download.html From fb17ba9aa713f25cae76acd5f0b17b7eda0280e8 Mon Sep 17 00:00:00 2001 From: Oscar <54097108+oscar-davids@users.noreply.github.com> Date: Wed, 27 May 2020 10:20:31 +0800 Subject: [PATCH 02/12] Update work-diary.me --- work-diary.me | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/work-diary.me b/work-diary.me index f151f404..b6300aae 100644 --- a/work-diary.me +++ b/work-diary.me @@ -1,3 +1,16 @@ https://www.kaggle.com/c/youtube8m-2019/overview/getting-started https://github.com/google/youtube-8m#running-on-googles-cloud-machine-learning-platform https://research.google.com/youtube8m/download.html + +Supervised Model + Keras + Random Forest + AdaBoost + SVM + XGBoost +Unsupervised Model + OC SVM + Isolation Forest + Autoencoder + + From 5b893c6dd0742f5804c88ae9bac5530f52158b6c Mon Sep 17 00:00:00 2001 From: oscar-davids Date: Wed, 27 May 2020 16:01:05 +0800 Subject: [PATCH 03/12] some note --- work-diary.me | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/work-diary.me b/work-diary.me index b6300aae..0f66060c 100644 --- a/work-diary.me +++ b/work-diary.me @@ -3,14 +3,28 @@ https://github.com/google/youtube-8m#running-on-googles-cloud-machine-learning-p https://research.google.com/youtube8m/download.html Supervised Model - Keras - Random Forest - AdaBoost - SVM - XGBoost + Keras + Random Forest + AdaBoost + SVM + XGBoost Unsupervised Model - OC SVM - Isolation Forest - Autoencoder + OC SVM + Isolation Forest + Autoencoder + +Attack methods + 1.Scaling and bitrate reduction + 2.Watermarks + 3.Flips / rotations + 4.Vertical flip + 5.Horizontal flip + 6.Rotate 90 degrees clockwise + 7.Rotate 90 degrees counterclockwise + 8.Black and white + 9.Chroma subsampling + 10.low bitrate + 11.vignette + From 01ec5526dfcd8c789f1b91adb994741e56eca322 Mon Sep 17 00:00:00 2001 From: oscar-davids Date: Wed, 27 May 2020 21:26:32 +0800 Subject: [PATCH 04/12] added utf-8 encoding --- YT8M_downloader/downloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/YT8M_downloader/downloader.py b/YT8M_downloader/downloader.py index 7b86db38..8fc95bba 100644 --- a/YT8M_downloader/downloader.py +++ b/YT8M_downloader/downloader.py @@ -81,7 +81,7 @@ def get_renditions(renditions: str) -> Dict: def read_data(): read_rows = [] read_ids = [] - with open(CURRENT_PATH + '/' + 'yt8m_data.csv', newline='') as csv_file: + with open(CURRENT_PATH + '/' + 'yt8m_data.csv', encoding='utf-8', newline='') as csv_file: reader = csv.reader(csv_file, delimiter=',', quotechar='"') next(reader) for row in reader: From ead3924b2c24c0857aa22b3688f8fffef664acca Mon Sep 17 00:00:00 2001 From: oscar-davids Date: Fri, 14 Aug 2020 09:14:48 +0800 Subject: [PATCH 05/12] add parallel verifier --- parallel/parallelcompare.py | 400 +++++++++++++++++++++++++++++++++++ parallel/parallelgraber.py | 200 ++++++++++++++++++ parallel/parallelverifier.py | 337 +++++++++++++++++++++++++++++ parallel/test_parallel.py | 30 +++ verifier/file_locker.py | 6 +- 5 files changed, 970 insertions(+), 3 deletions(-) create mode 100644 parallel/parallelcompare.py create mode 100644 parallel/parallelgraber.py create mode 100644 parallel/parallelverifier.py create mode 100644 parallel/test_parallel.py diff --git a/parallel/parallelcompare.py b/parallel/parallelcompare.py new file mode 100644 index 00000000..29d37380 --- /dev/null +++ b/parallel/parallelcompare.py @@ -0,0 +1,400 @@ +import os +import shutil +import timeit + +import cv2 +import numpy as np +import pandas as pd +import logging + + +from parallel.parallelgraber import Sample + +from concurrent.futures.thread import ThreadPoolExecutor +from concurrent.futures import ProcessPoolExecutor + +import multiprocessing +from scipy.spatial import distance +from scripts.asset_processor.video_metrics import VideoMetrics + +from scripts.asset_processor.video_asset_processor import VideoAssetProcessor +from scripts.asset_processor.video_capture import VideoCapture + +class SampleCompare: + def __init__(self, metrics_list, features_list, do_profiling=False): + """ + @param use_gpu: + @param original: + @param renditions: + @param metrics_list: + @param do_profiling: + @param max_samples: Max number of matched master-rendition frames to calculate metrics against. -1 = all + @param features_list: + @param debug_frames: dump frames selected for metric extraction on disk, decreases performance + @param channel: which HSV channel (0-3) to use for metric computation, -1 = all + @param image_pair_callback: function to call when image pair is created + """ + # ************************************************************************ + # Initialize global variables + # ************************************************************************ + self.hash_size = 16 + self.metrics_list = metrics_list + self.height = 0 + self.features_list = features_list + + self.metrics = {} + + if do_profiling: + import line_profiler + self.cpu_profiler = line_profiler.LineProfiler() + else: + self.cpu_profiler = None + self.do_profiling = do_profiling + + # Check if HD list is necessary + if 'temporal_ssim' in self.metrics_list or 'temporal_psnr' in self.metrics_list: + self.make_hd_list = True + else: + self.make_hd_list = False + + self.video_metrics = VideoMetrics(self.metrics_list, + self.hash_size, + int(self.height), + self.cpu_profiler, + self.do_profiling) + + def process(self, sampledata, renditions_list, source_video): + + self.renditions_list = renditions_list + self.original_path = source_video + self.fps = sampledata[0].fps + + original_metrics = {} + for i in range(len(sampledata[0].samples) - 1): + key = f'{i}' + result_metrics = {} + for metric in self.metrics_list: + result_metrics[metric] = 0.0 + result_metrics['dimensions'] = '{}:{}'.format(int(sampledata[0].width), int(sampledata[0].height)) + result_metrics['pixels'] = sampledata[0].pixels + result_metrics['ID'] = self.original_path + original_metrics[key] = result_metrics + + self.metrics[self.original_path] = original_metrics + + i = -1 + for rendition in renditions_list: + path = rendition['path'] + i += 1 + try: + if os.path.exists(path): + # Compute the metrics for the rendition + self.metrics[path] = self.compute(sampledata[0], sampledata[i+1], path) + else: + #logger.error(f'Unable to find rendition file: {path}') + print(f'Unable to find rendition file: {path}') + except Exception as err: + #logger.exception('Unable to compute metrics for {}'.format(path)) + print('Unable to compute metrics for {}'.format(path)) + finally: + print('Finish to compute metrics for {}'.format(path)) + + return self.aggregate(self.metrics) + + def aggregate(self, metrics): + """ + Function to aggregate computed values of metrics and renditions into a + pandas DataFrame. + """ + + # Dictionary for containing all metrics + metrics_dict = {} + # Dictionary for containing all renditions + renditions_dict = {} + + # Aggregate dictionary with all values for all renditions into a Pandas DataFrame + # All values are stored and obtained in a per-frame basis, then in a per-rendition + # fashion. They need to be rearranged. + + # First, we combine the frames + dict_of_df = {k: pd.DataFrame(v) for k, v in metrics.items()} + metrics_df = pd.concat(dict_of_df, axis=1, sort=True).transpose().reset_index(inplace=False) + + # Pandas concat function creates a level_0 and level_1 extra columns. + # They need to be renamed + metrics_df = metrics_df.rename(index=str, + columns={"level_1": "frame_num", "level_0": "path"}) + + # Then we can combine each rendition + for rendition in self.renditions_list: + # For the current rendition, we need an empty dictionary + rendition_dict = {} + + # We have a number of different metrics that have been computed. + # These are an input for the constructor of the class an vary according to + # what metrics are of interest in the research + for metric in self.metrics_list: + # Obtain a Pandas DataFrame from the original and build the original time series + original_df = metrics_df[metrics_df['path'] == self.original_path][metric] + original_df = original_df.reset_index(drop=True).transpose().dropna().astype(float) + # Obtain a Pandas DataFrame from the current rendition and build its time series + rendition_df = metrics_df[metrics_df['path'] == rendition['path']][metric] + rendition_df = rendition_df.reset_index(drop=True) + rendition_df = rendition_df.transpose().dropna().astype(float) + + # For those metrics that have a temporal character, + # we need to make a further aggregation + # We are basically using the Manhattan and euclidean distances, + # and statistically meaningful + # values such as mean, max and standard deviation. + # The whole time series is also provided for later exploration + # in the analysis part. + if 'temporal' in metric: + x_original = np.array(original_df[rendition_df.index].values) + x_rendition = np.array(rendition_df.values) + + [[manhattan]] = distance.cdist(x_original.reshape(1, -1), + x_rendition.reshape(1, -1), + metric='cityblock') + + rendition_dict['{}-euclidean'.format(metric)] = distance.euclidean(x_original, + x_rendition) + rendition_dict['{}-manhattan'.format(metric)] = manhattan + rendition_dict['{}-mean'.format(metric)] = np.mean(x_rendition) + rendition_dict['{}-max'.format(metric)] = np.max(x_rendition) + rendition_dict['{}-std'.format(metric)] = np.std(x_rendition) + rendition_dict['{}-corr'.format(metric)] = np.correlate(x_original, + x_rendition, + mode='same').mean() + rendition_dict['{}-series'.format(metric)] = x_rendition + + # Other metrics do not need time evaluation + else: + rendition_dict[metric] = rendition_df.mean() + + # Size is an important feature of an asset, as it gives important information + # regarding the potential compression effect + rendition_dict['size'] = os.path.getsize(rendition['path']) + rendition_dict['fps'] = self.fps + rendition_dict['path'] = rendition['path'] + + # Extract the dimensions of the rendition + dimensions_df = metrics_df[metrics_df['path'] == rendition['path']]['dimensions'] + rendition_dict['dimension_x'] = int(dimensions_df.unique()[0].split(':')[1]) + rendition_dict['dimension_y'] = int(dimensions_df.unique()[0].split(':')[0]) + + # Extract the pixels for this rendition + pixels_df = metrics_df[metrics_df['path'] == rendition['path']]['pixels'] + rendition_dict['pixels'] = int(pixels_df.unique()) + + # Store the rendition values in the dictionary of renditions for the present asset + renditions_dict[rendition['path']] = rendition_dict + + # Add the current asset values to the global metrics_dict + metrics_dict[self.original_path] = renditions_dict + + dict_of_df = {k: pd.DataFrame(v) for k, v in metrics_dict.items()} + metrics_df = pd.concat(dict_of_df, axis=1).transpose().reset_index(inplace=False) + + pixels_df = metrics_df['pixels'] + + # Compute a size/dimension ratio column for better accuracy + metrics_df['size_dimension_ratio'] = metrics_df['size'] / (metrics_df['dimension_y'] * metrics_df['dimension_x']) + + metrics_df = self.cleanup_dataframe(metrics_df, self.features_list) + + return metrics_df, pixels_df, dimensions_df + + def cleanup_dataframe(self, metrics_df, features): + """ + Cleanup the resulting pandas dataframe and convert it to a numpy array + to pass to the prediction model + """ + + metrics_df = metrics_df.rename(columns={'level_0': 'title', 'level_1': 'attack'}) + + if features is not None: + if 'attack_ID' in features: + features.remove('attack_ID') + # Filter out features from metrics dataframe + + # Scale measured metrics according to their resolution for better accuracy + metrics_df = self.rescale_to_resolution(metrics_df, features) + metrics_df = metrics_df[features] + + return metrics_df + + @staticmethod + def rescale_to_resolution(data, features): + """ + Function to rescale features to improve accuracy + """ + + df_features = pd.DataFrame(data) + downscale_features = ['temporal_psnr', + 'temporal_ssim', + 'temporal_cross_correlation' + ] + + upscale_features = ['temporal_difference', + 'temporal_dct', + 'temporal_canny', + 'temporal_gaussian_mse', + 'temporal_gaussian_difference', + 'temporal_histogram_distance', + 'temporal_entropy', + 'temporal_lbp', + 'temporal_texture', + 'temporal_match', + ] + + for label in downscale_features: + downscale_feature = [feature for feature in features if label in feature] + if downscale_feature: + for feature in downscale_feature: + print('Downscaling', label, feature) + df_features[feature] = df_features[feature] / (df_features['dimension_y'] * df_features['dimension_x']) + + for label in upscale_features: + upscale_feature = [feature for feature in features if label in feature] + if upscale_feature: + for feature in upscale_feature: + print('Upscaling', label, feature) + df_features[feature] = df_features[feature] * df_features['dimension_y'] * df_features['dimension_x'] + + return df_features + + def compare_renditions_instant(self, idx, mastersample, renditionsample, path): + """ + Function to compare pairs of numpy arrays extracting their corresponding metrics. + It basically takes the global original frame at frame_pos and its subsequent to + compare them against the corresponding ones in frame_list (a rendition). + It then extracts the metrics defined in the constructor under the metrics_list. + Methods of comparison are implemented in the video_metrics class + @param master_sample_idx_map: Mapping from rendition sample index to master sample index. If Nframes is different between master and rendition, the index mapping is not 1:1 + @param rendition_sample_idx: Index of master sample we compare rendition against + @param frame_list: + @param frame_list_hd: + @param dimensions: + @param pixels: + @param path: + @return: + """ + + # Dictionary of metrics + frame_metrics = {} + # Original frame to compare against (downscaled for performance) + reference_frame = mastersample.samples[idx] + # Original's subsequent frame (downscaled for performance) + next_reference_frame = mastersample.samples[idx+1] + # Rendition frame (downscaled for performance) + rendition_frame = renditionsample.samples[idx] + # Rendition's subsequent frame (downscaled for performance) + next_rendition_frame = renditionsample.samples[idx + 1] + + ''' + if self.debug_frames: + cv2.imwrite(f'{self.frame_dir_name}/CRI_{idx:04}_ref.png', self._convert_debug_frame(reference_frame)) + cv2.imwrite(f'{self.frame_dir_name}/CRI_{idx:04}_next_ref.png', self._convert_debug_frame(next_reference_frame)) + cv2.imwrite(f'{self.frame_dir_name}/CRI_{idx:04}_rend.png', self._convert_debug_frame(rendition_frame)) + cv2.imwrite(f'{self.frame_dir_name}/CRI_{idx:04}_next_rend.png', self._convert_debug_frame(next_rendition_frame)) + ''' + + if self.make_hd_list: + # Original frame to compare against (HD for QoE metrics) + reference_frame_hd = mastersample.samples_hd[idx] + # Rendition frame (HD for QoE metrics) + rendition_frame_hd = renditionsample.samples_hd[idx] + + # Compute the metrics defined in the global metrics_list. + # Uses the global instance of video_metrics + # Some metrics use a frame-to-frame comparison, + # but other require current and forward frames to extract + # their comparative values. + rendition_metrics = self.video_metrics.compute_metrics(rendition_frame, + next_rendition_frame, + reference_frame, + next_reference_frame, + rendition_frame_hd, + reference_frame_hd) + else: + rendition_metrics = self.video_metrics.compute_metrics(rendition_frame, + next_rendition_frame, + reference_frame, + next_reference_frame) + + print(rendition_metrics) + # Retrieve rendition dimensions for further evaluation + dimensions = '{}:{}'.format(int(renditionsample.width), int(renditionsample.height)) + rendition_metrics['dimensions'] = dimensions + + # Retrieve rendition number of pixels for further verification + rendition_metrics['pixels'] = renditionsample.pixels + + # Retrieve rendition path for further identification + #rendition_metrics['ID'] = self.original_path + rendition_metrics['ID'] = "original_path" + + # Identify rendition uniquely by its path and store metric data in frame_metrics dict + frame_metrics[path] = rendition_metrics + + # Return the metrics, together with the position of the frame + # frame_pos is needed for the ThreadPoolExecutor optimizations + return rendition_metrics, idx + + def compute(self, mastersample, renditionsample, path): + """ + Function to compare lists of numpy arrays extracting their corresponding metrics. + It basically takes the global original list of frames and the input frame_list + of numpy arrrays to extract the metrics defined in the constructor. + frame_pos establishes the index of the frames to be compared. + It is optimized by means of the ThreadPoolExecutor of Python's concurrent package + for better parallel performance. + @param master_sample_idx_map: Mapping from rendition sample index to master sample index. If Nframes is different between master and rendition, the index mapping is not 1:1 + @param frame_list: + @param frame_list_hd: + @param path: + @param dimensions: + @param pixels: + @return: + """ + start = timeit.default_timer() + + # Dictionary of metrics + rendition_metrics = {} + future_list = [] + + # Execute computations in parallel using as many processors as possible + # future_list is a dictionary storing all computed values from each thread + with ThreadPoolExecutor(max_workers=multiprocessing.cpu_count()) as executor: + # Compare the original asset against its renditions + for i in range(len(mastersample.samples) - 1): + key = f'{i}' + future = executor.submit(self.compare_renditions_instant, + i, + mastersample, + renditionsample, + path) + + future_list.append((key, future)) + + # Once all frames in frame_list have been iterated, we can retrieve their values + for key, future in future_list: + # Values are retrieved in a dict, as a result of the executor's process + result_rendition_metrics, frame_pos = future.result() + # The computed values at a given frame + rendition_metrics[key] = result_rendition_metrics + + time_spent = timeit.default_timer() - start + #logger.info(f'Metrics compute took: {time_spent}') + #print(f'Metrics compute took: {time_spent}') + ''' + for i in range(len(mastersample.samples) - 1): + key = f'{i}' + result_rendition_metrics, frame_pos = self.compare_renditions_instant( i, mastersample, renditionsample, path) + # The computed values at a given frame + rendition_metrics[key] = result_rendition_metrics + ''' + + # Return the metrics for the currently processed rendition + return rendition_metrics \ No newline at end of file diff --git a/parallel/parallelgraber.py b/parallel/parallelgraber.py new file mode 100644 index 00000000..fc56b166 --- /dev/null +++ b/parallel/parallelgraber.py @@ -0,0 +1,200 @@ + + +import os +import shutil +import timeit +import numpy as np +import cv2 +from concurrent.futures.thread import ThreadPoolExecutor +from concurrent.futures import ProcessPoolExecutor + +from collections import deque +import multiprocessing + +from scripts.asset_processor.video_asset_processor import VideoAssetProcessor +from scripts.asset_processor.video_capture import VideoCapture + +class Sample: + def __init__(self, idx_map, samples, samples_hd, pixels, height, width, fps): + self.indexes = [] + self.timestamps = [] + self.idx_map = idx_map + self.samples = samples + self.samples_hd = samples_hd + self.pixels = pixels + self.height = height + self.width = width + self.fps = fps + +class SampleGraber: + def __init__(self, id, videopath, use_gpu, do_profiling, debug): + self.videopath = videopath + self.use_gpu = use_gpu + self.do_profiling = do_profiling + self.debug = debug + self.debug_frames = False + + self.capture = VideoCapture(videopath) + self.id = id + self.fps = self.capture.fps + self.width = self.capture.width + self.height = self.capture.height + + + if self.debug_frames: + self.frame_dir_name = type(self).__name__ + shutil.rmtree(self.frame_dir_name, ignore_errors=True) + os.makedirs(self.frame_dir_name, exist_ok=True) + + @staticmethod + def _convert_debug_frame(frame): + return cv2.resize(frame, (1920, 1080), cv2.INTER_CUBIC) + + def grabsamples(self, sampleIdx, timestamps): + + self.indexes = sampleIdx + self.timestamps = timestamps + + # Create list of random timestamps in video file to calculate metrics at + # difference between master timestamp and best matching frame timestamp of current video + timestamp_diffs = [np.inf] * len(self.indexes) + # currently selected frames + candidate_frames = [None] * len(self.indexes) + # maps selected rendition sample to master sample + debug_index_mapping = {} + idx_map = [] + frame_list = [] + frame_list_hd = [] + frames_read = 0 + pixels = 0 + height = 0 + width = 0 + timestamps_selected = [] + fps = self.capture.fps + # Iterate through each frame in the video + while True: + # Read the frame from the capture + frame_data = self.capture.read(grab=True) + if frame_data is not None: + frames_read += 1 + + # update candidate frames + ts_diffs = [abs(frame_data.timestamp - mts) for mts in self.timestamps] + best_match_idx = int(np.argmin(ts_diffs)) + best_match = np.min(ts_diffs) + # max theoretical timestamp difference between 'matching' frames would be 1/(2*fps) + max(jitter) + # don't consider frames that are too far, otherwise the algorithm will be linear on memory vs video length + if best_match < 1 / (2 * self.capture.fps) and timestamp_diffs[best_match_idx] > best_match: + timestamp_diffs[best_match_idx] = best_match + frame_data = self.capture.retrieve() + candidate_frames[best_match_idx] = frame_data + # Break the loop when frames cannot be taken from original + else: + break + + # process picked frames + for i in range(len(candidate_frames)): + frame_data = candidate_frames[i] + ts_diff = timestamp_diffs[i] + if frame_data is None or ts_diff > 1 / (2 * self.fps): + # no good matching candidate frame + continue + + if self.debug_frames: + cv2.imwrite( + f'{self.frame_dir_name}/{i:04}_{"s" if self.self.id else ""}_{frame_data.index}_{frame_data.timestamp:.4}.png', + self._convert_debug_frame(frame_data.frame)) + + + timestamps_selected.append(frame_data.timestamp) + idx_map.append(i) + debug_index_mapping[self.indexes[i]] = frame_data.index + # Count the number of pixels + height = frame_data.frame.shape[1] + width = frame_data.frame.shape[0] + pixels += height * width + + #if not self.markup_master_frames and self.image_pair_callback is not None: + # self.image_pair_callback(self.master_samples_hd[i], frame_data.frame, len(frame_list), ts_diff, + # self.original_path, capture.filename) + + frame_list_hd.append(frame_data.frame) + # Change color space to have only luminance + frame = cv2.resize(frame_data.frame, (480, 270), interpolation=cv2.INTER_LINEAR) + frame_list.append(frame) + + # Clean up memory + self.capture.release() + + #logger.info(f'Mean master-rendition timestamp diff, sec: {np.mean( + # list(filter(lambda x: not np.isinf(x), timestamp_diffs)))} SD: {np.std( + # list(filter(lambda x: not np.isinf(x), timestamp_diffs)))}') + #logger.info(f'Master frame index mapping for {capture.filename}: \n {debug_index_mapping}') + + #return idx_map, np.array(frame_list), np.array(frame_list_hd), pixels, height, width + + sample = Sample(idx_map, np.array(frame_list), np.array(frame_list_hd), pixels, height, width, fps) + + return sample + +class ParallelGraber: + def __init__(self, max_samples, use_gpu, do_profiling, debug): + """ + Initialize verifier instance + @param max_samples: Max number of samples to take for a video + @param model: Either URI of the archive with model files, or local folder path + @param use_gpu: Use GPU for video decoding and computations + @param do_profiling: Output execution times to logs + @param debug: Enable debug image output, greatly reduces performance + """ + self.max_samples = max_samples + self.use_gpu = use_gpu + self.do_profiling = do_profiling + self.debug = debug + + self.total_frames = 0 + self.sample_indexes = [] + self.sample_timestamps = [] + self.grabers = [] + self.samples = [] + + def addgraber(self, videopath): + id = len(self.grabers) + graber = SampleGraber(id, videopath, self.use_gpu, self.do_profiling, self.debug) + self.grabers.append(graber) + + if len(self.grabers) == 1: #master + self.total_frames = graber.capture.frame_count + self.sample_indexes = np.sort(np.random.choice(self.total_frames, self.max_samples, False)) + # setting time stamp + self.sample_timestamps = self.sample_indexes * 1.0 / graber.capture.fps + + + def capturesingle(self, id): + return self.grabers[id].grabsamples(self.sample_indexes, self.sample_timestamps) + + def captureall(self): + + future_result = [] + future_list = [] + with ThreadPoolExecutor(max_workers=multiprocessing.cpu_count()) as executor: + # Compare the original asset against its renditions + for i in range(len(self.grabers)): + key = f'{i}' + future = executor.submit(self.capturesingle, i) + future_list.append((key, future)) + + # Once all frames in frame_list have been iterated, we can retrieve their values + for key, future in future_list: + # Values are retrieved in a dict, as a result of the executor's process + frame_pos = future.result() + future_result.append(frame_pos) + + ''' + for i in range(len(self.grabers)): + frame_pos = self.grabers[i].grabsamples(self.sample_indexes,self.sample_timestamps) + future_result.append(frame_pos) + ''' + + # Return the metrics for the currently processed rendition + return future_result diff --git a/parallel/parallelverifier.py b/parallel/parallelverifier.py new file mode 100644 index 00000000..24f0c6f5 --- /dev/null +++ b/parallel/parallelverifier.py @@ -0,0 +1,337 @@ + +import uuid +import timeit +import json +import tarfile +import os +import sys +import urllib +import subprocess +import logging +from joblib import load +import numpy as np +import pandas as pd +import cv2 +from scipy.io import wavfile +from catboost import CatBoostClassifier +from catboost import CatBoostRegressor +from verifier import file_locker + +from scripts.asset_processor.video_asset_processor import VideoAssetProcessor +from parallel.parallelgraber import ParallelGraber +from parallel.parallelcompare import SampleCompare + + +logger = logging.getLogger() + + +class ParallelVerifier: + def __init__(self, max_samples, model, use_gpu, do_profiling, debug): + """ + Initialize verifier instance + @param max_samples: Max number of samples to take for a video + @param model: Either URI of the archive with model files, or local folder path + @param use_gpu: Use GPU for video decoding and computations + @param do_profiling: Output execution times to logs + @param debug: Enable debug image output, greatly reduces performance + """ + self.use_gpu = use_gpu + self.debug = debug + self.model_dir = '/tmp/model' + if os.path.isdir(model): + self.model_dir = model + else: + self.retrieve_models(model, self.model_dir) + self.max_samples = max_samples + self.do_profiling = do_profiling + self.tmp_files = [] + self.load_models() + + @staticmethod + def read_video_metadata(filename): + try: + res = {} + cap = cv2.VideoCapture(filename) + fps = cap.get(cv2.CAP_PROP_FPS) + frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + height = float(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + width = float(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + res['frame_rate'] = fps + res['resolution'] = {'width': width, 'height': height} + res['pixels'] = width * height * frame_count + res['duration'] = frame_count / fps + res['bitrate'] = os.path.getsize(filename) / res['duration'] + finally: + cap.release() + return res + + def pre_verify(self, source, rendition): + """ + Function to verify that rendition conditions and specifications + are met as prescribed by the Broadcaster + """ + # Extract data from video capture + video_file, audio_file = self.get_video_audio(rendition['uri']) + rendition['video_available'] = video_file is not None + rendition['audio_available'] = audio_file is not None + + if video_file: + # Check that the audio exists + if audio_file: + _, source_file_series = wavfile.read(source['audio_path']) + _, rendition_file_series = wavfile.read(audio_file) + + try: + # Compute the Euclidean distance between source's and rendition's signals + rendition['audio_dist'] = np.linalg.norm(source_file_series - rendition_file_series) + except: + # Set to negative to indicate an error during audio comparison + # (matching floating-point datatype of Euclidean distance) + rendition['audio_dist'] = -1.0 + finally: + # Cleanup the audio file generated to avoid cluttering + os.remove(audio_file) + + metadata = self.read_video_metadata(video_file) + + rendition['path'] = video_file + + # Create dictionary with passed / failed verification parameters + if rendition.get('resolution'): + rendition['resolution']['height_pre_verification'] = metadata['height'] / float(rendition['resolution']['height']) + rendition['resolution']['width_pre_verification'] = metadata['width'] / float(rendition['resolution']['width']) + + if 'frame_rate' in rendition: + rend_exp_fps = float(rendition['frame_rate']) or source['frame_rate'] + rendition['frame_rate'] = bool(np.isclose(rend_exp_fps, metadata['frame_rate'], atol=0.1)) + + if rendition.get('bitrate'): + rendition['bitrate'] = metadata['bitrate'] == rendition['bitrate'] + + if rendition.get('pixels'): + rendition['pixels_pre_verification'] = float(rendition['pixels']) / metadata['pixels'] + + + return rendition + + def meta_model(self, row): + """ + The goal is to reduce the number of False Positives (tamper) to prevent wrongfully penalizing transcoder nodes. OCSVM model is expected to have higher precision (low FP) on novel data. + If OCSVM predicts the observation is an inlier (not tampered), we'll go with it, otherwise we'll use supervised model output. + """ + if row['ul_pred_tamper'] == 0: + return 0 + return row['sl_pred_tamper'] + + def verify(self, source_uri, renditions): + """ + Function that returns the predicted compliance of a list of renditions + with respect to a given source file using a specified model. + """ + total_start = timeit.default_timer() + source_video, source_audio = self.get_video_audio(source_uri) + if not source_video and not source_audio: + raise ValueError('Couldn\'t retrieve source files') + try: + if source_video: + # Prepare source and renditions for verification + source = {'path': source_video, + 'audio_path': source_audio, + 'video_available': True, + 'audio_available': source_audio is not None, + 'uri': source_uri} + # read source metadata + metadata = self.read_video_metadata(source_video) + source.update(metadata) + + # Create a list of preverified renditions + pre_verified_renditions = [] + for rendition in renditions: + pre_verification = self.pre_verify(source, rendition) + if rendition['video_available']: + pre_verified_renditions.append(pre_verification) + + # Remove non numeric features from feature list + non_temporal_features = ['attack_ID', 'title', 'attack', 'dimension', 'size', 'size_dimension_ratio'] + metrics_list = [] + features = list(np.unique(self.features_ul + self.features_sl)) + + for metric in features: + if metric not in non_temporal_features: + metrics_list.append(metric.split('-')[0]) + + #----- Frame grabber ----- + # Frame grabber times + start = timeit.default_timer() + + # Instantiate Frame Grabber class + samplegraber = ParallelGraber(self.max_samples, self.use_gpu, self.do_profiling, self.debug) + + samplegraber.addgraber(source_video) + for rendition in renditions: + samplegraber.addgraber(rendition['path']) + sampledata = samplegraber.captureall() + + initialize_time = timeit.default_timer() - start + + # ----- Feature calculation ----- + # Register times for asset processing + start = timeit.default_timer() + + featureCompare = SampleCompare(metrics_list, features, self.do_profiling) + + # Assemble output dataframe with processed metrics + metrics_df, pixels_df, dimensions_df = featureCompare.process(sampledata, renditions, source_video) + + # Record time for processing of assets metrics + process_time = timeit.default_timer() - start + + # ----- Inference part ----- + x_renditions_sl = np.asarray(metrics_df[self.features_sl]) + x_renditions_ul = np.asarray(metrics_df[self.features_ul]) + x_renditions_ul = self.loaded_scaler.transform(x_renditions_ul) + + np.set_printoptions(precision=6, suppress=True) + logger.debug(f'INPUT SL ARRAY: {x_renditions_sl}') + logger.debug(f'Unscaled INPUT UL ARRAY: {np.asarray(metrics_df[self.features_ul])}') + logger.debug(f'SCALED INPUT UL ARRAY: {x_renditions_ul}') + # Make predictions for given data + start = timeit.default_timer() + predictions_df = pd.DataFrame() + predictions_df['sl_pred_tamper'] = self.loaded_model_sl.predict(x_renditions_sl) + predictions_df['ocsvm_dist'] = self.loaded_model_ul.decision_function(x_renditions_ul) + predictions_df['ul_pred_tamper'] = (-self.loaded_model_ul.predict(x_renditions_ul)+1)/2 + predictions_df['meta_pred_tamper'] = predictions_df.apply(self.meta_model, axis=1) + prediction_time = timeit.default_timer() - start + + # Add predictions to rendition dictionary + i = 0 + for _, rendition in enumerate(renditions): + if rendition['video_available']: + rendition.pop('path', None) + rendition['ocsvm_dist'] = float(predictions_df['ocsvm_dist'].iloc[i]) + rendition['tamper_ul'] = int(predictions_df['ul_pred_tamper'].iloc[i]) + rendition['tamper_sl'] = int(predictions_df['sl_pred_tamper'].iloc[i]) + rendition['tamper'] = int(predictions_df['meta_pred_tamper'].iloc[i]) + # Append the post-verification of resolution and pixel count + if 'pixels' in rendition: + rendition['pixels_post_verification'] = float(rendition['pixels']) / pixels_df[i] + if 'resolution' in rendition: + rendition['resolution']['height_post_verification'] = float(rendition['resolution']['height']) / int(dimensions_df[i].split(':')[0]) + rendition['resolution']['width_post_verification'] = float(rendition['resolution']['width']) / int(dimensions_df[i].split(':')[1]) + i += 1 + + if self.do_profiling: + logger.info(f'Features used: {features}') + logger.info(f'Total time: {timeit.default_timer() - total_start}') + logger.info(f'Initialization time: {initialize_time}') + logger.info(f'Process time: {process_time}') + logger.info(f'Prediction time: {prediction_time}') + + return renditions + finally: + for f in self.tmp_files: + if os.path.exists(f): + os.remove(f) + self.tmp_files.clear() + + def retrieve_models(self, uri, model_dir): + """ + Function to obtain pre-trained model for verification predictions + """ + with file_locker.FileLocker('model_op.lock'): + model_file = uri.split('/')[-1] + model_file_sl = f'{model_file}_cb_sl' + # Create target Directory if don't exist + if not os.path.exists(model_dir): + try: + os.mkdir(model_dir) + logger.info(f'Directory created: {model_dir}') + logger.info('Model download started') + filename, _ = urllib.request.urlretrieve(uri, + filename='{}/{}'.format(model_dir, model_file)) + logger.info(f'Model {filename} downloaded') + with tarfile.open(filename) as tar_f: + tar_f.extractall(model_dir) + + return model_dir, model_file, model_file_sl + except Exception as exc: + if os.path.exists(model_dir): + os.rmdir(model_dir) + logger.exception('Unable to untar model') + raise exc + else: + logger.debug(f'Directory {model_dir} already exists, skipping download') + + def get_video_audio(self, uri): + """ + Function to obtain a path to a video and audio files from url or local path + """ + video_file = None + audio_file = None + if uri.lower().startswith('http'): + try: + file_name = '/tmp/{}'.format(uuid.uuid4()) + logger.info(f'File download started: {file_name}') + video_file, _ = urllib.request.urlretrieve(uri, filename=file_name) + self.tmp_files.append(video_file) + logger.info(f'File {file_name} downloaded to {video_file}') + except Exception as e: + logger.exception('Unable to download HTTP video file') + else: + if os.path.isfile(uri): + video_file = uri + logger.info(f'Video file {video_file} available in file system') + else: + logger.info(f'Video file {video_file} NOT available in file system') + + if video_file: + audio_file = '{}_audio.wav'.format(video_file) + logger.info('Extracting audio track') + ffmpeg = subprocess.Popen(' '.join(['ffmpeg', + '-i', + video_file, + '-vn', + '-acodec', + 'pcm_s16le', + '-loglevel', + 'quiet', + audio_file]), stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True) + stdout, stderr = ffmpeg.communicate() + if ffmpeg.returncode: + logger.error(f'Could not extract audio from video file {stderr}') + if os.path.isfile(audio_file): + logger.info(f'Audio file {audio_file} available in file system') + self.tmp_files.append(audio_file) + else: + logger.info(f'Audio file {audio_file} NOT available in file system') + audio_file = None + return video_file, audio_file + + def load_models(self): + """ + Cache models to memory + @return: + """ + # Configure UL model for inference + model_name_ul = 'OCSVM' + scaler_type = 'StandardScaler' + learning_type = 'UL' + self.loaded_model_ul = load(open('{}/{}.joblib'.format(self.model_dir, + model_name_ul), 'rb')) + + self.loaded_scaler = load(open('{}/{}_{}.joblib'.format(self.model_dir, + learning_type, + scaler_type), 'rb')) + # Configure SL model for inference + model_name_sl = 'CB_Binary' + self.loaded_model_sl = CatBoostClassifier().load_model('{}/{}.cbm'.format(self.model_dir, + model_name_sl)) + + # Open model configuration files + with open('{}/param_{}.json'.format(self.model_dir, model_name_ul)) as json_file: + params = json.load(json_file) + self.features_ul = params['features'] + with open('{}/param_{}.json'.format(self.model_dir, model_name_sl)) as json_file: + params = json.load(json_file) + self.features_sl = params['features'] \ No newline at end of file diff --git a/parallel/test_parallel.py b/parallel/test_parallel.py new file mode 100644 index 00000000..d905e334 --- /dev/null +++ b/parallel/test_parallel.py @@ -0,0 +1,30 @@ +import requests +import json +import random +import numpy as np +import pandas as pd +import os +import tqdm +import glob +from parallel.parallelverifier import ParallelVerifier +import logging +from scripts.asset_processor import VideoAssetProcessor, VideoCapture +import timeit +import pytest +import cProfile + +pd.options.display.width = 0 +pd.set_option('display.max_columns', None) + +class TestParallelDecode: + + def test_parallel_decodetamper(self): + debug = False + src = 'testing/tests/data/master2_4s_1080.mp4' + rend_path = 'testing/tests/data/rend2_4s_1080_adv_attack.mp4' + verifier = ParallelVerifier(10, 'http://storage.googleapis.com/verification-models/verification-metamodel-2020-07-06.tar.xz', False, False, debug) + verification_result = verifier.verify(src, [{'uri': rend_path}]) + print(verification_result) + assert verification_result[0]['tamper'] == 1 + + \ No newline at end of file diff --git a/verifier/file_locker.py b/verifier/file_locker.py index ab06214f..05f4bba3 100644 --- a/verifier/file_locker.py +++ b/verifier/file_locker.py @@ -1,7 +1,7 @@ """ Provides file-based interprocess lock """ -import fcntl +#import fcntl class FileLocker: @@ -10,8 +10,8 @@ def __init__(self, lock_file) -> None: def __enter__(self): self.fp = open(self.lock_file, "wb+") - fcntl.flock(self.fp.fileno(), fcntl.LOCK_EX) + #fcntl.flock(self.fp.fileno(), fcntl.LOCK_EX) def __exit__(self, _type, value, tb): - fcntl.flock(self.fp.fileno(), fcntl.LOCK_UN) + #fcntl.flock(self.fp.fileno(), fcntl.LOCK_UN) self.fp.close() From cda6b95042c48788f39f21c776ab0a3d8f892efc Mon Sep 17 00:00:00 2001 From: Oscar <54097108+oscar-davids@users.noreply.github.com> Date: Fri, 14 Aug 2020 09:21:51 +0800 Subject: [PATCH 06/12] Delete work-diary.me --- work-diary.me | 30 ------------------------------ 1 file changed, 30 deletions(-) delete mode 100644 work-diary.me diff --git a/work-diary.me b/work-diary.me deleted file mode 100644 index 0f66060c..00000000 --- a/work-diary.me +++ /dev/null @@ -1,30 +0,0 @@ -https://www.kaggle.com/c/youtube8m-2019/overview/getting-started -https://github.com/google/youtube-8m#running-on-googles-cloud-machine-learning-platform -https://research.google.com/youtube8m/download.html - -Supervised Model - Keras - Random Forest - AdaBoost - SVM - XGBoost -Unsupervised Model - OC SVM - Isolation Forest - Autoencoder - -Attack methods - 1.Scaling and bitrate reduction - 2.Watermarks - 3.Flips / rotations - 4.Vertical flip - 5.Horizontal flip - 6.Rotate 90 degrees clockwise - 7.Rotate 90 degrees counterclockwise - 8.Black and white - 9.Chroma subsampling - 10.low bitrate - 11.vignette - - - From 94de062031d94ea3f4e1e2c016766ec5523f9cae Mon Sep 17 00:00:00 2001 From: oscar-davids Date: Fri, 14 Aug 2020 09:25:09 +0800 Subject: [PATCH 07/12] fix package --- parallel/parallelcompare.py | 4 ---- parallel/parallelgraber.py | 3 --- parallel/parallelverifier.py | 1 - 3 files changed, 8 deletions(-) diff --git a/parallel/parallelcompare.py b/parallel/parallelcompare.py index 29d37380..415a0cb9 100644 --- a/parallel/parallelcompare.py +++ b/parallel/parallelcompare.py @@ -11,15 +11,11 @@ from parallel.parallelgraber import Sample from concurrent.futures.thread import ThreadPoolExecutor -from concurrent.futures import ProcessPoolExecutor import multiprocessing from scipy.spatial import distance from scripts.asset_processor.video_metrics import VideoMetrics -from scripts.asset_processor.video_asset_processor import VideoAssetProcessor -from scripts.asset_processor.video_capture import VideoCapture - class SampleCompare: def __init__(self, metrics_list, features_list, do_profiling=False): """ diff --git a/parallel/parallelgraber.py b/parallel/parallelgraber.py index fc56b166..018a066e 100644 --- a/parallel/parallelgraber.py +++ b/parallel/parallelgraber.py @@ -1,17 +1,14 @@ - import os import shutil import timeit import numpy as np import cv2 from concurrent.futures.thread import ThreadPoolExecutor -from concurrent.futures import ProcessPoolExecutor from collections import deque import multiprocessing -from scripts.asset_processor.video_asset_processor import VideoAssetProcessor from scripts.asset_processor.video_capture import VideoCapture class Sample: diff --git a/parallel/parallelverifier.py b/parallel/parallelverifier.py index 24f0c6f5..580496bc 100644 --- a/parallel/parallelverifier.py +++ b/parallel/parallelverifier.py @@ -17,7 +17,6 @@ from catboost import CatBoostRegressor from verifier import file_locker -from scripts.asset_processor.video_asset_processor import VideoAssetProcessor from parallel.parallelgraber import ParallelGraber from parallel.parallelcompare import SampleCompare From 04431849acdf778c5db5bde31ee5800b8a68ca16 Mon Sep 17 00:00:00 2001 From: Oscar <54097108+oscar-davids@users.noreply.github.com> Date: Fri, 14 Aug 2020 11:27:30 +0800 Subject: [PATCH 08/12] Update file_locker.py --- verifier/file_locker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/verifier/file_locker.py b/verifier/file_locker.py index 05f4bba3..ab06214f 100644 --- a/verifier/file_locker.py +++ b/verifier/file_locker.py @@ -1,7 +1,7 @@ """ Provides file-based interprocess lock """ -#import fcntl +import fcntl class FileLocker: @@ -10,8 +10,8 @@ def __init__(self, lock_file) -> None: def __enter__(self): self.fp = open(self.lock_file, "wb+") - #fcntl.flock(self.fp.fileno(), fcntl.LOCK_EX) + fcntl.flock(self.fp.fileno(), fcntl.LOCK_EX) def __exit__(self, _type, value, tb): - #fcntl.flock(self.fp.fileno(), fcntl.LOCK_UN) + fcntl.flock(self.fp.fileno(), fcntl.LOCK_UN) self.fp.close() From 47f9146832e3ddd6eb5148ea25ebbd36220c4761 Mon Sep 17 00:00:00 2001 From: oscar-davids Date: Fri, 14 Aug 2020 13:34:19 +0800 Subject: [PATCH 09/12] update upstream --- parallel/parallelverifier.py | 4 ++-- verifier/verifier.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/parallel/parallelverifier.py b/parallel/parallelverifier.py index 580496bc..aa7b10d4 100644 --- a/parallel/parallelverifier.py +++ b/parallel/parallelverifier.py @@ -97,8 +97,8 @@ def pre_verify(self, source, rendition): # Create dictionary with passed / failed verification parameters if rendition.get('resolution'): - rendition['resolution']['height_pre_verification'] = metadata['height'] / float(rendition['resolution']['height']) - rendition['resolution']['width_pre_verification'] = metadata['width'] / float(rendition['resolution']['width']) + rendition['resolution']['height_pre_verification'] = metadata["resolution"]['height'] / float(rendition['resolution']['height']) + rendition['resolution']['width_pre_verification'] = metadata["resolution"]['width'] / float(rendition['resolution']['width']) if 'frame_rate' in rendition: rend_exp_fps = float(rendition['frame_rate']) or source['frame_rate'] diff --git a/verifier/verifier.py b/verifier/verifier.py index 5fe40c86..c37972bc 100644 --- a/verifier/verifier.py +++ b/verifier/verifier.py @@ -98,8 +98,8 @@ def pre_verify(self, source, rendition): # Create dictionary with passed / failed verification parameters if rendition.get('resolution'): - rendition['resolution']['height_pre_verification'] = metadata['height'] / float(rendition['resolution']['height']) - rendition['resolution']['width_pre_verification'] = metadata['width'] / float(rendition['resolution']['width']) + rendition['resolution']['height_pre_verification'] = metadata["resolution"]['height'] / float(rendition['resolution']['height']) + rendition['resolution']['width_pre_verification'] = metadata["resolution"]['width'] / float(rendition['resolution']['width']) if 'frame_rate' in rendition: rend_exp_fps = float(rendition['frame_rate']) or source['frame_rate'] From ad4a43c9dec375afb8ac7c2a19c444eb8d0a21f7 Mon Sep 17 00:00:00 2001 From: oscar-davids Date: Fri, 14 Aug 2020 13:46:29 +0800 Subject: [PATCH 10/12] fix tab string --- parallel/parallelverifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parallel/parallelverifier.py b/parallel/parallelverifier.py index aa7b10d4..cd6d2c7f 100644 --- a/parallel/parallelverifier.py +++ b/parallel/parallelverifier.py @@ -97,7 +97,7 @@ def pre_verify(self, source, rendition): # Create dictionary with passed / failed verification parameters if rendition.get('resolution'): - rendition['resolution']['height_pre_verification'] = metadata["resolution"]['height'] / float(rendition['resolution']['height']) + rendition['resolution']['height_pre_verification'] = metadata["resolution"]['height'] / float(rendition['resolution']['height']) rendition['resolution']['width_pre_verification'] = metadata["resolution"]['width'] / float(rendition['resolution']['width']) if 'frame_rate' in rendition: From 1466eabcf12cea3e798c31defd102062d5d3652c Mon Sep 17 00:00:00 2001 From: Oscar <54097108+oscar-davids@users.noreply.github.com> Date: Fri, 14 Aug 2020 16:43:55 +0800 Subject: [PATCH 11/12] Update test_parallel.py --- parallel/test_parallel.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/parallel/test_parallel.py b/parallel/test_parallel.py index d905e334..e804944c 100644 --- a/parallel/test_parallel.py +++ b/parallel/test_parallel.py @@ -11,7 +11,6 @@ from scripts.asset_processor import VideoAssetProcessor, VideoCapture import timeit import pytest -import cProfile pd.options.display.width = 0 pd.set_option('display.max_columns', None) @@ -27,4 +26,4 @@ def test_parallel_decodetamper(self): print(verification_result) assert verification_result[0]['tamper'] == 1 - \ No newline at end of file + From d5ebe206980b542e5837016371b7204836b43076 Mon Sep 17 00:00:00 2001 From: Oscar <54097108+oscar-davids@users.noreply.github.com> Date: Fri, 28 Aug 2020 11:10:45 +0800 Subject: [PATCH 12/12] Update verifier.py --- verifier/verifier.py | 49 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/verifier/verifier.py b/verifier/verifier.py index c37972bc..5e75d67b 100644 --- a/verifier/verifier.py +++ b/verifier/verifier.py @@ -127,6 +127,55 @@ def verify(self, source_uri, renditions): Function that returns the predicted compliance of a list of renditions with respect to a given source file using a specified model. """ + """ + if usecengine: + renditionlist = "" + for rendition in renditions: + renditionlist = renditionlist + (rendition['uri'] + ',') + + try: + #[metatamper, videoalive, audioalive, fps, width, height, audiodiff, + # sizeratio, dctdiff, gaussiamse, gaussiandiff, gaussianthreshold, histogramdiff] + diffmatrix = extractfts.calc_featurediff(source_uri, renditionlist, self.max_samples) + + for i, rendition in enumerate(renditions): + rendition['audio_dist'] = -1.0 + rendition['video_available'] = None + rendition['audio_available'] = None + if diffmatrix[i,1] == 1: + rendition['video_available'] = True + if diffmatrix[i,2] == 1: + rendition['audio_available'] = True + rendition['audio_dist'] = diffmatrix[i,6] + + rendition['fps'] = diffmatrix[i,3] + rendition['pixels'] = diffmatrix[i,4] * diffmatrix[i,5] + + x_renditions_sl = diffmatrix[:, 7:] + x_renditions_ul = diffmatrix[:, 7:] + + x_renditions_ul = self.loaded_scaler.transform(x_renditions_ul) + np.set_printoptions(precision=6, suppress=True) + # Make predictions for given data + start = timeit.default_timer() + predictions_df = pd.DataFrame() + predictions_df['sl_pred_tamper'] = self.loaded_model_sl.predict(x_renditions_sl) + predictions_df['ocsvm_dist'] = self.loaded_model_ul.decision_function(x_renditions_ul) + predictions_df['ul_pred_tamper'] = (-self.loaded_model_ul.predict(x_renditions_ul) + 1) / 2 + predictions_df['meta_pred_tamper'] = predictions_df.apply(self.meta_model, axis=1) + prediction_time = timeit.default_timer() - start + i = 0 + for _, rendition in enumerate(renditions): + if rendition['video_available']: + rendition['ocsvm_dist'] = float(predictions_df['ocsvm_dist'].iloc[i]) + rendition['tamper_ul'] = int(predictions_df['ul_pred_tamper'].iloc[i]) + rendition['tamper_sl'] = int(predictions_df['sl_pred_tamper'].iloc[i]) + rendition['tamper'] = int(predictions_df['meta_pred_tamper'].iloc[i]) + i += 1 + return renditions + except Exception as e: + print(e) + """ total_start = timeit.default_timer() source_video, source_audio = self.get_video_audio(source_uri) if not source_video and not source_audio: