diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a8e5812e8..779867bde 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,14 +1,15 @@ +repos: - repo: local hooks: - id: yapf name: yapf - entry: yapf + entry: bash -c 'yapf "$@" && git add -u' -- language: system args: [-i, --style=.style.yapf] files: \.py$ - repo: https://github.com/pre-commit/pre-commit-hooks - sha: a11d9314b22d8f8c7556443875b731ef05965464 + rev: a11d9314b22d8f8c7556443875b731ef05965464 hooks: - id: check-merge-conflict - id: check-symlinks diff --git a/configs/segmentation/bcn/bgm/50salads/full/split1.yaml b/configs/segmentation/bcn/bgm/50salads/full/split1.yaml new file mode 100644 index 000000000..2fdd7160b --- /dev/null +++ b/configs/segmentation/bcn/bgm/50salads/full/split1.yaml @@ -0,0 +1,75 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "50salads" + use_full: True + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "50salads" + use_full: True + test_mode: "more" + results_path: "./output/BCN/50salads/split1/BcnBgmFull/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/train.split1.bundle" + use_full: True + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split1.bundle" + use_full: True + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split1.bundle" + use_full: True + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.001 + milestones: [100, 200] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: #METRIC field + 
name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmFull_Inference_helper' + num_channels: 2048 + sample_rate: 2 + result_path: './inference/' + +output_dir: "./output/BCN/50salads/split1/BcnBgmFull/" +log_interval: 2000 +epochs: 300 +log_level: "DEBUG" +save_interval: 2000 +model_name: "BcnBgmFull" diff --git a/configs/segmentation/bcn/bgm/50salads/full/split2.yaml b/configs/segmentation/bcn/bgm/50salads/full/split2.yaml new file mode 100644 index 000000000..414603150 --- /dev/null +++ b/configs/segmentation/bcn/bgm/50salads/full/split2.yaml @@ -0,0 +1,75 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "50salads" + use_full: True + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "50salads" + use_full: True + test_mode: "more" + results_path: "./output/BCN/50salads/split2/BcnBgmFull/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/train.split2.bundle" + use_full: True + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split2.bundle" + use_full: True + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split2.bundle" + use_full: True + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.001 + milestones: [100, 200] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: #METRIC field + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmFull_Inference_helper' + num_channels: 2048 + sample_rate: 2 + result_path: './inference/' + +output_dir: "./output/BCN/50salads/split2/BcnBgmFull/" +log_interval: 2000 +epochs: 300 +log_level: "DEBUG" 
+save_interval: 2000 +model_name: "BcnBgmFull" diff --git a/configs/segmentation/bcn/bgm/50salads/full/split3.yaml b/configs/segmentation/bcn/bgm/50salads/full/split3.yaml new file mode 100644 index 000000000..d436ff5c2 --- /dev/null +++ b/configs/segmentation/bcn/bgm/50salads/full/split3.yaml @@ -0,0 +1,75 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "50salads" + use_full: True + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "50salads" + use_full: True + test_mode: "more" + results_path: "./output/BCN/50salads/split3/BcnBgmFull/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/train.split3.bundle" + use_full: True + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split3.bundle" + use_full: True + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split3.bundle" + use_full: True + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.001 + milestones: [100, 200] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: #METRIC field + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmFull_Inference_helper' + num_channels: 2048 + sample_rate: 2 + result_path: './inference/' + +output_dir: "./output/BCN/50salads/split3/BcnBgmFull/" +log_interval: 2000 +epochs: 300 +log_level: "DEBUG" +save_interval: 2000 +model_name: "BcnBgmFull" diff --git a/configs/segmentation/bcn/bgm/50salads/full/split4.yaml b/configs/segmentation/bcn/bgm/50salads/full/split4.yaml new file mode 100644 index 000000000..937788a00 --- /dev/null +++ 
b/configs/segmentation/bcn/bgm/50salads/full/split4.yaml @@ -0,0 +1,75 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "50salads" + use_full: True + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "50salads" + use_full: True + test_mode: "more" + results_path: "./output/BCN/50salads/split4/BcnBgmFull/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/train.split4.bundle" + use_full: True + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split4.bundle" + use_full: True + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split4.bundle" + use_full: True + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.001 + milestones: [100, 200] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: #METRIC field + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmFull_Inference_helper' + num_channels: 2048 + sample_rate: 2 + result_path: './inference/' + +output_dir: "./output/BCN/50salads/split4/BcnBgmFull/" +log_interval: 2000 +epochs: 300 +log_level: "DEBUG" +save_interval: 2000 +model_name: "BcnBgmFull" diff --git a/configs/segmentation/bcn/bgm/50salads/full/split5.yaml b/configs/segmentation/bcn/bgm/50salads/full/split5.yaml new file mode 100644 index 000000000..75366068c --- /dev/null +++ b/configs/segmentation/bcn/bgm/50salads/full/split5.yaml @@ -0,0 +1,75 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "50salads" + use_full: True + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: 
"50salads" + use_full: True + test_mode: "more" + results_path: "./output/BCN/50salads/split5/BcnBgmFull/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/train.split5.bundle" + use_full: True + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split5.bundle" + use_full: True + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split5.bundle" + use_full: True + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.001 + milestones: [100, 200] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: #METRIC field + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmFull_Inference_helper' + num_channels: 2048 + sample_rate: 2 + result_path: './inference/' + +output_dir: "./output/BCN/50salads/split5/BcnBgmFull/" +log_interval: 2000 +epochs: 300 +log_level: "DEBUG" +save_interval: 2000 +model_name: "BcnBgmFull" diff --git a/configs/segmentation/bcn/bgm/50salads/resized/split1.yaml b/configs/segmentation/bcn/bgm/50salads/resized/split1.yaml new file mode 100644 index 000000000..23d79c7a4 --- /dev/null +++ b/configs/segmentation/bcn/bgm/50salads/resized/split1.yaml @@ -0,0 +1,78 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "50salads" + use_full: False + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "50salads" + use_full: False + test_mode: "more" + results_path: "./output/BCN/50salads/split1/BcnBgmResized/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 
'BcnBgmDataset' + file_path: "./data/50salads/splits/train.split1.bundle" + use_full: False + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split1.bundle" + use_full: False + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split1.bundle" + use_full: False + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.001 + milestones: [100, 200] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmResized_Inference_helper' + num_channels: 2048 + sample_rate: 2 + result_path: './inference/' + mode: 'more' + temporal_dim: 400 + dataset: '50salads' + +output_dir: "./output/BCN/50salads/split1/BcnBgmResized/" +model_name: "BcnBgmResized" +log_interval: 2000 +epochs: 300 +log_level: "DEBUG" +save_interval: 2000 diff --git a/configs/segmentation/bcn/bgm/50salads/resized/split2.yaml b/configs/segmentation/bcn/bgm/50salads/resized/split2.yaml new file mode 100644 index 000000000..5e799fec3 --- /dev/null +++ b/configs/segmentation/bcn/bgm/50salads/resized/split2.yaml @@ -0,0 +1,78 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "50salads" + use_full: False + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "50salads" + use_full: False + test_mode: "more" + results_path: "./output/BCN/50salads/split2/BcnBgmResized/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/train.split2.bundle" + use_full: False + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: 
"./data/50salads/splits/test.split2.bundle" + use_full: False + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split2.bundle" + use_full: False + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.001 + milestones: [100, 200] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmResized_Inference_helper' + num_channels: 2048 + sample_rate: 2 + result_path: './inference/' + mode: 'more' + temporal_dim: 400 + dataset: '50salads' + +output_dir: "./output/BCN/50salads/split2/BcnBgmResized/" +model_name: "BcnBgmResized" +log_interval: 2000 +epochs: 300 +log_level: "DEBUG" +save_interval: 2000 diff --git a/configs/segmentation/bcn/bgm/50salads/resized/split3.yaml b/configs/segmentation/bcn/bgm/50salads/resized/split3.yaml new file mode 100644 index 000000000..b7353f6ec --- /dev/null +++ b/configs/segmentation/bcn/bgm/50salads/resized/split3.yaml @@ -0,0 +1,78 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "50salads" + use_full: False + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "50salads" + use_full: False + test_mode: "more" + results_path: "./output/BCN/50salads/split3/BcnBgmResized/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/train.split3.bundle" + use_full: False + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split3.bundle" + use_full: False + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split3.bundle" + use_full: 
False + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.001 + milestones: [100, 200] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmResized_Inference_helper' + num_channels: 2048 + sample_rate: 2 + result_path: './inference/' + mode: 'more' + temporal_dim: 400 + dataset: '50salads' + +output_dir: "./output/BCN/50salads/split3/BcnBgmResized/" +model_name: "BcnBgmResized" +log_interval: 2000 +epochs: 300 +log_level: "DEBUG" +save_interval: 2000 diff --git a/configs/segmentation/bcn/bgm/50salads/resized/split4.yaml b/configs/segmentation/bcn/bgm/50salads/resized/split4.yaml new file mode 100644 index 000000000..00cf83a65 --- /dev/null +++ b/configs/segmentation/bcn/bgm/50salads/resized/split4.yaml @@ -0,0 +1,78 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "50salads" + use_full: False + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "50salads" + use_full: False + test_mode: "more" + results_path: "./output/BCN/50salads/split4/BcnBgmResized/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/train.split4.bundle" + use_full: False + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split4.bundle" + use_full: False + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split4.bundle" + use_full: False + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: 
"GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.001 + milestones: [100, 200] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmResized_Inference_helper' + num_channels: 2048 + sample_rate: 2 + result_path: './inference/' + mode: 'more' + temporal_dim: 400 + dataset: '50salads' + +output_dir: "./output/BCN/50salads/split4/BcnBgmResized/" +model_name: "BcnBgmResized" +log_interval: 2000 +epochs: 300 +log_level: "DEBUG" +save_interval: 2000 diff --git a/configs/segmentation/bcn/bgm/50salads/resized/split5.yaml b/configs/segmentation/bcn/bgm/50salads/resized/split5.yaml new file mode 100644 index 000000000..c7ce9dd8b --- /dev/null +++ b/configs/segmentation/bcn/bgm/50salads/resized/split5.yaml @@ -0,0 +1,78 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "50salads" + use_full: False + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "50salads" + use_full: False + test_mode: "more" + results_path: "./output/BCN/50salads/split5/BcnBgmResized/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/train.split5.bundle" + use_full: False + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split5.bundle" + use_full: False + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/50salads/splits/test.split5.bundle" + use_full: False + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.001 + milestones: [100, 200] + gamma: 0.3 + 
weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmResized_Inference_helper' + num_channels: 2048 + sample_rate: 2 + result_path: './inference/' + mode: 'more' + temporal_dim: 400 + dataset: '50salads' + +output_dir: "./output/BCN/50salads/split5/BcnBgmResized/" +model_name: "BcnBgmResized" +log_interval: 2000 +epochs: 300 +log_level: "DEBUG" +save_interval: 2000 diff --git a/configs/segmentation/bcn/bgm/breakfast/full/split1.yaml b/configs/segmentation/bcn/bgm/breakfast/full/split1.yaml new file mode 100644 index 000000000..b2c34251a --- /dev/null +++ b/configs/segmentation/bcn/bgm/breakfast/full/split1.yaml @@ -0,0 +1,75 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "breakfast" + use_full: True + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "breakfast" + use_full: True + test_mode: "more" + results_path: "./output/BCN/breakfast/split1/BcnBgmFull/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/train.split1.bundle" + use_full: True + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/test.split1.bundle" + use_full: True + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/test.split1.bundle" + use_full: True + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.0001 + milestones: [32, 65] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: #METRIC field + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmFull_Inference_helper' + num_channels: 2048 + sample_rate: 1 + 
result_path: './inference/' + +output_dir: "./output/BCN/breakfast/split1/BcnBgmFull/" +model_name: "BcnBgmFull" +log_interval: 2000 +epochs: 100 +log_level: "DEBUG" +save_interval: 2000 diff --git a/configs/segmentation/bcn/bgm/breakfast/full/split2.yaml b/configs/segmentation/bcn/bgm/breakfast/full/split2.yaml new file mode 100644 index 000000000..cfb1b39bb --- /dev/null +++ b/configs/segmentation/bcn/bgm/breakfast/full/split2.yaml @@ -0,0 +1,75 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "breakfast" + use_full: True + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "breakfast" + use_full: True + test_mode: "more" + results_path: "./output/BCN/breakfast/split2/BcnBgmFull/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/train.split2.bundle" + use_full: True + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/test.split2.bundle" + use_full: True + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/test.split2.bundle" + use_full: True + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.0001 + milestones: [32, 65] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: #METRIC field + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmFull_Inference_helper' + num_channels: 2048 + sample_rate: 1 + result_path: './inference/' + +output_dir: "./output/BCN/breakfast/split2/BcnBgmFull/" +model_name: "BcnBgmFull" +log_interval: 2000 +epochs: 100 +log_level: "DEBUG" +save_interval: 2000 diff --git 
a/configs/segmentation/bcn/bgm/breakfast/full/split3.yaml b/configs/segmentation/bcn/bgm/breakfast/full/split3.yaml new file mode 100644 index 000000000..67a23c4e4 --- /dev/null +++ b/configs/segmentation/bcn/bgm/breakfast/full/split3.yaml @@ -0,0 +1,75 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "breakfast" + use_full: True + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "breakfast" + use_full: True + test_mode: "more" + results_path: "./output/BCN/breakfast/split3/BcnBgmFull/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/train.split3.bundle" + use_full: True + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/test.split3.bundle" + use_full: True + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/test.split3.bundle" + use_full: True + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.0001 + milestones: [32, 65] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: #METRIC field + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmFull_Inference_helper' + num_channels: 2048 + sample_rate: 1 + result_path: './inference/' + +output_dir: "./output/BCN/breakfast/split3/BcnBgmFull/" +model_name: "BcnBgmFull" +log_interval: 2000 +epochs: 100 +log_level: "DEBUG" +save_interval: 2000 diff --git a/configs/segmentation/bcn/bgm/breakfast/full/split4.yaml b/configs/segmentation/bcn/bgm/breakfast/full/split4.yaml new file mode 100644 index 000000000..9a6574fac --- /dev/null +++ b/configs/segmentation/bcn/bgm/breakfast/full/split4.yaml @@ 
-0,0 +1,75 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "breakfast" + use_full: True + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "breakfast" + use_full: True + test_mode: "more" + results_path: "./output/BCN/breakfast/split4/BcnBgmFull/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/train.split4.bundle" + use_full: True + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/test.split4.bundle" + use_full: True + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/test.split4.bundle" + use_full: True + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.0001 + milestones: [32, 65] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: #METRIC field + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmFull_Inference_helper' + num_channels: 2048 + sample_rate: 1 + result_path: './inference/' + +output_dir: "./output/BCN/breakfast/split4/BcnBgmFull/" +model_name: "BcnBgmFull" +log_interval: 2000 +epochs: 100 +log_level: "DEBUG" +save_interval: 2000 diff --git a/configs/segmentation/bcn/bgm/breakfast/resized/split1.yaml b/configs/segmentation/bcn/bgm/breakfast/resized/split1.yaml new file mode 100644 index 000000000..ac030d7f7 --- /dev/null +++ b/configs/segmentation/bcn/bgm/breakfast/resized/split1.yaml @@ -0,0 +1,78 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "breakfast" + use_full: False + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "breakfast" + use_full: False + 
test_mode: "more" + results_path: "./output/BCN/breakfast/split1/BcnBgmResized/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/train.split1.bundle" + use_full: False + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/test.split1.bundle" + use_full: False + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/test.split1.bundle" + use_full: False + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.0001 + milestones: [32, 65] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmResized_Inference_helper' + num_channels: 2048 + sample_rate: 1 + result_path: './inference/' + mode: 'more' + temporal_dim: 300 + dataset: 'breakfast' + +output_dir: "./output/BCN/breakfast/split1/BcnBgmResized/" +model_name: "BcnBgmResized" +log_interval: 2000 +epochs: 100 +log_level: "DEBUG" +save_interval: 2000 diff --git a/configs/segmentation/bcn/bgm/breakfast/resized/split2.yaml b/configs/segmentation/bcn/bgm/breakfast/resized/split2.yaml new file mode 100644 index 000000000..01e03d16e --- /dev/null +++ b/configs/segmentation/bcn/bgm/breakfast/resized/split2.yaml @@ -0,0 +1,78 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "breakfast" + use_full: False + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "breakfast" + use_full: False + test_mode: "more" + results_path: "./output/BCN/breakfast/split2/BcnBgmResized/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + 
num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/train.split2.bundle" + use_full: False + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/test.split2.bundle" + use_full: False + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/test.split2.bundle" + use_full: False + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.0001 + milestones: [32, 65] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmResized_Inference_helper' + num_channels: 2048 + sample_rate: 1 + result_path: './inference/' + mode: 'more' + temporal_dim: 300 + dataset: 'breakfast' + +output_dir: "./output/BCN/breakfast/split2/BcnBgmResized/" +model_name: "BcnBgmResized" +log_interval: 2000 +epochs: 100 +log_level: "DEBUG" +save_interval: 2000 diff --git a/configs/segmentation/bcn/bgm/breakfast/resized/split3.yaml b/configs/segmentation/bcn/bgm/breakfast/resized/split3.yaml new file mode 100644 index 000000000..8a10f571e --- /dev/null +++ b/configs/segmentation/bcn/bgm/breakfast/resized/split3.yaml @@ -0,0 +1,78 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "breakfast" + use_full: False + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "breakfast" + use_full: False + test_mode: "more" + results_path: "./output/BCN/breakfast/split3/BcnBgmResized/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/train.split3.bundle" + use_full: False + bd_ratio: 0.05 + valid: + format: 
'BcnBgmDataset' + file_path: "./data/breakfast/splits/test.split3.bundle" + use_full: False + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/test.split3.bundle" + use_full: False + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.0001 + milestones: [32, 65] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmResized_Inference_helper' + num_channels: 2048 + sample_rate: 1 + result_path: './inference/' + mode: 'more' + temporal_dim: 300 + dataset: 'breakfast' + +output_dir: "./output/BCN/breakfast/split3/BcnBgmResized/" +model_name: "BcnBgmResized" +log_interval: 2000 +epochs: 100 +log_level: "DEBUG" +save_interval: 2000 diff --git a/configs/segmentation/bcn/bgm/breakfast/resized/split4.yaml b/configs/segmentation/bcn/bgm/breakfast/resized/split4.yaml new file mode 100644 index 000000000..45392c364 --- /dev/null +++ b/configs/segmentation/bcn/bgm/breakfast/resized/split4.yaml @@ -0,0 +1,78 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "breakfast" + use_full: False + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "breakfast" + use_full: False + test_mode: "more" + results_path: "./output/BCN/breakfast/split4/BcnBgmResized/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/train.split4.bundle" + use_full: False + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/breakfast/splits/test.split4.bundle" + use_full: False + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: 
"./data/breakfast/splits/test.split4.bundle" + use_full: False + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.0001 + milestones: [32, 65] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmResized_Inference_helper' + num_channels: 2048 + sample_rate: 1 + result_path: './inference/' + mode: 'more' + temporal_dim: 300 + dataset: 'breakfast' + +output_dir: "./output/BCN/breakfast/split4/BcnBgmResized/" +model_name: "BcnBgmResized" +log_interval: 2000 +epochs: 100 +log_level: "DEBUG" +save_interval: 2000 diff --git a/configs/segmentation/bcn/bgm/example.yaml b/configs/segmentation/bcn/bgm/example.yaml new file mode 100644 index 000000000..c245517f7 --- /dev/null +++ b/configs/segmentation/bcn/bgm/example.yaml @@ -0,0 +1,79 @@ +# BCN-bgm parameter +MODEL: #MODEL field + framework: "BcnBgm" #Mandatory, indicate the type of network, associate to the 'paddlevideo/modeling/framework/'. + backbone: + name: "BcnBgm" #Mandatory, indicate the type of backbone, associate to the 'paddlevideo/modeling/backbones/'. + dataset: "50salads" #Optional [50salads, gtea, breakfast], indicate the type of dataset, associate to the 'paddlevideo/data'. + use_full: True #Optional [True, False], use the full model or resized model. + loss: + name: "BcnBgmLoss" #Mandatory, indicate the type of loss, associate to the 'paddlevideo/modeling/losses/'. + head: + name: "BcnBgmHead" #Mandatory, indicate the type of head, associate to the 'paddlevideo/modeling/heads/'. + dataset: "50salads" #Optional [50salads, gtea, breakfast], indicate the type of dataset, associate to the 'paddlevideo/data'. + use_full: True #Mandatory [True, False], use the full model or resized model. 
+ test_mode: "more" #Optional [more, less], how to calculate the results of the test set. + results_path: "./output/BCN/50salads/split1/BcnBgmResized/results" #Optional, the path to save the segmentation result, like "./output/BCN/[gtea, 50salads, breakfast]/[split1, split2...]/[BcnBgmFull, BcnBgmResized]/results". + +DATASET: #DATASET field + batch_size: 1 #Mandatory, batch size per gpu. + valid_batch_size: 1 #Mandatory, valid batch size per gpu. + test_batch_size: 1 #Mandatory, test batch size per gpu. + num_workers: 4 #Optional, the number of subprocess on each GPU. + train: + format: 'BcnBgmDataset' #Mandatory, indicate the type of dataset, associate to the 'paddlevideo/loader/dataset'. + file_path: "./data/50salads/splits/train.split1.bundle" #Optional, choose split + use_full: True #Optional [True, False], use the full model or resized model. + bd_ratio: 0.05 #Mandatory + valid: + format: 'BcnBgmDataset' #Mandatory, indicate the type of dataset, associate to the 'paddlevideo/loader/dataset'. + file_path: "./data/50salads/splits/train.split1.bundle" #Optional, choose split + use_full: True #Optional [True, False], use the full model or resized model. + bd_ratio: 0.05 #Mandatory + test: + format: 'BcnBgmDataset' #Mandatory, indicate the type of dataset, associate to the 'paddlevideo/loader/dataset'. + file_path: "./data/50salads/splits/train.split1.bundle" #Optional, choose split + use_full: True #Optional [True, False], use the full model or resized model. + bd_ratio: 0.05 #Mandatory + +PIPELINE: #PIPELINE field + train: #Mandatory, indicate the pipeline to deal with the training data, please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "GetBcnBgmTrainLabel" #Mandatory, processing raw label. + + valid: #Mandatory, indicate the pipeline to deal with the validating data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "GetBcnBgmTrainLabel" #Mandatory, processing raw label. 
+ + test: #Mandatory, indicate the pipeline to deal with the validating data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "GetBcnBgmTrainLabel" #Mandatory, processing raw label. + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' #Mandatory, the type of optimizer, please refer to the 'paddlevideo/solver/' + learning_rate: #Optional, the type of learning rate scheduler, please refer to the 'paddlevideo/solver/' + name: 'CustomMultiStepDecay' + learning_rate: 0.001 #Optional, the initial learning rate. + milestones: [100, 200] #Optional, milestones=[int(epochs / 3), int(2 * epochs / 3)] in bcn-bgm model. + gamma: 0.3 #Optional, attenuation coefficient. + weight_decay: #Optional, regularization. + name: 'L2' + value: 0.0001 + +METRIC: + name: 'BcnBgmMetric' #Mandatory, the type of metric, please refer to the 'paddlevideo/metrics' + +INFERENCE: + name: 'BcnBgmResized_Inference_helper' #Optional, the type of inference_helper, please refer to the 'tools/utils.py' + num_channels: 2048 #Mandatory, the number of in channel + sample_rate: 1 #Optional, sample_rate for feature + result_path: './inference/' #Optional, the path to save + mode: 'more' #Optional [more, less], how to calculate the results of the test set, only use in resized + temporal_dim: 300 #Optional, only use in resized + dataset: "50salads" #Optional [50salads, gtea, breakfast], indicate the type of dataset, associate to the 'paddlevideo/data', only use in resized + +output_dir: "./output/BCN/50salads/split1/BcnBgmResized/" #Optional, like "./output/BCN/[gtea, 50salads, breakfast]/[split1, split2...]/[BcnBgmFull, BcnBgmResized]/results". +model_name: "BcnBgmFull" #Optional, model name like [BcnBgmFull, BcnBgmResized]. +log_interval: 2000 #Optional, the interval of logger. +epochs: 300 #Optional, total epoch +log_level: "DEBUG" #Optional, the logger level. +save_interval: 2000 #Optional, the interval of save. 
diff --git a/configs/segmentation/bcn/bgm/gtea/full/split1.yaml b/configs/segmentation/bcn/bgm/gtea/full/split1.yaml new file mode 100644 index 000000000..b630248ea --- /dev/null +++ b/configs/segmentation/bcn/bgm/gtea/full/split1.yaml @@ -0,0 +1,76 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "gtea" + use_full: True + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "gtea" + use_full: True + test_mode: "more" + results_path: "./output/BCN/gtea/split1/BcnBgmFull/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/train.split1.bundle" + use_full: True + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/test.split1.bundle" + use_full: True + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/test.split1.bundle" + use_full: True + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.0002 + milestones: [32, 65] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: #METRIC field + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmFull_Inference_helper' + num_channels: 2048 + sample_rate: 1 + result_path: './inference/' + + +output_dir: "./output/BCN/gtea/split1/BcnBgmFull/" +log_interval: 2000 +epochs: 100 +log_level: "DEBUG" +save_interval: 2000 +model_name: 'BcnBgmFull' diff --git a/configs/segmentation/bcn/bgm/gtea/full/split2.yaml b/configs/segmentation/bcn/bgm/gtea/full/split2.yaml new file mode 100644 index 000000000..6bd9a8ea6 --- /dev/null +++ b/configs/segmentation/bcn/bgm/gtea/full/split2.yaml @@ -0,0 +1,75 @@ +MODEL: #MODEL field + framework: 
"BcnBgm" + backbone: + name: "BcnBgm" + dataset: "gtea" + use_full: True + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "gtea" + use_full: True + test_mode: "more" + results_path: "./output/BCN/gtea/split2/BcnBgmFull/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/train.split2.bundle" + use_full: True + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/test.split2.bundle" + use_full: True + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/test.split2.bundle" + use_full: True + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.0002 + milestones: [32, 65] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: #METRIC field + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmFull_Inference_helper' + num_channels: 2048 + sample_rate: 1 + result_path: './inference/' + +output_dir: "./output/BCN/gtea/split2/BcnBgmFull/" +log_interval: 2000 +epochs: 100 +log_level: "DEBUG" +save_interval: 2000 +model_name: 'BcnBgmFull' diff --git a/configs/segmentation/bcn/bgm/gtea/full/split3.yaml b/configs/segmentation/bcn/bgm/gtea/full/split3.yaml new file mode 100644 index 000000000..e07511887 --- /dev/null +++ b/configs/segmentation/bcn/bgm/gtea/full/split3.yaml @@ -0,0 +1,75 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "gtea" + use_full: True + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "gtea" + use_full: True + test_mode: "more" + results_path: "./output/BCN/gtea/split3/BcnBgmFull/results" + +DATASET: #DATASET field + batch_size: 1 + 
valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/train.split3.bundle" + use_full: True + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/test.split3.bundle" + use_full: True + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/test.split3.bundle" + use_full: True + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.0002 + milestones: [32, 65] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: #METRIC field + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmFull_Inference_helper' + num_channels: 2048 + sample_rate: 1 + result_path: './inference/' + +output_dir: "./output/BCN/gtea/split3/BcnBgmFull/" +log_interval: 2000 +epochs: 100 +log_level: "DEBUG" +save_interval: 2000 +model_name: 'BcnBgmFull' diff --git a/configs/segmentation/bcn/bgm/gtea/full/split4.yaml b/configs/segmentation/bcn/bgm/gtea/full/split4.yaml new file mode 100644 index 000000000..6883dd558 --- /dev/null +++ b/configs/segmentation/bcn/bgm/gtea/full/split4.yaml @@ -0,0 +1,75 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "gtea" + use_full: True + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "gtea" + use_full: True + test_mode: "more" + results_path: "./output/BCN/gtea/split4/BcnBgmFull/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/train.split4.bundle" + use_full: True + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/test.split4.bundle" + use_full: True + 
bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/test.split4.bundle" + use_full: True + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.0002 + milestones: [32, 65] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: #METRIC field + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmFull_Inference_helper' + num_channels: 2048 + sample_rate: 1 + result_path: './inference/' + +output_dir: "./output/BCN/gtea/split4/BcnBgmFull/" +log_interval: 2000 +epochs: 100 +log_level: "DEBUG" +save_interval: 2000 +model_name: 'BcnBgmFull' diff --git a/configs/segmentation/bcn/bgm/gtea/resized/split1.yaml b/configs/segmentation/bcn/bgm/gtea/resized/split1.yaml new file mode 100644 index 000000000..e56b7d28e --- /dev/null +++ b/configs/segmentation/bcn/bgm/gtea/resized/split1.yaml @@ -0,0 +1,78 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "gtea" + use_full: False + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "gtea" + use_full: False + test_mode: "more" + results_path: "./output/BCN/gtea/split1/BcnBgmResized/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/train.split1.bundle" + use_full: False + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/test.split1.bundle" + use_full: False + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/test.split1.bundle" + use_full: False + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + 
decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.0002 + milestones: [32, 65] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: #METRIC field + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmResized_Inference_helper' + num_channels: 2048 + sample_rate: 1 + result_path: './inference/' + mode: 'more' + temporal_dim: 300 + dataset: 'gtea' + +output_dir: "./output/BCN/gtea/split1/BcnBgmResized/" +model_name: "BcnBgmResized" +log_interval: 2000 +epochs: 100 +log_level: "DEBUG" +save_interval: 2000 diff --git a/configs/segmentation/bcn/bgm/gtea/resized/split2.yaml b/configs/segmentation/bcn/bgm/gtea/resized/split2.yaml new file mode 100644 index 000000000..b6cbaf1fa --- /dev/null +++ b/configs/segmentation/bcn/bgm/gtea/resized/split2.yaml @@ -0,0 +1,78 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "gtea" + use_full: False + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "gtea" + use_full: False + test_mode: "more" + results_path: "./output/BCN/gtea/split2/BcnBgmResized/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/train.split2.bundle" + use_full: False + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/test.split2.bundle" + use_full: False + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/test.split2.bundle" + use_full: False + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.0002 + milestones: [32, 65] + gamma: 0.3 + weight_decay: + name: 
'L2' + value: 0.0001 + +METRIC: #METRIC field + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmResized_Inference_helper' + num_channels: 2048 + sample_rate: 1 + result_path: './inference/' + mode: 'more' + temporal_dim: 300 + dataset: 'gtea' + +output_dir: "./output/BCN/gtea/split2/BcnBgmResized/" +model_name: "BcnBgmResized" +log_interval: 2000 +epochs: 100 +log_level: "DEBUG" +save_interval: 2000 diff --git a/configs/segmentation/bcn/bgm/gtea/resized/split3.yaml b/configs/segmentation/bcn/bgm/gtea/resized/split3.yaml new file mode 100644 index 000000000..404806b1d --- /dev/null +++ b/configs/segmentation/bcn/bgm/gtea/resized/split3.yaml @@ -0,0 +1,78 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "gtea" + use_full: False + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "gtea" + use_full: False + test_mode: "more" + results_path: "./output/BCN/gtea/split3/BcnBgmResized/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/train.split3.bundle" + use_full: False + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/test.split3.bundle" + use_full: False + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/test.split3.bundle" + use_full: False + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.0002 + milestones: [32, 65] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: #METRIC field + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmResized_Inference_helper' + num_channels: 2048 + sample_rate: 1 + result_path: './inference/' + mode: 'more' + 
temporal_dim: 300 + dataset: 'gtea' + +output_dir: "./output/BCN/gtea/split3/BcnBgmResized/" +model_name: "BcnBgmResized" +log_interval: 2000 +epochs: 100 +log_level: "DEBUG" +save_interval: 2000 diff --git a/configs/segmentation/bcn/bgm/gtea/resized/split4.yaml b/configs/segmentation/bcn/bgm/gtea/resized/split4.yaml new file mode 100644 index 000000000..40d5fd8b6 --- /dev/null +++ b/configs/segmentation/bcn/bgm/gtea/resized/split4.yaml @@ -0,0 +1,78 @@ +MODEL: #MODEL field + framework: "BcnBgm" + backbone: + name: "BcnBgm" + dataset: "gtea" + use_full: False + loss: + name: "BcnBgmLoss" + head: + name: "BcnBgmHead" + dataset: "gtea" + use_full: False + test_mode: "more" + results_path: "./output/BCN/gtea/split4/BcnBgmResized/results" + +DATASET: #DATASET field + batch_size: 1 + valid_batch_size: 1 + test_batch_size: 1 + num_workers: 4 + train: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/train.split4.bundle" + use_full: False + bd_ratio: 0.05 + valid: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/test.split4.bundle" + use_full: False + bd_ratio: 0.05 + test: + format: 'BcnBgmDataset' + file_path: "./data/gtea/splits/test.split4.bundle" + use_full: False + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: + decode: + name: "GetBcnBgmTrainLabel" + + valid: + decode: + name: "GetBcnBgmTrainLabel" + + test: + decode: + name: "GetBcnBgmTrainLabel" + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + name: 'CustomMultiStepDecay' + learning_rate: 0.0002 + milestones: [32, 65] + gamma: 0.3 + weight_decay: + name: 'L2' + value: 0.0001 + +METRIC: #METRIC field + name: 'BcnBgmMetric' + +INFERENCE: + name: 'BcnBgmResized_Inference_helper' + num_channels: 2048 + sample_rate: 1 + result_path: './inference/' + mode: 'more' + temporal_dim: 300 + dataset: 'gtea' + +output_dir: "./output/BCN/gtea/split4/BcnBgmResized/" +model_name: "BcnBgmResized" +log_interval: 2000 +epochs: 100 +log_level: "DEBUG" +save_interval: 2000 diff --git 
a/configs/segmentation/bcn/model/50salads/split1.yaml b/configs/segmentation/bcn/model/50salads/split1.yaml new file mode 100644 index 000000000..0d6d769d8 --- /dev/null +++ b/configs/segmentation/bcn/model/50salads/split1.yaml @@ -0,0 +1,78 @@ +MODEL: #MODEL field + framework: "BcnModel" + data_path: "./data/50salads/splits/train.split1.bundle" + bgm_result_path: "./output/BCN/50salads/split1/BcnBgmResized/results" + bgm_pdparams: "./output/BCN/50salads/split1/BcnBgmFull/BcnBgmFull_best.pdparams" + use_lbp: True + num_post: 4 + backbone: + name: "BcnModel" #Optional, model'. + num_stages: 4 + num_layers: 12 + num_f_maps: 256 + dim: 2048 + num_classes: 19 + dataset: "50salads" + use_lbp: True + num_soft_lbp: 1 + loss: + name: "BcnModelLoss" + +DATASET: #DATASET field + batch_size: 1 #Mandatory, batch size per gpu. + valid_batch_size: 1 #Optional, valid batch size per gpu. + test_batch_size: 1 #Optional, test batch size per gpu. + num_workers: 2 #Mandatory, the number of subprocess on each GPU. + train: + format: 'BcnModelDataset' + file_path: "./data/50salads/splits/train.split1.bundle" + bd_ratio: 0.05 + valid: + format: 'BcnModelDataset' + file_path: "./data/50salads/splits/test.split1.bundle" + bd_ratio: 0.05 + test: + format: 'BcnModelDataset' + file_path: "./data/50salads/splits/test.split1.bundle" + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: #Mandotary, indicate the pipeline to deal with the training data, please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + valid: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + test: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. 
+ +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + - + name: 'CustomMultiStepDecay' + learning_rate: 0.001 + milestones: [30] + gamma: 0.3 + - + name: 'CustomMultiStepDecay' + learning_rate: 0.0001 + milestones: [30] + gamma: 0.3 + +METRIC: + name: 'BcnModelMetric' + overlap: [.1, .25, .5] + actions_map_file_path: "./data/50salads/mapping.txt" + log_path: "./output/BCN/50salads/split1/BcnModel/metric.csv" + dataset: "50salads" + +output_dir: "./output/BCN/50salads/split1/BcnModel/" +model_name: "BcnModel" #Mandatory, model name. +log_interval: 20 #Optional, the interval of logger. +epochs: 50 #Mandatory, total epoch +log_level: "DEBUG" #Optional, the logger level. +save_interval: 2000 diff --git a/configs/segmentation/bcn/model/50salads/split2.yaml b/configs/segmentation/bcn/model/50salads/split2.yaml new file mode 100644 index 000000000..8ec39ce39 --- /dev/null +++ b/configs/segmentation/bcn/model/50salads/split2.yaml @@ -0,0 +1,78 @@ +MODEL: #MODEL field + framework: "BcnModel" + data_path: "./data/50salads/splits/train.split2.bundle" + bgm_result_path: "./output/BCN/50salads/split2/BcnBgmResized/results" + bgm_pdparams: "./output/BCN/50salads/split2/BcnBgmFull/BcnBgmFull_best.pdparams" + use_lbp: True + num_post: 4 + backbone: + name: "BcnModel" #Optional, model'. + num_stages: 4 + num_layers: 12 + num_f_maps: 256 + dim: 2048 + num_classes: 19 + dataset: "50salads" + use_lbp: True + num_soft_lbp: 1 + loss: + name: "BcnModelLoss" + +DATASET: #DATASET field + batch_size: 1 #Mandatory, batch size per gpu. + valid_batch_size: 1 #Optional, valid batch size per gpu. + test_batch_size: 1 #Optional, test batch size per gpu. + num_workers: 2 #Mandatory, the number of subprocess on each GPU. 
+ train: + format: 'BcnModelDataset' + file_path: "./data/50salads/splits/train.split2.bundle" + bd_ratio: 0.05 + valid: + format: 'BcnModelDataset' + file_path: "./data/50salads/splits/test.split2.bundle" + bd_ratio: 0.05 + test: + format: 'BcnModelDataset' + file_path: "./data/50salads/splits/test.split2.bundle" + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: #Mandotary, indicate the pipeline to deal with the training data, please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + valid: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + test: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + - + name: 'CustomMultiStepDecay' + learning_rate: 0.001 + milestones: [30] + gamma: 0.3 + - + name: 'CustomMultiStepDecay' + learning_rate: 0.0001 + milestones: [30] + gamma: 0.3 + +METRIC: + name: 'BcnModelMetric' + overlap: [.1, .25, .5] + actions_map_file_path: "./data/50salads/mapping.txt" + log_path: "./output/BCN/50salads/split2/BcnModel/metric.csv" + dataset: "50salads" + +output_dir: "./output/BCN/50salads/split2/BcnModel/" +model_name: "BcnModel" #Mandatory, model name. +log_interval: 20 #Optional, the interval of logger. +epochs: 50 #Mandatory, total epoch +log_level: "DEBUG" #Optional, the logger level. 
+save_interval: 2000 diff --git a/configs/segmentation/bcn/model/50salads/split3.yaml b/configs/segmentation/bcn/model/50salads/split3.yaml new file mode 100644 index 000000000..0964dccca --- /dev/null +++ b/configs/segmentation/bcn/model/50salads/split3.yaml @@ -0,0 +1,78 @@ +MODEL: #MODEL field + framework: "BcnModel" + data_path: "./data/50salads/splits/train.split3.bundle" + bgm_result_path: "./output/BCN/50salads/split3/BcnBgmResized/results" + bgm_pdparams: "./output/BCN/50salads/split3/BcnBgmFull/BcnBgmFull_best.pdparams" + use_lbp: True + num_post: 4 + backbone: + name: "BcnModel" #Optional, model'. + num_stages: 4 + num_layers: 12 + num_f_maps: 256 + dim: 2048 + num_classes: 19 + dataset: "50salads" + use_lbp: True + num_soft_lbp: 1 + loss: + name: "BcnModelLoss" + +DATASET: #DATASET field + batch_size: 1 #Mandatory, batch size per gpu. + valid_batch_size: 1 #Optional, valid batch size per gpu. + test_batch_size: 1 #Optional, test batch size per gpu. + num_workers: 2 #Mandatory, the number of subprocess on each GPU. + train: + format: 'BcnModelDataset' + file_path: "./data/50salads/splits/train.split3.bundle" + bd_ratio: 0.05 + valid: + format: 'BcnModelDataset' + file_path: "./data/50salads/splits/test.split3.bundle" + bd_ratio: 0.05 + test: + format: 'BcnModelDataset' + file_path: "./data/50salads/splits/test.split3.bundle" + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: #Mandotary, indicate the pipeline to deal with the training data, please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + valid: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + test: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. 
+ +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + - + name: 'CustomMultiStepDecay' + learning_rate: 0.001 + milestones: [30] + gamma: 0.3 + - + name: 'CustomMultiStepDecay' + learning_rate: 0.0001 + milestones: [30] + gamma: 0.3 + +METRIC: + name: 'BcnModelMetric' + overlap: [.1, .25, .5] + actions_map_file_path: "./data/50salads/mapping.txt" + log_path: "./output/BCN/50salads/split3/BcnModel/metric.csv" + dataset: "50salads" + +output_dir: "./output/BCN/50salads/split3/BcnModel/" +model_name: "BcnModel" #Mandatory, model name. +log_interval: 20 #Optional, the interval of logger. +epochs: 50 #Mandatory, total epoch +log_level: "DEBUG" #Optional, the logger level. +save_interval: 2000 diff --git a/configs/segmentation/bcn/model/50salads/split4.yaml b/configs/segmentation/bcn/model/50salads/split4.yaml new file mode 100644 index 000000000..424195da5 --- /dev/null +++ b/configs/segmentation/bcn/model/50salads/split4.yaml @@ -0,0 +1,78 @@ +MODEL: #MODEL field + framework: "BcnModel" + data_path: "./data/50salads/splits/train.split4.bundle" + bgm_result_path: "./output/BCN/50salads/split4/BcnBgmResized/results" + bgm_pdparams: "./output/BCN/50salads/split4/BcnBgmFull/BcnBgmFull_best.pdparams" + use_lbp: True + num_post: 4 + backbone: + name: "BcnModel" #Optional, model'. + num_stages: 4 + num_layers: 12 + num_f_maps: 256 + dim: 2048 + num_classes: 19 + dataset: "50salads" + use_lbp: True + num_soft_lbp: 1 + loss: + name: "BcnModelLoss" + +DATASET: #DATASET field + batch_size: 1 #Mandatory, batch size per gpu. + valid_batch_size: 1 #Optional, valid batch size per gpu. + test_batch_size: 1 #Optional, test batch size per gpu. + num_workers: 2 #Mandatory, the number of subprocess on each GPU. 
+ train: + format: 'BcnModelDataset' + file_path: "./data/50salads/splits/train.split4.bundle" + bd_ratio: 0.05 + valid: + format: 'BcnModelDataset' + file_path: "./data/50salads/splits/test.split4.bundle" + bd_ratio: 0.05 + test: + format: 'BcnModelDataset' + file_path: "./data/50salads/splits/test.split4.bundle" + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: #Mandotary, indicate the pipeline to deal with the training data, please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + valid: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + test: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + - + name: 'CustomMultiStepDecay' + learning_rate: 0.001 + milestones: [30] + gamma: 0.3 + - + name: 'CustomMultiStepDecay' + learning_rate: 0.0001 + milestones: [30] + gamma: 0.3 + +METRIC: + name: 'BcnModelMetric' + overlap: [.1, .25, .5] + actions_map_file_path: "./data/50salads/mapping.txt" + log_path: "./output/BCN/50salads/split4/BcnModel/metric.csv" + dataset: "50salads" + +output_dir: "./output/BCN/50salads/split4/BcnModel/" +model_name: "BcnModel" #Mandatory, model name. +log_interval: 20 #Optional, the interval of logger. +epochs: 50 #Mandatory, total epoch +log_level: "DEBUG" #Optional, the logger level. 
+save_interval: 2000 diff --git a/configs/segmentation/bcn/model/50salads/split5.yaml b/configs/segmentation/bcn/model/50salads/split5.yaml new file mode 100644 index 000000000..e8fe34d61 --- /dev/null +++ b/configs/segmentation/bcn/model/50salads/split5.yaml @@ -0,0 +1,78 @@ +MODEL: #MODEL field + framework: "BcnModel" + data_path: "./data/50salads/splits/train.split5.bundle" + bgm_result_path: "./output/BCN/50salads/split5/BcnBgmResized/results" + bgm_pdparams: "./output/BCN/50salads/split5/BcnBgmFull/BcnBgmFull_best.pdparams" + use_lbp: True + num_post: 4 + backbone: + name: "BcnModel" #Optional, model'. + num_stages: 4 + num_layers: 12 + num_f_maps: 256 + dim: 2048 + num_classes: 19 + dataset: "50salads" + use_lbp: True + num_soft_lbp: 1 + loss: + name: "BcnModelLoss" + +DATASET: #DATASET field + batch_size: 1 #Mandatory, batch size per gpu. + valid_batch_size: 1 #Optional, valid batch size per gpu. + test_batch_size: 1 #Optional, test batch size per gpu. + num_workers: 2 #Mandatory, the number of subprocess on each GPU. + train: + format: 'BcnModelDataset' + file_path: "./data/50salads/splits/train.split5.bundle" + bd_ratio: 0.05 + valid: + format: 'BcnModelDataset' + file_path: "./data/50salads/splits/test.split5.bundle" + bd_ratio: 0.05 + test: + format: 'BcnModelDataset' + file_path: "./data/50salads/splits/test.split5.bundle" + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: #Mandotary, indicate the pipeline to deal with the training data, please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + valid: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + test: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. 
+ +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + - + name: 'CustomMultiStepDecay' + learning_rate: 0.001 + milestones: [30] + gamma: 0.3 + - + name: 'CustomMultiStepDecay' + learning_rate: 0.0001 + milestones: [30] + gamma: 0.3 + +METRIC: + name: 'BcnModelMetric' + overlap: [.1, .25, .5] + actions_map_file_path: "./data/50salads/mapping.txt" + log_path: "./output/BCN/50salads/split5/BcnModel/metric.csv" + dataset: "50salads" + +output_dir: "./output/BCN/50salads/split5/BcnModel/" +model_name: "BcnModel" #Mandatory, model name. +log_interval: 20 #Optional, the interval of logger. +epochs: 50 #Mandatory, total epoch +log_level: "DEBUG" #Optional, the logger level. +save_interval: 2000 diff --git a/configs/segmentation/bcn/model/breakfast/split1.yaml b/configs/segmentation/bcn/model/breakfast/split1.yaml new file mode 100644 index 000000000..a3f4b523c --- /dev/null +++ b/configs/segmentation/bcn/model/breakfast/split1.yaml @@ -0,0 +1,79 @@ +MODEL: #MODEL field + framework: "BcnModel" + data_path: "./data/breakfast/splits/train.split1.bundle" + bgm_result_path: "./output/BCN/breakfast/split1/BcnBgmResized/results" + bgm_pdparams: "./output/BCN/breakfast/split1/BcnBgmFull/BcnBgmFull_best.pdparams" + use_lbp: True + num_post: 4 + backbone: + name: "BcnModel" #Optional, model'. + num_stages: 4 + num_layers: 12 + num_f_maps: 256 + dim: 2048 + num_classes: 48 + dataset: "breakfast" + use_lbp: True + num_soft_lbp: 1 + loss: + name: "BcnModelLoss" + +DATASET: #DATASET field + batch_size: 1 #Mandatory, batch size per gpu. + valid_batch_size: 1 #Optional, valid batch size per gpu. + test_batch_size: 1 #Optional, test batch size per gpu. + num_workers: 2 #Mandatory, the number of subprocess on each GPU. 
+ train: + format: 'BcnModelDataset' + file_path: "./data/breakfast/splits/train.split1.bundle" + bd_ratio: 0.05 + valid: + format: 'BcnModelDataset' + file_path: "./data/breakfast/splits/test.split1.bundle" + bd_ratio: 0.05 + test: + format: 'BcnModelDataset' + file_path: "./data/breakfast/splits/test.split1.bundle" + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: #Mandotary, indicate the pipeline to deal with the training data, please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + valid: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + test: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + - + name: 'CustomMultiStepDecay' + learning_rate: 0.0005 + milestones: [20] + gamma: 0.3 + - + name: 'CustomMultiStepDecay' + learning_rate: 0.00005 + milestones: [20] + gamma: 0.3 + +METRIC: + name: 'BcnModelMetric' + overlap: [.1, .25, .5] + actions_map_file_path: "./data/breakfast/mapping.txt" + log_path: "./output/BCN/breakfast/split1/BcnModel/metric.csv" + dataset: "breakfast" + + +output_dir: "./output/BCN/breakfast/split1/BcnModel/" +model_name: "BcnModel" #Mandatory, model name. +log_interval: 2000 #Optional, the interval of logger. +epochs: 50 #Mandatory, total epoch +log_level: "DEBUG" #Optional, the logger level. 
+save_interval: 2000 diff --git a/configs/segmentation/bcn/model/breakfast/split2.yaml b/configs/segmentation/bcn/model/breakfast/split2.yaml new file mode 100644 index 000000000..91da76417 --- /dev/null +++ b/configs/segmentation/bcn/model/breakfast/split2.yaml @@ -0,0 +1,79 @@ +MODEL: #MODEL field + framework: "BcnModel" + data_path: "./data/breakfast/splits/train.split2.bundle" + bgm_result_path: "./output/BCN/breakfast/split2/BcnBgmResized/results" + bgm_pdparams: "./output/BCN/breakfast/split2/BcnBgmFull/BcnBgmFull_best.pdparams" + use_lbp: True + num_post: 4 + backbone: + name: "BcnModel" #Optional, model'. + num_stages: 4 + num_layers: 12 + num_f_maps: 256 + dim: 2048 + num_classes: 48 + dataset: "breakfast" + use_lbp: True + num_soft_lbp: 1 + loss: + name: "BcnModelLoss" + +DATASET: #DATASET field + batch_size: 1 #Mandatory, batch size per gpu. + valid_batch_size: 1 #Optional, valid batch size per gpu. + test_batch_size: 1 #Optional, test batch size per gpu. + num_workers: 2 #Mandatory, the number of subprocess on each GPU. + train: + format: 'BcnModelDataset' + file_path: "./data/breakfast/splits/train.split2.bundle" + bd_ratio: 0.05 + valid: + format: 'BcnModelDataset' + file_path: "./data/breakfast/splits/test.split2.bundle" + bd_ratio: 0.05 + test: + format: 'BcnModelDataset' + file_path: "./data/breakfast/splits/test.split2.bundle" + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: #Mandotary, indicate the pipeline to deal with the training data, please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + valid: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + test: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. 
+ +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + - + name: 'CustomMultiStepDecay' + learning_rate: 0.0005 + milestones: [20] + gamma: 0.3 + - + name: 'CustomMultiStepDecay' + learning_rate: 0.00005 + milestones: [20] + gamma: 0.3 + +METRIC: + name: 'BcnModelMetric' + overlap: [.1, .25, .5] + actions_map_file_path: "./data/breakfast/mapping.txt" + log_path: "./output/BCN/breakfast/split2/BcnModel/metric.csv" + dataset: "breakfast" + + +output_dir: "./output/BCN/breakfast/split2/BcnModel/" +model_name: "BcnModel" #Mandatory, model name. +log_interval: 2000 #Optional, the interval of logger. +epochs: 50 #Mandatory, total epoch +log_level: "DEBUG" #Optional, the logger level. +save_interval: 2000 diff --git a/configs/segmentation/bcn/model/breakfast/split3.yaml b/configs/segmentation/bcn/model/breakfast/split3.yaml new file mode 100644 index 000000000..216df06df --- /dev/null +++ b/configs/segmentation/bcn/model/breakfast/split3.yaml @@ -0,0 +1,79 @@ +MODEL: #MODEL field + framework: "BcnModel" + data_path: "./data/breakfast/splits/train.split3.bundle" + bgm_result_path: "./output/BCN/breakfast/split3/BcnBgmResized/results" + bgm_pdparams: "./output/BCN/breakfast/split3/BcnBgmFull/BcnBgmFull_best.pdparams" + use_lbp: True + num_post: 4 + backbone: + name: "BcnModel" #Optional, model'. + num_stages: 4 + num_layers: 12 + num_f_maps: 256 + dim: 2048 + num_classes: 48 + dataset: "breakfast" + use_lbp: True + num_soft_lbp: 1 + loss: + name: "BcnModelLoss" + +DATASET: #DATASET field + batch_size: 1 #Mandatory, batch size per gpu. + valid_batch_size: 1 #Optional, valid batch size per gpu. + test_batch_size: 1 #Optional, test batch size per gpu. + num_workers: 2 #Mandatory, the number of subprocess on each GPU. 
+ train: + format: 'BcnModelDataset' + file_path: "./data/breakfast/splits/train.split3.bundle" + bd_ratio: 0.05 + valid: + format: 'BcnModelDataset' + file_path: "./data/breakfast/splits/test.split3.bundle" + bd_ratio: 0.05 + test: + format: 'BcnModelDataset' + file_path: "./data/breakfast/splits/test.split3.bundle" + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: #Mandotary, indicate the pipeline to deal with the training data, please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + valid: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + test: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + - + name: 'CustomMultiStepDecay' + learning_rate: 0.0005 + milestones: [20] + gamma: 0.3 + - + name: 'CustomMultiStepDecay' + learning_rate: 0.00005 + milestones: [20] + gamma: 0.3 + +METRIC: + name: 'BcnModelMetric' + overlap: [.1, .25, .5] + actions_map_file_path: "./data/breakfast/mapping.txt" + log_path: "./output/BCN/breakfast/split3/BcnModel/metric.csv" + dataset: "breakfast" + + +output_dir: "./output/BCN/breakfast/split3/BcnModel/" +model_name: "BcnModel" #Mandatory, model name. +log_interval: 2000 #Optional, the interval of logger. +epochs: 50 #Mandatory, total epoch +log_level: "DEBUG" #Optional, the logger level. 
+save_interval: 2000 diff --git a/configs/segmentation/bcn/model/breakfast/split4.yaml b/configs/segmentation/bcn/model/breakfast/split4.yaml new file mode 100644 index 000000000..93b1ade9a --- /dev/null +++ b/configs/segmentation/bcn/model/breakfast/split4.yaml @@ -0,0 +1,79 @@ +MODEL: #MODEL field + framework: "BcnModel" + data_path: "./data/breakfast/splits/train.split4.bundle" + bgm_result_path: "./output/BCN/breakfast/split4/BcnBgmResized/results" + bgm_pdparams: "./output/BCN/breakfast/split4/BcnBgmFull/BcnBgmFull_best.pdparams" + use_lbp: True + num_post: 4 + backbone: + name: "BcnModel" #Optional, model'. + num_stages: 4 + num_layers: 12 + num_f_maps: 256 + dim: 2048 + num_classes: 48 + dataset: "breakfast" + use_lbp: True + num_soft_lbp: 1 + loss: + name: "BcnModelLoss" + +DATASET: #DATASET field + batch_size: 1 #Mandatory, batch size per gpu. + valid_batch_size: 1 #Optional, valid batch size per gpu. + test_batch_size: 1 #Optional, test batch size per gpu. + num_workers: 2 #Mandatory, the number of subprocess on each GPU. + train: + format: 'BcnModelDataset' + file_path: "./data/breakfast/splits/train.split4.bundle" + bd_ratio: 0.05 + valid: + format: 'BcnModelDataset' + file_path: "./data/breakfast/splits/test.split4.bundle" + bd_ratio: 0.05 + test: + format: 'BcnModelDataset' + file_path: "./data/breakfast/splits/test.split4.bundle" + bd_ratio: 0.05 + +PIPELINE: #PIPELINE field + train: #Mandotary, indicate the pipeline to deal with the training data, please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + valid: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + test: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. 
+ +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + - + name: 'CustomMultiStepDecay' + learning_rate: 0.0005 + milestones: [20] + gamma: 0.3 + - + name: 'CustomMultiStepDecay' + learning_rate: 0.00005 + milestones: [20] + gamma: 0.3 + +METRIC: + name: 'BcnModelMetric' + overlap: [.1, .25, .5] + actions_map_file_path: "./data/breakfast/mapping.txt" + log_path: "./output/BCN/breakfast/split4/BcnModel/metric.csv" + dataset: "breakfast" + + +output_dir: "./output/BCN/breakfast/split4/BcnModel/" +model_name: "BcnModel" #Mandatory, model name. +log_interval: 2000 #Optional, the interval of logger. +epochs: 50 #Mandatory, total epoch +log_level: "DEBUG" #Optional, the logger level. +save_interval: 2000 diff --git a/configs/segmentation/bcn/model/example.yaml b/configs/segmentation/bcn/model/example.yaml new file mode 100644 index 000000000..919f44593 --- /dev/null +++ b/configs/segmentation/bcn/model/example.yaml @@ -0,0 +1,79 @@ +MODEL: #MODEL field + framework: "BcnModel" #Mandatory, indicate the type of network, associate to the 'paddlevideo/modeling/framework/'. + data_path: "./data/50salads/splits/train.split1.bundle" #Mandatory, the path to [.bundle] file of train data + bgm_result_path: "./output/BCN/50salads/split1/BcnBgmResized/results" #Mandatory, the path to results of resized-resolution bgm + bgm_pdparams: "./output/BCN/50salads/split1/BcnBgmFull/BcnBgmFull_best.pdparams" #Mandatory, the path to full-resolution bgm weight + use_lbp: True #Optional [True, False], use the full model or resized model + num_post: 4 #Optional, the layer number of lbp + backbone: + name: "BcnModel" #Mandatory, indicate the type of backbone, associate to the 'paddlevideo/modeling/backbones/'. 
+ num_stages: 4 #Optional, the stage number of model + num_layers: 12 #Optional, the layer number in each stage + num_f_maps: 256 #Optional, the number of hidden channels in each layer + dim: 2048 #Mandatory, the number of channels for input + num_classes: 19 #Optional, the number of categories + dataset: "50salads" #Optional, dataset + use_lbp: True #Optional [True, False], use the full model or resized model + num_soft_lbp: 1 #Optional, the layer number of soft_lbp + loss: + name: "BcnModelLoss" + +DATASET: #DATASET field + batch_size: 1 #Mandatory, batch size per gpu. + valid_batch_size: 1 #Mandatory, valid batch size per gpu. + test_batch_size: 1 #Mandatory, test batch size per gpu. + num_workers: 2 #Optional, the number of subprocess on each GPU. + train: + format: 'BcnModelDataset' #Mandatory, indicate the type of dataset, associate to the 'paddlevidel/loader/dateset'. + file_path: "./data/50salads/splits/train.split1.bundle" #Optional, choose split + bd_ratio: 0.05 # Mandatory + valid: + format: 'BcnModelDataset' #Mandatory, indicate the type of dataset, associate to the 'paddlevidel/loader/dateset'. + file_path: "./data/50salads/splits/test.split1.bundle" #Optional, choose split + bd_ratio: 0.05 #Mandatory + test: + format: 'BcnModelDataset' #Mandatory, indicate the type of dataset, associate to the 'paddlevidel/loader/dateset'. + file_path: "./data/50salads/splits/test.split1.bundle" #Optional, choose split + bd_ratio: 0.05 #Mandatory + +PIPELINE: #PIPELINE field + train: #Mandotary, indicate the pipeline to deal with the training data, please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Mandatory + + valid: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Mandatory + + test: #Mandatory, indicate the pipeline to deal with the validing data. 
please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Mandatory + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' #Mandatory, the type of optimizer, please to the 'paddlevideo/solver/' + learning_rate: #Mandatory, the type of optimizer, please to the 'paddlevideo/solver/' + - + name: 'CustomMultiStepDecay' #Mandatory, the learning rate scheduler for main model + learning_rate: 0.001 + milestones: [30] + gamma: 0.3 + - + name: 'CustomMultiStepDecay' #Mandatory, the learning rate scheduler for bgm model + learning_rate: 0.0001 + milestones: [30] + gamma: 0.3 + +METRIC: + name: 'BcnModelMetric' #Mandatory, the type of optimizer, please to the 'paddlevideo/metrics' + overlap: [.1, .25, .5] #Optional, the iou threshold value when calculating f-score + actions_map_file_path: "./data/50salads/mapping.txt" # Mandatory, the path to actions_map + log_path: "./output/BCN/50salads/split1/BcnModel/metric.csv" #Optional, the path to save metric results + dataset: "50salads" #Optional, choose dataset + + +output_dir: "./output/BCN/50salads/split1/BcnModel/" #Optional, path to save output +model_name: "BcnModel" #Mandatory, model name. +log_interval: 20 #Optional, the interval of logger. +epochs: 50 #Mandatory, total epoch +log_level: "DEBUG" #Optional, the logger level. +save_interval: 2000 #Optional, the interval of save. diff --git a/configs/segmentation/bcn/model/gtea/split1.yaml b/configs/segmentation/bcn/model/gtea/split1.yaml new file mode 100644 index 000000000..3367e919c --- /dev/null +++ b/configs/segmentation/bcn/model/gtea/split1.yaml @@ -0,0 +1,82 @@ +MODEL: #MODEL field + framework: "BcnModel" + data_path: "./data/gtea/splits/train.split1.bundle" + bgm_result_path: "./output/BCN/gtea/split1/BcnBgmResized/results" + bgm_pdparams: "./output/BCN/gtea/split1/BcnBgmFull/BcnBgmFull_best.pdparams" + use_lbp: True #.. + num_post: 4 #.. + backbone: + name: "BcnModel" #Optional, model'. + num_stages: 4 #.. + num_layers: 10 #.. 
+ num_f_maps: 256 #.. + dim: 2048 #.. + num_classes: 11 #.. + dataset: "gtea" #.. + use_lbp: True #.. + num_soft_lbp: 1 #.. + loss: + name: "BcnModelLoss" #.. + +DATASET: #DATASET field + batch_size: 1 #Mandatory, batch size per gpu. + valid_batch_size: 1 #Optional, valid batch size per gpu. + test_batch_size: 1 #Optional, test batch size per gpu. + num_workers: 2 #Mandatory, the number of subprocess on each GPU. + train: + format: 'BcnModelDataset' #.. + file_path: "./data/gtea/splits/train.split1.bundle" #.. + bd_ratio: 0.05 #.. + valid: + format: 'BcnModelDataset' #.. + file_path: "./data/gtea/splits/test.split1.bundle" #.. + bd_ratio: 0.05 #.. + test: + format: 'BcnModelDataset' #.. + file_path: "./data/gtea/splits/test.split1.bundle" #.. + bd_ratio: 0.05 #.. + +PIPELINE: #PIPELINE field + train: #Mandotary, indicate the pipeline to deal with the training data, please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + valid: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + test: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + - + name: 'CustomMultiStepDecay' #.. + learning_rate: 0.0005 #.. + milestones: [20] #.. + gamma: 0.3 #.. + - + name: 'CustomMultiStepDecay' #.. + learning_rate: 0.00005 #.. + milestones: [20] #.. + gamma: 0.3 #.. + +METRIC: + name: 'BcnModelMetric' + overlap: [.1, .25, .5] + actions_map_file_path: "./data/gtea/mapping.txt" + log_path: "./output/BCN/gtea/split1/BcnModel/metric.csv" + dataset: "gtea" + +INFERENCE: + name: 'BcnModel_Inference_helper' + num_channels: 2048 + +output_dir: "./output/BCN/gtea/split1/BcnModel/" +model_name: "BcnModel" #Mandatory, model name. 
+log_interval: 2000 #Optional, the interval of logger. +epochs: 60 #Mandatory, total epoch +log_level: "DEBUG" #Optional, the logger level. +save_interval: 2000 diff --git a/configs/segmentation/bcn/model/gtea/split2.yaml b/configs/segmentation/bcn/model/gtea/split2.yaml new file mode 100644 index 000000000..72003ff70 --- /dev/null +++ b/configs/segmentation/bcn/model/gtea/split2.yaml @@ -0,0 +1,79 @@ +MODEL: #MODEL field + framework: "BcnModel" + data_path: "./data/gtea/splits/train.split2.bundle" + bgm_result_path: "./output/BCN/gtea/split2/BcnBgmResized/results" + bgm_pdparams: "./output/BCN/gtea/split2/BcnBgmFull/BcnBgmFull_best.pdparams" + use_lbp: True #.. + num_post: 4 #.. + backbone: + name: "BcnModel" #Optional, model'. + num_stages: 4 #.. + num_layers: 10 #.. + num_f_maps: 256 #.. + dim: 2048 #.. + num_classes: 11 #.. + dataset: "gtea" #.. + use_lbp: True #.. + num_soft_lbp: 1 #.. + loss: + name: "BcnModelLoss" #.. + +DATASET: #DATASET field + batch_size: 1 #Mandatory, batch size per gpu. + valid_batch_size: 1 #Optional, valid batch size per gpu. + test_batch_size: 1 #Optional, test batch size per gpu. + num_workers: 2 #Mandatory, the number of subprocess on each GPU. + train: + format: 'BcnModelDataset' #.. + file_path: "./data/gtea/splits/train.split2.bundle" #.. + bd_ratio: 0.05 #.. + valid: + format: 'BcnModelDataset' #.. + file_path: "./data/gtea/splits/test.split2.bundle" #.. + bd_ratio: 0.05 #.. + test: + format: 'BcnModelDataset' #.. + file_path: "./data/gtea/splits/test.split2.bundle" #.. + bd_ratio: 0.05 #.. + +PIPELINE: #PIPELINE field + train: #Mandotary, indicate the pipeline to deal with the training data, please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + valid: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. 
+ + test: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + - + name: 'CustomMultiStepDecay' #.. + learning_rate: 0.0005 #.. + milestones: [20] #.. + gamma: 0.3 #.. + - + name: 'CustomMultiStepDecay' #.. + learning_rate: 0.00005 #.. + milestones: [20] #.. + gamma: 0.3 #.. + +METRIC: + name: 'BcnModelMetric' + overlap: [.1, .25, .5] + actions_map_file_path: "./data/gtea/mapping.txt" + log_path: "./output/BCN/gtea/split2/BcnModel/metric.csv" + dataset: "gtea" + + +output_dir: "./output/BCN/gtea/split2/BcnModel/" +model_name: "BcnModel" #Mandatory, model name. +log_interval: 2000 #Optional, the interval of logger. +epochs: 60 #Mandatory, total epoch +log_level: "DEBUG" #Optional, the logger level. +save_interval: 2000 diff --git a/configs/segmentation/bcn/model/gtea/split3.yaml b/configs/segmentation/bcn/model/gtea/split3.yaml new file mode 100644 index 000000000..3165e4274 --- /dev/null +++ b/configs/segmentation/bcn/model/gtea/split3.yaml @@ -0,0 +1,79 @@ +MODEL: #MODEL field + framework: "BcnModel" + data_path: "./data/gtea/splits/train.split3.bundle" + bgm_result_path: "./output/BCN/gtea/split3/BcnBgmResized/results" + bgm_pdparams: "./output/BCN/gtea/split3/BcnBgmFull/BcnBgmFull_best.pdparams" + use_lbp: True #.. + num_post: 4 #.. + backbone: + name: "BcnModel" #Optional, model'. + num_stages: 4 #.. + num_layers: 10 #.. + num_f_maps: 256 #.. + dim: 2048 #.. + num_classes: 11 #.. + dataset: "gtea" #.. + use_lbp: True #.. + num_soft_lbp: 1 #.. + loss: + name: "BcnModelLoss" #.. + +DATASET: #DATASET field + batch_size: 1 #Mandatory, batch size per gpu. + valid_batch_size: 1 #Optional, valid batch size per gpu. + test_batch_size: 1 #Optional, test batch size per gpu. + num_workers: 2 #Mandatory, the number of subprocess on each GPU. + train: + format: 'BcnModelDataset' #.. 
+ file_path: "./data/gtea/splits/train.split3.bundle" #.. + bd_ratio: 0.05 #.. + valid: + format: 'BcnModelDataset' #.. + file_path: "./data/gtea/splits/test.split3.bundle" #.. + bd_ratio: 0.05 #.. + test: + format: 'BcnModelDataset' #.. + file_path: "./data/gtea/splits/test.split3.bundle" #.. + bd_ratio: 0.05 #.. + +PIPELINE: #PIPELINE field + train: #Mandotary, indicate the pipeline to deal with the training data, please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + valid: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + test: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + - + name: 'CustomMultiStepDecay' #.. + learning_rate: 0.0005 #.. + milestones: [20] #.. + gamma: 0.3 #.. + - + name: 'CustomMultiStepDecay' #.. + learning_rate: 0.00005 #.. + milestones: [20] #.. + gamma: 0.3 #.. + +METRIC: + name: 'BcnModelMetric' + overlap: [.1, .25, .5] + actions_map_file_path: "./data/gtea/mapping.txt" + log_path: "./output/BCN/gtea/split3/BcnModel/metric.csv" + dataset: "gtea" + + +output_dir: "./output/BCN/gtea/split3/BcnModel/" +model_name: "BcnModel" #Mandatory, model name. +log_interval: 2000 #Optional, the interval of logger. +epochs: 60 #Mandatory, total epoch +log_level: "DEBUG" #Optional, the logger level. 
+save_interval: 2000 diff --git a/configs/segmentation/bcn/model/gtea/split4.yaml b/configs/segmentation/bcn/model/gtea/split4.yaml new file mode 100644 index 000000000..9f03b9355 --- /dev/null +++ b/configs/segmentation/bcn/model/gtea/split4.yaml @@ -0,0 +1,86 @@ +MODEL: #MODEL field + framework: "BcnModel" + data_path: "./data/gtea/splits/train.split4.bundle" + bgm_result_path: "./output/BCN/gtea/split4/BcnBgmResized/results" + bgm_pdparams: "./output/BCN/gtea/split4/BcnBgmFull/BcnBgmFull_best.pdparams" + use_lbp: True #.. + num_post: 4 #.. + backbone: + name: "BcnModel" #Optional, model'. + num_stages: 4 #.. + num_layers: 10 #.. + num_f_maps: 256 #.. + dim: 2048 #.. + num_classes: 11 #.. + dataset: "gtea" #.. + use_lbp: True #.. + num_soft_lbp: 1 #.. + loss: + name: "BcnModelLoss" #.. + +DATASET: #DATASET field + batch_size: 1 #Mandatory, batch size per gpu. + valid_batch_size: 1 #Optional, valid batch size per gpu. + test_batch_size: 1 #Optional, test batch size per gpu. + num_workers: 2 #Mandatory, the number of subprocess on each GPU. + train: + format: 'BcnModelDataset' #.. + file_path: "./data/gtea/splits/train.split4.bundle" #.. + bd_ratio: 0.05 #.. + valid: + format: 'BcnModelDataset' #.. + file_path: "./data/gtea/splits/test.split4.bundle" #.. + bd_ratio: 0.05 #.. + test: + format: 'BcnModelDataset' #.. + file_path: "./data/gtea/splits/test.split4.bundle" #.. + bd_ratio: 0.05 #.. + +PIPELINE: #PIPELINE field + train: #Mandotary, indicate the pipeline to deal with the training data, please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + valid: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. + + test: #Mandatory, indicate the pipeline to deal with the validing data. please refer to the 'paddlevideo/loader/pipelines/' + decode: + name: "BcnModelPipeline" #Decoder type. 
+ +OPTIMIZER: #OPTIMIZER field + name: 'Adam' + learning_rate: + - + name: 'CustomMultiStepDecay' #.. + learning_rate: 0.0005 #.. + milestones: [20] #.. + gamma: 0.3 #.. + - + name: 'CustomMultiStepDecay' #.. + learning_rate: 0.00005 #.. + milestones: [20] #.. + gamma: 0.3 #.. + +METRIC: + name: 'BcnModelMetric' + overlap: [.1, .25, .5] + actions_map_file_path: "./data/gtea/mapping.txt" + log_path: "./output/BCN/gtea/split4/BcnModel/metric.csv" + dataset: "gtea" + +INFERENCE: + name: 'BCN_Inference_helper' + num_channels: 2048 + # actions_map_file_path: "./data/gtea/mapping.txt" + # postprocessing_method: "refinement_with_boundary" + # boundary_threshold: 0.5 + # feature_path: "./data/gtea/features" + +output_dir: "./output/BCN/gtea/split4/BcnModel/" +model_name: "BcnModel" #Mandatory, model name. +log_interval: 2000 #Optional, the interval of logger. +epochs: 60 #Mandatory, total epoch +log_level: "DEBUG" #Optional, the logger level. +save_interval: 2000 diff --git a/docs/en/dataset/SegmentationDataset.md b/docs/en/dataset/SegmentationDataset.md new file mode 100644 index 000000000..46295ddb6 --- /dev/null +++ b/docs/en/dataset/SegmentationDataset.md @@ -0,0 +1,35 @@ +English | [简体中文](../../zh-CN/dataset/SegmentationDataset.md) + +# Video Action Segmentation Dataset + +The video motion segmentation model uses breakfast, 50salads and gtea data sets. The use method is to use the features extracted by the pre training model, which can be obtained from the ms-tcn official code base.[feat](https://zenodo.org/record/3625992#.Xiv9jGhKhPY) + +- Dataset tree +```txt +─── GTEA + ├── features + │ ├── S1_Cheese_C1.npy + │ ├── S1_Coffee_C1.npy + │ ├── S1_CofHoney_C1.npy + │ └── ... + ├── groundTruth + │ ├── S1_Cheese_C1.txt + │ ├── S1_Coffee_C1.txt + │ ├── S1_CofHoney_C1.txt + │ └── ... + ├── splits + │ ├── test.split1.bundle + │ ├── test.split2.bundle + │ ├── test.split3.bundle + │ └── ... 
+ └── mapping.txt +``` + +- data tree +```txt +─── data + ├── 50salads + ├── breakfast + ├── gtea + └── ... +``` diff --git a/docs/en/model_zoo/segmentation/Temporal_action_segmentation.md b/docs/en/model_zoo/segmentation/Temporal_action_segmentation.md new file mode 100644 index 000000000..8027de53f --- /dev/null +++ b/docs/en/model_zoo/segmentation/Temporal_action_segmentation.md @@ -0,0 +1,19 @@ +[English](../../../en/model_zoo/segmentation/Trmporal_action_segmentation.md) | 简体中文 + +本仓库提供经典和热门时序动作分割模型的性能和精度对比 + +| Model | Metrics | Value | Flops(M) |Params(M) | test time(ms) bs=1 | test time(ms) bs=2 | inference time(ms) bs=1 | inference time(ms) bs=2 | +| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | +| MS-TCN | F1@0.5 | 38.8% | 791.360 | 0.8 | 170 | - | 10.68 | - | +| ASRF | F1@0.5 | 55.7% | 1,283.328 | 1.3 | 190 | - | 16.34 | - | +| BCN | F1@0.5 | 56.0% | 14,463.616 | 14.0 | 268 | - | - | - | + +* 模型名称:填写模型的具体名字,比如PP-TSM +* Metrics:填写模型测试时所用的指标,使用的数据集为**breakfast** +* Value:填写Metrics指标对应的数值,一般保留小数点后一位 +* Flops:模型一次前向运算所需的浮点运算量,可以调用PaddleVideo/tools/summary.py脚本计算(不同模型可能需要稍作修改),保留小数点后一位,使用数据**输入形状为(1, 2048, 1000)的张量**测得 +* Params(M):模型参数量,和Flops一起会被脚本计算出来,保留小数点后一位 +* test time(ms) bs=1:python脚本开batchsize=1测试时,一个样本所需的耗时,保留小数点后两位。测试使用的数据集为**breakfast**。 +* test time(ms) bs=2:python脚本开batchsize=2测试时,一个样本所需的耗时,保留小数点后两位。时序动作分割模型一般是全卷积网络,所以训练、测试和推理的batch_size都是1。测试使用的数据集为**breakfast**。 +* inference time(ms) bs=1:推理模型用GPU(默认V100)开batchsize=1测试时,一个样本所需的耗时,保留小数点后两位。推理使用的数据集为**breakfast**。 +* inference time(ms) bs=2:推理模型用GPU(默认V100)开batchsize=1测试时,一个样本所需的耗时,保留小数点后两位。时序动作分割模型一般是全卷积网络,所以训练、测试和推理的batch_size都是1。推理使用的数据集为**breakfast**。 diff --git a/docs/en/model_zoo/segmentation/bcn.md b/docs/en/model_zoo/segmentation/bcn.md new file mode 100644 index 000000000..5e0b80d6a --- /dev/null +++ b/docs/en/model_zoo/segmentation/bcn.md @@ -0,0 +1,160 @@ +[简体中文](../../../zh-CN/model_zoo/segmentation/bcn.md) | English + +# BCN Video action segmentation model 
+
+---
+## Contents
+
+- [Introduction](#Introduction)
+- [Data](#Data)
+- [Train&Test](#Train&Test)
+- [Inference](#Inference)
+- [Reference](#Reference)
+
+## Introduction
+
+The BCN model is an improvement on the video action segmentation model MS-TCN, and was published at ECCV 2020. We reproduce the officially implemented PyTorch code and obtain approximate results in PaddleVideo.
+
+
+<p align="center">

+
+BCN Overview +

+
+## Data
+
+BCN can use 50salads, breakfast or GTEA as its training set. Please refer to the Video Action Segmentation dataset download and preparation doc: [Video Action Segmentation dataset](../../dataset/SegmentationDataset.md)
+
+## Train&Test
+
+BCN is a two-stage model that requires the Barrier Generation Module (BGM) to be trained first, and then the BGM and the main model to be trained together.
+
+In all the following commands, **DS** is **breakfast, 50salads or gtea**, and **SP** is the split number: **1-5** for 50salads and **1-4** for the other two datasets.
+e.g.
+```bash
+python main.py --validate -c configs/segmentation/bcn/bgm/[DS]/full/split[SP].yaml
+```
+Can be replaced with:
+```bash
+python main.py --validate -c configs/segmentation/bcn/bgm/gtea/full/split1.yaml
+```
+
+### 1. Train and test the full-resolution barrier generation module
+
+```bash
+export CUDA_VISIBLE_DEVICES=3
+python main.py --validate -c configs/segmentation/bcn/bgm/[DS]/full/split[SP].yaml
+python main.py --test -c configs/segmentation/bcn/bgm/[DS]/full/split[SP].yaml \
+    -w output/BCN/[DS]/split[SP]/BcnBgmFull/BcnBgmFull_epoch_00001.pdparams
+```
+
+### 2. Train and test the resized-resolution barrier generation module
+
+```bash
+export CUDA_VISIBLE_DEVICES=3
+python main.py --validate -c configs/segmentation/bcn/bgm/[DS]/resized/split[SP].yaml
+python main.py --test -c configs/segmentation/bcn/bgm/[DS]/resized/split[SP].yaml \
+    -w output/BCN/[DS]/split[SP]/BcnBgmResized/BcnBgmResized_epoch_00001.pdparams
+```
+
+### 3. 
Train and test the BCN + +```bash +export CUDA_VISIBLE_DEVICES=3 +python3.7 main.py --validate -c configs/segmentation/bcn/model/[DS]/split[SP].yaml +python3.7 main.py --test -c configs/segmentation/bcn/model/[DS]/split[SP].yaml \ + -w output/BCN/[DS]/split[SP]/BcnModel/BcnModel_epoch_00001.pdparams +``` + +The results of the measurement are saved in: +``` +output/BCN/[DS]/split[SP]/BcnModel/metric.csv +``` + +### Results in paddle + +- Start the training by using the above command line or script program. There is no need to use the pre training model. The video action segmentation model is usually a full convolution network. Due to the different lengths of videos, the `DATASET.batch_size` of the video action segmentation model is usually set to `1`, that is, batch training is not required. At present, only **single sample** training is supported. + +- The reproduction of pytorch comes from the official [code base](https://github.com/MCG-NJU/BCN) + +Metrics on Breakfast dataset: + +| Model | Acc | Edit | F1@0.1 | F1@0.25 | F1@0.5 | +| :---: | :---: | :---: | :---: | :---: | :---: | +| paper | 70.4% | 66.2% | 68.7% | 65.5% | 55.0% | +| pytorch | 70.9% | 66.7% | 68.5% | 65.9% | 55.8% | +| paddle | 70.8% | 66.4% | 68.9% | 65.9% | 56.0% | + +Metrics on 50salads dataset: + +| Model | Acc | Edit | F1@0.1 | F1@0.25 | F1@0.5 | +| :---: | :---: | :---: | :---: | :---: | :---: | +| paper | 84.4% | 74.3% | 82.1% | 81.3% | 74.0% | +| pytorch | 84.5% | 76.8% | 83.3% | 81.3% | 73.5% | +| paddle | 85.0% | 75.4% | 83.0% | 81.5% | 73.8% | + +Metrics on gtea dataset: + +| Model | Acc | Edit | F1@0.1 | F1@0.25 | F1@0.5 | +| :---: | :---: | :---: | :---: | :---: | :---: | +| paper | 79.8% | 84.4% | 88.5% | 87.1% | 77.3% | +| pytorch | 78.8% | 82.8% | 87.3% | 85.0% | 75.1% | +| paddle | 78.9% | 82.6% | 88.9% | 86.4% | 73.8% | + + +## Inference + +### 1. 
Export inference full-resolution barrier generation module + +```bash +python3.7 tools/export_model.py \ + -c configs/segmentation/bcn/bgm/[DS]/full/split[SP].yaml \ + --p output/BCN/[DS]/split[SP]/BcnBgmFull/BcnBgmFull_best.pdparams \ + -o ./inference +``` + +### 2. Infer, taking gtea/ Features/s1_cheese_C1.npy as an example + +```bash +python3.7 tools/predict.py --input_file data/gtea/features/S1_Cheese_C1.npy + --config configs/segmentation/bcn/bgm/gtea/full/split1.yaml + --model_file inference/BcnBgmFull.pdmodel + --params_file inference/BcnBgmFull.pdiparams --use_gpu=True + --use_tensorrt=False +``` + +### 3. Export inference resized-resolution barrier generation module + +```bash +python3.7 tools/export_model.py \ + -c configs/segmentation/bcn/bgm/[DS]/resized/split[SP].yaml \ + --p output/BCN/[DS]/split[SP]/BcnBgmResized/BcnBgmResized_best.pdparams \ + -o ./inference +``` + +### 4. Infer, taking gtea/ Features/s1_cheese_C1.npy as an example + +```bash +python3.7 tools/predict.py --input_file data/gtea/features/S1_Cheese_C1.npy + --config configs/segmentation/bcn/bgm/gtea/resized/split1.yaml + --model_file inference/BcnBgmResized.pdmodel + --params_file inference/BcnBgmResized.pdiparams --use_gpu=True + --use_tensorrt=False +``` + +### 5. Export inference bcn model + +None + +Since BCN uses the Unfold function, when generating the static model, the input will contain the variable dimension -1. Unfold is temporarily unable to process the input that contains -1 from Shape. + +

+
+BCN Error +

+ +- Args usage please refer to [Model Inference](https://github.com/PaddlePaddle/PaddleVideo/blob/release/2.0/docs/zh-CN/start.md#2-%E6%A8%A1%E5%9E%8B%E6%8E%A8%E7%90%86). + +## Reference + +- [Boundary-Aware Cascade Networks for Temporal Action Segmentation](https://github.com/MCG-NJU/BCN/blob/master/demo/ECCV20-BCN.pdf), Zhenzhi Wang, Ziteng Gao, Limin Wang, Zhifeng Li, Gangshan Wu diff --git a/docs/images/bcn.png b/docs/images/bcn.png new file mode 100644 index 000000000..ec93b3698 Binary files /dev/null and b/docs/images/bcn.png differ diff --git a/docs/images/bcn_error.png b/docs/images/bcn_error.png new file mode 100644 index 000000000..37d9abc19 Binary files /dev/null and b/docs/images/bcn_error.png differ diff --git a/docs/zh-CN/dataset/SegmentationDataset.md b/docs/zh-CN/dataset/SegmentationDataset.md new file mode 100644 index 000000000..9ba6d2ec0 --- /dev/null +++ b/docs/zh-CN/dataset/SegmentationDataset.md @@ -0,0 +1,35 @@ +简体中文 | [English](../../en/dataset/SegmentationDataset.md) + +# 视频动作分割模型数据使用说明 + +视频动作分割模型使用breakfast、50salads和GTEA数据集,使用方法为使用预训练模型提取的特征,可以从MS-TCN官方代码库中获取。[feat](https://zenodo.org/record/3625992#.Xiv9jGhKhPY) + +- 数据集文件树形式 +```txt +─── GTEA + ├── features + │ ├── S1_Cheese_C1.npy + │ ├── S1_Coffee_C1.npy + │ ├── S1_CofHoney_C1.npy + │ └── ... + ├── groundTruth + │ ├── S1_Cheese_C1.txt + │ ├── S1_Coffee_C1.txt + │ ├── S1_CofHoney_C1.txt + │ └── ... + ├── splits + │ ├── test.split1.bundle + │ ├── test.split2.bundle + │ ├── test.split3.bundle + │ └── ... + └── mapping.txt +``` + +- 数据集存放文件数形式 +```txt +─── data + ├── 50salads + ├── breakfast + ├── gtea + └── ... 
+``` diff --git a/docs/zh-CN/model_zoo/segmentation/Temporal_action_segmentation.md b/docs/zh-CN/model_zoo/segmentation/Temporal_action_segmentation.md new file mode 100644 index 000000000..8027de53f --- /dev/null +++ b/docs/zh-CN/model_zoo/segmentation/Temporal_action_segmentation.md @@ -0,0 +1,19 @@ +[English](../../../en/model_zoo/segmentation/Trmporal_action_segmentation.md) | 简体中文 + +本仓库提供经典和热门时序动作分割模型的性能和精度对比 + +| Model | Metrics | Value | Flops(M) |Params(M) | test time(ms) bs=1 | test time(ms) bs=2 | inference time(ms) bs=1 | inference time(ms) bs=2 | +| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | +| MS-TCN | F1@0.5 | 38.8% | 791.360 | 0.8 | 170 | - | 10.68 | - | +| ASRF | F1@0.5 | 55.7% | 1,283.328 | 1.3 | 190 | - | 16.34 | - | +| BCN | F1@0.5 | 56.0% | 14,463.616 | 14.0 | 268 | - | - | - | + +* 模型名称:填写模型的具体名字,比如PP-TSM +* Metrics:填写模型测试时所用的指标,使用的数据集为**breakfast** +* Value:填写Metrics指标对应的数值,一般保留小数点后一位 +* Flops:模型一次前向运算所需的浮点运算量,可以调用PaddleVideo/tools/summary.py脚本计算(不同模型可能需要稍作修改),保留小数点后一位,使用数据**输入形状为(1, 2048, 1000)的张量**测得 +* Params(M):模型参数量,和Flops一起会被脚本计算出来,保留小数点后一位 +* test time(ms) bs=1:python脚本开batchsize=1测试时,一个样本所需的耗时,保留小数点后两位。测试使用的数据集为**breakfast**。 +* test time(ms) bs=2:python脚本开batchsize=2测试时,一个样本所需的耗时,保留小数点后两位。时序动作分割模型一般是全卷积网络,所以训练、测试和推理的batch_size都是1。测试使用的数据集为**breakfast**。 +* inference time(ms) bs=1:推理模型用GPU(默认V100)开batchsize=1测试时,一个样本所需的耗时,保留小数点后两位。推理使用的数据集为**breakfast**。 +* inference time(ms) bs=2:推理模型用GPU(默认V100)开batchsize=1测试时,一个样本所需的耗时,保留小数点后两位。时序动作分割模型一般是全卷积网络,所以训练、测试和推理的batch_size都是1。推理使用的数据集为**breakfast**。 diff --git a/docs/zh-CN/model_zoo/segmentation/bcn.md b/docs/zh-CN/model_zoo/segmentation/bcn.md new file mode 100644 index 000000000..65ce79e01 --- /dev/null +++ b/docs/zh-CN/model_zoo/segmentation/bcn.md @@ -0,0 +1,158 @@ +[English](../../../en/model_zoo/segmentation/bcn.md) | 简体中文 + +# BCN 视频动作分割模型 + +--- +## 内容 + +- [模型简介](#模型简介) +- [数据准备](#数据准备) +- [模型训练和测试](#模型训练和测试) +- [模型推理](#模型推理) +- [参考论文](#参考论文) + +## 
模型简介 + +BCN模型是在视频动作分割模型MS-TCN上的改进,发表在2020年的ECCV上。我们对官方实现的pytorch代码进行复现,在PaddleVideo获得了近似的结果。 + +

+![BCN Overview](../../../images/bcn.png)
+BCN Overview

+ +## 数据准备 + +BCN的训练数据可以选择50salads、breakfast、GTEA三个数据集,数据下载及准备请参考[视频动作分割数据集](../../dataset/SegmentationDataset.md) + +## 模型训练和测试 + +BCN是两阶段模型,需要先训练barrier generation module(bgm),然后将bgm与主模型共同训练。 +以下所有**DS**应替换为**breakfast/50salads/gtea**, **SP**在50salads为**1-5**,在其他两个数据集中为**1-4**。 +例如, +```bash +python main.py --validate -c configs/segmentation/bcn/bgm/[DS]/full/split[SP].yaml +``` +可以替换为: +```bash +python main.py --validate -c configs/segmentation/bcn/bgm/gtea/full/split1.yaml +``` + +### 1. 训练full-resolution barrier generation module,并测试 + +```bash +export CUDA_VISIBLE_DEVICES=3 +python main.py --validate -c configs/segmentation/bcn/bgm/[DS]/full/split[SP].yaml +python main.py --test -c configs/segmentation/bcn/bgm/[DS]/full/split[SP].yaml \ + -w output/BCN/[DS]/split[SP]/BcnBgmFull/BcnBgmFull_epoch_00001.pdparams +``` + +### 2. 训练resized-resolution barrier generation module,并测试 + +```bash +export CUDA_VISIBLE_DEVICES=3 +python main.py --validate -c configs/segmentation/bcn/bgm/[DS]/resized/split[SP].yaml +python main.py --test -c configs/segmentation/bcn/bgm/[DS]/resized/split[SP].yaml \ + -w output/BCN/[DS]/split[SP]/BcnBgmResized/BcnBgmResized_epoch_00001.pdparams +``` + +### 3. 
训练BCN,并测试 + +```bash +export CUDA_VISIBLE_DEVICES=3 +python3.7 main.py --validate -c configs/segmentation/bcn/model/[DS]/split[SP].yaml +python3.7 main.py --test -c configs/segmentation/bcn/model/[DS]/split[SP].yaml \ + -w output/BCN/[DS]/split[SP]/BcnModel/BcnModel_epoch_00001.pdparams +``` + +metric保存在: +``` +output/BCN/[DS]/split[SP]/BcnModel/metric.csv +``` + +### 复现结果 + +- 从头开始训练,使用上述启动命令行或者脚本程序即可启动训练,不需要用到预训练模型,视频动作分割模型通常为全卷积网络,由于视频的长度不一,故视频动作分割模型的scr字段通常设为1,即不需要批量训练,目前也仅支持**单样本**训练 + +- pytorch的复现来源于官方提供的[代码库](https://github.com/MCG-NJU/BCN) + +在Breakfast数据集下评估精度如下: + +| Model | Acc | Edit | F1@0.1 | F1@0.25 | F1@0.5 | +| :---: | :---: | :---: | :---: | :---: | :---: | +| paper | 70.4% | 66.2% | 68.7% | 65.5% | 55.0% | +| pytorch | 70.9% | 66.7% | 68.5% | 65.9% | 55.8% | +| paddle | 70.8% | 66.4% | 68.9% | 65.9% | 56.0% | + +在50salads数据集下评估精度如下: + +| Model | Acc | Edit | F1@0.1 | F1@0.25 | F1@0.5 | +| :---: | :---: | :---: | :---: | :---: | :---: | +| paper | 84.4% | 74.3% | 82.1% | 81.3% | 74.0% | +| pytorch | 84.5% | 76.8% | 83.3% | 81.3% | 73.5% | +| paddle | 85.0% | 75.4% | 83.0% | 81.5% | 73.8% | + +在GTEA数据集下评估精度如下: + +| Model | Acc | Edit | F1@0.1 | F1@0.25 | F1@0.5 | +| :---: | :---: | :---: | :---: | :---: | :---: | +| paper | 79.8% | 84.4% | 88.5% | 87.1% | 77.3% | +| pytorch | 78.8% | 82.8% | 87.3% | 85.0% | 75.1% | +| paddle | 78.9% | 82.6% | 88.9% | 86.4% | 73.8% | + + +## 模型推理 + +### 1. 导出动态full-resolution barrier generation module + +```bash +python3.7 tools/export_model.py \ + -c configs/segmentation/bcn/bgm/[DS]/full/split[SP].yaml \ + --p output/BCN/[DS]/split[SP]/BcnBgmFull/BcnBgmFull_best.pdparams \ + -o ./inference +``` + +### 2. 
使用预测引擎推理full-resolution barrier generation module, 以gtea/features/S1_Cheese_C1.npy为例 + +```bash +python3.7 tools/predict.py --input_file data/gtea/features/S1_Cheese_C1.npy + --config configs/segmentation/bcn/bgm/gtea/full/split1.yaml + --model_file inference/BcnBgmFull.pdmodel + --params_file inference/BcnBgmFull.pdiparams --use_gpu=True + --use_tensorrt=False +``` + +### 3. 导出动态resized-resolution barrier generation module + +```bash +python3.7 tools/export_model.py \ + -c configs/segmentation/bcn/bgm/[DS]/resized/split[SP].yaml \ + --p output/BCN/[DS]/split[SP]/BcnBgmResized/BcnBgmResized_best.pdparams \ + -o ./inference +``` + +### 4. 使用预测引擎推理resized-resolution barrier generation module, 以gtea/features/S1_Cheese_C1.npy为例 + +```bash +python3.7 tools/predict.py --input_file data/gtea/features/S1_Cheese_C1.npy + --config configs/segmentation/bcn/bgm/gtea/resized/split1.yaml + --model_file inference/BcnBgmResized.pdmodel + --params_file inference/BcnBgmResized.pdiparams --use_gpu=True + --use_tensorrt=False +``` + +### 5. 导出动态bcn + +暂无 + +由于BCN使用了unfold函数,在生成静态模型时,输入会包含可变维度-1,unfold暂时无法处理shape包含-1的输入。 + +

+![BCN Error](../../../images/bcn_error.png)
+BCN Error

+ +- 各参数含义可参考[模型推理方法](https://github.com/PaddlePaddle/PaddleVideo/blob/release/2.0/docs/zh-CN/start.md#2-%E6%A8%A1%E5%9E%8B%E6%8E%A8%E7%90%86) + +## 参考论文 + +- [Boundary-Aware Cascade Networks for Temporal Action Segmentation](https://github.com/MCG-NJU/BCN/blob/master/demo/ECCV20-BCN.pdf), Zhenzhi Wang, Ziteng Gao, Limin Wang, Zhifeng Li, Gangshan Wu diff --git a/paddlevideo/loader/dataset/__init__.py b/paddlevideo/loader/dataset/__init__.py index eba7ac339..d6fc0ab9d 100644 --- a/paddlevideo/loader/dataset/__init__.py +++ b/paddlevideo/loader/dataset/__init__.py @@ -23,6 +23,7 @@ from .skeleton import SkeletonDataset from .slowfast_video import SFVideoDataset from .video import VideoDataset +from .bcn_dataset import BcnBgmDataset, BcnModelDataset from .MRI import MRIDataset from .MRI_SlowFast import SFMRIDataset @@ -30,5 +31,5 @@ 'VideoDataset', 'FrameDataset', 'SFVideoDataset', 'BMNDataset', 'FeatureDataset', 'SkeletonDataset', 'AVADataset', 'MonoDataset', 'MSRVTTDataset', 'ActBertDataset', 'DavisDataset', 'MRIDataset', - 'SFMRIDataset' + 'SFMRIDataset', 'BcnBgmDataset', 'BcnModelDataset' ] diff --git a/paddlevideo/loader/dataset/bcn_dataset.py b/paddlevideo/loader/dataset/bcn_dataset.py new file mode 100644 index 000000000..d45488367 --- /dev/null +++ b/paddlevideo/loader/dataset/bcn_dataset.py @@ -0,0 +1,360 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import copy +import json +import paddle +import paddle.nn.functional as F +import numpy as np + +from ..registry import DATASETS +from .base import BaseDataset +from ...utils import get_logger + +logger = get_logger("paddlevideo") + + +@DATASETS.register() +class BcnBgmDataset(BaseDataset): + """Video dataset for BCN bgm model. + """ + + def __init__( + self, + file_path, + pipeline, + # mode, + use_full, + bd_ratio=0.05, + **kwargs, + ): + super().__init__(file_path, pipeline, **kwargs) + + # assert parameter + # assert mode in ['train', 'test'], "mode parameter must be 'train' or 'test'" + assert use_full in [True, + False], "use_full parameter must be True or False" + assert '//' not in file_path, "don't use '//' in file_path, please use '/'" + + # set parameter + self.boundary_ratio = bd_ratio + # self.mode = mode + self.use_full = use_full + + # get other parameter from file_path + file_path_list = file_path.split('/') + root = '/'.join(file_path_list[:-2]) + '/' + + self.dataset = file_path_list[-3] + self.gt_path = root + 'groundTruth/' + self.features_path = root + 'features/' + mapping_file = root + 'mapping.txt' + file_ptr = open(mapping_file, 'r') + actions = file_ptr.read().split('\n')[:-1] + file_ptr.close() + self.actions_dict = dict() + for a in actions: + self.actions_dict[a.split()[1]] = int(a.split()[0]) + # self.num_classes = len(actions_dict) + + # see mapping.txt for details + if self.dataset == '50salads': + self.bg_class = [17, 18] # background + self.resized_temporal_scale = 400 + self.sample_rate = 2 + elif self.dataset == 'gtea': + self.boundary_ratio = 0.1 + self.bg_class = [10] + self.resized_temporal_scale = 300 # 100 in bcn-torch + self.sample_rate = 1 + elif self.dataset == 'breakfast': + self.bg_class = [0] + self.resized_temporal_scale = 300 + self.sample_rate = 1 + + # get all data_path + self.file_path = file_path + file_ptr = open(file_path, 'r') + self.list_of_examples = file_ptr.read().split('\n')[:-1] + file_ptr.close() + + 
def load_file(self): + """Load index file to get video information.""" + file_ptr = open(self.file_path, 'r') + self.list_of_examples = file_ptr.read().split('\n')[:-1] + file_ptr.close() + return self.list_of_examples + + def prepare_train(self, idx): + """TRAIN & VALID: Prepare data for training/valid given the index.""" + feature_tensor, target_tensor, anchor_xmin, anchor_xmax = self._get_base_data( + idx) + result = dict() + result['feature_tensor'] = feature_tensor + result['target_tensor'] = target_tensor + result['anchor_xmin'] = anchor_xmin + result['anchor_xmax'] = anchor_xmax + result['idx'] = idx + result['pipeline_parameter'] = { + 'use_full': self.use_full, + 'resized_temporal_scale': self.resized_temporal_scale, + 'bg_class': self.bg_class, + 'boundary_ratio': self.boundary_ratio + } + result['video_name'] = self.list_of_examples[idx] + return self.pipeline(result) + + def prepare_test(self, idx): + """TEST: Prepare the data for test given the index.""" + + return self.prepare_train(idx) + + def _get_base_data(self, index): + """Get base data for dataset.""" + features = np.load(self.features_path + + self.list_of_examples[index].split('.')[0] + '.npy') + features = copy.deepcopy(features) + file_ptr = open(self.gt_path + self.list_of_examples[index], 'r') + content = file_ptr.read().split('\n')[:-1] # read ground truth + content = copy.deepcopy(content) + + # initialize and produce gt vector + classes = np.zeros(min(np.shape(features)[1], len(content))) + for i in range(len(classes)): + classes[i] = self.actions_dict[content[i]] + + # sample information by skipping each sample_rate frames + features = features[:, ::self.sample_rate] + feature_tensor = paddle.to_tensor(features, dtype='float32') + temporal_scale = feature_tensor.shape[1] + temporal_gap = 1.0 / temporal_scale + if self.use_full == False: + num_frames = np.shape(features)[1] + feature_tensor = feature_tensor.unsqueeze(0) + if self.dataset == 'breakfast': # for breakfast dataset, there 
are extremely short videos + factor = 1 + while factor * num_frames < self.resized_temporal_scale: + factor = factor + 1 + feature_tensor = F.interpolate(feature_tensor, + scale_factor=(factor), + mode='linear', + align_corners=False, + data_format='NCW') + feature_tensor = F.interpolate(feature_tensor.unsqueeze(3), + size=(self.resized_temporal_scale, + 1), + mode='nearest').squeeze(3) + feature_tensor = feature_tensor.squeeze(0) + temporal_scale = self.resized_temporal_scale + temporal_gap = 1.0 / temporal_scale + target = classes[::self.sample_rate] + target_tensor = paddle.to_tensor(target, dtype='int64') + anchor_xmin = [temporal_gap * i for i in range(temporal_scale)] + anchor_xmax = [temporal_gap * i for i in range(1, temporal_scale + 1)] + + return feature_tensor, target_tensor, anchor_xmin, anchor_xmax + + def __len__(self): + return len(self.list_of_examples) + + +@DATASETS.register() +class BcnModelDataset(BaseDataset): + """Video dataset for BCN main model. + """ + + def __init__( + self, + file_path, + pipeline, + # mode, + bd_ratio=0.05, + **kwargs, + ): + super().__init__(file_path, pipeline, **kwargs) + + # assert parameter + assert '//' not in file_path, "don't use '//' in file_path, please use '/'" + + # set parameter + self.boundary_ratio = bd_ratio + + # get other parameter from file_path + file_path_list = file_path.split('/') + root = '/'.join(file_path_list[:-2]) + '/' + + self.dataset = file_path_list[-3] + self.gt_path = root + 'groundTruth/' + self.features_path = root + 'features/' + mapping_file = root + 'mapping.txt' + file_ptr = open(mapping_file, 'r') + actions = file_ptr.read().split('\n')[:-1] + file_ptr.close() + self.actions_dict = dict() + for a in actions: + self.actions_dict[a.split()[1]] = int(a.split()[0]) + self.num_classes = len(self.actions_dict) + + # see mapping.txt for details + if self.dataset == '50salads': + self.bg_class = [17, 18] + self.sample_rate = 2 + elif self.dataset == 'gtea': + self.boundary_ratio = 0.1 + 
self.bg_class = [10] + self.sample_rate = 1 + elif self.dataset == 'breakfast': + self.bg_class = [0] + self.sample_rate = 1 + + # get all data_path + self.index = 0 + self.file_path = file_path + file_ptr = open(file_path, 'r') + self.list_of_examples = file_ptr.read().split('\n')[:-1] + file_ptr.close() + + def load_file(self): + """Load index file to get video information.""" + file_ptr = open(self.file_path, 'r') + self.list_of_examples = file_ptr.read().split('\n')[:-1] + file_ptr.close() + return self.list_of_examples + + def prepare_train(self, idx): + """TRAIN & VALID: Prepare data for training/valid given the index.""" + feature_tensor, target_tensor, mask, anchor_xmin, anchor_xmax = self._get_base_data( + idx) + match_score_start, match_score_end = self._get_train_label( + idx, target_tensor, anchor_xmin, anchor_xmax) + match_score = paddle.concat( + (match_score_start.unsqueeze(0), match_score_end.unsqueeze(0)), 0) + match_score = paddle.max(match_score, 0) #.values() + result = dict() + result['feature_tensor'] = feature_tensor + result['target_tensor'] = target_tensor + result['mask'] = mask + result['match_score'] = match_score + result['video_name'] = self.list_of_examples[idx] + return result + + def prepare_test(self, idx): + """TEST: Prepare the data for test given the index.""" + + return self.prepare_train(idx) + + def __len__(self): + return len(self.list_of_examples) + + def _get_base_data(self, index): + """Get base data for dataset.""" + features = np.load(self.features_path + + self.list_of_examples[index].split('.')[0] + '.npy') + file_ptr = open(self.gt_path + self.list_of_examples[index], 'r') + content = file_ptr.read().split('\n')[:-1] # read ground truth + # initialize and produce gt vector + classes = np.zeros(min(np.shape(features)[1], len(content))) + for i in range(len(classes)): + classes[i] = self.actions_dict[content[i]] + + # sample information by skipping each sample_rate frames + features = features[:, ::self.sample_rate] + 
target = classes[::self.sample_rate] + + # create pytorch tensor + feature_tensor = paddle.to_tensor(features) + feature_tensor = paddle.cast(feature_tensor, 'float32') + target_tensor = paddle.to_tensor(target) + target_tensor = paddle.cast(target_tensor, 'int64') + mask = paddle.ones([self.num_classes, np.shape(target)[0]]) + mask = paddle.cast(mask, 'float32') + + total_frame = target_tensor.shape[0] + temporal_scale = total_frame + temporal_gap = 1.0 / temporal_scale + anchor_xmin = [temporal_gap * i for i in range(temporal_scale)] + anchor_xmax = [temporal_gap * i for i in range(1, temporal_scale + 1)] + return feature_tensor, target_tensor, mask, anchor_xmin, anchor_xmax + + def _get_train_label(self, index, target_tensor, anchor_xmin, anchor_xmax): + """Process base data to get train label.""" + total_frame = target_tensor.shape[0] + temporal_scale = total_frame + temporal_gap = 1.0 / temporal_scale + gt_label, gt_starts, gt_ends = self._get_labels_start_end_time( + target_tensor, self.bg_class) # original length + gt_label, gt_starts, gt_ends = np.array(gt_label), np.array( + gt_starts), np.array(gt_ends) + gt_starts, gt_ends = gt_starts.astype(np.float64), gt_ends.astype( + np.float64) + gt_starts, gt_ends = gt_starts / total_frame, gt_ends / total_frame # length to 0~1 + + gt_lens = gt_ends - gt_starts + gt_len_small = np.maximum(temporal_gap, self.boundary_ratio * gt_lens) + gt_start_bboxs = np.stack( + (gt_starts - gt_len_small / 2, gt_starts + gt_len_small / 2), + axis=1) + gt_end_bboxs = np.stack( + (gt_ends - gt_len_small / 2, gt_ends + gt_len_small / 2), axis=1) + + match_score_start = [] + for jdx in range(len(anchor_xmin)): + match_score_start.append( + np.max( + self._ioa_with_anchors(anchor_xmin[jdx], anchor_xmax[jdx], + gt_start_bboxs[:, 0], + gt_start_bboxs[:, 1]))) + match_score_end = [] + for jdx in range(len(anchor_xmin)): + match_score_end.append( + np.max( + self._ioa_with_anchors(anchor_xmin[jdx], anchor_xmax[jdx], + gt_end_bboxs[:, 0], 
+ gt_end_bboxs[:, 1]))) + match_score_start = paddle.to_tensor(match_score_start) + match_score_end = paddle.to_tensor(match_score_end) + return match_score_start, match_score_end + + def _ioa_with_anchors(self, anchors_min, anchors_max, box_min, box_max): + """Calculate score""" + len_anchors = anchors_max - anchors_min + int_xmin = np.maximum(anchors_min, box_min) + int_xmax = np.minimum(anchors_max, box_max) + inter_len = np.maximum(int_xmax - int_xmin, 0.0) + scores = np.divide(inter_len, len_anchors) + return scores + + def _get_labels_start_end_time(self, target_tensor, bg_class): + """Get labels clip:[label, start time, end time]""" + labels = [] + starts = [] + ends = [] + target = target_tensor.numpy() + last_label = target[0] + if target[0] not in bg_class: + labels.append(target[0]) + starts.append(0) + + for i in range(np.shape(target)[0]): + if target[i] != last_label: + if target[i] not in bg_class: + labels.append(target[i]) + starts.append(i) + if last_label not in bg_class: + ends.append(i) + last_label = target[i] + + if last_label not in bg_class: + ends.append(np.shape(target)[0] - 1) + return labels, starts, ends diff --git a/paddlevideo/loader/pipelines/__init__.py b/paddlevideo/loader/pipelines/__init__.py index 515595be0..622cea918 100644 --- a/paddlevideo/loader/pipelines/__init__.py +++ b/paddlevideo/loader/pipelines/__init__.py @@ -29,6 +29,7 @@ from .sample_ava import * from .segmentation import MultiNorm, MultiRestrictSize from .skeleton_pipeline import AutoPadding, Iden, SkeletonNorm +from .bcn_pipeline import GetBcnBgmTrainLabel from .decode_sampler_MRI import SFMRI_DecodeSampler __all__ = [ @@ -39,5 +40,6 @@ 'GetVideoLabel', 'Cutmix', 'CenterCrop', 'RandomCrop', 'LoadFeat', 'RandomCap', 'JitterScale', 'Iden', 'VideoMix', 'ColorJitter', 'RandomFlip', 'ToArray', 'FeaturePadding', 'GetMatchMap', 'GroupRandomFlip', 'MultiCrop', - 'SFMRI_DecodeSampler', 'MultiRestrictSize', 'MultiNorm', 'RandomResizedCrop' + 'SFMRI_DecodeSampler', 
'MultiRestrictSize', 'MultiNorm', + 'RandomResizedCrop', 'GetBcnBgmTrainLabel' ] diff --git a/paddlevideo/loader/pipelines/bcn_pipeline.py b/paddlevideo/loader/pipelines/bcn_pipeline.py new file mode 100644 index 000000000..cc5b920c0 --- /dev/null +++ b/paddlevideo/loader/pipelines/bcn_pipeline.py @@ -0,0 +1,135 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np +import paddle +from ..registry import PIPELINES +"""pipeline ops for BCN Net. 
+""" + + +@PIPELINES.register() +class GetBcnBgmTrainLabel(object): + """Get train label for bcn_bgm_model.""" + + def __init__(self): + pass + + def __call__(self, result): + new_results = dict() + + # get pipeline parameter + pipeline_parameter = result['pipeline_parameter'] + self.use_full = pipeline_parameter['use_full'] + self.resized_temporal_scale = pipeline_parameter[ + 'resized_temporal_scale'] + self.bg_class = pipeline_parameter['bg_class'] + self.boundary_ratio = pipeline_parameter['boundary_ratio'] + + # get train_label + match_score_start, match_score_end = self._get_train_label(result['target_tensor'], \ + result['anchor_xmin'], result['anchor_xmax']) + match_score = paddle.concat( + (match_score_start.unsqueeze(0), match_score_end.unsqueeze(0)), 0) + match_score = paddle.max(match_score, 0) + + # get new_results + new_results['feature_tensor'] = result['feature_tensor'] + new_results['match_score'] = match_score + new_results['video_name'] = result['video_name'] + + return new_results + + def _get_labels_start_end_time(self, target_tensor, bg_class): + labels = [] + starts = [] + ends = [] + target = target_tensor.numpy() + last_label = target[0] + if target[0] not in bg_class: + labels.append(target[0]) + starts.append(0) + + for i in range(np.shape(target)[0]): + if target[i] != last_label: + if target[i] not in bg_class: + labels.append(target[i]) + starts.append(i) + if last_label not in bg_class: + ends.append(i) + last_label = target[i] + + if last_label not in bg_class: + ends.append(np.shape(target)[0] - 1) + return labels, starts, ends + + def _get_train_label(self, target_tensor, anchor_xmin, anchor_xmax): + total_frame = target_tensor.shape[0] + if self.use_full: + temporal_gap = 1.0 / total_frame + else: + temporal_gap = 1.0 / self.resized_temporal_scale + gt_label, gt_starts, gt_ends = self._get_labels_start_end_time( + target_tensor, self.bg_class) # original length + gt_label, gt_starts, gt_ends = np.array(gt_label), np.array( + 
gt_starts), np.array(gt_ends) + gt_starts, gt_ends = gt_starts.astype(np.float), gt_ends.astype( + np.float) + gt_starts, gt_ends = gt_starts / total_frame, gt_ends / total_frame # length to 0~1 + + gt_lens = gt_ends - gt_starts + gt_len_small = np.maximum(temporal_gap, self.boundary_ratio * gt_lens) + gt_start_bboxs = np.stack( + (gt_starts - gt_len_small / 2, gt_starts + gt_len_small / 2), + axis=1) + gt_end_bboxs = np.stack( + (gt_ends - gt_len_small / 2, gt_ends + gt_len_small / 2), axis=1) + + match_score_start = [] + for jdx in range(len(anchor_xmin)): + match_score_start.append( + np.max( + self._ioa_with_anchors(anchor_xmin[jdx], anchor_xmax[jdx], + gt_start_bboxs[:, 0], + gt_start_bboxs[:, 1]))) + match_score_end = [] + for jdx in range(len(anchor_xmin)): + match_score_end.append( + np.max( + self._ioa_with_anchors(anchor_xmin[jdx], anchor_xmax[jdx], + gt_end_bboxs[:, 0], + gt_end_bboxs[:, 1]))) + match_score_start = paddle.to_tensor(match_score_start) + match_score_end = paddle.to_tensor(match_score_end) + return match_score_start, match_score_end + + def _ioa_with_anchors(self, anchors_min, anchors_max, box_min, box_max): + len_anchors = anchors_max - anchors_min + int_xmin = np.maximum(anchors_min, box_min) + int_xmax = np.minimum(anchors_max, box_max) + inter_len = np.maximum(int_xmax - int_xmin, 0.0) + scores = np.divide(inter_len, len_anchors) + return scores + + +@PIPELINES.register() +class BcnModelPipeline(object): + """BCN main model do not need pipeline.""" + + def __init__(self): + pass + + def __call__(self, result): + return result diff --git a/paddlevideo/metrics/__init__.py b/paddlevideo/metrics/__init__.py index c64ba4e0d..43fc7fb91 100644 --- a/paddlevideo/metrics/__init__.py +++ b/paddlevideo/metrics/__init__.py @@ -22,6 +22,7 @@ from .skeleton_metric import SkeletonMetric from .transnetv2_metric import TransNetV2Metric from .youtube8m.eval_util import HitOneMetric +from .bcn_metric import BcnBgmMetric, BcnModelMetric from .ava_metric 
import AVAMetric from .vos_metric import VOSMetric from .center_crop_metric_MRI import CenterCropMetric_MRI @@ -29,5 +30,6 @@ __all__ = [ 'METRIC', 'build_metric', 'MultiCropMetric', 'BMNMetric', 'CenterCropMetric', 'SkeletonMetric', 'HitOneMetric', 'TransNetV2Metric', - 'DepthMetric', 'MSRVTTMetric', 'VOSMetric', 'CenterCropMetric_MRI','AVAMetric' + 'DepthMetric', 'MSRVTTMetric', 'VOSMetric', 'CenterCropMetric_MRI', + 'AVAMetric', 'BcnBgmMetric', 'BcnModelMetric' ] diff --git a/paddlevideo/metrics/bcn_metric.py b/paddlevideo/metrics/bcn_metric.py new file mode 100644 index 000000000..38aa5262e --- /dev/null +++ b/paddlevideo/metrics/bcn_metric.py @@ -0,0 +1,386 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +import os +import json +import numpy as np +import pandas as pd +import paddle +import csv + +from .registry import METRIC +from .base import BaseMetric +from paddlevideo.utils import get_logger + +logger = get_logger("paddlevideo") + + +def BGM_cal_P_R(y, BGM_output): + """Calculate precession and recall, only use in BCN_bgm. + """ + precision, recall = cal_P_R(BGM_output, y) + return precision, recall + + +def cal_P_R(anchors, scores, acc_threshold=0.5): + """Calculate precession and recall by anchors and scores. 
+ """ + scores = paddle.reshape(scores, [scores.shape[-1]]) + anchors = paddle.reshape(anchors, [anchors.shape[-1]]) + # output = (anchors > acc_threshold).int().cpu() + output = paddle.cast((anchors > acc_threshold), 'int64').cpu() + # gt=(scores > acc_threshold).int().cpu() + gt = paddle.cast((scores > acc_threshold), 'int64').cpu() + TP = 0.0 + FP = 0.0 + FN = 0.0 + if scores.shape[0] == 0: + return 0.0, 0.0 + for i in range(scores.shape[0]): + if output[i] == 1: + if output[i] == gt[i]: + TP = TP + 1 + else: + FP = FP + 1 + else: + if gt[i] == 1: + FN = FN + 1 + if (TP + FP) == 0: + return 0.0, 0.0 + precision = TP / (TP + FP) + recall = TP / (TP + FN) + return precision, recall + + +@METRIC.register +class BcnBgmMetric(BaseMetric): + """ + Metrics for bgm model of BCN + """ + + def __init__(self, data_size, batch_size, log_interval=1): + """ + Init for BCN metrics. + """ + super().__init__(data_size, batch_size, log_interval) + self.sum_precision = 0. + self.sum_recall = 0. + self.cnt4data = 0 + + def update(self, batch_id, data, outputs): + """update metrics during each iter + """ + batch_precision, batch_recall = BGM_cal_P_R(data['match_score'], + outputs) + self.sum_precision += batch_precision + self.sum_recall += batch_recall + self.cnt4data += 1 + # f1_score = 2 * (batch_precision * batch_recall) / (batch_precision + batch_recall) + # if batch_id % self.log_interval == 0: + # logger.info("Processing................ batch {}, f1 {}".format(batch_id, f1_score)) + + def accumulate(self): + """accumulate metrics when finished all iters. + """ + f1_score = 2 * ((self.sum_precision / self.cnt4data) * (self.sum_recall / self.cnt4data)) / \ + ((self.sum_precision / self.cnt4data) + (self.sum_recall / self.cnt4data)) + logger.info("Processing................ \t acc:{:.4f}\t recall:{:.4f}\t f1:{:.4f}".format(\ + (self.sum_precision / self.cnt4data), (self.sum_recall / self.cnt4data), f1_score)) + # reset + self.sum_precision = 0. + self.sum_recall = 0. 
+ self.cnt4data = 0 + + return f1_score + + +def get_labels_start_end_time(frame_wise_labels, bg_class=["background"]): + """Get each segment of [label, start_time, end_time]. + """ + labels = [] + starts = [] + ends = [] + last_label = frame_wise_labels[0] + if frame_wise_labels[0] not in bg_class: + labels.append(frame_wise_labels[0]) + starts.append(0) + for i in range(len(frame_wise_labels)): + if frame_wise_labels[i] != last_label: + if frame_wise_labels[i] not in bg_class: + labels.append(frame_wise_labels[i]) + starts.append(i) + if last_label not in bg_class: + ends.append(i) + last_label = frame_wise_labels[i] + if last_label not in bg_class: + ends.append(i + 1) + return labels, starts, ends + + +def levenstein(p, y, norm=False): + """Calculate edit score. + """ + m_row = len(p) + n_col = len(y) + D = np.zeros([m_row + 1, n_col + 1], np.float) + for i in range(m_row + 1): + D[i, 0] = i + for i in range(n_col + 1): + D[0, i] = i + + for j in range(1, n_col + 1): + for i in range(1, m_row + 1): + if y[j - 1] == p[i - 1]: + D[i, j] = D[i - 1, j - 1] + else: + D[i, j] = min(D[i - 1, j] + 1, D[i, j - 1] + 1, + D[i - 1, j - 1] + 1) + + if norm: + score = (1 - D[-1, -1] / max(m_row, n_col)) * 100 + else: + score = D[-1, -1] + + return score + + +def edit_score(recognized, ground_truth, norm=True, bg_class=["background"]): + """Get labels and calculate edit score. + """ + P, _, _ = get_labels_start_end_time(recognized, bg_class) + Y, _, _ = get_labels_start_end_time(ground_truth, bg_class) + return levenstein(P, Y, norm) + + +def f_score(recognized, ground_truth, overlap, bg_class=["background"]): + """Calculate f-score. 
+ """ + p_label, p_start, p_end = get_labels_start_end_time(recognized, bg_class) + y_label, y_start, y_end = get_labels_start_end_time(ground_truth, bg_class) + + tp = 0 + fp = 0 + + hits = np.zeros(len(y_label)) + + for j in range(len(p_label)): + intersection = np.minimum(p_end[j], y_end) - np.maximum( + p_start[j], y_start) + union = np.maximum(p_end[j], y_end) - np.minimum(p_start[j], y_start) + IoU = (1.0 * intersection / union) * ( + [p_label[j] == y_label[x] for x in range(len(y_label))]) + # Get the best scoring segment + idx = np.array(IoU).argmax() + + if IoU[idx] >= overlap and not hits[idx]: + tp += 1 + hits[idx] = 1 + else: + fp += 1 + fn = len(y_label) - sum(hits) + return float(tp), float(fp), float(fn) + + +def create_csv(path): + """Create csv file. + """ + dir_path = '/'.join(path.split('/')[:-1]) + if not os.path.exists(dir_path): + os.makedirs(dir_path) + with open(path, "w+", newline='') as file: + csv_file = csv.writer(file) + head = ["Acc", "Edit", "F1@10", "F1@25", "F1@50"] + csv_file.writerow(head) + + +def append_csv(path, metric_list): + """Additional written to csv file. + """ + with open(path, "a+", newline='' + ) as file: # 处理csv读写时不同换行符 linux:\n windows:\r\n mac:\r + csv_file = csv.writer(file) + datas = [metric_list] + csv_file.writerows(datas) + + +@METRIC.register +class BcnModelMetric(BaseMetric): + """ + For Video Segmentation main model. 
+ """ + + def __init__(self, + data_size, + batch_size, + overlap, + actions_map_file_path, + log_path, + dataset, + log_interval=1): + """prepare for metrics + """ + super().__init__(data_size, batch_size, log_interval) + # actions dict generate + file_ptr = open(actions_map_file_path, 'r') + actions = file_ptr.read().split('\n')[:-1] + file_ptr.close() + self.actions_dict = dict() + for a in actions: + self.actions_dict[a.split()[1]] = int(a.split()[0]) + + if os.path.exists(log_path): + os.remove(log_path) + create_csv(log_path) + self.log_path = log_path + + self.overlap = overlap + self.overlap_len = len(overlap) + + bg_class = ["action_start", "action_end"] + if dataset == 'gtea': + bg_class = ['background'] + if dataset == 'breakfast': + bg_class = ['SIL'] + self.bg_class = bg_class + + self.total_tp = np.zeros(self.overlap_len) + self.total_fp = np.zeros(self.overlap_len) + self.total_fn = np.zeros(self.overlap_len) + self.total_correct = 0 + self.total_edit = 0 + self.total_frame = 0 + self.total_video = 0 + + def update(self, batch_id, data, outputs): + """update metrics during each iter + """ + groundTruth = data['target_tensor'] + + outputs_np = outputs.cpu().detach().numpy() + gt_np = groundTruth.cpu().detach().numpy()[0, :] + + recognition = [] + for i in range(outputs_np.shape[0]): + recognition = np.concatenate((recognition, [ + list(self.actions_dict.keys())[list( + self.actions_dict.values()).index(outputs_np[i])] + ])) + recog_content = list(recognition) + + gt_content = [] + for i in range(gt_np.shape[0]): + gt_content = np.concatenate((gt_content, [ + list(self.actions_dict.keys())[list( + self.actions_dict.values()).index(gt_np[i])] + ])) + gt_content = list(gt_content) + + tp, fp, fn = np.zeros(self.overlap_len), np.zeros( + self.overlap_len), np.zeros(self.overlap_len) + + correct = 0 + total = 0 + edit = 0 + + for i in range(len(gt_content)): + total += 1 + #accumulate + self.total_frame += 1 + + if gt_content[i] == recog_content[i]: + 
correct += 1 + #accumulate + self.total_correct += 1 + + edit_num = edit_score(recog_content, gt_content, bg_class=self.bg_class) + edit += edit_num + self.total_edit += edit_num + + for s in range(self.overlap_len): + tp1, fp1, fn1 = f_score(recog_content, + gt_content, + self.overlap[s], + bg_class=self.bg_class) + tp[s] += tp1 + fp[s] += fp1 + fn[s] += fn1 + + # accumulate + self.total_tp[s] += tp1 + self.total_fp[s] += fp1 + self.total_fn[s] += fn1 + + # accumulate + self.total_video += 1 + + Acc = 100 * float(correct) / total + Edit = (1.0 * edit) / 1.0 + Fscore = dict() + for s in range(self.overlap_len): + precision = tp[s] / float(tp[s] + fp[s]) + recall = tp[s] / float(tp[s] + fn[s]) + + f1 = 2.0 * (precision * recall) / (precision + recall) + + f1 = np.nan_to_num(f1) * 100 + Fscore[self.overlap[s]] = f1 + + # preds ensemble + # if batch_id % self.log_interval == 0: + # logger.info("batch_id:[{:d}] model performence".format(batch_id)) + # logger.info("Acc: {:.4f}".format(Acc)) + # logger.info('Edit: {:.4f}'.format(Edit)) + # for s in range(len(self.overlap)): + # logger.info('F1@{:0.2f}: {:.4f}'.format( + # self.overlap[s], Fscore[self.overlap[s]])) + + def accumulate(self): + """accumulate metrics when finished all iters. 
+ """ + Acc = 100 * float(self.total_correct) / self.total_frame + Edit = (1.0 * self.total_edit) / self.total_video + Fscore = dict() + for s in range(self.overlap_len): + precision = self.total_tp[s] / float(self.total_tp[s] + + self.total_fp[s]) + recall = self.total_tp[s] / float(self.total_tp[s] + + self.total_fn[s]) + + f1 = 2.0 * (precision * recall) / (precision + recall) + + f1 = np.nan_to_num(f1) * 100 + Fscore[self.overlap[s]] = f1 + + # preds ensemble + logger.info("dataset model performence:") + logger.info("Acc: {:.4f}".format(Acc)) + logger.info('Edit: {:.4f}'.format(Edit)) + for s in range(len(self.overlap)): + logger.info('F1@{:0.2f}: {:.4f}'.format(self.overlap[s], + Fscore[self.overlap[s]])) + + # clear for next epoch + self.total_tp = np.zeros(self.overlap_len) + self.total_fp = np.zeros(self.overlap_len) + self.total_fn = np.zeros(self.overlap_len) + self.total_correct = 0 + self.total_edit = 0 + self.total_frame = 0 + self.total_video = 0 + + # log metric + metric_list = [Acc, Edit] + for s in range(self.overlap_len): + metric_list.append(Fscore[self.overlap[s]]) + append_csv(self.log_path, metric_list) + + return [Acc, Edit, Fscore] diff --git a/paddlevideo/modeling/backbones/__init__.py b/paddlevideo/modeling/backbones/__init__.py index ac06cb4fa..165abc36f 100644 --- a/paddlevideo/modeling/backbones/__init__.py +++ b/paddlevideo/modeling/backbones/__init__.py @@ -30,11 +30,12 @@ from .transnetv2 import TransNetV2 from .vit import VisionTransformer from .vit_tweaks import VisionTransformer_tweaks +from .bcn import BcnBgm, BcnModel __all__ = [ 'ResNet', 'ResNetTSM', 'ResNetTweaksTSM', 'ResNetSlowFast', 'BMN', 'ResNetTweaksTSN', 'VisionTransformer', 'STGCN', 'AGCN', 'TransNetV2', 'SwinTransformer3D', 'BertForMultiModalPreTraining', 'ADDS_DepthNet', 'VisionTransformer_tweaks', 'ResNetTSN_MRI', 'ResNetTSM_MRI', - 'ResNetSlowFast_MRI', 'CFBI' + 'ResNetSlowFast_MRI', 'CFBI', 'BcnBgm', 'BcnModel' ] diff --git a/paddlevideo/modeling/backbones/bcn.py 
b/paddlevideo/modeling/backbones/bcn.py new file mode 100644 index 000000000..bd821428f --- /dev/null +++ b/paddlevideo/modeling/backbones/bcn.py @@ -0,0 +1,527 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import numpy as np +import paddle +import paddle.nn.functional as F +import paddle.nn as nn +import copy +from ..registry import BACKBONES + + +def _calculate_fan_in_and_fan_out(tensor): + dimensions = len(tensor.shape) + if dimensions < 2: + raise ValueError("Fan in and fan out can not be computed \ + for tensor with fewer than 2 dimensions") + + if dimensions == 2: # Linear + fan_in = tensor.shape[1] + fan_out = tensor.shape[0] + else: + num_input_fmaps = tensor.shape[1] + num_output_fmaps = tensor.shape[0] + receptive_field_size = 1 + if tensor.dim() > 2: + receptive_field_size = tensor[0][0].numel() + fan_in = num_input_fmaps * receptive_field_size + fan_out = num_output_fmaps * receptive_field_size + + return fan_in, fan_out + + +def calculate_gain(nonlinearity=None, a=None): + """calculate_gain like torch + """ + if nonlinearity == 'tanh': + return 5.0 / 3 + elif nonlinearity == 'relu': + return math.sqrt(2.0) + elif nonlinearity == 'leaky_relu': + if a is not None: + return math.sqrt(2.0 / (1 + a**2)) + else: + return math.sqrt(2.0 / (1 + 0.01**2)) + elif nonlinearity == 'selu': + return 3.0 / 4 + else: + return 1 + + +def KaimingUniform_like_torch(weight_npy, + mode='fan_in', 
+ nonlinearity='leaky_relu'): + """KaimingUniform_like_torch + """ + fan_in, fan_out = _calculate_fan_in_and_fan_out(weight_npy) + if mode == 'fan_in': + fan_mode = fan_in + else: + fan_mode = fan_out + a = math.sqrt(5.0) + gain = calculate_gain(nonlinearity=nonlinearity, a=a) + std = gain / math.sqrt(fan_mode) + bound = math.sqrt(3.0) * std + return np.random.uniform(-bound, bound, weight_npy.shape) + + +def init_bias(weight_npy, bias_npy): + """init_bias like torhc + """ + fan_in, fan_out = _calculate_fan_in_and_fan_out(weight_npy) + bound = 1.0 / math.sqrt(fan_in) + return np.random.uniform(-bound, bound, bias_npy.shape) + + +class BgmDilatedResidualLayer(nn.Layer): + """mstcn layer + """ + + def __init__(self, dilation, in_channels, out_channels): + super(BgmDilatedResidualLayer, self).__init__() + self.conv_dilated = nn.Conv1D(in_channels, + out_channels, + 3, + padding=dilation, + dilation=dilation) + self.conv_1x1 = nn.Conv1D(out_channels, out_channels, 1) + self.dropout = nn.Dropout() + + def forward(self, x): + """mstcn layer forward + """ + out = F.relu(self.conv_dilated(x)) + out = self.conv_1x1(out) + out = self.dropout(out) + return (x + out) + + +class FullBGM(nn.Layer): + """FullBGM in BCN_bgm + """ + + def __init__(self): + super(FullBGM, self).__init__() + self.feat_dim = 2048 + self.batch_size = 1 + self.c_hidden = 256 + self.bgm_best_loss = 10000000 + self.bgm_best_f1 = -10000000 + self.bgm_best_precision = -10000000 + self.output_dim = 1 + self.num_layers = 3 + self.conv_in = nn.Conv1D(self.feat_dim, self.c_hidden, 1) + self.layers = nn.LayerList( + [copy.deepcopy(BgmDilatedResidualLayer(2 ** (i + 2), self.c_hidden, self.c_hidden)) \ + for i in range(self.num_layers)] + ) + self.conv_out = nn.Conv1D(self.c_hidden, self.output_dim, 1) + + def forward(self, x): + """FullBGM forward + """ + out = self.conv_in(x) + for layer in self.layers: + out = layer(out) + out = self.conv_out(out) + out = F.sigmoid(0.01 * out) + return out + + def 
init_weights(self): + """init_weights by kaiming uniform + """ + for layer in self.sublayers(): + if isinstance(layer, nn.Conv1D): + layer.weight.set_value( + KaimingUniform_like_torch(layer.weight).astype('float32')) + if layer.bias is not None: + layer.bias.set_value( + init_bias(layer.weight, layer.bias).astype('float32')) + + +class ResizedBGM(nn.Layer): + """ResizedBGM in BCN_bgm + """ + + def __init__(self, dataset): + super(ResizedBGM, self).__init__() + self.feat_dim = 2048 + if dataset == 'breakfast' or dataset == 'gtea': + self.temporal_dim = 300 + elif dataset == '50salads': + self.temporal_dim = 400 + self.batch_size = 40 + self.batch_size_test = 10 + self.c_hidden = 512 + self.bgm_best_loss = 10000000 + self.bgm_best_f1 = -10000000 + self.output_dim = 1 + self.conv1 = nn.Conv1D(in_channels=self.feat_dim, + out_channels=self.c_hidden, + kernel_size=3, + stride=1, + padding=1, + groups=1) + self.conv2 = nn.Conv1D(in_channels=self.c_hidden, + out_channels=self.c_hidden, + kernel_size=3, + stride=1, + padding=1, + groups=1) + self.conv3 = nn.Conv1D(in_channels=self.c_hidden, + out_channels=self.output_dim, + kernel_size=1, + stride=1, + padding=0) + + def forward(self, x): + """ResizedBGM forward + """ + x = F.relu(self.conv1(x)) + x = F.relu(self.conv2(x)) + x = F.sigmoid(self.conv3(x)) + return x + + def init_weights(self): + """init_weights by kaiming uniform + """ + for layer in self.sublayers(): + if isinstance(layer, nn.Conv1D): + layer.weight.set_value( + KaimingUniform_like_torch(layer.weight).astype('float32')) + if layer.bias is not None: + layer.bias.set_value( + init_bias(layer.weight, layer.bias).astype('float32')) + + +@BACKBONES.register() +class BcnBgm(nn.Layer): + """for BCN_bgm + """ + + def __init__(self, dataset, use_full): + super(BcnBgm, self).__init__() + if use_full: + self.bgm = FullBGM() + else: + self.bgm = ResizedBGM(dataset) + + def init_weights(self): + """init_weights by kaiming uniform + """ + for layer in self.sublayers(): 
+ if isinstance(layer, nn.Conv1D): + layer.weight.set_value( + KaimingUniform_like_torch(layer.weight).astype('float32')) + if layer.bias is not None: + layer.bias.set_value( + init_bias(layer.weight, layer.bias).astype('float32')) + + def forward(self, x): + """bgm forward + """ + return self.bgm(x) + + +class SingleStageModel(nn.Layer): + """SingleStageModel in mstcn + """ + + def __init__(self, num_layers, num_f_maps, dim, num_classes): + super(SingleStageModel, self).__init__() + self.conv_1x1 = nn.Conv1D(dim, num_f_maps, 1) + self.layers = nn.LayerList([ + copy.deepcopy(DilatedResidualLayer(2**i, num_f_maps, num_f_maps)) + for i in range(num_layers) + ]) + self.conv_out = nn.Conv1D(num_f_maps, num_classes, 1) + + def forward(self, x, mask): + """forward + """ + feature = self.conv_1x1(x) + for layer in self.layers: + feature = layer(feature, mask) + out = self.conv_out(feature) * mask[:, 0:1, :] + return out, feature * mask[:, 0:1, :] + + +class DilatedResidualLayer(nn.Layer): + """DilatedResidualLayer in mstcn + """ + + def __init__(self, dilation, in_channels, out_channels): + super(DilatedResidualLayer, self).__init__() + self.conv_dilated = nn.Conv1D(in_channels, + out_channels, + 3, + padding=dilation, + dilation=dilation) + self.conv_1x1 = nn.Conv1D(out_channels, out_channels, 1) + self.dropout = nn.Dropout() # default value is 0.5 + self.bn = nn.BatchNorm1D(in_channels, + epsilon=1e-08, + momentum=0.1, + use_global_stats=True) + + def forward(self, x, mask, use_bn=False): + """forward + """ + out = F.relu(self.conv_dilated(x)) + out = self.conv_1x1(out) + if use_bn: + out = self.bn(out) + else: + out = self.dropout(out) + return (x + out) * mask[:, 0:1, :] + + +def MultiplyList(myList): + """multiplyList + """ + result = 1 + for x in myList: + result = result * x + return [result] + + +@BACKBONES.register() +class BcnModel(nn.Layer): + def __init__(self, num_stages, num_layers, num_f_maps, dim, num_classes, dataset, use_lbp, num_soft_lbp, \ + 
pretrained=None): + super(BcnModel, self).__init__() + self.num_stages = num_stages # number of cascade stages + self.stage1 = SingleStageModel(num_layers, num_f_maps, dim, + num_classes) # cascade stage 1 + stages = [ + copy.deepcopy( + SingleStageModel(num_layers, num_f_maps, + dim + (s + 1) * num_f_maps, num_classes)) + for s in range(num_stages - 1) + ] + self.stages = nn.LayerList(stages) # cascade stage 2,...,n + self.stageF = SingleStageModel(num_layers, 64, num_classes, + num_classes) # fusion stage + self.bgm = FullBGM() + self.lbp_in = LocalBarrierPooling(7, alpha=1) + self.use_lbp = use_lbp + self.num_soft_lbp = num_soft_lbp + self.num_classes = num_classes + if dataset == '50salads': + self.lbp_out = LocalBarrierPooling(99, alpha=0.2) # has lbp_post + if dataset == 'breakfast': + self.lbp_out = LocalBarrierPooling(159, alpha=0.3) # has lbp_post + if dataset == 'gtea': + self.lbp_out = LocalBarrierPooling( + 99, alpha=1 + ) # no lbp_post for gtea (because of bad barrier quality of resized BGM due to small dataset size), so alpha=1 + + def init_weights(self): + """init_weights by kaiming uniform + """ + for layer in self.sublayers(): + if isinstance(layer, nn.Conv1D): + layer.weight.set_value( + KaimingUniform_like_torch(layer.weight).astype('float32')) + if layer.bias is not None: + layer.bias.set_value( + init_bias(layer.weight, layer.bias).astype('float32')) + + def forward(self, x, mask, gt_target=None, soft_threshold=0.8): + """ forward""" + mask.stop_gradient = True + x.stop_gradient = True + adjusted_weight = mask[:, 0:1, :].clone().detach().unsqueeze( + 0) # weights for SC + for i in range(self.num_stages - 1): + adjusted_weight = paddle.concat( + (adjusted_weight, mask[:, + 0:1, :].clone().detach().unsqueeze(0))) + confidence = [] + feature = [] + if gt_target is not None: + gt_target = gt_target.unsqueeze(0) + + # stage 1 + out1, feature1 = self.stage1(x, mask) + outputs = out1.unsqueeze(0) + feature.append(feature1) + 
confidence.append(F.softmax(out1, axis=1) * mask[:, 0:1, :]) + confidence[0].stop_gradient = True + + if gt_target is None: + max_conf = paddle.max(confidence[0], axis=1) + max_conf = max_conf.unsqueeze(1).clone().detach() + max_conf.stop_gradient = True + decrease_flag = (max_conf > soft_threshold) + decrease_flag = paddle.cast(decrease_flag, 'float32') + increase_flag = mask[:, 0:1, :].clone().detach() - decrease_flag + adjusted_weight[1] = max_conf.neg().exp( + ) * decrease_flag + max_conf.exp() * increase_flag # for stage 2 + else: + one_hot = F.one_hot(gt_target[0], self.num_classes) + gt_conf = ((confidence[0] * + paddle.transpose(one_hot, [0, 2, 1])).sum(1))[0] + gt_conf = paddle.to_tensor(gt_conf).unsqueeze(0).unsqueeze(0) + decrease_flag = (gt_conf > soft_threshold) + decrease_flag = paddle.cast(decrease_flag, 'float32') + increase_flag = mask[:, 0:1, :].clone().detach() - decrease_flag + adjusted_weight[1] = gt_conf.neg().exp( + ) * decrease_flag + gt_conf.exp() * increase_flag + + # stage 2,...,n + curr_stage = 0 + for s in self.stages: + # for s_i in range(self.num_stages - 2): + curr_stage = curr_stage + 1 + temp = feature[0] + for i in range(1, len(feature)): + temp = paddle.concat( + (temp, feature[i]), axis=1) * mask[:, 0:1, :] + temp = paddle.concat((temp, x), axis=1) + curr_out, curr_feature = s(temp, mask) + outputs = paddle.concat((outputs, curr_out.unsqueeze(0)), axis=0) + feature.append(curr_feature) + confidence.append(F.softmax(curr_out, axis=1) * mask[:, 0:1, :]) + confidence[curr_stage].stop_gradient = True + if curr_stage < self.num_stages - 1: # curr_stage starts from 0 + + if gt_target is None: + max_conf = paddle.max(confidence[curr_stage], axis=1) + max_conf = max_conf.unsqueeze(1).clone().detach() + max_conf.stop_gradient = True + decrease_flag = (max_conf > soft_threshold) + decrease_flag = paddle.cast(decrease_flag, 'float32') + increase_flag = mask[:, 0:1, :].clone().detach( + ) - decrease_flag + adjusted_weight[curr_stage + 1] = 
max_conf.neg().exp( + ) * decrease_flag + max_conf.exp( + ) * increase_flag # output the weight for the next stage + else: + one_hot = F.one_hot(gt_target[0], self.num_classes) + gt_conf = ((confidence[curr_stage] * + paddle.transpose(one_hot, [0, 2, 1])).sum(1))[0] + gt_conf = paddle.to_tensor(gt_conf).unsqueeze(0).unsqueeze( + 0) + decrease_flag = (gt_conf > soft_threshold) + decrease_flag = paddle.cast(decrease_flag, 'float32') + increase_flag = mask[:, 0:1, :].clone().detach( + ) - decrease_flag + adjusted_weight[curr_stage + 1] = gt_conf.neg().exp( + ) * decrease_flag + gt_conf.exp() * increase_flag + + output_weight = adjusted_weight.detach() + output_weight.stop_gradient = True + adjusted_weight = adjusted_weight / paddle.sum( + adjusted_weight, 0) # normalization among stages + temp = F.softmax(out1, axis=1) * adjusted_weight[0] + for i in range(1, self.num_stages): + temp += F.softmax(outputs[i], axis=1) * adjusted_weight[i] + confidenceF = temp * mask[:, 0:1, :] # input of fusion stage + + # Inner LBP for confidenceF + barrier, BGM_output = self.fullBarrier(x) + if self.use_lbp: + confidenceF = self.lbp_in(confidenceF, barrier) + + # fusion stage: for more consistent output because of the combination of cascade stages may have much fluctuations + out, _ = self.stageF(confidenceF, mask) # use mixture of cascade stages + + # Final LBP for output + if self.use_lbp: + for i in range(self.num_soft_lbp): + out = self.lbp_out(out, barrier) + + confidence_last = paddle.clip( + F.softmax(out, axis=1), min=1e-4, max=1 - + 1e-4) * mask[:, 0:1, :] # torch.clamp for training stability + outputs = paddle.concat((outputs, confidence_last.unsqueeze(0)), axis=0) + return outputs, BGM_output, output_weight + + def fullBarrier(self, feature_tensor): + """fullBarrier + """ + BGM_output = self.bgm(feature_tensor) + barrier = BGM_output + return barrier, BGM_output + + +def im2col(input_data, kh, kw, stride=1, pad=0, dilation=1): + """ + calculate im2col + """ + N, C, H, W = 
input_data.shape + dh, dw = dilation * (kh - 1) + 1, dilation * (kw - 1) + 1 + h_out = (H + 2 * pad - dh) // stride + 1 + w_out = (W + 2 * pad - dw) // stride + 1 + img = F.pad(input_data, [pad, pad, pad, pad], "constant", value=0) + col = paddle.zeros((N, C, dh, dw, h_out, w_out)) + + for y in range(dh): + y_max = y + stride * h_out + for x in range(dw): + x_max = x + stride * w_out + col[:, :, y, x, :, :] += img[:, :, y:y_max:stride, x:x_max:stride] + res = col.reshape([N, C * dh * dw, h_out * w_out]) + return res + + +def unfold_1d(x, kernel_size=7, pad_value=0): + """unfold_1d + """ + B, C, T = x.shape + padding = kernel_size // 2 + x = x.unsqueeze(-1) + x = F.pad(x, (0, 0, padding, padding), value=pad_value) + x = paddle.cast(x, 'float32') + D = F.unfold(x, [kernel_size, 1]) + # D = im2col(x, kernel_size, 1) + return paddle.reshape(D, [B, C, kernel_size, T]) + + +def dual_barrier_weight(b, kernel_size=7, alpha=0.2): + """dual_barrier_weight + """ + K = kernel_size + b = unfold_1d(b, kernel_size=K, pad_value=20) + # b: (B, 1, K, T) + HL = K // 2 + left = paddle.flip( + paddle.cumsum(paddle.flip(b[:, :, :HL + 1, :], [2]), axis=2), + [2])[:, :, :-1, :] + right = paddle.cumsum(b[:, :, -HL - 1:, :], axis=2)[:, :, 1:, :] + middle = paddle.zeros_like(b[:, :, 0:1, :]) + #middle = b[:, :, HL:-HL, :] + weight = alpha * paddle.concat((left, middle, right), axis=2) + return weight.neg().exp() + + +class LocalBarrierPooling(nn.Layer): + """LBP in BCN paper + """ + + def __init__(self, kernel_size=99, alpha=0.2): + super(LocalBarrierPooling, self).__init__() + self.kernel_size = kernel_size + self.alpha = alpha + + def forward(self, x, barrier): + """ + x: (B, C, T) + barrier: (B, 1, T) (>=0) + """ + xs = unfold_1d(x, self.kernel_size) + w = dual_barrier_weight(barrier, self.kernel_size, self.alpha) + return (xs * w).sum(axis=2) / ((w).sum(axis=2) + np.exp(-10)) diff --git a/paddlevideo/modeling/builder.py b/paddlevideo/modeling/builder.py index be8a6caf2..40ba181d4 100644 
--- a/paddlevideo/modeling/builder.py +++ b/paddlevideo/modeling/builder.py @@ -16,7 +16,8 @@ from ..utils import build from .registry import (BACKBONES, BBOX_ASSIGNERS, BBOX_CODERS, BBOX_SAMPLERS, DETECTORS, ESTIMATORS, HEADS, LOCALIZERS, LOSSES, - MULTIMODAL, PARTITIONERS, RECOGNIZERS, ROI_EXTRACTORS) + MULTIMODAL, PARTITIONERS, RECOGNIZERS, ROI_EXTRACTORS, + SEGMENTERS) def build_backbone(cfg): @@ -94,6 +95,11 @@ def build_multimodal(cfg): return build(cfg, MULTIMODAL, key='framework') +def build_segmenter(cfg): + """Build segmenter.""" + return build(cfg, SEGMENTERS, key='framework') + + def build_segment(cfg): """Build segment.""" return build(cfg, SEGMENT, key='framework') @@ -114,6 +120,8 @@ def build_model(cfg): return build_estimator(cfg) elif framework_type in MULTIMODAL: return build_multimodal(cfg) + elif framework_type in SEGMENTERS: + return build_segmenter(cfg) elif framework_type in SEGMENT: return build_segment(cfg) else: diff --git a/paddlevideo/modeling/framework/__init__.py b/paddlevideo/modeling/framework/__init__.py index c0e5f4673..e3fa88539 100644 --- a/paddlevideo/modeling/framework/__init__.py +++ b/paddlevideo/modeling/framework/__init__.py @@ -17,10 +17,12 @@ from .partitioners import BasePartitioner, TransNetV2Partitioner from .recognizers import BaseRecognizer, Recognizer2D from .multimodal import ActBert, BaseMultimodal +from .segmenters import BcnBgm, BcnModel from .segment import BaseSegment, CFBI __all__ = [ 'BaseRecognizer', 'Recognizer2D', 'BaseLocalizer', 'BMNLocalizer', 'BasePartitioner', 'TransNetV2Partitioner', 'BaseEstimator', - 'DepthEstimator', 'BaseMultimodal', 'ActBert', 'BaseSegment', 'CFBI' + 'DepthEstimator', 'BaseMultimodal', 'ActBert', 'BaseSegment', 'CFBI', + 'BcnBgm', 'BcnModel' ] diff --git a/paddlevideo/modeling/framework/segmenters/__init__.py b/paddlevideo/modeling/framework/segmenters/__init__.py new file mode 100644 index 000000000..19fa726e1 --- /dev/null +++ 
b/paddlevideo/modeling/framework/segmenters/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +from .base import BaseSegmenter +from .bcn import BcnBgm, BcnModel + +__all__ = ["BaseSegmenter", "BcnBgm", "BcnModel"] diff --git a/paddlevideo/modeling/framework/segmenters/base.py b/paddlevideo/modeling/framework/segmenters/base.py new file mode 100644 index 000000000..f4670a2b1 --- /dev/null +++ b/paddlevideo/modeling/framework/segmenters/base.py @@ -0,0 +1,97 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +from abc import abstractmethod +from ... import builder +import paddle.nn as nn + + +class BaseSegmenter(nn.Layer): + """Base class for segementers. + + All segementers should subclass it. + All subclass should overwrite: + + - Methods:``train_step``, supporting to forward when training. + - Methods:``valid_step``, supporting to forward when validating. + - Methods:``test_step``, supporting to forward when testing. + + Args: + backbone (dict): Backbone modules to extract feature. 
+ head (dict): Classification head to process feature. + loss(dict): Loss function. + + """ + + def __init__(self, backbone=None, head=None, loss=None): + + super().__init__() + if backbone is not None: + self.backbone = builder.build_backbone(backbone) + if hasattr(self.backbone, 'init_weights'): + self.backbone.init_weights() + else: + self.backbone = None + + if head is not None: + self.head_name = head.name + self.head = builder.build_head(head) + if hasattr(self.head, 'init_weights'): + self.head.init_weights() + else: + self.head = None + + if loss is not None: + self.loss = builder.build_loss(loss) + else: + self.loss = None + + def forward(self, data_batch, mode='infer'): + """ + 1. Define how the model is going to run, from input to output. + 2. Console of train, valid, test or infer step + 3. Set mode='infer' is used for saving inference model, refer to tools/export_model.py + """ + if mode == 'train': + return self.train_step(data_batch) + elif mode == 'valid': + return self.val_step(data_batch) + elif mode == 'test': + return self.test_step(data_batch) + elif mode == 'infer': + return self.infer_step(data_batch) + else: + raise NotImplementedError + + @abstractmethod + def train_step(self, data_batch, **kwargs): + """Training step. + """ + raise NotImplementedError + + @abstractmethod + def val_step(self, data_batch, **kwargs): + """Validating step. + """ + raise NotImplementedError + + @abstractmethod + def test_step(self, data_batch, **kwargs): + """Test step. + """ + raise NotImplementedError + + @abstractmethod + def infer_step(self, data_batch, **kwargs): + """Infer step. + """ + raise NotImplementedError diff --git a/paddlevideo/modeling/framework/segmenters/bcn.py b/paddlevideo/modeling/framework/segmenters/bcn.py new file mode 100644 index 000000000..749aa64d9 --- /dev/null +++ b/paddlevideo/modeling/framework/segmenters/bcn.py @@ -0,0 +1,290 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +from ...registry import SEGMENTERS +from .base import BaseSegmenter +from paddlevideo.utils import load + +import paddle +import paddle.nn.functional as F +import paddle.nn as nn +import pandas as pd +import numpy as np + + +def unfold_1d(x, kernel_size=7, pad_value=0): + """unfold_1d + """ + B, C, T = x.shape + padding = kernel_size // 2 + x = x.unsqueeze(-1) + x = F.pad(x, (0, 0, padding, padding), value=pad_value) + x = paddle.cast(x, 'float32') + D = F.unfold(x, [kernel_size, 1]) + return paddle.reshape(D, [B, C, kernel_size, T]) + + +def dual_barrier_weight(b, kernel_size=7, alpha=0.2): + """dual_barrier_weight + """ + K = kernel_size + b = unfold_1d(b, kernel_size=K, pad_value=20) + # b: (B, 1, K, T) + HL = K // 2 + left = paddle.flip( + paddle.cumsum(paddle.flip(b[:, :, :HL + 1, :], [2]), axis=2), + [2])[:, :, :-1, :] + right = paddle.cumsum(b[:, :, -HL - 1:, :], axis=2)[:, :, 1:, :] + middle = paddle.zeros_like(b[:, :, 0:1, :]) + #middle = b[:, :, HL:-HL, :] + weight = alpha * paddle.concat((left, middle, right), axis=2) + return weight.neg().exp() + + +class LocalBarrierPooling(nn.Layer): + """LocalBarrierPooling + """ + + def __init__(self, kernel_size=99, alpha=0.2): + super(LocalBarrierPooling, self).__init__() + self.kernel_size = kernel_size + self.alpha = alpha + + def forward(self, x, barrier): + """ + x: (B, C, T) + barrier: (B, 1, T) (>=0) + """ + xs = unfold_1d(x, self.kernel_size) + w = dual_barrier_weight(barrier, self.kernel_size, self.alpha) + return (xs * w).sum(axis=2) / ((w).sum(axis=2) + 
np.exp(-10)) + + +@SEGMENTERS.register() +class BcnBgm(BaseSegmenter): + """BCN model framework.""" + + def forward_net(self, video_feature): + """Define how the model is going to train, from input to output. + """ + feature = self.backbone(video_feature) + return feature + + def train_step(self, data_batch): + """Training step. + """ + feature, label = data_batch['feature_tensor'], data_batch['match_score'] + self.backbone.train() + outputs = self.forward_net(feature) + train_loss = self.loss(label, outputs) + loss_metrics = {} + loss_metrics['loss'] = train_loss + + return loss_metrics + + def val_step(self, data_batch): + """Validating setp. + """ + if isinstance(data_batch, dict): + feature = data_batch['feature_tensor'] + elif isinstance(data_batch, list): + feature = data_batch[0] + self.backbone.eval() + outputs = self.forward_net(feature) + + return outputs + + def test_step(self, data_batch): + """Testing setp. + """ + outputs = self.val_step(data_batch) + self.head(outputs, data_batch['video_name'][0].split('.')[0]) + + return outputs + + def infer_step(self, data_batch): + """Infering setp. + """ + outputs = self.val_step(data_batch) + + return outputs + + +@SEGMENTERS.register() +class BcnModel(BaseSegmenter): + """BCN model framework. + e.g. 
+ data_path = ./data/50salads/splits/train.split1.bundle + bgm_result_path = ./output/BcnBgmResized/results + """ + + def __init__(self, data_path, bgm_result_path, bgm_pdparams, use_lbp, + num_post, **kwargs): + super(BcnModel, self).__init__(**kwargs) + # assert parameter + assert '//' not in data_path, "don't use '//' in data_path, please use '/'" + self.use_lbp = use_lbp + self.bgm_result_path = bgm_result_path + self.num_post = num_post + + self.iter = 0 + self.epoch = 0 + + file_ptr = open(data_path, 'r') + list_of_examples = file_ptr.read().split('\n')[:-1] + file_ptr.close() + self.epoch_iters = len(list_of_examples) + + dataset = data_path.split('/')[-3] + freeze_epochs = 15 + pooling_length = 99 + if dataset == 'breakfast': + freeze_epochs = 20 + pooling_length = 159 + elif dataset == 'gtea': + freeze_epochs = 18 + self.freeze_epochs = freeze_epochs + self.pooling_length = pooling_length + self.dataset = dataset + self.lbp = LocalBarrierPooling(pooling_length, alpha=2) + self.backbone.bgm.set_state_dict( + self.transformer_param_dict(load(bgm_pdparams))) + + def transformer_param_dict(self, param_dict): + """transformer param_dict for bgm + """ + new_param_dict = dict() + for k in param_dict.keys(): + new_param_dict['.'.join( + k.split('.')[2:])] = param_dict[k].cpu().detach().numpy() + return new_param_dict + + def update_iter(self): + """update_iter only use in train + """ + if (self.epoch == 0) and (self.iter + == 0) and (self.epoch <= self.freeze_epochs): + self.freeze(self.backbone.bgm, True) + + self.iter += 1 + if self.iter >= self.epoch_iters: + self.iter = 0 + self.epoch += 1 + if self.epoch > self.freeze_epochs: + self.freeze(self.backbone.bgm, False) + + def freeze(self, sub_layer, flag): + """freezing layer + """ + for _, param in sub_layer.named_parameters(): + param.stop_gradient = flag + + def forward_net(self, batch_input, mask, gt_target=None): + """Define how the model is going to train, from input to output. 
+ """ + outputs, BGM_output, output_weight = self.backbone(batch_input, mask) + self.update_iter() + return outputs, BGM_output, output_weight + + def train_step(self, data_batch): + """Training step. + """ + if isinstance(data_batch, dict): + input_x, batch_target = data_batch['feature_tensor'], data_batch[ + 'target_tensor'] + mask = data_batch['mask'] + elif isinstance(data_batch, list): + input_x, batch_target = data_batch[0], data_batch[1] + mask = data_batch[2] + + predictions, _, adjust_weight = self.forward_net( + input_x, mask, batch_target) + + train_loss = self.loss(predictions, adjust_weight, batch_target, mask) + loss_metrics = {} + loss_metrics['loss'] = train_loss + + return loss_metrics + + def val_step(self, data_batch): + """Validating setp. + """ + if isinstance(data_batch, dict): + input_x = data_batch['feature_tensor'] + mask = data_batch['mask'] + video_name = data_batch['video_name'] + elif isinstance(data_batch, list): + input_x = data_batch[0] + mask = data_batch[4] + video_name = data_batch[5] + predictions, _, _ = self.forward_net(input_x, mask) + predictions = predictions[-1] + if self.use_lbp and self.dataset != 'gtea': + num_frames = np.shape(input_x)[2] + if self.dataset in ['50salads', 'breakfast']: + video_name = '/' + video_name[0].split('.')[0] + barrier_file = self.bgm_result_path + video_name + ".csv" + barrier = pd.read_csv(barrier_file) + barrier = np.transpose(np.array(barrier)) + temporal_scale = np.shape(barrier)[1] + barrier = paddle.to_tensor(barrier) + + if temporal_scale < num_frames: + interpolation = paddle.round( + paddle.to_tensor([ + float(num_frames) / temporal_scale * (i + 0.5) + for i in range(temporal_scale) + ])) + interpolation = paddle.cast(interpolation, 'int64') + resize_barrier = paddle.to_tensor([0.0] * num_frames) + resize_barrier[interpolation] = barrier[0] + resize_barrier = resize_barrier.unsqueeze(0).unsqueeze(0) + else: + resize_barrier = barrier + resize_barrier = resize_barrier.unsqueeze( + 0) # 
size=[1,1,num_frames] + if temporal_scale < num_frames: + for i in range(self.num_post): + predictions = self.lbp(predictions, resize_barrier) + else: + predictions = F.interpolate(predictions, size=[temporal_scale], mode='linear', \ + align_corners=False, data_format='NCW') + for i in range(self.num_post): + predictions = self.lbp(predictions, resize_barrier) + predictions = F.interpolate(predictions, size=[num_frames], mode='linear', \ + align_corners=False, data_format='NCW') + + predicted = paddle.argmax(predictions, 1) + predicted = predicted.squeeze() + return predicted + + def test_step(self, data_batch): + """Testing setp. + """ + + return self.val_step(data_batch) + + def infer_step(self, data_batch): + """Infering setp. + """ + # return self.val_step(data_batch) + if isinstance(data_batch, list): + input_x = data_batch[0] + mask = data_batch[1] + else: + input_x = data_batch + mask = paddle.ones([1, 1, input_x.shape[2]]) + + predictions, _, _ = self.forward_net(input_x, mask) + predicted = paddle.argmax(predictions, 1) + predicted = predicted.squeeze() + return predicted diff --git a/paddlevideo/modeling/heads/__init__.py b/paddlevideo/modeling/heads/__init__.py index 1f39d2f13..407ad5fcd 100644 --- a/paddlevideo/modeling/heads/__init__.py +++ b/paddlevideo/modeling/heads/__init__.py @@ -29,10 +29,11 @@ from .transnetv2_head import TransNetV2Head from .tsm_head import TSMHead from .tsn_head import TSNHead +from .bcn_head import BcnBgmHead __all__ = [ 'BaseHead', 'TSNHead', 'TSMHead', 'ppTSMHead', 'ppTSNHead', 'SlowFastHead', 'AttentionLstmHead', 'TimeSformerHead', 'STGCNHead', 'TransNetV2Head', 'I3DHead', 'SingleRoIExtractor3D', 'AVARoIHead', 'BBoxHeadAVA', 'AddsHead', - 'ppTimeSformerHead', 'CollaborativeEnsemblerMS' + 'ppTimeSformerHead', 'CollaborativeEnsemblerMS', 'BcnBgmHead' ] diff --git a/paddlevideo/modeling/heads/bcn_head.py b/paddlevideo/modeling/heads/bcn_head.py new file mode 100644 index 000000000..765940b96 --- /dev/null +++ 
b/paddlevideo/modeling/heads/bcn_head.py @@ -0,0 +1,109 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +import paddle.nn as nn +import pandas as pd +from scipy import signal +import os +import copy +import numpy as np + +from .base import BaseHead +from ..registry import HEADS +from ..weight_init import weight_init_ + + +@HEADS.register() +class BcnBgmHead(BaseHead): + """ + Head for Bcn bgm model. + Args: + just for test. 
+ """ + + def __init__(self, + use_full, + test_mode, + results_path, + dataset, + in_channels=-1, + num_classes=-1, + **kwargs): + super().__init__(num_classes, in_channels, **kwargs) + assert test_mode in ['less', 'more'], "test_mode must be less or more" + + self.use_full = use_full + self.test_mode = test_mode + + if not os.path.exists(results_path): + os.makedirs(results_path) + self.results_path = results_path + if dataset == 'breakfast' or dataset == 'gtea': + self.temporal_dim = 300 + elif dataset == '50salads': + self.temporal_dim = 400 + + def forward(self, outputs, video_name): + """don't need any parameter, just process result and save + """ + outputs = copy.deepcopy(outputs) + outputs = outputs.cpu().detach().numpy() + columns = ["barrier"] + if self.use_full: + barrier_threshold = 0.5 + barrier = (outputs > barrier_threshold) * outputs + video_result = barrier[0] + + video_result = video_result.transpose([1, 0]) + video_df = pd.DataFrame(list(video_result), columns=columns) + video_df.to_csv(os.path.join(self.results_path, + video_name + ".csv"), + index=False) + + else: + if self.test_mode == 'less': + barrier_threshold = 0.5 + barrier = (outputs > barrier_threshold) * outputs + video_result = barrier[0] + + maximum = signal.argrelmax(video_result[0]) + flag = np.array([0] * self.temporal_dim) + flag[maximum] = 1 + + video_result = video_result * flag + video_df = pd.DataFrame(list(video_result.transpose([1, 0])), + columns=columns) + video_df.to_csv(os.path.join(self.results_path, + video_name + ".csv"), + index=False) + elif self.test_mode == 'more': + barrier = (outputs > 0.3) * outputs + high_barrier = (outputs > 0.8) + video_result = barrier[0] + maximum1 = signal.argrelmax(video_result[0]) + maximum2 = high_barrier[0] + + flag = np.array([0] * self.temporal_dim) + flag[maximum1] = 1 + flag = np.clip((flag + maximum2), 0, 1) + + video_result = video_result * flag + video_df = pd.DataFrame(list(video_result.transpose([1, 0])), + columns=columns) 
+ video_df.to_csv(os.path.join(self.results_path, + video_name + ".csv"), + index=False) + + return None # just process and save, don't need return diff --git a/paddlevideo/modeling/losses/__init__.py b/paddlevideo/modeling/losses/__init__.py index cbe9c08ec..99b77dc79 100644 --- a/paddlevideo/modeling/losses/__init__.py +++ b/paddlevideo/modeling/losses/__init__.py @@ -18,8 +18,9 @@ from .depth_loss import ADDSLoss from .transnetv2_loss import TransNetV2Loss from .actbert_loss import ActBertLoss +from .bcn_loss import BcnBgmLoss, BcnModelLoss __all__ = [ 'CrossEntropyLoss', 'BMNLoss', 'TransNetV2Loss', 'ActBertLoss', 'ADDSLoss', - 'BaseWeightedLoss' + 'BaseWeightedLoss', 'BcnBgmLoss', 'BcnModelLoss' ] diff --git a/paddlevideo/modeling/losses/bcn_loss.py b/paddlevideo/modeling/losses/bcn_loss.py new file mode 100644 index 000000000..4a00177b5 --- /dev/null +++ b/paddlevideo/modeling/losses/bcn_loss.py @@ -0,0 +1,131 @@ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from ..registry import LOSSES +from .base import BaseWeightedLoss + + +def bi_loss(scores, anchors, bgm_match_threshold=0.5): + """ + cross_entropy loss + :param scores: gt + :param anchors: predict result + :param bgm_match_threshold: threshold for selecting positive samples + :return: + """ + scores = paddle.reshape(scores, [scores.shape[-1]]) + anchors = paddle.reshape(anchors, [anchors.shape[-1]]) + # pmask = (scores> bgm_match_threshold).float() + pmask = paddle.cast((scores > bgm_match_threshold), 'float32') + num_positive = paddle.sum(pmask) + num_entries = len(scores) + ratio = num_entries / num_positive + + coef_0 = 0.5 * (ratio) / (ratio - 1) + coef_1 = coef_0 * (ratio - 1) + loss = coef_1 * pmask * paddle.log(anchors + 0.00001) + \ + coef_0 * (1.0 - pmask) * paddle.log(1.0 - anchors * 0.999999) + loss = -1 * paddle.mean(loss) + num_sample = [paddle.sum(pmask), ratio] + return loss, num_sample + + +def BGM_loss_calc(anchors, match_scores): + """BGM_loss_calc + """ + 
loss_start_small, num_sample_start_small = bi_loss(match_scores, anchors) + loss_dict = {"loss": loss_start_small, "num_sample": num_sample_start_small} + return loss_dict + + +@LOSSES.register() +class BcnBgmLoss(BaseWeightedLoss): + """BcnBgmLoss""" + + def forward(self, label, outputs): + """Forward function. + """ + loss_dict = BGM_loss_calc(outputs, label) + return loss_dict["loss"] + + +def MultiplyList(myList): + """multiplyList + """ + result = 1 + for x in myList: + result = result * x + return [result] + + +@LOSSES.register() +class BcnModelLoss(BaseWeightedLoss): + """BcnModelLoss""" + + def __init__(self): + super().__init__() + self.maskCE = nn.CrossEntropyLoss( + ignore_index=-100, reduction='none') # for cascade stages + self.mse = nn.MSELoss(reduction='none') + self.nll = nn.NLLLoss(ignore_index=-100, + reduction='none') # for fusion stage + + def forward(self, predictions, adjust_weight, batch_target, mask): + """Forward function. + """ + loss = 0. + num_stages = len(predictions) - 1 + balance_weight = [1.0] * num_stages + batch_target = paddle.reshape(batch_target, + MultiplyList(batch_target.shape)) + + # num_stages is number of cascade stages + for num_stage in range(num_stages): + adjust_weight[num_stage].stop_gradient = True + + # balance_weight = a / b + a = paddle.sum(adjust_weight[0], 2) + a = paddle.reshape(a, MultiplyList(a.shape)) + a = paddle.cast(a, 'float32') + + b = paddle.sum(adjust_weight[num_stage], 2) + b = paddle.reshape(b, MultiplyList(b.shape)) + b = paddle.cast(b, 'float32') + + balance_weight[num_stage] = paddle.mean(a / b) + + # calculate mask ce_loss + p = predictions[num_stage] + ce_p = paddle.transpose(p, [0, 2, 1]) + ce_p = paddle.reshape( + ce_p, [ce_p.shape[0] * ce_p.shape[1], ce_p.shape[2]]) + ce_mask = adjust_weight[num_stage] + ce_mask = paddle.reshape(ce_mask, MultiplyList(ce_mask.shape)) + + ce_loss = 1 * balance_weight[num_stage] * paddle.mean( + self.maskCE(ce_p, batch_target) * ce_mask) + loss += ce_loss + + # 
calculate tmse + loss += 0.3 * paddle.mean( + paddle.clip(self.mse( + F.log_softmax(p[:, :, 1:], axis=1), + F.log_softmax(p.detach()[:, :, :-1], axis=1)), + min=0, + max=8) * mask[:, :, 1:]) + + # fusion stage + p = predictions[-1] + nll_p = paddle.transpose(p, [0, 2, 1]) + nll_p = paddle.reshape( + nll_p, [nll_p.shape[0] * nll_p.shape[1], nll_p.shape[2]]) + loss += paddle.mean(self.nll(paddle.log(nll_p), batch_target)) + + loss += 0.5 * paddle.mean( + paddle.clip(self.mse(F.log_softmax(p[:, :, 1:], axis=1), + F.log_softmax(p.detach()[:, :, :-1], axis=1)), + min=0, + max=8) * mask[:, :, 1:]) + + return loss diff --git a/paddlevideo/modeling/registry.py b/paddlevideo/modeling/registry.py index ace3074a5..b925f384f 100644 --- a/paddlevideo/modeling/registry.py +++ b/paddlevideo/modeling/registry.py @@ -27,4 +27,5 @@ BBOX_CODERS = Registry('bbox_coder') ESTIMATORS = Registry('estimator') MULTIMODAL = Registry('multimodal') +SEGMENTERS = Registry('Segmenters') SEGMENT = Registry('segment') diff --git a/paddlevideo/solver/custom_lr.py b/paddlevideo/solver/custom_lr.py index cd02c4e97..2c9de1f45 100644 --- a/paddlevideo/solver/custom_lr.py +++ b/paddlevideo/solver/custom_lr.py @@ -36,6 +36,7 @@ class CustomWarmupCosineDecay(LRScheduler): Returns: ``CosineAnnealingDecay`` instance to schedule learning rate. """ + def __init__(self, warmup_start_lr, warmup_epochs, @@ -109,6 +110,7 @@ class CustomWarmupPiecewiseDecay(LRScheduler): Returns: ``CustomWarmupPiecewiseDecay`` instance to schedule learning rate. 
""" + def __init__(self, warmup_start_lr, warmup_epochs, @@ -203,12 +205,20 @@ def get_lr(self): class CustomPiecewiseDecay(PiecewiseDecay): + def __init__(self, **kargs): kargs.pop('num_iters') super().__init__(**kargs) +class CustomMultiStepDecay(MultiStepDecay): + + def __init__(self, **kargs): + super().__init__(**kargs) + + class CustomWarmupCosineStepDecay(LRScheduler): + def __init__(self, warmup_iters, warmup_ratio=0.1, diff --git a/paddlevideo/tasks/train.py b/paddlevideo/tasks/train.py index 86c733d65..686981ed2 100644 --- a/paddlevideo/tasks/train.py +++ b/paddlevideo/tasks/train.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +from ..metrics.ava_utils import collect_results_cpu +import shutil +import pickle +import time +import os import os.path as osp import time - import numpy as np + import paddle import paddle.distributed as dist import paddle.distributed.fleet as fleet @@ -23,13 +28,20 @@ load, log_batch, log_epoch, mkdir, save) from ..loader.builder import build_dataloader, build_dataset -from ..metrics.ava_utils import collect_results_cpu from ..modeling.builder import build_model +from ..metrics import build_metric from ..solver import build_lr, build_optimizer from ..utils import do_preciseBN +from paddlevideo.utils import get_logger +from paddlevideo.utils import (build_record, log_batch, log_epoch, save, load, + mkdir) +import sys +import numpy as np +from pathlib import Path -paddle.framework.seed(1234) -np.random.seed(1234) +paddle.framework.seed(1538574472) +paddle.seed(1538574472) +np.random.seed(1538574472) def train_model(cfg, @@ -125,10 +137,44 @@ def train_model(cfg, ) valid_loader = build_dataloader(valid_dataset, **validate_dataloader_setting) + cfg.METRIC.data_size = len(valid_dataset) + cfg.METRIC.batch_size = batch_size + cfg.METRIC.log_interval = cfg.log_interval + # build metric + if cfg.MODEL.framework in ["BcnBgm", "BcnModel"]: + Metric = 
build_metric(cfg.METRIC) # 3. Construct solver. - lr = build_lr(cfg.OPTIMIZER.learning_rate, len(train_loader)) - optimizer = build_optimizer(cfg.OPTIMIZER, lr, model=model) + if cfg.MODEL.framework == "BcnModel": + lr_list = [] + for sub_learning_rate in cfg.OPTIMIZER.get("learning_rate"): + lr = build_lr(sub_learning_rate, len(train_loader)) + lr_list.append(lr) + model.backbone.bgm.weight_attr = paddle.ParamAttr( + learning_rate=lr_list[1]) + model.backbone.bgm.bias_attr = paddle.ParamAttr( + learning_rate=lr_list[1]) + optimizer = build_optimizer(cfg.OPTIMIZER, + lr_list[0], + parameter_list=[{ + 'params': + model.backbone.stage1.parameters() + }, { + 'params': + model.backbone.stages.parameters() + }, { + 'params': + model.backbone.stageF.parameters() + }, { + 'params': + model.backbone.bgm.parameters(), + 'learning_rate': + 1 + }]) + else: + lr = build_lr(cfg.OPTIMIZER.learning_rate, len(train_loader)) + optimizer = build_optimizer(cfg.OPTIMIZER, lr, model=model) + if use_fleet: optimizer = fleet.distributed_optimizer(optimizer) # Resume @@ -240,11 +286,15 @@ def train_model(cfg, log_batch(record_list, i, epoch + 1, cfg.epochs, "train", ips) # learning rate iter step - if cfg.OPTIMIZER.learning_rate.get("iter_step"): + if (cfg.MODEL.framework != "BcnModel") and ( + cfg.OPTIMIZER.learning_rate.get("iter_step")): lr.step() # learning rate epoch step - if not cfg.OPTIMIZER.learning_rate.get("iter_step"): + if cfg.MODEL.framework == "BcnModel": + for sub_lr in lr_list: + sub_lr.step() + elif not cfg.OPTIMIZER.learning_rate.get("iter_step"): lr.step() ips = "avg_ips: {:.5f} instance/sec.".format( @@ -263,11 +313,17 @@ def evaluate(best): #single_gpu_test and multi_gpu_test for i, data in enumerate(valid_loader): outputs = model(data, mode='valid') + + if cfg.MODEL.framework in ["BcnBgm", "BcnModel"]: + Metric.update(i, data, outputs) + if cfg.MODEL.framework == "FastRCNN": results.extend(outputs) #log_record - if cfg.MODEL.framework != "FastRCNN": + if 
cfg.MODEL.framework not in [ + "FastRCNN", "BcnBgm", "BcnModel" + ]: for name, value in outputs.items(): if name in record_list: record_list[name].update(value, batch_size) @@ -312,6 +368,11 @@ def evaluate(best): best = record_list[top_flag].avg best_flag = True + if cfg.MODEL.framework in ["BcnBgm", "BcnModel"]: + new_best = Metric.accumulate() + if not isinstance(new_best, list) and new_best > best: + best = new_best + best_flag = True return best, best_flag # use precise bn to improve acc diff --git a/tools/export_model.py b/tools/export_model.py index 829e67ed8..8641604bc 100644 --- a/tools/export_model.py +++ b/tools/export_model.py @@ -135,6 +135,11 @@ def get_input_spec(cfg, model_name): ], dtype='float32'), ]] + elif model_name in ['BcnModel', 'BcnBgmFull', 'BcnBgmResized']: + input_spec = [[ + InputSpec(shape=[1, cfg.num_channels, None], dtype='float32'), + InputSpec(shape=[1, 1, None], dtype='float32', name='mask'), + ]] elif model_name in ['TransNetV2']: input_spec = [[ InputSpec(shape=[ @@ -190,6 +195,7 @@ def main(): input_spec = get_input_spec(cfg.INFERENCE, model_name) model = to_static(model, input_spec=input_spec) + print(model.parameters) paddle.jit.save(model, osp.join(args.output_path, model_name)) print( f"model ({model_name}) has been already saved in ({args.output_path}).") diff --git a/tools/summary.py b/tools/summary.py index f7f98e0f9..dd1f46032 100644 --- a/tools/summary.py +++ b/tools/summary.py @@ -69,11 +69,11 @@ def main(): img_size = args.img_size num_seg = args.num_seg #NOTE: only support tsm now, will refine soon - params_info = paddle.summary(model, (1, 1, num_seg, 3, img_size, img_size)) + params_info = paddle.summary(model, (1, 2048, 1000)) print(params_info) if args.FLOPs: - flops_info = paddleslim.analysis.flops(model, [1, 1, num_seg, 3, img_size, img_size]) + flops_info = paddleslim.analysis.flops(model, [1, 2048, 1000]) print(flops_info) diff --git a/tools/utils.py b/tools/utils.py index 010631768..aca2d3ca8 100644 --- 
a/tools/utils.py +++ b/tools/utils.py @@ -24,6 +24,8 @@ import paddle.nn.functional as F import pandas from PIL import Image +from scipy import signal +import pandas as pd import shutil __dir__ = os.path.dirname(os.path.abspath(__file__)) @@ -165,6 +167,218 @@ def postprocess(self, output, print_output=True): print("\ttop-{0} score: {1}".format(j + 1, scores[j])) +@INFERENCE.register() +class BcnBgmFull_Inference_helper(Base_Inference_helper): + + def __init__(self, + num_channels, + sample_rate, + result_path, + mode=None, + temporal_dim=None, + dataset=None): + self.num_channels = num_channels + self.sample_rate = sample_rate + self.result_path = result_path + + def preprocess(self, input_file_txt): + """ + input_file: str, feature file list txt path + return: list + """ + if not isinstance(input_file_txt, list): + self.input_file_txt = [input_file_txt] + features = np.load(input_file_txt) + features = features[:, ::self.sample_rate] + feature_tensor = paddle.to_tensor(features, dtype='float32') + return [feature_tensor.unsqueeze(0)] + else: + self.input_file_txt = input_file_txt + out_list = [] + for input_file in input_file_txt: + features = np.load(input_file_txt) + features = features[:, ::self.sample_rate] + feature_tensor = paddle.to_tensor(features, dtype='float32') + out_list.append(feature_tensor.unsqueeze(0)) + return out_list + + def postprocess(self, outputs_list, print_output=True): + for outputs, input_file in zip(outputs_list, self.input_file_txt): + columns = ["barrier"] + + barrier_threshold = 0.5 + barrier = (outputs > barrier_threshold) * outputs + video_result = barrier[0] + + video_result = video_result.transpose([1, 0]) + video_df = pd.DataFrame(list(video_result), columns=columns) + video_df.to_csv(os.path.join( + self.result_path, + input_file.split('/')[-1].split('.')[0] + ".csv"), + index=False) + + +@INFERENCE.register() +class BcnBgmResized_Inference_helper(Base_Inference_helper): + + def __init__(self, num_channels, sample_rate, 
result_path, mode, + temporal_dim, dataset): + self.num_channels = num_channels + self.sample_rate = sample_rate + self.result_path = result_path + self.test_mode = mode + self.temporal_dim = temporal_dim + self.dataset = dataset + + def resized_feature(self, feature_tensor): + num_frames = feature_tensor.shape[1] + feature_tensor = feature_tensor.unsqueeze(0) + if self.dataset == 'breakfast': # for breakfast dataset, there are extremely short videos + factor = 1 + while factor * num_frames < self.temporal_dim: + factor = factor + 1 + feature_tensor = F.interpolate(feature_tensor, + scale_factor=(factor), + mode='linear', + align_corners=False, + data_format='NCW') + feature_tensor = F.interpolate(feature_tensor.unsqueeze(3), + size=(self.temporal_dim, 1), + mode='nearest').squeeze(3) + return feature_tensor + + def preprocess(self, input_file_txt): + """ + input_file: str, feature file list txt path + return: list + """ + if not isinstance(input_file_txt, list): + self.input_file_txt = [input_file_txt] + features = np.load(input_file_txt) + features = features[:, ::self.sample_rate] + feature_tensor = paddle.to_tensor(features, dtype='float32') + return [self.resized_feature(feature_tensor)] + else: + self.input_file_txt = input_file_txt + out_list = [] + for input_file in input_file_txt: + features = np.load(input_file_txt) + features = features[:, ::self.sample_rate] + feature_tensor = paddle.to_tensor(features, dtype='float32') + out_list.append(self.resized_feature(feature_tensor)) + return out_list + + def postprocess(self, outputs_list, print_output=True): + for outputs, input_file in zip(outputs_list, self.input_file_txt): + columns = ["barrier"] + if self.test_mode == 'less': + barrier_threshold = 0.5 + barrier = (outputs > barrier_threshold) * outputs + video_result = barrier[0] + + maximum = signal.argrelmax(video_result[0]) + flag = np.array([0] * self.temporal_dim) + flag[maximum] = 1 + + video_result = video_result * flag + video_df = 
pd.DataFrame(list(video_result.transpose([1, 0])), + columns=columns) + video_df.to_csv(os.path.join( + self.result_path, + input_file.split('/')[-1].split('.')[0] + ".csv"), + index=False) + elif self.test_mode == 'more': + barrier = (outputs > 0.3) * outputs + high_barrier = (outputs > 0.8) + video_result = barrier[0] + maximum1 = signal.argrelmax(video_result[0]) + maximum2 = high_barrier[0] + + flag = np.array([0] * self.temporal_dim) + flag[maximum1] = 1 + flag = np.clip((flag + maximum2), 0, 1) + + video_result = video_result * flag + video_df = pd.DataFrame(list(video_result.transpose([1, 0])), + columns=columns) + video_df.to_csv(os.path.join( + self.result_path, + input_file.split('/')[-1].split('.')[0] + ".csv"), + index=False) + + +@INFERENCE.register() +class BcnModel_Inference_helper(Base_Inference_helper): + + def __init__(self, num_channels, sample_rate, result_path, mode, + temporal_dim, dataset): + self.num_channels = num_channels + self.sample_rate = sample_rate + self.result_path = result_path + self.test_mode = mode + self.temporal_dim = temporal_dim + self.dataset = dataset + + def preprocess(self, input_file_txt): + """ + input_file: str, feature file list txt path + return: list + """ + if not isinstance(input_file_txt, list): + self.input_file_txt = [input_file_txt] + features = np.load(input_file_txt) + features = features[:, ::self.sample_rate] + feature_tensor = paddle.to_tensor(features, dtype='float32') + return [self.resized_feature(feature_tensor)] + else: + self.input_file_txt = input_file_txt + out_list = [] + for input_file in input_file_txt: + features = np.load(input_file_txt) + features = features[:, ::self.sample_rate] + feature_tensor = paddle.to_tensor(features, dtype='float32') + out_list.append(self.resized_feature(feature_tensor)) + return out_list + + def postprocess(self, outputs_list, print_output=True): + for outputs, input_file in zip(outputs_list, self.input_file_txt): + columns = ["barrier"] + if self.test_mode == 
'less': + barrier_threshold = 0.5 + barrier = (outputs > barrier_threshold) * outputs + video_result = barrier[0] + + maximum = signal.argrelmax(video_result[0]) + flag = np.array([0] * self.temporal_dim) + flag[maximum] = 1 + + video_result = video_result * flag + video_df = pd.DataFrame(list(video_result.transpose([1, 0])), + columns=columns) + video_df.to_csv(os.path.join( + self.result_path, + input_file.split('/')[-1].split('.')[0] + ".csv"), + index=False) + elif self.test_mode == 'more': + barrier = (outputs > 0.3) * outputs + high_barrier = (outputs > 0.8) + video_result = barrier[0] + maximum1 = signal.argrelmax(video_result[0]) + maximum2 = high_barrier[0] + + flag = np.array([0] * self.temporal_dim) + flag[maximum1] = 1 + flag = np.clip((flag + maximum2), 0, 1) + + video_result = video_result * flag + video_df = pd.DataFrame(list(video_result.transpose([1, 0])), + columns=columns) + video_df.to_csv(os.path.join( + self.result_path, + input_file.split('/')[-1].split('.')[0] + ".csv"), + index=False) + + @INFERENCE.register() class ppTSM_Inference_helper(Base_Inference_helper): @@ -825,8 +1039,10 @@ def _convertPNG(self, image_numpy): im = Image.fromarray(colormapped_im) return im + @INFERENCE.register() class VideoSwin_Inference_helper(Base_Inference_helper): + def __init__(self, num_seg=4, seg_len=32, @@ -904,6 +1120,7 @@ def postprocess(self, output, print_output=True): print("\ttop-{0} class: {1}".format(j + 1, classes[j])) print("\ttop-{0} score: {1}".format(j + 1, scores[j])) + @INFERENCE.register() class AVA_SlowFast_FastRCNN_Inference_helper(Base_Inference_helper): @@ -1066,7 +1283,7 @@ def postprocess(self, outputs, print_output=True): human_detection = self.human_detections[t_index] output = outputs[index] - result = output #长度为类别个数,不包含背景 + result = output #长度为类别个数,不包含背景 person_num = self.person_num_list[index] @@ -1081,7 +1298,7 @@ def postprocess(self, outputs, print_output=True): # N proposals for i in range(person_num): prediction.append([]) 
- + # Perform action score thr for i in range(len(result)): # for class if i + 1 not in self.class_whitelist: @@ -1089,7 +1306,8 @@ def postprocess(self, outputs, print_output=True): for j in range(person_num): if result[i][j, 4] > self.config.MODEL.head['action_thr']: prediction[j].append( - (self.label_map[i + 1], result[i][j, 4])) # label_map is a dict, label index start from 1 + (self.label_map[i + 1], result[i][j, 4] + )) # label_map is a dict, label index start from 1 predictions.append(prediction) results = []