diff --git a/Port_Weights_Assert.ipynb b/Port_Weights_Assert.ipynb new file mode 100644 index 0000000..8ce3502 --- /dev/null +++ b/Port_Weights_Assert.ipynb @@ -0,0 +1,147 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gGWhoJ0axBQz" + }, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('./assert')" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MuiAFtFUxIFD" + }, + "outputs": [], + "source": [ + "import torch\n", + "from model import E2E" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9VnaBzts2BK4" + }, + "outputs": [], + "source": [ + "import itertools\n", + "protocols = ['pa', 'la']\n", + "networks = ['attentive_filtering_network', 'dilated_resnet', 'senet34', 'senet50']\n", + "all_networks = list(itertools.product(protocols, networks))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5bt6169p4nbv" + }, + "outputs": [], + "source": [ + "\n", + "def port_weights(protocol, network):\n", + "    models_dict = {'attentive_filtering_network': 5, 'dilated_resnet': 1, 'senet34': 7,\n", + "                   'senet50': 6}\n", + "    model_params = {\n", + "        'MODEL_SELECT' : models_dict[network], # which model\n", + "        'NUM_SPOOF_CLASS' : 2, # 2-class classification (bona fide vs. spoof)\n", + "        'FOCAL_GAMMA' : None, # gamma parameter for focal loss; set to None when the objective is not focal loss\n", + "        'NUM_RESNET_BLOCK' : 5, # number of resnet blocks in ResNet\n", + "        'AFN_UPSAMPLE' : 'Bilinear', # upsampling method in AFNet: Conv or Bilinear\n", + "        'AFN_ACTIVATION' : 'sigmoid', # activation function in AFNet: sigmoid, softmaxF, softmaxT\n", + "        'NUM_HEADS' : 3, # number of heads for multi-head attention in SAFNet\n", + "        'SAFN_HIDDEN' : 10, # hidden dim for SAFNet\n", + "        'SAFN_DIM' : 'T', # SAFNet attention dim: T or F\n", + "        'RNN_HIDDEN' : 128, # hidden dim for RNN\n", + "        'RNN_LAYERS' : 4, # number of hidden layers for RNN\n", + "        'RNN_BI': True, # bidirectional/unidirectional for RNN\n", + "        'DROPOUT_R' : 0.0, # dropout rate\n", + "    }\n", + "    model = E2E(**model_params)\n", + "    weights = torch.load(f'./ASSERT/pretrained/{protocol}/{network}', map_location='cpu', encoding='bytes')\n", + "    # Decode byte keys to str, both at the top level and inside state_dict.\n", + "    data_dict = dict(weights)\n", + "    for key in list(data_dict):\n", + "        if type(key) is bytes:\n", + "            data_dict[key.decode()] = data_dict[key]\n", + "            data_dict.pop(key)\n", + "    data_dict['state_dict'] = dict(data_dict['state_dict'])\n", + "    for key in list(data_dict['state_dict']):\n", + "        if type(key) is bytes:\n", + "            data_dict['state_dict'][key.decode()] = data_dict['state_dict'][key]\n", + "            data_dict['state_dict'].pop(key)\n", + "    model.load_state_dict(data_dict['state_dict'])\n", + "    torch.save(data_dict, f'./ASSERT/pretrained/{protocol}/{network}.py3.ckpt')\n", + "    print(f\"Ported {network} - {protocol}\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LFH4ZAwB7yAE", + "outputId": "3ab0a784-8d91-4a8e-ad54-ba2ec1c8ff3e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "attentive filtering network\n", + "Ported attentive_filtering_network - pa\n", + "resnet\n", + "Ported dilated_resnet - pa\n", + "squeeze-and-excitation network\n", + "Ported senet34 - pa\n", + "squeeze-and-excitation network\n", + "Ported senet50 - pa\n", + "attentive filtering network\n", + "Ported 
attentive_filtering_network - la\n", + "resnet\n", + "Ported dilated_resnet - la\n", + "squeeze-and-excitation network\n", + "Ported senet34 - la\n", + "squeeze-and-excitation network\n", + "Ported senet50 - la\n" + ] + } + ], + "source": [ + "for _p, _n in all_networks:\n", + " port_weights(_p, _n)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "forensic_examiner_audio", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.8 (main, Nov 24 2022, 14:13:03) [GCC 11.2.0]" + }, + "vscode": { + "interpreter": { + "hash": "e2b5310373df8c4f0bc118e06d390d9464bd5fe0a9f4e308bd14694ffbb1bd37" + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/assert/model.py b/assert/model.py index d12ea6e..3b3f675 100644 --- a/assert/model.py +++ b/assert/model.py @@ -16,13 +16,21 @@ def E2E(MODEL_SELECT, NUM_SPOOF_CLASS, NUM_RESNET_BLOCK, AFN_UPSAMPLE, AFN_ACTIV elif MODEL_SELECT == 5: print('attentive filtering network') model = attentive_filtering_network.SpoofSmallAFNet257_400(NUM_SPOOF_CLASS, AFN_UPSAMPLE, AFN_ACTIVATION, NUM_RESNET_BLOCK, FOCAL_LOSS) + elif MODEL_SELECT == 6: print('squeeze-and-excitation network') #model = senet.se_resnet18(num_classes=NUM_SPOOF_CLASS, focal_loss=FOCAL_LOSS) - #model = senet.se_resnet34(num_classes=NUM_SPOOF_CLASS, focal_loss=FOCAL_LOSS) + # model = senet.se_resnet34(num_classes=NUM_SPOOF_CLASS, focal_loss=FOCAL_LOSS) model = senet.se_resnet50(num_classes=NUM_SPOOF_CLASS, focal_loss=FOCAL_LOSS) #model = senet.se_resnet101(num_classes=NUM_SPOOF_CLASS, focal_loss=FOCAL_LOSS) #model = senet.se_resnet152(num_classes=NUM_SPOOF_CLASS, focal_loss=FOCAL_LOSS) + elif MODEL_SELECT == 7: + print('squeeze-and-excitation network') + #model = senet.se_resnet18(num_classes=NUM_SPOOF_CLASS, focal_loss=FOCAL_LOSS) + model = senet.se_resnet34(num_classes=NUM_SPOOF_CLASS, focal_loss=FOCAL_LOSS) + # model = senet.se_resnet50(num_classes=NUM_SPOOF_CLASS, focal_loss=FOCAL_LOSS) + #model = senet.se_resnet101(num_classes=NUM_SPOOF_CLASS, focal_loss=FOCAL_LOSS) + #model = senet.se_resnet152(num_classes=NUM_SPOOF_CLASS, focal_loss=FOCAL_LOSS) return model diff --git a/assert/src/resnet.py b/assert/src/resnet.py index 9fcef97..46fd5e7 100644 --- a/assert/src/resnet.py +++ b/assert/src/resnet.py @@ -47,21 +47,21 @@ def __init__(self, num_classes, binary=False, resnet_blocks=1, input_size=(1,256 self.mp1 = nn.MaxPool2d(kernel_size=(2,2)) self.cnn1 = nn.Conv2d(8, 16, kernel_size=(3,3), dilation=(2,2)) ## block 2 - layers = [] + layers = [] for i in range(resnet_blocks): layers.append(ResBasicBlock(16)) self.block2 = nn.Sequential(*layers) self.mp2 = nn.MaxPool2d(kernel_size=(2,2)) self.cnn2 = nn.Conv2d(16, 32, kernel_size=(3,3), dilation=(4,4)) ## block 3 - layers = [] + layers = [] for i in range(resnet_blocks): layers.append(ResBasicBlock(32)) self.block3 = nn.Sequential(*layers) self.mp3 = nn.MaxPool2d(kernel_size=(2,2)) self.cnn3 = nn.Conv2d(32, 64, kernel_size=(3,3), dilation=(4,4)) ## block 4 - layers = [] + layers = [] for i in range(resnet_blocks): layers.append(ResBasicBlock(64)) self.block4 = nn.Sequential(*layers) @@ -72,7 +72,7 @@ def __init__(self, num_classes, binary=False, resnet_blocks=1, input_size=(1,256 self.fc = nn.Linear(self.flat_feats, 100) self.bn = nn.BatchNorm1d(100) - self.re = nn.ReLU(inplace=True) + self.re = nn.ReLU(inplace=True) self.fc_out = nn.Linear(100, num_classes) ## Weights initialization @@ -83,26 +83,26 @@ def __init__(self, num_classes, 
binary=False, resnet_blocks=1, input_size=(1,256 nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) - def forward(self, x): - x = self.expansion(x) - ## block 1 - x = self.cnn1(self.mp1(self.block1(x))) - #print(x.size()) - ## block 2 - x = self.cnn2(self.mp2(self.block2(x))) - #print(x.size()) - ## block 3 - x = self.cnn3(self.mp3(self.block3(x))) - #print(x.size()) - ## block 4 - x = self.cnn4(self.mp4(self.block4(x))) - #print(x.size()) - ## FC - x = self.fc_out(self.re(self.bn(self.fc(x.view(-1, self.flat_feats))))) + def forward(self, x): + x = self.expansion(x) + ## block 1 + x = self.cnn1(self.mp1(self.block1(x))) #print(x.size()) - - if self.binary: return x - else: return F.log_softmax(x, dim=-1) # take log-softmax over C classes + ## block 2 + x = self.cnn2(self.mp2(self.block2(x))) + #print(x.size()) + ## block 3 + x = self.cnn3(self.mp3(self.block3(x))) + #print(x.size()) + ## block 4 + x = self.cnn4(self.mp4(self.block4(x))) + #print(x.size()) + ## FC + x = self.fc_out(self.re(self.bn(self.fc(x.view(-1, self.flat_feats))))) + #print(x.size()) + + if self.binary: return x + else: return F.log_softmax(x, dim=-1) # take log-softmax over C classes class SpoofSmallResNet257_400(nn.Module): ''' small ResNet (less GPU memory) for 257 by 400 feature map ''' @@ -121,21 +121,21 @@ def __init__(self, num_classes, resnet_blocks=1, focal_loss=False, input_size=(1 self.mp1 = nn.MaxPool2d(kernel_size=(2,2)) self.cnn1 = nn.Conv2d(8, 16, kernel_size=(3,3), dilation=(2,2)) ## block 2 - layers = [] + layers = [] for i in range(resnet_blocks): layers.append(ResBasicBlock(16)) self.block2 = nn.Sequential(*layers) self.mp2 = nn.MaxPool2d(kernel_size=(2,2)) self.cnn2 = nn.Conv2d(16, 32, kernel_size=(3,3), dilation=(4,4)) ## block 3 - layers = [] + layers = [] for i in range(resnet_blocks): layers.append(ResBasicBlock(32)) self.block3 = nn.Sequential(*layers) self.mp3 = nn.MaxPool2d(kernel_size=(2,2)) self.cnn3 = nn.Conv2d(32, 64, kernel_size=(3,3), dilation=(4,4)) ## block 4 - layers = [] + layers = [] for i in range(resnet_blocks): layers.append(ResBasicBlock(64)) self.block4 = nn.Sequential(*layers) @@ -146,7 +146,7 @@ def __init__(self, num_classes, resnet_blocks=1, focal_loss=False, input_size=(1 self.fc = nn.Linear(self.flat_feats, 100) self.bn = nn.BatchNorm1d(100) - self.re = nn.ReLU(inplace=True) + self.re = nn.ReLU(inplace=True) self.fc_out = nn.Linear(100, num_classes) ## Weights initialization @@ -157,26 +157,26 @@ def __init__(self, num_classes, resnet_blocks=1, focal_loss=False, input_size=(1 nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) - def forward(self, x): - x = self.expansion(x) - ## block 1 - x = self.cnn1(self.mp1(self.block1(x))) - #print(x.size()) - ## block 2 - x = self.cnn2(self.mp2(self.block2(x))) + def forward(self, x): + x = self.expansion(x) + ## block 1 + x = self.cnn1(self.mp1(self.block1(x))) #print(x.size()) - ## block 3 - x = self.cnn3(self.mp3(self.block3(x))) + ## block 2 + x = self.cnn2(self.mp2(self.block2(x))) + #print(x.size()) + ## block 3 + x = self.cnn3(self.mp3(self.block3(x))) + #print(x.size()) + ## block 4 + x = self.cnn4(self.mp4(self.block4(x))) #print(x.size()) - ## block 4 - x = self.cnn4(self.mp4(self.block4(x))) - #print(x.size()) - ## FC - x = self.fc_out(self.re(self.bn(self.fc(x.view(-1, self.flat_feats))))) - #print(x.size()) - - if self.focal_loss: return x - else: return F.log_softmax(x, dim=-1) # take log-softmax over C classes + ## FC + x = self.fc_out(self.re(self.bn(self.fc(x.view(-1, self.flat_feats))))) + 
#print(x.size()) + + if self.focal_loss: return x + else: return F.log_softmax(x, dim=-1) # take log-softmax over C classes class SpoofResNet30_400(nn.Module): ''' primative ResNet for 30 by 400 feature map ''' @@ -193,28 +193,28 @@ def __init__(self, num_classes, resnet_blocks=1, input_size=(1,30,400)): self.mp1 = nn.MaxPool2d(kernel_size=(1,1)) self.cnn1 = nn.Conv2d(16, 32, kernel_size=(3,3), dilation=(1,2)) ## block 2 - layers = [] + layers = [] for i in range(resnet_blocks): layers.append(ResBasicBlock(32)) self.block2 = nn.Sequential(*layers) self.mp2 = nn.MaxPool2d(kernel_size=(1,1)) self.cnn2 = nn.Conv2d(32, 32, kernel_size=(3,3), dilation=(2,4)) ## block 3 - layers = [] + layers = [] for i in range(resnet_blocks): layers.append(ResBasicBlock(32)) self.block3 = nn.Sequential(*layers) self.mp3 = nn.MaxPool2d(kernel_size=(1,2)) self.cnn3 = nn.Conv2d(32, 32, kernel_size=(3,3), dilation=(2,4)) ## block 4 - layers = [] + layers = [] for i in range(resnet_blocks): layers.append(ResBasicBlock(32)) self.block4 = nn.Sequential(*layers) self.mp4 = nn.MaxPool2d(kernel_size=(1,2)) self.cnn4 = nn.Conv2d(32, 32, kernel_size=(3,3), dilation=(4,8)) ## block 5 - layers = [] + layers = [] for i in range(resnet_blocks): layers.append(ResBasicBlock(32)) self.block5 = nn.Sequential(*layers) @@ -225,7 +225,7 @@ def __init__(self, num_classes, resnet_blocks=1, input_size=(1,30,400)): self.fc = nn.Linear(self.flat_feats, 100) self.bn = nn.BatchNorm1d(100) - self.re = nn.ReLU(inplace=True) + self.re = nn.ReLU(inplace=True) self.fc_out = nn.Linear(100, num_classes) ## Weights initialization @@ -254,7 +254,7 @@ def forward(self, x): x = self.cnn5(self.mp5(self.block5(x))) #print(x.size()) ## FC - x = self.fc_out(self.re(self.bn(self.fc(x.view(-1, self.flat_feats))))) + x = self.fc_out(self.re(self.bn(self.fc(x.view(-1, self.flat_feats))))) #print(x.size()) return F.log_softmax(x, dim=-1) # take log-softmax over C classes @@ -277,28 +277,28 @@ def __init__(self, num_classes, resnet_blocks=1, input_size=(1,257,500)): self.mp1 = nn.MaxPool2d(kernel_size=(1,1)) self.cnn1 = nn.Conv2d(16, 32, kernel_size=(3,3), dilation=(2,2)) ## block 2 - layers = [] + layers = [] for i in range(resnet_blocks): layers.append(ResBasicBlock(32)) self.block2 = nn.Sequential(*layers) self.mp2 = nn.MaxPool2d(kernel_size=(1,1)) self.cnn2 = nn.Conv2d(32, 32, kernel_size=(3,3), dilation=(4,4)) ## block 3 - layers = [] + layers = [] for i in range(resnet_blocks): layers.append(ResBasicBlock(32)) self.block3 = nn.Sequential(*layers) self.mp3 = nn.MaxPool2d(kernel_size=(2,2)) self.cnn3 = nn.Conv2d(32, 32, kernel_size=(3,3), dilation=(4,4)) ## block 4 - layers = [] + layers = [] for i in range(resnet_blocks): layers.append(ResBasicBlock(32)) self.block4 = nn.Sequential(*layers) self.mp4 = nn.MaxPool2d(kernel_size=(2,2)) self.cnn4 = nn.Conv2d(32, 32, kernel_size=(3,3), dilation=(8,8)) ## block 5 - layers = [] + layers = [] for i in range(resnet_blocks): layers.append(ResBasicBlock(32)) self.block5 = nn.Sequential(*layers) @@ -309,7 +309,7 @@ def __init__(self, num_classes, resnet_blocks=1, input_size=(1,257,500)): self.fc = nn.Linear(self.flat_feats, 100) self.bn = nn.BatchNorm1d(100) - self.re = nn.ReLU(inplace=True) + self.re = nn.ReLU(inplace=True) self.fc_out = nn.Linear(100, num_classes) ## Weights initialization @@ -323,9 +323,9 @@ def __init__(self, num_classes, resnet_blocks=1, input_size=(1,257,500)): def forward(self, x): ##print(x.size()) x = self.expansion(x) - ## block 1 + ## block 1 x = self.cnn1(self.mp1(self.block1(x))) - 
##print(x.size()) + ##print(x.size()) ## block 2 x = self.cnn2(self.mp2(self.block2(x))) ##print(x.size()) @@ -334,14 +334,14 @@ def forward(self, x): ##print(x.size()) ## block 4 x = self.cnn4(self.mp4(self.block4(x))) - ##print(x.size()) + ##print(x.size()) ## block 5 x = self.cnn5(self.mp5(self.block5(x))) ##print(x.size()) ## FC - x = self.fc_out(self.re(self.bn(self.fc(x.view(-1, self.flat_feats))))) + x = self.fc_out(self.re(self.bn(self.fc(x.view(-1, self.flat_feats))))) ##print(x.size()) - + return F.log_softmax(x, dim=-1) # take log-softmax over C classes def predict(self, x): @@ -362,28 +362,28 @@ def __init__(self, num_classes, resnet_blocks=1, input_size=(1,257,400)): self.mp1 = nn.MaxPool2d(kernel_size=(1,1)) self.cnn1 = nn.Conv2d(16, 32, kernel_size=(3,3), dilation=(2,2)) ## block 2 - layers = [] + layers = [] for i in range(resnet_blocks): layers.append(ResBasicBlock(32)) self.block2 = nn.Sequential(*layers) self.mp2 = nn.MaxPool2d(kernel_size=(1,1)) self.cnn2 = nn.Conv2d(32, 32, kernel_size=(3,3), dilation=(4,4)) ## block 3 - layers = [] + layers = [] for i in range(resnet_blocks): layers.append(ResBasicBlock(32)) self.block3 = nn.Sequential(*layers) self.mp3 = nn.MaxPool2d(kernel_size=(2,2)) self.cnn3 = nn.Conv2d(32, 32, kernel_size=(3,3), dilation=(4,4)) ## block 4 - layers = [] + layers = [] for i in range(resnet_blocks): layers.append(ResBasicBlock(32)) self.block4 = nn.Sequential(*layers) self.mp4 = nn.MaxPool2d(kernel_size=(2,2)) self.cnn4 = nn.Conv2d(32, 32, kernel_size=(3,3), dilation=(8,8)) ## block 5 - layers = [] + layers = [] for i in range(resnet_blocks): layers.append(ResBasicBlock(32)) self.block5 = nn.Sequential(*layers) @@ -394,7 +394,7 @@ def __init__(self, num_classes, resnet_blocks=1, input_size=(1,257,400)): self.fc = nn.Linear(self.flat_feats, 100) self.bn = nn.BatchNorm1d(100) - self.re = nn.ReLU(inplace=True) + self.re = nn.ReLU(inplace=True) self.fc_out = nn.Linear(100, num_classes) ## Weights initialization @@ -423,7 +423,7 @@ def forward(self, x): x = self.cnn5(self.mp5(self.block5(x))) ##print(x.size()) ## FC - x = self.fc_out(self.re(self.bn(self.fc(x.view(-1, self.flat_feats))))) + x = self.fc_out(self.re(self.bn(self.fc(x.view(-1, self.flat_feats))))) ##print(x.size()) return F.log_softmax(x, dim=-1) # take log-softmax over C classes diff --git a/pretrained/la/attentive_filtering_network.py3.ckpt b/pretrained/la/attentive_filtering_network.py3.ckpt new file mode 100644 index 0000000..79aca14 Binary files /dev/null and b/pretrained/la/attentive_filtering_network.py3.ckpt differ diff --git a/pretrained/la/dilated_resnet.py3.ckpt b/pretrained/la/dilated_resnet.py3.ckpt new file mode 100644 index 0000000..fc02c33 Binary files /dev/null and b/pretrained/la/dilated_resnet.py3.ckpt differ diff --git a/pretrained/la/senet34.py3.ckpt b/pretrained/la/senet34.py3.ckpt new file mode 100644 index 0000000..a1a7164 Binary files /dev/null and b/pretrained/la/senet34.py3.ckpt differ diff --git a/pretrained/la/senet50.py3.ckpt b/pretrained/la/senet50.py3.ckpt new file mode 100644 index 0000000..5153fda Binary files /dev/null and b/pretrained/la/senet50.py3.ckpt differ diff --git a/pretrained/pa/attentive_filtering_network.py3.ckpt b/pretrained/pa/attentive_filtering_network.py3.ckpt new file mode 100644 index 0000000..ce51f0e Binary files /dev/null and b/pretrained/pa/attentive_filtering_network.py3.ckpt differ diff --git a/pretrained/pa/dilated_resnet.py3.ckpt b/pretrained/pa/dilated_resnet.py3.ckpt new file mode 100644 index 0000000..23f87ef Binary files 
/dev/null and b/pretrained/pa/dilated_resnet.py3.ckpt differ diff --git a/pretrained/pa/senet34.py3.ckpt b/pretrained/pa/senet34.py3.ckpt new file mode 100644 index 0000000..516859c Binary files /dev/null and b/pretrained/pa/senet34.py3.ckpt differ diff --git a/pretrained/pa/senet50.py3.ckpt b/pretrained/pa/senet50.py3.ckpt new file mode 100644 index 0000000..0c4b5e0 Binary files /dev/null and b/pretrained/pa/senet50.py3.ckpt differ
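
Usage note (not part of the patch): a minimal sketch of how one of the ported checkpoints could be reloaded under Python 3. It reuses the E2E constructor arguments from Port_Weights_Assert.ipynb; the MODEL_SELECT value and checkpoint path below are assumptions taken from that notebook's models_dict and its save path.

import torch
from model import E2E  # assumes ./assert is on sys.path, as in the notebook

# senet50 maps to MODEL_SELECT 6 in the notebook's models_dict.
model = E2E(MODEL_SELECT=6, NUM_SPOOF_CLASS=2, FOCAL_GAMMA=None, NUM_RESNET_BLOCK=5,
            AFN_UPSAMPLE='Bilinear', AFN_ACTIVATION='sigmoid', NUM_HEADS=3,
            SAFN_HIDDEN=10, SAFN_DIM='T', RNN_HIDDEN=128, RNN_LAYERS=4,
            RNN_BI=True, DROPOUT_R=0.0)

# The ported .py3.ckpt files are ordinary Python 3 pickles with str keys,
# so no encoding='bytes' or key decoding is needed when loading them.
ckpt = torch.load('./ASSERT/pretrained/pa/senet50.py3.ckpt', map_location='cpu')
model.load_state_dict(ckpt['state_dict'])
model.eval()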