Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion INSTALLATION_SUPPORT.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ The library can be installed with:
```shell
pip install difflogic
```
> ⚠️ Note that `difflogic` requires CUDA, the CUDA Toolkit (for compilation), and `torch>=1.9.0` (matching the CUDA version).
> ⚠️ Note that, by default, `difflogic` requires CUDA, the CUDA Toolkit (for compilation), and `torch>=1.9.0` (matching the CUDA version). CUDA can be disabled by setting a flag like so: `export DIFFLOGIC_BUILD_CUDA_EXT=false` before running `pip install .`. Only the much slower pure Python implementation is available in that case.

**It is very important that the installed version of PyTorch was compiled with a CUDA version that is compatible with the CUDA version of the locally installed CUDA Toolkit.**

Expand Down
8 changes: 5 additions & 3 deletions difflogic/difflogic.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import warnings
import torch
import difflogic_cuda
import numpy as np
from .functional import bin_op_s, get_unique_connections, GradFactor
from .packbitstensor import PackBitsTensor

try:
import difflogic_cuda
except ImportError:
warnings.warn('failed to import difflogic_cuda. no cuda features will be available', ImportWarning)

########################################################################################################################

Expand Down Expand Up @@ -95,9 +99,7 @@ def forward_python(self, x):
assert x.shape[-1] == self.in_dim, (x[0].shape[-1], self.in_dim)

if self.indices[0].dtype == torch.int64 or self.indices[1].dtype == torch.int64:
print(self.indices[0].dtype, self.indices[1].dtype)
self.indices = self.indices[0].long(), self.indices[1].long()
print(self.indices[0].dtype, self.indices[1].dtype)

a, b = x[..., self.indices[0]], x[..., self.indices[1]]
if self.training:
Expand Down
7 changes: 6 additions & 1 deletion difflogic/packbitstensor.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import difflogic_cuda
import warnings
import torch
import numpy as np

try:
import difflogic_cuda
except ImportError:
warnings.warn('failed to import difflogic_cuda. no cuda features will be available', ImportWarning)


class PackBitsTensor:
def __init__(self, t: torch.BoolTensor, bit_count=32, device='cuda'):
Expand Down
49 changes: 31 additions & 18 deletions experiments/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@
64: torch.float64
}

IMPL_TO_DEVICE = {
'cuda': 'cuda',
'python': 'cpu'
}


def load_dataset(args):
validation_loader = None
Expand Down Expand Up @@ -117,7 +122,13 @@ def num_classes_of_dataset(dataset):


def get_model(args):
llkw = dict(grad_factor=args.grad_factor, connections=args.connections)

llkw = {
'grad_factor': args.grad_factor,
'connections': args.connections,
'implementation': args.implementation,
'device': IMPL_TO_DEVICE[args.implementation]
}

in_dim = input_dim_of_dataset(args.dataset)
class_count = num_classes_of_dataset(args.dataset)
Expand Down Expand Up @@ -158,7 +169,7 @@ def get_model(args):
'total_num_weights': total_num_weights,
})

model = model.to('cuda')
model = model.to(llkw['device'])

print(model)
if args.experiment_id is not None:
Expand All @@ -181,28 +192,28 @@ def train(model, x, y, loss_fn, optimizer):
return loss.item()


def eval(model, loader, mode):
def eval(model, loader, mode, device='cuda'):
orig_mode = model.training
with torch.no_grad():
model.train(mode=mode)
res = np.mean(
[
(model(x.to('cuda').round()).argmax(-1) == y.to('cuda')).to(torch.float32).mean().item()
(model(x.to(device).round()).argmax(-1) == y.to(device)).to(torch.float32).mean().item()
for x, y in loader
]
)
model.train(mode=orig_mode)
return res.item()


def packbits_eval(model, loader):
def packbits_eval(model, loader, device='cuda'):
orig_mode = model.training
with torch.no_grad():
model.eval()
res = np.mean(
[
(model(PackBitsTensor(x.to('cuda').reshape(x.shape[0], -1).round().bool())).argmax(-1) == y.to(
'cuda')).to(torch.float32).mean().item()
(model(PackBitsTensor(x.to(device).reshape(x.shape[0], -1).round().bool())).argmax(-1) == y.to(
device)).to(torch.float32).mean().item()
for x, y in loader
]
)
Expand Down Expand Up @@ -258,6 +269,8 @@ def packbits_eval(model, loader):

print(vars(args))

device = IMPL_TO_DEVICE[args.implementation]

assert args.num_iterations % args.eval_freq == 0, (
f'iteration count ({args.num_iterations}) has to be divisible by evaluation frequency ({args.eval_freq})'
)
Expand All @@ -283,23 +296,23 @@ def packbits_eval(model, loader):
desc='iteration',
total=args.num_iterations,
):
x = x.to(BITS_TO_TORCH_FLOATING_POINT_TYPE[args.training_bit_count]).to('cuda')
y = y.to('cuda')
x = x.to(BITS_TO_TORCH_FLOATING_POINT_TYPE[args.training_bit_count]).to(device)
y = y.to(device)

loss = train(model, x, y, loss_fn, optim)

if (i+1) % args.eval_freq == 0:
if args.extensive_eval:
train_accuracy_train_mode = eval(model, train_loader, mode=True)
valid_accuracy_eval_mode = eval(model, validation_loader, mode=False)
valid_accuracy_train_mode = eval(model, validation_loader, mode=True)
train_accuracy_train_mode = eval(model, train_loader, mode=True, device=device)
valid_accuracy_eval_mode = eval(model, validation_loader, mode=False, device=device)
valid_accuracy_train_mode = eval(model, validation_loader, mode=True, device=device)
else:
train_accuracy_train_mode = -1
valid_accuracy_eval_mode = -1
valid_accuracy_train_mode = -1
train_accuracy_eval_mode = eval(model, train_loader, mode=False)
test_accuracy_eval_mode = eval(model, test_loader, mode=False)
test_accuracy_train_mode = eval(model, test_loader, mode=True)
train_accuracy_eval_mode = eval(model, train_loader, mode=False, device=device)
test_accuracy_eval_mode = eval(model, test_loader, mode=False, device=device)
test_accuracy_train_mode = eval(model, test_loader, mode=True, device=device)

r = {
'train_acc_eval_mode': train_accuracy_eval_mode,
Expand All @@ -311,9 +324,9 @@ def packbits_eval(model, loader):
}

if args.packbits_eval:
r['train_acc_eval'] = packbits_eval(model, train_loader)
r['valid_acc_eval'] = packbits_eval(model, train_loader)
r['test_acc_eval'] = packbits_eval(model, test_loader)
r['train_acc_eval'] = packbits_eval(model, train_loader, device=device)
r['valid_acc_eval'] = packbits_eval(model, train_loader, device=device)
r['test_acc_eval'] = packbits_eval(model, test_loader, device=device)

if args.experiment_id is not None:
results.store_results(r)
Expand Down
25 changes: 19 additions & 6 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,25 @@
import os
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

with open('README.md', 'r', encoding='utf-8') as fh:
long_description = fh.read()


# decide from env variable if cuda extension should be built (default is 'true')
build_cuda_ext = os.getenv('DIFFLOGIC_BUILD_CUDA_EXT', 'true').lower()

if build_cuda_ext == 'true':
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
ext_modules = [
CUDAExtension('difflogic_cuda', [
'difflogic/cuda/difflogic.cpp',
'difflogic/cuda/difflogic_kernel.cu',
], extra_compile_args={'nvcc': ['-lineinfo']})
]
else:
ext_modules = []


setup(
name='difflogic',
version='0.1.0',
Expand All @@ -25,11 +41,8 @@
],
package_dir={'difflogic': 'difflogic'},
packages=['difflogic'],
ext_modules=[CUDAExtension('difflogic_cuda', [
'difflogic/cuda/difflogic.cpp',
'difflogic/cuda/difflogic_kernel.cu',
], extra_compile_args={'nvcc': ['-lineinfo']})],
cmdclass={'build_ext': BuildExtension},
ext_modules=ext_modules,
cmdclass={'build_ext': BuildExtension} if ext_modules else {}, # Only if building extensions
python_requires='>=3.6',
install_requires=[
'torch>=1.6.0',
Expand Down