Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 82 additions & 59 deletions bindings/python/chunk_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,12 @@ PYBIND11_MODULE(chunking_cpp, m) {

std::vector<std::vector<double>> nested_data;
nested_data.reserve(buf.shape[0]);
auto ptr = static_cast<double*>(buf.ptr);

for (py::ssize_t i = 0; i < buf.shape[0]; i++) {
std::vector<double> row(ptr + i * buf.shape[1], ptr + (i + 1) * buf.shape[1]);
nested_data.push_back(std::move(row));
for (size_t i = 0; i < buf.shape[0]; ++i) {
std::vector<double> row(
static_cast<double*>(buf.ptr) + i * buf.shape[1],
static_cast<double*>(buf.ptr) + (i + 1) * buf.shape[1]);
nested_data.push_back(row);
}
self.add(nested_data);
})
Expand All @@ -81,26 +82,21 @@ PYBIND11_MODULE(chunking_cpp, m) {
auto chunks = self.get_chunks();
py::list result;
for (const auto& chunk : chunks) {
py::list chunk_list;
for (const auto& row : chunk) {
chunk_list.append(py::array_t<double>(row.size(), row.data()));
// Convert each chunk to numpy array
ssize_t rows = chunk.size();
ssize_t cols = rows > 0 ? chunk[0].size() : 0;

auto array = py::array_t<double>({rows, cols});
auto buf = array.request();
double* ptr = static_cast<double*>(buf.ptr);

for (size_t i = 0; i < rows; ++i) {
std::copy(chunk[i].begin(), chunk[i].end(), ptr + i * cols);
}
result.append(chunk_list);
result.append(array);
}
return result;
})
.def("chunk_by_size", [](chunk_processing::Chunk<std::vector<double>>& self, size_t size) {
auto chunks = self.chunk_by_size(size);
py::list result;
for (const auto& chunk : chunks) {
py::list chunk_list;
for (const auto& row : chunk) {
chunk_list.append(py::array_t<double>(row.size(), row.data()));
}
result.append(chunk_list);
}
return result;
});
});

py::class_<chunk_processing::Chunk<std::vector<std::vector<double>>>>(m, "Chunk3D")
.def(py::init<size_t>())
Expand All @@ -114,19 +110,18 @@ PYBIND11_MODULE(chunking_cpp, m) {

std::vector<std::vector<std::vector<double>>> nested_data;
nested_data.reserve(buf.shape[0]);
auto ptr = static_cast<double*>(buf.ptr);

for (py::ssize_t i = 0; i < buf.shape[0]; i++) {
double* ptr = static_cast<double*>(buf.ptr);
for (size_t i = 0; i < buf.shape[0]; ++i) {
std::vector<std::vector<double>> matrix;
matrix.reserve(buf.shape[1]);

for (py::ssize_t j = 0; j < buf.shape[1]; j++) {
for (size_t j = 0; j < buf.shape[1]; ++j) {
std::vector<double> row(
ptr + (i * buf.shape[1] * buf.shape[2]) + (j * buf.shape[2]),
ptr + (i * buf.shape[1] * buf.shape[2]) + ((j + 1) * buf.shape[2]));
matrix.push_back(std::move(row));
matrix.push_back(row);
}
nested_data.push_back(std::move(matrix));
nested_data.push_back(matrix);
}
self.add(nested_data);
})
Expand All @@ -135,32 +130,24 @@ PYBIND11_MODULE(chunking_cpp, m) {
auto chunks = self.get_chunks();
py::list result;
for (const auto& chunk : chunks) {
py::list chunk_matrices;
for (const auto& matrix : chunk) {
py::list matrix_rows;
for (const auto& row : matrix) {
matrix_rows.append(py::array_t<double>(row.size(), row.data()));
// Convert each chunk to numpy array
if (chunk.empty() || chunk[0].empty()) continue;

ssize_t depth = chunk.size();
ssize_t rows = chunk[0].size();
ssize_t cols = chunk[0][0].size();

auto array = py::array_t<double>({depth, rows, cols});
auto buf = array.request();
double* ptr = static_cast<double*>(buf.ptr);

for (size_t i = 0; i < depth; ++i) {
for (size_t j = 0; j < rows; ++j) {
std::copy(chunk[i][j].begin(), chunk[i][j].end(),
ptr + (i * rows * cols) + (j * cols));
}
chunk_matrices.append(matrix_rows);
}
result.append(chunk_matrices);
}
return result;
})
.def("chunk_by_size",
[](chunk_processing::Chunk<std::vector<std::vector<double>>>& self, size_t size) {
auto chunks = self.chunk_by_size(size);
py::list result;
for (const auto& chunk : chunks) {
py::list chunk_matrices;
for (const auto& matrix : chunk) {
py::list matrix_rows;
for (const auto& row : matrix) {
matrix_rows.append(py::array_t<double>(row.size(), row.data()));
}
chunk_matrices.append(matrix_rows);
}
result.append(chunk_matrices);
result.append(array);
}
return result;
});
Expand All @@ -180,7 +167,20 @@ PYBIND11_MODULE(chunking_cpp, m) {
.def("get_window_size", &neural_chunking::NeuralChunking<double>::get_window_size)
.def("get_threshold", &neural_chunking::NeuralChunking<double>::get_threshold)
.def("set_window_size", &neural_chunking::NeuralChunking<double>::set_window_size)
.def("set_threshold", &neural_chunking::NeuralChunking<double>::set_threshold);
.def("set_threshold", &neural_chunking::NeuralChunking<double>::set_threshold)
.def("set_learning_rate", &neural_chunking::NeuralChunking<double>::set_learning_rate)
.def("get_learning_rate", &neural_chunking::NeuralChunking<double>::get_learning_rate)
.def("set_batch_size", &neural_chunking::NeuralChunking<double>::set_batch_size)
.def("get_batch_size", &neural_chunking::NeuralChunking<double>::get_batch_size)
.def("set_activation", &neural_chunking::NeuralChunking<double>::set_activation)
.def("get_activation", &neural_chunking::NeuralChunking<double>::get_activation)
.def("set_epochs", &neural_chunking::NeuralChunking<double>::set_epochs)
.def("get_epochs", &neural_chunking::NeuralChunking<double>::get_epochs)
.def("train", [](neural_chunking::NeuralChunking<double>& self,
const std::vector<double>& data) {
auto losses = self.train(data);
return py::array_t<double>(losses.size(), losses.data());
});

// GPU Chunking
#ifdef HAVE_CUDA
Expand All @@ -192,24 +192,45 @@ PYBIND11_MODULE(chunking_cpp, m) {
// Sophisticated Chunking
py::class_<sophisticated_chunking::WaveletChunking<double>>(m, "WaveletChunking")
.def(py::init<size_t, double>())
.def("chunk", &sophisticated_chunking::WaveletChunking<double>::chunk)
.def("chunk", [](sophisticated_chunking::WaveletChunking<double>& self,
const std::vector<double>& data) {
auto chunks = self.chunk(data);
py::list result;
for (const auto& chunk : chunks) {
result.append(py::array_t<double>(chunk.size(), chunk.data()));
}
return result;
})
.def("set_window_size", &sophisticated_chunking::WaveletChunking<double>::set_window_size)
.def("get_window_size", &sophisticated_chunking::WaveletChunking<double>::get_window_size)
.def("set_threshold", &sophisticated_chunking::WaveletChunking<double>::set_threshold)
.def("get_threshold", &sophisticated_chunking::WaveletChunking<double>::get_threshold);
.def("get_threshold", &sophisticated_chunking::WaveletChunking<double>::get_threshold)
.def("get_wavelet_type", &sophisticated_chunking::WaveletChunking<double>::get_wavelet_type)
.def("set_wavelet_type", &sophisticated_chunking::WaveletChunking<double>::set_wavelet_type);

py::class_<sophisticated_chunking::MutualInformationChunking<double>>(
m, "MutualInformationChunking")
.def(py::init<size_t, double>())
.def("chunk", &sophisticated_chunking::MutualInformationChunking<double>::chunk);
.def("chunk", [](sophisticated_chunking::MutualInformationChunking<double>& self,
const std::vector<double>& data) {
auto chunks = self.chunk(data);
py::list result;
for (const auto& chunk : chunks) {
// Convert each chunk to numpy array
result.append(py::array_t<double>(chunk.size(), chunk.data()));
}
return result;
});

py::class_<sophisticated_chunking::DTWChunking<double>>(m, "DTWChunking")
.def(py::init<size_t, double>())
.def(py::init<size_t, double>(), py::arg("window_size") = 10, py::arg("threshold") = 1.0)
.def("chunk", &sophisticated_chunking::DTWChunking<double>::chunk)
.def("set_window_size", &sophisticated_chunking::DTWChunking<double>::set_window_size)
.def("get_window_size", &sophisticated_chunking::DTWChunking<double>::get_window_size)
.def("get_dtw_threshold", &sophisticated_chunking::DTWChunking<double>::get_dtw_threshold)
.def("set_window_size", &sophisticated_chunking::DTWChunking<double>::set_window_size)
.def("set_dtw_threshold", &sophisticated_chunking::DTWChunking<double>::set_dtw_threshold)
.def("get_dtw_threshold", &sophisticated_chunking::DTWChunking<double>::get_dtw_threshold);
.def("get_distance_metric", &sophisticated_chunking::DTWChunking<double>::get_distance_metric)
.def("set_distance_metric", &sophisticated_chunking::DTWChunking<double>::set_distance_metric);

// Chunk Metrics
py::class_<chunk_metrics::ChunkQualityAnalyzer<double>>(m, "ChunkQualityAnalyzer")
Expand Down Expand Up @@ -277,7 +298,9 @@ PYBIND11_MODULE(chunking_cpp, m) {
.def_readwrite("strategy_name", &chunk_benchmark::BenchmarkResult::strategy_name);

py::class_<chunk_benchmark::ChunkBenchmark<double>>(m, "ChunkBenchmark")
.def(py::init<const std::vector<double>&, size_t>())
.def(py::init<const std::vector<double>&, size_t>(),
py::arg("data"),
py::arg("num_iterations") = 100)
.def("add_strategy", &chunk_benchmark::ChunkBenchmark<double>::add_strategy)
.def("benchmark_chunking", &chunk_benchmark::ChunkBenchmark<double>::benchmark_chunking)
.def("save_results", &chunk_benchmark::ChunkBenchmark<double>::save_results);
Expand Down
54 changes: 45 additions & 9 deletions include/chunk_visualization.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,20 +171,56 @@ class CHUNK_EXPORT ChunkVisualizer {
}

void export_to_graphviz(const std::string& filename = "chunks.dot") {
std::string actual_filename = output_dir + "/" + filename;
// Ensure the output directory exists
std::filesystem::create_directories(output_dir);

// Create full path
std::string actual_filename;
if (filename.find('/') != std::string::npos) {
// If filename contains a path, use it as is
actual_filename = filename;
} else {
// Otherwise, append to output_dir
actual_filename = output_dir + "/" + filename;
}

// Create the file
std::ofstream file(actual_filename);
if (!file) {
throw chunk_processing::VisualizationError("Failed to create GraphViz file");
if (!file.is_open()) {
throw chunk_processing::VisualizationError(
"Failed to create GraphViz file: " + actual_filename);
}

file << "digraph chunks {\n";
for (size_t i = 0; i < data.size(); ++i) {
file << " chunk" << i << " [label=\"Value: " << format_value(data[i]) << "\"];\n";
if (i > 0) {
file << " chunk" << (i - 1) << " -> chunk" << i << ";\n";
try {
file << "digraph chunks {\n";
for (size_t i = 0; i < data.size(); ++i) {
file << " chunk" << i << " [label=\"Value: " << format_value(data[i]) << "\"];\n";
if (i > 0) {
file << " chunk" << (i - 1) << " -> chunk" << i << ";\n";
}
}
file << "}\n";

// Ensure everything is written
file.flush();

if (file.fail()) {
throw chunk_processing::VisualizationError(
"Failed to write to GraphViz file: " + actual_filename);
}
} catch (const std::exception& e) {
throw chunk_processing::VisualizationError(
std::string("Error writing GraphViz file: ") + e.what());
}

file.close();

// Verify the file was created and has content
if (!std::filesystem::exists(actual_filename) ||
std::filesystem::file_size(actual_filename) == 0) {
throw chunk_processing::VisualizationError(
"GraphViz file was not created properly: " + actual_filename);
}
file << "}\n";
}

void visualize_boundaries() {
Expand Down
Loading
Loading