Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions quantization/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@ edition = "2021"
[features]
dump_image = ["dep:image"]

[build-dependencies]
cc = "1.0"

[dependencies]
serde = { version = "~1.0", features = ["derive"] }
serde_json = "~1.0"
Expand Down
41 changes: 0 additions & 41 deletions quantization/build.rs

This file was deleted.

122 changes: 0 additions & 122 deletions quantization/cpp/avx2.c

This file was deleted.

107 changes: 0 additions & 107 deletions quantization/cpp/sse.c

This file was deleted.

6 changes: 1 addition & 5 deletions quantization/src/encoded_vectors_binary.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::encoded_vectors::validate_vector_parameters;
use crate::simd::sse2::xor_popcnt::impl_xor_popcnt_sse;
use crate::utils::{transmute_from_u8_to_slice, transmute_to_u8_slice};
use crate::{
DistanceType, EncodedStorage, EncodedStorageBuilder, EncodedVectors, EncodingError,
Expand Down Expand Up @@ -214,11 +215,6 @@ impl<TStorage: EncodedStorage> EncodedVectors<EncodedBinVector> for EncodedVecto
}
}

// Declares `impl_xor_popcnt_sse` as an external C-ABI function; the declaration is
// only compiled when targeting x86_64.
// It takes two `*const u64` pointers plus a `u32` count and returns a `u32`.
// NOTE(review): judging by the name, this XORs the two buffers and counts the set
// bits, with `count` presumably being the number of `u64` words readable behind
// each pointer — confirm against the implementation.
#[cfg(target_arch = "x86_64")]
extern "C" {
fn impl_xor_popcnt_sse(query_ptr: *const u64, vector_ptr: *const u64, count: u32) -> u32;
}

#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
extern "C" {
fn impl_xor_popcnt_neon(query_ptr: *const u64, vector_ptr: *const u64, count: u32) -> u32;
Expand Down
13 changes: 4 additions & 9 deletions quantization/src/encoded_vectors_u8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ use std::path::Path;

use crate::encoded_vectors::validate_vector_parameters;
use crate::quantile::{find_min_max_from_iter, find_quantile_interval};
use crate::simd::avx2::dot_u8::impl_score_dot_avx;
use crate::simd::avx2::manhattan_u8::impl_score_l1_avx;
use crate::simd::sse2::dot_u8::impl_score_dot_sse;
use crate::simd::sse2::manhattan_u8::impl_score_l1_sse;
use crate::{
encoded_storage::{EncodedStorage, EncodedStorageBuilder},
encoded_vectors::{DistanceType, EncodedVectors, VectorParameters},
Expand Down Expand Up @@ -473,15 +477,6 @@ fn impl_score_l1(q_ptr: *const u8, v_ptr: *const u8, actual_dim: usize) -> i32 {
}
}

// External C-ABI scoring functions, declared only when targeting x86_64.
// Every function takes a query pointer, a vector pointer (both `*const u8`) and a
// `u32` dimension, and returns an `f32`.
// NOTE(review): the names suggest dot-product and L1 (Manhattan) scores in AVX and
// SSE flavours; presumably `dim` is the number of bytes readable behind each
// pointer — confirm against the implementations.
#[cfg(target_arch = "x86_64")]
extern "C" {
// AVX-named variants.
fn impl_score_dot_avx(query_ptr: *const u8, vector_ptr: *const u8, dim: u32) -> f32;
fn impl_score_l1_avx(query_ptr: *const u8, vector_ptr: *const u8, dim: u32) -> f32;

// SSE-named variants with the same signatures.
fn impl_score_dot_sse(query_ptr: *const u8, vector_ptr: *const u8, dim: u32) -> f32;
fn impl_score_l1_sse(query_ptr: *const u8, vector_ptr: *const u8, dim: u32) -> f32;
}

#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
extern "C" {
fn impl_score_dot_neon(query_ptr: *const u8, vector_ptr: *const u8, dim: u32) -> f32;
Expand Down
1 change: 1 addition & 0 deletions quantization/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ pub mod encoded_vectors_pq;
pub mod encoded_vectors_u8;
pub mod kmeans;
pub mod quantile;
pub mod simd;
mod utils;

use std::fmt::Display;
Expand Down
40 changes: 40 additions & 0 deletions quantization/src/simd/avx2/dot_u8.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
use std::arch::x86_64::*;

use super::hsum256_epi32_avx;

/// Computes the dot product of two `u8` vectors with AVX2 and returns it as `f32`.
///
/// Full 32-byte chunks are multiplied with `_mm256_maddubs_epi16`, which reads its
/// first operand (the `vector_ptr` data) as unsigned bytes and its second operand
/// (the `query_ptr` data) as signed bytes, and saturates every pairwise sum to
/// `i16`. A trailing 16-byte chunk is instead widened to 16 bits and multiplied
/// exactly.
///
/// NOTE(review): the two paths only agree when the inputs are small enough for
/// `maddubs` to be exact (for example every byte in `0..=127`). Presumably the
/// encoder guarantees such a range — confirm against the caller.
///
/// # Safety
///
/// * The CPU executing this function must support AVX2.
/// * `query_ptr` and `vector_ptr` must each be valid for reads of `dim` bytes.
///   No particular alignment is required; unaligned loads are used.
/// * `dim` must be a multiple of 16: whenever `dim % 32 != 0`, exactly 16 further
///   bytes are read from each pointer, regardless of the actual remainder.
#[target_feature(enable = "avx2")]
pub unsafe fn impl_score_dot_avx(query_ptr: *const u8, vector_ptr: *const u8, dim: u32) -> f32 {
    let mut v_ptr = vector_ptr as *const __m256i;
    let mut q_ptr = query_ptr as *const __m256i;

    // Eight 32-bit partial sums; reduced horizontally at the very end.
    let mut mul1 = _mm256_setzero_si256();
    for _ in 0..dim / 32 {
        let v = _mm256_loadu_si256(v_ptr);
        let q = _mm256_loadu_si256(q_ptr);
        v_ptr = v_ptr.add(1);
        q_ptr = q_ptr.add(1);

        // 32 byte products collapsed into 16 signed 16-bit pair sums.
        let s = _mm256_maddubs_epi16(v, q);
        // Sign-extend both 128-bit halves to 32 bits before accumulating so the
        // running total is not limited to 16 bits.
        let s_low = _mm256_cvtepi16_epi32(_mm256_castsi256_si128(s));
        let s_high = _mm256_cvtepi16_epi32(_mm256_extractf128_si256(s, 1));
        mul1 = _mm256_add_epi32(mul1, s_low);
        mul1 = _mm256_add_epi32(mul1, s_high);
    }

    // Vector sizes are assumed to be multiples of 16, so any remainder is exactly
    // one final 16-element chunk.
    if dim % 32 != 0 {
        let v_short = _mm_loadu_si128(v_ptr as *const __m128i);
        let q_short = _mm_loadu_si128(q_ptr as *const __m128i);

        // Zero-extend the bytes to 16 bits; a u8 * u8 product always fits in the
        // low 16 bits kept by `mullo`.
        let v1 = _mm256_cvtepu8_epi16(v_short);
        let q1 = _mm256_cvtepu8_epi16(q_short);
        let s = _mm256_mullo_epi16(v1, q1);

        // Each 32-bit lane now holds two unsigned 16-bit products: the mask picks
        // the lower one, the logical shift picks the upper one.
        let mask_epu32 = _mm256_set1_epi32(0xFFFF);
        mul1 = _mm256_add_epi32(mul1, _mm256_and_si256(s, mask_epu32));
        mul1 = _mm256_add_epi32(mul1, _mm256_srli_epi32(s, 16));
    }

    hsum256_epi32_avx(mul1) as f32
}
Loading