diff --git a/README.md b/README.md index e5a9a0b..0972a17 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,14 @@ pip install -e . pip install git+https://github.com/barhanc/fastmap.git ``` -# Benchmarking +# Benchmarks + +To benchmark and test the implemented algorithms you need additional packages: `pytest`, `mapel` and +`tqdm`. You can install those packages on your own or (if you chose to install the package from +source) use +```shell +pip install -e '.[testing]' +``` In the [example.py](/examples/example.py) file we provide a minimal reproducible example which generates a map of elections with 8 candidates and 96 voters that requires computing 46056 diff --git a/fastmap/kemeny.py b/fastmap/kemeny.py new file mode 100644 index 0000000..ef84377 --- /dev/null +++ b/fastmap/kemeny.py @@ -0,0 +1,163 @@ +import ctypes +import numpy as np + +from typing import Literal, Optional +from mapel.elections.objects.Election import Election + + +def swap_distance_between_potes( + pote_1: list[int], + pote_2: list[int], + method: Literal["bf", "ms", "pd"] = "bf", +) -> int: + """Computes the exact or approximated swap distance between two lists of positional votes (potes). + + This function calculates an approximation of the swap distance between two lists of positional votes + (potes). The swap distance is computed using different heuristics, which depend on the chosen `method`. + + NOTE: This function is a Python wrapper around a C extension. For implementation details, see the + 'fastmap._swap_distance_between_potes' module. + + Args: + pote_1: + A numpy array representing the first positional vote (pote_1). The array elements should + represent the rank positions of candidates in the votes. Shape (n,). + + pote_2: + A numpy array representing the second positional vote (pote_2), with the same shape as + `pote_1`. Each element should represent the rank position of the corresponding candidate in the vote. + + method: + The method used to calculate the swap distance. Should be one of the following: + + `"bf"` - Brute-force approach that calculates the swap distance by checking all possible swaps + between elements of `pote_1` and `pote_2`. This method works well for small lists but is computationally expensive for larger ones. + + `"pd"` - Positional difference method. This method approximates the swap distance by summing the + absolute differences in corresponding positions in `pote_1` and `pote_2`. It provides a fast + approximation for the swap distance but may not capture all the structural differences. + + `"ms"` - Merge-sort based approach that counts the number of inversions between `pote_1` and `pote_2`. + It is particularly efficient for larger lists but may not be the most precise for small lists. + + Returns: + int: + The calculated exact or approximated swap distance between `pote_1` and `pote_2` based on the selected `method`. + + Raises: + ImportError: + If the C extension for computing the swap distance cannot be imported or is not available. + + AssertionError: + If the input arrays do not meet the expected conditions (e.g., they are not numpy arrays, they have + different shapes, or the `method` is not one of the valid options). + """ + try: + import fastmap._swap_distance_between_potes + except ImportError as e: + raise ImportError("Error while importing C extension for computing Approx Swap Distance between Potes") from e + + methods: dict[str, int] = {"bf": 0, "pd": 1, "ms": 2} + + # assert isinstance(pote_1, np.ndarray) and isinstance(pote_2, np.ndarray), "Expected numpy arrays" + # assert pote_1.shape == pote_2.shape, "Expected arrays to have the same shape" + assert method in methods, "Expected one of available methods: 'bf', 'pd', 'ms'" + return fastmap._swap_distance_between_potes.swap_distance_between_potes(pote_1, pote_2, methods[method]) + + +def local_search_kKemeny_single_k( + election: Election, + k: int, + l: int, + starting: Optional[np.ndarray[int]] = None, +) -> dict[str, int]: + """Performs local search for k-Kemeny problem optimization using a single k. + + Args: + election: + Election object from mapof library. + k: + The number of candidates to consider in the local search. + l: + A parameter controlling the local search's stopping criteria. + starting: + An optional 1-D numpy array of integers indicating the starting ranking (default is None). + + Returns: + An integer representing the minimum distance between voters in the k-Kemeny problem optimization. + + Raises: + ImportError: Raises exception if C extension module for Kemeny local search is not found. + ValueError: If the input votes array has an incompatible shape or invalid data. + """ + + try: + import fastmap._local_search_kkemeny_single_k + except ImportError as e: + raise ImportError("Error while importing C extension for computing local search Kemeny") from e + + assert isinstance(election, Election), "Expected an Election object" + + if starting is not None: + assert isinstance(starting, np.ndarray), "Starting should be a numpy array" + assert starting.ndim == 1 and starting.size == k, "Starting array must be 1-D with size k" + assert starting.dtype == np.int32, "Expected starting array of dtype np.int32" + starting_ptr = starting.ctypes.data_as(ctypes.POINTER(ctypes.c_int)) + else: + starting_ptr = list(range(election.num_candidates)) + result = fastmap._local_search_kkemeny_single_k.local_search_kkemeny_single_k( + election.votes, k, l, election.num_voters, election.num_candidates, starting_ptr + ) + + return {"value": result} + + +def simulated_annealing( + election: Election, + initial_temp: float = 50.0, + cooling_rate: float = 0.995, + max_iterations: int = 500, +) -> dict[str, list[int]]: + """ + Performs simulated annealing for Kemeny ranking optimization. + + Args: + rankings: + A 2D list of integers representing rankings for the election. + n: + The number of items (candidates) in each ranking. + initial_temp: + The initial temperature for the simulated annealing process. + cooling_rate: + The rate at which the temperature is reduced during annealing. + max_iterations: + The maximum number of iterations for the annealing process. + + Returns: + An integer representing the minimum distance between voters in the Kemeny problem optimization. + + Raises: + ImportError: If the C extension module for simulated annealing is not found. + ValueError: If input rankings are invalid or improperly formatted. + + Notes: + - This function wraps a C extension to optimize performance. + - Rankings should be a complete and valid 2D list of integers. + """ + try: + import fastmap._simulated_annealing + except ImportError as e: + raise ImportError("C extension for simulated annealing could not be imported.") from e + + assert isinstance(election, Election), "Expected an Election object" + + # Call the C extension function + try: + best_ranking = fastmap._simulated_annealing.simulated_annealing( + election.votes, election.num_candidates, initial_temp, cooling_rate, max_iterations + ) + except Exception as e: + raise ValueError("Error during simulated annealing process.") from e + + # Calculate Kemeny distance for the best ranking + return {"value": best_ranking} diff --git a/fastmap/kkemeny/kemeny.h b/fastmap/kkemeny/kemeny.h new file mode 100644 index 0000000..9cb11e0 --- /dev/null +++ b/fastmap/kkemeny/kemeny.h @@ -0,0 +1,1126 @@ +#ifndef UTILS_H +#define UTILS_H + +#include +#include +#include +#include +#include + + +static inline void create_pairwise_matrix(int num_voters, int num_candidates, int **votes, double **matrix) { + for (int i = 0; i < num_candidates; i++) { + for (int j = 0; j < num_candidates; j++) { + matrix[i][j] = 0.0; + } + } + + for (int v = 0; v < num_voters; v++) { + for (int c1 = 0; c1 < num_candidates; c1++) { + for (int c2 = c1 + 1; c2 < num_candidates; c2++) { + int candidate1 = votes[v][c1]; + int candidate2 = votes[v][c2]; + matrix[candidate1][candidate2] += 1.0; + } + } + } + + for (int i = 0; i < num_candidates; i++) { + for (int j = i + 1; j < num_candidates; j++) { + matrix[i][j] /= num_voters; + matrix[j][i] = 1.0 - matrix[i][j]; + } + } +} + +static inline void calculate_cand_dom_dist(int **votes, int num_voters, int num_candidates, double **distances) { + + double **pairwise_matrix = (double **)malloc(num_candidates * sizeof(double *)); + for (int i = 0; i < num_candidates; i++) { + pairwise_matrix[i] = (double *)malloc(num_candidates * sizeof(double)); + } + + create_pairwise_matrix(num_voters, num_candidates, votes, pairwise_matrix); + + for (int i = 0; i < num_candidates; i++) { + for (int j = 0; j < num_candidates; j++) { + if (i != j) { + distances[i][j] = fabs(pairwise_matrix[i][j] - 0.5); + } else { + distances[i][j] = 0.0; + } + } + } + + for (int i = 0; i < num_candidates; i++) { + free(pairwise_matrix[i]); + } + free(pairwise_matrix); +} + +static inline double calculate_distance(int *arr, double **wmg, int m) { + double dist = 0; + for (int i = 0; i < m; i++) { + for (int j = i + 1; j < m; j++) { + dist += wmg[arr[j]][arr[i]]; + } + } + return dist; +} + +static inline void swap(int *a, int *b) { + int temp = *a; + *a = *b; + *b = temp; +} + +static inline void generate_next_permutation(int *arr, int n, int pos) { + int i; + for (i = n - 1; i > pos; i--) { + if (arr[i] > arr[pos]) { + break; + } + } + swap(&arr[pos], &arr[i]); +} + +static inline void kemeny_ranking(int **votes, int num_voters, int num_candidates, int *best, double *best_d) { + double **matrix = (double **)malloc(num_candidates * sizeof(double *)); + for (int i = 0; i < num_candidates; i++) { + matrix[i] = (double *)malloc(num_candidates * sizeof(double)); + } + + create_pairwise_matrix(num_voters, num_candidates, votes, matrix); + int *arr = (int *)malloc(num_candidates * sizeof(int)); + for (int i = 0; i < num_candidates; i++) { + arr[i] = i; + } + + *best_d = INFINITY; + + while (1) { + double dist = calculate_distance(arr, matrix, num_candidates); + + if (dist < *best_d) { + for (int i = 0; i < num_candidates; i++) { + best[i] = arr[i]; + } + *best_d = dist; + } + + int pos = -1; + for (int i = num_candidates - 2; i >= 0; i--) { + if (arr[i] < arr[i + 1]) { + pos = i; + break; + } + } + + if (pos == -1) { + break; + } + + generate_next_permutation(arr, num_candidates, pos); + + int left = pos + 1, right = num_candidates - 1; + while (left < right) { + swap(&arr[left], &arr[right]); + left++; + right--; + } + } + + for (int i = 0; i < num_candidates; i++) { + free(matrix[i]); + } + free(matrix); + free(arr); +} + +int swap_distance_between_votes(int *v1, int *v2, int n) { + int swap_distance = 0; + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + if ((v1[i] > v1[j] && v2[i] < v2[j]) || (v1[i] < v1[j] && v2[i] > v2[j])) { + swap_distance++; + } + } + } + return swap_distance; +} + +void compute_potes(int **votes, int votes_num, int n, int **potes) { + for (int v = 0; v < votes_num; v++) { + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + if (votes[v][j] == i) { + potes[v][i] = j; + break; + } + } + } + } +} + +void calculate_vote_swap_dist(int **votes, int votes_num, int **distances, int n) { + int **potes = (int **)malloc(votes_num * sizeof(int *)); + for (int i = 0; i < votes_num; i++) { + potes[i] = (int *)malloc(n * sizeof(int)); + } + + compute_potes(votes, votes_num, n, potes); + + for (int v1 = 0; v1 < votes_num; v1++) { + for (int v2 = 0; v2 < votes_num; v2++) { + distances[v1][v2] = swap_distance_between_votes(potes[v1], potes[v2], n); + } + } + + for (int i = 0; i < votes_num; i++) { + free(potes[i]); + } + free(potes); +} + +static inline int distances_to_rankings(int *rankings, int **distances, int k, int votes_num) { + int sum = 0; + for (int i = 0; i < votes_num; i++) { + int min_distance = distances[rankings[0]][i]; + for (int j = 1; j < k; j++) { + if (distances[rankings[j]][i] < min_distance) { + min_distance = distances[rankings[j]][i]; + } + } + sum += min_distance; + } + return sum; +} + +static inline bool find_improvement(int **distances, int *d, int *starting, int *rest, int votes_num, int k, int l) { + int *ranks = (int *)malloc(k * sizeof(int)); + bool found_improvement = false; + + for (int i = 0; i < (1 << k); i++) { + if (__builtin_popcount(i) != l) continue; + + int cut[l]; + int cut_index = 0; + for (int j = 0; j < k; j++) { + if ((i & (1 << j)) != 0) { + cut[cut_index++] = j; + } + } + + for (int m = 0; m < (1 << (votes_num - k)); m++) { + if (__builtin_popcount(m) != l) continue; + + int paste[l]; + int paste_index = 0; + for (int n = 0; n < (votes_num - k); n++) { + if ((m & (1 << n)) != 0) { + paste[paste_index++] = rest[n]; + } + } + + int j = 0; + for (int i = 0; i < k; i++) { + if (j < l && cut[j] == i) { + ranks[i] = paste[j++]; + } else { + ranks[i] = starting[i]; + } + } + + bool valid = true; + for (int a = 0; a < k; a++) { + for (int b = a + 1; b < k; b++) { + if (ranks[a] == ranks[b]) { + valid = false; + break; + } + } + if (!valid) break; + } + + if (valid) { + int d_new = distances_to_rankings(ranks, distances, k, votes_num); + if (*d > d_new) { + *d = d_new; + for (int a = 0; a < k; a++) { + starting[a] = ranks[a]; + } + found_improvement = true; + break; + } + } + } + + if (found_improvement) break; + } + + free(ranks); + return found_improvement; +} + +static inline void restore_order(int *x, int len) { + for (int i = 0; i < len; i++) { + for (int j = len - i; j < len; j++) { + if (x[j] >= x[len - i - 1]) { + x[j] += 1; + } + } + } +} + +static inline int local_search_kKemeny_single_k(int **votes, int k, int l, int votes_num, int cand_num, int *starting) { + bool starting_allocated = false; + if (!starting) { + starting = (int *)malloc(k * sizeof(int)); + starting_allocated = true; + for (int i = 0; i < k; i++) { + starting[i] = i; + } + } + + // Allocate distances + int **distances = (int **)malloc(votes_num * sizeof(int *)); + for (int i = 0; i < votes_num; i++) { + distances[i] = (int *)malloc(votes_num * sizeof(int)); + } + + // Calculate distances + calculate_vote_swap_dist(votes, votes_num, distances, cand_num); + + // Initial distance computation + int d = distances_to_rankings(starting, distances, k, votes_num); + + bool check = true; + while (check) { + // Allocate rest array + int *rest = (int *)malloc((votes_num - k) * sizeof(int)); + int rest_index = 0; + + for (int i = 0; i < votes_num; i++) { + bool is_starting = false; + for (int j = 0; j < k; j++) { + if (starting[j] == i) { + is_starting = true; + break; + } + } + if (!is_starting) { + rest[rest_index++] = i; + } + } + + check = find_improvement(distances, &d, starting, rest, votes_num, k, l); + + free(rest); // Free rest array + } + + // Free distances array + for (int i = 0; i < votes_num; i++) { + free(distances[i]); + } + free(distances); + + if (starting_allocated) { + free(starting); + } + + return d; +} + +double *local_search_kKemeny(int **votes, int num_voters, int num_candidates, int l, int *starting) { + double max_dist = num_candidates * (num_candidates - 1) / 2.0; + double *res = (double *)malloc(num_voters * sizeof(double)); + if (res == NULL) { + printf("Memory allocation failed\n"); + return NULL; + } + + int filled_count = 0; + for (int k = 1; k < num_voters; k++) { // Loop from 1 to num_voters - 1 + double d; + if (starting == NULL) { + d = local_search_kKemeny_single_k(votes, k, l, num_voters, num_candidates, NULL) / (max_dist * num_voters); + } else { + int *starting_k = (int *)malloc(k * sizeof(int)); + if (starting_k == NULL) { + printf("Memory allocation failed\n"); + free(res); + return NULL; + } + + for (int i = 0; i < k; i++) { + starting_k[i] = starting[i]; + } + + d = local_search_kKemeny_single_k(votes, k, l, num_voters, num_candidates, starting_k) / (max_dist * num_voters); + free(starting_k); + } + + if (d > 0) { + res[k - 1] = d; + filled_count++; + } else { + break; + } + } + + // Fill remaining entries in `res` with 0 if any + for (int k = filled_count; k < num_voters; k++) { + res[k] = 0.0; + } + + return res; +} + + +static inline void update_indexes(int* indexes, int index, int no_start_indexes) { + for (int i = index; i < no_start_indexes-1; i++) { + indexes[i] = indexes[i+1]; + } +} + +static inline double polarization_index(int **votes, int num_voters, int num_candidates) { + int **distances = (int **)malloc(num_voters * sizeof(int *)); + for (int i = 0; i < num_voters; i++) { + distances[i] = (int *)malloc(num_voters * sizeof(int)); + } + calculate_vote_swap_dist(votes, num_voters, distances, num_candidates); + + int best_1 = 0; + int min_sum = INT_MAX; + for (int i = 0; i < num_voters; i++) { + int sum = 0; + for (int j = 0; j < num_voters; j++) { + sum += distances[i][j]; + } + if (sum < min_sum) { + min_sum = sum; + best_1 = i; + } + } + + int *best_vec = distances[best_1]; + int first_kemeny = 0; + for (int i = 0; i < num_voters; i++) { + first_kemeny += best_vec[i]; + } + + int **new_distances = (int **)malloc((num_voters - 1) * sizeof(int *)); + for (int i = 0, k = 0; i < num_voters; i++) { + if (i == best_1) continue; + new_distances[k] = (int *)malloc(num_voters * sizeof(int)); + memcpy(new_distances[k], distances[i], num_voters * sizeof(int)); + k++; + } + + int **relatives = (int **)malloc((num_voters - 1) * sizeof(int *)); + for (int i = 0; i < num_voters - 1; i++) { + relatives[i] = (int *)malloc(num_voters * sizeof(int)); + for (int j = 0; j < num_voters; j++) { + relatives[i][j] = new_distances[i][j] - best_vec[j]; + if (relatives[i][j] >= 0) { + relatives[i][j] = 0; + } + } + } + + int best_2 = 0; + min_sum = INT_MAX; + for (int i = 0; i < num_voters - 1; i++) { + int sum = 0; + for (int j = 0; j < num_voters; j++) { + sum += relatives[i][j]; + } + if (sum < min_sum) { + min_sum = sum; + best_2 = i; + } + } + + if (best_2 >= best_1) { + best_2 += 1; + } + + int chosen[2] = {best_1, best_2}; + if (chosen[0] > chosen[1]) { + int temp = chosen[0]; + chosen[0] = chosen[1]; + chosen[1] = temp; + } + + int second_kemeny_1 = local_search_kKemeny_single_k(votes, 2, 1, num_voters, num_candidates, chosen); + int second_kemeny_2 = local_search_kKemeny_single_k(votes, 2, 1, num_voters, num_candidates, NULL); + int second_kemeny = second_kemeny_1 < second_kemeny_2 ? second_kemeny_1 : second_kemeny_2; + + double max_dist = (num_candidates) * (num_candidates - 1) / 2.0; + double value = 2 * (first_kemeny - second_kemeny) / (double)(num_voters) / max_dist; + + for (int i = 0; i < num_voters; i++) { + if (distances[i] != NULL) free(distances[i]); + } + free(distances); + + for (int i = 0; i < num_voters - 1; i++) { + free(new_distances[i]); + } + free(new_distances); + + for (int i = 0; i < num_voters - 1; i++) { + free(relatives[i]); + } + free(relatives); + + return value; +} + +static inline double diversity_index(int** votes, int num_voters, int num_candidates) { + int max_dist = num_candidates * (num_candidates - 1) / 2; + double* res = (double*)malloc(num_voters * sizeof(double)); + int** distances = (int**)malloc(num_voters * sizeof(int*)); + for (int i = 0; i < num_voters; ++i) { + distances[i] = (int*)malloc(num_voters * sizeof(int)); + } + + calculate_vote_swap_dist(votes, num_voters, distances, num_candidates); + + int* chosen_votes = (int*)malloc(num_voters * sizeof(int)); + int* indexes_left = (int*)malloc(num_voters * sizeof(int)); + + for (int i = 0; i < num_voters; i++) { + indexes_left[i] = i; + } + + double* dist_array = (double*)malloc(num_voters * sizeof(double)); + for (int i = 0; i < num_voters; ++i) { + dist_array[i] = 0; + for (int j = 0; j < num_voters; ++j) { + dist_array[i] += distances[i][j]; + } + } + + int best = 0; + for (int i = 1; i < num_voters; ++i) { + if (dist_array[i] < dist_array[best]) { + best = i; + } + } + + chosen_votes[0] = best; + update_indexes(indexes_left, best, num_voters); + double* best_vec = (double*)malloc(num_voters * sizeof(double)); + for (int i = 0; i < num_voters; ++i) { + best_vec[i] = distances[best][i]; + } + + res[0] = 0; + for (int i = 0; i < num_voters; ++i) { + res[0] += best_vec[i]; + } + res[0] = res[0] / max_dist / num_voters; + + free(dist_array); + + for (int i = 1; i < num_voters; ++i) { + int** relatives = (int**)malloc((num_voters - i) * sizeof(int*)); + for (int j = 0; j < num_voters - i; ++j) { + relatives[j] = (int*)malloc(num_voters * sizeof(int)); + } + + for (int j = 0; j < num_voters - i; ++j) { + for (int k = 0; k < num_voters; ++k) { + relatives[j][k] = distances[indexes_left[j]][k] - best_vec[k]; + if (relatives[j][k] > 0) relatives[j][k] = 0; + } + } + + int* dist_new = (int*)malloc((num_voters - i) * sizeof(int)); + for (int j = 0; j < num_voters - i; ++j) { + dist_new[j] = 0; + for (int k = 0; k < num_voters; ++k) { + dist_new[j] += relatives[j][k]; + } + } + + best = 0; + for (int j = 1; j < num_voters - i; ++j) { + if (dist_new[j] < dist_new[best]) { + best = j; + } + } + chosen_votes[i] = best; + update_indexes(indexes_left, best, num_voters); + for (int j = 0; j < num_voters; ++j) { + best_vec[j] += relatives[best][j]; + } + + res[i] = 0; + for (int j = 0; j < num_voters; ++j) { + res[i] += best_vec[j]; + } + res[i] = res[i] / max_dist / num_voters; + + free(dist_new); + for (int j = 0; j < num_voters - i; ++j) { + free(relatives[j]); + } + free(relatives); + } + + restore_order(chosen_votes, num_voters); + + double *res_1 = local_search_kKemeny(votes, num_voters, num_candidates, 1, chosen_votes); + double *res_2 = local_search_kKemeny(votes, num_voters, num_candidates, 1, NULL); + + double *min_values = (double *)malloc(num_voters * sizeof(double)); + if (min_values == NULL) { + free(res); + free(chosen_votes); + for (int i = 0; i < num_voters; i++) { + free(distances[i]); + } + free(distances); + free(res_1); + free(res_2); + } + for (int i = 0; i < num_voters; i++) { + min_values[i] = res_1[i] < res_2[i] ? res_1[i] : res_2[i]; + } + + double diversity_index_value = 0; + for (int i = 0; i < num_voters; i++) { + diversity_index_value += min_values[i] / (i + 1); + } + + free(best_vec); + free(chosen_votes); + free(indexes_left); + for (int i = 0; i < num_voters; ++i) { + free(distances[i]); + } + free(distances); + free(res); + free(min_values); + free(res_1); + free(res_2); + + return diversity_index_value; +} + +static inline double polarization_1by2Kemenys(int** votes, int num_voters, int num_candidates) { + double res = 0.0; + int* indexes_left = (int*)malloc(num_voters * sizeof(int)); + for (int i = 0; i < num_voters; i++) { + indexes_left[i] = i; + } + + int** distances = (int**)malloc(num_voters * sizeof(int*)); + for (int i = 0; i < num_voters; ++i) { + distances[i] = (int*)malloc(num_voters * sizeof(int)); + } + + calculate_vote_swap_dist(votes, num_voters, distances, num_candidates); + + double* dist_array = (double*)malloc(num_voters * sizeof(double)); + for (int i = 0; i < num_voters; ++i) { + dist_array[i] = 0; + for (int j = 0; j < num_voters; ++j) { + dist_array[i] += distances[i][j]; + } + } + + int best = 0; + for (int i = 1; i < num_voters; ++i) { + if (dist_array[i] < dist_array[best]) { + best = i; + } + } + + double* best_vec = (double*)malloc(num_voters * sizeof(double)); + for (int i = 0; i < num_voters; ++i) { + best_vec[i] = distances[best][i]; + } + + for (int i = 0; i < num_voters; ++i) { + res += best_vec[i]; + } + + double first_kemeny = res; + + update_indexes(indexes_left, best, num_voters); + free(dist_array); + + int** relatives = (int**)malloc((num_voters - 1) * sizeof(int*)); + for (int j = 0; j < num_voters - 1; ++j) { + relatives[j] = (int*)malloc(num_voters * sizeof(int)); + } + + for (int j = 0; j < num_voters - 1; ++j) { + for (int k = 0; k < num_voters; ++k) { + relatives[j][k] = distances[indexes_left[j]][k] - best_vec[k]; + if (relatives[j][k] > 0) relatives[j][k] = 0; + } + } + + int* dist_new = (int*)malloc((num_voters - 1) * sizeof(int)); + for (int j = 0; j < num_voters - 1; ++j) { + dist_new[j] = 0; + for (int k = 0; k < num_voters; ++k) { + dist_new[j] += relatives[j][k]; + } + } + + best = 0; + for (int j = 1; j < num_voters - 1; ++j) { + if (dist_new[j] < dist_new[best]) { + best = j; + } + } + update_indexes(indexes_left, best, num_voters); + for (int j = 0; j < num_voters; ++j) { + best_vec[j] += relatives[best][j]; + } + + res = 0.0; + for (int j = 0; j < num_voters; ++j) { + res += best_vec[j]; + } + double second_kemeny = res; + + free(dist_new); + for (int j = 0; j < num_voters - 1; ++j) { + free(relatives[j]); + } + free(relatives); + + free(best_vec); + free(indexes_left); + for (int i = 0; i < num_voters; ++i) { + free(distances[i]); + } + free(distances); + + int max_dist = num_candidates * (num_candidates - 1) / 2; + + return (first_kemeny - second_kemeny) / num_voters / max_dist; +} + +static inline double greedy_kmeans_summed(int** votes, int num_voters, int num_candidates) { + double* res = (double*)malloc(num_voters * sizeof(double)); + double sum = 0.0; + int* indexes_left = (int*)malloc(num_voters * sizeof(int)); + for (int i = 0; i < num_voters; i++) { + indexes_left[i] = i; + } + + int** distances = (int**)malloc(num_voters * sizeof(int*)); + for (int i = 0; i < num_voters; ++i) { + distances[i] = (int*)malloc(num_voters * sizeof(int)); + } + + calculate_vote_swap_dist(votes, num_voters, distances, num_candidates); + for (int i = 0; i < num_voters; i++) { + for (int j = 0; j < num_voters; j++) { + distances[i][j] = distances[i][j] * distances[i][j]; + } + } + + double* dist_array = (double*)malloc(num_voters * sizeof(double)); + for (int i = 0; i < num_voters; ++i) { + dist_array[i] = 0; + for (int j = 0; j < num_voters; ++j) { + dist_array[i] += distances[i][j]; + } + } + + int best = 0; + for (int i = 1; i < num_voters; ++i) { + if (dist_array[i] < dist_array[best]) { + best = i; + } + } + + double* best_vec = (double*)malloc(num_voters * sizeof(double)); + for (int i = 0; i < num_voters; ++i) { + best_vec[i] = distances[best][i]; + } + + res[0] = 0; + for (int i = 0; i < num_voters; ++i) { + res[0] += best_vec[i]; + } + + sum += res[0]; + + update_indexes(indexes_left, best, num_voters); + free(dist_array); + + for (int i = 1; i < num_voters; ++i) { + int** relatives = (int**)malloc((num_voters - i) * sizeof(int*)); + for (int j = 0; j < num_voters - i; ++j) { + relatives[j] = (int*)malloc(num_voters * sizeof(int)); + } + + for (int j = 0; j < num_voters - i; ++j) { + for (int k = 0; k < num_voters; ++k) { + relatives[j][k] = distances[indexes_left[j]][k] - best_vec[k]; + if (relatives[j][k] > 0) relatives[j][k] = 0; + } + } + + int* dist_new = (int*)malloc((num_voters - i) * sizeof(int)); + for (int j = 0; j < num_voters - i; ++j) { + dist_new[j] = 0; + for (int k = 0; k < num_voters; ++k) { + dist_new[j] += relatives[j][k]; + } + } + + best = 0; + for (int j = 1; j < num_voters - i; ++j) { + if (dist_new[j] < dist_new[best]) { + best = j; + } + } + update_indexes(indexes_left, best, num_voters); + for (int j = 0; j < num_voters; ++j) { + best_vec[j] += relatives[best][j]; + } + + res[i] = 0.0; + for (int j = 0; j < num_voters; ++j) { + res[i] += best_vec[j]; + } + sum += res[i]; + + free(dist_new); + for (int j = 0; j < num_voters - i; ++j) { + free(relatives[j]); + } + free(relatives); + } + + free(best_vec); + free(indexes_left); + for (int i = 0; i < num_voters; ++i) { + free(distances[i]); + } + free(distances); + free(res); + + return sum; +} + +static inline double greedy_kKemenys_summed(int** votes, int num_voters, int num_candidates) { + double* res = (double*)malloc(num_voters * sizeof(double)); + double sum = 0.0; + int* indexes_left = (int*)malloc(num_voters * sizeof(int)); + for (int i = 0; i < num_voters; i++) { + indexes_left[i] = i; + } + + int** distances = (int**)malloc(num_voters * sizeof(int*)); + for (int i = 0; i < num_voters; ++i) { + distances[i] = (int*)malloc(num_voters * sizeof(int)); + } + + calculate_vote_swap_dist(votes, num_voters, distances, num_candidates); + + double* dist_array = (double*)malloc(num_voters * sizeof(double)); + for (int i = 0; i < num_voters; ++i) { + dist_array[i] = 0; + for (int j = 0; j < num_voters; ++j) { + dist_array[i] += distances[i][j]; + } + } + + int best = 0; + for (int i = 1; i < num_voters; ++i) { + if (dist_array[i] < dist_array[best]) { + best = i; + } + } + + double* best_vec = (double*)malloc(num_voters * sizeof(double)); + for (int i = 0; i < num_voters; ++i) { + best_vec[i] = distances[best][i]; + } + + res[0] = 0; + for (int i = 0; i < num_voters; ++i) { + res[0] += best_vec[i]; + } + + sum += res[0]; + + update_indexes(indexes_left, best, num_voters); + free(dist_array); + + for (int i = 1; i < num_voters; ++i) { + int** relatives = (int**)malloc((num_voters - i) * sizeof(int*)); + for (int j = 0; j < num_voters - i; ++j) { + relatives[j] = (int*)malloc(num_voters * sizeof(int)); + } + + for (int j = 0; j < num_voters - i; ++j) { + for (int k = 0; k < num_voters; ++k) { + relatives[j][k] = distances[indexes_left[j]][k] - best_vec[k]; + if (relatives[j][k] > 0) relatives[j][k] = 0; + } + } + + int* dist_new = (int*)malloc((num_voters - i) * sizeof(int)); + for (int j = 0; j < num_voters - i; ++j) { + dist_new[j] = 0; + for (int k = 0; k < num_voters; ++k) { + dist_new[j] += relatives[j][k]; + } + } + + best = 0; + for (int j = 1; j < num_voters - i; ++j) { + if (dist_new[j] < dist_new[best]) { + best = j; + } + } + update_indexes(indexes_left, best, num_voters); + for (int j = 0; j < num_voters; ++j) { + best_vec[j] += relatives[best][j]; + } + + res[i] = 0.0; + for (int j = 0; j < num_voters; ++j) { + res[i] += best_vec[j]; + } + sum += res[i]; + + free(dist_new); + for (int j = 0; j < num_voters - i; ++j) { + free(relatives[j]); + } + free(relatives); + } + + free(best_vec); + free(indexes_left); + for (int i = 0; i < num_voters; ++i) { + free(distances[i]); + } + free(distances); + free(res); + + return sum; +} + +static inline double greedy_kKemenys_divk_summed(int** votes, int num_voters, int num_candidates) { + double* res = (double*)malloc(num_voters * sizeof(double)); + double sum = 0.0; + int* indexes_left = (int*)malloc(num_voters * sizeof(int)); + for (int i = 0; i < num_voters; i++) { + indexes_left[i] = i; + } + + int** distances = (int**)malloc(num_voters * sizeof(int*)); + for (int i = 0; i < num_voters; ++i) { + distances[i] = (int*)malloc(num_voters * sizeof(int)); + } + + calculate_vote_swap_dist(votes, num_voters, distances, num_candidates); + + double* dist_array = (double*)malloc(num_voters * sizeof(double)); + for (int i = 0; i < num_voters; ++i) { + dist_array[i] = 0; + for (int j = 0; j < num_voters; ++j) { + dist_array[i] += distances[i][j]; + } + } + + int best = 0; + for (int i = 1; i < num_voters; ++i) { + if (dist_array[i] < dist_array[best]) { + best = i; + } + } + + double* best_vec = (double*)malloc(num_voters * sizeof(double)); + for (int i = 0; i < num_voters; ++i) { + best_vec[i] = distances[best][i]; + } + + res[0] = 0; + for (int i = 0; i < num_voters; ++i) { + res[0] += best_vec[i]; + } + + sum += res[0]; + + update_indexes(indexes_left, best, num_voters); + free(dist_array); + + for (int i = 1; i < num_voters; ++i) { + int** relatives = (int**)malloc((num_voters - i) * sizeof(int*)); + for (int j = 0; j < num_voters - i; ++j) { + relatives[j] = (int*)malloc(num_voters * sizeof(int)); + } + + for (int j = 0; j < num_voters - i; ++j) { + for (int k = 0; k < num_voters; ++k) { + relatives[j][k] = distances[indexes_left[j]][k] - best_vec[k]; + if (relatives[j][k] > 0) relatives[j][k] = 0; + } + } + + int* dist_new = (int*)malloc((num_voters - i) * sizeof(int)); + for (int j = 0; j < num_voters - i; ++j) { + dist_new[j] = 0; + for (int k = 0; k < num_voters; ++k) { + dist_new[j] += relatives[j][k]; + } + } + + best = 0; + for (int j = 1; j < num_voters - i; ++j) { + if (dist_new[j] < dist_new[best]) { + best = j; + } + } + update_indexes(indexes_left, best, num_voters); + for (int j = 0; j < num_voters; ++j) { + best_vec[j] += relatives[best][j]; + } + + res[i] = 0.0; + for (int j = 0; j < num_voters; ++j) { + res[i] += best_vec[j]; + } + sum += res[i] / (i + 1); + + free(dist_new); + for (int j = 0; j < num_voters - i; ++j) { + free(relatives[j]); + } + free(relatives); + } + + free(best_vec); + free(indexes_left); + for (int i = 0; i < num_voters; ++i) { + free(distances[i]); + } + free(distances); + free(res); + int max_dist = num_candidates * (num_candidates - 1) / 2; + return sum / num_voters / max_dist; +} + + +static inline double greedy_2kKemenys_summed(int** votes, int num_voters, int num_candidates) { + double* res = (double*)malloc(num_voters * sizeof(double)); + double sum = 0.0; + int* indexes_left = (int*)malloc(num_voters * sizeof(int)); + for (int i = 0; i < num_voters; i++) { + indexes_left[i] = i; + } + + int** distances = (int**)malloc(num_voters * sizeof(int*)); + for (int i = 0; i < num_voters; ++i) { + distances[i] = (int*)malloc(num_voters * sizeof(int)); + } + + calculate_vote_swap_dist(votes, num_voters, distances, num_candidates); + + double* dist_array = (double*)malloc(num_voters * sizeof(double)); + for (int i = 0; i < num_voters; ++i) { + dist_array[i] = 0; + for (int j = 0; j < num_voters; ++j) { + dist_array[i] += distances[i][j]; + } + } + + int best = 0; + for (int i = 1; i < num_voters; ++i) { + if (dist_array[i] < dist_array[best]) { + best = i; + } + } + + double* best_vec = (double*)malloc(num_voters * sizeof(double)); + for (int i = 0; i < num_voters; ++i) { + best_vec[i] = distances[best][i]; + } + + res[0] = 0; + for (int i = 0; i < num_voters; ++i) { + res[0] += best_vec[i]; + } + + sum += res[0]; + + update_indexes(indexes_left, best, num_voters); + free(dist_array); + + int k = 2; + + for (int i = 1; i < num_voters; ++i) { + int** relatives = (int**)malloc((num_voters - i) * sizeof(int*)); + for (int j = 0; j < num_voters - i; ++j) { + relatives[j] = (int*)malloc(num_voters * sizeof(int)); + } + + for (int j = 0; j < num_voters - i; ++j) { + for (int k = 0; k < num_voters; ++k) { + relatives[j][k] = distances[indexes_left[j]][k] - best_vec[k]; + if (relatives[j][k] > 0) relatives[j][k] = 0; + } + } + + int* dist_new = (int*)malloc((num_voters - i) * sizeof(int)); + for (int j = 0; j < num_voters - i; ++j) { + dist_new[j] = 0; + for (int k = 0; k < num_voters; ++k) { + dist_new[j] += relatives[j][k]; + } + } + + best = 0; + for (int j = 1; j < num_voters - i; ++j) { + if (dist_new[j] < dist_new[best]) { + best = j; + } + } + update_indexes(indexes_left, best, num_voters); + for (int j = 0; j < num_voters; ++j) { + best_vec[j] += relatives[best][j]; + } + + if (i + 1 == k) { + res[i] = 0.0; + for (int j = 0; j < num_voters; ++j) { + res[i] += best_vec[j]; + } + sum += res[i]; + k = k * 2; + } + + free(dist_new); + for (int j = 0; j < num_voters - i; ++j) { + free(relatives[j]); + } + free(relatives); + } + + free(best_vec); + free(indexes_left); + for (int i = 0; i < num_voters; ++i) { + free(distances[i]); + } + free(distances); + free(res); + int max_dist = num_candidates * (num_candidates - 1) / 2; + return sum / num_voters / max_dist / 2; +} + +#endif // UTILS_H diff --git a/fastmap/kkemeny/local_search_kkemeny_single_k.c b/fastmap/kkemeny/local_search_kkemeny_single_k.c new file mode 100644 index 0000000..35006b3 --- /dev/null +++ b/fastmap/kkemeny/local_search_kkemeny_single_k.c @@ -0,0 +1,70 @@ +#include +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include +#include +#include "kemeny.h" + +static PyObject* py_local_search_kkemeny_single_k(PyObject* self, PyObject* args) { + PyObject* votes_obj; + int k, l, votes_num, cand_num; + PyObject* starting_obj = NULL; + + // Parse the input tuple (expecting a numpy array and integers) + if (!PyArg_ParseTuple(args, "Oiiii|O", &votes_obj, &k, &l, &votes_num, &cand_num, &starting_obj)) { + return NULL; + } + + // Convert the input PyObject to a numpy array for votes + PyArrayObject* votes_array = (PyArrayObject*)PyArray_FROM_OTF(votes_obj, NPY_INT32, NPY_ARRAY_IN_ARRAY); + if (votes_array == NULL) { + return NULL; + } + + // Prepare the starting array if provided, else allocate default starting + int* starting = NULL; + if (starting_obj) { + // Convert starting to C array + PyArrayObject* starting_array = (PyArrayObject*)PyArray_FROM_OTF(starting_obj, NPY_INT32, NPY_ARRAY_IN_ARRAY); + if (starting_array == NULL) { + Py_DECREF(votes_array); + return NULL; + } + starting = (int*)PyArray_DATA(starting_array); + Py_DECREF(starting_array); + } + + // Allocate and prepare the votes array + int** votes = (int**)malloc(votes_num * sizeof(int*)); + for (int i = 0; i < votes_num; i++) { + votes[i] = (int*)malloc(cand_num * sizeof(int)); + for (int j = 0; j < cand_num; j++) { + votes[i][j] = *(int*)PyArray_GETPTR2(votes_array, i, j); + } + } + + int result = local_search_kKemeny_single_k(votes, k, l, votes_num, cand_num, starting); + + for (int i = 0; i < votes_num; i++) { + free(votes[i]); + } + free(votes); + Py_DECREF(votes_array); + + return Py_BuildValue("i", result); +} + +static PyMethodDef methods[] = { + { "local_search_kkemeny_single_k", (PyCFunction)py_local_search_kkemeny_single_k, METH_VARARGS, "Local Search Kemeny Single K.\n" }, + { NULL, NULL, 0, NULL } +}; + +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, "_local_search_kkemeny_single_k", "Kemeny module.\n", -1, methods, + NULL, NULL, NULL, NULL +}; + +PyMODINIT_FUNC +PyInit__local_search_kkemeny_single_k(void) { + import_array(); + return PyModule_Create(&moduledef); +} diff --git a/fastmap/kkemeny/simulated_annealing.c b/fastmap/kkemeny/simulated_annealing.c new file mode 100644 index 0000000..af56c9a --- /dev/null +++ b/fastmap/kkemeny/simulated_annealing.c @@ -0,0 +1,292 @@ +#include +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include +#include +#include +#include + +/* + * Function: kemeny_distance + * ------------------------- + * Computes the Kemeny distance for a given ranking and pairwise counts. + * + * Parameters: + * - `ranking`: Pointer to the ranking array. + * - `n`: Number of items in the ranking. + * - `pairwise_counts`: 2D array representing pairwise disagreements between items. + * + * Returns: + * - int: The Kemeny distance for the given ranking. + * + * Complexity: + * - Time: O(n^2), where `n` is the size of the ranking. + * - Space: O(1), no additional memory allocation. + * + * Notes: + * - This function assumes valid input arrays. + */ +static inline int kemeny_distance(int* ranking, int n, int** pairwise_counts) { + int distance = 0; + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + if (ranking[i] < ranking[j]) + distance += pairwise_counts[j][i]; + else + distance += pairwise_counts[i][j]; + } + } + return distance; +} + +/* + * Function: swap + * -------------- + * Swaps two elements in an array. + * + * Parameters: + * - `a`: Pointer to the first element. + * - `b`: Pointer to the second element. + * + * Complexity: + * - Time: O(1), constant-time operation. + * - Space: O(1), no additional memory allocation. + * + * Notes: + * - Swaps the values at the provided pointers. + */ +static inline void swap(int* a, int* b) { + int temp = *a; + *a = *b; + *b = temp; +} + +/* + * Function: generate_neighbor + * --------------------------- + * Generates a neighboring ranking by swapping two random elements. + * + * Parameters: + * - `ranking`: Pointer to the ranking array. + * - `n`: Number of items in the ranking. + * + * Complexity: + * - Time: O(1), constant-time operation. + * - Space: O(1), no additional memory allocation. + * + * Notes: + * - Ensures the generated neighbor differs from the original ranking. + */ +static inline void generate_neighbor(int* ranking, int n) { + int i = rand() % n; + int j = rand() % n; + while (i == j) { + j = rand() % n; + } + swap(&ranking[i], &ranking[j]); +} + +/* + * Function: precompute_pairwise_disagreements + * ------------------------------------------- + * Precomputes pairwise disagreements based on a set of rankings. + * + * Parameters: + * - `rankings`: 2D array of rankings. + * - `num_rankings`: Number of rankings in the dataset. + * - `n`: Number of items in each ranking. + * - `pairwise_counts`: 2D array to store pairwise disagreement counts. + * + * Complexity: + * - Time: O(num_rankings * n^2), where `n` is the number of items. + * - Space: O(n^2), memory allocated for pairwise counts. + * + * Notes: + * - Populates the `pairwise_counts` array with computed values. + */ +static inline void precompute_pairwise_disagreements(int** rankings, int num_rankings, int n, int** pairwise_counts) { + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + pairwise_counts[i][j] = 0; + } + } + + for (int r = 0; r < num_rankings; r++) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + int index_i = -1, index_j = -1; + for (int k = 0; k < n; k++) { + if (rankings[r][k] == i) { + index_i = k; + } else if (rankings[r][k] == j) { + index_j = k; + } + if (index_i != -1 && index_j != -1) { + break; + } + } + + if (index_i < index_j) { + pairwise_counts[i][j] += 1; + } else { + pairwise_counts[j][i] += 1; + } + } + } + } +} + +/* + * Function: simulated_annealing + * ----------------------------- + * Performs simulated annealing to find the best ranking that minimizes the Kemeny distance. + * + * Parameters: + * - `rankings`: 2D array of rankings. + * - `num_rankings`: Number of rankings in the dataset. + * - `n`: Number of items in each ranking. + * - `initial_temp`: Initial temperature for simulated annealing. + * - `cooling_rate`: Rate at which the temperature decreases. + * - `max_iterations`: Maximum number of iterations. + * - `best_ranking`: Pointer to store the best ranking found. + * + * Returns: + * - int: The Kemeny distance for the best ranking found. + * + * Complexity: + * - Time: O(max_iterations * n^2), due to repeated neighbor evaluations. + * - Space: O(n^2), for pairwise counts storage. + * + * Notes: + * - Uses a probabilistic approach to escape local minima. + */ +static inline int simulated_annealing(int** rankings, int num_rankings, int n, double initial_temp, double cooling_rate, int max_iterations, int* best_ranking) { + int** pairwise_counts = (int**)malloc(n * sizeof(int*)); + for (int i = 0; i < n; i++) { + pairwise_counts[i] = (int*)malloc(n * sizeof(int)); + } + precompute_pairwise_disagreements(rankings, num_rankings, n, pairwise_counts); + + int* current_ranking = (int*)malloc(n * sizeof(int)); + for (int i = 0; i < n; i++) { + current_ranking[i] = i; + best_ranking[i] = i; + } + + int best_distance = kemeny_distance(current_ranking, n, pairwise_counts); + int current_distance = best_distance; + + double temperature = initial_temp; + + for (int iteration = 0; iteration < max_iterations; iteration++) { + int* neighbor_ranking = (int*)malloc(n * sizeof(int)); + for (int i = 0; i < n; i++) { + neighbor_ranking[i] = current_ranking[i]; + } + generate_neighbor(neighbor_ranking, n); + int neighbor_distance = kemeny_distance(neighbor_ranking, n, pairwise_counts); + + if (neighbor_distance < current_distance || (exp((current_distance - neighbor_distance) / temperature) > ((double)rand() / RAND_MAX))) { + for (int i = 0; i < n; i++) { + current_ranking[i] = neighbor_ranking[i]; + } + current_distance = neighbor_distance; + } + + if (current_distance < best_distance) { + for (int i = 0; i < n; i++) { + best_ranking[i] = current_ranking[i]; + } + best_distance = current_distance; + } + + temperature *= cooling_rate; + free(neighbor_ranking); + } + + for (int i = 0; i < n; i++) { + free(pairwise_counts[i]); + } + free(pairwise_counts); + free(current_ranking); + + return best_distance; +} + + +/* + * Function: py_simulated_annealing + * --------------------------------- + * Python wrapper for the simulated annealing function. + * + * Parameters: + * - Python arguments: + * - `rankings` (list of lists): 2D list of rankings. + * - `n` (int): Number of items in each ranking. + * - `initial_temp` (float): Initial temperature. + * - `cooling_rate` (float): Cooling rate for temperature. + * - `max_iterations` (int): Maximum number of iterations. + * + * Returns: + * - list: Best ranking found. + * + * Notes: + * - Validates Python arguments and performs conversions. + */ +static PyObject *py_simulated_annealing(PyObject* self, PyObject* args) { + PyObject* rankings_list; + int n, max_iterations; + double initial_temp, cooling_rate; + + if (!PyArg_ParseTuple(args, "O!idii", &PyList_Type, &rankings_list, &n, &initial_temp, &cooling_rate, &max_iterations)) { + return NULL; + } + + int num_rankings = PyList_Size(rankings_list); + int** rankings = (int**)malloc(num_rankings * sizeof(int*)); + for (int i = 0; i < num_rankings; i++) { + PyObject* ranking_row = PyList_GetItem(rankings_list, i); + rankings[i] = (int*)malloc(n * sizeof(int)); + for (int j = 0; j < n; j++) { + rankings[i][j] = PyLong_AsLong(PyList_GetItem(ranking_row, j)); + } + } + + int* best_ranking = (int*)malloc(n * sizeof(int)); + int best_distance = simulated_annealing(rankings, num_rankings, n, initial_temp, cooling_rate, max_iterations, best_ranking); + + PyObject* best_ranking_list = PyList_New(n); + for (int i = 0; i < n; i++) { + PyList_SetItem(best_ranking_list, i, PyLong_FromLong(best_ranking[i])); + } + + for (int i = 0; i < num_rankings; i++) { + free(rankings[i]); + } + free(rankings); + free(best_ranking); + + return Py_BuildValue("i", best_distance); +} + +/* + * Module definition: simulated_annealing + * --------------------------------------- + * Provides Python bindings for the simulated annealing algorithm. + */ +static PyMethodDef SimulatedAnnealingMethods[] = { + {"simulated_annealing", (PyCFunction)py_simulated_annealing, METH_VARARGS, "Simulated Annealing for k-Kemeny distance."}, + {NULL, NULL, 0, NULL} +}; + +static struct PyModuleDef simulatedannealingmodule = { + PyModuleDef_HEAD_INIT, + "_simulated_annealing", + "Simulated Annealing Module for Kemeny distance.", + -1, + SimulatedAnnealingMethods +}; + +PyMODINIT_FUNC PyInit__simulated_annealing(void) { + import_array(); + return PyModule_Create(&simulatedannealingmodule); +} \ No newline at end of file diff --git a/fastmap/kkemeny/swap_distance_between_potes.c b/fastmap/kkemeny/swap_distance_between_potes.c new file mode 100644 index 0000000..57bba3c --- /dev/null +++ b/fastmap/kkemeny/swap_distance_between_potes.c @@ -0,0 +1,366 @@ +#include +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include +#include +#include +#include + +/* + * Function: merge + * ---------------- + * Merges two sorted subarrays of `pote_1` and `pote_2` while counting the inversions between them. + * Inversions occur when the order between elements in `pote_1` and `pote_2` is inconsistent. + * + * Parameters: + * - `pote_1`: Pointer to the first positional vote. + * - `pote_2`: Pointer to the second positional vote. + * - `left`: Starting index of the subarray. + * - `mid`: Midpoint index dividing the subarrays. + * - `right`: Ending index of the subarray. + * + * Returns: + * - int: The number of inversions found during the merge. + * + * Complexity: + * - Time: O(n), where `n` is the size of the subarray. + * - Space: O(n), due to the use of temporary arrays. + * + * Notes: + * - This function is internally used by `merge_and_count` for recursive inversion counting. + */ +static inline int merge(int *pote_1, int *pote_2, int left, int mid, int right) { + int n1 = mid - left + 1; + int n2 = right - mid; + + // Temporary arrays + int *temp_1 = (int *)malloc(n1 * sizeof(int)); + int *temp_2 = (int *)malloc(n1 * sizeof(int)); + int *temp_3 = (int *)malloc(n2 * sizeof(int)); + int *temp_4 = (int *)malloc(n2 * sizeof(int)); + + // Copy data to temporary arrays + for (int i = 0; i < n1; i++) { + temp_1[i] = pote_1[left + i]; + temp_2[i] = pote_2[left + i]; + } + for (int j = 0; j < n2; j++) { + temp_3[j] = pote_1[mid + 1 + j]; + temp_4[j] = pote_2[mid + 1 + j]; + } + + int i = 0, j = 0, k = left, inv_count = 0; + + // Merge temporary arrays back into pote_1 and pote_2 + while (i < n1 && j < n2) { + if (temp_1[i] <= temp_3[j]) { + pote_1[k] = temp_1[i]; + pote_2[k] = temp_2[i]; + i++; + } else { + pote_1[k] = temp_3[j]; + pote_2[k] = temp_4[j]; + j++; + inv_count += (n1 - i); // Count inversions + } + k++; + } + + // Copy the remaining elements + while (i < n1) { + pote_1[k] = temp_1[i]; + pote_2[k] = temp_2[i]; + i++; + k++; + } + while (j < n2) { + pote_1[k] = temp_3[j]; + pote_2[k] = temp_4[j]; + j++; + k++; + } + + // Free temporary arrays + free(temp_1); + free(temp_2); + free(temp_3); + free(temp_4); + + return inv_count; +} + +/* + * Function: merge_and_count + * -------------------------- + * Recursively divides the array into subarrays, counts inversions within subarrays, and merges them + * using the `merge` function to count cross-inversions. + * + * Parameters: + * - `pote_1`: Pointer to the first positional vote. + * - `pote_2`: Pointer to the second positional vote. + * - `left`: Starting index of the subarray. + * - `right`: Ending index of the subarray. + * + * Returns: + * - int: The total number of inversions in the given range `[left, right]`. + * + * Complexity: + * - Time: O(n log n), where `n` is the size of the array. + * - Space: O(n), due to temporary storage during merging. + * + * Notes: + * - This function is part of the merge-sort-based inversion counting method. + */ +static inline int merge_and_count(int *pote_1, int *pote_2, int left, int right) { + if (left >= right) { + return 0; + } + + int mid = (left + right) / 2; + int inv_count = 0; + + inv_count += merge_and_count(pote_1, pote_2, left, mid); + inv_count += merge_and_count(pote_1, pote_2, mid + 1, right); + inv_count += merge(pote_1, pote_2, left, mid, right); + + return inv_count; +} + +/* + * Function: swap_distance_between_potes_ms + * ---------------------------------------- + * Heurestic approximating the swap distance (number of inversions) between two lists of positional votes using + * a merge-sort-based approach. + * + * Parameters: + * - `pote_1`: Pointer to the first positional vote. + * - `pote_2`: Pointer to the second positional vote. + * - `n`: The length of the arrays. + * + * Returns: + * - int: The number of inversions between `pote_1` and `pote_2`. + * + * Complexity: + * - Time: O(n log n), where `n` is the size of the input arrays. + * - Space: O(n), due to temporary storage for copying the arrays. + * + * Notes: + * - This method approximates swap distance between two potes by calculating the number of inversions between them. It + * works really well for pote lengths of >100 where approx ratio starts converging towards 1, but becomes inefficient + * and inaccurate for smaller lengths, where Positional Distance Heuristic or exact method will perform better. + */ +static inline int swap_distance_between_potes_ms(int *pote_1, int *pote_2, int n) { + int *pote_1_copy = (int *)malloc(n * sizeof(int)); + int *pote_2_copy = (int *)malloc(n * sizeof(int)); + + // Copy arrays + memcpy(pote_1_copy, pote_1, n * sizeof(int)); + memcpy(pote_2_copy, pote_2, n * sizeof(int)); + + // Call merge_and_count + int distance = merge_and_count(pote_1_copy, pote_2_copy, 0, n - 1); + + // Free memory + free(pote_1_copy); + free(pote_2_copy); + + return distance; +} + +/* + * Function: swap_distance_between_potes_pd + * ---------------------------------------- + * Computes a heuristic estimate of the swap distance between two lists of positional votes by + * summing the absolute positional differences. + * + * Parameters: + * - `potes1`: Pointer to the first positional vote. + * - `potes2`: Pointer to the second positional vote. + * - `n`: The length of the arrays. + * + * Returns: + * - int: The heuristic estimate of the swap distance. + * + * Complexity: + * - Time: O(n), where `n` is the size of the input arrays. + * - Space: O(1), no additional memory allocation. + * + * Notes: + * - This heuristic is really fast and work pretty well for small length values, but growing size the approx ratio + * doesn't converge and stays somewhere in the range of 1.25-1.50. + */ +int swap_distance_between_potes_pd(int *potes1, int *potes2, int n) { + int heuristic_value = 0; + + for (int i = 0; i < n; i++) { + heuristic_value += abs(potes1[i] - potes2[i]); + } + + return heuristic_value; +} + +/* + * Function: swap_distance_between_potes_bf + * ---------------------------------------- + * Computes the swap distance between two lists of positional votes using a brute-force approach. + * + * Parameters: + * - `pote1`: Pointer to the first array vote. + * - `pote2`: Pointer to the second positional vote. + * - `n`: The length of the arrays. + * + * Returns: + * - int: The swap distance. + * + * Complexity: + * - Time: Triangular O(n^2), where `n` is the size of the input arrays. + * - Space: O(1), no additional memory allocation. + * + * Notes: + * - This method gives the exact swap distance between potes. + */ +static inline int swap_distance_between_potes_bf(int *pote1, int *pote2, int n) { + int swap_distance = 0; + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + if ((pote1[i] > pote1[j] && pote2[i] < pote2[j]) || (pote1[i] < pote1[j] && pote2[i] > pote2[j])) { + swap_distance++; + } + } + } + + return swap_distance; +} + +/* + * Function: py_swap_distance_between_potes + * ---------------------------------------- + * Python wrapper for calculating swap distance or heuristics between two positional votes (potes). + * Supports three methods: brute force, positional difference heuristic, and merge-sort inversion counting. + * + * Parameters: + * - `args`: Python tuple containing: + * 1. `potes1` (list[int]): First positional vote. + * 2. `potes2` (list[int]): Second positional vote. + * 3. `method` (int): The computation method to use: + * - 0: Brute-force swap distance calculation. + * - 1: Positional difference heuristic. + * - 2: Merge-sort inversion counting heuristic. + * + * Returns: + * - Python integer: The computed swap distance or heuristic value. + * + * Raises: + * - `ValueError`: If input arguments are invalid (e.g., non-lists or mismatched lengths). + * - `MemoryError`: If memory allocation fails. + * + * Complexity: + * - Depends on the selected method: + * - Brute Force: O(n^2) + * - Positional Difference: O(n) + * - Merge Sort: O(n log n) + * + * Example Usage in Python: + * ```python + * import swap_distance + * + * potes1 = [3, 1, 2, 4] + * potes2 = [1, 2, 3, 4] + * + * ### Brute force + * print(_swap_distance_between_potes.py_swap_distance_between_potes(potes1, potes2, 0)) + * + * ### Positional difference heuristic + * print(_swap_distance_between_potes.py_swap_distance_between_potes(potes1, potes2, 1)) + * + * ### Merge-sort inversion counting heuristic + * print(_swap_distance_between_potes.py_swap_distance_between_potes(potes1, potes2, 2)) + * ``` + */ +static PyObject *py_swap_distance_between_potes(PyObject *self, PyObject *args) { + PyObject *result = NULL; + PyObject *obj_potes1 = NULL; + PyObject *obj_potes2 = NULL; + int method = 0; + size_t length = 0; + + // Parse Python arguments + if (!PyArg_ParseTuple(args, "OOi", &obj_potes1, &obj_potes2, &method)) { + PyErr_SetString(PyExc_ValueError, "Invalid arguments. Expected (list, list, method)."); + return NULL; + } + + // Convert Python lists to C arrays + if (!PyList_Check(obj_potes1) || !PyList_Check(obj_potes2)) { + PyErr_SetString(PyExc_TypeError, "Arguments must be lists."); + return NULL; + } + + length = PyList_Size(obj_potes1); + if (length != (size_t)PyList_Size(obj_potes2)) { + PyErr_SetString(PyExc_ValueError, "Lists must have the same length."); + return NULL; + } + + int *potes1 = (int *)malloc(length * sizeof(int)); + int *potes2 = (int *)malloc(length * sizeof(int)); + if (!potes1 || !potes2) { + PyErr_SetString(PyExc_MemoryError, "Failed to allocate memory."); + free(potes1); + free(potes2); + return NULL; + } + + // Populate C arrays from Python lists + for (size_t i = 0; i < length; i++) { + potes1[i] = (int)PyLong_AsLong(PyList_GetItem(obj_potes1, i)); + potes2[i] = (int)PyLong_AsLong(PyList_GetItem(obj_potes2, i)); + } + + // Call the appropriate method + int result_value = -1; + switch (method) { + case 0: + result_value = swap_distance_between_potes_bf(potes1, potes2, length); + break; + case 1: + result_value = swap_distance_between_potes_pd(potes1, potes2, length); + break; + case 2: + result_value = swap_distance_between_potes_ms(potes1, potes2, length); + break; + default: + PyErr_SetString(PyExc_ValueError, "Invalid method. Supported methods: 0, 1, 2."); + free(potes1); + free(potes2); + return NULL; + } + + // Free allocated memory + free(potes1); + free(potes2); + + // Return result as a Python integer + result = PyLong_FromLong(result_value); + return result; +} + +// Method definition table +static PyMethodDef methods[] = { + {"swap_distance_between_potes", (PyCFunction)py_swap_distance_between_potes, METH_VARARGS, + "Calculate exact or approximated swap distance between two potes."}, + {NULL, NULL, 0, NULL} // Sentinel +}; + +// Module definition +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_swap_distance_between_potes", // Module name + "Calculate exact or approximated swap distance between potes.", // Module docstring + -1, // Size of per-interpreter state of the module, or -1 if module keeps state in global variables + methods}; + +// Module initialization function +PyMODINIT_FUNC PyInit__swap_distance_between_potes(void) { + import_array(); // Initialize the NumPy C API + return PyModule_Create(&moduledef); +} diff --git a/setup.py b/setup.py index d39e661..adbac76 100644 --- a/setup.py +++ b/setup.py @@ -26,6 +26,7 @@ python_requires=">=3.10", extras_require={ "testing": [ + "pytest", "mapel", "tqdm", ] @@ -55,6 +56,22 @@ extra_compile_args=CFLAGS, include_dirs=["fastmap/lap/", "fastmap/qap/", "fastmap/utils/"], ), + Extension( + name="fastmap._swap_distance_between_potes", + sources=["fastmap/kkemeny/swap_distance_between_potes.c"], + extra_compile_args=CFLAGS, + ), + Extension( + name="fastmap._local_search_kkemeny_single_k", + sources=["fastmap/kkemeny/local_search_kkemeny_single_k.c"], + extra_compile_args=CFLAGS, + include_dirs=["fastmap/kkemeny/"], + ), + Extension( + name="fastmap._simulated_annealing", + sources=["fastmap/kkemeny/simulated_annealing.c"], + extra_compile_args=CFLAGS, + ), ], include_dirs=[np.get_include()], )