diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a2a2ff4 --- /dev/null +++ b/LICENSE @@ -0,0 +1,7 @@ +Copyright 2024 Erik Terpstra + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/input.yaml b/input.yaml new file mode 100644 index 0000000..278bd23 --- /dev/null +++ b/input.yaml @@ -0,0 +1,20 @@ +statements: +- Climate change requires immediate action +- Nuclear power is necessary for clean energy +- Carbon tax should be implemented globally +- Individual actions matter for sustainability +- Companies should be held liable for emissions + +votes: + # Group 1: Environmental purists (anti-nuclear) + user1: [agree, disagree, agree, agree, agree] + user2: [agree, disagree, agree, agree, agree] + user3: [agree, disagree, agree, agree, agree] + # Group 2: Tech-focused environmentalists (pro-nuclear) + user4: [agree, agree, agree, disagree, agree] + user5: [agree, agree, agree, disagree, agree] + user6: [agree, agree, agree, disagree, agree] + # Group 3: Business-oriented (anti-regulation) + user7: [agree, agree, disagree, disagree, disagree] + user8: [agree, agree, disagree, disagree, disagree] + user9: [agree, agree, disagree, disagree, disagree] diff --git a/polislite.py b/polislite.py index 9b04e3a..da4486a 100644 --- a/polislite.py +++ b/polislite.py @@ -1,21 +1,31 @@ +import yaml import numpy as np from sklearn.decomposition import PCA from scipy.cluster import hierarchy from sklearn.metrics import silhouette_score from collections import defaultdict +from jinja2 import Template +from pathlib import Path class PolisClusterer: def __init__(self, min_clusters=2, max_clusters=6): self.pca = PCA(n_components=2) self.min_clusters = min_clusters self.max_clusters = max_clusters + template_path = Path(__file__).parent / 'report_template.j2' + self.template = Template(template_path.read_text()) + @staticmethod + def load_from_yaml(filepath): + with open(filepath) as f: + data = yaml.safe_load(f) + vote_map = {'agree': 1, 'disagree': -1} + votes = [[vote_map.get(v, 0) for v in user_votes] + for user_votes in data['votes'].values()] + return data['statements'], votes + def analyze_opinions(self, votes, statements): - vote_matrix = np.array([ - [1 if v == 'agree' else -1 if v == 'disagree' else 0 - for v in voter_votes] - for voter_votes in votes - ]) + vote_matrix = np.array(votes) self._handle_sparse_votes(vote_matrix) points_2d = self._compute_pca(vote_matrix) @@ -40,7 +50,6 @@ def _compute_pattern_difference(self, clusters, points): cluster_means = {k: np.mean(v, axis=0) for k, v in cluster_means.items()} - # Compute average distance between cluster centers diffs = [] for i in cluster_means: for j in cluster_means: @@ -75,55 +84,34 @@ def _generate_report(self, vote_matrix, clusters, statements): statement_scores = np.mean(vote_matrix, axis=0) agreement_levels = np.std(vote_matrix, axis=0) - print('Consensus Statements:') - for stmt, score, agree in zip(statements, statement_scores, agreement_levels): - if agree < 0.5: - consensus = 'strong agreement' if score > 0.5 else 'strong disagreement' - print(f'- {stmt} ({consensus})') - - print('\nDivisive Statements:') - for stmt, agree in zip(statements, agreement_levels): - if agree >= 0.5: - print(f'- {stmt}') - cluster_opinions = defaultdict(list) for i, cluster_id in enumerate(clusters): cluster_opinions[cluster_id].append(vote_matrix[i]) - print('\nGroup Positions:') + # Pre-process the group data to include only significant opinions + group_data = {} for grp_id in sorted(cluster_opinions.keys()): opinions = np.mean(cluster_opinions[grp_id], axis=0) - print(f'\nGroup {grp_id} characteristics:') - for stmt, opinion in zip(statements, opinions): - if abs(opinion) > 0.5: - stance = 'strongly agrees with' if opinion > 0 else 'strongly disagrees with' - print(f'- {stance}: {stmt}') + significant_opinions = [ + (stmt, opinion) for stmt, opinion in zip(statements, opinions) + if abs(opinion) > 0.5 + ] + group_data[grp_id] = significant_opinions -# Example usage -statements = [ - 'Climate change requires immediate action', - 'Nuclear power is necessary for clean energy', - 'Carbon tax should be implemented globally', - 'Individual actions matter for sustainability', - 'Companies should be held liable for emissions' -] + print(self.template.render( + consensus_data=zip(statements, statement_scores, agreement_levels), + divisive_data=zip(statements, agreement_levels), + group_data=group_data + )) -votes = [ - # Group 1: Environmental purists (anti-nuclear) - ['agree', 'disagree', 'agree', 'agree', 'agree'], - ['agree', 'disagree', 'agree', 'agree', 'agree'], - ['agree', 'disagree', 'agree', 'agree', 'agree'], - - # Group 2: Tech-focused environmentalists (pro-nuclear) - ['agree', 'agree', 'agree', 'disagree', 'agree'], - ['agree', 'agree', 'agree', 'disagree', 'agree'], - ['agree', 'agree', 'agree', 'disagree', 'agree'], - - # Group 3: Business-oriented (anti-regulation) - ['agree', 'agree', 'disagree', 'disagree', 'disagree'], - ['agree', 'agree', 'disagree', 'disagree', 'disagree'], - ['agree', 'agree', 'disagree', 'disagree', 'disagree'] -] +def main(yaml_file): + clusterer = PolisClusterer() + statements, votes = PolisClusterer.load_from_yaml(yaml_file) + points, clusters = clusterer.analyze_opinions(votes, statements) -clusterer = PolisClusterer() -points, clusters = clusterer.analyze_opinions(votes, statements) +if __name__ == '__main__': + import sys + if len(sys.argv) != 2: + print('Usage: python polislite.py input.yaml') + sys.exit(1) + main(sys.argv[1]) diff --git a/report_template.j2 b/report_template.j2 new file mode 100644 index 0000000..04f7bd7 --- /dev/null +++ b/report_template.j2 @@ -0,0 +1,21 @@ +Consensus Statements: +{%- for stmt, score, agree in consensus_data %} +{%- if agree < 0.5 %} +- {{ stmt }} ({{ 'strong agreement' if score > 0.5 else 'strong disagreement' }}) +{%- endif %} +{%- endfor %} + +Divisive Statements: +{%- for stmt, agree in divisive_data %} +{%- if agree >= 0.5 %} +- {{ stmt }} +{%- endif %} +{%- endfor %} + +Group Positions: +{%- for grp_id, positions in group_data.items() %} +Group {{ grp_id }} characteristics: +{%- for stmt, opinion in positions %} +- {{ 'strongly agrees with' if opinion > 0 else 'strongly disagrees with' }}: {{ stmt }} +{%- endfor %} +{%- endfor %}