Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ Pretty and opinionated topic model visualization in Python.

https://github.com/x-tabdeveloping/topicwizard/assets/13087737/9736f33c-6865-4ed4-bc17-d8e6369bda80

## New in version 1.1.3

You can now specify your own font that should be used for wordclouds.
This makes topicwizard usable with Chinese and other non-Indo-European scripts.

```python
topicwizard.visualize(topic_data=topic_data, wordcloud_font_path="NotoSansTC-Bold.ttf")
```

## New in version 1.1.0 🌟

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "topic-wizard"
version = "1.1.2"
version = "1.1.3"
description = "Pretty and opinionated topic model visualization in Python."
authors = ["Márton Kardos <power.up1163@gmail.com>"]
license = "MIT"
Expand Down
20 changes: 16 additions & 4 deletions topicwizard/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def get_dash_app(
exclude_pages: Optional[Set[PageName]] = None,
document_names: Optional[List[str]] = None,
group_labels: Optional[List[str]] = None,
wordcloud_font_path: Optional[str] = None,
) -> Dash:
"""Returns topicwizard Dash application.

Expand All @@ -50,6 +51,9 @@ def get_dash_app(
You can pass it along if you have genre labels for example.
In this case an additional page will get created with information
about how these groups relate to topics and words in the corpus.
wordcloud_font_path: str, default None
Path to the font file to use for generating wordclouds.
Open Sans is used by default.

Returns
-------
Expand All @@ -64,6 +68,7 @@ def get_dash_app(
or [f"Document {i}" for i, _ in enumerate(topic_data["corpus"])],
group_labels=group_labels,
exclude_pages=exclude_pages,
wordcloud_font_path=wordcloud_font_path,
)
app = Dash(
__name__,
Expand All @@ -82,7 +87,9 @@ def get_dash_app(
return app


def load_app(filename: str, exclude_pages: Optional[Iterable[PageName]] = None) -> Dash:
def load_app(
filename: str, exclude_pages: Optional[Iterable[PageName]] = None, **kwargs
) -> Dash:
"""Loads and prepares saved app from disk.

Parameters
Expand All @@ -100,7 +107,7 @@ def load_app(filename: str, exclude_pages: Optional[Iterable[PageName]] = None)
exclude_pages = set()
else:
exclude_pages = set(exclude_pages)
return get_dash_app(**data, exclude_pages=exclude_pages)
return get_dash_app(**data, exclude_pages=exclude_pages, **kwargs)


def open_url(url: str) -> None:
Expand Down Expand Up @@ -156,6 +163,7 @@ def load(
filename: str,
exclude_pages: Optional[Iterable[PageName]] = None,
port: int = 8050,
**kwargs,
) -> Optional[threading.Thread]:
"""Visualizes topic model data loaded from disk.

Expand All @@ -179,7 +187,7 @@ def load(
"""
print("Preparing data")
exclude_pages = set() if exclude_pages is None else set(exclude_pages)
app = load_app(filename, exclude_pages=exclude_pages)
app = load_app(filename, exclude_pages=exclude_pages, **kwargs)
return run_app(app, port=port)


Expand Down Expand Up @@ -211,6 +219,7 @@ def visualize(
exclude_pages: Optional[Iterable[PageName]] = None,
group_labels: Optional[List[str]] = None,
port: int = 8050,
wordcloud_font_path: Optional[str] = None,
**kwargs,
) -> Optional[threading.Thread]:
"""Visualizes your topic model with topicwizard.
Expand Down Expand Up @@ -238,7 +247,9 @@ def visualize(
You can pass it along if you have genre labels for example.
In this case an additional page will get created with information
about how these groups relate to topics and words in the corpus.

wordcloud_font_path: str, default None
Path to the font file to use for generating wordclouds.
Open Sans is used by default.

Returns
-------
Expand Down Expand Up @@ -278,5 +289,6 @@ def visualize(
document_names=document_names,
exclude_pages=exclude_pages,
group_labels=group_labels,
wordcloud_font_path=wordcloud_font_path,
)
return run_app(app, port=port)
4 changes: 4 additions & 0 deletions topicwizard/blueprints/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def create_blueprint(
word_positions: Optional[np.ndarray] = None,
topic_positions: Optional[np.ndarray] = None,
document_positions: Optional[np.ndarray] = None,
wordcloud_font_path: Optional[str] = None,
) -> DashBlueprint:
# --------[ Collecting blueprints ]--------
topic_blueprint = (
Expand All @@ -43,6 +44,7 @@ def create_blueprint(
corpus=corpus,
topic_names=topic_names,
topic_positions=topic_positions,
wordcloud_font_path=wordcloud_font_path,
)
if "topics" not in exclude_pages
else create_blank_page("topics")
Expand All @@ -59,6 +61,7 @@ def create_blueprint(
corpus=corpus,
topic_names=topic_names,
document_positions=document_positions,
wordcloud_font_path=wordcloud_font_path,
)
if "documents" not in exclude_pages
else create_blank_page("documents")
Expand Down Expand Up @@ -87,6 +90,7 @@ def create_blueprint(
corpus=corpus,
topic_names=topic_names,
group_labels=group_labels,
wordcloud_font_path=wordcloud_font_path,
)
if group_labels is not None
else create_blank_page("groups")
Expand Down
10 changes: 7 additions & 3 deletions topicwizard/blueprints/groups.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, List
from typing import Any, List, Optional

import dash_mantine_components as dmc
import numpy as np
Expand All @@ -10,7 +10,8 @@
import topicwizard.prepare.groups as prepare
from topicwizard.components.groups.group_barplot import create_group_barplot
from topicwizard.components.groups.group_map import create_group_map
from topicwizard.components.groups.group_wordcloud import create_group_wordcloud
from topicwizard.components.groups.group_wordcloud import \
create_group_wordcloud
from topicwizard.help.utils import make_helper


Expand All @@ -20,6 +21,7 @@ def create_blueprint(
document_topic_matrix: np.ndarray,
topic_term_matrix: np.ndarray,
group_labels: List[str],
wordcloud_font_path: Optional[str] = None,
**kwargs,
) -> DashBlueprint:
# --------[ Preparing data ]--------
Expand Down Expand Up @@ -47,7 +49,9 @@ def create_blueprint(
group_map = create_group_map(
group_positions, group_importances, group_names, dominant_topics, topic_colors
)
group_wordcloud = create_group_wordcloud(group_term_importances, vocab)
group_wordcloud = create_group_wordcloud(
group_term_importances, vocab, wordcloud_font_path=wordcloud_font_path
)
group_barchart = create_group_barplot(group_topic_importances, topic_colors)
blueprints = [
group_map,
Expand Down
5 changes: 4 additions & 1 deletion topicwizard/blueprints/topics.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def create_blueprint(
topic_term_matrix: np.ndarray,
topic_names: List[str],
topic_positions: Optional[np.ndarray] = None,
wordcloud_font_path: Optional[str] = None,
**kwargs,
) -> DashBlueprint:
# --------[ Preparing data ]--------
Expand All @@ -61,7 +62,9 @@ def create_blueprint(
topic_positions, topic_importances, topic_names
)
topic_barplot = create_topic_barplot(topic_term_matrix, vocab)
wordcloud = create_wordcloud(topic_term_matrix, vocab)
wordcloud = create_wordcloud(
topic_term_matrix, vocab, wordcloud_font_path=wordcloud_font_path
)
blueprints = [
intertopic_map,
topic_switcher,
Expand Down
4 changes: 2 additions & 2 deletions topicwizard/components/groups/group_wordcloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


def create_group_wordcloud(
group_term_importances: np.ndarray, vocab: np.ndarray
group_term_importances: np.ndarray, vocab: np.ndarray, wordcloud_font_path=None
) -> DashBlueprint:
group_wordcloud = DashBlueprint()

Expand All @@ -25,6 +25,6 @@ def create_group_wordcloud(
)
def update_plot(selected_group: int) -> go.Figure:
top_words = prepare.top_words(selected_group, 60, group_term_importances, vocab)
return plots.wordcloud(top_words)
return plots.wordcloud(top_words, custom_font_path=wordcloud_font_path)

return group_wordcloud
4 changes: 2 additions & 2 deletions topicwizard/components/topics/wordcloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import topicwizard.prepare.topics as prepare


def create_wordcloud(topic_term_matrix, vocab):
def create_wordcloud(topic_term_matrix, vocab, wordcloud_font_path=None):
wordcloud = DashBlueprint()
top_bar = prepare.calculate_top_words(
topic_id=0,
Expand All @@ -32,6 +32,6 @@ def update(current_topic: int) -> go.Figure:
components=topic_term_matrix,
vocab=vocab,
)
return plots.wordcloud(top_bar)
return plots.wordcloud(top_bar, custom_font_path=wordcloud_font_path)

return wordcloud
15 changes: 13 additions & 2 deletions topicwizard/figures/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,12 @@ def group_topic_barcharts(


def group_wordclouds(
topic_data: TopicData, group_labels: List[str], top_n: int = 30, n_columns: int = 4
topic_data: TopicData,
group_labels: List[str],
top_n: int = 30,
n_columns: int = 4,
custom_font_path: str = None,
color_scheme: str = "twilight",
) -> go.Figure:
"""Plots wordclouds for each group.

Expand All @@ -177,6 +182,10 @@ def group_wordclouds(
Number of words to display for each group.
n_columns: int, default 4
Number of columns the faceted plot should have.
custom_font_path: str, default None
Path to custom font to use to render the wordcloud.
color_scheme: str, default 'twilight'
Matplotlib color scheme to use for the plot.
"""
# Factorizing group labels
group_id_labels, group_names = pd.factorize(group_labels)
Expand All @@ -203,7 +212,9 @@ def group_wordclouds(
top_words = prepare.top_words(
group_id, top_n, group_term_importances, topic_data["vocab"]
)
subfig = plots.wordcloud(top_words)
subfig = plots.wordcloud(
top_words, color_scheme=color_scheme, custom_font_path=custom_font_path
)
row, column = (group_id // n_columns) + 1, (group_id % n_columns) + 1
fig.add_trace(subfig.data[0], row=row, col=column)
fig.update_layout(
Expand Down
7 changes: 6 additions & 1 deletion topicwizard/figures/topics.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def topic_wordclouds(
top_n: int = 30,
n_columns: int = 4,
color_scheme: str = "copper",
custom_font_path=None,
) -> go.Figure:
"""Plots most relevant words as word clouds for every topic.

Expand All @@ -124,6 +125,8 @@ def topic_wordclouds(
Number of columns in the subplot grid.
color_scheme: str, default 'copper'
Matplotlib color scheme to use for the wordcloud.
custom_font_path: str, default None
Path to custom font to use to render the wordcloud.
"""
n_topics = topic_data["topic_term_matrix"].shape[0]
(
Expand All @@ -150,7 +153,9 @@ def topic_wordclouds(
components=topic_term_importances,
vocab=topic_data["vocab"],
)
subfig = plots.wordcloud(top_words, color_scheme=color_scheme)
subfig = plots.wordcloud(
top_words, color_scheme=color_scheme, custom_font_path=custom_font_path
)
row, column = (topic_id // n_columns) + 1, (topic_id % n_columns) + 1
fig.add_trace(subfig.data[0], row=row, col=column)
fig.update_layout(
Expand Down
10 changes: 9 additions & 1 deletion topicwizard/plots/documents.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Module containing plotting utilities for documents."""

from typing import Dict, Iterable, List, Optional

import numpy as np
Expand All @@ -9,6 +10,8 @@
from PIL import Image
from wordcloud import WordCloud

from topicwizard.plots.utils import get_default_font_path


def document_map(
x: np.ndarray,
Expand Down Expand Up @@ -209,11 +212,16 @@ def document_timeline(


def document_wordcloud(
doc_id: int, document_term_matrix: np.ndarray, vocab: np.ndarray
doc_id: int,
document_term_matrix: np.ndarray,
vocab: np.ndarray,
custom_font_path=None,
) -> go.Figure:
coo = spr.coo_array(document_term_matrix[[doc_id], :])
term_dict = {vocab[column]: data for column, data in zip(coo.col, coo.data)}
font_path = custom_font_path or get_default_font_path().absolute()
cloud = WordCloud(
font_path=font_path,
width=800,
height=800,
background_color="white",
Expand Down
10 changes: 8 additions & 2 deletions topicwizard/plots/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from PIL import Image
from wordcloud import WordCloud

from topicwizard.plots.utils import get_default_font_path


def group_map(
x: np.ndarray,
Expand Down Expand Up @@ -133,17 +135,21 @@ def group_topics_barchart(top_topics: pd.DataFrame, topic_colors: np.ndarray):
return fig


def wordcloud(top_words: pd.DataFrame) -> go.Figure:
def wordcloud(
top_words: pd.DataFrame, custom_font_path=None, color_scheme: str = "twilight"
) -> go.Figure:
"""Plots most relevant words for current topic as a wordcloud."""
top_dict = {
word: importance
for word, importance in zip(top_words.word, top_words.importance)
}
font_path = custom_font_path or get_default_font_path().absolute()
cloud = WordCloud(
font_path=font_path,
width=800,
height=1060,
background_color="white",
colormap="twilight",
colormap=color_scheme,
scale=4,
).generate_from_frequencies(top_dict)
image = cloud.to_image()
Expand Down
8 changes: 7 additions & 1 deletion topicwizard/plots/topics.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from sklearn.preprocessing import minmax_scale
from wordcloud import WordCloud

from topicwizard.plots.utils import get_default_font_path


def intertopic_map(
x: np.ndarray,
Expand Down Expand Up @@ -140,15 +142,19 @@ def topic_plot(top_words: pd.DataFrame):
return fig


def wordcloud(top_words: pd.DataFrame, color_scheme: str = "copper") -> go.Figure:
def wordcloud(
top_words: pd.DataFrame, color_scheme: str = "copper", custom_font_path=None
) -> go.Figure:
"""Plots most relevant words for current topic as a wordcloud."""
top_dict = {
word: importance
for word, importance in zip(
top_words.word, 0.1 + minmax_scale(top_words.importance)
)
}
font_path = custom_font_path or get_default_font_path().absolute()
cloud = WordCloud(
font_path=font_path,
width=800,
height=1060,
background_color="white",
Expand Down
Loading
Loading