From b0f5769de25e60f9c3d3c26551e79ef25c2bc75e Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Sun, 9 Mar 2025 16:15:56 -0700 Subject: [PATCH 01/25] replace deprecated .run() with .invoke() refactor how chain prompt is handled --- keybert/llm/_langchain.py | 58 +++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index f786109e..84527e04 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -1,27 +1,37 @@ -from tqdm import tqdm from typing import List -from langchain.docstore.document import Document + +from tqdm import tqdm + from keybert.llm._base import BaseLLM from keybert.llm._utils import process_candidate_keywords +DEFAULT_PROMPT_TEMPLATE = """ +# Task +You are provided with a document and a list of candidate keywords. +Your task to is extract keywords from the document. +Use the candidate keywords to guide your extraction. -DEFAULT_PROMPT = "What is this document about? Please provide keywords separated by commas." +# Document +{DOCUMENT} + +# Candidate Keywords +{CANDIDATES} + + +Now extract the keywords from the document. +Your output must be a list of comma-separated keywords. +""" class LangChain(BaseLLM): """Using chains in langchain to generate keywords. - Currently, only chains from question answering is implemented. See: - https://langchain.readthedocs.io/en/latest/modules/chains/combine_docs_examples/question_answering.html - NOTE: The resulting keywords are expected to be separated by commas so - any changes to the prompt will have to make sure that the resulting - keywords are comma-separated. + NOTE: The resulting keywords are expected to a list of comma-sparated str so + any changes to the prompt will have to ensure the foramt. Arguments: chain: A langchain chain that has two input parameters, `input_documents` and `query`. - prompt: The prompt to be used in the model. 
If no prompt is given, - `self.default_prompt_` is used instead. verbose: Set this to True if you want to see a progress bar for the keyword extraction. @@ -32,14 +42,21 @@ class LangChain(BaseLLM): like openai: `pip install langchain` - `pip install openai` + `pip install langchain-openai` Then, you can create your chain as follows: ```python - from langchain.chains.question_answering import load_qa_chain - from langchain.llms import OpenAI - chain = load_qa_chain(OpenAI(temperature=0, openai_api_key=my_openai_api_key), chain_type="stuff") + from langchain.prompts import PromptTemplate + from langchain_core.output_parsers import StrOutputParser + from langchain_openai import ChatOpenAI + _llm = ChatOpenAI( + model="gpt-4o", + api_key="my-openai-api-key", + temperature=0, + ) + _prompt = PromptTemplate.from_template(LangChain.DEFAULT_PROMPT_TEMPLATE) # the default prompt from KeyBERT + chain = _prompt | _llm ``` Finally, you can pass the chain to KeyBERT as follows: @@ -54,8 +71,8 @@ class LangChain(BaseLLM): # Load it in KeyLLM kw_model = KeyLLM(llm) - # Extract keywords document = "The website mentions that it only takes a couple of days to deliver but I still have not received mine." 
+ candidates = ["days", "website", "deliver", "received"] keywords = kw_model.extract_keywords(document) ``` @@ -67,15 +84,14 @@ class LangChain(BaseLLM): ``` """ + DEFAULT_PROMPT_TEMPLATE = DEFAULT_PROMPT_TEMPLATE + def __init__( self, chain, - prompt: str = None, verbose: bool = False, ): self.chain = chain - self.prompt = prompt if prompt is not None else DEFAULT_PROMPT - self.default_prompt_ = DEFAULT_PROMPT self.verbose = verbose def extract_keywords(self, documents: List[str], candidate_keywords: List[List[str]] = None): @@ -95,11 +111,7 @@ def extract_keywords(self, documents: List[str], candidate_keywords: List[List[s candidate_keywords = process_candidate_keywords(documents, candidate_keywords) for document, candidates in tqdm(zip(documents, candidate_keywords), disable=not self.verbose): - prompt = self.prompt.replace("[DOCUMENT]", document) - if candidates is not None: - prompt = prompt.replace("[CANDIDATES]", ", ".join(candidates)) - input_document = Document(page_content=document) - keywords = self.chain.run(input_documents=[input_document], question=self.prompt).strip() + keywords = self.chain.invoke({"DOCUMENT": document, "CANDIDATES": candidates}) keywords = [keyword.strip() for keyword in keywords.split(",")] all_keywords.append(keywords) From 2c3a00f68ab92a8fa5960a4726b995434fbf1582 Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Sun, 9 Mar 2025 16:29:05 -0700 Subject: [PATCH 02/25] docstring --- keybert/llm/_langchain.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index 84527e04..6a935f92 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -19,7 +19,7 @@ Now extract the keywords from the document. -Your output must be a list of comma-separated keywords. +The keywords must be comma separated . 
""" @@ -55,8 +55,8 @@ class LangChain(BaseLLM): api_key="my-openai-api-key", temperature=0, ) - _prompt = PromptTemplate.from_template(LangChain.DEFAULT_PROMPT_TEMPLATE) # the default prompt from KeyBERT - chain = _prompt | _llm + _prompt = PromptTemplate.from_template(LangChain.DEFAULT_PROMPT_TEMPLATE) # the default prompt from KeyBERT + chain = _prompt | _llm | StrOutputParser() ``` Finally, you can pass the chain to KeyBERT as follows: @@ -71,9 +71,18 @@ class LangChain(BaseLLM): # Load it in KeyLLM kw_model = KeyLLM(llm) - document = "The website mentions that it only takes a couple of days to deliver but I still have not received mine." - candidates = ["days", "website", "deliver", "received"] - keywords = kw_model.extract_keywords(document) + # Extract keywords + docs = [ + "KeyBERT: A minimal method for keyword extraction with BERT. The keyword extraction is done by finding the sub-phrases in a document that are the most similar to the document itself. First, document embeddings are extracted with BERT to get a document-level representation. Then, word embeddings are extracted for N-gram words/phrases. Finally, we use cosine similarity to find the words/phrases that are the most similar to the document. The most similar words could then be identified as the words that best describe the entire document.", + "KeyLLM: A minimal method for keyword extraction with Large Language Models (LLM). 
The keyword extraction is done by simply asking the LLM to extract a number of keywords from a single piece of text.", + ] + candidates = [ + ["keyword extraction", "Large Language Models", "LLM", "BERT", "transformer", "embeddings"], + ["keyword extraction", "Large Language Models", "LLM", "BERT", "transformer", "embeddings"], + ] + keywords = kw_model.extract_keywords(docs=docs, candidate_keywords=candidates) + print(keywords) + # [['keyword extraction', 'BERT', 'embeddings'], ['keyword extraction', 'Large Language Models', 'LLM']] ``` You can also use a custom prompt: From 36ad9309add91e8e5b555e29421ad981feccf63e Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Sun, 9 Mar 2025 18:50:20 -0700 Subject: [PATCH 03/25] doc --- keybert/llm/_langchain.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index 6a935f92..0e537c3f 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -47,7 +47,7 @@ class LangChain(BaseLLM): Then, you can create your chain as follows: ```python - from langchain.prompts import PromptTemplate + from langchain.prompts import ChatPromptTemplate from langchain_core.output_parsers import StrOutputParser from langchain_openai import ChatOpenAI _llm = ChatOpenAI( @@ -55,7 +55,11 @@ class LangChain(BaseLLM): api_key="my-openai-api-key", temperature=0, ) - _prompt = PromptTemplate.from_template(LangChain.DEFAULT_PROMPT_TEMPLATE) # the default prompt from KeyBERT + _prompt = ChatPromptTemplate( + [ + ("human", LangChain.DEFAULT_PROMPT_TEMPLATE), # the default prompt from KeyBERT + ] + ) chain = _prompt | _llm | StrOutputParser() ``` From b4a3a74c1a8b666acf97815cf0a188ac3c4a7520 Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Sun, 9 Mar 2025 18:51:06 -0700 Subject: [PATCH 04/25] doc --- keybert/llm/_langchain.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index 0e537c3f..9c26bbc8 100644 --- 
a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -50,6 +50,9 @@ class LangChain(BaseLLM): from langchain.prompts import ChatPromptTemplate from langchain_core.output_parsers import StrOutputParser from langchain_openai import ChatOpenAI + + from keybert.llm import LangChain + _llm = ChatOpenAI( model="gpt-4o", api_key="my-openai-api-key", From de786a308166dacea475c8546217eb07e346340f Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Sun, 16 Mar 2025 19:22:37 -0700 Subject: [PATCH 05/25] allow input arg prompt; replace chain with llm for simplify user exp --- keybert/llm/_langchain.py | 61 +++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index 9c26bbc8..54c0276d 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -1,25 +1,17 @@ from typing import List +from langchain.prompts import ChatPromptTemplate, PromptTemplate +from langchain_core.language_models.chat_models import BaseChatModel as LCChatModel +from langchain_core.language_models.llms import BaseLLM as LCBaseLLM +from langchain_core.output_parsers import CommaSeparatedListOutputParser from tqdm import tqdm from keybert.llm._base import BaseLLM from keybert.llm._utils import process_candidate_keywords -DEFAULT_PROMPT_TEMPLATE = """ -# Task -You are provided with a document and a list of candidate keywords. -Your task to is extract keywords from the document. -Use the candidate keywords to guide your extraction. - -# Document -{DOCUMENT} - -# Candidate Keywords -{CANDIDATES} - - -Now extract the keywords from the document. -The keywords must be comma separated . +"""NOTE: langchain >= 0.1 is required. Which supports: +- chain.invoke() +- LangChain Expression Language (LCEL) is used and it is not compatible with langchain < 0.1. 
""" @@ -100,14 +92,46 @@ class LangChain(BaseLLM): ``` """ - DEFAULT_PROMPT_TEMPLATE = DEFAULT_PROMPT_TEMPLATE + DEFAULT_PROMPT_TEMPLATE = """ +# Task +You are provided with a document and possibly a list of candidate keywords. + +If no candidate keywords are provided, your task is to extract keywords from the document. +If candidate keywords are provided, your task is to improve the candidate keywords to best describe the topic of the document. + +# Document +{DOCUMENT} + +# Candidate Keywords +{CANDIDATES} + + +Now extract the keywords from the document. +The keywords must be comma separated. +""" def __init__( self, - chain, + llm: LCChatModel | LCBaseLLM, + prompt: str = None, verbose: bool = False, ): - self.chain = chain + self.llm = llm + self.prompt = ( + prompt.replace("[DOCUMENT]", "{DOCUMENT}").replace("[CANDIDATES]", "{CANDIDATES}") + if prompt is not None + else self.DEFAULT_PROMPT_TEMPLATE + ) + + if isinstance(llm, LCChatModel): + # a chat model (modern ones) + self.chain = ChatPromptTemplate([("human", self.prompt)]) | llm | CommaSeparatedListOutputParser() + elif isinstance(llm, LCBaseLLM): + # a completion model (usually legacy) + self.chain = PromptTemplate(template=self.prompt) | llm | CommaSeparatedListOutputParser() + else: + raise ValueError("A LangChain LLM must be either a chat model or a completion model.") + self.verbose = verbose def extract_keywords(self, documents: List[str], candidate_keywords: List[List[str]] = None): @@ -128,7 +152,6 @@ def extract_keywords(self, documents: List[str], candidate_keywords: List[List[s for document, candidates in tqdm(zip(documents, candidate_keywords), disable=not self.verbose): keywords = self.chain.invoke({"DOCUMENT": document, "CANDIDATES": candidates}) - keywords = [keyword.strip() for keyword in keywords.split(",")] all_keywords.append(keywords) return all_keywords From a40f8864263f90d8475143a4b775f885a4d22b74 Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Sun, 16 Mar 2025 19:35:38 -0700
Subject: [PATCH 06/25] doc --- keybert/llm/_langchain.py | 58 +++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index 54c0276d..48a26327 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -19,13 +19,21 @@ class LangChain(BaseLLM): """Using chains in langchain to generate keywords. - NOTE: The resulting keywords are expected to a list of comma-sparated str so - any changes to the prompt will have to ensure the foramt. + Arguments: - chain: A langchain chain that has two input parameters, `input_documents` and `query`. + llm: A langchain LLM class. e.g ChatOpenAI, OpenAI, etc. + prompt: The prompt to be used in the model. If no prompt is given, + `self.DEFAULT_PROMPT_TEMPLATE` is used instead. + NOTE: + 1. Placeholders + - [DOCUMENT]: Required. The document to extract keywords from. + - [CANDIDATES]: Optional. The candidate keywords to fine-tune the extraction. + 2. Output format instructions + - The resulting keywords are expected to a list of comma-sparated str so ensure the foramt in your prompt. + e.g. "The output must be a list of comma separated keywords." verbose: Set this to True if you want to see a progress bar for the - keyword extraction. + keyword extraction. 
Usage: @@ -39,23 +47,13 @@ class LangChain(BaseLLM): Then, you can create your chain as follows: ```python - from langchain.prompts import ChatPromptTemplate - from langchain_core.output_parsers import StrOutputParser from langchain_openai import ChatOpenAI - from keybert.llm import LangChain - _llm = ChatOpenAI( model="gpt-4o", api_key="my-openai-api-key", temperature=0, ) - _prompt = ChatPromptTemplate( - [ - ("human", LangChain.DEFAULT_PROMPT_TEMPLATE), # the default prompt from KeyBERT - ] - ) - chain = _prompt | _llm | StrOutputParser() ``` Finally, you can pass the chain to KeyBERT as follows: @@ -65,7 +63,7 @@ class LangChain(BaseLLM): from keybert import KeyLLM # Create your LLM - llm = LangChain(chain) + llm = LangChain(_llm) # Load it in KeyLLM kw_model = KeyLLM(llm) @@ -75,13 +73,29 @@ class LangChain(BaseLLM): "KeyBERT: A minimal method for keyword extraction with BERT. The keyword extraction is done by finding the sub-phrases in a document that are the most similar to the document itself. First, document embeddings are extracted with BERT to get a document-level representation. Then, word embeddings are extracted for N-gram words/phrases. Finally, we use cosine similarity to find the words/phrases that are the most similar to the document. The most similar words could then be identified as the words that best describe the entire document.", "KeyLLM: A minimal method for keyword extraction with Large Language Models (LLM). 
The keyword extraction is done by simply asking the LLM to extract a number of keywords from a single piece of text.", ] + keywords = kw_model.extract_keywords(docs=docs) + print(keywords) + + # Output: + # [ + # ['KeyBERT', 'keyword extraction', 'BERT', 'document embeddings', 'word embeddings', 'N-gram phrases', 'cosine similarity', 'document representation'], + # ['KeyLLM', 'keyword extraction', 'Large Language Models', 'LLM', 'minimal method'] + # ] + + + # fine tune with candidate keywords candidates = [ ["keyword extraction", "Large Language Models", "LLM", "BERT", "transformer", "embeddings"], ["keyword extraction", "Large Language Models", "LLM", "BERT", "transformer", "embeddings"], ] keywords = kw_model.extract_keywords(docs=docs, candidate_keywords=candidates) print(keywords) - # [['keyword extraction', 'BERT', 'embeddings'], ['keyword extraction', 'Large Language Models', 'LLM']] + + # Output: + # [ + # ['keyword extraction', 'BERT', 'document embeddings', 'word embeddings', 'cosine similarity', 'N-gram phrases'], + # ['KeyLLM', 'keyword extraction', 'Large Language Models', 'LLM'] + # ] ``` You can also use a custom prompt: @@ -100,10 +114,10 @@ class LangChain(BaseLLM): If candidate keywords are provided, your task is to improve the candidate keywords to best describe the topic of the document. # Document -{DOCUMENT} +[DOCUMENT] # Candidate Keywords -{CANDIDATES} +[CANDIDATES] Now extract the keywords from the document. 
@@ -117,11 +131,9 @@ def __init__( verbose: bool = False, ): self.llm = llm - self.prompt = ( - prompt.replace("[DOCUMENT]", "{DOCUMENT}").replace("[CANDIDATES]", "{CANDIDATES}") - if prompt is not None - else self.DEFAULT_PROMPT_TEMPLATE - ) + self.prompt = prompt if prompt is not None else self.DEFAULT_PROMPT_TEMPLATE + # format for langchain template placeholders + self.prompt = self.prompt.replace("[DOCUMENT]", "{DOCUMENT}").replace("[CANDIDATES]", "{CANDIDATES}") if isinstance(llm, LCChatModel): # a chat model (modern ones) From 56ba04dee3be13f0cdca6959b0b406acaacae1c1 Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Sun, 16 Mar 2025 19:47:46 -0700 Subject: [PATCH 07/25] format --- keybert/llm/_langchain.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index 48a26327..d5d66751 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -18,9 +18,6 @@ class LangChain(BaseLLM): """Using chains in langchain to generate keywords. - - - Arguments: llm: A langchain LLM class. e.g ChatOpenAI, OpenAI, etc. prompt: The prompt to be used in the model. If no prompt is given, From 98723ab1302a58888fead1d8f045f38109a9da59 Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Sun, 16 Mar 2025 20:32:06 -0700 Subject: [PATCH 08/25] remove note --- keybert/llm/_langchain.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index d5d66751..11ae441b 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -22,10 +22,10 @@ class LangChain(BaseLLM): llm: A langchain LLM class. e.g ChatOpenAI, OpenAI, etc. prompt: The prompt to be used in the model. If no prompt is given, `self.DEFAULT_PROMPT_TEMPLATE` is used instead. - NOTE: + THe prompt should contain: 1. Placeholders - - [DOCUMENT]: Required. The document to extract keywords from. - - [CANDIDATES]: Optional. The candidate keywords to fine-tune the extraction. 
+ - `[DOCUMENT]`: Required. The document to extract keywords from. + - `[CANDIDATES]`: Optional. The candidate keywords to fine-tune the extraction. 2. Output format instructions - The resulting keywords are expected to a list of comma-sparated str so ensure the foramt in your prompt. e.g. "The output must be a list of comma separated keywords." From e4969e85254255ca843bc979bf9af086ce69223d Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Sun, 16 Mar 2025 20:36:39 -0700 Subject: [PATCH 09/25] refactor chain construction --- keybert/llm/_langchain.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index 11ae441b..bec7b93a 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -129,17 +129,18 @@ def __init__( ): self.llm = llm self.prompt = prompt if prompt is not None else self.DEFAULT_PROMPT_TEMPLATE - # format for langchain template placeholders + # format prompt for langchain template placeholders self.prompt = self.prompt.replace("[DOCUMENT]", "{DOCUMENT}").replace("[CANDIDATES]", "{CANDIDATES}") - if isinstance(llm, LCChatModel): - # a chat model (modern ones) - self.chain = ChatPromptTemplate([("human", self.prompt)]) | llm | CommaSeparatedListOutputParser() - elif isinstance(llm, LCBaseLLM): - # a completion model (usually legacy) - self.chain = PromptTemplate(template=self.prompt) | llm | CommaSeparatedListOutputParser() - else: - raise ValueError("A LangChain LLM must be either a chat model or a completion model.") + assert isinstance(llm, (LCChatModel, LCBaseLLM)), ( + "A LangChain LLM must be either a chat model or a completion model." 
+ ) + prompt_template = ( + ChatPromptTemplate([("human", self.prompt)]) + if isinstance(llm, LCChatModel) + else PromptTemplate(template=self.prompt) + ) + self.chain = prompt_template | llm | CommaSeparatedListOutputParser() self.verbose = verbose From 0add85d7adc35c5db032063911357ac132c196c7 Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Sun, 16 Mar 2025 20:38:28 -0700 Subject: [PATCH 10/25] format --- keybert/llm/_langchain.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index bec7b93a..4b0087a7 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -129,21 +129,23 @@ def __init__( ): self.llm = llm self.prompt = prompt if prompt is not None else self.DEFAULT_PROMPT_TEMPLATE + self.verbose = verbose + # format prompt for langchain template placeholders self.prompt = self.prompt.replace("[DOCUMENT]", "{DOCUMENT}").replace("[CANDIDATES]", "{CANDIDATES}") - + # llm type check assert isinstance(llm, (LCChatModel, LCBaseLLM)), ( "A LangChain LLM must be either a chat model or a completion model." ) + # langchain prompt template prompt_template = ( ChatPromptTemplate([("human", self.prompt)]) if isinstance(llm, LCChatModel) else PromptTemplate(template=self.prompt) ) + # chain self.chain = prompt_template | llm | CommaSeparatedListOutputParser() - self.verbose = verbose - def extract_keywords(self, documents: List[str], candidate_keywords: List[List[str]] = None): """Extract topics. 
From f416ddef57ba28ec560c25c5e80911a1646e63ed Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Sun, 16 Mar 2025 20:49:21 -0700 Subject: [PATCH 11/25] refactor: extract get_chain method --- keybert/llm/_langchain.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index 4b0087a7..566a68fe 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -9,7 +9,8 @@ from keybert.llm._base import BaseLLM from keybert.llm._utils import process_candidate_keywords -"""NOTE: langchain >= 0.1 is required. Which supports: +"""NOTE +langchain >= 0.1 is required. Which supports: - chain.invoke() - LangChain Expression Language (LCEL) is used and it is not compatible with langchain < 0.1. """ @@ -130,21 +131,7 @@ def __init__( self.llm = llm self.prompt = prompt if prompt is not None else self.DEFAULT_PROMPT_TEMPLATE self.verbose = verbose - - # format prompt for langchain template placeholders - self.prompt = self.prompt.replace("[DOCUMENT]", "{DOCUMENT}").replace("[CANDIDATES]", "{CANDIDATES}") - # llm type check - assert isinstance(llm, (LCChatModel, LCBaseLLM)), ( - "A LangChain LLM must be either a chat model or a completion model." - ) - # langchain prompt template - prompt_template = ( - ChatPromptTemplate([("human", self.prompt)]) - if isinstance(llm, LCChatModel) - else PromptTemplate(template=self.prompt) - ) - # chain - self.chain = prompt_template | llm | CommaSeparatedListOutputParser() + self.chain = self._get_chain() def extract_keywords(self, documents: List[str], candidate_keywords: List[List[str]] = None): """Extract topics. 
@@ -167,3 +154,15 @@ def extract_keywords(self, documents: List[str], candidate_keywords: List[List[s all_keywords.append(keywords) return all_keywords + + def _get_chain(self): + """Get the chain using LLM and prompt.""" + # format prompt for langchain template placeholders + prompt = self.prompt.replace("[DOCUMENT]", "{DOCUMENT}").replace("[CANDIDATES]", "{CANDIDATES}") + + # langchain prompt template + is_chat_model = isinstance(self.llm, LCChatModel) + prompt_template = ChatPromptTemplate([("human", prompt)]) if is_chat_model else PromptTemplate(template=prompt) + + # chain + return prompt_template | self.llm | CommaSeparatedListOutputParser() From a0dc89afa18cf600e62a945a12145695a68caa11 Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Sun, 16 Mar 2025 21:24:06 -0700 Subject: [PATCH 12/25] output format: abandon comma separated list --- keybert/llm/_langchain.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index 566a68fe..1c167b1a 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -3,7 +3,7 @@ from langchain.prompts import ChatPromptTemplate, PromptTemplate from langchain_core.language_models.chat_models import BaseChatModel as LCChatModel from langchain_core.language_models.llms import BaseLLM as LCBaseLLM -from langchain_core.output_parsers import CommaSeparatedListOutputParser +from langchain_core.output_parsers import StrOutputParser from tqdm import tqdm from keybert.llm._base import BaseLLM @@ -23,12 +23,13 @@ class LangChain(BaseLLM): llm: A langchain LLM class. e.g ChatOpenAI, OpenAI, etc. prompt: The prompt to be used in the model. If no prompt is given, `self.DEFAULT_PROMPT_TEMPLATE` is used instead. - THe prompt should contain: + NOTE: The prompt should contain: 1. Placeholders - `[DOCUMENT]`: Required. The document to extract keywords from. - `[CANDIDATES]`: Optional. The candidate keywords to fine-tune the extraction. 2. 
Output format instructions - - The resulting keywords are expected to a list of comma-sparated str so ensure the foramt in your prompt. + - The prompt must include the output format instruction + that extracted keywords should be sparated by comma. e.g. "The output must be a list of comma separated keywords." verbose: Set this to True if you want to see a progress bar for the keyword extraction. @@ -120,6 +121,7 @@ class LangChain(BaseLLM): Now extract the keywords from the document. The keywords must be comma separated. +For example: "keyword1, keyword2, keyword3" """ def __init__( @@ -151,6 +153,7 @@ def extract_keywords(self, documents: List[str], candidate_keywords: List[List[s for document, candidates in tqdm(zip(documents, candidate_keywords), disable=not self.verbose): keywords = self.chain.invoke({"DOCUMENT": document, "CANDIDATES": candidates}) + keywords = [keyword.strip() for keyword in keywords.split(",")] all_keywords.append(keywords) return all_keywords @@ -165,4 +168,4 @@ def _get_chain(self): prompt_template = ChatPromptTemplate([("human", prompt)]) if is_chat_model else PromptTemplate(template=prompt) # chain - return prompt_template | self.llm | CommaSeparatedListOutputParser() + return prompt_template | self.llm | StrOutputParser() From 272be982dec9b7c98bbd979f4c50d75f9cfb1ce4 Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Sun, 16 Mar 2025 21:29:28 -0700 Subject: [PATCH 13/25] rename type alias --- keybert/llm/_langchain.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index 1c167b1a..6164c64b 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -1,8 +1,8 @@ from typing import List from langchain.prompts import ChatPromptTemplate, PromptTemplate -from langchain_core.language_models.chat_models import BaseChatModel as LCChatModel -from langchain_core.language_models.llms import BaseLLM as LCBaseLLM +from 
langchain_core.language_models.chat_models import BaseChatModel as LangChainBaseChatModel +from langchain_core.language_models.llms import BaseLLM as LangChainBaseLLM from langchain_core.output_parsers import StrOutputParser from tqdm import tqdm @@ -126,7 +126,7 @@ class LangChain(BaseLLM): def __init__( self, - llm: LCChatModel | LCBaseLLM, + llm: LangChainBaseChatModel | LangChainBaseLLM, prompt: str = None, verbose: bool = False, ): @@ -162,10 +162,9 @@ def _get_chain(self): """Get the chain using LLM and prompt.""" # format prompt for langchain template placeholders prompt = self.prompt.replace("[DOCUMENT]", "{DOCUMENT}").replace("[CANDIDATES]", "{CANDIDATES}") - + # check if the model is a chat model + is_chat_model = isinstance(self.llm, LangChainBaseChatModel) # langchain prompt template - is_chat_model = isinstance(self.llm, LCChatModel) prompt_template = ChatPromptTemplate([("human", prompt)]) if is_chat_model else PromptTemplate(template=prompt) - # chain return prompt_template | self.llm | StrOutputParser() From fc4765e669b9c93d682e55ccd0ca4c8d25099973 Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Sun, 16 Mar 2025 21:33:51 -0700 Subject: [PATCH 14/25] output format --- keybert/llm/_langchain.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index 6164c64b..3ccba6e7 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -28,9 +28,8 @@ class LangChain(BaseLLM): - `[DOCUMENT]`: Required. The document to extract keywords from. - `[CANDIDATES]`: Optional. The candidate keywords to fine-tune the extraction. 2. Output format instructions - - The prompt must include the output format instruction - that extracted keywords should be sparated by comma. - e.g. "The output must be a list of comma separated keywords." + - Include this or somethign similar in your prompt: + "Extracted keywords must be separated by comma." 
verbose: Set this to True if you want to see a progress bar for the keyword extraction. From f24809955111c104be7738ba1fe4fcf6de239963 Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Sun, 16 Mar 2025 21:35:00 -0700 Subject: [PATCH 15/25] typo --- keybert/llm/_langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index 3ccba6e7..0cd90392 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -28,7 +28,7 @@ class LangChain(BaseLLM): - `[DOCUMENT]`: Required. The document to extract keywords from. - `[CANDIDATES]`: Optional. The candidate keywords to fine-tune the extraction. 2. Output format instructions - - Include this or somethign similar in your prompt: + - Include this or something similar in your prompt: "Extracted keywords must be separated by comma." verbose: Set this to True if you want to see a progress bar for the keyword extraction. From 2c314221e58a1970b77c1b256fd5009034e160db Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Wed, 19 Mar 2025 16:38:03 -0700 Subject: [PATCH 16/25] doc update --- docs/guides/llms.md | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/docs/guides/llms.md b/docs/guides/llms.md index b7b07f0d..499ba602 100644 --- a/docs/guides/llms.md +++ b/docs/guides/llms.md @@ -172,32 +172,43 @@ kw_model = KeyLLM(llm) ### **LangChain** -To use LangChain, we can simply load in any LLM and pass that as a QA-chain to KeyLLM. +To use `langchain` LLM client in KeyLLM, we can simply load in any LLM in `langchain` and pass that to KeyLLM. -We install the package first: +We install langchain and corresponding LLM provider package first. 
Take OpenAI as an example: ```bash pip install langchain +pip install langchain-openai # LLM provider package ``` +> [!NOTE] +> KeyBERT only supports `langchain >= 0.1` + -Then we run LangChain as follows: +Then create your LLM client with `langchain` ```python -from langchain.chains.question_answering import load_qa_chain -from langchain.llms import OpenAI -chain = load_qa_chain(OpenAI(temperature=0, openai_api_key=my_openai_api_key), chain_type="stuff") +from langchain_openai import ChatOpenAI + +_llm = ChatOpenAI( + model="gpt-4o", + api_key="my-openai-api-key", + temperature=0, +) ``` -Finally, you can pass the chain to KeyBERT as follows: +Finally, pass the `langchain` llm client to KeyBERT as follows: ```python from keybert.llm import LangChain from keybert import KeyLLM # Create your LLM -llm = LangChain(chain) +llm = LangChain(_llm) # Load it in KeyLLM kw_model = KeyLLM(llm) + +# Extract keywords +keywords = kw_model.extract_keywords(MY_DOCUMENTS) ``` From 558a1140f1505911e1b03565bd451885f6e2c7bf Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Wed, 19 Mar 2025 16:43:48 -0700 Subject: [PATCH 17/25] add langchain import error handling --- keybert/llm/_langchain.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index 0cd90392..aa49f3dd 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -1,18 +1,27 @@ from typing import List -from langchain.prompts import ChatPromptTemplate, PromptTemplate -from langchain_core.language_models.chat_models import BaseChatModel as LangChainBaseChatModel -from langchain_core.language_models.llms import BaseLLM as LangChainBaseLLM -from langchain_core.output_parsers import StrOutputParser +try: + import langchain + from langchain.prompts import ChatPromptTemplate, PromptTemplate + from langchain_core.language_models.chat_models import BaseChatModel as LangChainBaseChatModel + from 
langchain_core.language_models.llms import BaseLLM as LangChainBaseLLM + from langchain_core.output_parsers import StrOutputParser + + assert langchain.__version__ >= "0.1" +except ImportError as e: + raise ImportError("LangChain is not installed. Please install it using `pip install langchain`.") from e +except AssertionError: + raise ImportError("LangChain version >= 0.1 is required. Please update it using `pip install --upgrade langchain`.") + from tqdm import tqdm from keybert.llm._base import BaseLLM from keybert.llm._utils import process_candidate_keywords """NOTE -langchain >= 0.1 is required. Which supports: -- chain.invoke() -- LangChain Expression Language (LCEL) is used and it is not compatible with langchain < 0.1. +KeyBERT only supports `langchain >= 0.1` which features: +- [Runnable Interface](https://python.langchain.com/docs/concepts/runnables/) +- [LangChain Expression Language (LCEL)](https://python.langchain.com/docs/concepts/lcel/) """ From 7c389204c7e8fc2321558fb0204d1d66d2409056 Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Wed, 19 Mar 2025 16:45:26 -0700 Subject: [PATCH 18/25] format --- docs/guides/llms.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/llms.md b/docs/guides/llms.md index 499ba602..a98237fc 100644 --- a/docs/guides/llms.md +++ b/docs/guides/llms.md @@ -180,7 +180,7 @@ We install langchain and corresponding LLM provider package first. 
Take OpenAI a pip install langchain pip install langchain-openai # LLM provider package ``` -> [!NOTE] +> [!NOTE] > KeyBERT only supports `langchain >= 0.1` From 7b9e0161956875d1f048f816daa53b76ae0aa0b6 Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Wed, 19 Mar 2025 17:00:54 -0700 Subject: [PATCH 19/25] use packaging for version compare --- keybert/llm/_langchain.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index aa49f3dd..34c26799 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -1,5 +1,7 @@ from typing import List +from packaging.version import Version + try: import langchain from langchain.prompts import ChatPromptTemplate, PromptTemplate @@ -7,7 +9,7 @@ from langchain_core.language_models.llms import BaseLLM as LangChainBaseLLM from langchain_core.output_parsers import StrOutputParser - assert langchain.__version__ >= "0.1" + assert Version(langchain.__version__) >= Version("0.1") except ImportError as e: raise ImportError("LangChain is not installed. 
Please install it using `pip install langchain`.") from e except AssertionError: From 1f50569cea6f5f2c94c7929220e20033c2739937 Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Wed, 19 Mar 2025 17:06:42 -0700 Subject: [PATCH 20/25] remove import check --- keybert/llm/_langchain.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index 34c26799..681f2536 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -1,20 +1,9 @@ from typing import List -from packaging.version import Version - -try: - import langchain - from langchain.prompts import ChatPromptTemplate, PromptTemplate - from langchain_core.language_models.chat_models import BaseChatModel as LangChainBaseChatModel - from langchain_core.language_models.llms import BaseLLM as LangChainBaseLLM - from langchain_core.output_parsers import StrOutputParser - - assert Version(langchain.__version__) >= Version("0.1") -except ImportError as e: - raise ImportError("LangChain is not installed. Please install it using `pip install langchain`.") from e -except AssertionError: - raise ImportError("LangChain version >= 0.1 is required. 
Please update it using `pip install --upgrade langchain`.") - +from langchain.prompts import ChatPromptTemplate, PromptTemplate +from langchain_core.language_models.chat_models import BaseChatModel as LangChainBaseChatModel +from langchain_core.language_models.llms import BaseLLM as LangChainBaseLLM +from langchain_core.output_parsers import StrOutputParser from tqdm import tqdm from keybert.llm._base import BaseLLM From 541a7653e44e7d244ef249210ab99e7d14b8ca12 Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Wed, 19 Mar 2025 17:38:33 -0700 Subject: [PATCH 21/25] import and version check for langchain --- keybert/llm/__init__.py | 5 +++++ keybert/llm/_langchain.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/keybert/llm/__init__.py b/keybert/llm/__init__.py index abe7c7d6..af41b9cc 100644 --- a/keybert/llm/__init__.py +++ b/keybert/llm/__init__.py @@ -1,3 +1,5 @@ +from packaging.version import InvalidVersion + from keybert._utils import NotInstalled from keybert.llm._base import BaseLLM @@ -35,6 +37,9 @@ except ModuleNotFoundError: msg = "`pip install langchain` \n\n" LangChain = NotInstalled("langchain", "langchain", custom_msg=msg) +except InvalidVersion as e: + msg = f"`pip install -U langchain` \n\nsince {e}\n\n" + LangChain = NotInstalled("langchain", "langchain", custom_msg=msg) # LiteLLM try: diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index 681f2536..e81a551b 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -1,14 +1,19 @@ from typing import List +import langchain from langchain.prompts import ChatPromptTemplate, PromptTemplate from langchain_core.language_models.chat_models import BaseChatModel as LangChainBaseChatModel from langchain_core.language_models.llms import BaseLLM as LangChainBaseLLM from langchain_core.output_parsers import StrOutputParser +from packaging.version import InvalidVersion, Version from tqdm import tqdm from keybert.llm._base import BaseLLM from keybert.llm._utils import 
process_candidate_keywords +if Version(langchain.__version__) < Version("0.1"): + raise InvalidVersion("langchain>=0.1 is required.") + """NOTE KeyBERT only supports `langchain >= 0.1` which features: - [Runnable Interface](https://python.langchain.com/docs/concepts/runnables/) From 3f4008399e7d1795d73c5c0bc45dd45c002c180f Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Wed, 19 Mar 2025 17:48:24 -0700 Subject: [PATCH 22/25] amend --- keybert/llm/_langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index e81a551b..806ef6c3 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -12,7 +12,7 @@ from keybert.llm._utils import process_candidate_keywords if Version(langchain.__version__) < Version("0.1"): - raise InvalidVersion("langchain>=0.1 is required.") + raise InvalidVersion(f"langchain>=0.1 is required, but langchain=={langchain.__version__} is installed.") """NOTE KeyBERT only supports `langchain >= 0.1` which features: From 02fd3bf74cb6ee98f81e313f9d2ea9510074c28a Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Fri, 21 Mar 2025 22:29:24 -0700 Subject: [PATCH 23/25] drop packaging lib --- keybert/llm/__init__.py | 4 +--- keybert/llm/_langchain.py | 5 ++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/keybert/llm/__init__.py b/keybert/llm/__init__.py index af41b9cc..9daa8fa2 100644 --- a/keybert/llm/__init__.py +++ b/keybert/llm/__init__.py @@ -1,5 +1,3 @@ -from packaging.version import InvalidVersion - from keybert._utils import NotInstalled from keybert.llm._base import BaseLLM @@ -37,7 +35,7 @@ except ModuleNotFoundError: msg = "`pip install langchain` \n\n" LangChain = NotInstalled("langchain", "langchain", custom_msg=msg) -except InvalidVersion as e: +except ImportError as e: msg = f"`pip install -U langchain` \n\nsince {e}\n\n" LangChain = NotInstalled("langchain", "langchain", custom_msg=msg) diff --git a/keybert/llm/_langchain.py 
b/keybert/llm/_langchain.py index 806ef6c3..c5852606 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -5,14 +5,13 @@ from langchain_core.language_models.chat_models import BaseChatModel as LangChainBaseChatModel from langchain_core.language_models.llms import BaseLLM as LangChainBaseLLM from langchain_core.output_parsers import StrOutputParser -from packaging.version import InvalidVersion, Version from tqdm import tqdm from keybert.llm._base import BaseLLM from keybert.llm._utils import process_candidate_keywords -if Version(langchain.__version__) < Version("0.1"): - raise InvalidVersion(f"langchain>=0.1 is required, but langchain=={langchain.__version__} is installed.") +if langchain.__version__ < "0.1": # for more complicated version comparison, use packaging.version + raise ImportError(f"langchain>=0.1 is required, but {langchain.__version__} is installed.") """NOTE KeyBERT only supports `langchain >= 0.1` which features: From fe85c7eb839a878d1fc30f203ed37398e25adb44 Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Fri, 21 Mar 2025 23:27:20 -0700 Subject: [PATCH 24/25] refactor import error handle --- keybert/llm/__init__.py | 19 +++++++++++++------ keybert/llm/_langchain.py | 4 ---- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/keybert/llm/__init__.py b/keybert/llm/__init__.py index 9daa8fa2..0819f96f 100644 --- a/keybert/llm/__init__.py +++ b/keybert/llm/__init__.py @@ -32,12 +32,19 @@ # LangChain Generator try: from keybert.llm._langchain import LangChain -except ModuleNotFoundError: - msg = "`pip install langchain` \n\n" - LangChain = NotInstalled("langchain", "langchain", custom_msg=msg) -except ImportError as e: - msg = f"`pip install -U langchain` \n\nsince {e}\n\n" - LangChain = NotInstalled("langchain", "langchain", custom_msg=msg) +except ModuleNotFoundError as e: + match e.name: + case "langchain": + msg = "`pip install langchain` \n\n" + LangChain = NotInstalled("langchain", "langchain", custom_msg=msg) + 
case "langchain_core": + msg = "`pip install -U langchain` to upgrade to langchain>=0.1\n\n" + LangChain = NotInstalled("langchain", "langchain", custom_msg=msg) + case _: + raise e +# except ImportError as e: +# msg = f"`pip install -U langchain` \n\nsince {e}\n\n" +# LangChain = NotInstalled("langchain", "langchain", custom_msg=msg) # LiteLLM try: diff --git a/keybert/llm/_langchain.py b/keybert/llm/_langchain.py index c5852606..681f2536 100644 --- a/keybert/llm/_langchain.py +++ b/keybert/llm/_langchain.py @@ -1,6 +1,5 @@ from typing import List -import langchain from langchain.prompts import ChatPromptTemplate, PromptTemplate from langchain_core.language_models.chat_models import BaseChatModel as LangChainBaseChatModel from langchain_core.language_models.llms import BaseLLM as LangChainBaseLLM @@ -10,9 +9,6 @@ from keybert.llm._base import BaseLLM from keybert.llm._utils import process_candidate_keywords -if langchain.__version__ < "0.1": # for more complicated version comparison, use packaging.version - raise ImportError(f"langchain>=0.1 is required, but {langchain.__version__} is installed.") - """NOTE KeyBERT only supports `langchain >= 0.1` which features: - [Runnable Interface](https://python.langchain.com/docs/concepts/runnables/) From 2569f4fc3656178a4afb6b6ef6cca8c7574472ff Mon Sep 17 00:00:00 2001 From: Shengbo Ma Date: Fri, 21 Mar 2025 23:34:01 -0700 Subject: [PATCH 25/25] remove match not support in 3.9 --- keybert/llm/__init__.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/keybert/llm/__init__.py b/keybert/llm/__init__.py index 0819f96f..b222d953 100644 --- a/keybert/llm/__init__.py +++ b/keybert/llm/__init__.py @@ -33,18 +33,15 @@ try: from keybert.llm._langchain import LangChain except ModuleNotFoundError as e: - match e.name: - case "langchain": - msg = "`pip install langchain` \n\n" - LangChain = NotInstalled("langchain", "langchain", custom_msg=msg) - case "langchain_core": - msg = "`pip install -U 
langchain` to upgrade to langchain>=0.1\n\n" - LangChain = NotInstalled("langchain", "langchain", custom_msg=msg) - case _: - raise e -# except ImportError as e: -# msg = f"`pip install -U langchain` \n\nsince {e}\n\n" -# LangChain = NotInstalled("langchain", "langchain", custom_msg=msg) + if e.name == "langchain": + msg = "`pip install langchain` \n\n" + LangChain = NotInstalled("langchain", "langchain", custom_msg=msg) + elif e.name == "langchain_core": + msg = "`pip install -U langchain` to upgrade to langchain>=0.1\n\n" + LangChain = NotInstalled("langchain", "langchain", custom_msg=msg) + else: + # not caused by importing langchain or langchain_core + raise e # LiteLLM try: