diff --git a/RAG_book.ipynb b/RAG_book.ipynb new file mode 100644 index 0000000..32628fc --- /dev/null +++ b/RAG_book.ipynb @@ -0,0 +1,1095 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kUfin90gHUKc", + "outputId": "b24d6726-79dc-4d47-ed22-157d12030962" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (9.4.0)\n", + "Collecting PyPDF2\n", + " Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m232.6/232.6 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: PyPDF2\n", + "Successfully installed PyPDF2-3.0.1\n", + "Collecting openai\n", + " Downloading openai-1.3.0-py3-none-any.whl (220 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m220.3/220.3 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: anyio<4,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai) (3.7.1)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai) (1.7.0)\n", + "Collecting httpx<1,>=0.23.0 (from openai)\n", + " Downloading httpx-0.25.1-py3-none-any.whl (75 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.0/75.0 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from openai) (1.10.13)\n", + "Requirement already satisfied: tqdm>4 in 
/usr/local/lib/python3.10/dist-packages (from openai) (4.66.1)\n", + "Requirement already satisfied: typing-extensions<5,>=4.5 in /usr/local/lib/python3.10/dist-packages (from openai) (4.5.0)\n", + "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.5.0->openai) (3.4)\n", + "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.5.0->openai) (1.3.0)\n", + "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.5.0->openai) (1.1.3)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai) (2023.7.22)\n", + "Collecting httpcore (from httpx<1,>=0.23.0->openai)\n", + " Downloading httpcore-1.0.2-py3-none-any.whl (76 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.9/76.9 kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting h11<0.15,>=0.13 (from httpcore->httpx<1,>=0.23.0->openai)\n", + " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: h11, httpcore, httpx, openai\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "llmx 0.0.15a0 requires cohere, which is not installed.\n", + "llmx 0.0.15a0 requires tiktoken, which is not installed.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed h11-0.14.0 httpcore-1.0.2 httpx-0.25.1 openai-1.3.0\n", + "Collecting langchain\n", + " Downloading langchain-0.0.336-py3-none-any.whl (2.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (6.0.1)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.0.23)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (3.8.6)\n", + "Requirement already satisfied: anyio<4.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (3.7.1)\n", + "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (4.0.3)\n", + "Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)\n", + " Downloading dataclasses_json-0.6.2-py3-none-any.whl (28 kB)\n", + "Collecting jsonpatch<2.0,>=1.33 (from langchain)\n", + " Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n", + "Collecting langsmith<0.1.0,>=0.0.63 (from langchain)\n", + " Downloading langsmith-0.0.64-py3-none-any.whl (45 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (1.23.5)\n", + "Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (1.10.13)\n", + 
"Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.31.0)\n", + "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (8.2.3)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.1.0)\n", + "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (3.3.2)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.4)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.2)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n", + "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<4.0->langchain) (3.4)\n", + "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4.0->langchain) (1.3.0)\n", + "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<4.0->langchain) (1.1.3)\n", + "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain)\n", + " Downloading marshmallow-3.20.1-py3-none-any.whl (49 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain)\n", + " Downloading 
typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", + "Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain)\n", + " Downloading jsonpointer-2.4-py2.py3-none-any.whl (7.8 kB)\n", + "Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (4.5.0)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (2023.7.22)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain) (3.0.1)\n", + "Requirement already satisfied: packaging>=17.0 in /usr/local/lib/python3.10/dist-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json<0.7,>=0.5.7->langchain) (23.2)\n", + "Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain)\n", + " Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", + "Installing collected packages: mypy-extensions, marshmallow, jsonpointer, typing-inspect, langsmith, jsonpatch, dataclasses-json, langchain\n", + "Successfully installed dataclasses-json-0.6.2 jsonpatch-1.33 jsonpointer-2.4 langchain-0.0.336 langsmith-0.0.64 marshmallow-3.20.1 mypy-extensions-1.0.0 typing-inspect-0.9.0\n", + "Collecting faiss-cpu\n", + " Downloading faiss_cpu-1.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.6/17.6 MB\u001b[0m \u001b[31m67.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: faiss-cpu\n", + "Successfully installed faiss-cpu-1.7.4\n", + "Collecting tiktoken\n", + " Downloading tiktoken-0.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n", + 
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m21.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken) (2023.6.3)\n", + "Requirement already satisfied: requests>=2.26.0 in /usr/local/lib/python3.10/dist-packages (from tiktoken) (2.31.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken) (2023.7.22)\n", + "Installing collected packages: tiktoken\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
# %% Install dependencies
# Notebook magics are comment-escaped (jupytext "percent" convention) so this
# block is also valid plain Python; inside a notebook cell, drop the leading "# ".
# Versions are pinned to the ones the recorded run of this notebook resolved
# to, so that Restart & Run All keeps working when newer releases move or
# rename the langchain/openai entry points imported below.
# %pip install -q Pillow==9.4.0 PyPDF2==3.0.1 openai==1.3.0 langchain==0.0.336 faiss-cpu==1.7.4 tiktoken==0.5.1

# %% Imports
import os
from getpass import getpass

import PyPDF2
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.azure_openai import AzureOpenAIEmbeddings
from langchain.vectorstores import FAISS
from openai import AzureOpenAI
from langchain.chat_models import AzureChatOpenAI
from langchain.chains.retrieval_qa.base import RetrievalQA

# %% Configuration
# The API key is a secret: read it from the environment and fall back to an
# interactive prompt, so it is never stored in the notebook or its history.
# NOTE(review): a literal key was committed in an earlier revision of this
# cell; treat that key as compromised and rotate it.
api_key = os.environ.get("AZURE_OPENAI_API_KEY") or getpass("Azure OpenAI API key: ")
api_base = "https://openapiazconf.openai.azure.com/"
api_type = 'azure'
api_version = '2023-10-01-preview'
embedding_model_name = 'text-embedding-ada-002'
embedding_deployment_name = 'embedding'
deployment_name = 'azconfmodel'

# %% Azure OpenAI client
client = AzureOpenAI(
    api_key=api_key,
    api_version=api_version,
    azure_endpoint=api_base,
)


# %% PDF text extraction
def extract_data_from_scanned_pdf(file_path: str) -> str:
    """Return the extractable text of every page of a PDF as one string.

    Despite the name, no OCR is performed here: the text comes only from
    ``PyPDF2``'s ``extract_text()``.
    NOTE(review): image-only (truly scanned) pages presumably yield no text
    at all -- confirm, and add an OCR step if such files must be supported.

    Args:
        file_path: Path of the PDF file, passed straight to ``PyPDF2.PdfReader``.

    Returns:
        The per-page texts joined with a newline. The newline keeps the last
        line of one page from being fused with the first line of the next,
        which matters because the chunker below splits on newlines. A PDF
        with no pages gives an empty string.

    Raises:
        Whatever ``PyPDF2.PdfReader`` raises for a missing or unreadable file.
    """
    reader = PyPDF2.PdfReader(file_path)
    # `or ""` keeps the join safe should a page report no text as None.
    page_texts = [page.extract_text() or "" for page in reader.pages]
    return "\n".join(page_texts)


# %% Chunking
def get_data_chunks(data: str, chunk_size: int) -> list[str]:
    """Split ``data`` into newline-delimited chunks of about ``chunk_size`` characters.

    The text is split on newlines and the pieces are regrouped without
    overlap, with length measured by ``len`` (characters, not tokens).
    A single line longer than ``chunk_size`` is not cut: the splitter emits
    it as an oversized chunk and logs "Created a chunk of size N, which is
    longer than the specified M". Choose ``chunk_size`` larger than the
    typical line length to avoid that.

    Args:
        data: Text to split.
        chunk_size: Target maximum chunk length in characters; must be positive.

    Returns:
        The chunks, in document order.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=chunk_size,
        chunk_overlap=0,
        length_function=len,
    )
    return text_splitter.split_text(data)


# %% Input document
# Defaults to the original location; set RAG_PDF_PATH to point at another file.
file_path = os.environ.get("RAG_PDF_PATH", "/sample.pdf")

# %% Load the document
print(file_path)
raw_data = extract_data_from_scanned_pdf(file_path)
# Fail here rather than building an empty index further down.
assert raw_data.strip(), f"No extractable text found in {file_path}"
"WARNING:langchain.text_splitter:Created a chunk of size 43, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 29, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 28, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 23, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 29, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 57, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 31, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 54, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 21, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 28, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 32, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 21, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 37, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 28, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 32, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 28, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 45, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 26, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 58, which is longer than the specified 16\n", + 
"WARNING:langchain.text_splitter:Created a chunk of size 45, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 24, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 122, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 82, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 118, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 119, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 33, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 123, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 253, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 218, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 177, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 62, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 29, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 117, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 128, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 127, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 56, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 25, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 122, which is longer than the specified 
16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 233, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 130, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 136, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 88, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 35, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 118, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 238, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 117, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 45, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 124, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 507, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 129, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 120, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 225, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 43, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 129, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 234, which is longer than the 
specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 41, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 83, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 81, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 127, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 187, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 67, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 123, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 127, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 25, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 102, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 122, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 75, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 62, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 121, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 129, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 121, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 33, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 30, which is longer 
than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 117, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 44, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 37, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 27, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 22, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 51, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 31, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 34, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 41, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 45, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 28, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 60, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 123, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 128, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 207, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 62, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 28, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 130, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 98, which is longer 
than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 27, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 96, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 123, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 119, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 26, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 101, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 98, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 107, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 106, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 113, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 37, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 126, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 25, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 23, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 127, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 185, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 71, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 127, which is 
longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 117, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 130, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 19, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 248, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 120, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 94, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 127, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 123, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 71, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 30, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 126, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 124, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 128, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 93, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 137, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 130, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 121, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 86, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 24, 
which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 250, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 33, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 38, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 106, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 40, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 127, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 123, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 26, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 119, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 250, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 126, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 237, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 121, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 119, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 113, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 64, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 58, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 127, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of 
size 118, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 23, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 65, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 33, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 62, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 121, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 123, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 37, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 124, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 121, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 94, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 127, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 131, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 38, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 85, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 114, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 73, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a 
chunk of size 124, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 118, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 112, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 94, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 110, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 106, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 35, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 114, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 40, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 22, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 58, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 58, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 99, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 93, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 58, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 58, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 99, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 116, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 60, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created 
a chunk of size 323, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 31, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 45, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 47, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 111, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 55, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 56, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 107, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 113, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 121, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 44, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 123, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 53, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 118, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 78, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 30, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 89, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 88, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 39, which is longer than the specified 16\n", + 
"WARNING:langchain.text_splitter:Created a chunk of size 118, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 25, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 124, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 19, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 101, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 127, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 100, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 108, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 67, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 124, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 23, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 52, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 91, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 55, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 251, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 367, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 63, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 96, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 50, which is longer than the specified 
16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 118, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 69, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 168, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 119, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 121, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 117, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 120, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 119, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 124, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 118, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 122, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 113, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 108, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 32, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 122, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 67, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 118, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 120, which is longer than 
the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 123, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 51, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 127, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 119, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 122, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 116, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 115, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 119, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 88, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 21, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 166, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 365, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 197, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 82, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 102, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 118, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 105, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 109, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 117, which is 
longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 142, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 48, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 26, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 183, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 107, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 109, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 79, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 115, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 49, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 119, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 124, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 114, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 126, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 41, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 28, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 131, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 121, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 51, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 32, 
which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 128, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 127, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 25, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 31, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 129, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 112, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 48, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 37, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 30, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 120, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 46, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 54, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 28, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 123, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 127, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 122, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of 
size 120, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 102, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 104, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 31, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 118, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 117, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 83, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 122, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 32, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 129, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 123, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 179, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 57, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 126, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 129, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 115, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 36, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 128, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a 
chunk of size 251, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 246, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 110, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 259, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 153, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 21, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 124, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 105, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 119, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 123, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 123, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 84, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 124, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 100, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 109, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 37, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 120, which is longer than the specified 16\n", + 
"WARNING:langchain.text_splitter:Created a chunk of size 132, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 121, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 189, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 93, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 34, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 26, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 115, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 81, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 115, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 18, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 71, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 49, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 28, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 100, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 178, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 84, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 48, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 28, which is longer than the specified 
16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 77, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 130, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 129, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 388, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 143, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 32, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 122, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 129, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 132, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 131, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 47, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 126, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 68, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 134, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 94, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 28, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 118, which is longer than the 
specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 131, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 75, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 121, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 127, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 45, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 45, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 130, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 133, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 121, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 123, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 122, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 123, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 124, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 191, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 75, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 130, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 22, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 104, which is longer 
than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 27, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 132, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 127, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 265, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 92, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 127, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 120, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 76, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 58, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 120, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 56, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 45, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 118, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 120, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 29, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 122, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 115, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 121, which is 
longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 120, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 27, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 130, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 117, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 101, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 128, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 122, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 124, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 208, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 42, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 126, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 48, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 17, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 64, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 40, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 53, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 65, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 87, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 62, 
which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 42, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 47, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 35, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 39, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 36, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 21, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 32, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 36, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 62, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 24, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 64, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 74, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 55, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 31, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 42, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 25, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 34, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 23, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 19, 
which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 17, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 88, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 44, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 27, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 45, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 30, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 67, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 142, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 129, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 93, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 35, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 79, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 37, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 50, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 99, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 17, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 35, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 78, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 37, 
which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 49, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 17, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 35, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 78, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 37, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 49, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 17, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 35, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 78, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 37, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 49, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 17, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 35, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 78, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 37, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 49, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 17, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 35, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 78, 
which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 37, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 49, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 138, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 140, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 78, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 128, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 83, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 58, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 36, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 143, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 49, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 47, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 27, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 27, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 26, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 146, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 19, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 123, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 
22, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 21, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 141, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 29, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 25, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 30, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 117, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 135, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 148, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 44, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 26, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 18, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 26, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 18, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 26, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 18, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 145, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 27, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of 
size 139, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 138, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 127, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 145, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 145, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 142, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 71, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 32, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 32, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 32, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 32, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 24, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 141, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 141, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 44, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 45, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 53, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 50, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 135, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a 
chunk of size 141, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 144, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 337, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 130, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 40, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 130, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 253, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 54, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 82, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 122, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 44, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 130, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 110, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 23, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 116, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 51, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 218, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 36, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 125, which is longer than the specified 16\n", + 
"WARNING:langchain.text_splitter:Created a chunk of size 97, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 139, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 128, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 100, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 123, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 112, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 74, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 128, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 47, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 124, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 66, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 75, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 41, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 69, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 80, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 67, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 57, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 102, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 44, which is longer than the specified 
16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 24, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 99, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 54, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 20, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 109, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 216, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 72, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 122, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 118, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 126, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 130, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 134, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 114, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 23, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 59, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 253, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 51, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 105, which is longer than the specified 16\n", + "WARNING:langchain.text_splitter:Created a chunk of size 109, which is longer than the 
# Size limit forwarded to the chunking helper.
# NOTE(review): the recorded run logged "Created a chunk of size N, which is
# longer than the specified 16" for a large number of chunks, so this limit is
# smaller than most of the pieces the splitter can actually produce. Confirm
# that such a small value is intentional before reusing it.
chunk_size = 16

# Split the raw document text into the list of text chunks used downstream.
chunks = get_data_chunks(raw_data, chunk_size=chunk_size)
# Embedding client for the Azure OpenAI embedding deployment.
#
# * The resource URL goes in `azure_endpoint`, not `openai_api_base`: with
#   openai>=1.0.0 LangChain rewrites `openai_api_base` by appending "/openai"
#   and warns about it; because the configured URL ends in "/", that rewrite
#   produced a malformed ".com//openai" base. The trailing slash is stripped
#   here so the endpoint is joined cleanly with the API path.
# * The deployment is identified with `azure_deployment` rather than `model`.
#   `model` is what the tokenizer lookup uses, and a deployment name is not a
#   known model name, which is what triggers the "model not found. Using
#   cl100k_base encoding." warning when texts are embedded.
embeddings = AzureOpenAIEmbeddings(
    azure_deployment=embedding_deployment_name,
    azure_endpoint=api_base.rstrip("/"),
    api_key=api_key,
    openai_api_type=api_type,
)
# Take a small window of the document (chunk indices 25 through 34) so the
# demo only embeds a handful of texts, and show exactly what was selected.
test_chunks = chunks[25:35]
print(test_chunks)

# Embed the selected chunks and build an in-memory FAISS index over them.
knowledge_hub = FAISS.from_texts(texts=test_chunks, embedding=embeddings)

# Expose the index as a retriever that returns the 2 most similar chunks
# for each query.
retriever = knowledge_hub.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=2),
)
# Chat model served by the Azure OpenAI deployment `deployment_name`.
#
# The resource URL is supplied as `azure_endpoint` instead of
# `openai_api_base`. With openai>=1.0.0, combining `openai_api_base` with
# `deployment_name` makes LangChain emit several UserWarnings and rewrite the
# base URL itself ("/openai" and the deployment path are appended for you).
# `azure_endpoint` + `deployment_name` is the combination the library asks
# for. The trailing "/" is stripped so the endpoint joins cleanly with the
# API path.
llm = AzureChatOpenAI(
    deployment_name=deployment_name,
    azure_endpoint=api_base.rstrip("/"),
    openai_api_key=api_key,
    openai_api_type=api_type,
    openai_api_version=api_version,
    temperature=0.3,
)

# Retrieval-augmented QA chain: the retriever supplies context documents and
# the chat model answers from them. `return_source_documents=True` makes the
# result include the retrieved documents next to the generated answer.
chain_type = 'stuff'
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type=chain_type,
    retriever=retriever,
    return_source_documents=True,
)
# Free-text query to run through the retrieval-QA chain.
question = 'all supporting materials'

# The chain reads its input from the "query" key. The returned mapping holds
# the original query, the generated answer under "result", and the retrieved
# chunks under "source_documents".
result = chain(dict(query=question))
print(result)