From 8b647a4df3f3dda61da6bf383a9628e25cd1a395 Mon Sep 17 00:00:00 2001 From: Jeremy Pinto Date: Mon, 23 Oct 2023 16:13:22 -0400 Subject: [PATCH 01/11] minor update to docs --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d7413fa..d521513 100644 --- a/README.md +++ b/README.md @@ -9,11 +9,11 @@ -Buster is a question-answering chatbot that can be tuned to any source of documentations. +Buster is a retrieval-augmented generation (RAG) module that can be tuned to any source of documentation. # Demo -In order to view the full abilities of Buster, you can play with our [live demo here](https://huggingface.co/spaces/jerpint/buster). +In order to demo Buster's abilities, you can play with our [live demo here](https://huggingface.co/spaces/jerpint/buster). We scraped the documentation of [huggingface 🤗 Transformers](https://huggingface.co/docs/transformers/index) and instructed Buster to answer questions related to its usage. # Quickstart @@ -45,7 +45,7 @@ export OPENAI_API_KEY=sk-... # Generating your own embeddings -Once your local version of Buster is up and running, the next step is for you to be able to import your own data. +Once your local version of Buster is properly installed, the next step is for you to be able to import your own data. We will be using the `stackoverflow.csv` file in the `buster/examples/` folder for this. This is the same data that was used to generate the demo app's embeddings. You will first ingest the documents to be ready for buster. 
In this example, we use Deeplake's vector store, but you can always write your own custom `DocumentManager`: From 81b7df6835ee9d8105e81872ba6275dcf7b514c3 Mon Sep 17 00:00:00 2001 From: Jeremy Pinto Date: Mon, 23 Oct 2023 16:23:08 -0400 Subject: [PATCH 02/11] first pass at adding documentation --- .readthedocs.yaml | 13 +++++++ docs/Makefile | 20 ++++++++++ docs/conf.py | 79 ++++++++++++++++++++++++++++++++++++++++ docs/index.rst | 37 +++++++++++++++++++ docs/make.bat | 35 ++++++++++++++++++ docs/requirements.txt | 6 +++ docs/usage/guide.rst | 16 ++++++++ docs/usage/quickstart.md | 6 +++ 8 files changed, 212 insertions(+) create mode 100644 .readthedocs.yaml create mode 100644 docs/Makefile create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/make.bat create mode 100644 docs/requirements.txt create mode 100644 docs/usage/guide.rst create mode 100644 docs/usage/quickstart.md diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..9138a7f --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,13 @@ +version: "2" + +build: + os: "ubuntu-22.04" + tools: + python: "3.10" + +python: + install: + - requirements: docs/requirements.txt + +sphinx: + configuration: docs/source/conf.py diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..841f03b --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,79 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + +# -- Project information ----------------------------------------------------- + +project = 'buster 🤖' +author = 'jerpint, hbertrand' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = [] + +# enable use of markdown files +extensions.append('myst_parser') + +# use the readthedocs theme +extensions.append('sphinx_rtd_theme') +extensions.append('sphinx.ext.napoleon') +extensions.append('sphinxcontrib.katex') + +# autoapi extension for doc strings +extensions.append('autoapi.extension') +autoapi_type = 'python' +autoapi_dirs = ['../buster/'] + + +# Skip docstrings for loggers and tests +def check_skip_member(app, what, name, obj, skip, options): + """Skips documentation when the function returns True.""" + SKIP_PATTERNS = ["test_", "logger"] + for pattern in SKIP_PATTERNS: + if pattern in name: + print("Skipping documentation for: ", name) + return True + return False + + +def setup(app): + """Handler to connect to the autoapi app.""" + app.connect("autoapi-skip-member", check_skip_member) + + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..3b86916 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,37 @@ +.. amlrt_project documentation master file, created by + sphinx-quickstart on Fri Jul 3 10:11:19 2020. 
+ You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to Buster's documentation! +============================================= + +About the project +----------------- +Buster is a RAG library. + + +.. toctree:: + :caption: Quick Start + :maxdepth: 1 + + usage/quickstart + +.. toctree:: + :caption: User Guide + :maxdepth: 1 + + usage/guide + +.. toctree:: + :maxdepth: 2 + :caption: API Reference + + autoapi/index + +Useful links +============ + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..2119f51 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..498e45b --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,6 @@ +myst-parser +sphinx +sphinx-autoapi +sphinx-rtd-theme +sphinxcontrib-napoleon +sphinxcontrib-katex diff --git a/docs/usage/guide.rst b/docs/usage/guide.rst new file mode 100644 index 0000000..0db3d3d --- /dev/null +++ b/docs/usage/guide.rst @@ -0,0 +1,16 @@ +User Guide +========== + +Quick Start +----------- + +To get started, you have to first begin! + +Everybody loves Schrodinger's equation, why not put it everywhere? + +.. math:: + i \hbar \frac{\partial}{\partial t}\Psi(\mathbf{r},t) = \hat H \Psi(\mathbf{r},t) + +You can also add math or even link directly in your docstrings! 
For an example, click at the docstrings here: + +:py:meth:`amlrt_project.models.optim.load_loss` diff --git a/docs/usage/quickstart.md b/docs/usage/quickstart.md new file mode 100644 index 0000000..cccecea --- /dev/null +++ b/docs/usage/quickstart.md @@ -0,0 +1,6 @@ +# Getting started +## Quickstart + +Put your project instructions here, like + +`pip install -e .` From 8f42a991c8538e96b1f07352817565a4188780b4 Mon Sep 17 00:00:00 2001 From: Jeremy Pinto Date: Mon, 23 Oct 2023 16:27:29 -0400 Subject: [PATCH 03/11] change path --- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 9138a7f..b668cb0 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -10,4 +10,4 @@ python: - requirements: docs/requirements.txt sphinx: - configuration: docs/source/conf.py + configuration: docs/conf.py From ea6ceba5ab4c2b53e0989c4fe9a22bfc1c8af07c Mon Sep 17 00:00:00 2001 From: Jeremy Pinto Date: Tue, 24 Oct 2023 09:35:59 -0400 Subject: [PATCH 04/11] ignore docs build folder --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 27b6457..8e61123 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +# Ignore docs +docs/_build/ # database files *.db From a3c0e5aa5feb7157f5929d3ecef14404733a5dcb Mon Sep 17 00:00:00 2001 From: Jeremy Pinto Date: Tue, 24 Oct 2023 09:45:07 -0400 Subject: [PATCH 05/11] update quickstart --- docs/index.rst | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 3b86916..df4c168 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,12 +3,23 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Welcome to Buster's documentation! +Buster 🤖 ============================================= About the project ----------------- -Buster is a RAG library. +Buster is a library for Retrieval-Augmented Generation (RAG). 
+It leverages LLMs and embeddings to provide answers to questions grounded in references. + +Buster is open-source and hackable. +It includes many features out of the box, and is intended for deployment. + + +Demo +---- + +In order to demo Buster's abilities, you can play with our `live demo here `__. +We scraped the documentation of `huggingface 🤗 Transformers `__ and instructed Buster to answer questions related to its usage. .. toctree:: From 49587fc5da97a82e8c995751c40a3e72d59542ff Mon Sep 17 00:00:00 2001 From: Jeremy Pinto Date: Tue, 24 Oct 2023 09:58:28 -0400 Subject: [PATCH 06/11] add getting started --- docs/index.rst | 4 ++-- docs/usage/installation.md | 26 ++++++++++++++++++++++++++ docs/usage/quickstart.md | 6 ------ 3 files changed, 28 insertions(+), 8 deletions(-) create mode 100644 docs/usage/installation.md delete mode 100644 docs/usage/quickstart.md diff --git a/docs/index.rst b/docs/index.rst index df4c168..766aeb1 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -23,10 +23,10 @@ We scraped the documentation of `huggingface 🤗 Transformers =3.10 + +```bash +pip install buster-doctalk +``` + +Then, go to the examples folder and launch the app. +We've included small sample data off stackoverflow-ai questions that you can test your setup with to try app: + +```bash +cd buster/buster/examples +gradio gradio_app.py +``` + +This will launch the gradio app locally. + + +**NOTE**: The demo uses chatGPT to generate text and compute embeddings, make sure to set a valid openai API key: +```bash +export OPENAI_API_KEY=sk-... 
+``` \ No newline at end of file diff --git a/docs/usage/quickstart.md b/docs/usage/quickstart.md deleted file mode 100644 index cccecea..0000000 --- a/docs/usage/quickstart.md +++ /dev/null @@ -1,6 +0,0 @@ -# Getting started -## Quickstart - -Put your project instructions here, like - -`pip install -e .` From 51d7fa4aa61f4fe94f9e9e640eead31b5a4021ef Mon Sep 17 00:00:00 2001 From: Jeremy Pinto Date: Wed, 25 Oct 2023 13:23:46 -0400 Subject: [PATCH 07/11] WIP --- docs/conf.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 841f03b..920b051 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,8 +16,8 @@ # -- Project information ----------------------------------------------------- -project = 'buster 🤖' -author = 'jerpint, hbertrand' +project = "buster 🤖" +author = "jerpint, hbertrand" # -- General configuration --------------------------------------------------- @@ -28,17 +28,17 @@ extensions = [] # enable use of markdown files -extensions.append('myst_parser') +extensions.append("myst_parser") # use the readthedocs theme -extensions.append('sphinx_rtd_theme') -extensions.append('sphinx.ext.napoleon') -extensions.append('sphinxcontrib.katex') +extensions.append("sphinx_rtd_theme") +extensions.append("sphinx.ext.napoleon") +extensions.append("sphinxcontrib.katex") # autoapi extension for doc strings -extensions.append('autoapi.extension') -autoapi_type = 'python' -autoapi_dirs = ['../buster/'] +extensions.append("autoapi.extension") +autoapi_type = "python" +autoapi_dirs = ["../buster/"] # Skip docstrings for loggers and tests @@ -58,12 +58,12 @@ def setup(app): # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. 
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # -- Options for HTML output ------------------------------------------------- @@ -76,4 +76,4 @@ def setup(app): # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] From 6a91a1d0c6b638301571ecbd02319fe8ca5794ff Mon Sep 17 00:00:00 2001 From: Jeremy Pinto Date: Wed, 25 Oct 2023 15:02:20 -0400 Subject: [PATCH 08/11] update docs --- docs/index.rst | 14 ++++++-------- docs/usage/installation.md | 34 +++++++++++++++++++++++++++------- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 766aeb1..f6c53fe 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,8 +1,3 @@ -.. amlrt_project documentation master file, created by - sphinx-quickstart on Fri Jul 3 10:11:19 2020. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - Buster 🤖 ============================================= @@ -24,15 +19,18 @@ We scraped the documentation of `huggingface 🤗 Transformers Date: Wed, 25 Oct 2023 15:02:47 -0400 Subject: [PATCH 09/11] add more pages --- docs/usage/components_overview.md | 3 +++ docs/usage/configuration.md | 3 +++ docs/usage/custom_docs.md | 3 +++ 3 files changed, 9 insertions(+) create mode 100644 docs/usage/components_overview.md create mode 100644 docs/usage/configuration.md create mode 100644 docs/usage/custom_docs.md diff --git a/docs/usage/components_overview.md b/docs/usage/components_overview.md new file mode 100644 index 0000000..b0f4d55 --- /dev/null +++ b/docs/usage/components_overview.md @@ -0,0 +1,3 @@ +# Overview + +Completers, Retrievers, etc. 
diff --git a/docs/usage/configuration.md b/docs/usage/configuration.md new file mode 100644 index 0000000..3ed4f29 --- /dev/null +++ b/docs/usage/configuration.md @@ -0,0 +1,3 @@ +# Configuration + +Buster uses a config file to setup most of the app. \ No newline at end of file diff --git a/docs/usage/custom_docs.md b/docs/usage/custom_docs.md new file mode 100644 index 0000000..52d2ec5 --- /dev/null +++ b/docs/usage/custom_docs.md @@ -0,0 +1,3 @@ +# Adding Documents + +To add your own documents, \ No newline at end of file From 68061332fa32bcd598f0f4433147471c7c6319ac Mon Sep 17 00:00:00 2001 From: Jeremy Pinto Date: Wed, 25 Oct 2023 15:04:12 -0400 Subject: [PATCH 10/11] add badge to README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d521513..0196d31 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ [![PyPI](https://img.shields.io/pypi/v/buster-doctalk?logo=pypi)](https://pypi.org/project/buster-doctalk) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![Hugging Face Spaces](https://img.shields.io/badge/🤗%20Hugging%20Face-Buster%20Demo-blue)](https://huggingface.co/spaces/jerpint/buster) +[![Documentation Status](https://readthedocs.org/projects/buster/badge/?version=latest)](https://buster.readthedocs.io/en/latest/?badge=latest) From 24470f96e76bd54b94abfd30ce116c79e710e117 Mon Sep 17 00:00:00 2001 From: Jeremy Pinto Date: Fri, 27 Oct 2023 15:01:15 -0400 Subject: [PATCH 11/11] add more content --- docs/index.rst | 12 +++-- docs/usage/components.md | 19 +++++++ docs/usage/configuration.md | 104 +++++++++++++++++++++++++++++++++++- 3 files changed, 128 insertions(+), 7 deletions(-) create mode 100644 docs/usage/components.md diff --git a/docs/index.rst b/docs/index.rst index f6c53fe..2f03f02 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -22,6 +22,13 @@ We scraped the documentation of `huggingface 🤗 Transformers