diff --git a/.github/workflows/mypy_ruff.yml b/.github/workflows/mypy_ruff.yml
index 629908b..f7e423f 100644
--- a/.github/workflows/mypy_ruff.yml
+++ b/.github/workflows/mypy_ruff.yml
@@ -22,7 +22,7 @@ jobs:
     - name: Run dependency libraries
       run: |
        pip install -e .
-       pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 torchtext==0.16.0
+       pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0
        pip install mypy ruff
     - name: Run mypy
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 61c107f..476084b 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -28,7 +28,7 @@ jobs:
     - name: Run dependency libraries
       run: |
        pip install -e .
-       pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 torchtext==0.16.0
+       pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0
        pip install pytest
     - name: Run pytest
       run: pytest tests/test_models.py
diff --git a/README.md b/README.md
index 949ff7a..19d568a 100755
--- a/README.md
+++ b/README.md
@@ -24,10 +24,10 @@ Install ffmpeg first. If you are an Ubuntu user, run:
 ```
 apt install ffmpeg
 ```
-Then, install pytorch, torchvision, torchaudio, and torchtext based on your GPU environments.
+Then, install pytorch, torchvision, and torchaudio based on your GPU environments.
 Note that the inference API is available for CPU environments. We tested the codes on Python 3.9 and CUDA 11.8:
 ```
-pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 torchtext==0.16.0 --index-url https://download.pytorch.org/whl/cu118
+pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118
 ```
 Finally, run to install dependency libraries:
 ```
diff --git a/lighthouse/common/vocab/__init__.py b/lighthouse/common/vocab/__init__.py
new file mode 100644
index 0000000..c20fd45
--- /dev/null
+++ b/lighthouse/common/vocab/__init__.py
@@ -0,0 +1,13 @@
+import warnings
+
+from .vectors import CharNGram, FastText, GloVe, pretrained_aliases, Vectors
+from .vocab import Vocab
+
+__all__ = [
+    "Vocab",
+    "GloVe",
+    "FastText",
+    "CharNGram",
+    "pretrained_aliases",
+    "Vectors",
+]
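
The vendored package above re-exports the same names as `torchtext.vocab` (`Vocab`, `GloVe`, `FastText`, `CharNGram`, `Vectors`, `pretrained_aliases`), so call sites only need the import swapped. A minimal smoke-test sketch, not part of this diff, assuming network access or a pre-populated `.vector_cache/` directory:

```
# Sketch: confirms the vendored package is a drop-in for `torchtext.vocab`.
# Assumes the glove.6B archive can be downloaded or is already cached.
from lighthouse.common import vocab

glove = vocab.GloVe(name="6B", dim=50)  # downloads and caches glove.6B.50d.txt
print(len(glove))                       # 400000 vectors for the 6B set
print(glove["hello"].shape)             # torch.Size([50])
```
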
diff --git a/lighthouse/common/vocab/vectors.py b/lighthouse/common/vocab/vectors.py
new file mode 100644
index 0000000..f1eacca
--- /dev/null
+++ b/lighthouse/common/vocab/vectors.py
@@ -0,0 +1,301 @@
+import gzip
+import logging
+import os
+import tarfile
+import zipfile
+from functools import partial
+from urllib.request import urlretrieve
+
+import torch
+from tqdm import tqdm
+
+logger = logging.getLogger(__name__)
+
+
+def _infer_shape(f):
+    num_lines, vector_dim = 0, None
+    for line in f:
+        if vector_dim is None:
+            row = line.rstrip().split(b" ")
+            vector = row[1:]
+            # Assuming word, [vector] format
+            if len(vector) > 2:
+                # The header present in some (w2v) formats contains two elements.
+                vector_dim = len(vector)
+                num_lines += 1  # First element read
+        else:
+            num_lines += 1
+    f.seek(0)
+    return num_lines, vector_dim
+
+
+class Vectors:
+    def __init__(self, name, cache=None, url=None, unk_init=None, max_vectors=None) -> None:
+        """
+        Args:
+
+            name: name of the file that contains the vectors
+            cache: directory for cached vectors
+            url: url for download if vectors not found in cache
+            unk_init (callback): by default, initialize out-of-vocabulary word vectors
+                to zero vectors; can be any function that takes in a Tensor and returns a Tensor of the same size
+            max_vectors (int): this can be used to limit the number of
+                pre-trained vectors loaded.
+                Most pre-trained vector sets are sorted
+                in the descending order of word frequency.
+                Thus, in situations where the entire set doesn't fit in memory,
+                or is not needed for another reason, passing `max_vectors`
+                can limit the size of the loaded set.
+        """
+
+        cache = ".vector_cache" if cache is None else cache
+        self.itos = None
+        self.stoi = None
+        self.vectors = None
+        self.dim = None
+        self.unk_init = torch.Tensor.zero_ if unk_init is None else unk_init
+        self.cache(name, cache, url=url, max_vectors=max_vectors)
+
+    def __getitem__(self, token):
+        if token in self.stoi:
+            return self.vectors[self.stoi[token]]
+        else:
+            return self.unk_init(torch.Tensor(self.dim))
+
+    def __contains__(self, token):
+        return token in self.stoi
+
+    def cache(self, name, cache, url=None, max_vectors=None):
+        import ssl
+
+        ssl._create_default_https_context = ssl._create_unverified_context
+        if os.path.isfile(name):
+            path = name
+            if max_vectors:
+                file_suffix = "_{}.pt".format(max_vectors)
+            else:
+                file_suffix = ".pt"
+            path_pt = os.path.join(cache, os.path.basename(name)) + file_suffix
+        else:
+            path = os.path.join(cache, name)
+            if max_vectors:
+                file_suffix = "_{}.pt".format(max_vectors)
+            else:
+                file_suffix = ".pt"
+            path_pt = path + file_suffix
+
+        if not os.path.isfile(path_pt):
+            if not os.path.isfile(path) and url:
+                logger.info("Downloading vectors from {}".format(url))
+                if not os.path.exists(cache):
+                    os.makedirs(cache)
+                dest = os.path.join(cache, os.path.basename(url))
+                if not os.path.isfile(dest):
+                    with tqdm(unit="B", unit_scale=True, miniters=1, desc=dest) as t:
+                        try:
+                            urlretrieve(url, dest, reporthook=reporthook(t))
+                        except KeyboardInterrupt as e:  # remove the partial zip file
+                            os.remove(dest)
+                            raise e
+                logger.info("Extracting vectors into {}".format(cache))
+                ext = os.path.splitext(dest)[1][1:]
+                if ext == "zip":
+                    with zipfile.ZipFile(dest, "r") as zf:
+                        zf.extractall(cache)
+                elif ext == "gz":
+                    if dest.endswith(".tar.gz"):
+                        with tarfile.open(dest, "r:gz") as tar:
+                            tar.extractall(path=cache)
+            if not os.path.isfile(path):
+                raise RuntimeError("no vectors found at {}".format(path))
+
+            logger.info("Loading vectors from {}".format(path))
+            ext = os.path.splitext(path)[1][1:]
+            if ext == "gz":
+                open_file = gzip.open
+            else:
+                open_file = open
+
+            vectors_loaded = 0
+            with open_file(path, "rb") as f:
+                num_lines, dim = _infer_shape(f)
+                if not max_vectors or max_vectors > num_lines:
+                    max_vectors = num_lines
+
+                itos, vectors, dim = [], torch.zeros((max_vectors, dim)), None
+
+                for line in tqdm(f, total=max_vectors):
+                    # Explicitly splitting on " " is important, so we don't
+                    # get rid of Unicode non-breaking spaces in the vectors.
+                    entries = line.rstrip().split(b" ")
+
+                    word, entries = entries[0], entries[1:]
+                    if dim is None and len(entries) > 1:
+                        dim = len(entries)
+                    elif len(entries) == 1:
+                        logger.warning(
+                            "Skipping token {} with 1-dimensional " "vector {}; likely a header".format(word, entries)
+                        )
+                        continue
+                    elif dim != len(entries):
+                        raise RuntimeError(
+                            "Vector for token {} has {} dimensions, but previously "
+                            "read vectors have {} dimensions. All vectors must have "
+                            "the same number of dimensions.".format(word, len(entries), dim)
+                        )
+
+                    try:
+                        if isinstance(word, bytes):
+                            word = word.decode("utf-8")
+                    except UnicodeDecodeError:
+                        logger.info("Skipping non-UTF8 token {}".format(repr(word)))
+                        continue
+
+                    vectors[vectors_loaded] = torch.tensor([float(x) for x in entries])
+                    vectors_loaded += 1
+                    itos.append(word)
+
+                    if vectors_loaded == max_vectors:
+                        break
+
+            self.itos = itos
+            self.stoi = {word: i for i, word in enumerate(itos)}
+            self.vectors = torch.Tensor(vectors).view(-1, dim)
+            self.dim = dim
+            logger.info("Saving vectors to {}".format(path_pt))
+            if not os.path.exists(cache):
+                os.makedirs(cache)
+            torch.save((self.itos, self.stoi, self.vectors, self.dim), path_pt)
+        else:
+            logger.info("Loading vectors from {}".format(path_pt))
+            self.itos, self.stoi, self.vectors, self.dim = torch.load(path_pt)
+
+    def __len__(self):
+        return len(self.vectors)
+
+    def get_vecs_by_tokens(self, tokens, lower_case_backup=False):
+        """Look up embedding vectors of tokens.
+
+        Args:
+            tokens: a token or a list of tokens. If `tokens` is a string,
+                returns a 1-D tensor of shape `self.dim`; if `tokens` is a
+                list of strings, returns a 2-D tensor of shape=(len(tokens),
+                self.dim).
+            lower_case_backup: Whether to look up the token in the lower case.
+                If False, each token in the original case will be looked up;
+                if True, each token in the original case will be looked up first,
+                if not found in the keys of the property `stoi`, the token in the
+                lower case will be looked up. Default: False.
+
+        Examples:
+            >>> examples = ['chip', 'baby', 'Beautiful']
+            >>> vec = GloVe(name='6B', dim=50)
+            >>> ret = vec.get_vecs_by_tokens(examples, lower_case_backup=True)
+        """
+        to_reduce = False
+
+        if not isinstance(tokens, list):
+            tokens = [tokens]
+            to_reduce = True
+
+        if not lower_case_backup:
+            indices = [self[token] for token in tokens]
+        else:
+            indices = [self[token] if token in self.stoi else self[token.lower()] for token in tokens]
+
+        vecs = torch.stack(indices)
+        return vecs[0] if to_reduce else vecs
+
+
+class GloVe(Vectors):
+    url = {
+        "42B": "http://nlp.stanford.edu/data/glove.42B.300d.zip",
+        "840B": "http://nlp.stanford.edu/data/glove.840B.300d.zip",
+        "twitter.27B": "http://nlp.stanford.edu/data/glove.twitter.27B.zip",
+        "6B": "http://nlp.stanford.edu/data/glove.6B.zip",
+    }
+
+    def __init__(self, name="840B", dim=300, **kwargs) -> None:
+        url = self.url[name]
+        name = "glove.{}.{}d.txt".format(name, str(dim))
+        super(GloVe, self).__init__(name, url=url, **kwargs)
+
+
+class FastText(Vectors):
+
+    url_base = "https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.{}.vec"
+
+    def __init__(self, language="en", **kwargs) -> None:
+        url = self.url_base.format(language)
+        name = os.path.basename(url)
+        super(FastText, self).__init__(name, url=url, **kwargs)
+
+
+class CharNGram(Vectors):
+
+    name = "charNgram.txt"
+    url = "http://www.logos.t.u-tokyo.ac.jp/~hassy/publications/arxiv2016jmt/" "jmt_pre-trained_embeddings.tar.gz"
+
+    def __init__(self, **kwargs) -> None:
+        super(CharNGram, self).__init__(self.name, url=self.url, **kwargs)
+
+    def __getitem__(self, token):
+        vector = torch.Tensor(1, self.dim).zero_()
+        if token == "<unk>":
+            return self.unk_init(vector)
+        chars = ["#BEGIN#"] + list(token) + ["#END#"]
+        num_vectors = 0
+        for n in [2, 3, 4]:
+            end = len(chars) - n + 1
+            grams = [chars[i : (i + n)] for i in range(end)]
+            for gram in grams:
+                gram_key = "{}gram-{}".format(n, "".join(gram))
+                if gram_key in self.stoi:
+                    vector += self.vectors[self.stoi[gram_key]]
+                    num_vectors += 1
+        if num_vectors > 0:
+            vector /= num_vectors
+        else:
+            vector = self.unk_init(vector)
+        return vector
+
+
+def reporthook(t):
+    """
+    https://github.com/tqdm/tqdm.
+    """
+    last_b = [0]
+
+    def inner(b=1, bsize=1, tsize=None):
+        """
+        b: int, optional
+            Number of blocks just transferred [default: 1].
+        bsize: int, optional
+            Size of each block (in tqdm units) [default: 1].
+        tsize: int, optional
+            Total size (in tqdm units). If [default: None] remains unchanged.
+        """
+        if tsize is not None:
+            t.total = tsize
+        t.update((b - last_b[0]) * bsize)
+        last_b[0] = b
+
+    return inner
+
+
+pretrained_aliases = {
+    "charngram.100d": partial(CharNGram),
+    "fasttext.en.300d": partial(FastText, language="en"),
+    "fasttext.simple.300d": partial(FastText, language="simple"),
+    "glove.42B.300d": partial(GloVe, name="42B", dim="300"),
+    "glove.840B.300d": partial(GloVe, name="840B", dim="300"),
+    "glove.twitter.27B.25d": partial(GloVe, name="twitter.27B", dim="25"),
+    "glove.twitter.27B.50d": partial(GloVe, name="twitter.27B", dim="50"),
+    "glove.twitter.27B.100d": partial(GloVe, name="twitter.27B", dim="100"),
+    "glove.twitter.27B.200d": partial(GloVe, name="twitter.27B", dim="200"),
+    "glove.6B.50d": partial(GloVe, name="6B", dim="50"),
+    "glove.6B.100d": partial(GloVe, name="6B", dim="100"),
+    "glove.6B.200d": partial(GloVe, name="6B", dim="200"),
+    "glove.6B.300d": partial(GloVe, name="6B", dim="300"),
+}
+"""Mapping from string name to factory function"""
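
`Vectors.cache` above does the heavy lifting: it downloads and extracts the archive on first use, parses the text file once, and persists a `(itos, stoi, vectors, dim)` snapshot as a `.pt` file so later loads skip the parse. The two loader knobs are `max_vectors` and `unk_init`; a hedged sketch of how they combine (the token list is illustrative):

```
# Sketch only. max_vectors truncates the frequency-sorted vector file;
# unk_init controls how out-of-vocabulary lookups are initialized. Both are
# forwarded by the GloVe constructor to Vectors.__init__ above.
import torch
from lighthouse.common.vocab import GloVe

vec = GloVe(name="6B", dim=50, max_vectors=100000, unk_init=torch.Tensor.normal_)
ret = vec.get_vecs_by_tokens(["chip", "baby", "Beautiful"], lower_case_backup=True)
print(ret.shape)  # torch.Size([3, 50])
```
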
diff --git a/lighthouse/common/vocab/vocab.py b/lighthouse/common/vocab/vocab.py
new file mode 100644
index 0000000..68e1635
--- /dev/null
+++ b/lighthouse/common/vocab/vocab.py
@@ -0,0 +1,171 @@
+from typing import Dict, List, Optional
+
+import torch
+import torch.nn as nn
+
+
+def _log_class_usage(klass):
+    identifier = "torchtext"
+    if klass and hasattr(klass, "__name__"):
+        identifier += f".{klass.__name__}"
+    torch._C._log_api_usage_once(identifier)
+
+
+class Vocab(nn.Module):
+    __jit_unused_properties__ = ["is_jitable"]
+    r"""Creates a vocab object which maps tokens to indices.
+
+    Args:
+        vocab (torch.classes.torchtext.Vocab or torchtext._torchtext.Vocab): a cpp vocab object.
+    """
+
+    def __init__(self, vocab) -> None:
+        super(Vocab, self).__init__()
+        self.vocab = vocab
+        _log_class_usage(__class__)
+
+    @property
+    def is_jitable(self):
+        return isinstance(self.vocab, torch._C.ScriptObject)
+
+    @torch.jit.export
+    def forward(self, tokens: List[str]) -> List[int]:
+        r"""Calls the `lookup_indices` method
+
+        Args:
+            tokens: a list of tokens used to lookup their corresponding `indices`.
+
+        Returns:
+            The indices associated with a list of `tokens`.
+        """
+        return self.vocab.lookup_indices(tokens)
+
+    @torch.jit.export
+    def __len__(self) -> int:
+        r"""
+        Returns:
+            The length of the vocab.
+        """
+        return len(self.vocab)
+
+    @torch.jit.export
+    def __contains__(self, token: str) -> bool:
+        r"""
+        Args:
+            token: The token for which to check the membership.
+
+        Returns:
+            Whether the token is member of vocab or not.
+        """
+        return self.vocab.__contains__(token)
+
+    @torch.jit.export
+    def __getitem__(self, token: str) -> int:
+        r"""
+        Args:
+            token: The token used to lookup the corresponding index.
+
+        Returns:
+            The index corresponding to the associated token.
+        """
+        return self.vocab[token]
+
+    @torch.jit.export
+    def set_default_index(self, index: Optional[int]) -> None:
+        r"""
+        Args:
+            index: Value of default index. This index will be returned when OOV token is queried.
+        """
+        self.vocab.set_default_index(index)
+
+    @torch.jit.export
+    def get_default_index(self) -> Optional[int]:
+        r"""
+        Returns:
+            Value of default index if it is set.
+        """
+        return self.vocab.get_default_index()
+
+    @torch.jit.export
+    def insert_token(self, token: str, index: int) -> None:
+        r"""
+        Args:
+            token: The token used to lookup the corresponding index.
+            index: The index corresponding to the associated token.
+
+        Raises:
+            RuntimeError: If `index` is not in range [0, Vocab.size()] or if `token` already exists in the vocab.
+        """
+        self.vocab.insert_token(token, index)
+
+    @torch.jit.export
+    def append_token(self, token: str) -> None:
+        r"""
+        Args:
+            token: The token used to lookup the corresponding index.
+
+        Raises:
+            RuntimeError: If `token` already exists in the vocab
+        """
+        self.vocab.append_token(token)
+
+    @torch.jit.export
+    def lookup_token(self, index: int) -> str:
+        r"""
+        Args:
+            index: The index corresponding to the associated token.
+
+        Returns:
+            token: The token used to lookup the corresponding index.
+
+        Raises:
+            RuntimeError: If `index` is not in range [0, itos.size()).
+        """
+        return self.vocab.lookup_token(index)
+
+    @torch.jit.export
+    def lookup_tokens(self, indices: List[int]) -> List[str]:
+        r"""
+        Args:
+            indices: The `indices` used to lookup their corresponding `tokens`.
+
+        Returns:
+            The `tokens` associated with `indices`.
+
+        Raises:
+            RuntimeError: If an index within `indices` is not in range [0, itos.size()).
+        """
+        return self.vocab.lookup_tokens(indices)
+
+    @torch.jit.export
+    def lookup_indices(self, tokens: List[str]) -> List[int]:
+        r"""
+        Args:
+            tokens: the tokens used to lookup their corresponding `indices`.
+
+        Returns:
+            The `indices` associated with `tokens`.
+        """
+        return self.vocab.lookup_indices(tokens)
+
+    @torch.jit.export
+    def get_stoi(self) -> Dict[str, int]:
+        r"""
+        Returns:
+            Dictionary mapping tokens to indices.
+        """
+        return self.vocab.get_stoi()
+
+    @torch.jit.export
+    def get_itos(self) -> List[str]:
+        r"""
+        Returns:
+            List mapping indices to tokens.
+        """
+        return self.vocab.get_itos()
+
+    def __prepare_scriptable__(self):
+        r"""Return a JITable Vocab."""
+        if not self.is_jitable:
+            cpp_vocab = torch.classes.torchtext.Vocab(self.vocab.itos_, self.vocab.default_index_)
+            return Vocab(cpp_vocab)
+        return self
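
Outside TorchScript, `Vocab` is a thin `nn.Module` wrapper that forwards every call to the wrapped object, so it only requires that object to implement the methods actually used (`lookup_indices`, `__getitem__`, `__len__`, ...). A hypothetical pure-Python backing object makes the contract concrete; this is an illustration, not the C++ vocab the docstring refers to:

```
# Illustration only: a hypothetical in-Python backing object implementing the
# subset of the contract that Vocab.forward and __getitem__ rely on.
from typing import Dict, List

from lighthouse.common.vocab import Vocab


class _PyVocab:
    def __init__(self, tokens: List[str]) -> None:
        self._stoi: Dict[str, int] = {t: i for i, t in enumerate(tokens)}

    def lookup_indices(self, tokens: List[str]) -> List[int]:
        return [self._stoi[t] for t in tokens]

    def __getitem__(self, token: str) -> int:
        return self._stoi[token]

    def __len__(self) -> int:
        return len(self._stoi)


v = Vocab(_PyVocab(["<unk>", "hello", "world"]))
print(v(["hello", "world"]))  # [1, 2]
```
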
diff --git a/lighthouse/feature_extractor/text_encoders/glove.py b/lighthouse/feature_extractor/text_encoders/glove.py
index ef65df8..05bdf06 100644
--- a/lighthouse/feature_extractor/text_encoders/glove.py
+++ b/lighthouse/feature_extractor/text_encoders/glove.py
@@ -1,6 +1,6 @@
 import torch
 from typing import Tuple
-from torchtext import vocab
+from lighthouse.common import vocab
 
 class GloVe:
     def __init__(
@@ -35,4 +35,4 @@ def __call__(
         word_inds = torch.LongTensor(
             [self._vocab.stoi.get(w.lower(), 400000) for w in query.split()])
         mask = torch.ones((1, word_inds.shape[0])).to(self._device)
-        return self._embedding(word_inds).unsqueeze(0).to(self._device), mask
\ No newline at end of file
+        return self._embedding(word_inds).unsqueeze(0).to(self._device), mask
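
For context, the `__call__` in the second hunk maps a raw query to per-word GloVe embeddings plus an all-ones mask, falling back to index 400000 for out-of-vocabulary words. A hedged usage sketch; the constructor arguments are assumptions, since the constructor body is outside this hunk:

```
# Hypothetical usage of the GloVe text encoder; the constructor arguments are
# assumptions for illustration (its real signature is not shown in this hunk).
from lighthouse.feature_extractor.text_encoders.glove import GloVe

encoder = GloVe(device="cpu")
feats, mask = encoder("people are dancing in a park")
# feats: (1, num_words, dim) embeddings; mask: (1, num_words) of ones
```
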
+ """ + self.vocab.set_default_index(index) + + @torch.jit.export + def get_default_index(self) -> Optional[int]: + r""" + Returns: + Value of default index if it is set. + """ + return self.vocab.get_default_index() + + @torch.jit.export + def insert_token(self, token: str, index: int) -> None: + r""" + Args: + token: The token used to lookup the corresponding index. + index: The index corresponding to the associated token. + Raises: + RuntimeError: If `index` is not in range [0, Vocab.size()] or if `token` already exists in the vocab. + """ + self.vocab.insert_token(token, index) + + @torch.jit.export + def append_token(self, token: str) -> None: + r""" + Args: + token: The token used to lookup the corresponding index. + + Raises: + RuntimeError: If `token` already exists in the vocab + """ + self.vocab.append_token(token) + + @torch.jit.export + def lookup_token(self, index: int) -> str: + r""" + Args: + index: The index corresponding to the associated token. + + Returns: + token: The token used to lookup the corresponding index. + + Raises: + RuntimeError: If `index` not in range [0, itos.size()). + """ + return self.vocab.lookup_token(index) + + @torch.jit.export + def lookup_tokens(self, indices: List[int]) -> List[str]: + r""" + Args: + indices: The `indices` used to lookup their corresponding`tokens`. + + Returns: + The `tokens` associated with `indices`. + + Raises: + RuntimeError: If an index within `indices` is not int range [0, itos.size()). + """ + return self.vocab.lookup_tokens(indices) + + @torch.jit.export + def lookup_indices(self, tokens: List[str]) -> List[int]: + r""" + Args: + tokens: the tokens used to lookup their corresponding `indices`. + + Returns: + The 'indices` associated with `tokens`. + """ + return self.vocab.lookup_indices(tokens) + + @torch.jit.export + def get_stoi(self) -> Dict[str, int]: + r""" + Returns: + Dictionary mapping tokens to indices. + """ + return self.vocab.get_stoi() + + @torch.jit.export + def get_itos(self) -> List[str]: + r""" + Returns: + List mapping indices to tokens. 
+ """ + return self.vocab.get_itos() + + def __prepare_scriptable__(self): + r"""Return a JITable Vocab.""" + if not self.is_jitable: + cpp_vocab = torch.classes.torchtext.Vocab(self.vocab.itos_, self.vocab.default_index_) + return Vocab(cpp_vocab) + return self diff --git a/lighthouse/feature_extractor/text_encoders/glove.py b/lighthouse/feature_extractor/text_encoders/glove.py index ef65df8..05bdf06 100644 --- a/lighthouse/feature_extractor/text_encoders/glove.py +++ b/lighthouse/feature_extractor/text_encoders/glove.py @@ -1,6 +1,6 @@ import torch from typing import Tuple -from torchtext import vocab +from lighthouse.common import vocab class GloVe: def __init__( @@ -35,4 +35,4 @@ def __call__( word_inds = torch.LongTensor( [self._vocab.stoi.get(w.lower(), 400000) for w in query.split()]) mask = torch.ones((1, word_inds.shape[0])).to(self._device) - return self._embedding(word_inds).unsqueeze(0).to(self._device), mask \ No newline at end of file + return self._embedding(word_inds).unsqueeze(0).to(self._device), mask diff --git a/mypy.ini b/mypy.ini index 54eecc0..41e0b02 100644 --- a/mypy.ini +++ b/mypy.ini @@ -13,9 +13,6 @@ ignore_missing_imports = True [mypy-torchlibrosa.*] ignore_missing_imports = True -[mypy-torchtext.*] -ignore_missing_imports = True - [mypy-clip.*] ignore_missing_imports = True diff --git a/training/cg_detr_dataset.py b/training/cg_detr_dataset.py index 64c932e..25725c1 100644 --- a/training/cg_detr_dataset.py +++ b/training/cg_detr_dataset.py @@ -41,10 +41,10 @@ import random import logging from os.path import join, exists +from lighthouse.common import vocab from lighthouse.common.utils.basic_utils import load_jsonl, l2_normalize_np_array from lighthouse.common.utils.tensor_utils import pad_sequences_1d from lighthouse.common.utils.span_utils import span_xx_to_cxw -from torchtext import vocab import torch.nn as nn logger = logging.getLogger(__name__) diff --git a/training/dataset.py b/training/dataset.py index d8053d7..0adacdb 100755 --- a/training/dataset.py +++ b/training/dataset.py @@ -62,10 +62,10 @@ import random import logging from os.path import join, exists +from lighthouse.common import vocab from lighthouse.common.utils.basic_utils import load_jsonl, l2_normalize_np_array from lighthouse.common.utils.tensor_utils import pad_sequences_1d from lighthouse.common.utils.span_utils import span_xx_to_cxw -from torchtext import vocab import torch.nn as nn logger = logging.getLogger(__name__)