diff --git a/data/alpaca_52k.yaml b/data/alpaca_52k.yaml
new file mode 100644
index 0000000..018ca32
--- /dev/null
+++ b/data/alpaca_52k.yaml
@@ -0,0 +1,17 @@
+pretty_name: alpaca_data_52k
+license:
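+  - cc-by-4.0  # NOTE(review): upstream stanford_alpaca data is published as CC BY-NC 4.0 (non-commercial) — confirm this license before merging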
+language:
+  - en
+multilinguality:
+  - monolingual
+download_link:
+  - https://huggingface.co/datasets/joecodecreations/alpaca_data_52k/resolve/main/alpaca_data.jsonl
+source:
+  - https://huggingface.co/datasets/joecodecreations/alpaca_data_52k
+task_types:
+  - instruction-tuning
+description:
+  - "Dialogue instruction-tuning / instruction-following data; example: <human>: xxxx\n<bot>: yyyy. The data comes from the stanford_alpaca project and is intended for fine-tuning instruction-following models. It was generated using the techniques described in Self-Instruct: Aligning Language Model with Self Generated Instructions. Yizhong Wang, Yeganeh Kordi, Swaroop Mishra, Alisa Liu, Noah A. Smith, Daniel Khashabi, Hannaneh Hajishirzi. https://arxiv.org/abs/2212.10560"
+processed_by:
+  - Joey Sanchez (https://github.com/joecodecreations)