diff --git a/data/alpaca_52k.yaml b/data/alpaca_52k.yaml
new file mode 100644
index 0000000..018ca32
--- /dev/null
+++ b/data/alpaca_52k.yaml
@@ -0,0 +1,21 @@
+# Metadata for the alpaca_data_52k dataset.
+# Every field except pretty_name holds a single-item list.
+pretty_name: alpaca_data_52k
+license:
+  - cc-by-4.0
+language:
+  - en
+multilinguality:
+  - monolingual
+download_link:
+  - https://huggingface.co/datasets/joecodecreations/alpaca_data_52k/resolve/main/alpaca_data.jsonl
+source:
+  - https://huggingface.co/datasets/joecodecreations/alpaca_data_52k
+task_types:
+  - instruction-tuning
+# NOTE(review): the example in the description has nothing before each ':' --
+# the speaker labels look like they were lost; confirm against the data file.
+description:
+  - "dialogue instruction-tuning / instruction following; example: : xxxx\n: yyyy, Data is from stanford_alpaca project, data is for fine-tuning instruction-following where data generated by the techniques in Self-Instruct: Aligning Language Models with Self-Generated Instructions. Yizhong Wang, Yeganeh Kordi, Swaroop Mishra, Alisa Liu, Noah A. Smith, Daniel Khashabi, Hannaneh Hajishirzi. https://arxiv.org/abs/2212.10560"
+processed_by:
+  - Joey Sanchez (https://github.com/joecodecreations)