revdotcom · pique0822 · Dec 10, 2024 · Dec 10, 2024 · Dec 10, 2024 · Dec 10, 2024
diff --git a/earnings22/README.md b/earnings22/README.md
@@ -59,16 +59,14 @@ Tables found in the paper along with all entity class WER can be found within th
 All of our analysis on this dataset is done through the use of our newly released [fstalign](https://github.com/revdotcom/fstalign/tree/master) tool. We strongly recommend the use of this tool to quickly get started using the *Earnings-22* dataset.
 
 # Cite this Dataset
-This dataset has been submitted to Interspeech 2022.
-The paper describing our methods and results can be found on arXiv at https://arxiv.org/abs/2203.15591.
+The paper describing our methods and results can be found at https://czasopisma.uni.lodz.pl/research/article/view/21579. An earlier version of our work can be found at https://arxiv.org/abs/2203.15591.
 ```
-@misc{https://doi.org/10.48550/arxiv.2203.15591,
-  doi = {10.48550/ARXIV.2203.15591},
-  url = {https://arxiv.org/abs/2203.15591},
-  author = {Del Rio, Miguel and Ha, Peter and McNamara, Quinten and Miller, Corey and Chandra, Shipra},
-  keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences},
-  title = {Earnings-22: A Practical Benchmark for Accents in the Wild},
-  publisher = {arXiv},
-  year = {2022},
-  copyright = {Creative Commons Attribution Share Alike 4.0 International}
+@article{earnings22,
+  title={Accents in Speech Recognition through the Lens of a {World} {Englishes} Evaluation Set},
+  author={Del Rio, Miguel and Miller, Corey and Profant, Jan and Drexler-Fox, Jennifer and McNamara, Quinn and Bhandari, Nishchal and Delworth, Natalie and Pirkin, Ilya and Jett{\'e}, Mig{\"u}el and Chandra, Shipra and Ha, Peter and Westerman, Ryan},
+  journal={Research in Language},
+  volume={21},
+  number={3},
+  pages={225--244},
+  year={2023}
 }
diff --git a/earnings22/subset10/README.md b/earnings22/subset10/README.md
@@ -0,0 +1,26 @@
+[![License: CC BY-SA 4.0](https://img.shields.io/badge/License-CC%20BY--SA%204.0-lightgrey.svg)](LICENSE.md)
+
+# Earnings-22 Subset 10
+
+The Earnings-22 Subset 10 dataset is an 11-hour corpus of ten English-language earnings calls collected from global companies. These files were randomly selected from the full Earnings-22 corpus.
+
+The transcripts were created by Rev transcriptionists in two different styles, and are separated by subdirectory. The `verbatim` transcripts are created by the transcriptionists writing exactly what they hear, including filler words, stutters, interjections (active listening) and repetitions. The `nonverbatim` transcripts are created by lightly editing for readability, without changing the structure or meaning of the speech.
+
+For more information, see pages 6-8 of Rev's [Transcription Style Guide](https://cf-public.rev.com/styleguide/transcription/Transcription+Style+Guide+v5.pdf).
+
+# Cite this Dataset
+The paper describing the original Earnings-22 data can be found at https://czasopisma.uni.lodz.pl/research/article/view/21579. An earlier version of that work can be found at https://arxiv.org/abs/2203.15591.
+The paper describing the subset's creation will be made available on arXiv (to be published soon).
+
+If you'd like to use this subset please cite the following:
+```
+@article{earnings22,
+  title={Accents in Speech Recognition through the Lens of a {World} {Englishes} Evaluation Set},
+  author={Del Rio, Miguel and Miller, Corey and Profant, Jan and Drexler-Fox, Jennifer and McNamara, Quinn and Bhandari, Nishchal and Delworth, Natalie and Pirkin, Ilya and Jett{\'e}, Mig{\"u}el and Chandra, Shipra and Ha, Peter and Westerman, Ryan},
+  journal={Research in Language},
+  volume={21},
+  number={3},
+  pages={225--244},
+  year={2023}
+}
+```
diff --git a/earnings22/subset10/media/4453225.mp3 b/earnings22/subset10/media/4453225.mp3
@@ -0,0 +1 @@
+../../media/4453225.mp3
diff --git a/earnings22/subset10/media/4469088.mp3 b/earnings22/subset10/media/4469088.mp3
@@ -0,0 +1 @@
+../../media/4469088.mp3
diff --git a/earnings22/subset10/media/4470684.mp3 b/earnings22/subset10/media/4470684.mp3
@@ -0,0 +1 @@
+../../media/4470684.mp3
diff --git a/earnings22/subset10/media/4474506.mp3 b/earnings22/subset10/media/4474506.mp3
@@ -0,0 +1 @@
+../../media/4474506.mp3
diff --git a/earnings22/subset10/media/4479944.mp3 b/earnings22/subset10/media/4479944.mp3
@@ -0,0 +1 @@
+../../media/4479944.mp3
diff --git a/earnings22/subset10/media/4481952.mp3 b/earnings22/subset10/media/4481952.mp3
@@ -0,0 +1 @@
+../../media/4481952.mp3
diff --git a/earnings22/subset10/media/4482383.mp3 b/earnings22/subset10/media/4482383.mp3
@@ -0,0 +1 @@
+../../media/4482383.mp3
diff --git a/earnings22/subset10/media/4482613.mp3 b/earnings22/subset10/media/4482613.mp3
@@ -0,0 +1 @@
+../../media/4482613.mp3
diff --git a/earnings22/subset10/media/4483937.mp3 b/earnings22/subset10/media/4483937.mp3
@@ -0,0 +1 @@
+../../media/4483937.mp3
diff --git a/earnings22/subset10/media/4485192.mp3 b/earnings22/subset10/media/4485192.mp3
@@ -0,0 +1 @@
+../../media/4485192.mp3