diff --git a/modules/core/src/main/scala/ceesvee/CsvParser.scala b/modules/core/src/main/scala/ceesvee/CsvParser.scala index 90a6ac9..d2264e1 100644 --- a/modules/core/src/main/scala/ceesvee/CsvParser.scala +++ b/modules/core/src/main/scala/ceesvee/CsvParser.scala @@ -90,24 +90,25 @@ object CsvParser { } /** - * Splits the given strings into CSV lines by splitting on either '\r\n' and + * Splits the given strings into CSV lines by splitting on either '\r\n' or * '\n'. * * '"' is the only valid escape for nested double quotes. */ @throws[Error.LineTooLong]("if a line is longer than `maximumLineLength`") + def splitLines(in: Iterator[String], options: Options): Iterator[String] = new SplitLinesIterator(in, options) @SuppressWarnings(Array( "org.wartremover.warts.MutableDataStructures", "org.wartremover.warts.Throw", "org.wartremover.warts.Var", )) - def splitLines(in: Iterator[String], options: Options): Iterator[String] = new Iterator[String] { + private final class SplitLinesIterator(in: Iterator[String], options: Options) extends Iterator[String] { private val toOutput = mutable.Queue.empty[String] private var state = State.initial - override def hasNext: Boolean = toOutput.nonEmpty || in.hasNext || state.leftover.nonEmpty + override def hasNext = toOutput.nonEmpty || in.hasNext || state.leftover.nonEmpty - @tailrec override def next(): String = { + @tailrec override def next() = { if (toOutput.nonEmpty) { toOutput.dequeue() } else { @@ -204,8 +205,7 @@ object CsvParser { if (insideQuote) { i += 1 } else { - val sliceEnd = if (previousCarriageReturn) i - 1 else i - val _ = builder += concat.substring(sliceStart, sliceEnd) + val _ = builder += concat.substring(sliceStart, i) i += 1 sliceStart = i } diff --git a/modules/core/src/test/scala/ceesvee/CsvParserSpec.scala b/modules/core/src/test/scala/ceesvee/CsvParserSpec.scala index 39c0d76..b0491fa 100644 --- a/modules/core/src/test/scala/ceesvee/CsvParserSpec.scala +++ b/modules/core/src/test/scala/ceesvee/CsvParserSpec.scala @@ -22,6 +22,22 @@ object CsvParserSpec extends ZIOSpecDefault with CsvParserParserSuite { assertTrue(lines == List("abc\rdef", "ghi", "jkl")) && assertTrue(state.leftover == "mno") }, + test("trailing new lines aligned to vector boundary") { + val strings = List( + "012345678901234567890123456789012345678901234567890123456789abc\r", + "012345678901234567890123456789012345678901234567890123456789abc\r", + "\n012345678901234567890123456789012345678901234567890123456789ab\n", + "012345678901234567890123456789012345678901234567890123456789abcd", + "\nmno", + ) + val (state, lines) = CsvParser.splitStrings(strings, CsvParser.State.initial) + assertTrue(lines == List( + "012345678901234567890123456789012345678901234567890123456789abc\r012345678901234567890123456789012345678901234567890123456789abc", + "012345678901234567890123456789012345678901234567890123456789ab", + "012345678901234567890123456789012345678901234567890123456789abcd", + )) && + assertTrue(state.leftover == "mno") + }, test("trailing double quotes") { val strings = List( "a,\"b\"",