From 8d30ce89432beb53cdf73476b51de043c8029e89 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sat, 2 Aug 2025 15:03:29 +0200 Subject: [PATCH 01/35] Implement snappy compression in userland code --- src/main/php/io/streams/Compression.class.php | 4 +- .../php/io/streams/compress/Snappy.class.php | 260 ++++++++++++++++++ .../unittest/CompressionTest.class.php | 2 +- 3 files changed, 263 insertions(+), 3 deletions(-) create mode 100755 src/main/php/io/streams/compress/Snappy.class.php diff --git a/src/main/php/io/streams/Compression.class.php b/src/main/php/io/streams/Compression.class.php index b1534d8..200e16e 100755 --- a/src/main/php/io/streams/Compression.class.php +++ b/src/main/php/io/streams/Compression.class.php @@ -1,6 +1,6 @@ add(new Gzip(), new Bzip2(), new Brotli()); + self::$algorithms= (new Algorithms())->add(new Gzip(), new Bzip2(), new Brotli(), new Snappy()); } /** diff --git a/src/main/php/io/streams/compress/Snappy.class.php b/src/main/php/io/streams/compress/Snappy.class.php new file mode 100755 index 0000000..7a6db2e --- /dev/null +++ b/src/main/php/io/streams/compress/Snappy.class.php @@ -0,0 +1,260 @@ +> 8); + } + }; + $copy ?? 
$copy= function($i, $l) { + if ($l < 12 && $i < 2048) { + return pack('CC', 1 + (($l - 4) << 2) + ((($i & 0xffffffff) >> 8) << 5), $i & 0xff); + } else { + return pack('CCC', 2 + (($l - 1) << 2), $i & 0xff, ($i & 0xffffffff) >> 8); + } + }; + + $out= ''; + + // Output length as varint + $length= strlen($data); + shift: $l= $length & 0x7f; + $length= ($length & 0xffffffff) >> 7; + if ($length > 0) { + $out.= chr($l + 0x80); + goto shift; + } + $out.= chr($l); + + // Compare 4-byte offsets in data at offsets a and b + $equals32= fn($a, $b) => ( + $data[$a] === $data[$b] && + $data[$a + 1] === $data[$b + 1] && + $data[$a + 2] === $data[$b + 2] && + $data[$a + 3] === $data[$b + 3] + ); + + for ($emit= $pos= 0, $end= $length= strlen($data); $pos < $length; $pos= $end) { + $fragment= min($length - $pos, self::BLOCK_SIZE); + $end= $pos + $fragment; + $emit= $pos; + if ($fragment <= self::INPUT_MARGIN) continue; + + $bits= 1; + while ((1 << $bits) <= $fragment && $bits <= self::HASH_BITS) { + $bits++; + } + $bits--; + $shift= 32 - $bits; + $hashtable= array_fill(0, 1 << $bits, 0); + + $start= $pos; + $limit= $end - self::INPUT_MARGIN; + $next= ((unpack('V', $data, ++$pos)[1] * self::HASH_KEY) & 0xffffffff) >> $shift; + + // Emit literals + next: $forward= $pos; + $skip= 32; + do { + $pos= $forward; + $hash= $next; + $forward+= ($skip & 0xffffffff) >> 5; + $skip++; + if ($pos > $limit) continue 2; + + $next= ((unpack('V', $data, $forward)[1] * self::HASH_KEY) & 0xffffffff) >> $shift; + $candidate= $start + $hashtable[$hash]; + $hashtable[$hash]= ($pos - $start) & 0xffff; + } while (!$equals32($pos, $candidate)); + + $out.= $literal($pos - $emit).substr($data, $emit, $pos - $emit); + + // Emit copy instructions + do { + $offset= $pos - $candidate; + $matched= 4; + while ($pos + $matched < $end && $data[$pos + $matched] === $data[$candidate + $matched]) { + $matched++; + } + $pos+= $matched; + + while ($matched >= 68) { + $out.= $copy($offset, 64); + $matched-= 64; + } + if 
($matched > 64) { + $out.= $copy($offset, 60); + $matched-= 60; + } + $out.= $copy($offset, $matched); + $emit= $pos; + + if ($pos >= $limit) continue 2; + + $hash= ((unpack('V', $data, $pos - 1)[1] * self::HASH_KEY) & 0xffffffff) >> $shift; + $hashtable[$hash]= ($pos - 1 - $start) & 0xffff; + $hash= ((unpack('V', $data, $pos)[1] * self::HASH_KEY) & 0xffffffff) >> $shift; + $candidate= $start + $hashtable[$hash]; + $hashtable[$hash]= ($pos - $start) & 0xffff; + } while ($equals32($pos, $candidate)); + + $pos++; + $next= ((unpack('V', $data, $pos)[1] * self::HASH_KEY) & 0xffffffff) >> $shift; + goto next; + } + + if ($emit < $end) { + $out.= $literal($end - $emit).substr($data, $emit, $end - $emit); + } + + return $out; + } + + /** Decompresses bytes */ + public function decompress(string $bytes): string { + $out= ''; + + // Read uncompressed length from varint + for ($length= $pos= $shift= 0, $c= 255; $shift < 32, $c >= 128; $pos++, $shift+= 7) { + $c= ord($bytes[$pos]); + $length|= ($c & 0x7f) << $shift; + } + + // Decompress using literal and copy operations + $end= strlen($bytes); + while ($pos < $end) { + $c= ord($bytes[$pos++]); + switch ($c & 0x03) { + case 0: + $l= 1 + ($c >> 2); + if ($l > 60) { + if ($pos + 3 >= $end) throw new IOException('Position out of range'); + + $s= $l - 60; + $l= unpack('V', $bytes, $pos)[1]; + $l= ($l & self::WORD_MASK[$s]) + 1; + $pos+= $s; + } + if ($pos + $l > $end) throw new IOException('Not enough input for literal, expecting '.$l); + + $out.= substr($bytes, $pos, $l); + $pos+= $l; + break; + + case 1: + $l= 4 + (($c >> 2) & 0x7); + $offset= ord($bytes[$pos]) + (($c >> 5) << 8); + for ($i= 0, $end= strlen($out) - $offset; $i < $l; $i++) { + $out.= $out[$end + $i]; + } + $pos++; + break; + + case 2: + if ($pos + 1 >= $end) throw new IOException('Position out of range'); + + $l= 1 + ($c >> 2); + $offset= unpack('v', $bytes, $pos)[1]; + for ($i= 0, $end= strlen($out) - $offset; $i < $l; $i++) { + $out.= $out[$end + $i]; + } + 
$pos+= 2; + break; + + case 3: + if ($pos + 3 >= $end) throw new IOException('Position out of range'); + + $l= 1 + ($c >> 2); + $offset= unpack('V', $bytes, $pos)[1]; + for ($i= 0, $end= strlen($out) - $offset; $i < $l; $i++) { + $out.= $out[$end + $i]; + } + $pos+= 4; + break; + + default: + throw new IOException('Unexpected operation '.($c & 0x3)); + } + } + + // Verify uncompressed length + if ($length !== ($l= strlen($out))) { + throw new IOException('Expected length '.$length.', have '.$l); + } + + return $out; + } + + /** Opens an input stream for reading */ + public function open(InputStream $in): InputStream { + + // FIXME Solve this without buffering + $bytes= ''; + while ($in->available()) { + $bytes.= $in->read(); + } + return newinstance(InputStream::class, [], [ + 'pos' => 0, + 'bytes' => $this->decompress($bytes), + 'available' => function() { return strlen($this->bytes) - $this->pos; }, + 'read' => function($limit= 4096) { + $chunk= substr($this->bytes, $this->pos, $limit); + $this->pos+= strlen($chunk); + return $chunk; + }, + 'close' => function() { } + ]); + } + + /** Opens an output stream for writing */ + public function create(OutputStream $out, int $level= Compression::DEFAULT): OutputStream { + + // FIXME Solve this without buffering + $self= $this; + return newinstance(OutputStream::class, [], [ + 'bytes' => '', + 'write' => function($bytes) { $this->bytes.= $bytes; }, + 'flush' => function() { }, + 'close' => function() use($self, $out) { + if (null !== $this->bytes) { + $out->write($self->compress($this->bytes)); + $this->bytes= null; + } + } + ]); + } +} \ No newline at end of file diff --git a/src/test/php/io/streams/compress/unittest/CompressionTest.class.php b/src/test/php/io/streams/compress/unittest/CompressionTest.class.php index 9310f03..b09b155 100755 --- a/src/test/php/io/streams/compress/unittest/CompressionTest.class.php +++ b/src/test/php/io/streams/compress/unittest/CompressionTest.class.php @@ -56,7 +56,7 @@ public function 
enumerating_included_algorithms() { foreach (Compression::algorithms() as $name => $algorithm) { $names[]= $name; } - Assert::equals(['gzip', 'bzip2', 'brotli'], $names); + Assert::equals(['gzip', 'bzip2', 'brotli', 'snappy'], $names); } #[Test] From 44c009fbc1fa6d4b2fe029051b79ceb3ab46fd00 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sat, 2 Aug 2025 15:07:51 +0200 Subject: [PATCH 02/35] Fix PHP < 7.4 by using long closures --- src/main/php/io/streams/compress/Snappy.class.php | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/main/php/io/streams/compress/Snappy.class.php b/src/main/php/io/streams/compress/Snappy.class.php index 7a6db2e..524e59d 100755 --- a/src/main/php/io/streams/compress/Snappy.class.php +++ b/src/main/php/io/streams/compress/Snappy.class.php @@ -61,12 +61,14 @@ public function compress(string $data, int $level= Compression::DEFAULT): string $out.= chr($l); // Compare 4-byte offsets in data at offsets a and b - $equals32= fn($a, $b) => ( - $data[$a] === $data[$b] && - $data[$a + 1] === $data[$b + 1] && - $data[$a + 2] === $data[$b + 2] && - $data[$a + 3] === $data[$b + 3] - ); + $equals32= function($a, $b) use(&$data) { + return ( + $data[$a] === $data[$b] && + $data[$a + 1] === $data[$b + 1] && + $data[$a + 2] === $data[$b + 2] && + $data[$a + 3] === $data[$b + 3] + ); + }; for ($emit= $pos= 0, $end= $length= strlen($data); $pos < $length; $pos= $end) { $fragment= min($length - $pos, self::BLOCK_SIZE); From 39f73c5f6f91b6c06918a7d08c7cdd7f6b3a1b8b Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sat, 2 Aug 2025 20:56:49 +0200 Subject: [PATCH 03/35] Implement reading snappy compressed data from a stream --- .../php/io/streams/compress/Snappy.class.php | 30 +--- .../compress/SnappyInputStream.class.php | 140 ++++++++++++++++++ 2 files changed, 147 insertions(+), 23 deletions(-) create mode 100755 src/main/php/io/streams/compress/SnappyInputStream.class.php diff --git 
a/src/main/php/io/streams/compress/Snappy.class.php b/src/main/php/io/streams/compress/Snappy.class.php index 524e59d..97c3591 100755 --- a/src/main/php/io/streams/compress/Snappy.class.php +++ b/src/main/php/io/streams/compress/Snappy.class.php @@ -157,21 +157,21 @@ public function decompress(string $bytes): string { } // Decompress using literal and copy operations - $end= strlen($bytes); - while ($pos < $end) { + $limit= strlen($bytes); + while ($pos < $limit) { $c= ord($bytes[$pos++]); switch ($c & 0x03) { case 0: $l= 1 + ($c >> 2); if ($l > 60) { - if ($pos + 3 >= $end) throw new IOException('Position out of range'); + if ($pos + 3 >= $limit) throw new IOException('Position out of range'); $s= $l - 60; $l= unpack('V', $bytes, $pos)[1]; $l= ($l & self::WORD_MASK[$s]) + 1; $pos+= $s; } - if ($pos + $l > $end) throw new IOException('Not enough input for literal, expecting '.$l); + if ($pos + $l > $limit) throw new IOException('Not enough input for literal, expecting '.$l); $out.= substr($bytes, $pos, $l); $pos+= $l; @@ -187,7 +187,7 @@ public function decompress(string $bytes): string { break; case 2: - if ($pos + 1 >= $end) throw new IOException('Position out of range'); + if ($pos + 1 >= $limit) throw new IOException('Position out of range'); $l= 1 + ($c >> 2); $offset= unpack('v', $bytes, $pos)[1]; @@ -198,7 +198,7 @@ public function decompress(string $bytes): string { break; case 3: - if ($pos + 3 >= $end) throw new IOException('Position out of range'); + if ($pos + 3 >= $limit) throw new IOException('Position out of range'); $l= 1 + ($c >> 2); $offset= unpack('V', $bytes, $pos)[1]; @@ -223,23 +223,7 @@ public function decompress(string $bytes): string { /** Opens an input stream for reading */ public function open(InputStream $in): InputStream { - - // FIXME Solve this without buffering - $bytes= ''; - while ($in->available()) { - $bytes.= $in->read(); - } - return newinstance(InputStream::class, [], [ - 'pos' => 0, - 'bytes' => $this->decompress($bytes), - 
'available' => function() { return strlen($this->bytes) - $this->pos; }, - 'read' => function($limit= 4096) { - $chunk= substr($this->bytes, $this->pos, $limit); - $this->pos+= strlen($chunk); - return $chunk; - }, - 'close' => function() { } - ]); + return new SnappyInputStream($in); } /** Opens an output stream for writing */ diff --git a/src/main/php/io/streams/compress/SnappyInputStream.class.php b/src/main/php/io/streams/compress/SnappyInputStream.class.php new file mode 100755 index 0000000..ebfb86b --- /dev/null +++ b/src/main/php/io/streams/compress/SnappyInputStream.class.php @@ -0,0 +1,140 @@ +buffer) < $n) { + if ($this->in->available()) { + $this->buffer.= $this->in->read(); + } else { + throw new IOException('Not enough input, expected '.$n); + } + } + + $chunk= substr($this->buffer, 0, $n); + $this->buffer= substr($this->buffer, $n); + return $chunk; + } + + /** + * Creates a new decompressing input stream + * + * @param io.streams.InputStream $in The stream to read from + */ + public function __construct(InputStream $in) { + $this->in= $in; + $this->out= ''; + for ($shift= 0, $c= 255; $shift < 32, $c >= 128; $shift+= 7) { + $c= ord($this->bytes(1)); + $this->read|= ($c & 0x7f) << $shift; + } + } + + /** + * Read a string + * + * @param int limit default 8192 + * @return string + */ + public function read($limit= 8192) { + $pos= $start= strlen($this->out); + $limit= min($limit + $start, $this->read); + + while ($pos < $limit) { + $c= ord($this->bytes(1)); + switch ($c & 0x03) { + case 0: + $l= 1 + ($c >> 2); + if ($l > 60) { + $s= $l - 60; + $bytes= $this->bytes(4); + $l= unpack('V', $bytes)[1]; + $l= ($l & Snappy::WORD_MASK[$s]) + 1; + $this->buffer= substr($bytes, $s).$this->buffer; + } + $this->out.= $this->bytes($l); + break; + + case 1: + $l= 4 + (($c >> 2) & 0x7); + $offset= ord($this->bytes(1)) + (($c >> 5) << 8); + for ($i= 0, $end= strlen($this->out) - $offset; $i < $l; $i++) { + $this->out.= $this->out[$end + $i]; + } + break; + + case 2: + 
$l= 1 + ($c >> 2); + $offset= unpack('v', $this->bytes(2))[1]; + for ($i= 0, $end= strlen($this->out) - $offset; $i < $l; $i++) { + $this->out.= $this->out[$end + $i]; + } + break; + + case 3: + $l= 1 + ($c >> 2); + $offset= unpack('V', $this->bytes(4))[1]; + for ($i= 0, $end= strlen($this->out) - $offset; $i < $l; $i++) { + $this->out.= $this->out[$end + $i]; + } + break; + + default: + throw new IOException('Unexpected operation '.($c & 0x3)); + } + $pos+= $l; + } + + // Once block size is reached, offets never reference anything before. + if (strlen($this->out) > Snappy::BLOCK_SIZE) { + $chunk= substr($this->out, $start); + $this->out= substr($this->out, Snappy::BLOCK_SIZE); + $this->read-= Snappy::BLOCK_SIZE; + return $chunk; + } + + return substr($this->out, $start); + } + + /** + * Returns the number of bytes that can be read from this stream + * without blocking. + * + * @return int + */ + public function available() { + return $this->read - strlen($this->out); + } + + /** + * Close this buffer. + * + * @return void + */ + public function close() { + $this->in->close(); + } + + /** + * Destructor. Ensures output stream is closed. 
+ */ + public function __destruct() { + $this->close(); + } +} \ No newline at end of file From 72f4860104e1e1774e3c8415ae6a2c29ecac91b3 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sat, 2 Aug 2025 21:14:51 +0200 Subject: [PATCH 04/35] Add unittests for SnappyInputStream --- .../compress/SnappyInputStream.class.php | 2 + .../unittest/SnappyInputStreamTest.class.php | 37 +++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100755 src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php diff --git a/src/main/php/io/streams/compress/SnappyInputStream.class.php b/src/main/php/io/streams/compress/SnappyInputStream.class.php index ebfb86b..e5dcd11 100755 --- a/src/main/php/io/streams/compress/SnappyInputStream.class.php +++ b/src/main/php/io/streams/compress/SnappyInputStream.class.php @@ -5,6 +5,8 @@ /** * Snappy input stream + * + * @test io.streams.compress.unittest.SnappyInputStreamTest */ class SnappyInputStream implements InputStream { private $in, $out; diff --git a/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php b/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php new file mode 100755 index 0000000..72837eb --- /dev/null +++ b/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php @@ -0,0 +1,37 @@ +fixture("\x00"); + } + + #[Test] + public function literal() { + Assert::equals('Hello', Streams::readAll($this->fixture("\005\020Hello"))); + } + + #[Test] + public function copy() { + Assert::equals( + "Hello\n=================", + Streams::readAll($this->fixture("\026\030Hello\n=\076\001\000")) + ); + } + + #[Test, Expect(IOException::class)] + public function not_enough_input() { + Streams::readAll($this->fixture("\x01")); + } +} \ No newline at end of file From 536b6c53e2928bb1e82c17fc59d520abd0ca1663 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sat, 2 Aug 2025 21:17:53 +0200 Subject: [PATCH 05/35] Extend test suite with erroneous snappy compressed data --- 
.../io/streams/compress/unittest/CompressionTest.class.php | 5 +++++ .../compress/unittest/SnappyInputStreamTest.class.php | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/src/test/php/io/streams/compress/unittest/CompressionTest.class.php b/src/test/php/io/streams/compress/unittest/CompressionTest.class.php index b09b155..3103148 100755 --- a/src/test/php/io/streams/compress/unittest/CompressionTest.class.php +++ b/src/test/php/io/streams/compress/unittest/CompressionTest.class.php @@ -48,6 +48,11 @@ private function erroneous() { if ($bzip2->supported() && PHP_VERSION_ID >= 70400) { yield [$bzip2, "BZh61AY&SY\331"]; } + + $snappy= $algorithms->named('snappy'); + if ($snappy->supported()) { + yield [$snappy, "\002"]; + } } #[Test] diff --git a/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php b/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php index 72837eb..0780963 100755 --- a/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php +++ b/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php @@ -30,6 +30,11 @@ public function copy() { ); } + #[Test, Expect(IOException::class)] + public function from_empty() { + Streams::readAll($this->fixture('')); + } + #[Test, Expect(IOException::class)] public function not_enough_input() { Streams::readAll($this->fixture("\x01")); From 371d9d5b2fee7b73df6603c8177d59af324d3953 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sat, 2 Aug 2025 21:27:25 +0200 Subject: [PATCH 06/35] Remove unreachable code --- src/main/php/io/streams/compress/Snappy.class.php | 3 --- src/main/php/io/streams/compress/SnappyInputStream.class.php | 3 --- 2 files changed, 6 deletions(-) diff --git a/src/main/php/io/streams/compress/Snappy.class.php b/src/main/php/io/streams/compress/Snappy.class.php index 97c3591..340d370 100755 --- a/src/main/php/io/streams/compress/Snappy.class.php +++ b/src/main/php/io/streams/compress/Snappy.class.php @@ -207,9 +207,6 @@ 
public function decompress(string $bytes): string { } $pos+= 4; break; - - default: - throw new IOException('Unexpected operation '.($c & 0x3)); } } diff --git a/src/main/php/io/streams/compress/SnappyInputStream.class.php b/src/main/php/io/streams/compress/SnappyInputStream.class.php index e5dcd11..9759242 100755 --- a/src/main/php/io/streams/compress/SnappyInputStream.class.php +++ b/src/main/php/io/streams/compress/SnappyInputStream.class.php @@ -96,9 +96,6 @@ public function read($limit= 8192) { $this->out.= $this->out[$end + $i]; } break; - - default: - throw new IOException('Unexpected operation '.($c & 0x3)); } $pos+= $l; } From 90fef34c915bfeb35ac9c70379e6863fecd0cb76 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 09:22:39 +0200 Subject: [PATCH 07/35] Free all previous unreferenced blocks --- .../compress/SnappyInputStream.class.php | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/main/php/io/streams/compress/SnappyInputStream.class.php b/src/main/php/io/streams/compress/SnappyInputStream.class.php index 9759242..3922dbf 100755 --- a/src/main/php/io/streams/compress/SnappyInputStream.class.php +++ b/src/main/php/io/streams/compress/SnappyInputStream.class.php @@ -10,7 +10,7 @@ */ class SnappyInputStream implements InputStream { private $in, $out; - private $read= 0; + private $limit= 0; private $buffer= ''; /** @@ -44,7 +44,7 @@ public function __construct(InputStream $in) { $this->out= ''; for ($shift= 0, $c= 255; $shift < 32, $c >= 128; $shift+= 7) { $c= ord($this->bytes(1)); - $this->read|= ($c & 0x7f) << $shift; + $this->limit|= ($c & 0x7f) << $shift; } } @@ -56,7 +56,7 @@ public function __construct(InputStream $in) { */ public function read($limit= 8192) { $pos= $start= strlen($this->out); - $limit= min($limit + $start, $this->read); + $limit= min($limit + $start, $this->limit); while ($pos < $limit) { $c= ord($this->bytes(1)); @@ -100,15 +100,16 @@ public function read($limit= 8192) { 
$pos+= $l; } - // Once block size is reached, offets never reference anything before. - if (strlen($this->out) > Snappy::BLOCK_SIZE) { - $chunk= substr($this->out, $start); + $chunk= substr($this->out, $start); + + // Once block size is reached, offets never reference anything before, + // free memory by removing one block from the front of the output. + while (strlen($this->out) > Snappy::BLOCK_SIZE) { $this->out= substr($this->out, Snappy::BLOCK_SIZE); - $this->read-= Snappy::BLOCK_SIZE; - return $chunk; + $this->limit-= Snappy::BLOCK_SIZE; } - return substr($this->out, $start); + return $chunk; } /** @@ -118,7 +119,7 @@ public function read($limit= 8192) { * @return int */ public function available() { - return $this->read - strlen($this->out); + return $this->limit - strlen($this->out); } /** From 85c25abc8751312d3cd640500d08009c1eeb72ee Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 09:56:59 +0200 Subject: [PATCH 08/35] Simplify reading literal lengths --- .../php/io/streams/compress/Snappy.class.php | 17 ++++++++--------- .../compress/SnappyInputStream.class.php | 14 ++++++-------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/src/main/php/io/streams/compress/Snappy.class.php b/src/main/php/io/streams/compress/Snappy.class.php index 340d370..19bae6d 100755 --- a/src/main/php/io/streams/compress/Snappy.class.php +++ b/src/main/php/io/streams/compress/Snappy.class.php @@ -9,7 +9,6 @@ class Snappy extends Algorithm { const HASH_KEY = 0x1e35a7bd; const HASH_BITS = 14; const INPUT_MARGIN = 15; - const WORD_MASK = [0, 0xff, 0xffff, 0xffffff, 0xffffffff]; /** Returns whether this algorithm is supported in the current setup */ public function supported(): bool { return true; } @@ -162,15 +161,15 @@ public function decompress(string $bytes): string { $c= ord($bytes[$pos++]); switch ($c & 0x03) { case 0: - $l= 1 + ($c >> 2); - if ($l > 60) { - if ($pos + 3 >= $limit) throw new IOException('Position out of range'); - - $s= $l - 60; - 
$l= unpack('V', $bytes, $pos)[1]; - $l= ($l & self::WORD_MASK[$s]) + 1; - $pos+= $s; + $l= $c >> 2; + if (60 === $l) { + if ($pos + 1 >= $limit) throw new IOException('Position out of range'); + $l= unpack('C', $bytes, $pos)[1]; + } else if (61 === $l) { + if ($pos + 2 >= $limit) throw new IOException('Position out of range'); + $l= unpack('v', $bytes, $pos)[1]; } + $l++; if ($pos + $l > $limit) throw new IOException('Not enough input for literal, expecting '.$l); $out.= substr($bytes, $pos, $l); diff --git a/src/main/php/io/streams/compress/SnappyInputStream.class.php b/src/main/php/io/streams/compress/SnappyInputStream.class.php index 3922dbf..8bb7e8a 100755 --- a/src/main/php/io/streams/compress/SnappyInputStream.class.php +++ b/src/main/php/io/streams/compress/SnappyInputStream.class.php @@ -62,15 +62,13 @@ public function read($limit= 8192) { $c= ord($this->bytes(1)); switch ($c & 0x03) { case 0: - $l= 1 + ($c >> 2); - if ($l > 60) { - $s= $l - 60; - $bytes= $this->bytes(4); - $l= unpack('V', $bytes)[1]; - $l= ($l & Snappy::WORD_MASK[$s]) + 1; - $this->buffer= substr($bytes, $s).$this->buffer; + $l= $c >> 2; + if (60 === $l) { + $l= unpack('C', $this->bytes(1))[1]; + } else if (61 === $l) { + $l= unpack('v', $this->bytes(2))[1]; } - $this->out.= $this->bytes($l); + $this->out.= $this->bytes(++$l); break; case 1: From fa5ad1a1998831c93b1556038112dcd6ea6740fc Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 10:17:21 +0200 Subject: [PATCH 09/35] Unify error messages --- src/main/php/io/streams/compress/Snappy.class.php | 14 ++++++++------ .../unittest/SnappyInputStreamTest.class.php | 13 +++++++------ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/main/php/io/streams/compress/Snappy.class.php b/src/main/php/io/streams/compress/Snappy.class.php index 19bae6d..29384f6 100755 --- a/src/main/php/io/streams/compress/Snappy.class.php +++ b/src/main/php/io/streams/compress/Snappy.class.php @@ -148,9 +148,10 @@ public function 
compress(string $data, int $level= Compression::DEFAULT): string /** Decompresses bytes */ public function decompress(string $bytes): string { $out= ''; + $pos= 0; // Read uncompressed length from varint - for ($length= $pos= $shift= 0, $c= 255; $shift < 32, $c >= 128; $pos++, $shift+= 7) { + for ($length= $shift= 0, $c= 255; $shift < 32, $c >= 128; $pos++, $shift+= 7) { $c= ord($bytes[$pos]); $length|= ($c & 0x7f) << $shift; } @@ -163,14 +164,15 @@ public function decompress(string $bytes): string { case 0: $l= $c >> 2; if (60 === $l) { - if ($pos + 1 >= $limit) throw new IOException('Position out of range'); + if ($pos + 1 >= $limit) throw new IOException('Not enough input, expected 1'); $l= unpack('C', $bytes, $pos)[1]; } else if (61 === $l) { - if ($pos + 2 >= $limit) throw new IOException('Position out of range'); + if ($pos + 2 >= $limit) throw new IOException('Not enough input, expected 2'); $l= unpack('v', $bytes, $pos)[1]; } + $l++; - if ($pos + $l > $limit) throw new IOException('Not enough input for literal, expecting '.$l); + if ($pos + $l > $limit) throw new IOException('Not enough input, expected '.$l); $out.= substr($bytes, $pos, $l); $pos+= $l; @@ -186,7 +188,7 @@ public function decompress(string $bytes): string { break; case 2: - if ($pos + 1 >= $limit) throw new IOException('Position out of range'); + if ($pos + 1 >= $limit) throw new IOException('Not enough input, expected 1'); $l= 1 + ($c >> 2); $offset= unpack('v', $bytes, $pos)[1]; @@ -197,7 +199,7 @@ public function decompress(string $bytes): string { break; case 3: - if ($pos + 3 >= $limit) throw new IOException('Position out of range'); + if ($pos + 3 >= $limit) throw new IOException('Not enough input, expected 3'); $l= 1 + ($c >> 2); $offset= unpack('V', $bytes, $pos)[1]; diff --git a/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php b/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php index 0780963..72731ae 100755 --- 
a/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php +++ b/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php @@ -3,7 +3,7 @@ use io\IOException; use io\streams\compress\SnappyInputStream; use io\streams\{Streams, MemoryInputStream}; -use test\{Assert, Expect, Test}; +use test\{Assert, Expect, Test, Values}; class SnappyInputStreamTest { @@ -17,9 +17,10 @@ public function can_create() { $this->fixture("\x00"); } - #[Test] - public function literal() { - Assert::equals('Hello', Streams::readAll($this->fixture("\005\020Hello"))); + #[Test, Values([[5, "\005\020"], [255, "\377\001\360\376"], [256, "\200\002\364\377\000"]])] + public function literals($length, $encoded) { + $payload= str_repeat('*', $length); + Assert::equals($payload, Streams::readAll($this->fixture($encoded.$payload))); } #[Test] @@ -30,12 +31,12 @@ public function copy() { ); } - #[Test, Expect(IOException::class)] + #[Test, Expect(class: IOException::class, message: 'Not enough input, expected 1')] public function from_empty() { Streams::readAll($this->fixture('')); } - #[Test, Expect(IOException::class)] + #[Test, Expect(class: IOException::class, message: 'Not enough input, expected 1')] public function not_enough_input() { Streams::readAll($this->fixture("\x01")); } From 53f42ffc6435e0f4e69049f9d0d4a759f8cc619a Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 10:25:57 +0200 Subject: [PATCH 10/35] Extract encoding length as helper --- .../php/io/streams/compress/Snappy.class.php | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/main/php/io/streams/compress/Snappy.class.php b/src/main/php/io/streams/compress/Snappy.class.php index 29384f6..9c5046f 100755 --- a/src/main/php/io/streams/compress/Snappy.class.php +++ b/src/main/php/io/streams/compress/Snappy.class.php @@ -25,6 +25,20 @@ public function extension(): string { return '.sn'; } /** Maps fastest, default and strongest levels */ public 
function level(int $select): int { return 0; } + /** Output length as varint */ + public static function length(int $length): string { + $out= ''; + + shift: $l= $length & 0x7f; + $length= ($length & 0xffffffff) >> 7; + if ($length > 0) { + $out.= chr($l + 0x80); + goto shift; + } + + return $out.chr($l); + } + /** Compresses data */ public function compress(string $data, int $level= Compression::DEFAULT): string { static $literal, $copy; @@ -47,18 +61,6 @@ public function compress(string $data, int $level= Compression::DEFAULT): string } }; - $out= ''; - - // Output length as varint - $length= strlen($data); - shift: $l= $length & 0x7f; - $length= ($length & 0xffffffff) >> 7; - if ($length > 0) { - $out.= chr($l + 0x80); - goto shift; - } - $out.= chr($l); - // Compare 4-byte offsets in data at offsets a and b $equals32= function($a, $b) use(&$data) { return ( @@ -69,6 +71,7 @@ public function compress(string $data, int $level= Compression::DEFAULT): string ); }; + $out= self::length(strlen($data)); for ($emit= $pos= 0, $end= $length= strlen($data); $pos < $length; $pos= $end) { $fragment= min($length - $pos, self::BLOCK_SIZE); $end= $pos + $fragment; From e2fac75fccb21ea100f1da3930ec8ed172fc9c28 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 14:17:41 +0200 Subject: [PATCH 11/35] Adjust compress() and create() signatures --- src/main/php/io/streams/compress/Snappy.class.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/php/io/streams/compress/Snappy.class.php b/src/main/php/io/streams/compress/Snappy.class.php index 9c5046f..7f7218c 100755 --- a/src/main/php/io/streams/compress/Snappy.class.php +++ b/src/main/php/io/streams/compress/Snappy.class.php @@ -40,7 +40,7 @@ public static function length(int $length): string { } /** Compresses data */ - public function compress(string $data, int $level= Compression::DEFAULT): string { + public function compress(string $data, $options= null): string { static $literal, 
$copy; // Helper functions @@ -228,7 +228,7 @@ public function open(InputStream $in): InputStream { } /** Opens an output stream for writing */ - public function create(OutputStream $out, int $level= Compression::DEFAULT): OutputStream { + public function create(OutputStream $out, $options= null): OutputStream { // FIXME Solve this without buffering $self= $this; From ff4614fb5980d487979f1646ae80b6014152b4cc Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 14:34:32 +0200 Subject: [PATCH 12/35] Extract buffered compression into its dedicated class --- .../compress/BufferedOutputStream.class.php | 61 +++++++++++++++++++ .../php/io/streams/compress/Snappy.class.php | 19 ++---- .../BufferedOutputStreamTest.class.php | 36 +++++++++++ 3 files changed, 102 insertions(+), 14 deletions(-) create mode 100755 src/main/php/io/streams/compress/BufferedOutputStream.class.php create mode 100755 src/test/php/io/streams/compress/unittest/BufferedOutputStreamTest.class.php diff --git a/src/main/php/io/streams/compress/BufferedOutputStream.class.php b/src/main/php/io/streams/compress/BufferedOutputStream.class.php new file mode 100755 index 0000000..977f579 --- /dev/null +++ b/src/main/php/io/streams/compress/BufferedOutputStream.class.php @@ -0,0 +1,61 @@ +compress= [$compress, 'compress']; + } else if (is_callable($compress)) { + $this->compress= $compress; + } else { + throw new IllegalArgumentException('Expected an Algorithm or a callable, have '.typeof($compress)); + } + $this->out= $out; + } + + /** + * Write a string + * + * @param var $arg + * @return void + */ + public function write($arg) { + $this->buffer.= $arg; + } + + /** + * Flush this buffer + * + * @return void + */ + public function flush() { + // NOOP + } + + /** + * Closes this object. May be called more than once, which may + * not fail - that is, if the object is already closed, this + * method should have no effect. 
+ * + * @return void + */ + public function close() { + if (null !== $this->buffer) { + $this->out->write(($this->compress)($this->buffer)); + $this->buffer= null; + } + } +} \ No newline at end of file diff --git a/src/main/php/io/streams/compress/Snappy.class.php b/src/main/php/io/streams/compress/Snappy.class.php index 7f7218c..99dc352 100755 --- a/src/main/php/io/streams/compress/Snappy.class.php +++ b/src/main/php/io/streams/compress/Snappy.class.php @@ -229,19 +229,10 @@ public function open(InputStream $in): InputStream { /** Opens an output stream for writing */ public function create(OutputStream $out, $options= null): OutputStream { - - // FIXME Solve this without buffering - $self= $this; - return newinstance(OutputStream::class, [], [ - 'bytes' => '', - 'write' => function($bytes) { $this->bytes.= $bytes; }, - 'flush' => function() { }, - 'close' => function() use($self, $out) { - if (null !== $this->bytes) { - $out->write($self->compress($this->bytes)); - $this->bytes= null; - } - } - ]); + if (null !== ($length= Options::from($options)->length)) { + return new SnappyOutputStream($out, $length); + } else { + return new BufferedOutputStream($out, [$this, 'compress']); + } } } \ No newline at end of file diff --git a/src/test/php/io/streams/compress/unittest/BufferedOutputStreamTest.class.php b/src/test/php/io/streams/compress/unittest/BufferedOutputStreamTest.class.php new file mode 100755 index 0000000..e82950b --- /dev/null +++ b/src/test/php/io/streams/compress/unittest/BufferedOutputStreamTest.class.php @@ -0,0 +1,36 @@ +write('Test'); + $compress->write('ed'); + $compress->close(); + + Assert::equals('Z:6', $out->bytes()); + } +} \ No newline at end of file From b8bb093a0d92eb4fde6dd70e35c94c37e87afda9 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 15:38:04 +0200 Subject: [PATCH 13/35] Add output stream --- .../compress/SnappyInputStream.class.php | 6 +- .../compress/SnappyOutputStream.class.php | 162 ++++++++++++++++++ 
.../unittest/SnappyInputStreamTest.class.php | 2 +- .../unittest/SnappyOutputStreamTest.class.php | 47 +++++ 4 files changed, 211 insertions(+), 6 deletions(-) create mode 100755 src/main/php/io/streams/compress/SnappyOutputStream.class.php create mode 100755 src/test/php/io/streams/compress/unittest/SnappyOutputStreamTest.class.php diff --git a/src/main/php/io/streams/compress/SnappyInputStream.class.php b/src/main/php/io/streams/compress/SnappyInputStream.class.php index 8bb7e8a..03f18b0 100755 --- a/src/main/php/io/streams/compress/SnappyInputStream.class.php +++ b/src/main/php/io/streams/compress/SnappyInputStream.class.php @@ -3,11 +3,7 @@ use io\IOException; use io\streams\InputStream; -/** - * Snappy input stream - * - * @test io.streams.compress.unittest.SnappyInputStreamTest - */ +/** @test io.streams.compress.unittest.SnappyInputStreamTest */ class SnappyInputStream implements InputStream { private $in, $out; private $limit= 0; diff --git a/src/main/php/io/streams/compress/SnappyOutputStream.class.php b/src/main/php/io/streams/compress/SnappyOutputStream.class.php new file mode 100755 index 0000000..83b5113 --- /dev/null +++ b/src/main/php/io/streams/compress/SnappyOutputStream.class.php @@ -0,0 +1,162 @@ +out= $out; + $this->out->write(Snappy::length($length)); + } + + /** Encode literal operation */ + private function literal(int $l): string { + if ($l <= 60) { + return chr(($l - 1) << 2); + } else if ($l < 256) { + return pack('CC', 60 << 2, $l - 1); + } else { + return pack('CCC', 61 << 2, ($l - 1) & 0xff, (($l - 1) & 0xffffffff) >> 8); + } + } + + /** Encode copy operation */ + private function copy(int $i, int $l): string { + if ($l < 12 && $i < 2048) { + return pack('CC', 1 + (($l - 4) << 2) + ((($i & 0xffffffff) >> 8) << 5), $i & 0xff); + } else { + return pack('CCC', 2 + (($l - 1) << 2), $i & 0xff, ($i & 0xffffffff) >> 8); + } + } + + /** Compare 4-byte offsets in data at offsets a and b */ + private function equals32(int $a, int $b): bool { + 
return ( + $this->buffer[$a] === $this->buffer[$b] && + $this->buffer[$a + 1] === $this->buffer[$b + 1] && + $this->buffer[$a + 2] === $this->buffer[$b + 2] && + $this->buffer[$a + 3] === $this->buffer[$b + 3] + ); + } + + /** Compresses a fragment and returns last emitted position */ + private function fragment() { + $end= min(strlen($this->buffer), Snappy::BLOCK_SIZE); + if ($end <= Snappy::INPUT_MARGIN) return 0; + + $pos= $emit= 0; + $bits= 1; + while ((1 << $bits) <= $end && $bits <= Snappy::HASH_BITS) { + $bits++; + } + $bits--; + $shift= 32 - $bits; + $hashtable= array_fill(0, 1 << $bits, 0); + + $start= $pos; + $limit= $end - Snappy::INPUT_MARGIN; + $next= ((unpack('V', $this->buffer, ++$pos)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; + + // Emit literals + next: $forward= $pos; + $skip= 32; + do { + $pos= $forward; + $hash= $next; + $forward+= ($skip & 0xffffffff) >> 5; + $skip++; + if ($pos > $limit) return $emit; + + $next= ((unpack('V', $this->buffer, $forward)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; + $candidate= $start + $hashtable[$hash]; + $hashtable[$hash]= ($pos - $start) & 0xffff; + } while (!$this->equals32($pos, $candidate)); + + $this->out->write($this->literal($pos - $emit).substr($this->buffer, $emit, $pos - $emit)); + + // Emit copy instructions + do { + $offset= $pos - $candidate; + $matched= 4; + while ($pos + $matched < $end && $this->buffer[$pos + $matched] === $this->buffer[$candidate + $matched]) { + $matched++; + } + $pos+= $matched; + + while ($matched >= 68) { + $this->out->write($this->copy($offset, 64)); + $matched-= 64; + } + if ($matched > 64) { + $this->out->write($this->copy($offset, 60)); + $matched-= 60; + } + $this->out->write($this->copy($offset, $matched)); + $emit= $pos; + + if ($pos >= $limit) return $emit; + + $hash= ((unpack('V', $this->buffer, $pos - 1)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; + $hashtable[$hash]= ($pos - 1 - $start) & 0xffff; + $hash= ((unpack('V', $this->buffer, $pos)[1] * 
Snappy::HASH_KEY) & 0xffffffff) >> $shift; + $candidate= $start + $hashtable[$hash]; + $hashtable[$hash]= ($pos - $start) & 0xffff; + } while ($this->equals32($pos, $candidate)); + + $pos++; + $next= ((unpack('V', $this->buffer, $pos)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; + goto next; + } + + /** + * Write a string + * + * @param var $arg + * @return void + */ + public function write($arg) { + if (strlen($this->buffer) <= Snappy::BLOCK_SIZE) { + $this->buffer.= $arg; + } else { + $this->buffer= substr($this->buffer, $this->fragment()); + } + } + + /** + * Flush this buffer (except if it's smaller than the input margin) + * + * @return void + */ + public function flush() { + $this->buffer= substr($this->buffer, $this->fragment()); + } + + /** + * Closes this object. May be called more than once, which may + * not fail - that is, if the object is already closed, this + * method should have no effect. + * + * @return void + */ + public function close() { + $end= strlen($this->buffer); + if ($end > 0) { + $emit= $this->fragment(); + if ($emit < $end) { + $this->out->write($this->literal($end - $emit).substr($this->buffer, $emit, $end - $emit)); + } + $this->buffer= ''; + } + + $this->out->close(); + } +} \ No newline at end of file diff --git a/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php b/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php index 72731ae..9b4131a 100755 --- a/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php +++ b/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php @@ -27,7 +27,7 @@ public function literals($length, $encoded) { public function copy() { Assert::equals( "Hello\n=================", - Streams::readAll($this->fixture("\026\030Hello\n=\076\001\000")) + Streams::readAll($this->fixture("\027\030Hello\n=\076\001\000")) ); } diff --git a/src/test/php/io/streams/compress/unittest/SnappyOutputStreamTest.class.php 
b/src/test/php/io/streams/compress/unittest/SnappyOutputStreamTest.class.php new file mode 100755 index 0000000..6e8c4b4 --- /dev/null +++ b/src/test/php/io/streams/compress/unittest/SnappyOutputStreamTest.class.php @@ -0,0 +1,47 @@ +fixture(new MemoryOutputStream(), 0); + } + + #[Test, Values([[0, "\000"], [5, "\005"], [255, "\377\001"], [256, "\200\002"], [65536, "\200\200\004"]])] + public function length_as_varint($length, $expected) { + $out= new MemoryOutputStream(); + $this->fixture($out, $length); + + Assert::equals(new Bytes($expected), new Bytes($out->bytes())); + } + + #[Test] + public function literal() { + $out= new MemoryOutputStream(); + $compress= $this->fixture($out, 5); + $compress->write('Hello'); + $compress->close(); + + Assert::equals(new Bytes("\005\020Hello"), new Bytes($out->bytes())); + } + + #[Test] + public function copy() { + $out= new MemoryOutputStream(); + $compress= $this->fixture($out, 23); + $compress->write("Hello\n================="); + $compress->close(); + + Assert::equals(new Bytes("\027\030Hello\n=\076\001\000"), new Bytes($out->bytes())); + } +} \ No newline at end of file From c4eb43d288d08d864ce4c8639037c0d213f14372 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 15:39:12 +0200 Subject: [PATCH 14/35] Remove defaults --- .../io/streams/compress/unittest/CompressionTest.class.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/php/io/streams/compress/unittest/CompressionTest.class.php b/src/test/php/io/streams/compress/unittest/CompressionTest.class.php index 3103148..5d5e786 100755 --- a/src/test/php/io/streams/compress/unittest/CompressionTest.class.php +++ b/src/test/php/io/streams/compress/unittest/CompressionTest.class.php @@ -106,7 +106,7 @@ public function unknown($name) { #[Test, Values(from: 'algorithms')] public function compress_roundtrip($compressed) { - $bytes= $compressed->compress('Test', Compression::DEFAULT); + $bytes= $compressed->compress('Test'); $result= 
$compressed->decompress($bytes); Assert::equals('Test', $result); @@ -116,7 +116,7 @@ public function compress_roundtrip($compressed) { public function streams_roundtrip($compressed) { $target= new MemoryOutputStream(); - $out= $compressed->create($target, Compression::DEFAULT); + $out= $compressed->create($target); $out->write('Test'); $out->close(); From d44d87a753ce9710047ce09db740f7a937aac5a2 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 15:45:57 +0200 Subject: [PATCH 15/35] Mention Snappy in README file [skip ci] --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 00abb51..b190239 100755 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Compression streams [![Supports PHP 8.0+](https://raw.githubusercontent.com/xp-framework/web/master/static/php-8_0plus.svg)](http://php.net/) [![Latest Stable Version](https://poser.pugx.org/xp-forge/compression/version.svg)](https://packagist.org/packages/xp-forge/compression) -Compressing output and decompressing input streams including GZip, BZip2 and Brotli. +Compressing output and decompressing input streams including GZip, BZip2, Brotli and Snappy. 
Examples -------- @@ -39,8 +39,9 @@ $out->close(); Dependencies ------------ -Compression algorithms are implemented in C and thus require a specific PHP extension: +Compression algorithms might require a specific PHP extension: +* **Snappy** - no dependencies, implemented in userland * **GZip** - requires PHP's ["zlib" extension](https://www.php.net/zlib) * **Bzip2** - requires PHP's ["bzip2" extension](https://www.php.net/bzip2) * **Brotli** - requires https://github.com/kjdev/php-ext-brotli From b667e277fa4f6481f495df4de65b696539769132 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 15:47:46 +0200 Subject: [PATCH 16/35] QA: Use short closures where applicable --- src/main/php/io/streams/compress/Snappy.class.php | 14 ++++++-------- .../unittest/BufferedOutputStreamTest.class.php | 4 ++-- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/main/php/io/streams/compress/Snappy.class.php b/src/main/php/io/streams/compress/Snappy.class.php index 99dc352..c8ff874 100755 --- a/src/main/php/io/streams/compress/Snappy.class.php +++ b/src/main/php/io/streams/compress/Snappy.class.php @@ -62,14 +62,12 @@ public function compress(string $data, $options= null): string { }; // Compare 4-byte offsets in data at offsets a and b - $equals32= function($a, $b) use(&$data) { - return ( - $data[$a] === $data[$b] && - $data[$a + 1] === $data[$b + 1] && - $data[$a + 2] === $data[$b + 2] && - $data[$a + 3] === $data[$b + 3] - ); - }; + $equals32= fn($a, $b) => ( + $data[$a] === $data[$b] && + $data[$a + 1] === $data[$b + 1] && + $data[$a + 2] === $data[$b + 2] && + $data[$a + 3] === $data[$b + 3] + ); $out= self::length(strlen($data)); for ($emit= $pos= 0, $end= $length= strlen($data); $pos < $length; $pos= $end) { diff --git a/src/test/php/io/streams/compress/unittest/BufferedOutputStreamTest.class.php b/src/test/php/io/streams/compress/unittest/BufferedOutputStreamTest.class.php index e82950b..a9d201d 100755 --- 
a/src/test/php/io/streams/compress/unittest/BufferedOutputStreamTest.class.php +++ b/src/test/php/io/streams/compress/unittest/BufferedOutputStreamTest.class.php @@ -14,7 +14,7 @@ public function can_create_with_algorithm() { #[Test] public function can_create_with_function() { - new BufferedOutputStream(new MemoryOutputStream(), function($data) { return $data; }); + new BufferedOutputStream(new MemoryOutputStream(), fn($data) => $data); } #[Test, Expect(IllegalArgumentException::class)] @@ -26,7 +26,7 @@ public function illegal_compress() { public function writes_on_close() { $out= new MemoryOutputStream(); - $compress= new BufferedOutputStream($out, function($data) { return 'Z:'.strlen($data); }); + $compress= new BufferedOutputStream($out, fn($data) => 'Z:'.strlen($data)); $compress->write('Test'); $compress->write('ed'); $compress->close(); From 91c21a4e2aceb915441514d93422082b35482e2b Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 15:54:55 +0200 Subject: [PATCH 17/35] Add compression test for repeated input --- .../unittest/SnappyOutputStreamTest.class.php | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/test/php/io/streams/compress/unittest/SnappyOutputStreamTest.class.php b/src/test/php/io/streams/compress/unittest/SnappyOutputStreamTest.class.php index 6e8c4b4..09ea037 100755 --- a/src/test/php/io/streams/compress/unittest/SnappyOutputStreamTest.class.php +++ b/src/test/php/io/streams/compress/unittest/SnappyOutputStreamTest.class.php @@ -44,4 +44,17 @@ public function copy() { Assert::equals(new Bytes("\027\030Hello\n=\076\001\000"), new Bytes($out->bytes())); } + + #[Test] + public function repeated_input_compressed() { + $out= new MemoryOutputStream(); + $compress= $this->fixture($out, 20); + $compress->write('Hello'); + $compress->write('Hello'); + $compress->write('Hello'); + $compress->write('Hello'); + $compress->close(); + + Assert::equals(new Bytes("\024\020Hello:\005\000"), new Bytes($out->bytes())); + } } \ 
No newline at end of file From d6907ff8c299eec9c267ebb7796a2f457e2fb8ec Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 18:37:09 +0200 Subject: [PATCH 18/35] Fix decompressing literals --- src/main/php/io/streams/compress/Snappy.class.php | 2 ++ .../compress/unittest/SnappyInputStreamTest.class.php | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/src/main/php/io/streams/compress/Snappy.class.php b/src/main/php/io/streams/compress/Snappy.class.php index c8ff874..7af8106 100755 --- a/src/main/php/io/streams/compress/Snappy.class.php +++ b/src/main/php/io/streams/compress/Snappy.class.php @@ -167,9 +167,11 @@ public function decompress(string $bytes): string { if (60 === $l) { if ($pos + 1 >= $limit) throw new IOException('Not enough input, expected 1'); $l= unpack('C', $bytes, $pos)[1]; + $pos++; } else if (61 === $l) { if ($pos + 2 >= $limit) throw new IOException('Not enough input, expected 2'); $l= unpack('v', $bytes, $pos)[1]; + $pos+= 2; } $l++; diff --git a/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php b/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php index 9b4131a..47ff065 100755 --- a/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php +++ b/src/test/php/io/streams/compress/unittest/SnappyInputStreamTest.class.php @@ -23,6 +23,16 @@ public function literals($length, $encoded) { Assert::equals($payload, Streams::readAll($this->fixture($encoded.$payload))); } + #[Test] + public function consecutive_literals() { + $ones= str_repeat('1', 255); + $twos= str_repeat('2', 255); + Assert::equals( + $ones.$twos, + Streams::readAll($this->fixture("\376\003\360\376{$ones}\360\376{$twos}")) + ); + } + #[Test] public function copy() { Assert::equals( From 6474a4e3238e290441b9d5e71e17b11e60dabd08 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 19:46:09 +0200 Subject: [PATCH 19/35] Fix fragment encoding in compress() --- 
.../php/io/streams/compress/Snappy.class.php | 121 +++++++++--------- 1 file changed, 61 insertions(+), 60 deletions(-) diff --git a/src/main/php/io/streams/compress/Snappy.class.php b/src/main/php/io/streams/compress/Snappy.class.php index 7af8106..cdc151a 100755 --- a/src/main/php/io/streams/compress/Snappy.class.php +++ b/src/main/php/io/streams/compress/Snappy.class.php @@ -74,73 +74,74 @@ public function compress(string $data, $options= null): string { $fragment= min($length - $pos, self::BLOCK_SIZE); $end= $pos + $fragment; $emit= $pos; - if ($fragment <= self::INPUT_MARGIN) continue; - $bits= 1; - while ((1 << $bits) <= $fragment && $bits <= self::HASH_BITS) { - $bits++; - } - $bits--; - $shift= 32 - $bits; - $hashtable= array_fill(0, 1 << $bits, 0); - - $start= $pos; - $limit= $end - self::INPUT_MARGIN; - $next= ((unpack('V', $data, ++$pos)[1] * self::HASH_KEY) & 0xffffffff) >> $shift; - - // Emit literals - next: $forward= $pos; - $skip= 32; - do { - $pos= $forward; - $hash= $next; - $forward+= ($skip & 0xffffffff) >> 5; - $skip++; - if ($pos > $limit) continue 2; - - $next= ((unpack('V', $data, $forward)[1] * self::HASH_KEY) & 0xffffffff) >> $shift; - $candidate= $start + $hashtable[$hash]; - $hashtable[$hash]= ($pos - $start) & 0xffff; - } while (!$equals32($pos, $candidate)); - - $out.= $literal($pos - $emit).substr($data, $emit, $pos - $emit); - - // Emit copy instructions - do { - $offset= $pos - $candidate; - $matched= 4; - while ($pos + $matched < $end && $data[$pos + $matched] === $data[$candidate + $matched]) { - $matched++; + if ($fragment >= self::INPUT_MARGIN) { + $bits= 1; + while ((1 << $bits) <= $fragment && $bits <= self::HASH_BITS) { + $bits++; } - $pos+= $matched; + $bits--; + $shift= 32 - $bits; + $hashtable= array_fill(0, 1 << $bits, 0); + + $start= $pos; + $limit= $end - self::INPUT_MARGIN; + $next= ((unpack('V', $data, ++$pos)[1] * self::HASH_KEY) & 0xffffffff) >> $shift; + + // Emit literals + next: $forward= $pos; + $skip= 32; + do 
{ + $pos= $forward; + $hash= $next; + $forward+= ($skip & 0xffffffff) >> 5; + $skip++; + if ($pos > $limit) goto emit; + + $next= ((unpack('V', $data, $forward)[1] * self::HASH_KEY) & 0xffffffff) >> $shift; + $candidate= $start + $hashtable[$hash]; + $hashtable[$hash]= ($pos - $start) & 0xffff; + } while (!$equals32($pos, $candidate)); + + $out.= $literal($pos - $emit).substr($data, $emit, $pos - $emit); + + // Emit copy instructions + do { + $offset= $pos - $candidate; + $matched= 4; + while ($pos + $matched < $end && $data[$pos + $matched] === $data[$candidate + $matched]) { + $matched++; + } + $pos+= $matched; - while ($matched >= 68) { - $out.= $copy($offset, 64); - $matched-= 64; - } - if ($matched > 64) { - $out.= $copy($offset, 60); - $matched-= 60; - } - $out.= $copy($offset, $matched); - $emit= $pos; + while ($matched >= 68) { + $out.= $copy($offset, 64); + $matched-= 64; + } + if ($matched > 64) { + $out.= $copy($offset, 60); + $matched-= 60; + } + $out.= $copy($offset, $matched); + $emit= $pos; - if ($pos >= $limit) continue 2; + if ($pos >= $limit) goto emit; - $hash= ((unpack('V', $data, $pos - 1)[1] * self::HASH_KEY) & 0xffffffff) >> $shift; - $hashtable[$hash]= ($pos - 1 - $start) & 0xffff; - $hash= ((unpack('V', $data, $pos)[1] * self::HASH_KEY) & 0xffffffff) >> $shift; - $candidate= $start + $hashtable[$hash]; - $hashtable[$hash]= ($pos - $start) & 0xffff; - } while ($equals32($pos, $candidate)); + $hash= ((unpack('V', $data, $pos - 1)[1] * self::HASH_KEY) & 0xffffffff) >> $shift; + $hashtable[$hash]= ($pos - 1 - $start) & 0xffff; + $hash= ((unpack('V', $data, $pos)[1] * self::HASH_KEY) & 0xffffffff) >> $shift; + $candidate= $start + $hashtable[$hash]; + $hashtable[$hash]= ($pos - $start) & 0xffff; + } while ($equals32($pos, $candidate)); - $pos++; - $next= ((unpack('V', $data, $pos)[1] * self::HASH_KEY) & 0xffffffff) >> $shift; - goto next; - } + $pos++; + $next= ((unpack('V', $data, $pos)[1] * self::HASH_KEY) & 0xffffffff) >> $shift; + goto next; 
+ } - if ($emit < $end) { - $out.= $literal($end - $emit).substr($data, $emit, $end - $emit); + emit: if ($emit < $end) { + $out.= $literal($end - $emit).substr($data, $emit, $end - $emit); + } } return $out; From 6e66f00f2efc9c314cb293f1953139ac762fb509 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 19:58:45 +0200 Subject: [PATCH 20/35] Fix "Argument 3 ($offset) must be contained in argument 2 ($data)" Discovered when integration-testing with the official test data from https://github.com/google/snappy/tree/main/testdata --- src/main/php/io/streams/compress/Snappy.class.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/php/io/streams/compress/Snappy.class.php b/src/main/php/io/streams/compress/Snappy.class.php index cdc151a..e5dc592 100755 --- a/src/main/php/io/streams/compress/Snappy.class.php +++ b/src/main/php/io/streams/compress/Snappy.class.php @@ -96,7 +96,7 @@ public function compress(string $data, $options= null): string { $hash= $next; $forward+= ($skip & 0xffffffff) >> 5; $skip++; - if ($pos > $limit) goto emit; + if ($pos > $limit || $forward > $limit) goto emit; $next= ((unpack('V', $data, $forward)[1] * self::HASH_KEY) & 0xffffffff) >> $shift; $candidate= $start + $hashtable[$hash]; From 8085296672c5ebf9646dfda55bf8697c3beb0b22 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 20:16:41 +0200 Subject: [PATCH 21/35] QA: Remove emit initialization --- src/main/php/io/streams/compress/Snappy.class.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/php/io/streams/compress/Snappy.class.php b/src/main/php/io/streams/compress/Snappy.class.php index e5dc592..7b8b501 100755 --- a/src/main/php/io/streams/compress/Snappy.class.php +++ b/src/main/php/io/streams/compress/Snappy.class.php @@ -70,7 +70,7 @@ public function compress(string $data, $options= null): string { ); $out= self::length(strlen($data)); - for ($emit= $pos= 0, $end= $length= strlen($data); $pos < $length; 
$pos= $end) { + for ($pos= 0, $end= $length= strlen($data); $pos < $length; $pos= $end) { $fragment= min($length - $pos, self::BLOCK_SIZE); $end= $pos + $fragment; $emit= $pos; From bb159bb4e7033e4a65b3dcdca6e8d9f53742f5d0 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 21:27:50 +0200 Subject: [PATCH 22/35] Fix fragment encoding when streaming --- .../compress/SnappyOutputStream.class.php | 157 ++++++++++-------- 1 file changed, 85 insertions(+), 72 deletions(-) diff --git a/src/main/php/io/streams/compress/SnappyOutputStream.class.php b/src/main/php/io/streams/compress/SnappyOutputStream.class.php index 83b5113..b67073d 100755 --- a/src/main/php/io/streams/compress/SnappyOutputStream.class.php +++ b/src/main/php/io/streams/compress/SnappyOutputStream.class.php @@ -51,70 +51,87 @@ private function equals32(int $a, int $b): bool { /** Compresses a fragment and returns last emitted position */ private function fragment() { $end= min(strlen($this->buffer), Snappy::BLOCK_SIZE); - if ($end <= Snappy::INPUT_MARGIN) return 0; - $pos= $emit= 0; - $bits= 1; - while ((1 << $bits) <= $end && $bits <= Snappy::HASH_BITS) { - $bits++; - } - $bits--; - $shift= 32 - $bits; - $hashtable= array_fill(0, 1 << $bits, 0); - - $start= $pos; - $limit= $end - Snappy::INPUT_MARGIN; - $next= ((unpack('V', $this->buffer, ++$pos)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; - - // Emit literals - next: $forward= $pos; - $skip= 32; - do { - $pos= $forward; - $hash= $next; - $forward+= ($skip & 0xffffffff) >> 5; - $skip++; - if ($pos > $limit) return $emit; - - $next= ((unpack('V', $this->buffer, $forward)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; - $candidate= $start + $hashtable[$hash]; - $hashtable[$hash]= ($pos - $start) & 0xffff; - } while (!$this->equals32($pos, $candidate)); - - $this->out->write($this->literal($pos - $emit).substr($this->buffer, $emit, $pos - $emit)); - - // Emit copy instructions - do { - $offset= $pos - $candidate; - $matched= 4; - while ($pos 
+ $matched < $end && $this->buffer[$pos + $matched] === $this->buffer[$candidate + $matched]) { - $matched++; - } - $pos+= $matched; + $out= ''; - while ($matched >= 68) { - $this->out->write($this->copy($offset, 64)); - $matched-= 64; - } - if ($matched > 64) { - $this->out->write($this->copy($offset, 60)); - $matched-= 60; - } - $this->out->write($this->copy($offset, $matched)); - $emit= $pos; + // Compare 4-byte offsets in data at offsets a and b + $equals32= fn($a, $b) => ( + $this->buffer[$a] === $this->buffer[$b] && + $this->buffer[$a + 1] === $this->buffer[$b + 1] && + $this->buffer[$a + 2] === $this->buffer[$b + 2] && + $this->buffer[$a + 3] === $this->buffer[$b + 3] + ); - if ($pos >= $limit) return $emit; + if ($end >= Snappy::INPUT_MARGIN) { + $bits= 1; + while ((1 << $bits) <= $end && $bits <= Snappy::HASH_BITS) { + $bits++; + } + $bits--; + $shift= 32 - $bits; + $hashtable= array_fill(0, 1 << $bits, 0); + + $start= $pos; + $limit= $end - Snappy::INPUT_MARGIN; + $next= ((unpack('V', $this->buffer, ++$pos)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; + + // Emit literals + next: $forward= $pos; + $skip= 32; + do { + $pos= $forward; + $hash= $next; + $forward+= ($skip & 0xffffffff) >> 5; + $skip++; + if ($pos > $limit || $forward > $limit) goto emit; + + $next= ((unpack('V', $this->buffer, $forward)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; + $candidate= $start + $hashtable[$hash]; + $hashtable[$hash]= ($pos - $start) & 0xffff; + } while (!$equals32($pos, $candidate)); + + $out.= $this->literal($pos - $emit).substr($this->buffer, $emit, $pos - $emit); + + // Emit copy instructions + do { + $offset= $pos - $candidate; + $matched= 4; + while ($pos + $matched < $end && $this->buffer[$pos + $matched] === $this->buffer[$candidate + $matched]) { + $matched++; + } + $pos+= $matched; + + while ($matched >= 68) { + $out.= $this->copy($offset, 64); + $matched-= 64; + } + if ($matched > 64) { + $out.= $this->copy($offset, 60); + $matched-= 60; + } + $out.= 
$this->copy($offset, $matched); + $emit= $pos; + + if ($pos >= $limit) goto emit; + + $hash= ((unpack('V', $this->buffer, $pos - 1)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; + $hashtable[$hash]= ($pos - 1 - $start) & 0xffff; + $hash= ((unpack('V', $this->buffer, $pos)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; + $candidate= $start + $hashtable[$hash]; + $hashtable[$hash]= ($pos - $start) & 0xffff; + } while ($equals32($pos, $candidate)); + + $pos++; + $next= ((unpack('V', $this->buffer, $pos)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; + goto next; + } - $hash= ((unpack('V', $this->buffer, $pos - 1)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; - $hashtable[$hash]= ($pos - 1 - $start) & 0xffff; - $hash= ((unpack('V', $this->buffer, $pos)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; - $candidate= $start + $hashtable[$hash]; - $hashtable[$hash]= ($pos - $start) & 0xffff; - } while ($this->equals32($pos, $candidate)); + emit: if ($emit < $end) { + $out.= $this->literal($end - $emit).substr($this->buffer, $emit, $end - $emit); + } - $pos++; - $next= ((unpack('V', $this->buffer, $pos)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; - goto next; + $this->buffer= substr($this->buffer, $end); + return $out; } /** @@ -124,20 +141,21 @@ private function fragment() { * @return void */ public function write($arg) { - if (strlen($this->buffer) <= Snappy::BLOCK_SIZE) { - $this->buffer.= $arg; - } else { - $this->buffer= substr($this->buffer, $this->fragment()); + $this->buffer.= $arg; + if (strlen($this->buffer) > Snappy::BLOCK_SIZE) { + $this->out->write($this->fragment()); } } /** - * Flush this buffer (except if it's smaller than the input margin) + * Flush this buffer * * @return void */ public function flush() { - $this->buffer= substr($this->buffer, $this->fragment()); + if (strlen($this->buffer) > 0) { + $this->out->write($this->fragment()); + } } /** @@ -148,15 +166,10 @@ public function flush() { * @return void */ public function close() { - $end= 
strlen($this->buffer); - if ($end > 0) { - $emit= $this->fragment(); - if ($emit < $end) { - $this->out->write($this->literal($end - $emit).substr($this->buffer, $emit, $end - $emit)); - } + if (strlen($this->buffer) > 0) { + $this->out->write($this->fragment()); $this->buffer= ''; } - $this->out->close(); } } \ No newline at end of file From 084171da687de2f2e1a6f2a7f87d6033d2ee8b99 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 21:31:14 +0200 Subject: [PATCH 23/35] Fix repeated_input_compressed() test --- .../streams/compress/unittest/SnappyOutputStreamTest.class.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/php/io/streams/compress/unittest/SnappyOutputStreamTest.class.php b/src/test/php/io/streams/compress/unittest/SnappyOutputStreamTest.class.php index 09ea037..b28baf3 100755 --- a/src/test/php/io/streams/compress/unittest/SnappyOutputStreamTest.class.php +++ b/src/test/php/io/streams/compress/unittest/SnappyOutputStreamTest.class.php @@ -53,8 +53,9 @@ public function repeated_input_compressed() { $compress->write('Hello'); $compress->write('Hello'); $compress->write('Hello'); + $compress->write('!'); $compress->close(); - Assert::equals(new Bytes("\024\020Hello:\005\000"), new Bytes($out->bytes())); + Assert::equals(new Bytes("\024\020Hello:\005\000\000!"), new Bytes($out->bytes())); } } \ No newline at end of file From ded371211c58aeadd5ff190388320a939a0f6aa6 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 21:48:44 +0200 Subject: [PATCH 24/35] Remove inline helper --- .../io/streams/compress/SnappyOutputStream.class.php | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/main/php/io/streams/compress/SnappyOutputStream.class.php b/src/main/php/io/streams/compress/SnappyOutputStream.class.php index b67073d..c2b3435 100755 --- a/src/main/php/io/streams/compress/SnappyOutputStream.class.php +++ b/src/main/php/io/streams/compress/SnappyOutputStream.class.php @@ 
-54,14 +54,6 @@ private function fragment() { $pos= $emit= 0; $out= ''; - // Compare 4-byte offsets in data at offsets a and b - $equals32= fn($a, $b) => ( - $this->buffer[$a] === $this->buffer[$b] && - $this->buffer[$a + 1] === $this->buffer[$b + 1] && - $this->buffer[$a + 2] === $this->buffer[$b + 2] && - $this->buffer[$a + 3] === $this->buffer[$b + 3] - ); - if ($end >= Snappy::INPUT_MARGIN) { $bits= 1; while ((1 << $bits) <= $end && $bits <= Snappy::HASH_BITS) { @@ -88,7 +80,7 @@ private function fragment() { $next= ((unpack('V', $this->buffer, $forward)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; $candidate= $start + $hashtable[$hash]; $hashtable[$hash]= ($pos - $start) & 0xffff; - } while (!$equals32($pos, $candidate)); + } while (!$this->equals32($pos, $candidate)); $out.= $this->literal($pos - $emit).substr($this->buffer, $emit, $pos - $emit); @@ -119,7 +111,7 @@ private function fragment() { $hash= ((unpack('V', $this->buffer, $pos)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; $candidate= $start + $hashtable[$hash]; $hashtable[$hash]= ($pos - $start) & 0xffff; - } while ($equals32($pos, $candidate)); + } while ($this->equals32($pos, $candidate)); $pos++; $next= ((unpack('V', $this->buffer, $pos)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; From d2fe0bf4ae747735d8c87a78493441f563ea53e8 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 22:06:30 +0200 Subject: [PATCH 25/35] Remove start which is always 0 --- .../io/streams/compress/SnappyOutputStream.class.php | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/main/php/io/streams/compress/SnappyOutputStream.class.php b/src/main/php/io/streams/compress/SnappyOutputStream.class.php index c2b3435..bfeaef4 100755 --- a/src/main/php/io/streams/compress/SnappyOutputStream.class.php +++ b/src/main/php/io/streams/compress/SnappyOutputStream.class.php @@ -63,7 +63,6 @@ private function fragment() { $shift= 32 - $bits; $hashtable= array_fill(0, 1 << $bits, 0); - 
$start= $pos; $limit= $end - Snappy::INPUT_MARGIN; $next= ((unpack('V', $this->buffer, ++$pos)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; @@ -78,8 +77,8 @@ private function fragment() { if ($pos > $limit || $forward > $limit) goto emit; $next= ((unpack('V', $this->buffer, $forward)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; - $candidate= $start + $hashtable[$hash]; - $hashtable[$hash]= ($pos - $start) & 0xffff; + $candidate= $hashtable[$hash]; + $hashtable[$hash]= $pos & 0xffff; } while (!$this->equals32($pos, $candidate)); $out.= $this->literal($pos - $emit).substr($this->buffer, $emit, $pos - $emit); @@ -107,10 +106,10 @@ private function fragment() { if ($pos >= $limit) goto emit; $hash= ((unpack('V', $this->buffer, $pos - 1)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; - $hashtable[$hash]= ($pos - 1 - $start) & 0xffff; + $hashtable[$hash]= ($pos - 1) & 0xffff; $hash= ((unpack('V', $this->buffer, $pos)[1] * Snappy::HASH_KEY) & 0xffffffff) >> $shift; - $candidate= $start + $hashtable[$hash]; - $hashtable[$hash]= ($pos - $start) & 0xffff; + $candidate= $hashtable[$hash]; + $hashtable[$hash]= $pos & 0xffff; } while ($this->equals32($pos, $candidate)); $pos++; From 2ae0b9883904defd51b4430e4ce5636e46541dd9 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 22:46:33 +0200 Subject: [PATCH 26/35] QA: Extract encoding methods to Snappy class --- .../php/io/streams/compress/Snappy.class.php | 53 ++++++++++--------- .../compress/SnappyOutputStream.class.php | 30 ++--------- 2 files changed, 32 insertions(+), 51 deletions(-) diff --git a/src/main/php/io/streams/compress/Snappy.class.php b/src/main/php/io/streams/compress/Snappy.class.php index 7b8b501..e9cbdca 100755 --- a/src/main/php/io/streams/compress/Snappy.class.php +++ b/src/main/php/io/streams/compress/Snappy.class.php @@ -25,7 +25,7 @@ public function extension(): string { return '.sn'; } /** Maps fastest, default and strongest levels */ public function level(int $select): int { return 0; 
} - /** Output length as varint */ + /** Encode length as varint */ public static function length(int $length): string { $out= ''; @@ -39,29 +39,30 @@ public static function length(int $length): string { return $out.chr($l); } + /** Encode literal operation */ + public static function literal(int $l): string { + if ($l <= 60) { + return chr(($l - 1) << 2); + } else if ($l < 256) { + return pack('CC', 60 << 2, $l - 1); + } else { + return pack('CCC', 61 << 2, ($l - 1) & 0xff, (($l - 1) & 0xffffffff) >> 8); + } + } + + /** Encode copy operation */ + public static function copy(int $i, int $l): string { + if ($l < 12 && $i < 2048) { + return pack('CC', 1 + (($l - 4) << 2) + ((($i & 0xffffffff) >> 8) << 5), $i & 0xff); + } else { + return pack('CCC', 2 + (($l - 1) << 2), $i & 0xff, ($i & 0xffffffff) >> 8); + } + } + /** Compresses data */ public function compress(string $data, $options= null): string { - static $literal, $copy; - - // Helper functions - $literal ?? $literal= function($l) { - if ($l <= 60) { - return chr(($l - 1) << 2); - } else if ($l < 256) { - return pack('CC', 60 << 2, $l - 1); - } else { - return pack('CCC', 61 << 2, ($l - 1) & 0xff, (($l - 1) & 0xffffffff) >> 8); - } - }; - $copy ?? 
$copy= function($i, $l) { - if ($l < 12 && $i < 2048) { - return pack('CC', 1 + (($l - 4) << 2) + ((($i & 0xffffffff) >> 8) << 5), $i & 0xff); - } else { - return pack('CCC', 2 + (($l - 1) << 2), $i & 0xff, ($i & 0xffffffff) >> 8); - } - }; - // Compare 4-byte offsets in data at offsets a and b + // Inlined comparison of 4-byte offsets in data at offsets a and b $equals32= fn($a, $b) => ( $data[$a] === $data[$b] && $data[$a + 1] === $data[$b + 1] && @@ -103,7 +104,7 @@ public function compress(string $data, $options= null): string { $hashtable[$hash]= ($pos - $start) & 0xffff; } while (!$equals32($pos, $candidate)); - $out.= $literal($pos - $emit).substr($data, $emit, $pos - $emit); + $out.= self::literal($pos - $emit).substr($data, $emit, $pos - $emit); // Emit copy instructions do { @@ -115,14 +116,14 @@ public function compress(string $data, $options= null): string { $pos+= $matched; while ($matched >= 68) { - $out.= $copy($offset, 64); + $out.= self::copy($offset, 64); $matched-= 64; } if ($matched > 64) { - $out.= $copy($offset, 60); + $out.= self::copy($offset, 60); $matched-= 60; } - $out.= $copy($offset, $matched); + $out.= self::copy($offset, $matched); $emit= $pos; if ($pos >= $limit) goto emit; @@ -140,7 +141,7 @@ public function compress(string $data, $options= null): string { } emit: if ($emit < $end) { - $out.= $literal($end - $emit).substr($data, $emit, $end - $emit); + $out.= self::literal($end - $emit).substr($data, $emit, $end - $emit); } } diff --git a/src/main/php/io/streams/compress/SnappyOutputStream.class.php b/src/main/php/io/streams/compress/SnappyOutputStream.class.php index bfeaef4..06277c5 100755 --- a/src/main/php/io/streams/compress/SnappyOutputStream.class.php +++ b/src/main/php/io/streams/compress/SnappyOutputStream.class.php @@ -18,26 +18,6 @@ public function __construct(OutputStream $out, $length) { $this->out->write(Snappy::length($length)); } - /** Encode literal operation */ - private function literal(int $l): string { - if ($l 
<= 60) { - return chr(($l - 1) << 2); - } else if ($l < 256) { - return pack('CC', 60 << 2, $l - 1); - } else { - return pack('CCC', 61 << 2, ($l - 1) & 0xff, (($l - 1) & 0xffffffff) >> 8); - } - } - - /** Encode copy operation */ - private function copy(int $i, int $l): string { - if ($l < 12 && $i < 2048) { - return pack('CC', 1 + (($l - 4) << 2) + ((($i & 0xffffffff) >> 8) << 5), $i & 0xff); - } else { - return pack('CCC', 2 + (($l - 1) << 2), $i & 0xff, ($i & 0xffffffff) >> 8); - } - } - /** Compare 4-byte offsets in data at offsets a and b */ private function equals32(int $a, int $b): bool { return ( @@ -81,7 +61,7 @@ private function fragment() { $hashtable[$hash]= $pos & 0xffff; } while (!$this->equals32($pos, $candidate)); - $out.= $this->literal($pos - $emit).substr($this->buffer, $emit, $pos - $emit); + $out.= Snappy::literal($pos - $emit).substr($this->buffer, $emit, $pos - $emit); // Emit copy instructions do { @@ -93,14 +73,14 @@ private function fragment() { $pos+= $matched; while ($matched >= 68) { - $out.= $this->copy($offset, 64); + $out.= Snappy::copy($offset, 64); $matched-= 64; } if ($matched > 64) { - $out.= $this->copy($offset, 60); + $out.= Snappy::copy($offset, 60); $matched-= 60; } - $out.= $this->copy($offset, $matched); + $out.= Snappy::copy($offset, $matched); $emit= $pos; if ($pos >= $limit) goto emit; @@ -118,7 +98,7 @@ private function fragment() { } emit: if ($emit < $end) { - $out.= $this->literal($end - $emit).substr($this->buffer, $emit, $end - $emit); + $out.= Snappy::literal($end - $emit).substr($this->buffer, $emit, $end - $emit); } $this->buffer= substr($this->buffer, $end); From 03d23c6dd5ee3f6d6a06e0772e51e0dd2cfd62e2 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 22:51:47 +0200 Subject: [PATCH 27/35] Ensure streams are closed --- src/main/php/io/streams/compress/SnappyInputStream.class.php | 4 +--- .../php/io/streams/compress/SnappyOutputStream.class.php | 5 +++++ 2 files changed, 6 insertions(+), 3 
deletions(-) diff --git a/src/main/php/io/streams/compress/SnappyInputStream.class.php b/src/main/php/io/streams/compress/SnappyInputStream.class.php index 03f18b0..873a753 100755 --- a/src/main/php/io/streams/compress/SnappyInputStream.class.php +++ b/src/main/php/io/streams/compress/SnappyInputStream.class.php @@ -125,9 +125,7 @@ public function close() { $this->in->close(); } - /** - * Destructor. Ensures output stream is closed. - */ + /** Ensures input stream is closed */ public function __destruct() { $this->close(); } diff --git a/src/main/php/io/streams/compress/SnappyOutputStream.class.php b/src/main/php/io/streams/compress/SnappyOutputStream.class.php index 06277c5..7c23880 100755 --- a/src/main/php/io/streams/compress/SnappyOutputStream.class.php +++ b/src/main/php/io/streams/compress/SnappyOutputStream.class.php @@ -143,4 +143,9 @@ public function close() { } $this->out->close(); } + + /** Ensures output stream is closed */ + public function __destruct() { + $this->close(); + } } \ No newline at end of file From fff87b08443c193b33580306bddb3d071ce95468 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 22:56:13 +0200 Subject: [PATCH 28/35] Close underlying stream in BufferedOutputStream --- .../compress/BufferedOutputStream.class.php | 6 ++++++ .../BufferedOutputStreamTest.class.php | 18 +++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/main/php/io/streams/compress/BufferedOutputStream.class.php b/src/main/php/io/streams/compress/BufferedOutputStream.class.php index 977f579..22b72ea 100755 --- a/src/main/php/io/streams/compress/BufferedOutputStream.class.php +++ b/src/main/php/io/streams/compress/BufferedOutputStream.class.php @@ -57,5 +57,11 @@ public function close() { $this->out->write(($this->compress)($this->buffer)); $this->buffer= null; } + $this->out->close(); + } + + /** Ensures output stream is closed */ + public function __destruct() { + $this->close(); } } \ No newline at end of file diff 
--git a/src/test/php/io/streams/compress/unittest/BufferedOutputStreamTest.class.php b/src/test/php/io/streams/compress/unittest/BufferedOutputStreamTest.class.php index a9d201d..3b0fb01 100755 --- a/src/test/php/io/streams/compress/unittest/BufferedOutputStreamTest.class.php +++ b/src/test/php/io/streams/compress/unittest/BufferedOutputStreamTest.class.php @@ -1,7 +1,7 @@ bytes()); } + + #[Test] + public function closes_underlying_stream() { + $out= new class() implements OutputStream { + public $closed= false; + public function write($bytes) { } + public function flush() { } + public function close() { $this->closed= true; } + }; + + $compress= new BufferedOutputStream($out, new None()); + $closed= $out->closed; + $compress->close(); + + Assert::equals([false, true], [$closed, $out->closed]); + } } \ No newline at end of file From f6cd74b3a596057c9c411c125bc2449606d9c7f8 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 23:00:45 +0200 Subject: [PATCH 29/35] Refrain from initializing length twice --- src/main/php/io/streams/compress/Snappy.class.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/php/io/streams/compress/Snappy.class.php b/src/main/php/io/streams/compress/Snappy.class.php index e9cbdca..97d8067 100755 --- a/src/main/php/io/streams/compress/Snappy.class.php +++ b/src/main/php/io/streams/compress/Snappy.class.php @@ -61,6 +61,8 @@ public static function copy(int $i, int $l): string { /** Compresses data */ public function compress(string $data, $options= null): string { + $length= strlen($data); + $out= self::length($length); // Inlined comparison of 4-byte offsets in data at offsets a and b $equals32= fn($a, $b) => ( @@ -70,8 +72,7 @@ public function compress(string $data, $options= null): string { $data[$a + 3] === $data[$b + 3] ); - $out= self::length(strlen($data)); - for ($pos= 0, $end= $length= strlen($data); $pos < $length; $pos= $end) { + for ($pos= 0; $pos < $length; $pos= $end) { $fragment= 
min($length - $pos, self::BLOCK_SIZE); $end= $pos + $fragment; $emit= $pos; From e95440f535c5e255730d9374f64a72229fa32450 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Sun, 3 Aug 2025 23:09:27 +0200 Subject: [PATCH 30/35] Add snappy to Compression::algorithms() output [skip ci] --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index b190239..0dba918 100755 --- a/README.md +++ b/README.md @@ -97,6 +97,7 @@ io.streams.compress.Algorithms@{ io.streams.compress.Gzip(token: gzip, extension: .gz, supported: true, levels: 1..9) io.streams.compress.Bzip2(token: bzip2, extension: .bz2, supported: false, levels: 1..9) io.streams.compress.Brotli(token: br, extension: .br, supported: true, levels: 1..11) + io.streams.compress.Snappy(token: snappy, extension: .sn, supported: true, levels: 0..0) } ``` From 8251a876758ef79546dcddabfc263b35f24d1e06 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Tue, 5 Aug 2025 21:09:25 +0200 Subject: [PATCH 31/35] Add buffered input to complement buffered output --- .../compress/BufferedInputStream.class.php | 78 +++++++++++++++++++ .../BufferedInputStreamTest.class.php | 36 +++++++++ 2 files changed, 114 insertions(+) create mode 100755 src/main/php/io/streams/compress/BufferedInputStream.class.php create mode 100755 src/test/php/io/streams/compress/unittest/BufferedInputStreamTest.class.php diff --git a/src/main/php/io/streams/compress/BufferedInputStream.class.php b/src/main/php/io/streams/compress/BufferedInputStream.class.php new file mode 100755 index 0000000..8684cbc --- /dev/null +++ b/src/main/php/io/streams/compress/BufferedInputStream.class.php @@ -0,0 +1,78 @@ +decompress= [$decompress, 'decompress']; + } else if (is_callable($decompress)) { + $this->decompress= $decompress; + } else { + throw new IllegalArgumentException('Expected an Algorithm or a callable, have '.typeof($decompress)); + } + $this->in= $in; + } + + /** @return string */ + private function buffer() { + if (null === 
$this->buffer) { + $compressed= ''; + while ($this->in->available()) { + $compressed.= $this->in->read(); + } + + $this->buffer= ($this->decompress)($compressed); + } + return $this->buffer; + } + + /** + * Read a string + * + * @param int limit default 8192 + * @return string + */ + public function read($limit= 8192) { + $chunk= substr($this->buffer(), $this->position, $limit); + $this->position+= strlen($chunk); + return $chunk; + } + + /** + * Returns the number of bytes that can be read from this stream + * without blocking. + * + * @return int + */ + public function available() { + return strlen($this->buffer()) - $this->position; + } + + /** + * Close this buffer. + * + * @return void + */ + public function close() { + $this->buffer= null; + $this->in->close(); + } + + /** Ensures input stream is closed */ + public function __destruct() { + $this->close(); + } +} \ No newline at end of file diff --git a/src/test/php/io/streams/compress/unittest/BufferedInputStreamTest.class.php b/src/test/php/io/streams/compress/unittest/BufferedInputStreamTest.class.php new file mode 100755 index 0000000..da9f27d --- /dev/null +++ b/src/test/php/io/streams/compress/unittest/BufferedInputStreamTest.class.php @@ -0,0 +1,36 @@ + $data); + } + + #[Test, Expect(IllegalArgumentException::class)] + public function illegal_compress() { + new BufferedInputStream(new MemoryInputStream(''), null); + } + + #[Test, Values([1, 8192, 8193, 65536])] + public function read_completely($repeat) { + $in= new BufferedInputStream(new MemoryInputStream($repeat), fn($data) => str_repeat('*', (int)$data)); + + $decompressed= ''; + while ($in->available()) { + $decompressed.= $in->read(); + } + + Assert::equals($repeat, strlen($decompressed)); + } +} \ No newline at end of file From d0468c68dce8e800a00cce5add05f299eaf4b4f5 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Thu, 7 Aug 2025 22:21:38 +0200 Subject: [PATCH 32/35] Support literal lengths encoded with 1 to 4 extra bytes (not just 2) --- 
src/main/php/io/streams/compress/Snappy.class.php | 13 +++++-------- .../io/streams/compress/SnappyInputStream.class.php | 6 ++---- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/main/php/io/streams/compress/Snappy.class.php b/src/main/php/io/streams/compress/Snappy.class.php index 97d8067..c8646a9 100755 --- a/src/main/php/io/streams/compress/Snappy.class.php +++ b/src/main/php/io/streams/compress/Snappy.class.php @@ -167,14 +167,11 @@ public function decompress(string $bytes): string { switch ($c & 0x03) { case 0: $l= $c >> 2; - if (60 === $l) { - if ($pos + 1 >= $limit) throw new IOException('Not enough input, expected 1'); - $l= unpack('C', $bytes, $pos)[1]; - $pos++; - } else if (61 === $l) { - if ($pos + 2 >= $limit) throw new IOException('Not enough input, expected 2'); - $l= unpack('v', $bytes, $pos)[1]; - $pos+= 2; + if ($l >= 60) { + $n= $l - 59; + if ($pos + $n >= $limit) throw new IOException('Not enough input, expected '.$n); + $l= unpack('P', str_pad(substr($bytes, $pos, $n), 8, "\0"))[1]; + $pos+= $n; } $l++; diff --git a/src/main/php/io/streams/compress/SnappyInputStream.class.php b/src/main/php/io/streams/compress/SnappyInputStream.class.php index 873a753..bb73fce 100755 --- a/src/main/php/io/streams/compress/SnappyInputStream.class.php +++ b/src/main/php/io/streams/compress/SnappyInputStream.class.php @@ -59,10 +59,8 @@ public function read($limit= 8192) { switch ($c & 0x03) { case 0: $l= $c >> 2; - if (60 === $l) { - $l= unpack('C', $this->bytes(1))[1]; - } else if (61 === $l) { - $l= unpack('v', $this->bytes(2))[1]; + if ($l >= 60) { + $l= unpack('P', str_pad($this->bytes($l - 59), 8, "\0"))[1]; } $this->out.= $this->bytes(++$l); break; From 8e4ec2ac21e65299ec75003be99a85bd55bc91cc Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Thu, 7 Aug 2025 22:34:06 +0200 Subject: [PATCH 33/35] Implement COPY_4 in compressor --- src/main/php/io/streams/compress/Snappy.class.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff 
--git a/src/main/php/io/streams/compress/Snappy.class.php b/src/main/php/io/streams/compress/Snappy.class.php index c8646a9..0151118 100755 --- a/src/main/php/io/streams/compress/Snappy.class.php +++ b/src/main/php/io/streams/compress/Snappy.class.php @@ -54,8 +54,10 @@ public static function literal(int $l): string { public static function copy(int $i, int $l): string { if ($l < 12 && $i < 2048) { return pack('CC', 1 + (($l - 4) << 2) + ((($i & 0xffffffff) >> 8) << 5), $i & 0xff); - } else { + } else if ($i < 65536) { return pack('CCC', 2 + (($l - 1) << 2), $i & 0xff, ($i & 0xffffffff) >> 8); + } else { + return pack('CV', 3 + (($l - 1) << 2), $i & 0xffffffff); } } From 1c46b039328130e4d1f4b0d84425343ede0252a1 Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Fri, 15 Aug 2025 21:04:34 +0200 Subject: [PATCH 34/35] Add quote about snappy from Wikipedia [skip ci] --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 03edd7b..55c8898 100755 --- a/README.md +++ b/README.md @@ -139,4 +139,5 @@ $in->close(); See also -------- -* The PHP RFC [Modern Compression](https://wiki.php.net/rfc/modern_compression) suggests adding *zstd* and *brotli* into PHP. \ No newline at end of file +* The PHP RFC [Modern Compression](https://wiki.php.net/rfc/modern_compression) suggests adding *zstd* and *brotli* into PHP. 
+* Snappy *does not aim for maximum compression, or compatibility with any other compression library; instead, it aims for very high speeds and reasonable compression*, quoting [its Wikipedia page](https://en.wikipedia.org/wiki/Snappy_(compression)) \ No newline at end of file From 9b812e78ea44393584a43d8229b91c49c77a298d Mon Sep 17 00:00:00 2001 From: Timm Friebe Date: Fri, 15 Aug 2025 21:22:57 +0200 Subject: [PATCH 35/35] Be consistent about ordering of algorithms [skip ci] --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 55c8898..5d0e982 100755 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Compression streams [![Supports PHP 8.0+](https://raw.githubusercontent.com/xp-framework/web/master/static/php-8_0plus.svg)](http://php.net/) [![Latest Stable Version](https://poser.pugx.org/xp-forge/compression/version.svg)](https://packagist.org/packages/xp-forge/compression) -Compressing output and decompressing input streams including GZip, BZip2, Brotli, ZStandard and Snappy. +Compressing output and decompressing input streams including GZip, BZip2, Brotli, Snappy and ZStandard. Examples -------- @@ -41,10 +41,10 @@ Dependencies ------------ Compression algorithms might require a specific PHP extension: -* **Snappy** - no dependencies, implemented in userland * **GZip** - requires PHP's ["zlib" extension](https://www.php.net/zlib) * **Bzip2** - requires PHP's ["bzip2" extension](https://www.php.net/bzip2) * **Brotli** - requires https://github.com/kjdev/php-ext-brotli +* **Snappy** - *no dependencies, implemented in userland* * **ZStandard** - requires https://github.com/kjdev/php-ext-zstd Accessing these algorithms can be done via the `Compression` API: