Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A bunch of enhancements for dealing with large tar archives #37

Merged
merged 6 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 121 additions & 14 deletions src/Tar.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
class Tar extends Archive
{
const READ_CHUNK_SIZE = 1048576; // 1MB

protected $file = '';
protected $comptype = Archive::COMPRESS_AUTO;
Expand All @@ -23,6 +24,9 @@ class Tar extends Archive
protected $memory = '';
protected $closed = true;
protected $writeaccess = false;
protected $position = 0;
protected $contentUntil = 0;
protected $skipUntil = 0;

/**
* Sets the compression to use
Expand Down Expand Up @@ -72,6 +76,7 @@ public function open($file)
throw new ArchiveIOException('Could not open file for reading: '.$this->file);
}
$this->closed = false;
$this->position = 0;
}

/**
Expand Down Expand Up @@ -118,12 +123,37 @@ public function yieldContents()
continue;
}

$this->skipbytes(ceil($header['size'] / 512) * 512);
$this->contentUntil = $this->position + $header['size'];
$this->skipUntil = $this->position + ceil($header['size'] / 512) * 512;

yield $this->header2fileinfo($header);

$skip = $this->skipUntil - $this->position;
if ($skip > 0) {
$this->skipbytes($skip);
}
}

$this->close();
}

/**
* Reads content of a current archive entry.
*
* Works only when iterating through the archive using the generator returned
* by the yieldContents().
*
* @param int $length maximum number of bytes to read
*
* @return string
*/
public function readCurrentEntry($length = PHP_INT_MAX)
{
    // never hand out more than what is left of the current entry's content
    $length = (int) min($length, $this->contentUntil - $this->position);
    // The remainder is zero once the content was fully read, and negative once
    // the stream position has moved past the content (e.g. after the padding
    // was skipped). The same applies when the caller passes a non-positive
    // length. In all these cases there is nothing to read, so do not forward
    // a non-positive length to readbytes().
    if ($length <= 0) {
        return '';
    }
    return $this->readbytes($length);
}

/**
Expand Down Expand Up @@ -290,16 +320,27 @@ public function addFile($file, $fileinfo = '')
throw new ArchiveIOException('Could not open file for reading: ' . $file);
}
while (!feof($fp)) {
$data = fread($fp, 512);
$read += strlen($data);
// for performance reasons read bigger chunks at once
$data = fread($fp, self::READ_CHUNK_SIZE);
if ($data === false) {
break;
}
if ($data === '') {
break;
}
$packed = pack("a512", $data);
$this->writebytes($packed);
$dataLen = strlen($data);
$read += $dataLen;
// how much of data read fully fills 512-byte blocks?
$passLen = ($dataLen >> 9) << 9;
if ($passLen === $dataLen) {
// all - just write the data
$this->writebytes($data);
} else {
// directly write what fills 512-byte blocks fully
$this->writebytes(substr($data, 0, $passLen));
// pad the remainder to 512 bytes
$this->writebytes(pack("a512", substr($data, $passLen)));
}
}
fclose($fp);

Expand Down Expand Up @@ -335,8 +376,11 @@ public function addData($fileinfo, $data)
$fileinfo->setSize($len);
$this->writeFileHeader($fileinfo);

for ($s = 0; $s < $len; $s += 512) {
$this->writebytes(pack("a512", substr($data, $s, 512)));
// write directly everything but the last block which needs padding
$passLen = ($len >> 9) << 9;
$this->writebytes(substr($data, 0, $passLen));
if ($passLen < $len) {
$this->writebytes(pack("a512", substr($data, $passLen, 512)));
}

if (is_callable($this->callback)) {
Expand Down Expand Up @@ -439,12 +483,14 @@ public function save($file)
protected function readbytes($length)
{
    // use the read function matching the compression type of the archive;
    // the result is kept in $ret (instead of being returned right away) so
    // that the stream position can be updated below
    if ($this->comptype === Archive::COMPRESS_GZIP) {
        $ret = @gzread($this->fh, $length);
    } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
        $ret = @bzread($this->fh, $length);
    } else {
        $ret = @fread($this->fh, $length);
    }
    // only advance the tracked position by bytes that were actually returned;
    // a failed read yields a non-string result which is passed on unchanged
    if (is_string($ret)) {
        $this->position += strlen($ret);
    }
    return $ret;
}

/**
Expand Down Expand Up @@ -494,6 +540,7 @@ protected function skipbytes($bytes)
} else {
@fseek($this->fh, $bytes, SEEK_CUR);
}
$this->position += $bytes;
}

/**
Expand Down Expand Up @@ -553,8 +600,8 @@ protected function writeRawFileHeader($name, $uid, $gid, $perm, $size, $mtime, $
$uid = sprintf("%6s ", decoct($uid));
$gid = sprintf("%6s ", decoct($gid));
$perm = sprintf("%6s ", decoct($perm));
$size = sprintf("%11s ", decoct($size));
$mtime = sprintf("%11s", decoct($mtime));
$size = self::numberEncode($size, 12);
// encode the modification time itself, not the (already encoded) size field
$mtime = self::numberEncode($mtime, 12);

$data_first = pack("a100a8a8a8a12A12", $name, $perm, $uid, $gid, $size, $mtime);
$data_last = pack("a1a100a6a2a32a32a8a8a155a12", $typeflag, '', 'ustar', '', '', '', '', '', $prefix, "");
Expand Down Expand Up @@ -614,8 +661,8 @@ protected function parseHeader($block)
$return['perm'] = OctDec(trim($header['perm']));
$return['uid'] = OctDec(trim($header['uid']));
$return['gid'] = OctDec(trim($header['gid']));
$return['size'] = OctDec(trim($header['size']));
$return['mtime'] = OctDec(trim($header['mtime']));
$return['size'] = self::numberDecode($header['size']);
$return['mtime'] = self::numberDecode($header['mtime']);
$return['typeflag'] = $header['typeflag'];
$return['link'] = trim($header['link']);
$return['uname'] = trim($header['uname']);
Expand Down Expand Up @@ -713,4 +760,64 @@ public function filetype($file)
return Archive::COMPRESS_NONE;
}

/**
* Decodes numeric values according to the
* https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions
* (basically with support for big numbers)
*
* @param string $field
* @return int
*/
static public function numberDecode($field)
{
    $size = strlen($field);
    $marker = ord(substr($field, 0, 1));

    // no binary marker in the first byte: the field holds octal digits as text
    if ($marker !== 255 && $marker !== 128) {
        return octdec(trim($field));
    }

    if ($marker === 255) {
        // marker 255: start from -1 shifted by the field width and
        // add every byte of the field, including the marker byte
        $result = -1 << (8 * $size);
        $lastIndex = 0;
    } else {
        // marker 128: start from zero and add every byte except the marker
        $result = 0;
        $lastIndex = 1;
    }

    // walk from the last byte towards the front, each step weighs 8 bits more
    $bits = 0;
    for ($pos = $size - 1; $pos >= $lastIndex; $pos--) {
        $result += ord(substr($field, $pos, 1)) << $bits;
        $bits += 8;
    }

    return $result;
}

/**
* Encodes numeric values according to the
* https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions
* (basically with support for big numbers)
*
* @param int $value
* @param int $length field length
* @return string
*/
static public function numberEncode($value, $length)
{
    // old implementations leave the last byte empty and octal encoding stores
    // three bits per byte, so this is the first value that no longer fits
    $octalLimit = 1 << (($length - 1) * 3);
    $isNegative = $value < 0;

    if (!$isNegative && !($value >= $octalLimit)) {
        // fits: right-aligned octal digits followed by a single space
        return sprintf("%" . ($length - 1) . "s ", decoct($value));
    }

    // binary form: big-endian bytes of the native integer
    // (PHP already stores integers as 2's complement)
    $raw = pack(PHP_INT_SIZE === 8 ? 'J' : 'N', (int) $value);
    // keep at most ($length - 1) trailing bytes so one byte is left for the marker
    $tail = substr($raw, max(0, PHP_INT_SIZE - $length + 1));

    if ($isNegative) {
        // negative values: marker and fill bytes are all 0xFF
        $head = str_repeat("\xFF", max(1, $length - PHP_INT_SIZE));
    } else {
        // positive values: 0x80 marker, then zero bytes up to the integer bytes
        $head = "\x80" . str_repeat("\x00", max(0, $length - PHP_INT_SIZE - 1));
    }

    return $head . $tail;
}
}

51 changes: 51 additions & 0 deletions tests/TarTestCase.php
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,57 @@ public function testSaveWithInvalidDestinationFile()
$this->assertTrue(true); // succeed if no exception, yet
}

public function testNumberEncodeDecode()
{
    // positive value in binary form: 2^34 + 17 = 2^2 * 2^32 + 17
    $positive = (1 << 34) + 17;

    // 12-byte field
    $field = Tar::numberEncode($positive, 12);
    $expected = pack('CCnNN', 128, 0, 0, 1 << 2, 17);
    $this->assertEquals($expected, $field);
    $this->assertEquals($positive, Tar::numberDecode($field));

    // 7-byte field
    $field = Tar::numberEncode($positive, 7);
    $expected = pack('CnN', 128, 1 << 2, 17);
    $this->assertEquals($expected, $field);
    $this->assertEquals($positive, Tar::numberDecode($field));

    // negative value
    $negative = -1234;

    // 12-byte field
    $field = Tar::numberEncode($negative, 12);
    $expected = pack('CCnNN', 0xFF, 0xFF, 0xFFFF, 0xFFFFFFFF, -1234);
    $this->assertEquals($expected, $field);
    $this->assertEquals($negative, Tar::numberDecode($field));

    // 3-byte field
    $field = Tar::numberEncode($negative, 3);
    $expected = pack('Cn', 0xFF, -1234);
    $this->assertEquals($expected, $field);
    $this->assertEquals($negative, Tar::numberDecode($field));
}

public function testReadCurrentEntry()
{
    // extract the fixture first so there are reference files on disk
    // to compare the streamed entry contents against
    $tar = new Tar();
    $tar->open(__DIR__ . '/tar/test.tar');
    $out = sys_get_temp_dir() . '/dwtartest' . md5(time());
    $tar->extract($out);

    try {
        // iterate the same archive again and read each entry while it is current
        $tar = new Tar();
        $tar->open(__DIR__ . '/tar/test.tar');
        $pathsRead = array();
        foreach ($tar->yieldContents() as $i) {
            $this->assertFileExists($out . '/' . $i->getPath());
            if ($i->getIsdir()) {
                // directories carry no content
                $this->assertEquals('', $tar->readCurrentEntry());
            } else {
                $this->assertStringEqualsFile($out . '/' . $i->getPath(), $tar->readCurrentEntry());
            }
            $pathsRead[] = $i->getPath();
        }
        $pathsReadRef = array('tar', 'tar/testdata1.txt', 'tar/foobar', 'tar/foobar/testdata2.txt');
        $this->assertEquals($pathsReadRef, $pathsRead);
    } finally {
        // remove the extracted files even when an assertion above fails
        self::RDelete($out);
    }
}

/**
* recursive rmdir()/unlink()
*
Expand Down
Loading