Skip to content

Commit 257a9ab

Browse files
authored
Access data using an iterator to stream records (#11)
Thanks @smalot for the contribution!
1 parent d3cdb97 commit 257a9ab

File tree

2 files changed

+74
-23
lines changed

2 files changed

+74
-23
lines changed

lib/avro/data_file.php

+58-23
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,6 @@ private function read_header()
293293
* @return array of data from object container.
294294
* @throws AvroDataIOException
295295
* @throws AvroIOException
296-
* @internal Would be nice to implement data() as an iterator, I think
297296
*/
298297
public function data()
299298
{
@@ -309,34 +308,70 @@ public function data()
309308
if ($this->is_eof())
310309
break;
311310

312-
$length = $this->read_block_header();
313-
$decoder = $this->decoder;
314-
if ($this->codec == AvroDataIO::DEFLATE_CODEC) {
315-
if (!function_exists('gzinflate')) {
316-
throw new AvroDataIOException('"gzinflate" function not available, "zlib" extension required.');
317-
}
318-
$compressed = $decoder->read($length);
319-
$datum = gzinflate($compressed);
320-
$decoder = new AvroIOBinaryDecoder(new AvroStringIO($datum));
321-
} elseif ($this->codec == AvroDataIO::SNAPPY_CODEC) {
322-
if (!function_exists('snappy_uncompress')) {
323-
throw new AvroDataIOException('"snappy_uncompress" function not available, "snappy" extension required.');
324-
}
325-
$compressed = $decoder->read($length-4);
326-
$datum = snappy_uncompress($compressed);
327-
$crc32 = unpack('N', $decoder->read(4));
328-
if ($crc32[1] != crc32($datum)) {
329-
throw new AvroDataIOException('Invalid CRC32 checksum.');
330-
}
331-
$decoder = new AvroIOBinaryDecoder(new AvroStringIO($datum));
332-
}
311+
$decoder = $this->apply_codec($this->decoder, $this->codec);
333312
}
334-
$data []= $this->datum_reader->read($decoder);
313+
$data[] = $this->datum_reader->read($decoder);
335314
$this->block_count -= 1;
336315
}
337316
return $data;
338317
}
339318

319+
/**
 * Streams the records of the object container one at a time.
 *
 * Unlike data(), which collects every datum into an array before
 * returning, this method yields each datum as soon as it has been read,
 * so only the current record has to be held by the caller.
 *
 * @return \Generator yields each datum read by the datum reader
 * @throws AvroDataIOException
 * @throws AvroIOException
 */
public function data_iterator()
{
    for (;;) {
        // No datums left in the current block: move on to the next one.
        if (0 == $this->block_count) {
            if ($this->is_eof()) {
                return;
            }

            // is_eof() is only re-checked when skip_sync() reports true.
            if ($this->skip_sync() && $this->is_eof()) {
                return;
            }

            $decoder = $this->apply_codec($this->decoder, $this->codec);
        }

        yield $this->datum_reader->read($decoder);
        $this->block_count -= 1;
    }
}
342+
343+
/**
344+
* @param AvroIOBinaryDecoder $decoder
345+
* @param string $codec
346+
* @return AvroIOBinaryDecoder
347+
* @throws AvroDataIOException
348+
* @throws AvroIOException
349+
*/
350+
protected function apply_codec($decoder, $codec)
351+
{
352+
$length = $this->read_block_header();
353+
if ($codec == AvroDataIO::DEFLATE_CODEC) {
354+
if (!function_exists('gzinflate')) {
355+
throw new AvroDataIOException('"gzinflate" function not available, "zlib" extension required.');
356+
}
357+
$compressed = $decoder->read($length);
358+
$datum = gzinflate($compressed);
359+
$decoder = new AvroIOBinaryDecoder(new AvroStringIO($datum));
360+
} elseif ($codec == AvroDataIO::SNAPPY_CODEC) {
361+
if (!function_exists('snappy_uncompress')) {
362+
throw new AvroDataIOException('"snappy_uncompress" function not available, "snappy" extension required.');
363+
}
364+
$compressed = $decoder->read($length-4);
365+
$datum = snappy_uncompress($compressed);
366+
$crc32 = unpack('N', $decoder->read(4));
367+
if ($crc32[1] != crc32($datum)) {
368+
throw new AvroDataIOException('Invalid CRC32 checksum.');
369+
}
370+
$decoder = new AvroIOBinaryDecoder(new AvroStringIO($datum));
371+
}
372+
return $decoder;
373+
}
374+
340375
/**
341376
* Closes this writer (and its AvroIO object.)
342377
* @uses AvroIO::close()

test/FileIOTest.php

+16
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,17 @@ private function read()
4848
return $reader->data();
4949
}
5050

51+
/**
 * Reads the whole test file through the streaming API.
 *
 * @return array every row yielded by data_iterator(), in order
 */
private function readIterator()
{
    $reader = AvroDataIO::open_file($this->getFileName());

    // Keys are discarded so the rows come back as a plain list.
    return iterator_to_array($reader->data_iterator(), false);
}
61+
5162
public function testReading()
5263
{
5364
$expected = [
@@ -62,7 +73,12 @@ public function testReading()
6273
'favorite_numbers' => [],
6374
]
6475
];
76+
77+
// Classic loading.
6578
$this->assertEquals($expected, $this->read());
79+
80+
// Iterator loading.
81+
$this->assertEquals($expected, $this->readIterator());
6682
}
6783

6884
/**

0 commit comments

Comments
 (0)