Friday, 5 March 2021

Count number of PDF file pages in PHP

The function below returns the number of pages in a PDF file, using PHP:
/**
 * Counts the pages of a PDF document read from a stream.
 *
 * The stream is scanned in 4096-byte chunks. For every chunk the function
 * first looks for a page-tree (`/Type /Pages`) node and, if one is found,
 * returns its `/Count` value immediately. When no such node is found in any
 * chunk, the result is the number of individual `/Type /Page` objects seen.
 *
 * Limitations: each chunk is matched on its own, so a token sequence that
 * straddles a chunk boundary is not detected.
 *
 * The stream is rewound before scanning and again before returning.
 *
 * @param StreamInterface $stream Seekable, readable stream holding the PDF bytes.
 *
 * @return int Page count, or 0 when nothing recognisable was found.
 */
function getPageCount(StreamInterface $stream): int
{
    // Page-tree patterns, tried in this order on every chunk. Each one
    // captures the digits following /Count in the named group "value".
    $pageTreePatterns = [
        // Root node, PDF 1.7+: /Type /Pages ... /Count N with no /Parent in between.
        '/\/Type\s*?\/Pages(?:(?!\/Parent).)*\/Count\s?(?<value>\d+)/',
        // Root node, PDF < 1.7: /Count N directly followed by /Type /Pages.
        '/\/Count\s?(?<value>\d+)\s?\/Type\s*?\/Pages/',
        // Root node, PDF 1.7: /Type /Pages, anything (including newlines), then /Count N.
        '/\/Type\s*?\/Pages\s.*\/Count\s?(?<value>\d+)/s',
    ];

    // Fallback: a single page object. The lookahead accepts any PDF delimiter
    // or whitespace after "Page", so "/Type/Page/Parent" and "/Type /Page>>"
    // are counted while "/Type /Pages" is not.
    $pageObjectPattern = '/\/Type\s*\/Page(?=[\s\/<>\[\](){}%])/';

    $pageObjects = 0;

    $stream->rewind();

    while (! $stream->eof()) {
        $chunk = $stream->read(4096);

        foreach ($pageTreePatterns as $pattern) {
            if (\preg_match($pattern, $chunk, $matches) === 1) {
                // Leave the stream at the start, same as the fallback path.
                $stream->rewind();

                return (int) $matches['value'];
            }
        }

        $found = \preg_match_all($pageObjectPattern, $chunk);

        // preg_match_all() yields false on a PCRE error; only add real counts.
        if ($found !== false) {
            $pageObjects += $found;
        }
    }

    $stream->rewind();

    return $pageObjects;
}