From 75160b12821f7f4299cce7f0b69c83c1502ae071 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anton=20Luka=20=C5=A0ijanec?= Date: Mon, 27 May 2024 13:08:29 +0200 Subject: 2024-02-19 upstream --- .../src/PhpSpreadsheet/Reader/Ods.php | 795 +++++++++++++++++++++ 1 file changed, 795 insertions(+) create mode 100644 vendor/phpoffice/phpspreadsheet/src/PhpSpreadsheet/Reader/Ods.php (limited to 'vendor/phpoffice/phpspreadsheet/src/PhpSpreadsheet/Reader/Ods.php') diff --git a/vendor/phpoffice/phpspreadsheet/src/PhpSpreadsheet/Reader/Ods.php b/vendor/phpoffice/phpspreadsheet/src/PhpSpreadsheet/Reader/Ods.php new file mode 100644 index 0000000..ea46b75 --- /dev/null +++ b/vendor/phpoffice/phpspreadsheet/src/PhpSpreadsheet/Reader/Ods.php @@ -0,0 +1,795 @@ +securityScanner = XmlScanner::getInstance($this); + } + + /** + * Can the current IReader read the file? + * + * @param string $pFilename + * + * @return bool + */ + public function canRead($pFilename) + { + File::assertFile($pFilename); + + $mimeType = 'UNKNOWN'; + + // Load file + + $zip = new ZipArchive(); + if ($zip->open($pFilename) === true) { + // check if it is an OOXML archive + $stat = $zip->statName('mimetype'); + if ($stat && ($stat['size'] <= 255)) { + $mimeType = $zip->getFromName($stat['name']); + } elseif ($zip->statName('META-INF/manifest.xml')) { + $xml = simplexml_load_string( + $this->securityScanner->scan($zip->getFromName('META-INF/manifest.xml')), + 'SimpleXMLElement', + Settings::getLibXmlLoaderOptions() + ); + $namespacesContent = $xml->getNamespaces(true); + if (isset($namespacesContent['manifest'])) { + $manifest = $xml->children($namespacesContent['manifest']); + foreach ($manifest as $manifestDataSet) { + $manifestAttributes = $manifestDataSet->attributes($namespacesContent['manifest']); + if ($manifestAttributes->{'full-path'} == '/') { + $mimeType = (string) $manifestAttributes->{'media-type'}; + + break; + } + } + } + } + + $zip->close(); + } + + return $mimeType === 'application/vnd.oasis.opendocument.spreadsheet'; + } + + /** + * Reads names of the worksheets from a file, without parsing the whole file to a PhpSpreadsheet object. + * + * @param string $pFilename + * + * @return string[] + */ + public function listWorksheetNames($pFilename) + { + File::assertFile($pFilename); + + $zip = new ZipArchive(); + if ($zip->open($pFilename) !== true) { + throw new ReaderException('Could not open ' . $pFilename . ' for reading! Error opening file.'); + } + + $worksheetNames = []; + + $xml = new XMLReader(); + $xml->xml( + $this->securityScanner->scanFile('zip://' . realpath($pFilename) . '#content.xml'), + null, + Settings::getLibXmlLoaderOptions() + ); + $xml->setParserProperty(2, true); + + // Step into the first level of content of the XML + $xml->read(); + while ($xml->read()) { + // Quickly jump through to the office:body node + while ($xml->name !== 'office:body') { + if ($xml->isEmptyElement) { + $xml->read(); + } else { + $xml->next(); + } + } + // Now read each node until we find our first table:table node + while ($xml->read()) { + if ($xml->name == 'table:table' && $xml->nodeType == XMLReader::ELEMENT) { + // Loop through each table:table node reading the table:name attribute for each worksheet name + do { + $worksheetNames[] = $xml->getAttribute('table:name'); + $xml->next(); + } while ($xml->name == 'table:table' && $xml->nodeType == XMLReader::ELEMENT); + } + } + } + + return $worksheetNames; + } + + /** + * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). + * + * @param string $pFilename + * + * @return array + */ + public function listWorksheetInfo($pFilename) + { + File::assertFile($pFilename); + + $worksheetInfo = []; + + $zip = new ZipArchive(); + if ($zip->open($pFilename) !== true) { + throw new ReaderException('Could not open ' . $pFilename . ' for reading! Error opening file.'); + } + + $xml = new XMLReader(); + $xml->xml( + $this->securityScanner->scanFile('zip://' . realpath($pFilename) . '#content.xml'), + null, + Settings::getLibXmlLoaderOptions() + ); + $xml->setParserProperty(2, true); + + // Step into the first level of content of the XML + $xml->read(); + while ($xml->read()) { + // Quickly jump through to the office:body node + while ($xml->name !== 'office:body') { + if ($xml->isEmptyElement) { + $xml->read(); + } else { + $xml->next(); + } + } + // Now read each node until we find our first table:table node + while ($xml->read()) { + if ($xml->name == 'table:table' && $xml->nodeType == XMLReader::ELEMENT) { + $worksheetNames[] = $xml->getAttribute('table:name'); + + $tmpInfo = [ + 'worksheetName' => $xml->getAttribute('table:name'), + 'lastColumnLetter' => 'A', + 'lastColumnIndex' => 0, + 'totalRows' => 0, + 'totalColumns' => 0, + ]; + + // Loop through each child node of the table:table element reading + $currCells = 0; + do { + $xml->read(); + if ($xml->name == 'table:table-row' && $xml->nodeType == XMLReader::ELEMENT) { + $rowspan = $xml->getAttribute('table:number-rows-repeated'); + $rowspan = empty($rowspan) ? 1 : $rowspan; + $tmpInfo['totalRows'] += $rowspan; + $tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells); + $currCells = 0; + // Step into the row + $xml->read(); + do { + $doread = true; + if ($xml->name == 'table:table-cell' && $xml->nodeType == XMLReader::ELEMENT) { + if (!$xml->isEmptyElement) { + ++$currCells; + $xml->next(); + $doread = false; + } + } elseif ($xml->name == 'table:covered-table-cell' && $xml->nodeType == XMLReader::ELEMENT) { + $mergeSize = $xml->getAttribute('table:number-columns-repeated'); + $currCells += (int) $mergeSize; + } + if ($doread) { + $xml->read(); + } + } while ($xml->name != 'table:table-row'); + } + } while ($xml->name != 'table:table'); + + $tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells); + $tmpInfo['lastColumnIndex'] = $tmpInfo['totalColumns'] - 1; + $tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1); + $worksheetInfo[] = $tmpInfo; + } + } + } + + return $worksheetInfo; + } + + /** + * Loads PhpSpreadsheet from file. + * + * @param string $pFilename + * + * @return Spreadsheet + */ + public function load($pFilename) + { + // Create new Spreadsheet + $spreadsheet = new Spreadsheet(); + + // Load into this instance + return $this->loadIntoExisting($pFilename, $spreadsheet); + } + + /** + * Loads PhpSpreadsheet from file into PhpSpreadsheet instance. + * + * @param string $pFilename + * + * @return Spreadsheet + */ + public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet) + { + File::assertFile($pFilename); + + $timezoneObj = new DateTimeZone('Europe/London'); + $GMT = new DateTimeZone('UTC'); + + $zip = new ZipArchive(); + if ($zip->open($pFilename) !== true) { + throw new Exception("Could not open {$pFilename} for reading! Error opening file."); + } + + // Meta + + $xml = @simplexml_load_string( + $this->securityScanner->scan($zip->getFromName('meta.xml')), + 'SimpleXMLElement', + Settings::getLibXmlLoaderOptions() + ); + if ($xml === false) { + throw new Exception('Unable to read data from {$pFilename}'); + } + + $namespacesMeta = $xml->getNamespaces(true); + + (new DocumentProperties($spreadsheet))->load($xml, $namespacesMeta); + + // Styles + + $dom = new DOMDocument('1.01', 'UTF-8'); + $dom->loadXML( + $this->securityScanner->scan($zip->getFromName('styles.xml')), + Settings::getLibXmlLoaderOptions() + ); + + $pageSettings = new PageSettings($dom); + + // Main Content + + $dom = new DOMDocument('1.01', 'UTF-8'); + $dom->loadXML( + $this->securityScanner->scan($zip->getFromName('content.xml')), + Settings::getLibXmlLoaderOptions() + ); + + $officeNs = $dom->lookupNamespaceUri('office'); + $tableNs = $dom->lookupNamespaceUri('table'); + $textNs = $dom->lookupNamespaceUri('text'); + $xlinkNs = $dom->lookupNamespaceUri('xlink'); + + $pageSettings->readStyleCrossReferences($dom); + + // Content + + $spreadsheets = $dom->getElementsByTagNameNS($officeNs, 'body') + ->item(0) + ->getElementsByTagNameNS($officeNs, 'spreadsheet'); + + foreach ($spreadsheets as $workbookData) { + /** @var DOMElement $workbookData */ + $tables = $workbookData->getElementsByTagNameNS($tableNs, 'table'); + + $worksheetID = 0; + foreach ($tables as $worksheetDataSet) { + /** @var DOMElement $worksheetDataSet */ + $worksheetName = $worksheetDataSet->getAttributeNS($tableNs, 'name'); + + // Check loadSheetsOnly + if ( + isset($this->loadSheetsOnly) + && $worksheetName + && !in_array($worksheetName, $this->loadSheetsOnly) + ) { + continue; + } + + $worksheetStyleName = $worksheetDataSet->getAttributeNS($tableNs, 'style-name'); + + // Create sheet + if ($worksheetID > 0) { + $spreadsheet->createSheet(); // First sheet is added by default + } + $spreadsheet->setActiveSheetIndex($worksheetID); + + if ($worksheetName) { + // Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in + // formula cells... during the load, all formulae should be correct, and we're simply + // bringing the worksheet name in line with the formula, not the reverse + $spreadsheet->getActiveSheet()->setTitle((string) $worksheetName, false, false); + } + + // Go through every child of table element + $rowID = 1; + foreach ($worksheetDataSet->childNodes as $childNode) { + /** @var DOMElement $childNode */ + + // Filter elements which are not under the "table" ns + if ($childNode->namespaceURI != $tableNs) { + continue; + } + + $key = $childNode->nodeName; + + // Remove ns from node name + if (strpos($key, ':') !== false) { + $keyChunks = explode(':', $key); + $key = array_pop($keyChunks); + } + + switch ($key) { + case 'table-header-rows': + /// TODO :: Figure this out. This is only a partial implementation I guess. + // ($rowData it's not used at all and I'm not sure that PHPExcel + // has an API for this) + +// foreach ($rowData as $keyRowData => $cellData) { +// $rowData = $cellData; +// break; +// } + break; + case 'table-row': + if ($childNode->hasAttributeNS($tableNs, 'number-rows-repeated')) { + $rowRepeats = $childNode->getAttributeNS($tableNs, 'number-rows-repeated'); + } else { + $rowRepeats = 1; + } + + $columnID = 'A'; + foreach ($childNode->childNodes as $key => $cellData) { + // @var \DOMElement $cellData + + if ($this->getReadFilter() !== null) { + if (!$this->getReadFilter()->readCell($columnID, $rowID, $worksheetName)) { + ++$columnID; + + continue; + } + } + + // Initialize variables + $formatting = $hyperlink = null; + $hasCalculatedValue = false; + $cellDataFormula = ''; + + if ($cellData->hasAttributeNS($tableNs, 'formula')) { + $cellDataFormula = $cellData->getAttributeNS($tableNs, 'formula'); + $hasCalculatedValue = true; + } + + // Annotations + $annotation = $cellData->getElementsByTagNameNS($officeNs, 'annotation'); + + if ($annotation->length > 0) { + $textNode = $annotation->item(0)->getElementsByTagNameNS($textNs, 'p'); + + if ($textNode->length > 0) { + $text = $this->scanElementForText($textNode->item(0)); + + $spreadsheet->getActiveSheet() + ->getComment($columnID . $rowID) + ->setText($this->parseRichText($text)); +// ->setAuthor( $author ) + } + } + + // Content + + /** @var DOMElement[] $paragraphs */ + $paragraphs = []; + + foreach ($cellData->childNodes as $item) { + /** @var DOMElement $item */ + + // Filter text:p elements + if ($item->nodeName == 'text:p') { + $paragraphs[] = $item; + } + } + + if (count($paragraphs) > 0) { + // Consolidate if there are multiple p records (maybe with spans as well) + $dataArray = []; + + // Text can have multiple text:p and within those, multiple text:span. + // text:p newlines, but text:span does not. + // Also, here we assume there is no text data is span fields are specified, since + // we have no way of knowing proper positioning anyway. + + foreach ($paragraphs as $pData) { + $dataArray[] = $this->scanElementForText($pData); + } + $allCellDataText = implode("\n", $dataArray); + + $type = $cellData->getAttributeNS($officeNs, 'value-type'); + + switch ($type) { + case 'string': + $type = DataType::TYPE_STRING; + $dataValue = $allCellDataText; + + foreach ($paragraphs as $paragraph) { + $link = $paragraph->getElementsByTagNameNS($textNs, 'a'); + if ($link->length > 0) { + $hyperlink = $link->item(0)->getAttributeNS($xlinkNs, 'href'); + } + } + + break; + case 'boolean': + $type = DataType::TYPE_BOOL; + $dataValue = ($allCellDataText == 'TRUE') ? true : false; + + break; + case 'percentage': + $type = DataType::TYPE_NUMERIC; + $dataValue = (float) $cellData->getAttributeNS($officeNs, 'value'); + + // percentage should always be float + //if (floor($dataValue) == $dataValue) { + // $dataValue = (int) $dataValue; + //} + $formatting = NumberFormat::FORMAT_PERCENTAGE_00; + + break; + case 'currency': + $type = DataType::TYPE_NUMERIC; + $dataValue = (float) $cellData->getAttributeNS($officeNs, 'value'); + + if (floor($dataValue) == $dataValue) { + $dataValue = (int) $dataValue; + } + $formatting = NumberFormat::FORMAT_CURRENCY_USD_SIMPLE; + + break; + case 'float': + $type = DataType::TYPE_NUMERIC; + $dataValue = (float) $cellData->getAttributeNS($officeNs, 'value'); + + if (floor($dataValue) == $dataValue) { + if ($dataValue == (int) $dataValue) { + $dataValue = (int) $dataValue; + } + } + + break; + case 'date': + $type = DataType::TYPE_NUMERIC; + $value = $cellData->getAttributeNS($officeNs, 'date-value'); + + $dateObj = new DateTime($value, $GMT); + $dateObj->setTimeZone($timezoneObj); + [$year, $month, $day, $hour, $minute, $second] = explode( + ' ', + $dateObj->format('Y m d H i s') + ); + + $dataValue = Date::formattedPHPToExcel( + (int) $year, + (int) $month, + (int) $day, + (int) $hour, + (int) $minute, + (int) $second + ); + + if ($dataValue != floor($dataValue)) { + $formatting = NumberFormat::FORMAT_DATE_XLSX15 + . ' ' + . NumberFormat::FORMAT_DATE_TIME4; + } else { + $formatting = NumberFormat::FORMAT_DATE_XLSX15; + } + + break; + case 'time': + $type = DataType::TYPE_NUMERIC; + + $timeValue = $cellData->getAttributeNS($officeNs, 'time-value'); + + $dataValue = Date::PHPToExcel( + strtotime( + '01-01-1970 ' . implode(':', sscanf($timeValue, 'PT%dH%dM%dS')) + ) + ); + $formatting = NumberFormat::FORMAT_DATE_TIME4; + + break; + default: + $dataValue = null; + } + } else { + $type = DataType::TYPE_NULL; + $dataValue = null; + } + + if ($hasCalculatedValue) { + $type = DataType::TYPE_FORMULA; + $cellDataFormula = substr($cellDataFormula, strpos($cellDataFormula, ':=') + 1); + $cellDataFormula = $this->convertToExcelFormulaValue($cellDataFormula); + } + + if ($cellData->hasAttributeNS($tableNs, 'number-columns-repeated')) { + $colRepeats = (int) $cellData->getAttributeNS($tableNs, 'number-columns-repeated'); + } else { + $colRepeats = 1; + } + + if ($type !== null) { + for ($i = 0; $i < $colRepeats; ++$i) { + if ($i > 0) { + ++$columnID; + } + + if ($type !== DataType::TYPE_NULL) { + for ($rowAdjust = 0; $rowAdjust < $rowRepeats; ++$rowAdjust) { + $rID = $rowID + $rowAdjust; + + $cell = $spreadsheet->getActiveSheet() + ->getCell($columnID . $rID); + + // Set value + if ($hasCalculatedValue) { + $cell->setValueExplicit($cellDataFormula, $type); + } else { + $cell->setValueExplicit($dataValue, $type); + } + + if ($hasCalculatedValue) { + $cell->setCalculatedValue($dataValue); + } + + // Set other properties + if ($formatting !== null) { + $spreadsheet->getActiveSheet() + ->getStyle($columnID . $rID) + ->getNumberFormat() + ->setFormatCode($formatting); + } else { + $spreadsheet->getActiveSheet() + ->getStyle($columnID . $rID) + ->getNumberFormat() + ->setFormatCode(NumberFormat::FORMAT_GENERAL); + } + + if ($hyperlink !== null) { + $cell->getHyperlink() + ->setUrl($hyperlink); + } + } + } + } + } + + // Merged cells + if ( + $cellData->hasAttributeNS($tableNs, 'number-columns-spanned') + || $cellData->hasAttributeNS($tableNs, 'number-rows-spanned') + ) { + if (($type !== DataType::TYPE_NULL) || (!$this->readDataOnly)) { + $columnTo = $columnID; + + if ($cellData->hasAttributeNS($tableNs, 'number-columns-spanned')) { + $columnIndex = Coordinate::columnIndexFromString($columnID); + $columnIndex += (int) $cellData->getAttributeNS($tableNs, 'number-columns-spanned'); + $columnIndex -= 2; + + $columnTo = Coordinate::stringFromColumnIndex($columnIndex + 1); + } + + $rowTo = $rowID; + + if ($cellData->hasAttributeNS($tableNs, 'number-rows-spanned')) { + $rowTo = $rowTo + (int) $cellData->getAttributeNS($tableNs, 'number-rows-spanned') - 1; + } + + $cellRange = $columnID . $rowID . ':' . $columnTo . $rowTo; + $spreadsheet->getActiveSheet()->mergeCells($cellRange); + } + } + + ++$columnID; + } + $rowID += $rowRepeats; + + break; + } + } + $pageSettings->setPrintSettingsForWorksheet($spreadsheet->getActiveSheet(), $worksheetStyleName); + ++$worksheetID; + } + + $this->readDefinedRanges($spreadsheet, $workbookData, $tableNs); + $this->readDefinedExpressions($spreadsheet, $workbookData, $tableNs); + } + $spreadsheet->setActiveSheetIndex(0); + // Return + return $spreadsheet; + } + + /** + * Recursively scan element. + * + * @return string + */ + protected function scanElementForText(DOMNode $element) + { + $str = ''; + foreach ($element->childNodes as $child) { + /** @var DOMNode $child */ + if ($child->nodeType == XML_TEXT_NODE) { + $str .= $child->nodeValue; + } elseif ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName == 'text:s') { + // It's a space + + // Multiple spaces? + /** @var DOMAttr $cAttr */ + $cAttr = $child->attributes->getNamedItem('c'); + if ($cAttr) { + $multiplier = (int) $cAttr->nodeValue; + } else { + $multiplier = 1; + } + + $str .= str_repeat(' ', $multiplier); + } + + if ($child->hasChildNodes()) { + $str .= $this->scanElementForText($child); + } + } + + return $str; + } + + /** + * @param string $is + * + * @return RichText + */ + private function parseRichText($is) + { + $value = new RichText(); + $value->createText($is); + + return $value; + } + + private function convertToExcelAddressValue(string $openOfficeAddress): string + { + $excelAddress = $openOfficeAddress; + + // Cell range 3-d reference + // As we don't support 3-d ranges, we're just going to take a quick and dirty approach + // and assume that the second worksheet reference is the same as the first + $excelAddress = preg_replace('/\$?([^\.]+)\.([^\.]+):\$?([^\.]+)\.([^\.]+)/miu', '$1!$2:$4', $excelAddress); + // Cell range reference in another sheet + $excelAddress = preg_replace('/\$?([^\.]+)\.([^\.]+):\.([^\.]+)/miu', '$1!$2:$3', $excelAddress); + // Cell reference in another sheet + $excelAddress = preg_replace('/\$?([^\.]+)\.([^\.]+)/miu', '$1!$2', $excelAddress); + // Cell range reference + $excelAddress = preg_replace('/\.([^\.]+):\.([^\.]+)/miu', '$1:$2', $excelAddress); + // Simple cell reference + $excelAddress = preg_replace('/\.([^\.]+)/miu', '$1', $excelAddress); + + return $excelAddress; + } + + private function convertToExcelFormulaValue(string $openOfficeFormula): string + { + $temp = explode('"', $openOfficeFormula); + $tKey = false; + foreach ($temp as &$value) { + // Only replace in alternate array entries (i.e. non-quoted blocks) + if ($tKey = !$tKey) { + // Cell range reference in another sheet + $value = preg_replace('/\[\$?([^\.]+)\.([^\.]+):\.([^\.]+)\]/miu', '$1!$2:$3', $value); + // Cell reference in another sheet + $value = preg_replace('/\[\$?([^\.]+)\.([^\.]+)\]/miu', '$1!$2', $value); + // Cell range reference + $value = preg_replace('/\[\.([^\.]+):\.([^\.]+)\]/miu', '$1:$2', $value); + // Simple cell reference + $value = preg_replace('/\[\.([^\.]+)\]/miu', '$1', $value); + + $value = Calculation::translateSeparator(';', ',', $value, $inBraces); + } + } + + // Then rebuild the formula string + $excelFormula = implode('"', $temp); + + return $excelFormula; + } + + /** + * Read any Named Ranges that are defined in this spreadsheet. + */ + private function readDefinedRanges(Spreadsheet $spreadsheet, DOMElement $workbookData, string $tableNs): void + { + $namedRanges = $workbookData->getElementsByTagNameNS($tableNs, 'named-range'); + foreach ($namedRanges as $definedNameElement) { + $definedName = $definedNameElement->getAttributeNS($tableNs, 'name'); + $baseAddress = $definedNameElement->getAttributeNS($tableNs, 'base-cell-address'); + $range = $definedNameElement->getAttributeNS($tableNs, 'cell-range-address'); + + $baseAddress = $this->convertToExcelAddressValue($baseAddress); + $range = $this->convertToExcelAddressValue($range); + + $this->addDefinedName($spreadsheet, $baseAddress, $definedName, $range); + } + } + + /** + * Read any Named Formulae that are defined in this spreadsheet. + */ + private function readDefinedExpressions(Spreadsheet $spreadsheet, DOMElement $workbookData, string $tableNs): void + { + $namedExpressions = $workbookData->getElementsByTagNameNS($tableNs, 'named-expression'); + foreach ($namedExpressions as $definedNameElement) { + $definedName = $definedNameElement->getAttributeNS($tableNs, 'name'); + $baseAddress = $definedNameElement->getAttributeNS($tableNs, 'base-cell-address'); + $expression = $definedNameElement->getAttributeNS($tableNs, 'expression'); + + $baseAddress = $this->convertToExcelAddressValue($baseAddress); + $expression = $this->convertToExcelFormulaValue($expression); + + $this->addDefinedName($spreadsheet, $baseAddress, $definedName, $expression); + } + } + + /** + * Assess scope and store the Defined Name. + */ + private function addDefinedName(Spreadsheet $spreadsheet, string $baseAddress, string $definedName, string $value): void + { + [$sheetReference] = Worksheet::extractSheetTitle($baseAddress, true); + $worksheet = $spreadsheet->getSheetByName($sheetReference); + // Worksheet might still be null if we're only loading selected sheets rather than the full spreadsheet + if ($worksheet !== null) { + $spreadsheet->addDefinedName(DefinedName::createInstance((string) $definedName, $worksheet, $value)); + } + } +} -- cgit v1.2.3