<?php

declare(strict_types=1);

/*
 * This file is part of the package itzbund/gsb-metadata-cleaner of the GSB 11 Project by ITZBund.
 *
 * Copyright (C) 2023 - 2024 Bundesrepublik Deutschland, vertreten durch das
 * BMI/ITZBund. Author: Ole Hartwig, Patrick Schriner
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

namespace ITZBund\GsbMetadataCleaner\Service;

use Doctrine\DBAL\Exception;
use ITZBund\GsbMetadataCleaner\Configuration\ExtensionConfiguration as GsbMetadataCleanerExtensionConfiguration;
use Psr\Log\LoggerInterface;
use Psr\Log\LogLevel;
use TYPO3\CMS\Core\Database\Connection;
use TYPO3\CMS\Core\Database\ConnectionPool;
use TYPO3\CMS\Core\Resource\FileInterface;
use TYPO3\CMS\Core\Utility\CommandUtility;
use TYPO3\CMS\Core\Utility\GeneralUtility;

/**
 * Strips metadata from images and PDF files by shelling out to exiftool
 * (and, optionally, qpdf for PDF files).
 *
 * Which tags survive is the union of a per-storage, comma-separated list read
 * from the `sys_file_storage` table and a hard-coded list of tags that are
 * always kept.
 */
class ExifToolService
{
    /**
     * Tags that are always kept for PDF files, regardless of storage settings.
     *
     * @var array<string>
     */
    private array $forcedKeepTagsForPdf = ['Xmp-dc:Rights'];

    /**
     * Tags that are always kept for image files, regardless of storage settings.
     *
     * @var array<string>
     */
    private array $forcedKeepTagsForImage = ['copyright'];

    public function __construct(
        private readonly GsbMetadataCleanerExtensionConfiguration $extensionConfiguration,
        private readonly LoggerInterface $logger
    ) {}

    /**
     * We want to process images and pdf files
     *
     * exiftool might throw an error on processing files it cannot write, but will leave the file as is
     *
     * @param FileInterface $file the file to check
     * @return bool true for a writable, local, non-SVG image or a PDF file
     */
    public function canProcessFile(FileInterface $file): bool
    {
        // a bit odd, but passing writeable=false will give us the absolute path for a local file
        // where writeable=true will give us a temp copy
        $absoluteFilePath = $file->getStorage()->getFileForLocalProcessing($file, false);
        if (!is_file($absoluteFilePath) || !is_writable($absoluteFilePath)) {
            return false;
        }

        $mimeType = strtolower($file->getMimeType());
        if ($mimeType === 'application/pdf') {
            return true;
        }

        // Pad to two elements so a MIME type without a "/" does not trigger an
        // undefined-key warning during destructuring.
        [$fileType, $specificType] = array_pad(explode('/', $mimeType, 2), 2, '');

        // The registered SVG media type is "image/svg+xml", so an exact comparison
        // against "svg" never matches; compare by prefix to exclude SVG reliably.
        return $fileType === 'image' && !str_starts_with($specificType, 'svg');
    }

    /**
     * Strip the given file of its exif metadata, *except* for the tags that are
     * configured (or forced) to be kept, e.g. "copyright".
     *
     * The file is modified in place. A failing exiftool call is logged, not thrown.
     * On success the file is additionally linearized with qpdf when it is a PDF
     * and the extension configuration enables qpdf.
     *
     * @param FileInterface $file the file to be processed
     *
     * @throws \InvalidArgumentException if the file is not a writable regular file
     * @throws \UnexpectedValueException
     * @throws Exception
     */
    public function removeMetadata(FileInterface $file): void
    {
        $filePath = $this->requireWritableLocalPath($file, 1701857184);

        // "-all=" drops every tag, "-tagsFromFile @" re-adds the listed keep tags
        // from the file itself.
        $command = sprintf(
            '%s -overwrite_original -all= -tagsFromFile @ %s %s',
            escapeshellarg($this->extensionConfiguration->getExifToolPath()),
            $this->getEscapedTagArguments($file),
            escapeshellarg($filePath)
        );

        $output = [];
        $returnValue = 0;
        CommandUtility::exec($command, $output, $returnValue);

        // Any non-zero exit status (including a negative one) means failure.
        if ($returnValue !== 0) {
            $this->logger->log(
                LogLevel::ERROR,
                'exiftool failed to strip tags',
                array_merge($output, ['file' => $filePath])
            );
            return;
        }

        $this->logger->log(LogLevel::DEBUG, 'exiftool used to strip tags', ['file' => $filePath]);
        if ($this->shouldUseQpdf($file)) {
            $this->linearizePdf($file);
        }
    }

    /**
     * Check if a file should be fed to qpdf
     *
     * @param FileInterface $file the file to be processed
     * @return bool true if it's a pdf and the setting has been enabled
     */
    protected function shouldUseQpdf(FileInterface $file): bool
    {
        return $this->isPdf($file) && $this->extensionConfiguration->getUseQpdf();
    }

    /**
     * Linearize a PDF file
     * This results in old tags being deleted for good and the pdf getting optimized for the web
     *
     * The file is replaced in place. A failing qpdf call is logged, not thrown.
     *
     * @param FileInterface $file the file to be processed
     * @throws \InvalidArgumentException if the file is not a writable regular file
     */
    protected function linearizePdf(FileInterface $file): void
    {
        $filePath = $this->requireWritableLocalPath($file, 1701857185);

        $command = sprintf(
            '%s --replace-input --linearize %s',
            escapeshellarg($this->extensionConfiguration->getQpdfToolPath()),
            escapeshellarg($filePath)
        );

        $output = [];
        $returnValue = 0;
        CommandUtility::exec($command, $output, $returnValue);

        // Any non-zero exit status (including a negative one) means failure.
        if ($returnValue !== 0) {
            $this->logger->log(
                LogLevel::ERROR,
                'qpdf failed to linearize file',
                array_merge($output, ['file' => $filePath])
            );
            return;
        }

        $this->logger->log(LogLevel::DEBUG, 'qpdf linearized file', ['file' => $filePath]);
    }

    /**
     * Get all "keep" tag-related shell arguments to exiftool
     *
     * Each tag is turned into a separately shell-escaped "-<tag>" argument; the
     * arguments are joined by single spaces.
     *
     * @param FileInterface $file the processed file
     * @return string
     * @throws \InvalidArgumentException
     * @throws \UnexpectedValueException
     * @throws Exception
     */
    protected function getEscapedTagArguments(FileInterface $file): string
    {
        // Storage-configured tags come first, followed by the tags that are always kept.
        $tags = $this->isPdf($file)
            ? array_merge($this->getKeepTagsForPdfFromStorage($file), $this->forcedKeepTagsForPdf)
            : array_merge($this->getKeepTagsForImageFromStorage($file), $this->forcedKeepTagsForImage);

        $escapedTagArguments = array_map(
            static fn (string $tag): string => escapeshellarg('-' . $tag),
            $tags
        );

        return implode(' ', $escapedTagArguments);
    }

    /**
     * Get the exiftool_keep_pdf_tags value of the file's storage as an array
     *
     * @param FileInterface $file the processed file
     * @return array<string>
     * @throws \InvalidArgumentException
     * @throws \UnexpectedValueException
     * @throws Exception
     */
    protected function getKeepTagsForPdfFromStorage(FileInterface $file): array
    {
        return $this->fetchKeepTagsFromStorage($file, 'exiftool_keep_pdf_tags');
    }

    /**
     * Get the exiftool_keep_image_tags value of the file's storage as an array
     *
     * @param FileInterface $file the processed file
     * @return array<string>
     * @throws \InvalidArgumentException
     * @throws \UnexpectedValueException
     * @throws Exception
     */
    protected function getKeepTagsForImageFromStorage(FileInterface $file): array
    {
        return $this->fetchKeepTagsFromStorage($file, 'exiftool_keep_image_tags');
    }

    /**
     * Read a comma-separated tag list column from the file's sys_file_storage row.
     *
     * @param FileInterface $file the file whose storage row is looked up by uid
     * @param string $column name of the sys_file_storage column holding the list
     * @return array<string> trimmed, non-empty tag names; empty if nothing was found
     * @throws \InvalidArgumentException
     * @throws \UnexpectedValueException
     * @throws Exception
     */
    private function fetchKeepTagsFromStorage(FileInterface $file, string $column): array
    {
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('sys_file_storage');

        $storageUid = $queryBuilder->createNamedParameter($file->getStorage()->getUid(), Connection::PARAM_INT);
        $keepTags = $queryBuilder->select($column)
            ->from('sys_file_storage')
            ->where($queryBuilder->expr()->eq('uid', $storageUid))
            ->executeQuery()
            ->fetchOne();

        // A false result (no value fetched) is treated as an empty list.
        if ($keepTags === false) {
            $keepTags = '';
        }

        return GeneralUtility::trimExplode(',', (string)$keepTags, true);
    }

    /**
     * Resolve the local path of a file and make sure it can be modified in place.
     *
     * @param FileInterface $file the file to resolve
     * @param int $errorCode exception code to use when the check fails
     * @return string the local path of the file
     * @throws \InvalidArgumentException if the path is not a regular file or not writable
     */
    private function requireWritableLocalPath(FileInterface $file, int $errorCode): string
    {
        $filePath = $file->getStorage()->getFileForLocalProcessing($file, false);

        // Both conditions are required: a regular file that is read-only must be
        // rejected as well, matching the check in canProcessFile().
        if (!is_file($filePath) || !is_writable($filePath)) {
            throw new \InvalidArgumentException('File not writeable', $errorCode);
        }

        return $filePath;
    }

    /**
     * Tell whether the file's MIME type is application/pdf (case-insensitive).
     */
    private function isPdf(FileInterface $file): bool
    {
        return strtolower($file->getMimeType()) === 'application/pdf';
    }
}