

/*
 * This file is part of the package itzbund/gsb-metadata-cleaner of the GSB 11 Project by ITZBund.
 * Copyright (C) 2023 - 2024 Bundesrepublik Deutschland, vertreten durch das
 * BMI/ITZBund. Author: Ole Hartwig, Patrick Schriner
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

namespace ITZBund\GsbMetadataCleaner\Service;

use Doctrine\DBAL\Exception;
use ITZBund\GsbMetadataCleaner\Configuration\ExtensionConfiguration as GsbMetadataCleanerExtensionConfiguration;
use Psr\Log\LoggerInterface;
use Psr\Log\LogLevel;
use TYPO3\CMS\Core\Database\Connection;
use TYPO3\CMS\Core\Database\ConnectionPool;
use TYPO3\CMS\Core\Resource\FileInterface;
use TYPO3\CMS\Core\Utility\CommandUtility;
use TYPO3\CMS\Core\Utility\GeneralUtility;

class ExifToolService
    /** @var array<string> */
    private array $forcedKeepTagsForPdf = ['Xmp-dc:Rights']; // always appended to the PDF keep-tag list

    /** @var array<string> */
    private array $forcedKeepTagsForImage = ['copyright']; // always appended to the keep-tag list of non-PDF files

    /**
     * Both collaborators are injected and kept as read-only promoted properties.
     *
     * @param GsbMetadataCleanerExtensionConfiguration $extensionConfiguration queried for the qpdf switch (getUseQpdf())
     * @param LoggerInterface $logger receives the error/debug messages about the external tool runs
     */
    public function __construct(
        private readonly GsbMetadataCleanerExtensionConfiguration $extensionConfiguration,
        private readonly LoggerInterface $logger,
    ) {
    }

    /**
     * We only want to process images and PDF files.
     * exiftool might throw an error when processing files it cannot write, but it will leave such files as they are.
     * @return bool
     */
    public function canProcessFile(FileInterface $file): bool
    {
        // Decides whether the file is a locally writable image (SVG excluded) or PDF.

        // A bit odd, but passing writeable=false gives us the absolute path of a local file,
        // whereas writeable=true would give us a temporary copy.
        $absoluteFilePath = $file->getStorage()->getFileForLocalProcessing($file, false);
        if (!is_file($absoluteFilePath) || !is_writable($absoluteFilePath)) {
            return false;
        }

        $mimeType = strtolower($file->getMimeType());
        if ($mimeType === 'application/pdf') {
            return true;
        }

        // Pad the parts so that a MIME type without "/" does not raise an "undefined array key" warning.
        [$fileType, $specificType] = array_pad(explode('/', $mimeType), 2, '');

        // SVG files are reported as "image/svg+xml"; a comparison against plain "svg" alone
        // never matches them, so both spellings are rejected here.
        return $fileType === 'image' && !in_array($specificType, ['svg', 'svg+xml'], true);
    }

    /**
     * Strip the given file of its exif metadata, *except* for "copyright" (if applicable)
     * @param FileInterface $file the file to be processed
     * @throws \InvalidArgumentException
     */
    public function removeMetadata(FileInterface $file): void
        // writeable=false: ask for the file's own local path rather than a temporary copy
        // (see the remark in canProcessFile()).
        $filePath = $file->getStorage()->getFileForLocalProcessing($file, false);
        // NOTE(review): because of `&&` this throws only when the path is neither a regular file
        // nor writable; canProcessFile() combines the same two checks with `||` - confirm which is intended.
        if (!is_file($filePath) && !is_writable($filePath)) {
            throw new \InvalidArgumentException('File not writeable', 1701857184);
        // Command template: "-all=" clears all tags, "-tagsFromFile @" copies the listed tags back from the
        // file itself and "-overwrite_original" suppresses the "_original" backup (see the exiftool manual).
        // NOTE(review): the sprintf() arguments are not visible in this view; presumably the exiftool
        // binary, the keep-tag arguments and the escaped file path - confirm.
        $command = sprintf(
            '%s -overwrite_original -all= -tagsFromFile @ %s %s',
        $output = [];
        $returnValue = 0;
        // $output and $returnValue are expected to be filled in by CommandUtility::exec().
        CommandUtility::exec($command, $output, $returnValue);
        // A positive return value is only logged (together with the collected output); nothing is thrown here.
        if ($returnValue > 0) {
            $this->logger->log(LogLevel::ERROR, 'exiftool failed to strip tags', array_merge($output, ['file' => $filePath]));
        } else {
            $this->logger->log(LogLevel::DEBUG, 'exiftool used to strip tags', ['file' => $filePath]);
            // NOTE(review): the body of this branch is not visible in this view; presumably
            // linearizePdf() is called for PDF files when qpdf is enabled - confirm.
            if ($this->shouldUseQpdf($file)) {

    /**
     * Check if a file should be fed to qpdf
     * @param FileInterface $file the file to be processed
     * @return bool true if it's a PDF and the setting has been enabled
     */
    protected function shouldUseQpdf(FileInterface $file): bool
    {
        // qpdf is only relevant for PDF files, and only when the extension setting enables it.
        return strtolower($file->getMimeType()) === 'application/pdf'
            && $this->extensionConfiguration->getUseQpdf();
    }

    /**
     * Linearize a PDF file
     * This results in old tags being deleted for good and the PDF getting optimized for the web
     * @param FileInterface $file the file to be processed
     * @throws \InvalidArgumentException
     */
    protected function linearizePdf(FileInterface $file): void
        // writeable=false: ask for the file's own local path rather than a temporary copy
        // (see the remark in canProcessFile()).
        $filePath = $file->getStorage()->getFileForLocalProcessing($file, false);
        // NOTE(review): because of `&&` this throws only when the path is neither a regular file
        // nor writable; canProcessFile() combines the same two checks with `||` - confirm which is intended.
        if (!is_file($filePath) && !is_writable($filePath)) {
            throw new \InvalidArgumentException('File not writeable', 1701857185);
        // Command template: "--linearize" writes a linearized (web-optimized) PDF and "--replace-input"
        // overwrites the input file with the result (see the qpdf manual).
        // NOTE(review): the sprintf() arguments are not visible in this view; presumably the qpdf
        // binary and the escaped file path - confirm.
        $command = sprintf(
            '%s --replace-input --linearize %s',
        $output = [];
        $returnValue = 0;
        // $output and $returnValue are expected to be filled in by CommandUtility::exec().
        CommandUtility::exec($command, $output, $returnValue);
        // A positive return value is only logged (together with the collected output); nothing is thrown here.
        if ($returnValue > 0) {
            $this->logger->log(LogLevel::ERROR, 'qpdf failed to linearize file', array_merge($output, ['file' => $filePath]));
        } else {
            $this->logger->log(LogLevel::DEBUG, 'qpdf linearized file', ['file' => $filePath]);

    /**
     * Get all "keep" tag-related shell arguments to exiftool
     * @param FileInterface $file the processed file
     * @return string
     * @throws \InvalidArgumentException
     * @throws \UnexpectedValueException
     * @throws Exception
     */
    protected function getEscapedTagArguments(FileInterface $file): string
    {
        // Builds one shell-escaped "-<tag>" argument per tag to keep: the tags configured on the
        // file's storage plus the tags that are always kept for the respective file type.
        if (strtolower($file->getMimeType()) === 'application/pdf') {
            $tags = array_merge($this->getKeepTagsForPdfFromStorage($file), $this->forcedKeepTagsForPdf);
        } else {
            // Everything that is not a PDF is treated as an image.
            $tags = array_merge($this->getKeepTagsForImageFromStorage($file), $this->forcedKeepTagsForImage);
        }

        // Escape every argument individually so that a tag name cannot break out of the shell command.
        return implode(' ', array_map(
            static fn (string $tag): string => escapeshellarg('-' . $tag),
            $tags,
        ));
    }

    /**
     * Get the exiftool_keep_pdf_tags value of the file's storage as an array
     * @return array<string>
     * @throws \InvalidArgumentException
     * @throws \UnexpectedValueException
     * @throws Exception
     */
    protected function getKeepTagsForPdfFromStorage(FileInterface $file): array
        $storage = $file->getStorage();
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('sys_file_storage');
        // Select the exiftool_keep_pdf_tags column of the record whose uid equals the uid of the file's storage.
        // NOTE(review): the remainder of the query chain (from/execute/fetch) is not visible in this view - confirm.
        $keepPdfTags = $queryBuilder->select('exiftool_keep_pdf_tags')
            ->where($queryBuilder->expr()->eq('uid', $queryBuilder->createNamedParameter($storage->getUid(), Connection::PARAM_INT)))
        // A result of false (presumably "no matching row") is treated like an empty list.
        if ($keepPdfTags === false) {
            $keepPdfTags = '';
        // Split the comma-separated value into a list; the third argument (true) is assumed to drop empty entries.
        return GeneralUtility::trimExplode(',', (string)$keepPdfTags, true);

    /**
     * Get the exiftool_keep_image_tags value of the file's storage as an array
     * @return array<string>
     * @throws \InvalidArgumentException
     * @throws \UnexpectedValueException
     * @throws Exception
     */
    protected function getKeepTagsForImageFromStorage(FileInterface $file): array
        $storage = $file->getStorage();
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('sys_file_storage');
        // Select the exiftool_keep_image_tags column of the record whose uid equals the uid of the file's storage.
        // NOTE(review): the remainder of the query chain (from/execute/fetch) is not visible in this view - confirm.
        $keepImageTags = $queryBuilder->select('exiftool_keep_image_tags')
            ->where($queryBuilder->expr()->eq('uid', $queryBuilder->createNamedParameter($storage->getUid(), Connection::PARAM_INT)))
        // A result of false (presumably "no matching row") is treated like an empty list.
        if ($keepImageTags === false) {
            $keepImageTags = '';
        // Split the comma-separated value into a list; the third argument (true) is assumed to drop empty entries.
        return GeneralUtility::trimExplode(',', (string)$keepImageTags, true);