Newer
Older
* This file is part of the package itzbund/gsb-metadata-cleaner of the GSB 11 Project by ITZBund.
*
* Copyright (C) 2023 - 2024 Bundesrepublik Deutschland, vertreten durch das
* BMI/ITZBund. Author: Ole Hartwig, Patrick Schriner
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
use ITZBund\GsbMetadataCleaner\Configuration\ExtensionConfiguration as GsbMetadataCleanerExtensionConfiguration;
use Psr\Log\LoggerInterface;
use Psr\Log\LogLevel;
use TYPO3\CMS\Core\Database\ConnectionPool;
use TYPO3\CMS\Core\Resource\FileInterface;
use TYPO3\CMS\Core\Utility\GeneralUtility;
/**
* Tag names that are always added to the keep list for PDF files,
* in addition to the tags configured on the file's storage
* (merged in getEscapedTagArguments()).
*
* @var array<string>
*/
private array $forcedKeepTagsForPdf = ['Xmp-dc:Rights'];
/**
* Tag names that are always added to the keep list for image files,
* in addition to the tags configured on the file's storage
* (merged in getEscapedTagArguments()).
*
* @var array<string>
*/
private array $forcedKeepTagsForImage = ['copyright'];
private readonly GsbMetadataCleanerExtensionConfiguration $extensionConfiguration,
private readonly LoggerInterface $logger
/**
* We want to process images and pdf files
*
* exiftool might throw an error on processing files it cannot write, but will leave the file as is
*
* @return bool
*/
public function canProcessFile(FileInterface $file): bool
{
    // A bit odd, but passing writeable=false gives us the absolute path of a local file,
    // whereas writeable=true would give us a temp copy.
    $absoluteFilePath = $file->getStorage()->getFileForLocalProcessing($file, false);
    if (!is_file($absoluteFilePath) || !is_writable($absoluteFilePath)) {
        return false;
    }

    $mimeType = strtolower($file->getMimeType());
    if ($mimeType === 'application/pdf') {
        return true;
    }

    // Pad to two parts so a MIME type without a "/" cannot raise an
    // "undefined array key" warning while destructuring.
    [$fileType, $specificType] = array_pad(explode('/', $mimeType, 2), 2, '');

    // SVG files report "image/svg+xml", so the subtype must be matched by prefix;
    // comparing against the bare string "svg" would never exclude them.
    return $fileType === 'image' && !str_starts_with($specificType, 'svg');
}
* Strip the given file of its exif metadata, *except* for "copyright" (if applicable)
* @param FileInterface $file the file to be processed
*
* @throws \InvalidArgumentException
public function removeMetadata(FileInterface $file): void
// writeable=false: ask the storage for the file's own local path rather than a temp copy.
$filePath = $file->getStorage()->getFileForLocalProcessing($file, false);
// Reject the path when EITHER check fails, matching canProcessFile(). With `&&` the
// exception was only thrown when both checks failed, so an existing but read-only
// file (or a writable directory) was handed to exiftool anyway.
if (!is_file($filePath) || !is_writable($filePath)) {
throw new \InvalidArgumentException('File not writeable', 1701857184);
// exiftool options: -all= removes every tag, -tagsFromFile @ copies the listed keep tags
// back from the file itself, -overwrite_original replaces the file without a backup copy.
'%s -overwrite_original -all= -tagsFromFile @ %s %s',
escapeshellarg($this->extensionConfiguration->getExifToolPath()),
// Not escaped again here: getEscapedTagArguments() already shell-escapes each tag.
$this->getEscapedTagArguments($file),
$output = [];
$returnValue = 0;
CommandUtility::exec($command, $output, $returnValue);
// A non-zero exit status is logged together with the command output; it is not thrown.
if ($returnValue > 0) {
$this->logger->log(LogLevel::ERROR, 'exiftool failed to strip tags', array_merge($output, ['file' => $filePath]));
} else {
$this->logger->log(LogLevel::DEBUG, 'exiftool used to strip tags', ['file' => $filePath]);
// PDFs are additionally linearized with qpdf when the extension setting enables it.
if ($this->shouldUseQpdf($file)) {
$this->linearizePdf($file);
}
* @param FileInterface $file the file to be processed
* @return bool true if it's a pdf and the setting has been enabled
protected function shouldUseQpdf(FileInterface $file): bool
// Both conditions must hold: the lower-cased MIME type is exactly "application/pdf"
// AND the extension configuration's getUseQpdf() is truthy.
if (strtolower($file->getMimeType()) === 'application/pdf' && $this->extensionConfiguration->getUseQpdf()) {
return true;
/**
* Linearize a PDF file
* This results in old tags being deleted for good and the pdf getting optimized for the web
*
* @param FileInterface $file the file to be processed
* @throws \InvalidArgumentException
*/
protected function linearizePdf(FileInterface $file): void
{
// writeable=false: ask the storage for the file's own local path rather than a temp copy.
$filePath = $file->getStorage()->getFileForLocalProcessing($file, false);
// Reject the path when EITHER check fails, matching canProcessFile(). With `&&` the
// exception was only thrown when both checks failed, so an existing but read-only
// file (or a writable directory) was handed to qpdf anyway.
if (!is_file($filePath) || !is_writable($filePath)) {
throw new \InvalidArgumentException('File not writeable', 1701857185);
}
// qpdf options: --linearize writes a linearized ("web-optimized") PDF,
// --replace-input writes the result back over the input file.
$command = sprintf(
'%s --replace-input --linearize %s',
escapeshellarg($this->extensionConfiguration->getQpdfToolPath()),
escapeshellarg($filePath)
);
$output = [];
$returnValue = 0;
CommandUtility::exec($command, $output, $returnValue);
// A non-zero exit status is logged together with the command output; it is not thrown.
if ($returnValue > 0) {
$this->logger->log(LogLevel::ERROR, 'qpdf failed to linearize file', array_merge($output, ['file' => $filePath]));
} else {
$this->logger->log(LogLevel::DEBUG, 'qpdf linearized file', ['file' => $filePath]);
/**
* Get all "keep" tag-related shell arguments to exiftool
*
* @param FileInterface $file the processed file
* @return string
* @throws \InvalidArgumentException
* @throws \UnexpectedValueException
protected function getEscapedTagArguments(FileInterface $file): string
// Strict comparison, consistent with shouldUseQpdf(); both operands are strings.
if (strtolower($file->getMimeType()) === 'application/pdf') {
// PDF keep list: tags configured on the storage plus the always-kept PDF tags.
$tags = $this->getKeepTagsForPdfFromStorage($file);
$tags = array_merge($tags, $this->forcedKeepTagsForPdf);
// Image keep list: tags configured on the storage plus the always-kept image tags.
$tags = $this->getKeepTagsForImageFromStorage($file);
$tags = array_merge($tags, $this->forcedKeepTagsForImage);
// Each tag becomes its own shell-escaped "-TAG" argument; the arguments are then
// joined with single spaces so the result can be dropped into the exiftool command line.
$escapedTagArguments = array_map(
    static fn (string $tag): string => escapeshellarg('-' . $tag),
    $tags
);
return implode(' ', $escapedTagArguments);
/**
* Get the exiftool_keep_pdf_tags value of the file's storage as an array
*
* @return array<string>
* @throws \InvalidArgumentException
* @throws \UnexpectedValueException
* @throws Exception
*/
protected function getKeepTagsForPdfFromStorage(FileInterface $file): array
{
// Read the exiftool_keep_pdf_tags column from the sys_file_storage row whose uid
// matches the file's storage; the uid is bound as an integer named parameter.
$storage = $file->getStorage();
$queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('sys_file_storage');
$keepPdfTags = $queryBuilder->select('exiftool_keep_pdf_tags')
->from('sys_file_storage')
->where($queryBuilder->expr()->eq('uid', $queryBuilder->createNamedParameter($storage->getUid(), Connection::PARAM_INT)))
->executeQuery()
->fetchOne();
// A `false` result (presumably: no matching row -- confirm against the DBAL docs)
// is treated like an empty column value.
if ($keepPdfTags === false) {
$keepPdfTags = '';
}
// The column holds a comma-separated list; split it into trimmed tag names.
// NOTE(review): the third argument is expected to drop empty entries -- confirm against trimExplode().
return GeneralUtility::trimExplode(',', (string)$keepPdfTags, true);
/**
* Get the exiftool_keep_image_tags value of the file's storage as an array
*
* @return array<string>
* @throws \InvalidArgumentException
* @throws \UnexpectedValueException
* @throws Exception
*/
protected function getKeepTagsForImageFromStorage(FileInterface $file): array
{
// Read the exiftool_keep_image_tags column from the sys_file_storage row whose uid
// matches the file's storage; the uid is bound as an integer named parameter.
$storage = $file->getStorage();
$queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('sys_file_storage');
$keepImageTags = $queryBuilder->select('exiftool_keep_image_tags')
->from('sys_file_storage')
->where($queryBuilder->expr()->eq('uid', $queryBuilder->createNamedParameter($storage->getUid(), Connection::PARAM_INT)))
->executeQuery()
->fetchOne();
// A `false` result (presumably: no matching row -- confirm against the DBAL docs)
// is treated like an empty column value.
if ($keepImageTags === false) {
$keepImageTags = '';
}
// The column holds a comma-separated list; split it into trimmed tag names.
// NOTE(review): the third argument is expected to drop empty entries -- confirm against trimExplode().
return GeneralUtility::trimExplode(',', (string)$keepImageTags, true);