diff --git a/src/ConfluencePageContentDownloader.php b/src/ConfluencePageContentDownloader.php
index 6920545..5da8d73 100755
--- a/src/ConfluencePageContentDownloader.php
+++ b/src/ConfluencePageContentDownloader.php
@@ -8,6 +8,7 @@
 use Artemeon\Confluence\Endpoint\Download;
 use Artemeon\Confluence\Endpoint\Dto\ConfluencePage;
 use Artemeon\Confluence\MacroReplacer\MacroReplacerInterface;
+use DOMDocument;
 use Exception;
 
 class ConfluencePageContentDownloader
@@ -25,6 +26,8 @@ public function __construct(Content $contentEndpoint, Download $downloadEndpoint
 
     public function downloadPageContent(ConfluencePage $page, bool $withAttachments = true): void
     {
+        $page = $this->repairPageContent($page);
+
         try {
             foreach ($this->macroReplacers as $macroReplacer) {
                 if ($macroReplacer instanceof MacroReplacerInterface) {
@@ -42,10 +45,48 @@ public function downloadPageContent(ConfluencePage $page, bool $withAttachments
             foreach ($attachments as $attachment) {
                 $this->downloadEndpoint->downloadAttachment($attachment);
             }
-
         } catch (Exception $e) {
             echo 'An error has occurred: ' . $e->getMessage();
         }
     }
 
+    /**
+     * Normalizes the page's HTML by round-tripping it through DOMDocument.
+     *
+     * When the parsed document does not validate, the page content is replaced with
+     * the re-serialized children of the parsed <body> element. Empty content and
+     * documents without a <body> are returned unchanged.
+     */
+    private function repairPageContent(ConfluencePage $page): ConfluencePage
+    {
+        $content = $page->getContent();
+        if (trim($content) === '') {
+            // DOMDocument::loadHTML() rejects an empty source, so there is nothing to repair.
+            return $page;
+        }
+
+        // Collect libxml parse errors internally instead of emitting PHP warnings.
+        $previousLibxmlState = libxml_use_internal_errors(true);
+
+        try {
+            $domDocument = new DOMDocument();
+            $domDocument->loadHTML($content);
+            $body = $domDocument->getElementsByTagName('body')->item(0);
+
+            // item(0) is null when the parser produced no <body> element.
+            if ($body !== null && !$domDocument->validate()) {
+                $pageContent = '';
+                foreach ($body->childNodes as $child) {
+                    $pageContent .= $domDocument->saveHTML($child);
+                }
+
+                $page->setContent($pageContent);
+            }
+        } finally {
+            libxml_clear_errors();
+            libxml_use_internal_errors($previousLibxmlState);
+        }
+
+        return $page;
+    }
 }
diff --git a/src/Endpoint/Content.php b/src/Endpoint/Content.php
index c59ba1a..b8911c1 100755
--- a/src/Endpoint/Content.php
+++ b/src/Endpoint/Content.php
@@ -94,7 +94,11 @@ public function findChildAttachments(string $pageId): array
     {
         $response = $this->client->get(
             'wiki/rest/api/content/' . $pageId . '/child/attachment',
-            array_merge([], $this->auth->getAuthenticationArray())
+            array_merge([
+                'query' => [
+                    'expand' => 'history,history.lastUpdated'
+                ]
+            ], $this->auth->getAuthenticationArray())
         );
 
         if ($response->getStatusCode() === 200) {
diff --git a/src/Endpoint/Download.php b/src/Endpoint/Download.php
index 0b0f1e2..d2366e2 100755
--- a/src/Endpoint/Download.php
+++ b/src/Endpoint/Download.php
@@ -50,12 +50,47 @@ public function downloadAttachment(ConfluenceAttachment $attachment): void
             return;
         }
 
-        // Verwende den relativen Pfad aus der API, um das Attachment herunterzuladen
-        $attachmentContent = $this->client->get(
-            '/wiki/' . $attachment->findDownloadPath(),
-            array_merge([], $this->auth->getAuthenticationArray())
-        )->getBody()->getContents();
+        if ($this->shouldAttachmentBeUpdated($attachment)) {
+            // Use the relative path from the API to download the attachment
+            $attachmentContent = $this->client->get(
+                '/wiki/' . $attachment->findDownloadPath(),
+                array_merge([], $this->auth->getAuthenticationArray())
+            )->getBody()->getContents();
 
-        file_put_contents($this->downloadFolder . '/' . $attachment->getTitle(), $attachmentContent);
+            file_put_contents($this->getAttachmentFilePath($attachment), $attachmentContent);
+        }
+    }
+
+    /**
+     * Builds the local target path of an attachment inside the download folder.
+     */
+    private function getAttachmentFilePath(ConfluenceAttachment $attachment): string
+    {
+        return $this->downloadFolder . '/' . $attachment->getTitle();
+    }
+
+    /**
+     * Decides whether the attachment has to be (re-)downloaded.
+     *
+     * Returns true when no local copy exists, its modification time cannot be read,
+     * or the attachment carries no last-updated date; otherwise true only if the
+     * local file is older than the attachment's last update.
+     */
+    private function shouldAttachmentBeUpdated(ConfluenceAttachment $attachment): bool
+    {
+        $lastUpdated = $attachment->getLastUpdated();
+        if ($lastUpdated === null) {
+            // Without a remote timestamp freshness cannot be judged, so download again.
+            return true;
+        }
+
+        $filepath = $this->getAttachmentFilePath($attachment);
+        if (!file_exists($filepath)) {
+            return true;
+        }
+
+        $filemtime = filemtime($filepath);
+
+        return !is_int($filemtime) || $filemtime < $lastUpdated->getTimestamp();
     }
 }
diff --git a/src/Endpoint/Dto/ConfluenceAttachment.php b/src/Endpoint/Dto/ConfluenceAttachment.php
index 70fefaf..2841d1f 100755
--- a/src/Endpoint/Dto/ConfluenceAttachment.php
+++ b/src/Endpoint/Dto/ConfluenceAttachment.php
@@ -4,17 +4,22 @@
 
 namespace Artemeon\Confluence\Endpoint\Dto;
 
+use DateTime;
+
 class ConfluenceAttachment
 {
     private array $rawData;
 
     private string $title;
 
+    private ?DateTime $lastUpdated;
+
     public function __construct(array $rawData)
     {
         $this->rawData = $rawData;
 
         $this->title = $rawData['title'];
+        $this->lastUpdated = isset($rawData['history']['lastUpdated']['when']) ? new DateTime($rawData['history']['lastUpdated']['when']) : null;
     }
 
     public function findDownloadPath(): ?string
@@ -26,4 +31,12 @@ public function getTitle(): string
     {
         return $this->title;
     }
+
+    /**
+     * Returns the attachment's last-updated time, or null when the raw data has no history.lastUpdated.when entry.
+     */
+    public function getLastUpdated(): ?DateTime
+    {
+        return $this->lastUpdated;
+    }
 }