From 6df25480da4bd7950d17b7a33c6668f95b1540c5 Mon Sep 17 00:00:00 2001 From: Ivan Pakhomov Date: Tue, 5 Feb 2019 22:29:34 +0300 Subject: [PATCH 1/2] Drop any tags from h1 --- src/Readability.php | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Readability.php b/src/Readability.php index 7b7eed6b..02849edd 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -522,7 +522,17 @@ private function getArticleTitle() $hOnes = $this->dom->getElementsByTagName('h1'); if ($hOnes->length === 1) { - $curTitle = $hOnes->item(0)->nodeValue; + $header = $hOnes->item(0); + + try { + foreach ($header->getElementsByTagName('*') as $element) { + $header->removeChild($element); + } + } catch (\Exception $e) { + } + + $curTitle = $header->nodeValue; + $this->logger->info(sprintf('[Metadata] Using title from an H1 node: \'%s\'', $curTitle)); } } From 9672a556aaa512abb5c75b5e28003c72258081e8 Mon Sep 17 00:00:00 2001 From: Ivan Pakhomov Date: Tue, 5 Feb 2019 22:40:09 +0300 Subject: [PATCH 2/2] Remove spaces --- src/Readability.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Readability.php b/src/Readability.php index 02849edd..e7be4e45 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -530,9 +530,9 @@ private function getArticleTitle() } } catch (\Exception $e) { } - + $curTitle = $header->nodeValue; - + $this->logger->info(sprintf('[Metadata] Using title from an H1 node: \'%s\'', $curTitle)); } }