<?php namespace Pelago\Emogrifier\HtmlProcessor; use Pelago\Emogrifier\CssInliner; use Pelago\Emogrifier\Utilities\ArrayIntersector; /** * This class can remove things from HTML. * * @author Oliver Klee <github@oliverklee.de> * @author Jake Hotson <jake.github@qzdesign.co.uk> */ class HtmlPruner extends AbstractHtmlProcessor { /** * We need to look for display:none, but we need to do a case-insensitive search. Since DOMDocument only * supports XPath 1.0, lower-case() isn't available to us. We've thus far only set attributes to lowercase, * not attribute values. Consequently, we need to translate() the letters that would be in 'NONE' ("NOE") * to lowercase. * * @var string */ const DISPLAY_NONE_MATCHER = '//*[@style and contains(translate(translate(@style," ",""),"NOE","noe"),"display:none")' . ' and not(@class and contains(concat(" ", normalize-space(@class), " "), " -emogrifier-keep "))]'; /** * Removes elements that have a "display: none;" style. * * @return self fluent interface */ public function removeElementsWithDisplayNone() { $elementsWithStyleDisplayNone = $this->xPath->query(self::DISPLAY_NONE_MATCHER); if ($elementsWithStyleDisplayNone->length === 0) { return $this; } /** @var \DOMNode $element */ foreach ($elementsWithStyleDisplayNone as $element) { $parentNode = $element->parentNode; if ($parentNode !== null) { $parentNode->removeChild($element); } } return $this; } /** * Removes classes that are no longer required (e.g. because there are no longer any CSS rules that reference them) * from `class` attributes. * * Note that this does not inspect the CSS, but expects to be provided with a list of classes that are still in use. * * This method also has the (presumably beneficial) side-effect of minifying (removing superfluous whitespace from) * `class` attributes. * * @param string[] $classesToKeep names of classes that should not be removed * * @return self fluent interface */ public function removeRedundantClasses(array $classesToKeep = []) { $elementsWithClassAttribute = $this->xPath->query('//*[@class]'); if ($classesToKeep !== []) { $this->removeClassesFromElements($elementsWithClassAttribute, $classesToKeep); } else { // Avoid unnecessary processing if there are no classes to keep. $this->removeClassAttributeFromElements($elementsWithClassAttribute); } return $this; } /** * Removes classes from the `class` attribute of each element in `$elements`, except any in `$classesToKeep`, * removing the `class` attribute itself if the resultant list is empty. * * @param \DOMNodeList $elements * @param string[] $classesToKeep * * @return void */ private function removeClassesFromElements(\DOMNodeList $elements, array $classesToKeep) { $classesToKeepIntersector = new ArrayIntersector($classesToKeep); /** @var \DOMNode $element */ foreach ($elements as $element) { $elementClasses = \preg_split('/\\s++/', \trim($element->getAttribute('class'))); $elementClassesToKeep = $classesToKeepIntersector->intersectWith($elementClasses); if ($elementClassesToKeep !== []) { $element->setAttribute('class', \implode(' ', $elementClassesToKeep)); } else { $element->removeAttribute('class'); } } } /** * Removes the `class` attribute from each element in `$elements`. * * @param \DOMNodeList $elements * * @return void */ private function removeClassAttributeFromElements(\DOMNodeList $elements) { /** @var \DOMNode $element */ foreach ($elements as $element) { $element->removeAttribute('class'); } } /** * After CSS has been inlined, there will likely be some classes in `class` attributes that are no longer referenced * by any remaining (uninlinable) CSS. This method removes such classes. * * Note that it does not inspect the remaining CSS, but uses information readily available from the `CssInliner` * instance about the CSS rules that could not be inlined. * * @param CssInliner $cssInliner object instance that performed the CSS inlining * * @return self fluent interface * * @throws \BadMethodCallException if `inlineCss` has not first been called on `$cssInliner` */ public function removeRedundantClassesAfterCssInlined(CssInliner $cssInliner) { $classesToKeepAsKeys = []; foreach ($cssInliner->getMatchingUninlinableSelectors() as $selector) { \preg_match_all('/\\.(-?+[_a-zA-Z][\\w\\-]*+)/', $selector, $matches); $classesToKeepAsKeys += \array_fill_keys($matches[1], true); } $this->removeRedundantClasses(\array_keys($classesToKeepAsKeys)); return $this; } }