<?php
/*
* This file is part of PHPUnit.
*
* (c) Sebastian Bergmann <sebastian@phpunit.de>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* XML helpers.
*
* @since Class available since Release 3.2.0
*/
class PHPUnit_Util_XML
{
/**
* Escapes a string for the use in XML documents
* Any Unicode character is allowed, excluding the surrogate blocks, FFFE,
* and FFFF (not even as character reference).
* See http://www.w3.org/TR/xml/#charsets
*
* @param string $string
*
* @return string
*
* @since Method available since Release 3.4.6
*/
public static function prepareString($string)
{
return preg_replace(
'/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f]/',
'',
htmlspecialchars(
PHPUnit_Util_String::convertToUtf8($string),
ENT_QUOTES,
'UTF-8'
)
);
}
/**
* Loads an XML (or HTML) file into a DOMDocument object.
*
* @param string $filename
* @param bool $isHtml
* @param bool $xinclude
* @param bool $strict
*
* @return DOMDocument
*
* @since Method available since Release 3.3.0
*/
public static function loadFile($filename, $isHtml = false, $xinclude = false, $strict = false)
{
$reporting = error_reporting(0);
$contents = file_get_contents($filename);
error_reporting($reporting);
if ($contents === false) {
throw new PHPUnit_Framework_Exception(
sprintf(
'Could not read "%s".',
$filename
)
);
}
return self::load($contents, $isHtml, $filename, $xinclude, $strict);
}
/**
* Load an $actual document into a DOMDocument. This is called
* from the selector assertions.
*
* If $actual is already a DOMDocument, it is returned with
* no changes. Otherwise, $actual is loaded into a new DOMDocument
* as either HTML or XML, depending on the value of $isHtml. If $isHtml is
* false and $xinclude is true, xinclude is performed on the loaded
* DOMDocument.
*
* Note: prior to PHPUnit 3.3.0, this method loaded a file and
* not a string as it currently does. To load a file into a
* DOMDocument, use loadFile() instead.
*
* @param string|DOMDocument $actual
* @param bool $isHtml
* @param string $filename
* @param bool $xinclude
* @param bool $strict
*
* @return DOMDocument
*
* @since Method available since Release 3.3.0
*/
public static function load($actual, $isHtml = false, $filename = '', $xinclude = false, $strict = false)
{
if ($actual instanceof DOMDocument) {
return $actual;
}
if (!is_string($actual)) {
throw new PHPUnit_Framework_Exception('Could not load XML from ' . gettype($actual));
}
if ($actual === '') {
throw new PHPUnit_Framework_Exception('Could not load XML from empty string');
}
// Required for XInclude on Windows.
if ($xinclude) {
$cwd = getcwd();
@chdir(dirname($filename));
}
$document = new DOMDocument;
$document->preserveWhiteSpace = false;
$internal = libxml_use_internal_errors(true);
$message = '';
$reporting = error_reporting(0);
if ('' !== $filename) {
// Necessary for xinclude
$document->documentURI = $filename;
}
if ($isHtml) {
$loaded = $document->loadHTML($actual);
} else {
$loaded = $document->loadXML($actual);
}
if (!$isHtml && $xinclude) {
$document->xinclude();
}
foreach (libxml_get_errors() as $error) {
$message .= "\n" . $error->message;
}
libxml_use_internal_errors($internal);
error_reporting($reporting);
if ($xinclude) {
@chdir($cwd);
}
if ($loaded === false || ($strict && $message !== '')) {
if ($filename !== '') {
throw new PHPUnit_Framework_Exception(
sprintf(
'Could not load "%s".%s',
$filename,
$message != '' ? "\n" . $message : ''
)
);
} else {
if ($message === '') {
$message = 'Could not load XML for unknown reason';
}
throw new PHPUnit_Framework_Exception($message);
}
}
return $document;
}
/**
* @param DOMNode $node
*
* @return string
*
* @since Method available since Release 3.4.0
*/
public static function nodeToText(DOMNode $node)
{
if ($node->childNodes->length == 1) {
return $node->textContent;
}
$result = '';
foreach ($node->childNodes as $childNode) {
$result .= $node->ownerDocument->saveXML($childNode);
}
return $result;
}
/**
* @param DOMNode $node
*
* @since Method available since Release 3.3.0
*/
public static function removeCharacterDataNodes(DOMNode $node)
{
if ($node->hasChildNodes()) {
for ($i = $node->childNodes->length - 1; $i >= 0; $i--) {
if (($child = $node->childNodes->item($i)) instanceof DOMCharacterData) {
$node->removeChild($child);
}
}
}
}
/**
* "Convert" a DOMElement object into a PHP variable.
*
* @param DOMElement $element
*
* @return mixed
*
* @since Method available since Release 3.4.0
*/
public static function xmlToVariable(DOMElement $element)
{
$variable = null;
switch ($element->tagName) {
case 'array':
$variable = array();
foreach ($element->childNodes as $entry) {
if (!$entry instanceof DOMElement || $entry->tagName !== 'element') {
continue;
}
$item = $entry->childNodes->item(0);
if ($item instanceof DOMText) {
$item = $entry->childNodes->item(1);
}
$value = self::xmlToVariable($item);
if ($entry->hasAttribute('key')) {
$variable[(string) $entry->getAttribute('key')] = $value;
} else {
$variable[] = $value;
}
}
break;
case 'object':
$className = $element->getAttribute('class');
if ($element->hasChildNodes()) {
$arguments = $element->childNodes->item(1)->childNodes;
$constructorArgs = array();
foreach ($arguments as $argument) {
if ($argument instanceof DOMElement) {
$constructorArgs[] = self::xmlToVariable($argument);
}
}
$class = new ReflectionClass($className);
$variable = $class->newInstanceArgs($constructorArgs);
} else {
$variable = new $className;
}
break;
case 'boolean':
$variable = $element->textContent == 'true' ? true : false;
break;
case 'integer':
case 'double':
case 'string':
$variable = $element->textContent;
settype($variable, $element->tagName);
break;
}
return $variable;
}
/**
* Validate list of keys in the associative array.
*
* @param array $hash
* @param array $validKeys
*
* @return array
*
* @throws PHPUnit_Framework_Exception
*
* @since Method available since Release 3.3.0
*/
public static function assertValidKeys(array $hash, array $validKeys)
{
$valids = array();
// Normalize validation keys so that we can use both indexed and
// associative arrays.
foreach ($validKeys as $key => $val) {
is_int($key) ? $valids[$val] = null : $valids[$key] = $val;
}
$validKeys = array_keys($valids);
// Check for invalid keys.
foreach ($hash as $key => $value) {
if (!in_array($key, $validKeys)) {
$unknown[] = $key;
}
}
if (!empty($unknown)) {
throw new PHPUnit_Framework_Exception(
'Unknown key(s): ' . implode(', ', $unknown)
);
}
// Add default values for any valid keys that are empty.
foreach ($valids as $key => $value) {
if (!isset($hash[$key])) {
$hash[$key] = $value;
}
}
return $hash;
}
/**
* Parse a CSS selector into an associative array suitable for
* use with findNodes().
*
* @param string $selector
* @param mixed $content
*
* @return array
*
* @since Method available since Release 3.3.0
*/
public static function convertSelectToTag($selector, $content = true)
{
$selector = trim(preg_replace("/\s+/", ' ', $selector));
// substitute spaces within attribute value
while (preg_match('/\[[^\]]+"[^"]+\s[^"]+"\]/', $selector)) {
$selector = preg_replace(
'/(\[[^\]]+"[^"]+)\s([^"]+"\])/',
'$1__SPACE__$2',
$selector
);
}
if (strstr($selector, ' ')) {
$elements = explode(' ', $selector);
} else {
$elements = array($selector);
}
$previousTag = array();
foreach (array_reverse($elements) as $element) {
$element = str_replace('__SPACE__', ' ', $element);
// child selector
if ($element == '>') {
$previousTag = array('child' => $previousTag['descendant']);
continue;
}
// adjacent-sibling selector
if ($element == '+') {
$previousTag = array('adjacent-sibling' => $previousTag['descendant']);
continue;
}
$tag = array();
// match element tag
preg_match("/^([^\.#\[]*)/", $element, $eltMatches);
if (!empty($eltMatches[1])) {
$tag['tag'] = $eltMatches[1];
}
// match attributes (\[[^\]]*\]*), ids (#[^\.#\[]*),
// and classes (\.[^\.#\[]*))
preg_match_all(
"/(\[[^\]]*\]*|#[^\.#\[]*|\.[^\.#\[]*)/",
$element,
$matches
);
if (!empty($matches[1])) {
$classes = array();
$attrs = array();
foreach ($matches[1] as $match) {
// id matched
if (substr($match, 0, 1) == '#') {
$tag['id'] = substr($match, 1);
} // class matched
elseif (substr($match, 0, 1) == '.') {
$classes[] = substr($match, 1);
} // attribute matched
elseif (substr($match, 0, 1) == '[' &&
substr($match, -1, 1) == ']') {
$attribute = substr($match, 1, strlen($match) - 2);
$attribute = str_replace('"', '', $attribute);
// match single word
if (strstr($attribute, '~=')) {
list($key, $value) = explode('~=', $attribute);
$value = "regexp:/.*\b$value\b.*/";
} // match substring
elseif (strstr($attribute, '*=')) {
list($key, $value) = explode('*=', $attribute);
$value = "regexp:/.*$value.*/";
} // exact match
else {
list($key, $value) = explode('=', $attribute);
}
$attrs[$key] = $value;
}
}
if (!empty($classes)) {
$tag['class'] = implode(' ', $classes);
}
if (!empty($attrs)) {
$tag['attributes'] = $attrs;
}
}
// tag content
if (is_string($content)) {
$tag['content'] = $content;
}
// determine previous child/descendants
if (!empty($previousTag['descendant'])) {
$tag['descendant'] = $previousTag['descendant'];
} elseif (!empty($previousTag['child'])) {
$tag['child'] = $previousTag['child'];
} elseif (!empty($previousTag['adjacent-sibling'])) {
$tag['adjacent-sibling'] = $previousTag['adjacent-sibling'];
unset($tag['content']);
}
$previousTag = array('descendant' => $tag);
}
return $tag;
}
/**
* Parse an $actual document and return an array of DOMNodes
* matching the CSS $selector. If an error occurs, it will
* return false.
*
* To only return nodes containing a certain content, give
* the $content to match as a string. Otherwise, setting
* $content to true will return all nodes matching $selector.
*
* The $actual document may be a DOMDocument or a string
* containing XML or HTML, identified by $isHtml.
*
* @param array $selector
* @param string $content
* @param mixed $actual
* @param bool $isHtml
*
* @return bool|array
*
* @since Method available since Release 3.3.0
*/
public static function cssSelect($selector, $content, $actual, $isHtml = true)
{
$matcher = self::convertSelectToTag($selector, $content);
$dom = self::load($actual, $isHtml);
$tags = self::findNodes($dom, $matcher, $isHtml);
return $tags;
}
/**
* Parse out the options from the tag using DOM object tree.
*
* @param DOMDocument $dom
* @param array $options
* @param bool $isHtml
*
* @return array
*
* @since Method available since Release 3.3.0
*/
public static function findNodes(DOMDocument $dom, array $options, $isHtml = true)
{
$valid = array(
'id', 'class', 'tag', 'content', 'attributes', 'parent',
'child', 'ancestor', 'descendant', 'children', 'adjacent-sibling'
);
$filtered = array();
$options = self::assertValidKeys($options, $valid);
// find the element by id
if ($options['id']) {
$options['attributes']['id'] = $options['id'];
}
if ($options['class']) {
$options['attributes']['class'] = $options['class'];
}
$nodes = array();
// find the element by a tag type
if ($options['tag']) {
if ($isHtml) {
$elements = self::getElementsByCaseInsensitiveTagName(
$dom,
$options['tag']
);
} else {
$elements = $dom->getElementsByTagName($options['tag']);
}
foreach ($elements as $element) {
$nodes[] = $element;
}
if (empty($nodes)) {
return false;
}
} // no tag selected, get them all
else {
$tags = array(
'a', 'abbr', 'acronym', 'address', 'area', 'b', 'base', 'bdo',
'big', 'blockquote', 'body', 'br', 'button', 'caption', 'cite',
'code', 'col', 'colgroup', 'dd', 'del', 'div', 'dfn', 'dl',
'dt', 'em', 'fieldset', 'form', 'frame', 'frameset', 'h1', 'h2',
'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'html', 'i', 'iframe',
'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'link',
'map', 'meta', 'noframes', 'noscript', 'object', 'ol', 'optgroup',
'option', 'p', 'param', 'pre', 'q', 'samp', 'script', 'select',
'small', 'span', 'strong', 'style', 'sub', 'sup', 'table',
'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'title',
'tr', 'tt', 'ul', 'var',
// HTML5
'article', 'aside', 'audio', 'bdi', 'canvas', 'command',
'datalist', 'details', 'dialog', 'embed', 'figure', 'figcaption',
'footer', 'header', 'hgroup', 'keygen', 'mark', 'meter', 'nav',
'output', 'progress', 'ruby', 'rt', 'rp', 'track', 'section',
'source', 'summary', 'time', 'video', 'wbr'
);
foreach ($tags as $tag) {
if ($isHtml) {
$elements = self::getElementsByCaseInsensitiveTagName(
$dom,
$tag
);
} else {
$elements = $dom->getElementsByTagName($tag);
}
foreach ($elements as $element) {
$nodes[] = $element;
}
}
if (empty($nodes)) {
return false;
}
}
// filter by attributes
if ($options['attributes']) {
foreach ($nodes as $node) {
$invalid = false;
foreach ($options['attributes'] as $name => $value) {
// match by regexp if like "regexp:/foo/i"
if (preg_match('/^regexp\s*:\s*(.*)/i', $value, $matches)) {
if (!preg_match($matches[1], $node->getAttribute($name))) {
$invalid = true;
}
} // class can match only a part
elseif ($name == 'class') {
// split to individual classes
$findClasses = explode(
' ',
preg_replace("/\s+/", ' ', $value)
);
$allClasses = explode(
' ',
preg_replace("/\s+/", ' ', $node->getAttribute($name))
);
// make sure each class given is in the actual node
foreach ($findClasses as $findClass) {
if (!in_array($findClass, $allClasses)) {
$invalid = true;
}
}
} // match by exact string
else {
if ($node->getAttribute($name) != $value) {
$invalid = true;
}
}
}
// if every attribute given matched
if (!$invalid) {
$filtered[] = $node;
}
}
$nodes = $filtered;
$filtered = array();
if (empty($nodes)) {
return false;
}
}
// filter by content
if ($options['content'] !== null) {
foreach ($nodes as $node) {
$invalid = false;
// match by regexp if like "regexp:/foo/i"
if (preg_match('/^regexp\s*:\s*(.*)/i', $options['content'], $matches)) {
if (!preg_match($matches[1], self::getNodeText($node))) {
$invalid = true;
}
} // match empty string
elseif ($options['content'] === '') {
if (self::getNodeText($node) !== '') {
$invalid = true;
}
} // match by exact string
elseif (strstr(self::getNodeText($node), $options['content']) === false) {
$invalid = true;
}
if (!$invalid) {
$filtered[] = $node;
}
}
$nodes = $filtered;
$filtered = array();
if (empty($nodes)) {
return false;
}
}
// filter by parent node
if ($options['parent']) {
$parentNodes = self::findNodes($dom, $options['parent'], $isHtml);
$parentNode = isset($parentNodes[0]) ? $parentNodes[0] : null;
foreach ($nodes as $node) {
if ($parentNode !== $node->parentNode) {
continue;
}
$filtered[] = $node;
}
$nodes = $filtered;
$filtered = array();
if (empty($nodes)) {
return false;
}
}
// filter by child node
if ($options['child']) {
$childNodes = self::findNodes($dom, $options['child'], $isHtml);
$childNodes = !empty($childNodes) ? $childNodes : array();
foreach ($nodes as $node) {
foreach ($node->childNodes as $child) {
foreach ($childNodes as $childNode) {
if ($childNode === $child) {
$filtered[] = $node;
}
}
}
}
$nodes = $filtered;
$filtered = array();
if (empty($nodes)) {
return false;
}
}
// filter by adjacent-sibling
if ($options['adjacent-sibling']) {
$adjacentSiblingNodes = self::findNodes($dom, $options['adjacent-sibling'], $isHtml);
$adjacentSiblingNodes = !empty($adjacentSiblingNodes) ? $adjacentSiblingNodes : array();
foreach ($nodes as $node) {
$sibling = $node;
while ($sibling = $sibling->nextSibling) {
if ($sibling->nodeType !== XML_ELEMENT_NODE) {
continue;
}
foreach ($adjacentSiblingNodes as $adjacentSiblingNode) {
if ($sibling === $adjacentSiblingNode) {
$filtered[] = $node;
break;
}
}
break;
}
}
$nodes = $filtered;
$filtered = array();
if (empty($nodes)) {
return false;
}
}
// filter by ancestor
if ($options['ancestor']) {
$ancestorNodes = self::findNodes($dom, $options['ancestor'], $isHtml);
$ancestorNode = isset($ancestorNodes[0]) ? $ancestorNodes[0] : null;
foreach ($nodes as $node) {
$parent = $node->parentNode;
while ($parent && $parent->nodeType != XML_HTML_DOCUMENT_NODE) {
if ($parent === $ancestorNode) {
$filtered[] = $node;
}
$parent = $parent->parentNode;
}
}
$nodes = $filtered;
$filtered = array();
if (empty($nodes)) {
return false;
}
}
// filter by descendant
if ($options['descendant']) {
$descendantNodes = self::findNodes($dom, $options['descendant'], $isHtml);
$descendantNodes = !empty($descendantNodes) ? $descendantNodes : array();
foreach ($nodes as $node) {
foreach (self::getDescendants($node) as $descendant) {
foreach ($descendantNodes as $descendantNode) {
if ($descendantNode === $descendant) {
$filtered[] = $node;
}
}
}
}
$nodes = $filtered;
$filtered = array();
if (empty($nodes)) {
return false;
}
}
// filter by children
if ($options['children']) {
$validChild = array('count', 'greater_than', 'less_than', 'only');
$childOptions = self::assertValidKeys(
$options['children'],
$validChild
);
foreach ($nodes as $node) {
$childNodes = $node->childNodes;
foreach ($childNodes as $childNode) {
if ($childNode->nodeType !== XML_CDATA_SECTION_NODE &&
$childNode->nodeType !== XML_TEXT_NODE) {
$children[] = $childNode;
}
}
// we must have children to pass this filter
if (!empty($children)) {
// exact count of children
if ($childOptions['count'] !== null) {
if (count($children) !== $childOptions['count']) {
break;
}
} // range count of children
elseif ($childOptions['less_than'] !== null &&
$childOptions['greater_than'] !== null) {
if (count($children) >= $childOptions['less_than'] ||
count($children) <= $childOptions['greater_than']) {
break;
}
} // less than a given count
elseif ($childOptions['less_than'] !== null) {
if (count($children) >= $childOptions['less_than']) {
break;
}
} // more than a given count
elseif ($childOptions['greater_than'] !== null) {
if (count($children) <= $childOptions['greater_than']) {
break;
}
}
// match each child against a specific tag
if ($childOptions['only']) {
$onlyNodes = self::findNodes(
$dom,
$childOptions['only'],
$isHtml
);
// try to match each child to one of the 'only' nodes
foreach ($children as $child) {
$matched = false;
foreach ($onlyNodes as $onlyNode) {
if ($onlyNode === $child) {
$matched = true;
}
}
if (!$matched) {
break 2;
}
}
}
$filtered[] = $node;
}
}
$nodes = $filtered;
if (empty($nodes)) {
return;
}
}
// return the first node that matches all criteria
return !empty($nodes) ? $nodes : array();
}
/**
* Recursively get flat array of all descendants of this node.
*
* @param DOMNode $node
*
* @return array
*
* @since Method available since Release 3.3.0
*/
protected static function getDescendants(DOMNode $node)
{
$allChildren = array();
$childNodes = $node->childNodes ? $node->childNodes : array();
foreach ($childNodes as $child) {
if ($child->nodeType === XML_CDATA_SECTION_NODE ||
$child->nodeType === XML_TEXT_NODE) {
continue;
}
$children = self::getDescendants($child);
$allChildren = array_merge($allChildren, $children, array($child));
}
return isset($allChildren) ? $allChildren : array();
}
/**
* Gets elements by case insensitive tagname.
*
* @param DOMDocument $dom
* @param string $tag
*
* @return DOMNodeList
*
* @since Method available since Release 3.4.0
*/
protected static function getElementsByCaseInsensitiveTagName(DOMDocument $dom, $tag)
{
$elements = $dom->getElementsByTagName(strtolower($tag));
if ($elements->length == 0) {
$elements = $dom->getElementsByTagName(strtoupper($tag));
}
return $elements;
}
/**
* Get the text value of this node's child text node.
*
* @param DOMNode $node
*
* @return string
*
* @since Method available since Release 3.3.0
*/
protected static function getNodeText(DOMNode $node)
{
if (!$node->childNodes instanceof DOMNodeList) {
return '';
}
$result = '';
foreach ($node->childNodes as $childNode) {
if ($childNode->nodeType === XML_TEXT_NODE ||
$childNode->nodeType === XML_CDATA_SECTION_NODE) {
$result .= trim($childNode->data) . ' ';
} else {
$result .= self::getNodeText($childNode);
}
}
return str_replace(' ', ' ', $result);
}
}