Skip to content

Commit

Permalink
Fix parsing of latest version of classic Google search (#111)
Browse files: browse the repository at this point in the history
* Fix parsing of latest version of classic Google search

Closes #105
Closes #110

* Typo fix

* Code cleanup
  • Loading branch information
msiemens authored and gsouf committed Sep 17, 2018
1 parent 6ef29cb commit c6f260e
Show file tree
Hide file tree
Showing 3 changed files with 918 additions and 4 deletions.
20 changes: 16 additions & 4 deletions src/Parser/Evaluated/Rule/Natural/Classical/ClassicalResult.php
Original file line number | Diff line number | Diff line change
Expand Up @@ -32,23 +32,35 @@ protected function parseNode(GoogleDom $dom, \DomElement $node)

// find the title/url
/* @var $aTag \DOMElement */
$aTag=$dom
->xpathQuery("descendant::h3[@class='r'][1]/a", $node)
$aTag = $dom
->xpathQuery("descendant::*[(self::div or self::h3) and @class='r'][1]/a", $node)
->item(0);
if (!$aTag) {
throw new InvalidDOMException('Cannot parse a classical result.');
}

/* @var $h3Tag \DOMElement */
$h3Tag = $dom
->xpathQuery('descendant::h3', $node)
->item(0);
if (!$h3Tag) {
throw new InvalidDOMException('Cannot parse a classical result.');
}

$destinationTag = $dom
->cssQuery('div.f cite', $node)
->cssQuery('div.f cite, div.TbwUpd cite', $node)
->getNodeAt(0);

if (is_a($destinationTag, Serps\Core\Dom\NullDomNode::class)) {
throw new InvalidDOMException('Cannot parse a classical result.');
}

$descriptionTag = $dom
->xpathQuery("descendant::span[@class='st']", $node)
->item(0);

return [
'title' => $aTag->nodeValue,
'title' => $h3Tag->nodeValue,
'url' => $dom->getUrl()->resolveAsString($aTag->getAttribute('href')),
'destination' => $destinationTag->getNodeValue(),
// trim needed for mobile results coming with an initial space
Expand Down

Large diffs are not rendered by default.

Loading

0 comments on commit c6f260e

Please sign in to comment.