Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add highlight methods to search and search builder #349

Draft
wants to merge 4 commits into
base: 0.6
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion packages/seal-memory-adapter/src/MemorySearcher.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ public function search(Search $search): Result
{
$documents = [];

$searchTerms = [];

/** @var Index $index */
foreach ($search->indexes as $index) {
foreach (MemoryStorage::getDocuments($index) as $identifier => $document) {
Expand All @@ -55,6 +57,7 @@ public function search(Search $search): Result

$text = \json_encode($searchableDocument, \JSON_THROW_ON_ERROR);
$terms = \explode(' ', $filter->query);
$searchTerms = \array_unique([...$searchTerms, ...$terms]);

foreach ($terms as $term) {
if (!\str_contains($text, $term)) {
Expand Down Expand Up @@ -170,8 +173,40 @@ public function search(Search $search): Result

$documents = \array_slice($documents, $search->offset, $search->limit);

$generator = (function () use ($documents): \Generator {
$generator = (function () use ($documents, $search, $searchTerms): \Generator {
foreach ($documents as $document) {
foreach ($search->highlightFields as $highlightField) {
$highlightFieldContent = \json_encode($document[$highlightField], \JSON_THROW_ON_ERROR);
foreach ($searchTerms as $searchTerm) {
$highlightFieldContent = \str_replace(
$searchTerm,
$search->highlightPreTag . $searchTerm . $search->highlightPostTag,
$highlightFieldContent,
);
}

$highlightFieldContent = \str_replace(
$search->highlightPostTag . $search->highlightPostTag,
'',
$highlightFieldContent,
);

$highlightFieldContent = \str_replace(
$search->highlightPostTag . ' ' . $search->highlightPostTag,
' ',
$highlightFieldContent,
);

$document['_formatted'] ??= [];

\assert(
\is_array($document['_formatted']),
'Document with key "_formatted" expected to be array.',
);

$document['_formatted'][$highlightField] = \json_decode($highlightFieldContent, true, 512, \JSON_THROW_ON_ERROR);
}

yield $document;
}
});
Expand Down
2 changes: 1 addition & 1 deletion packages/seal-solr-adapter/src/SolrSchemaManager.php
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ private function createIndexFields(array $fields, string $prefix = '', bool $isP
'type' => $field->searchable ? 'text_general' : 'string',
'indexed' => $field->searchable,
'docValues' => $field->filterable || $field->sortable,
'stored' => false,
'stored' => true, // required to be set to stored for highlighting
'useDocValuesAsStored' => false,
'multiValued' => $isMultiple,
],
Expand Down
46 changes: 41 additions & 5 deletions packages/seal-solr-adapter/src/SolrSearcher.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
use Schranz\Search\SEAL\Search\Result;
use Schranz\Search\SEAL\Search\Search;
use Solarium\Client;
use Solarium\Component\Result\Highlighting\Highlighting;
use Solarium\Core\Query\DocumentInterface;
use Solarium\Core\Query\Helper;

Expand Down Expand Up @@ -116,10 +117,17 @@ public function search(Search $search): Result
$query->addSort($field, $direction);
}

if ([] !== $search->highlightFields) {
$highlighting = $query->getHighlighting();
$highlighting->setFields(\implode(', ', $search->highlightFields));
$highlighting->setSimplePrefix($search->highlightPreTag);
$highlighting->setSimplePostfix($search->highlightPostTag);
}

$result = $this->client->select($query);

return new Result(
$this->hitsToDocuments($search->indexes, $result->getDocuments()),
$this->hitsToDocuments($search->indexes, $result->getDocuments(), $result->getHighlighting()),
(int) $result->getNumFound(),
);
}
Expand All @@ -130,7 +138,7 @@ public function search(Search $search): Result
*
* @return \Generator<int, array<string, mixed>>
*/
private function hitsToDocuments(array $indexes, iterable $hits): \Generator
private function hitsToDocuments(array $indexes, iterable $hits, ?Highlighting $highlighting = null): \Generator
{
$index = $indexes[\array_key_first($indexes)];

Expand All @@ -139,16 +147,44 @@ private function hitsToDocuments(array $indexes, iterable $hits): \Generator
$hit = $hit->getFields();

unset($hit['_version_']);
$identifierFieldName = $index->getIdentifierField()->name;

if ('id' !== $index->getIdentifierField()->name) {
if ('id' !== $identifierFieldName) {
// Solr currently does not support using an identifier field other than "id": https://github.com/schranz-search/schranz-search/issues/87
$id = $hit['id'];
unset($hit['id']);

$hit[$index->getIdentifierField()->name] = $id;
$hit[$identifierFieldName] = $id;
}

$document = $this->marshaller->unmarshall($index->fields, $hit);

if ($highlighting instanceof \Solarium\Component\Result\Highlighting\Highlighting) {
$highlightResult = $highlighting->getResult($hit[$identifierFieldName]);
\assert(
$highlightResult instanceof \Solarium\Component\Result\Highlighting\Result,
'Expected the highlighting exists.',
);

$document['_formatted'] ??= [];

\assert(
\is_array($document['_formatted']),
'Document with key "_formatted" expected to be array.',
);

foreach ($highlightResult->getFields() as $key => $value) {
$fieldConfig = $index->getFieldByPath($key);
// highlights are returned as arrays even for non-multiple fields, so join them into a single string
if (!$fieldConfig->multiple && \is_array($value)) {
$value = \implode(' ', $value);
}

$document['_formatted'][$key] = $value;
}
}

yield $this->marshaller->unmarshall($index->fields, $hit);
yield $document;
}
}

Expand Down
39 changes: 35 additions & 4 deletions packages/seal-typesense-adapter/src/TypesenseSearcher.php
Original file line number Diff line number Diff line change
Expand Up @@ -104,27 +104,58 @@ public function search(Search $search): Result
$searchParams['sort_by'] = \implode(',', $sortBys);
}

if ([] !== $search->highlightFields) {
$searchParams['highlight_fields'] = \implode(', ', $search->highlightFields);
$searchParams['highlight_start_tag'] = $search->highlightPreTag;
$searchParams['highlight_end_tag'] = $search->highlightPostTag;
}

$data = $this->client->collections[$index->name]->documents->search($searchParams);

return new Result(
$this->hitsToDocuments($search->indexes, $data['hits']),
$this->hitsToDocuments($search->indexes, $data['hits'], $search->highlightFields),
$data['found'] ?? null,
);
}

/**
* @param Index[] $indexes
* @param iterable<array<string, mixed>> $hits
* @param array<string> $highlightFields
*
* @return \Generator<int, array<string, mixed>>
*/
private function hitsToDocuments(array $indexes, iterable $hits): \Generator
private function hitsToDocuments(array $indexes, iterable $hits, array $highlightFields = []): \Generator
{
$index = $indexes[\array_key_first($indexes)];

/** @var array{document: array<string, mixed>} $hit */
/** @var array{document: array<string, mixed>, highlight?: array<string, array{snippet: string}>} $hit */
foreach ($hits as $hit) {
yield $this->marshaller->unmarshall($index->fields, $hit['document']);
$document = $this->marshaller->unmarshall($index->fields, $hit['document']);

if ([] === $highlightFields) {
yield $document;

continue;
}

$document['_formatted'] ??= [];

\assert(
\is_array($document['_formatted']),
'Document with key "_formatted" expected to be array.',
);

foreach ($highlightFields as $highlightField) {
\assert(
isset($hit['highlight'][$highlightField]['snippet']),
'Expected highlight field to be set.',
);

$document['_formatted'][$highlightField] = $hit['highlight'][$highlightField]['snippet'];
}

yield $document;
}
}

Expand Down
4 changes: 4 additions & 0 deletions packages/seal/src/Search/Search.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,17 @@ final class Search
* @param array<string, Index> $indexes
* @param object[] $filters
* @param array<string, 'asc'|'desc'> $sortBys
* @param array<string> $highlightFields
*/
public function __construct(
public readonly array $indexes = [],
public readonly array $filters = [],
public readonly array $sortBys = [],
public readonly ?int $limit = null,
public readonly int $offset = 0,
public readonly array $highlightFields = [],
public readonly string $highlightPreTag = '<mark>',
public readonly string $highlightPostTag = '</mark>',
) {
}
}
29 changes: 29 additions & 0 deletions packages/seal/src/Search/SearchBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,15 @@ final class SearchBuilder

private ?int $limit = null;

/**
 * Names of the fields to highlight; empty until highlight() is called.
 *
 * @var array<string>
 */
private array $highlightFields = [];

/**
 * Tag placed before a highlighted term; handed to the Search object as its pre tag.
 */
private string $highlightPreTag = '<mark>';

/**
 * Tag placed after a highlighted term; handed to the Search object as its post tag.
 */
private string $highlightPostTag = '</mark>';

public function __construct(
readonly private Schema $schema,
readonly private SearcherInterface $searcher,
Expand Down Expand Up @@ -82,6 +91,23 @@ public function offset(int $offset): static
return $this;
}

/**
 * Configures highlighting for the search being built.
 *
 * Every call overwrites the previously configured fields and tags.
 *
 * @param array<string> $fields  names of the fields to highlight
 * @param string        $preTag  tag inserted before a highlighted term
 * @param string        $postTag tag inserted after a highlighted term
 *
 * @return static the builder itself, to allow method chaining
 */
public function highlight(array $fields, string $preTag = '<mark>', string $postTag = '</mark>'): static
{
    [
        $this->highlightFields,
        $this->highlightPreTag,
        $this->highlightPostTag,
    ] = [$fields, $preTag, $postTag];

    return $this;
}

/**
 * Returns the searcher instance this builder was constructed with.
 */
public function getSearcher(): SearcherInterface
{
return $this->searcher;
}

public function getSearch(): Search
{
return new Search(
Expand All @@ -90,6 +116,9 @@ public function getSearch(): Search
$this->sortBys,
$this->limit,
$this->offset,
$this->highlightFields,
$this->highlightPreTag,
$this->highlightPostTag,
);
}

Expand Down
67 changes: 67 additions & 0 deletions packages/seal/src/Testing/AbstractSearcherTestCase.php
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,73 @@ public function testSearchCondition(): void
}
}

/**
 * Ensures that a search condition combined with highlight() returns the matching
 * documents with the search term wrapped in the given tags under "_formatted".
 *
 * The two fixtures matching "Blog" may be returned in either order, so both
 * orderings are accepted. A second search without highlighting must return the
 * plain document, without a "_formatted" key.
 */
public function testSearchConditionWithHighlight(): void
{
    $documents = TestingHelper::createComplexFixtures();

    $schema = self::getSchema();

    foreach ($documents as $document) {
        self::$taskHelper->tasks[] = self::$indexer->save(
            $schema->indexes[TestingHelper::INDEX_COMPLEX],
            $document,
            ['return_slow_promise_result' => true],
        );
    }
    self::$taskHelper->waitForAll();

    try {
        $search = new SearchBuilder($schema, self::$searcher);
        $search->addIndex(TestingHelper::INDEX_COMPLEX);
        $search->addFilter(new Condition\SearchCondition('Blog'));
        $search->highlight(['title'], '<mark>', '</mark>');

        $expectedDocumentA = $documents[0];
        $expectedDocumentA['_formatted']['title'] = \str_replace(
            'Blog',
            '<mark>Blog</mark>',
            $expectedDocumentA['title'],
        );
        $expectedDocumentB = $documents[1];
        $expectedDocumentB['_formatted']['title'] = \str_replace(
            'Blog',
            '<mark>Blog</mark>',
            $expectedDocumentB['title'],
        );

        // The result order is not fixed for this query, so accept both orderings.
        $expectedDocumentsVariantA = [
            $expectedDocumentA,
            $expectedDocumentB,
        ];
        $expectedDocumentsVariantB = [
            $expectedDocumentB,
            $expectedDocumentA,
        ];

        $loadedDocuments = [...$search->getResult()];
        $this->assertCount(2, $loadedDocuments);

        $this->assertTrue(
            $expectedDocumentsVariantA === $loadedDocuments
            || $expectedDocumentsVariantB === $loadedDocuments,
            'Not correct documents where found.',
        );

        // Without highlight() the document must come back unchanged.
        $search = new SearchBuilder($schema, self::$searcher);
        $search->addIndex(TestingHelper::INDEX_COMPLEX);
        $search->addFilter(new Condition\SearchCondition('Thing'));

        $this->assertSame([$documents[2]], [...$search->getResult()]);
    } finally {
        // Always remove the fixtures, even when an assertion above fails, so
        // leftover documents cannot leak into other tests using the same index.
        foreach ($documents as $document) {
            self::$taskHelper->tasks[] = self::$indexer->delete(
                $schema->indexes[TestingHelper::INDEX_COMPLEX],
                $document['uuid'],
                ['return_slow_promise_result' => true],
            );
        }
    }
}

public function testNoneSearchableFields(): void
{
$documents = TestingHelper::createComplexFixtures();
Expand Down
Loading