diff --git a/README.md b/README.md index 0e13b28..b62583a 100644 --- a/README.md +++ b/README.md @@ -185,6 +185,14 @@ $event->scrapeRequest->url // Url scraped $event->scrapeRequest->type // Request type ``` +## Advanced usage + +There is another event named `ConfigurationScraped` that is triggered when a scrape is done automatically in the +reconfiguration step. It is exactly the same as the `Scraped` event. It is named differently because it is usually not +interesting to use it other than internally to update the dataset. + +`ConfigurationScraped` can be used to do updates or to learn about the internals of the configuration process. + ### Queue workers You need to workers, one for the default queue and another for the `configure` queue. The `configure` worker diff --git a/src/Scraper/Application/Configurator.php b/src/Scraper/Application/Configurator.php index 72b62aa..09e787b 100644 --- a/src/Scraper/Application/Configurator.php +++ b/src/Scraper/Application/Configurator.php @@ -5,6 +5,8 @@ use Goutte\Client; use Illuminate\Support\Collection; use Illuminate\Support\Facades\Log; +use Softonic\LaravelIntelligentScraper\Scraper\Events\ConfigurationScraped; +use Softonic\LaravelIntelligentScraper\Scraper\Events\ScrapeRequest; use Softonic\LaravelIntelligentScraper\Scraper\Exceptions\ConfigurationException; use Softonic\LaravelIntelligentScraper\Scraper\Models\Configuration; use Softonic\LaravelIntelligentScraper\Scraper\Models\ScrapedDataset; @@ -114,7 +116,14 @@ private function findConfigByScrapedData($scrapedData, $crawler, $currentConfigu } } - $this->updateVariant($scrapedData); + event(new ConfigurationScraped( + new ScrapeRequest( + $scrapedData['url'], + $scrapedData['type'] + ), + $scrapedData['data'], + $this->variantGenerator->getId($scrapedData['type']) + )); return $result; } @@ -175,10 +184,4 @@ private function checkConfiguration($data, Collection $finalConfig) throw new ConfigurationException("Field(s) \"{$fieldsMissing}\" not found.", 0); } } - - private 
function updateVariant($scrapedData): void - { - $scrapedData['variant'] = $this->variantGenerator->getId($scrapedData['type']); - $scrapedData->save(); - } } diff --git a/src/Scraper/Events/ConfigurationScraped.php b/src/Scraper/Events/ConfigurationScraped.php new file mode 100644 index 0000000..356c4f9 --- /dev/null +++ b/src/Scraper/Events/ConfigurationScraped.php @@ -0,0 +1,7 @@ + [ UpdateDataset::class, ], + ConfigurationScraped::class => [ + UpdateDataset::class, + ], ]; /** diff --git a/tests/Unit/Scraper/Application/ConfiguratorTest.php b/tests/Unit/Scraper/Application/ConfiguratorTest.php index ddbd9a6..c45b422 100644 --- a/tests/Unit/Scraper/Application/ConfiguratorTest.php +++ b/tests/Unit/Scraper/Application/ConfiguratorTest.php @@ -6,6 +6,7 @@ use Illuminate\Foundation\Testing\DatabaseMigrations; use Illuminate\Support\Facades\Log; use Mockery\Mock; +use Softonic\LaravelIntelligentScraper\Scraper\Events\ConfigurationScraped; use Softonic\LaravelIntelligentScraper\Scraper\Exceptions\ConfigurationException; use Softonic\LaravelIntelligentScraper\Scraper\Models\Configuration as ConfigurationModel; use Softonic\LaravelIntelligentScraper\Scraper\Models\ScrapedDataset; @@ -150,18 +151,18 @@ public function whenTryToFindNewXpathButNotFoundItShouldLogItAndResetVariant() $this->variantGenerator->shouldReceive('fieldNotFound') ->once(); $this->variantGenerator->shouldReceive('getId') - ->andReturnNull(); + ->andReturn(''); Log::shouldReceive('warning') ->with("Field 'author' with value 'My author' not found for 'https://test.c/123456789012'."); + $this->expectsEvents(ConfigurationScraped::class); + try { $this->configurator->configureFromDataset($posts); } catch (ConfigurationException $e) { $this->assertEquals('Field(s) "author" not found.', $e->getMessage()); } - - $this->assertNull($posts[0]['variant']); } /** @@ -225,18 +226,18 @@ public function whenUseSomeOldXpathButNotFoundNewsItShouldLogItAndResetVariant() 
$this->variantGenerator->shouldReceive('fieldNotFound') ->once(); $this->variantGenerator->shouldReceive('getId') - ->andReturnNull(); + ->andReturn(''); Log::shouldReceive('warning') ->with("Field 'author' with value 'My author' not found for 'https://test.c/123456789012'."); + $this->expectsEvents(ConfigurationScraped::class); + try { $this->configurator->configureFromDataset($posts); } catch (ConfigurationException $e) { $this->assertEquals('Field(s) "author" not found.', $e->getMessage()); } - - $this->assertNull($posts[0]['variant']); } /** @@ -306,7 +307,7 @@ public function whenTryToFindXpathInMultiplepostsAndNotFoundInAnyItShouldThrowAn $this->variantGenerator->shouldReceive('fieldNotFound') ->times(4); $this->variantGenerator->shouldReceive('getId') - ->andReturnNull(); + ->andReturn(''); Log::shouldReceive('warning') ->with("Field 'title' with value 'My Title' not found for 'https://test.c/123456789012'."); @@ -314,14 +315,13 @@ public function whenTryToFindXpathInMultiplepostsAndNotFoundInAnyItShouldThrowAn Log::shouldReceive('warning') ->with("Field 'author' with value 'My author' not found for 'https://test.c/123456789012'."); + $this->expectsEvents(ConfigurationScraped::class); + try { $this->configurator->configureFromDataset($posts); } catch (ConfigurationException $e) { $this->assertEquals('Field(s) "title,author" not found.', $e->getMessage()); } - - $this->assertNull($posts[0]['variant']); - $this->assertNull($posts[1]['variant']); } /** @@ -413,6 +413,8 @@ public function whenDiscoverDifferentXpathItShouldGetAllOfThemAndUpdateTheVarian $this->variantGenerator->shouldReceive('getId') ->andReturn(10, 20, 30); + $this->expectsEvents(ConfigurationScraped::class); + $configurations = $this->configurator->configureFromDataset($posts); $this->assertInstanceOf(ConfigurationModel::class, $configurations[0]); @@ -436,9 +438,5 @@ public function whenDiscoverDifferentXpathItShouldGetAllOfThemAndUpdateTheVarian ], array_values($configurations[1]['xpaths']) 
); - - $this->assertEquals($posts[0]['variant'], 10); - $this->assertEquals($posts[1]['variant'], 20); - $this->assertEquals($posts[2]['variant'], 30); } }