Skip to content

Commit

Permalink
FileFetcher should always reflect use-local-file config (#4035)
Browse files Browse the repository at this point in the history
  • Loading branch information
paul-m committed Nov 3, 2023
1 parent 0f20b80 commit 2be99b4
Show file tree
Hide file tree
Showing 12 changed files with 487 additions and 60 deletions.
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"fmizzell/maquina": "^1.1.1",
"getdkan/contracts": "^1.0.0",
"getdkan/csv-parser": "^1.3.0",
"getdkan/file-fetcher" : "^4.2.1",
"getdkan/file-fetcher" : "^4.2.2",
"getdkan/harvest": "^1.0.0",
"getdkan/procrastinator": "^4.0.0",
"getdkan/rooted-json-data": "^0.1",
Expand Down
10 changes: 5 additions & 5 deletions modules/common/src/DataResource.php
Original file line number Diff line number Diff line change
Expand Up @@ -395,16 +395,16 @@ private static function getDistribution($identifier) {
*/
public function generateChecksum() {
try {
$this->checksum = md5_file($this->filePath);
$this->checksum = md5_file($this->getFilePath());
}
catch (\Throwable $throwable) {
// Re-throw the throwable if we're not in the perspective of a local file
// that doesn't exist. It's valid for a local file to not exist in some
// circumstances.
if (
$this->getPerspective() !== ResourceLocalizer::LOCAL_FILE_PERSPECTIVE ||
!file_exists($this->filePath)
) {
if (!(
$this->getPerspective() === ResourceLocalizer::LOCAL_FILE_PERSPECTIVE &&
!file_exists($this->getFilePath())
)) {
throw $throwable;
}
}
Expand Down
76 changes: 76 additions & 0 deletions modules/common/src/FileFetcher/DkanFileFetcher.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
<?php

namespace Drupal\common\FileFetcher;

use FileFetcher\FileFetcher;

/**
* Allows FileFetcher to be reconfigured for using existing local files.
*
* This DKAN-specific extension of the FileFetcher (which comes from an
* external library) applies the DKAN configuration
* common.settings.always_use_existing_local_perspective
* when selecting the processor. The configuration itself is retrieved
* in FileFetcherFactory and passed to DkanFileFetcher on getInstance().
*/
class DkanFileFetcher extends FileFetcher {

/**
* Tell this file fetcher whether to use local files if they exist.
*
* @param bool $use_local_file
* (optional) Whether to use the local file. If TRUE, we'll use the file
* processor that prefers to use local files. Defaults to TRUE.
*
* @return self
* Fluent interface.
*
* @see https://dkan.readthedocs.io/en/2.x/user-guide/guide_local_files.html
*/
public function setAlwaysUseExistingLocalPerspective(bool $use_local_file = TRUE) : self {
// @todo Re-computing the custom processor classes should be in another
// method that is in the parent class.
if ($use_local_file) {
$this->dkanUseLocalFileProcessor();
}
else {
$this->dkanUseDefaultFileProcessor();
}
return $this;
}

/**
* Configure the processor to respect the local file if it already exists.
*/
protected function dkanUseLocalFileProcessor() {
// Set the state/config to use our remote class.
$this->setProcessors(['processors' => [FileFetcherRemoteUseExisting::class]]);
$this->setStateProperty('processor', FileFetcherRemoteUseExisting::class);
// At this very early stage, update the status if the file already exists.
/** @var \Drupal\common\FileFetcher\FileFetcherRemoteUseExisting $processor */
$processor = $this->getProcessor();
$existing_status = $processor->discoverStatusForExistingFile(
$this->getState(),
$this->getResult()
);
$this->setState($existing_status['state']);
}

/**
* Configure the processor to use its default behavior.
*/
protected function dkanUseDefaultFileProcessor() {
// @todo This ignores any other custom processor classes that might have
// been configured. Improve this situation.
$this->customProcessorClasses = [];
$state = $this->getState();
foreach ($this->getProcessors() as $processor) {
if ($processor->isServerCompatible($state)) {
$state['processor'] = get_class($processor);
break;
}
}
$this->getResult()->setData(json_encode($state));
}

}
13 changes: 7 additions & 6 deletions modules/common/src/FileFetcher/FileFetcherFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,17 @@ public function __construct(JobStoreFactory $jobStoreFactory, ConfigFactoryInter
*/
public function getInstance(string $identifier, array $config = []) {
$config = array_merge($this->configDefault, $config);
// Use our bespoke file fetcher class that uses the existing file if we're
// configured to do so.
if ($this->dkanConfig->get('always_use_existing_local_perspective') ?? FALSE) {
$config['processors'] = [FileFetcherRemoteUseExisting::class];
}
return FileFetcher::get(
$file_fetcher = DkanFileFetcher::get(
$identifier,
$this->jobStoreFactory->getInstance(FileFetcher::class),
$config
);
// Inject our special configuration into the file fetcher, so it can use
// local files rather than re-downloading them.
$file_fetcher->setAlwaysUseExistingLocalPerspective(
(bool) $this->dkanConfig->get('always_use_existing_local_perspective')
);
return $file_fetcher;
}

}
23 changes: 21 additions & 2 deletions modules/common/src/FileFetcher/FileFetcherRemoteUseExisting.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,31 @@ class FileFetcherRemoteUseExisting extends Remote {
*/
public function copy(array $state, Result $result, int $timeLimit = PHP_INT_MAX): array {
// Always short-circuit if the file already exists.
$existing_status = $this->discoverStatusForExistingFile($state, $result);
if ($existing_status['result']->getStatus() === Result::DONE) {
return $existing_status;
}
return parent::copy($state, $result, $timeLimit);
}

/**
* Check for the existing file, setting state and result appropriately.
*
* @param array $state
* State.
* @param \Procrastinator\Result $result
* Result object.
*
* @return array
* Array of $state and $result. Appropriate for return from
* ProcessorInterface::copy.
*/
public function discoverStatusForExistingFile(array $state, Result $result): array {
if (file_exists($state['destination'])) {
$state['total_bytes_copied'] = $state['total_bytes'] = $this->getFilesize($state['destination']);
$result->setStatus(Result::DONE);
return ['state' => $state, 'result' => $result];
}
return parent::copy($state, $result, $timeLimit);
return ['state' => $state, 'result' => $result];
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?php

namespace Drupal\Tests\common\Unit\FileFetcher;

use Drupal\common\FileFetcher\FileFetcherRemoteUseExisting;
use org\bovigo\vfs\vfsStream;
use PHPUnit\Framework\TestCase;
use Procrastinator\Result;

/**
* @covers \Drupal\common\FileFetcher\FileFetcherRemoteUseExisting
* @coversDefaultClass \Drupal\common\FileFetcher\FileFetcherRemoteUseExisting
*/
class FileFetcherRemoteUseExistingTest extends TestCase {

/**
* @covers ::copy
*/
public function testCopy() {
$result = new Result();
$remote = new FileFetcherRemoteUseExisting();

// Set up a file system.
$root = vfsStream::setup();
$file = $root->url() . '/nine_bytes.csv';
$file_contents = '0123,4567';

// Config for processor.
$state = [
'destination' => $file,
'source' => 'https://example.com/bad_path.csv',
];

// Run it for error condition because the file doesn't already exist, so it
// will try to copy the source, but the source URL is bad.
$result_state = $remote->copy($state, $result);
$this->assertEquals(Result::ERROR, $result_state['result']->getStatus());

// Add existing file contents.
file_put_contents($file, $file_contents);

// Run it for re-use of existing file. This will succeed because the file
// is there.
$result_state = $remote->copy($state, $result);

$this->assertEquals(Result::DONE, $result_state['result']->getStatus());
$this->assertEquals(9, $result_state['state']['total_bytes']);
$this->assertEquals(9, $result_state['state']['total_bytes_copied']);
$this->assertStringEqualsFile($file, $file_contents);
}

}
109 changes: 72 additions & 37 deletions modules/datastore/tests/src/Functional/DictionaryEnforcerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,33 @@

use Drupal\datastore\Controller\ImportController;
use Drupal\metastore\DataDictionary\DataDictionaryDiscovery;
use Drupal\Tests\BrowserTestBase;
use Drupal\Tests\common\Traits\CleanUp;
use Drupal\Tests\common\Traits\GetDataTrait;
use Drupal\Tests\metastore\Unit\MetastoreServiceTest;

use RootedData\RootedJsonData;
use Symfony\Component\HttpFoundation\Request;
use weitzman\DrupalTestTraits\ExistingSiteBase;

/**
* DictionaryEnforcer QueueWorker test.
*
* @package Drupal\Tests\datastore\Functional
* @group datastore
* @group functional
* @group btb
*/
class DictionaryEnforcerTest extends ExistingSiteBase {
class DictionaryEnforcerTest extends BrowserTestBase {

use GetDataTrait, CleanUp;

protected $defaultTheme = 'stark';

protected static $modules = [
'datastore',
'node',
];

/**
* Uploaded resource file destination.
*
Expand Down Expand Up @@ -84,7 +94,7 @@ class DictionaryEnforcerTest extends ExistingSiteBase {
*
* @var \Drupal\datastore\Controller\ImportController
*/
protected $webServiceApi;
protected $importController;

/**
* External URL for the fixture CSV file.
Expand All @@ -100,30 +110,25 @@ public function setUp(): void {
parent::setUp();

// Initialize services.
$this->cron = \Drupal::service('cron');
$this->metastore = \Drupal::service('dkan.metastore.service');
$this->uuid = \Drupal::service('uuid');
$this->cron = $this->container->get('cron');
$this->metastore = $this->container->get('dkan.metastore.service');
$this->uuid = $this->container->get('uuid');
$this->validMetadataFactory = MetastoreServiceTest::getValidMetadataFactory($this);
$this->webServiceApi = ImportController::create(\Drupal::getContainer());
$this->datasetStorage = \Drupal::service('dkan.metastore.storage')
$this->importController = ImportController::create(\Drupal::getContainer());
$this->datasetStorage = $this->container->get('dkan.metastore.storage')
->getInstance('dataset');
// Copy resource file to uploads directory.
/** @var \Drupal\Core\File\FileSystemInterface $file_system */
$file_system = \Drupal::service('file_system');
$file_system = $this->container->get('file_system');
$upload_path = $file_system->realpath(self::UPLOAD_LOCATION);
$file_system->prepareDirectory($upload_path, FileSystemInterface::CREATE_DIRECTORY);
$file_system->copy(self::TEST_DATA_PATH . self::RESOURCE_FILE, $upload_path, FileSystemInterface::EXISTS_REPLACE);
// Create resource URL.
$this->resourceUrl = \Drupal::service('stream_wrapper_manager')
$this->resourceUrl = $this->container->get('stream_wrapper_manager')
->getViaUri(self::UPLOAD_LOCATION . self::RESOURCE_FILE)
->getExternalUrl();
}

public function tearDown(): void {
parent::tearDown();
$this->removeAllMappedFiles();
}

/**
* Test dictionary enforcement.
*/
Expand Down Expand Up @@ -177,40 +182,52 @@ public function testDictionaryEnforcement(): void {
];
$data_dict = $this->validMetadataFactory->get($this->getDataDictionary($fields, $indexes, $dict_id), 'data-dictionary');
// Create data-dictionary.
$this->metastore->post('data-dictionary', $data_dict);
$this->metastore->publish('data-dictionary', $dict_id);
$this->assertEquals(
$dict_id,
$this->metastore->post('data-dictionary', $data_dict)
);
// Publish should return FALSE, because the node was already published.
$this->assertFalse($this->metastore->publish('data-dictionary', $dict_id));

// Set global data-dictinary in metastore config.
$metastore_config = \Drupal::configFactory()
->getEditable('metastore.settings');
$metastore_config->set('data_dictionary_mode', DataDictionaryDiscovery::MODE_SITEWIDE);
$metastore_config->set('data_dictionary_sitewide', $dict_id);
$metastore_config->save();
$metastore_config = $this->config('metastore.settings');
$metastore_config->set('data_dictionary_mode', DataDictionaryDiscovery::MODE_SITEWIDE)
->set('data_dictionary_sitewide', $dict_id)
->save();

// Build dataset.
$dataset_id = $this->uuid->generate();
$dataset = $this->validMetadataFactory->get($this->getDataset($dataset_id, 'Test ' . $dataset_id, [$this->resourceUrl], TRUE), 'dataset');
$this->assertInstanceOf(
RootedJsonData::class,
$dataset = $this->validMetadataFactory->get(
$this->getDataset($dataset_id, 'Test ' . $dataset_id, [$this->resourceUrl], TRUE),
'dataset'
)
);
// Create dataset.
$this->metastore->post('dataset', $dataset);
$this->metastore->publish('dataset', $dataset_id);
$this->assertEquals($dataset_id, $this->metastore->post('dataset', $dataset));
// Publish should return FALSE, because the node was already published.
$this->assertFalse($this->metastore->publish('dataset', $dataset_id));

// Run cron to import dataset into datastore.
$this->cron->run();
// Run cron to apply data-dictionary.
$this->cron->run();
// Run queue items to perform the import.
$this->runQueues(['datastore_import', 'post_import']);

// Retrieve dataset distribution ID.
$dataset = $this->metastore->get('dataset', $dataset_id);
$dist_id = $dataset->{'$["%Ref:distribution"][0].identifier'};
// Build mock request.
$request = Request::create('http://blah/api');
$this->assertInstanceOf(
RootedJsonData::class,
$dataset = $this->metastore->get('dataset', $dataset_id)
);
$this->assertNotEmpty(
$dist_id = $dataset->{'$["%Ref:distribution"][0].identifier'} ?? NULL
);
// Retrieve schema for dataset resource.
$response = $this->webServiceApi->summary($dist_id, $request);
$response = $this->importController->summary(
$dist_id,
Request::create('http://blah/api')
);
$this->assertEquals(200, $response->getStatusCode(), $response->getContent());
$result = json_decode($response->getContent(), TRUE);

// Clean up after ourselves, before performing the assertion.
$this->metastore->delete('dataset', $dataset_id);

// Validate schema.
$this->assertEquals([
'numOfColumns' => 6,
Expand Down Expand Up @@ -266,4 +283,22 @@ public function testDictionaryEnforcement(): void {
], $result);
}

/**
* Process queues in a predictable order.
*/
private function runQueues(array $relevantQueues = []) {
/** @var \Drupal\Core\Queue\QueueWorkerManager $queueWorkerManager */
$queueWorkerManager = \Drupal::service('plugin.manager.queue_worker');
/** @var \Drupal\Core\Queue\QueueFactory $queueFactory */
$queueFactory = $this->container->get('queue');
foreach ($relevantQueues as $queueName) {
$worker = $queueWorkerManager->createInstance($queueName);
$queue = $queueFactory->get($queueName);
while ($item = $queue->claimItem()) {
$worker->processItem($item->data);
$queue->deleteItem($item);
}
}
}

}

0 comments on commit 2be99b4

Please sign in to comment.