
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

22"""Generic file-based datastore code.""" 

23 

24__all__ = ("FileLikeDatastore", ) 

25 

26import logging 

27from abc import abstractmethod 

28 

29from sqlalchemy import Integer, String 

30 

31from dataclasses import dataclass 

32from typing import Optional, List, Type 

33 

34from lsst.daf.butler import ( 

35 Config, 

36 FileDataset, 

37 DatasetRef, 

38 DatasetTypeNotSupportedError, 

39 Datastore, 

40 DatastoreConfig, 

41 DatastoreValidationError, 

42 FileDescriptor, 

43 FileTemplates, 

44 FileTemplateValidationError, 

45 Formatter, 

46 FormatterFactory, 

47 Location, 

48 LocationFactory, 

49 StorageClass, 

50 StoredFileInfo, 

51) 

52 

53from lsst.daf.butler import ddl 

54from lsst.daf.butler.registry.interfaces import ReadOnlyDatabaseError 

55 

56from lsst.daf.butler.core.repoRelocation import replaceRoot 

57from lsst.daf.butler.core.utils import getInstanceOf, NamedValueSet, getClassOf, transactional 

58from .genericDatastore import GenericBaseDatastore 

59 

60log = logging.getLogger(__name__) 


class _IngestPrepData(Datastore.IngestPrepData):
    """Helper class for FileLikeDatastore ingest implementation.

    Parameters
    ----------
    datasets : `list` of `FileDataset`
        Files to be ingested by this datastore.
    """
    def __init__(self, datasets: List[FileDataset]):
        super().__init__(ref for dataset in datasets for ref in dataset.refs)
        self.datasets = datasets


@dataclass(frozen=True)
class DatastoreFileGetInformation:
    """Collection of useful parameters needed to retrieve a file from
    a Datastore.
    """

    location: Location
    """The location from which to read the dataset."""

    formatter: Formatter
    """The `Formatter` to use to deserialize the dataset."""

    info: StoredFileInfo
    """Stored information about this file and its formatter."""

    assemblerParams: dict
    """Parameters to use for post-processing the retrieved dataset."""

    component: Optional[str]
    """The component to be retrieved (can be `None`)."""

    readStorageClass: StorageClass
    """The `StorageClass` of the dataset being read."""


class FileLikeDatastore(GenericBaseDatastore):
    """Generic Datastore for file-based implementations.

    Should always be sub-classed since key abstract methods are missing.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration as either a `Config` object or URI to file.

    Raises
    ------
    ValueError
        If root location does not exist and ``create`` is `False` in the
        configuration.
    """

    defaultConfigFile = None
    """Path to configuration defaults. Relative to $DAF_BUTLER_DIR/config or
    absolute path. Can be None if no defaults specified.
    """

    root: str
    """Root directory or URI of this `Datastore`."""

    locationFactory: LocationFactory
    """Factory for creating locations relative to the datastore root."""

    formatterFactory: FormatterFactory
    """Factory for creating instances of formatters."""

    templates: FileTemplates
    """File templates that can be used by this `Datastore`."""

    @classmethod
    def setConfigRoot(cls, root, config, full, overwrite=True):
        """Set any filesystem-dependent config options for this Datastore to
        be appropriate for a new empty repository with the given root.

        Parameters
        ----------
        root : `str`
            URI to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        Config.updateParameters(DatastoreConfig, config, full,
                                toUpdate={"root": root},
                                toCopy=("cls", ("records", "table")), overwrite=overwrite)

    @classmethod
    def makeTableSpec(cls):
        return ddl.TableSpec(
            fields=NamedValueSet([
                ddl.FieldSpec(name="dataset_id", dtype=Integer, primaryKey=True),
                ddl.FieldSpec(name="path", dtype=String, length=256, nullable=False),
                ddl.FieldSpec(name="formatter", dtype=String, length=128, nullable=False),
                ddl.FieldSpec(name="storage_class", dtype=String, length=64, nullable=False),
                # TODO: should checksum be Base64Bytes instead?
                ddl.FieldSpec(name="checksum", dtype=String, length=128, nullable=True),
                ddl.FieldSpec(name="file_size", dtype=Integer, nullable=True),
            ]),
            unique=frozenset(),
        )
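
    # For orientation only: with a relational backend the spec above
    # corresponds roughly to a table like the following. The real DDL is
    # generated by the registry from the `ddl.TableSpec`, so this is an
    # illustrative sketch, not the exact schema.
    #
    #     CREATE TABLE <records-table-name> (
    #         dataset_id INTEGER PRIMARY KEY,
    #         path VARCHAR(256) NOT NULL,
    #         formatter VARCHAR(128) NOT NULL,
    #         storage_class VARCHAR(64) NOT NULL,
    #         checksum VARCHAR(128),
    #         file_size INTEGER
    #     )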

    def __init__(self, config, registry, butlerRoot=None):
        super().__init__(config, registry)
        if "root" not in self.config:
            raise ValueError("No root directory specified in configuration")

        # Name ourselves either using an explicit name or a name
        # derived from the (unexpanded) root
        if "name" in self.config:
            self.name = self.config["name"]
        else:
            # We use the unexpanded root in the name to indicate that this
            # datastore can be moved without having to update registry.
            self.name = "{}@{}".format(type(self).__name__,
                                       self.config["root"])

        # Support repository relocation in config
        # Existence of self.root is checked in subclass
        self.root = replaceRoot(self.config["root"], butlerRoot)

        self.locationFactory = LocationFactory(self.root)
        self.formatterFactory = FormatterFactory()

        # Now associate formatters with storage classes
        self.formatterFactory.registerFormatters(self.config["formatters"],
                                                 universe=self.registry.dimensions)

        # Read the file naming templates
        self.templates = FileTemplates(self.config["templates"],
                                       universe=self.registry.dimensions)

        # Storage of paths and formatters, keyed by dataset_id
        self._tableName = self.config["records", "table"]
        try:
            registry.registerOpaqueTable(self._tableName, self.makeTableSpec())
        except ReadOnlyDatabaseError:
            # If the database is read only and we just tried and failed to
            # create a table, it means someone is trying to create a read-only
            # butler client for an empty repo. That should be okay, as long
            # as they then try to get any datasets before some other client
            # creates the table. Chances are they're just validating
            # configuration.
            pass

        # Determine whether checksums should be used
        self.useChecksum = self.config.get("checksum", True)
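
        # The keys read above come from the datastore section of the butler
        # configuration. A minimal YAML fragment might look like this
        # (illustrative values only; see the packaged config defaults for the
        # authoritative layout):
        #
        #     datastore:
        #       cls: lsst.daf.butler.datastores.posixDatastore.PosixDatastore
        #       root: <butlerRoot>
        #       records:
        #         table: posix_datastore_records
        #       checksum: true
        #       templates: ...
        #       formatters: ...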

    def __str__(self):
        return self.root

    @abstractmethod
    def _artifact_exists(self, location):
        """Check that an artifact exists in this datastore at the specified
        location.

        Parameters
        ----------
        location : `Location`
            Expected location of the artifact associated with this datastore.

        Returns
        -------
        exists : `bool`
            True if the location can be found, false otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def _delete_artifact(self, location):
        """Delete the artifact from the datastore.

        Parameters
        ----------
        location : `Location`
            Location of the artifact associated with this datastore.
        """
        raise NotImplementedError()

    def addStoredItemInfo(self, refs, infos):
        # Docstring inherited from GenericBaseDatastore
        records = []
        for ref, info in zip(refs, infos):
            records.append(
                dict(dataset_id=ref.id, formatter=info.formatter, path=info.path,
                     storage_class=info.storageClass.name,
                     checksum=info.checksum, file_size=info.file_size)
            )
        self.registry.insertOpaqueData(self._tableName, *records)

    def getStoredItemInfo(self, ref):
        # Docstring inherited from GenericBaseDatastore
        records = list(self.registry.fetchOpaqueData(self._tableName, dataset_id=ref.id))
        if len(records) == 0:
            raise KeyError(f"Unable to retrieve location associated with dataset {ref}.")
        assert len(records) == 1, "Primary key constraint should make more than one result impossible."
        record = records[0]
        # Convert name of StorageClass to instance
        storageClass = self.storageClassFactory.getStorageClass(record["storage_class"])
        return StoredFileInfo(formatter=record["formatter"],
                              path=record["path"],
                              storageClass=storageClass,
                              checksum=record["checksum"],
                              file_size=record["file_size"])

    def _registered_refs_per_artifact(self, pathInStore):
        """Return all dataset refs associated with the supplied path.

        Parameters
        ----------
        pathInStore : `str`
            Path of interest in the data store.

        Returns
        -------
        ids : `set` of `int`
            All `DatasetRef` IDs associated with this path.
        """
        records = list(self.registry.fetchOpaqueData(self._tableName, path=pathInStore))
        ids = {r["dataset_id"] for r in records}
        return ids

    def removeStoredItemInfo(self, ref):
        # Docstring inherited from GenericBaseDatastore
        self.registry.deleteOpaqueData(self._tableName, dataset_id=ref.id)

    def _get_dataset_location_info(self, ref):
        """Find the `Location` of the requested dataset in the
        `Datastore` and the associated stored file information.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required `Dataset`.

        Returns
        -------
        location : `Location`
            Location of the dataset within the datastore.
            Returns `None` if the dataset can not be located.
        info : `StoredFileInfo`
            Stored information about this file and its formatter.
        """
        # Get the file information (this will fail if no file)
        try:
            storedFileInfo = self.getStoredItemInfo(ref)
        except KeyError:
            return None, None

        # Use the path to determine the location
        location = self.locationFactory.fromPath(storedFileInfo.path)

        return location, storedFileInfo

    def _can_remove_dataset_artifact(self, ref):
        """Check that there is only one dataset associated with the
        specified artifact.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset to be removed.

        Returns
        -------
        can_remove : `bool`
            True if the artifact can be safely removed.
        """
        storedFileInfo = self.getStoredItemInfo(ref)

        # Get all entries associated with this path
        allRefs = self._registered_refs_per_artifact(storedFileInfo.path)
        if not allRefs:
            raise RuntimeError(f"Datastore inconsistency error. {storedFileInfo.path} not in registry")

        # Get all the refs associated with this dataset if it is a composite
        theseRefs = {r.id for r in ref.flatten([ref])}

        # Remove these refs from all the refs and if there is nothing left
        # then we can delete
        remainingRefs = allRefs - theseRefs

        if remainingRefs:
            return False
        return True

    def _prepare_for_get(self, ref, parameters=None):
        """Check parameters for ``get`` and obtain formatter and
        location.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`
            `StorageClass`-specific parameters that specify, for example,
            a slice of the dataset to be loaded.

        Returns
        -------
        getInfo : `DatastoreFileGetInformation`
            Parameters needed to retrieve the file.
        """
        log.debug("Retrieve %s from %s with parameters %s", ref, self.name, parameters)

        # Get file metadata and internal metadata
        location, storedFileInfo = self._get_dataset_location_info(ref)
        if location is None:
            raise FileNotFoundError(f"Could not retrieve dataset {ref}.")

        # We have a write storage class and a read storage class and they
        # can be different for concrete composites.
        readStorageClass = ref.datasetType.storageClass
        writeStorageClass = storedFileInfo.storageClass

        # Check that the supplied parameters are suitable for the type read
        readStorageClass.validateParameters(parameters)

        # Is this a component request?
        component = ref.datasetType.component()

        formatter = getInstanceOf(storedFileInfo.formatter,
                                  FileDescriptor(location, readStorageClass=readStorageClass,
                                                 storageClass=writeStorageClass, parameters=parameters),
                                  ref.dataId)
        formatterParams, assemblerParams = formatter.segregateParameters()

        return DatastoreFileGetInformation(location, formatter, storedFileInfo,
                                           assemblerParams, component, readStorageClass)

    def _prepare_for_put(self, inMemoryDataset, ref):
        """Check the arguments for ``put`` and obtain formatter and
        location.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Returns
        -------
        location : `Location`
            The location to write the dataset.
        formatter : `Formatter`
            The `Formatter` to use to write the dataset.

        Raises
        ------
        TypeError
            Supplied object and storage class are inconsistent.
        DatasetTypeNotSupportedError
            The associated `DatasetType` is not handled by this datastore.
        """
        self._validate_put_parameters(inMemoryDataset, ref)

        # Work out output file name
        try:
            template = self.templates.getTemplate(ref)
        except KeyError as e:
            raise DatasetTypeNotSupportedError(f"Unable to find template for {ref}") from e

        location = self.locationFactory.fromPath(template.format(ref))

        # Get the formatter based on the storage class
        storageClass = ref.datasetType.storageClass
        try:
            formatter = self.formatterFactory.getFormatter(ref,
                                                           FileDescriptor(location,
                                                                          storageClass=storageClass),
                                                           ref.dataId)
        except KeyError as e:
            raise DatasetTypeNotSupportedError(f"Unable to find formatter for {ref}") from e

        return location, formatter

    @abstractmethod
    def _standardizeIngestPath(self, path: str, *, transfer: Optional[str] = None) -> str:
        """Standardize the path of a to-be-ingested file.

        Parameters
        ----------
        path : `str`
            Path of a file to be ingested.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.
            This implementation is provided only so
            `NotImplementedError` can be raised if the mode is not supported;
            actual transfers are deferred to `_extractIngestInfo`.

        Returns
        -------
        path : `str`
            New path in what the datastore considers standard form.

        Notes
        -----
        Subclasses of `FileLikeDatastore` should implement this method instead
        of `_prepIngest`. It should not modify the data repository or given
        file in any way.

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        FileNotFoundError
            Raised if one of the given files does not exist.
        """
        raise NotImplementedError("Must be implemented by subclasses.")

    @abstractmethod
    def _extractIngestInfo(self, path: str, ref: DatasetRef, *, formatter: Type[Formatter],
                           transfer: Optional[str] = None) -> StoredFileInfo:
        """Relocate (if necessary) and extract `StoredFileInfo` from a
        to-be-ingested file.

        Parameters
        ----------
        path : `str`
            Path of a file to be ingested.
        ref : `DatasetRef`
            Reference for the dataset being ingested. Guaranteed to have
            ``dataset_id is not None``.
        formatter : `type`
            `Formatter` subclass to use for this dataset.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        info : `StoredFileInfo`
            Internal datastore record for this file. This will be inserted by
            the caller; `_extractIngestInfo` is only responsible for
            creating and populating the struct.

        Raises
        ------
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.
        """
        raise NotImplementedError("Must be implemented by subclasses.")

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> _IngestPrepData:
        # Docstring inherited from Datastore._prepIngest.
        filtered = []
        for dataset in datasets:
            acceptable = [ref for ref in dataset.refs if self.constraints.isAcceptable(ref)]
            if not acceptable:
                continue
            else:
                dataset.refs = acceptable
            if dataset.formatter is None:
                dataset.formatter = self.formatterFactory.getFormatterClass(dataset.refs[0])
            else:
                dataset.formatter = getClassOf(dataset.formatter)
            dataset.path = self._standardizeIngestPath(dataset.path, transfer=transfer)
            filtered.append(dataset)
        return _IngestPrepData(filtered)

    @transactional
    def _finishIngest(self, prepData: Datastore.IngestPrepData, *, transfer: Optional[str] = None):
        # Docstring inherited from Datastore._finishIngest.
        refsAndInfos = []
        for dataset in prepData.datasets:
            # Do ingest as if the first dataset ref is associated with the file
            info = self._extractIngestInfo(dataset.path, dataset.refs[0], formatter=dataset.formatter,
                                           transfer=transfer)
            refsAndInfos.extend([(ref, info) for ref in dataset.refs])
        self._register_datasets(refsAndInfos)
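
    # Ingest flow sketch: the inherited ``Datastore.ingest`` drives the two
    # halves above, roughly like this hypothetical caller (path, ref and
    # transfer mode are illustrative):
    #
    #     dataset = FileDataset(path="external/file.fits", refs=[ref])
    #     prepData = datastore._prepIngest(dataset, transfer="copy")
    #     datastore._finishIngest(prepData, transfer="copy")
    #
    # Client code should go through ``Datastore.ingest`` (or ``Butler.ingest``)
    # rather than calling the underscored helpers directly.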

    def exists(self, ref):
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        location, _ = self._get_dataset_location_info(ref)
        if location is None:
            return False
        return self._artifact_exists(location)

    def getUri(self, ref, predict=False):
        """URI to the Dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True`, allow URIs to be returned for datasets that have not
            been written.

        Returns
        -------
        uri : `str`
            URI string pointing to the dataset within the datastore. If the
            dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.

        Notes
        -----
        When a predicted URI is requested an attempt will be made to form
        a reasonable URI based on file templates and the expected formatter.
        """
        # if this has never been written then we have to guess
        if not self.exists(ref):
            if not predict:
                raise FileNotFoundError("Dataset {} not in this datastore".format(ref))

            template = self.templates.getTemplate(ref)
            location = self.locationFactory.fromPath(template.format(ref))
            storageClass = ref.datasetType.storageClass
            formatter = self.formatterFactory.getFormatter(ref, FileDescriptor(location,
                                                                               storageClass=storageClass))
            # Try to use the extension attribute but ignore problems if the
            # formatter does not define one.
            try:
                location = formatter.makeUpdatedLocation(location)
            except Exception:
                # Use the default extension
                pass

            # Add a URI fragment to indicate this is a guess
            return location.uri + "#predicted"

        # If this is a ref that we have written we can get the path.
        # Get file metadata and internal metadata
        storedFileInfo = self.getStoredItemInfo(ref)

        # Use the path to determine the location
        location = self.locationFactory.fromPath(storedFileInfo.path)

        return location.uri
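
    # Illustrative return values (hypothetical paths): an existing dataset
    # might yield "file:///repo/calexp/r/calexp_v123.fits", while
    # ``predict=True`` for an unwritten dataset yields a guessed location
    # marked with the fragment, e.g.
    # "file:///repo/calexp/r/calexp_v124.fits#predicted".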

    @transactional
    def trash(self, ref, ignore_errors=True):
        """Indicate to the datastore that a dataset can be removed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        ignore_errors : `bool`
            If `True` return without error even if something went wrong.
            Problems could occur if another process is simultaneously trying
            to delete.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.
        """
        # Get file metadata and internal metadata
        log.debug("Trashing %s in datastore %s", ref, self.name)
        location, _ = self._get_dataset_location_info(ref)
        if location is None:
            err_msg = f"Requested dataset to trash ({ref}) is not known to datastore {self.name}"
            if ignore_errors:
                log.warning(err_msg)
                return
            else:
                raise FileNotFoundError(err_msg)

        if not self._artifact_exists(location):
            err_msg = f"Dataset is known to datastore {self.name} but " \
                      f"associated artifact ({location.uri}) is missing"
            if ignore_errors:
                log.warning(err_msg)
                return
            else:
                raise FileNotFoundError(err_msg)

        # Mark dataset as trashed
        try:
            self._move_to_trash_in_registry(ref)
        except Exception as e:
            if ignore_errors:
                log.warning(f"Attempted to mark dataset ({ref}) to be trashed in datastore {self.name} "
                            f"but encountered an error: {e}")
                pass
            else:
                raise

690 @transactional 

691 def emptyTrash(self, ignore_errors=True): 

692 """Remove all datasets from the trash. 

693 

694 Parameters 

695 ---------- 

696 ignore_errors : `bool` 

697 If `True` return without error even if something went wrong. 

698 Problems could occur if another process is simultaneously trying 

699 to delete. 

700 """ 

701 log.debug("Emptying trash in datastore %s", self.name) 

702 trashed = self.registry.getTrashedDatasets(self.name) 

703 

704 for ref in trashed: 

705 location, _ = self._get_dataset_location_info(ref) 

706 

707 if location is None: 707 ↛ 708line 707 didn't jump to line 708, because the condition on line 707 was never true

708 err_msg = f"Requested dataset ({ref}) does not exist in datastore {self.name}" 

709 if ignore_errors: 

710 log.warning(err_msg) 

711 continue 

712 else: 

713 raise FileNotFoundError(err_msg) 

714 

715 if not self._artifact_exists(location): 715 ↛ 716line 715 didn't jump to line 716, because the condition on line 715 was never true

716 err_msg = f"Dataset {location.uri} no longer present in datastore {self.name}" 

717 if ignore_errors: 

718 log.warning(err_msg) 

719 continue 

720 else: 

721 raise FileNotFoundError(err_msg) 

722 

723 # Can only delete the artifact if there are no references 

724 # to the file from untrashed dataset refs. 

725 if self._can_remove_dataset_artifact(ref): 

726 # Point of no return for this artifact 

727 log.debug("Removing artifact %s from datastore %s", location.uri, self.name) 

728 try: 

729 self._delete_artifact(location) 

730 except Exception as e: 

731 if ignore_errors: 

732 log.critical("Encountered error removing artifact %s from datastore %s: %s", 

733 location.uri, self.name, e) 

734 else: 

735 raise 

736 

737 # Now must remove the entry from the internal registry even if the 

738 # artifact removal failed and was ignored, 

739 # otherwise the removal check above will never be true 

740 try: 

741 self.removeStoredItemInfo(ref) 

742 except Exception as e: 

743 if ignore_errors: 

744 log.warning(f"Error removing dataset %s (%s) from internal registry of %s: %s", 

745 ref.id, location.uri, self.name, e) 

746 continue 

747 else: 

748 raise 

749 

750 # Inform registry that we have removed items from datastore 

751 # This should work even if another process is clearing out those rows 

752 self.registry.emptyDatasetLocationsTrash(self.name, trashed) 
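
    # Two-phase deletion sketch (hypothetical driver code): datasets are first
    # marked as trashed and only later are the artifacts reaped in bulk:
    #
    #     datastore.trash(ref)    # record the intent; nothing is deleted yet
    #     datastore.emptyTrash()  # delete artifacts with no remaining refs
    #
    # Splitting removal this way lets the caller (typically
    # ``Butler.pruneDatasets``) coordinate registry and datastore changes
    # transactionally.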

    def validateConfiguration(self, entities, logFailures=False):
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.
            All the problems are reported in a single exception.

        Notes
        -----
        This method checks that all the supplied entities have valid file
        templates and also have formatters defined.
        """

        templateFailed = None
        try:
            self.templates.validateTemplates(entities, logFailures=logFailures)
        except FileTemplateValidationError as e:
            templateFailed = str(e)

        formatterFailed = []
        for entity in entities:
            try:
                self.formatterFactory.getFormatterClass(entity)
            except KeyError as e:
                formatterFailed.append(str(e))
                if logFailures:
                    log.fatal("Formatter failure: %s", e)

        if templateFailed or formatterFailed:
            messages = []
            if templateFailed:
                messages.append(templateFailed)
            if formatterFailed:
                messages.append(",".join(formatterFailed))
            msg = ";\n".join(messages)
            raise DatastoreValidationError(msg)

    def getLookupKeys(self):
        # Docstring is inherited from base class
        return self.templates.getLookupKeys() | self.formatterFactory.getLookupKeys() | \
            self.constraints.getLookupKeys()

    def validateKey(self, lookupKey, entity):
        # Docstring is inherited from base class
        # The key can be valid in either formatters or templates so we can
        # only check the template if it exists
        if lookupKey in self.templates:
            try:
                self.templates[lookupKey].validateTemplate(entity)
            except FileTemplateValidationError as e:
                raise DatastoreValidationError(e) from e