1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23"""Generic file-based datastore code.""" 

24 

25__all__ = ("FileLikeDatastore", ) 

26 

27import logging 

28from abc import abstractmethod 

29 

30from sqlalchemy import BigInteger, String 

31 

32from dataclasses import dataclass 

33from typing import ( 

34 TYPE_CHECKING, 

35 Any, 

36 ClassVar, 

37 Dict, 

38 Iterable, 

39 List, 

40 Mapping, 

41 Optional, 

42 Set, 

43 Tuple, 

44 Type, 

45 Union, 

46) 

47 

48from lsst.daf.butler import ( 

49 ButlerURI, 

50 CompositesMap, 

51 Config, 

52 FileDataset, 

53 DatasetRef, 

54 DatasetType, 

55 DatasetTypeNotSupportedError, 

56 Datastore, 

57 DatastoreConfig, 

58 DatastoreValidationError, 

59 FileDescriptor, 

60 FileTemplates, 

61 FileTemplateValidationError, 

62 Formatter, 

63 FormatterFactory, 

64 Location, 

65 LocationFactory, 

66 StorageClass, 

67 StoredFileInfo, 

68) 

69 

70from lsst.daf.butler import ddl 

71from lsst.daf.butler.registry.interfaces import ( 

72 ReadOnlyDatabaseError, 

73 DatastoreRegistryBridge, 

74) 

75 

76from lsst.daf.butler.core.repoRelocation import replaceRoot 

77from lsst.daf.butler.core.utils import getInstanceOf, getClassOf, transactional 

78from .genericDatastore import GenericBaseDatastore 

79 

80if TYPE_CHECKING:

81 from lsst.daf.butler import LookupKey 

82 from lsst.daf.butler.registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager 

83 

84log = logging.getLogger(__name__) 

85 

86# String to use when a Python None is encountered 

87NULLSTR = "__NULL_STRING__" 

88 

89 

90class _IngestPrepData(Datastore.IngestPrepData): 

91 """Helper class for FileLikeDatastore ingest implementation. 

92 

93 Parameters 

94 ---------- 

95 datasets : `list` of `FileDataset` 

96 Files to be ingested by this datastore. 

97 """ 

98 def __init__(self, datasets: List[FileDataset]): 

99 super().__init__(ref for dataset in datasets for ref in dataset.refs) 

100 self.datasets = datasets 

101 

102 

103@dataclass(frozen=True) 

104class DatastoreFileGetInformation: 

105 """Collection of useful parameters needed to retrieve a file from 

106 a Datastore. 

107 """ 

108 

109 location: Location 

110 """The location from which to read the dataset.""" 

111 

112 formatter: Formatter 

113 """The `Formatter` to use to deserialize the dataset.""" 

114 

115 info: StoredFileInfo 

116 """Stored information about this file and its formatter.""" 

117 

118 assemblerParams: Dict[str, Any] 

119 """Parameters to use for post-processing the retrieved dataset.""" 

120 

121 formatterParams: Dict[str, Any] 

122 """Parameters that were understood by the associated formatter.""" 

123 

124 component: Optional[str] 

125 """The component to be retrieved (can be `None`).""" 

126 

127 readStorageClass: StorageClass 

128 """The `StorageClass` of the dataset being read.""" 

129 

130 

131class FileLikeDatastore(GenericBaseDatastore): 

132 """Generic Datastore for file-based implementations. 

133 

134 Should always be sub-classed since key abstract methods are missing. 

135 

136 Parameters 

137 ---------- 

138 config : `DatastoreConfig` or `str` 

139 Configuration as either a `Config` object or URI to file. 

140 bridgeManager : `DatastoreRegistryBridgeManager` 

141 Object that manages the interface between `Registry` and datastores. 

142 butlerRoot : `str`, optional 

143 New datastore root to use to override the configuration value. 

144 

145 Raises 

146 ------ 

147 ValueError 

148 If root location does not exist and ``create`` is `False` in the 

149 configuration. 

150 """ 

151 

152 defaultConfigFile: ClassVar[Optional[str]] = None 

153 """Path to configuration defaults. Accessed within the ``config`` resource 

154 or relative to a search path. Can be None if no defaults specified. 

155 """ 

156 

157 root: str 

158 """Root directory or URI of this `Datastore`.""" 

159 

160 locationFactory: LocationFactory 

161 """Factory for creating locations relative to the datastore root.""" 

162 

163 formatterFactory: FormatterFactory 

164 """Factory for creating instances of formatters.""" 

165 

166 templates: FileTemplates 

167 """File templates that can be used by this `Datastore`.""" 

168 

169 composites: CompositesMap 

170 """Determines whether a dataset should be disassembled on put.""" 

171 

172 @classmethod 

173 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None: 

174 """Set any filesystem-dependent config options for this Datastore to 

175 be appropriate for a new empty repository with the given root. 

176 

177 Parameters 

178 ---------- 

179 root : `str` 

180 URI to the root of the data repository. 

181 config : `Config` 

182 A `Config` to update. Only the subset understood by 

183 this component will be updated. Will not expand 

184 defaults. 

185 full : `Config` 

186 A complete config with all defaults expanded that can be 

187 converted to a `DatastoreConfig`. Read-only and will not be 

188 modified by this method. 

189 Repository-specific options that should not be obtained 

190 from defaults when Butler instances are constructed 

191 should be copied from ``full`` to ``config``. 

192 overwrite : `bool`, optional 

193 If `False`, do not modify a value in ``config`` if the value 

194 already exists. Default is always to overwrite with the provided 

195 ``root``. 

196 

197 Notes 

198 ----- 

199 If a keyword is explicitly defined in the supplied ``config`` it 

200 will not be overridden by this method if ``overwrite`` is `False`. 

201 This allows explicit values set in external configs to be retained. 

202 """ 

203 Config.updateParameters(DatastoreConfig, config, full, 

204 toUpdate={"root": root}, 

205 toCopy=("cls", ("records", "table")), overwrite=overwrite) 
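    # A minimal sketch of how this hook is used when seeding a new repository;
    # ``MyDatastore``, ``seedConfig`` and ``fullConfig`` are hypothetical names
    # introduced only for illustration.
    #
    #     seedConfig = Config()                         # subset that will be written out
    #     fullConfig = DatastoreConfig(fullConfigFile)  # defaults already expanded
    #     MyDatastore.setConfigRoot("file:///data/new_repo", seedConfig, fullConfig)
    #     # seedConfig now carries the new "root" value plus the copied
    #     # "cls" and ("records", "table") entries.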

206 

207 @classmethod 

208 def makeTableSpec(cls) -> ddl.TableSpec: 

209 return ddl.TableSpec( 

210 fields=[ 

211 ddl.FieldSpec(name="dataset_id", dtype=BigInteger, primaryKey=True), 

212 ddl.FieldSpec(name="path", dtype=String, length=256, nullable=False), 

213 ddl.FieldSpec(name="formatter", dtype=String, length=128, nullable=False), 

214 ddl.FieldSpec(name="storage_class", dtype=String, length=64, nullable=False), 

215 # Use empty string to indicate no component 

216 ddl.FieldSpec(name="component", dtype=String, length=32, primaryKey=True), 

217 # TODO: should checksum be Base64Bytes instead? 

218 ddl.FieldSpec(name="checksum", dtype=String, length=128, nullable=True), 

219 ddl.FieldSpec(name="file_size", dtype=BigInteger, nullable=True), 

220 ], 

221 unique=frozenset(), 

222 ) 
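    # For reference, one row of this opaque table (as assembled later in
    # ``addStoredItemInfo``) looks roughly like the dictionary below; the
    # values are invented for illustration.
    #
    #     {"dataset_id": 42, "path": "run1/raw/raw_0042.fits",
    #      "formatter": "some.package.SomeFormatter", "storage_class": "Exposure",
    #      "component": "__NULL_STRING__", "checksum": None, "file_size": 1234}
    #
    # ``component`` stores the NULLSTR sentinel rather than NULL because the
    # column is part of the primary key.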

223 

224 def __init__(self, config: Union[DatastoreConfig, str], 

225 bridgeManager: DatastoreRegistryBridgeManager, butlerRoot: Optional[str] = None): 

226 super().__init__(config, bridgeManager) 

227 if "root" not in self.config:

228 raise ValueError("No root directory specified in configuration") 

229 

230 # Name ourselves either using an explicit name or a name 

231 # derived from the (unexpanded) root 

232 if "name" in self.config: 

233 self.name = self.config["name"] 

234 else: 

235 # We use the unexpanded root in the name to indicate that this 

236 # datastore can be moved without having to update registry. 

237 self.name = "{}@{}".format(type(self).__name__, 

238 self.config["root"]) 

239 

240 # Support repository relocation in config 

241 # Existence of self.root is checked in subclass 

242 self.root = replaceRoot(self.config["root"], butlerRoot) 

243 

244 self.locationFactory = LocationFactory(self.root) 

245 self.formatterFactory = FormatterFactory() 

246 

247 # Now associate formatters with storage classes 

248 self.formatterFactory.registerFormatters(self.config["formatters"], 

249 universe=bridgeManager.universe) 

250 

251 # Read the file naming templates 

252 self.templates = FileTemplates(self.config["templates"], 

253 universe=bridgeManager.universe) 

254 

255 # See if composites should be disassembled 

256 self.composites = CompositesMap(self.config["composites"], 

257 universe=bridgeManager.universe) 

258 

259 tableName = self.config["records", "table"] 

260 try: 

261 # Storage of paths and formatters, keyed by dataset_id 

262 self._table = bridgeManager.opaque.register(tableName, self.makeTableSpec()) 

263 # Interface to Registry. 

264 self._bridge = bridgeManager.register(self.name) 

265 except ReadOnlyDatabaseError: 

266 # If the database is read only and we just tried and failed to 

267 # create a table, it means someone is trying to create a read-only 

268 # butler client for an empty repo. That should be okay, as long 

269 # as they don't then try to get any datasets before some other client 

270 # creates the table. Chances are they're just validating 

271 # configuration. 

272 pass 

273 

274 # Determine whether checksums should be used 

275 self.useChecksum = self.config.get("checksum", True) 

276 

277 def __str__(self) -> str: 

278 return self.root 

279 

280 @property 

281 def bridge(self) -> DatastoreRegistryBridge: 

282 return self._bridge 

283 

284 @abstractmethod 

285 def _artifact_exists(self, location: Location) -> bool: 

286 """Check that an artifact exists in this datastore at the specified 

287 location. 

288 

289 Parameters 

290 ---------- 

291 location : `Location` 

292 Expected location of the artifact associated with this datastore. 

293 

294 Returns 

295 ------- 

296 exists : `bool` 

297 `True` if the location can be found, `False` otherwise. 

298 """ 

299 raise NotImplementedError() 

300 

301 @abstractmethod 

302 def _delete_artifact(self, location: Location) -> None: 

303 """Delete the artifact from the datastore. 

304 

305 Parameters 

306 ---------- 

307 location : `Location` 

308 Location of the artifact associated with this datastore. 

309 """ 

310 raise NotImplementedError() 
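    # A minimal sketch of what a concrete subclass provides for the two
    # artifact-level abstract methods above, assuming artifacts live on a local
    # filesystem and that ``Location.path`` resolves to an OS path; the class
    # name is hypothetical and a real implementation needs more care
    # (transactions, missing-file handling):
    #
    #     import os
    #
    #     class PosixLikeDatastore(FileLikeDatastore):
    #         def _artifact_exists(self, location: Location) -> bool:
    #             # Treat presence of the file as presence of the artifact.
    #             return os.path.exists(location.path)
    #
    #         def _delete_artifact(self, location: Location) -> None:
    #             # Remove the file backing this dataset.
    #             os.remove(location.path)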

311 

312 def addStoredItemInfo(self, refs: Iterable[DatasetRef], infos: Iterable[StoredFileInfo]) -> None: 

313 # Docstring inherited from GenericBaseDatastore 

314 records = [] 

315 for ref, info in zip(refs, infos): 

316 # Component should come from ref and fall back on info 

317 component = ref.datasetType.component() 

318 if component is None and info.component is not None:

319 component = info.component 

320 if component is None: 

321 # Use empty string since we want this to be part of the 

322 # primary key. 

323 component = NULLSTR 

324 records.append( 

325 dict(dataset_id=ref.id, formatter=info.formatter, path=info.path, 

326 storage_class=info.storageClass.name, component=component, 

327 checksum=info.checksum, file_size=info.file_size) 

328 ) 

329 self._table.insert(*records) 

330 

331 def getStoredItemsInfo(self, ref: DatasetIdRef) -> List[StoredFileInfo]: 

332 # Docstring inherited from GenericBaseDatastore 

333 

334 # Look for the dataset_id -- there might be multiple matches 

335 # if we have disassembled the dataset. 

336 records = list(self._table.fetch(dataset_id=ref.id)) 

337 

338 results = [] 

339 for record in records: 

340 # Convert name of StorageClass to instance 

341 storageClass = self.storageClassFactory.getStorageClass(record["storage_class"]) 

342 component = record["component"] if (record["component"] 

343 and record["component"] != NULLSTR) else None 

344 

345 info = StoredFileInfo(formatter=record["formatter"], 

346 path=record["path"], 

347 storageClass=storageClass, 

348 component=component, 

349 checksum=record["checksum"], 

350 file_size=record["file_size"]) 

351 results.append(info) 

352 

353 return results 

354 

355 def _registered_refs_per_artifact(self, pathInStore: str) -> Set[int]: 

356 """Return all dataset refs associated with the supplied path. 

357 

358 Parameters 

359 ---------- 

360 pathInStore : `str` 

361 Path of interest in the data store. 

362 

363 Returns 

364 ------- 

365 ids : `set` of `int` 

366 All `DatasetRef` IDs associated with this path. 

367 """ 

368 records = list(self._table.fetch(path=pathInStore)) 

369 ids = {r["dataset_id"] for r in records} 

370 return ids 

371 

372 def removeStoredItemInfo(self, ref: DatasetIdRef) -> None: 

373 # Docstring inherited from GenericBaseDatastore 

374 self._table.delete(dataset_id=ref.id) 

375 

376 def _get_dataset_locations_info(self, ref: DatasetIdRef) -> List[Tuple[Location, StoredFileInfo]]: 

377 r"""Find all the `Location`\ s of the requested dataset in the 

378 `Datastore` and the associated stored file information. 

379 

380 Parameters 

381 ---------- 

382 ref : `DatasetRef` 

383 Reference to the required `Dataset`. 

384 

385 Returns 

386 ------- 

387 results : `list` [`tuple` [`Location`, `StoredFileInfo` ]] 

388 Location of the dataset within the datastore and 

389 stored information about each file and its formatter. 

390 """ 

391 # Get the file information (this will fail if no file) 

392 records = self.getStoredItemsInfo(ref) 

393 

394 # Use the path to determine the location 

395 return [(self.locationFactory.fromPath(r.path), r) for r in records] 

396 

397 def _can_remove_dataset_artifact(self, ref: DatasetIdRef, location: Location) -> bool: 

398 """Check that there is only one dataset associated with the 

399 specified artifact. 

400 

401 Parameters 

402 ---------- 

403 ref : `DatasetRef` or `FakeDatasetRef` 

404 Dataset to be removed. 

405 location : `Location` 

406 The location of the artifact to be removed. 

407 

408 Returns 

409 ------- 

410 can_remove : `bool` 

411 `True` if the artifact can be safely removed. 

412 """ 

413 

414 # Get all entries associated with this path 

415 allRefs = self._registered_refs_per_artifact(location.pathInStore) 

416 if not allRefs:

417 raise RuntimeError(f"Datastore inconsistency error. {location.pathInStore} not in registry") 

418 

419 # Remove these refs from all the refs and if there is nothing left 

420 # then we can delete 

421 remainingRefs = allRefs - {ref.id} 

422 

423 if remainingRefs: 

424 return False 

425 return True 
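    # Worked example of the reference counting above: if the artifact at
    # ``location`` is registered for dataset_ids {7, 8} and ``ref.id`` is 7,
    # ``remainingRefs`` is {8}, so the artifact must be kept; only when the
    # ref being removed is the last one registered does this return `True`.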

426 

427 def _prepare_for_get(self, ref: DatasetRef, 

428 parameters: Optional[Mapping[str, Any]] = None) -> List[DatastoreFileGetInformation]: 

429 """Check parameters for ``get`` and obtain formatter and 

430 location. 

431 

432 Parameters 

433 ---------- 

434 ref : `DatasetRef` 

435 Reference to the required Dataset. 

436 parameters : `dict` 

437 `StorageClass`-specific parameters that specify, for example, 

438 a slice of the dataset to be loaded. 

439 

440 Returns 

441 ------- 

442 getInfo : `list` [`DatastoreFileGetInformation`] 

443 Parameters needed to retrieve each file. 

444 """ 

445 log.debug("Retrieve %s from %s with parameters %s", ref, self.name, parameters) 

446 

447 # Get file metadata and internal metadata 

448 fileLocations = self._get_dataset_locations_info(ref) 

449 if not fileLocations: 

450 raise FileNotFoundError(f"Could not retrieve dataset {ref}.") 

451 

452 # The storage class we want to use eventually 

453 refStorageClass = ref.datasetType.storageClass 

454 

455 if len(fileLocations) > 1: 

456 disassembled = True 

457 else: 

458 disassembled = False 

459 

460 # Is this a component request? 

461 refComponent = ref.datasetType.component() 

462 

463 fileGetInfo = [] 

464 for location, storedFileInfo in fileLocations: 

465 

466 # The storage class used to write the file 

467 writeStorageClass = storedFileInfo.storageClass 

468 

469 # If this has been disassembled we need read to match the write 

470 if disassembled: 

471 readStorageClass = writeStorageClass 

472 else: 

473 readStorageClass = refStorageClass 

474 

475 formatter = getInstanceOf(storedFileInfo.formatter, 

476 FileDescriptor(location, readStorageClass=readStorageClass, 

477 storageClass=writeStorageClass, parameters=parameters), 

478 ref.dataId) 

479 

480 formatterParams, notFormatterParams = formatter.segregateParameters() 

481 

482 # Of the remaining parameters, extract the ones supported by 

483 # this StorageClass (for components not all will be handled) 

484 assemblerParams = readStorageClass.filterParameters(notFormatterParams) 

485 

486 # The ref itself could be a component if the dataset was 

487 # disassembled by butler, or we disassembled in datastore and 

488 # components came from the datastore records 

489 component = storedFileInfo.component if storedFileInfo.component else refComponent 

490 

491 fileGetInfo.append(DatastoreFileGetInformation(location, formatter, storedFileInfo, 

492 assemblerParams, formatterParams, 

493 component, readStorageClass)) 

494 

495 return fileGetInfo 

496 

497 def _prepare_for_put(self, inMemoryDataset: Any, ref: DatasetRef) -> Tuple[Location, Formatter]: 

498 """Check the arguments for ``put`` and obtain formatter and 

499 location. 

500 

501 Parameters 

502 ---------- 

503 inMemoryDataset : `object` 

504 The dataset to store. 

505 ref : `DatasetRef` 

506 Reference to the associated Dataset. 

507 

508 Returns 

509 ------- 

510 location : `Location` 

511 The location to write the dataset. 

512 formatter : `Formatter` 

513 The `Formatter` to use to write the dataset. 

514 

515 Raises 

516 ------ 

517 TypeError 

518 Supplied object and storage class are inconsistent. 

519 DatasetTypeNotSupportedError 

520 The associated `DatasetType` is not handled by this datastore. 

521 """ 

522 self._validate_put_parameters(inMemoryDataset, ref) 

523 

524 # Work out output file name 

525 try: 

526 template = self.templates.getTemplate(ref) 

527 except KeyError as e: 

528 raise DatasetTypeNotSupportedError(f"Unable to find template for {ref}") from e 

529 

530 location = self.locationFactory.fromPath(template.format(ref)) 

531 

532 # Get the formatter based on the storage class 

533 storageClass = ref.datasetType.storageClass 

534 try: 

535 formatter = self.formatterFactory.getFormatter(ref, 

536 FileDescriptor(location, 

537 storageClass=storageClass), 

538 ref.dataId) 

539 except KeyError as e: 

540 raise DatasetTypeNotSupportedError(f"Unable to find formatter for {ref} in datastore " 

541 f"{self.name}") from e 

542 

543 # Now that we know the formatter, update the location 

544 location = formatter.makeUpdatedLocation(location) 

545 

546 return location, formatter 
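    # Illustrative flow with a hypothetical template and dataset type:
    # ``template.format(ref)`` expands something like
    # "{run}/{datasetType}/{datasetType}_{visit}" to "run1/calexp/calexp_1234",
    # the formatter is looked up for the ref's storage class, and
    # ``formatter.makeUpdatedLocation`` then appends that formatter's file
    # extension, giving e.g. "run1/calexp/calexp_1234.fits".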

547 

548 @abstractmethod 

549 def _standardizeIngestPath(self, path: str, *, transfer: Optional[str] = None) -> str: 

550 """Standardize the path of a to-be-ingested file. 

551 

552 Parameters 

553 ---------- 

554 path : `str` 

555 Path of a file to be ingested. 

556 transfer : `str`, optional 

557 How (and whether) the dataset should be added to the datastore. 

558 See `ingest` for details of transfer modes. 

559 This implementation is provided only so 

560 `NotImplementedError` can be raised if the mode is not supported; 

561 actual transfers are deferred to `_extractIngestInfo`. 

562 

563 Returns 

564 ------- 

565 path : `str` 

566 New path in what the datastore considers standard form. 

567 

568 Notes 

569 ----- 

570 Subclasses of `FileLikeDatastore` should implement this method instead 

571 of `_prepIngest`. It should not modify the data repository or given 

572 file in any way. 

573 

574 Raises 

575 ------ 

576 NotImplementedError 

577 Raised if the datastore does not support the given transfer mode 

578 (including the case where ingest is not supported at all). 

579 FileNotFoundError 

580 Raised if one of the given files does not exist. 

581 """ 

582 raise NotImplementedError("Must be implemented by subclasses.") 

583 

584 @abstractmethod 

585 def _extractIngestInfo(self, path: Union[str, ButlerURI], ref: DatasetRef, *, 

586 formatter: Union[Formatter, Type[Formatter]], 

587 transfer: Optional[str] = None) -> StoredFileInfo: 

588 """Relocate (if necessary) and extract `StoredFileInfo` from a 

589 to-be-ingested file. 

590 

591 Parameters 

592 ---------- 

593 path : `str` or `ButlerURI` 

594 URI or path of a file to be ingested. 

595 ref : `DatasetRef` 

596 Reference for the dataset being ingested. Guaranteed to have 

597 ``dataset_id is not None``. 

598 formatter : `type` or `Formatter` 

599 `Formatter` subclass to use for this dataset or an instance. 

600 transfer : `str`, optional 

601 How (and whether) the dataset should be added to the datastore. 

602 See `ingest` for details of transfer modes. 

603 

604 Returns 

605 ------- 

606 info : `StoredFileInfo` 

607 Internal datastore record for this file. This will be inserted by 

608 the caller; `_extractIngestInfo` is only responsible for 

609 creating and populating the struct. 

610 

611 Raises 

612 ------ 

613 FileNotFoundError 

614 Raised if one of the given files does not exist. 

615 FileExistsError 

616 Raised if transfer is not `None` but the (internal) location the 

617 file would be moved to is already occupied. 

618 """ 

619 raise NotImplementedError("Must be implemented by subclasses.") 

620 

621 def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> _IngestPrepData: 

622 # Docstring inherited from Datastore._prepIngest. 

623 filtered = [] 

624 for dataset in datasets: 

625 acceptable = [ref for ref in dataset.refs if self.constraints.isAcceptable(ref)] 

626 if not acceptable: 

627 continue 

628 else: 

629 dataset.refs = acceptable 

630 if dataset.formatter is None: 

631 dataset.formatter = self.formatterFactory.getFormatterClass(dataset.refs[0]) 

632 else: 

633 assert isinstance(dataset.formatter, (type, str)) 

634 dataset.formatter = getClassOf(dataset.formatter) 

635 dataset.path = self._standardizeIngestPath(dataset.path, transfer=transfer) 

636 filtered.append(dataset) 

637 return _IngestPrepData(filtered) 
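    # Sketch of the ingest flow this prepares for, assuming the base-class
    # ``Datastore.ingest`` template method drives ``_prepIngest`` and then
    # ``_finishIngest``; the path and ref below are placeholders:
    #
    #     datastore.ingest(FileDataset(path="/staging/exp_0042.fits", refs=[ref]),
    #                      transfer="copy")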

638 

639 @transactional 

640 def _finishIngest(self, prepData: Datastore.IngestPrepData, *, transfer: Optional[str] = None) -> None: 

641 # Docstring inherited from Datastore._finishIngest. 

642 refsAndInfos = [] 

643 for dataset in prepData.datasets: 

644 # Do ingest as if the first dataset ref is associated with the file 

645 info = self._extractIngestInfo(dataset.path, dataset.refs[0], formatter=dataset.formatter, 

646 transfer=transfer) 

647 refsAndInfos.extend([(ref, info) for ref in dataset.refs]) 

648 self._register_datasets(refsAndInfos) 

649 

650 def _calculate_ingested_datastore_name(self, srcUri: ButlerURI, ref: DatasetRef, 

651 formatter: Union[Formatter, Type[Formatter]]) -> Location: 

652 """Given a source URI and a DatasetRef, determine the name the 

653 dataset will have inside the datastore. 

654 

655 Parameters 

656 ---------- 

657 srcUri : `ButlerURI` 

658 URI to the source dataset file. 

659 ref : `DatasetRef` 

660 Ref associated with the newly-ingested dataset artifact. This 

661 is used to determine the name within the datastore. 

662 formatter : `Formatter` or `Formatter` class 

663 Formatter to use for validation. Can be a class or an instance. 

664 

665 Returns 

666 ------- 

667 location : `Location` 

668 Target location for the newly-ingested dataset. 

669 """ 

670 # Ingesting a file from outside the datastore. 

671 # This involves a new name. 

672 template = self.templates.getTemplate(ref) 

673 location = self.locationFactory.fromPath(template.format(ref)) 

674 

675 # Get the extension 

676 ext = srcUri.getExtension() 

677 

678 # Update the destination to include that extension 

679 location.updateExtension(ext) 

680 

681 # Ask the formatter to validate this extension 

682 formatter.validateExtension(location) 

683 

684 return location 
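    # Example of the naming rule above, with invented names: ingesting
    # "file:///staging/exp_0042.fits" when the template expands to
    # "run1/raw/raw_0042" produces the datastore location
    # "run1/raw/raw_0042.fits", provided the formatter accepts ".fits".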

685 

686 @abstractmethod 

687 def _write_in_memory_to_artifact(self, inMemoryDataset: Any, ref: DatasetRef) -> StoredFileInfo: 

688 """Write an in-memory dataset out to the datastore. 

689 

690 Parameters 

691 ---------- 

692 inMemoryDataset : `object` 

693 Dataset to write to datastore. 

694 ref : `DatasetRef` 

695 Registry information associated with this dataset. 

696 

697 Returns 

698 ------- 

699 info : `StoredFileInfo` 

700 Information describing the artifact written to the datastore. 

701 """ 

702 raise NotImplementedError() 

703 

704 @abstractmethod 

705 def _read_artifact_into_memory(self, getInfo: DatastoreFileGetInformation, 

706 ref: DatasetRef, isComponent: bool = False) -> Any: 

707 """Read an artifact from the datastore into an in-memory object. 

708 

709 Parameters 

710 ---------- 

711 getInfo : `DatastoreFileGetInformation` 

712 Information about the artifact within the datastore. 

713 ref : `DatasetRef` 

714 The registry information associated with this artifact. 

715 isComponent : `bool` 

716 Flag to indicate if a component is being read from this artifact. 

717 

718 Returns 

719 ------- 

720 inMemoryDataset : `object` 

721 The artifact as a python object. 

722 """ 

723 raise NotImplementedError() 

724 

725 def exists(self, ref: DatasetRef) -> bool: 

726 """Check if the dataset exists in the datastore. 

727 

728 Parameters 

729 ---------- 

730 ref : `DatasetRef` 

731 Reference to the required dataset. 

732 

733 Returns 

734 ------- 

735 exists : `bool` 

736 `True` if the entity exists in the `Datastore`. 

737 """ 

738 fileLocations = self._get_dataset_locations_info(ref) 

739 if not fileLocations: 

740 return False 

741 for location, _ in fileLocations: 

742 if not self._artifact_exists(location): 

743 return False 

744 

745 return True 

746 

747 def getURIs(self, ref: DatasetRef, 

748 predict: bool = False) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]: 

749 """Return URIs associated with dataset. 

750 

751 Parameters 

752 ---------- 

753 ref : `DatasetRef` 

754 Reference to the required dataset. 

755 predict : `bool`, optional 

756 If the datastore does not know about the dataset, should it 

757 return a predicted URI or not? 

758 

759 Returns 

760 ------- 

761 primary : `ButlerURI` 

762 The URI to the primary artifact associated with this dataset. 

763 If the dataset was disassembled within the datastore this 

764 may be `None`. 

765 components : `dict` 

766 URIs to any components associated with the dataset artifact. 

767 Can be empty if there are no components. 

768 """ 

769 

770 primary: Optional[ButlerURI] = None 

771 components: Dict[str, ButlerURI] = {} 

772 

773 # if this has never been written then we have to guess 

774 if not self.exists(ref): 

775 if not predict: 

776 raise FileNotFoundError("Dataset {} not in this datastore".format(ref)) 

777 

778 def predictLocation(thisRef: DatasetRef) -> Location: 

779 template = self.templates.getTemplate(thisRef) 

780 location = self.locationFactory.fromPath(template.format(thisRef)) 

781 storageClass = ref.datasetType.storageClass 

782 formatter = self.formatterFactory.getFormatter(thisRef, 

783 FileDescriptor(location, 

784 storageClass=storageClass)) 

785 # Try to use the extension attribute but ignore problems if the 

786 # formatter does not define one. 

787 try: 

788 location = formatter.makeUpdatedLocation(location) 

789 except Exception: 

790 # Use the default extension 

791 pass 

792 return location 

793 

794 doDisassembly = self.composites.shouldBeDisassembled(ref) 

795 

796 if doDisassembly: 

797 

798 for component, componentStorage in ref.datasetType.storageClass.components.items(): 

799 compRef = ref.makeComponentRef(component) 

800 compLocation = predictLocation(compRef) 

801 

802 # Add a URI fragment to indicate this is a guess 

803 components[component] = ButlerURI(compLocation.uri.geturl() + "#predicted") 

804 

805 else: 

806 

807 location = predictLocation(ref) 

808 

809 # Add a URI fragment to indicate this is a guess 

810 primary = ButlerURI(location.uri.geturl() + "#predicted") 

811 

812 return primary, components 

813 

814 # If this is a ref that we have written we can get the path. 

815 # Get file metadata and internal metadata 

816 fileLocations = self._get_dataset_locations_info(ref) 

817 

818 if not fileLocations:

819 raise RuntimeError(f"Unexpectedly got no artifacts for dataset {ref}") 

820 

821 if len(fileLocations) == 1: 

822 # No disassembly so this is the primary URI 

823 primary = ButlerURI(fileLocations[0][0].uri) 

824 

825 else: 

826 for location, storedFileInfo in fileLocations: 

827 if storedFileInfo.component is None:

828 raise RuntimeError(f"Unexpectedly got no component name for a component at {location}") 

829 components[storedFileInfo.component] = ButlerURI(location.uri) 

830 

831 return primary, components 
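    # Shape of the return value, with invented paths and component names:
    #
    #     assembled dataset     -> (ButlerURI("file:///repo/run1/calexp_1.fits"), {})
    #     disassembled dataset  -> (None, {"image": ButlerURI(".../calexp_1_image.fits"),
    #                                      "mask": ButlerURI(".../calexp_1_mask.fits")})
    #     predict=True, not yet written -> the same shapes, with "#predicted"
    #                                      appended as a URI fragment.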

832 

833 def getURI(self, ref: DatasetRef, predict: bool = False) -> ButlerURI: 

834 """URI to the Dataset. 

835 

836 Parameters 

837 ---------- 

838 ref : `DatasetRef` 

839 Reference to the required Dataset. 

840 predict : `bool` 

841 If `True`, allow URIs to be returned of datasets that have not 

842 been written. 

843 

844 Returns 

845 ------- 

846 uri : `ButlerURI` 

847 URI pointing to the dataset within the datastore. If the 

848 dataset does not exist in the datastore, and if ``predict`` is 

849 `True`, the URI will be a prediction and will include a URI 

850 fragment "#predicted". 

851 If the datastore does not have entities that relate well 

852 to the concept of a URI the returned URI will be 

853 descriptive. The returned URI is not guaranteed to be obtainable. 

854 

855 Raises 

856 ------ 

857 FileNotFoundError 

858 Raised if a URI has been requested for a dataset that does not 

859 exist and guessing is not allowed. 

860 RuntimeError 

861 Raised if a request is made for a single URI but multiple URIs 

862 are associated with this dataset. 

863 

864 Notes 

865 ----- 

866 When a predicted URI is requested an attempt will be made to form 

867 a reasonable URI based on file templates and the expected formatter. 

868 """ 

869 primary, components = self.getURIs(ref, predict) 

870 if primary is None or components:

871 raise RuntimeError(f"Dataset ({ref}) includes distinct URIs for components. " 

872 "Use Datastore.getURIs() instead.") 

873 return primary 

874 

875 def get(self, ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any: 

876 """Load an InMemoryDataset from the store. 

877 

878 Parameters 

879 ---------- 

880 ref : `DatasetRef` 

881 Reference to the required Dataset. 

882 parameters : `dict` 

883 `StorageClass`-specific parameters that specify, for example, 

884 a slice of the dataset to be loaded. 

885 

886 Returns 

887 ------- 

888 inMemoryDataset : `object` 

889 Requested dataset or slice thereof as an InMemoryDataset. 

890 

891 Raises 

892 ------ 

893 FileNotFoundError 

894 Requested dataset can not be retrieved. 

895 TypeError 

896 Return value from formatter has unexpected type. 

897 ValueError 

898 Formatter failed to process the dataset. 

899 """ 

900 allGetInfo = self._prepare_for_get(ref, parameters) 

901 refComponent = ref.datasetType.component() 

902 

903 # Supplied storage class for the component being read 

904 refStorageClass = ref.datasetType.storageClass 

905 

906 # Create mapping from component name to related info 

907 allComponents = {i.component: i for i in allGetInfo} 

908 

909 # By definition the dataset is disassembled if we have more 

910 # than one record for it. 

911 isDisassembled = len(allGetInfo) > 1 

912 

913 # Look for the special case where we are disassembled but the 

914 # component is a read-only component that was not written during 

915 # disassembly. For this scenario we need to check that the 

916 # component requested is listed as a read-only component for the 

917 # composite storage class 

918 isDisassembledReadOnlyComponent = False 

919 if isDisassembled and refComponent: 

920 # The composite storage class should be accessible through 

921 # the component dataset type 

922 compositeStorageClass = ref.datasetType.parentStorageClass 

923 

924 # In the unlikely scenario where the composite storage 

925 # class is not known, we can only assume that this is a 

926 # normal component. If that assumption is wrong then the 

927 # branch below that reads a persisted component will fail 

928 # so there is no need to complain here. 

929 if compositeStorageClass is not None:

930 isDisassembledReadOnlyComponent = refComponent in compositeStorageClass.readComponents 

931 

932 if isDisassembled and not refComponent: 

933 # This was a disassembled dataset spread over multiple files 

934 # and we need to put them all back together again. 

935 # Read into memory and then assemble 

936 

937 # Check that the supplied parameters are suitable for the type read 

938 refStorageClass.validateParameters(parameters) 

939 

940 # We want to keep track of all the parameters that were not used 

941 # by formatters. We assume that if any of the component formatters 

942 # use a parameter that we do not need to apply it again in the 

943 # assembler. 

944 usedParams = set() 

945 

946 components: Dict[str, Any] = {} 

947 for getInfo in allGetInfo: 

948 # assemblerParams are parameters not understood by the 

949 # associated formatter. 

950 usedParams.update(set(getInfo.formatterParams)) 

951 

952 component = getInfo.component 

953 

954 if component is None:

955 raise RuntimeError(f"Internal error in datastore assembly of {ref}") 

956 

957 # We do not want the formatter to think it's reading 

958 # a component though because it is really reading a 

959 # standalone dataset -- always tell reader it is not a 

960 # component. 

961 components[component] = self._read_artifact_into_memory(getInfo, ref, isComponent=False) 

962 

963 inMemoryDataset = ref.datasetType.storageClass.assembler().assemble(components) 

964 

965 # Any unused parameters will have to be passed to the assembler 

966 if parameters: 

967 unusedParams = {k: v for k, v in parameters.items() if k not in usedParams} 

968 else: 

969 unusedParams = {} 

970 

971 # Process parameters 

972 return ref.datasetType.storageClass.assembler().handleParameters(inMemoryDataset, 

973 parameters=unusedParams) 

974 

975 elif isDisassembledReadOnlyComponent: 

976 

977 compositeStorageClass = ref.datasetType.parentStorageClass 

978 if compositeStorageClass is None:

979 raise RuntimeError(f"Unable to retrieve read-only component '{refComponent}' since" 

980 "no composite storage class is available.") 

981 

982 if refComponent is None:

983 # Mainly for mypy 

984 raise RuntimeError(f"Internal error in datastore {self.name}: component can not be None here") 

985 

986 # Assume that every read-only component can be calculated by 

987 # forwarding the request to a single read/write component. 

988 # Rather than guessing which rw component is the right one by 

989 # scanning each for a read-only component of the same name, 

990 # we ask the composite assembler directly which one is best to 

991 # use. 

992 compositeAssembler = compositeStorageClass.assembler() 

993 forwardedComponent = compositeAssembler.selectResponsibleComponent(refComponent, 

994 set(allComponents)) 

995 

996 # Select the relevant component 

997 rwInfo = allComponents[forwardedComponent] 

998 

999 # For now assume that read parameters are validated against 

1000 # the real component and not the requested component 

1001 forwardedStorageClass = rwInfo.formatter.fileDescriptor.readStorageClass 

1002 forwardedStorageClass.validateParameters(parameters) 

1003 

1004 # Unfortunately the FileDescriptor inside the formatter will have 

1005 # the wrong write storage class so we need to create a new one 

1006 # given the immutability constraint. 

1007 writeStorageClass = rwInfo.info.storageClass 

1008 

1009 # We may need to put some thought into parameters for read 

1010 # components but for now forward them on as is 

1011 readFormatter = type(rwInfo.formatter)(FileDescriptor(rwInfo.location, 

1012 readStorageClass=refStorageClass, 

1013 storageClass=writeStorageClass, 

1014 parameters=parameters), 

1015 ref.dataId) 

1016 

1017 # The assembler can not receive any parameter requests for a 

1018 # read-only component at this time since the assembler will 

1019 # see the storage class of the read-only component and those 

1020 # parameters will have to be handled by the formatter on the 

1021 # forwarded storage class. 

1022 assemblerParams: Dict[str, Any] = {} 

1023 

1024 # Need to create a new info that specifies the read-only 

1025 # component and associated storage class 

1026 readInfo = DatastoreFileGetInformation(rwInfo.location, readFormatter, 

1027 rwInfo.info, assemblerParams, {}, 

1028 refComponent, refStorageClass) 

1029 

1030 return self._read_artifact_into_memory(readInfo, ref, isComponent=True) 

1031 

1032 else: 

1033 # Single file request or component from that composite file 

1034 for lookup in (refComponent, None):

1035 if lookup in allComponents:

1036 getInfo = allComponents[lookup] 

1037 break 

1038 else: 

1039 raise FileNotFoundError(f"Component {refComponent} not found " 

1040 f"for ref {ref} in datastore {self.name}") 

1041 

1042 # Do not need the component itself if already disassembled 

1043 if isDisassembled: 

1044 isComponent = False 

1045 else: 

1046 isComponent = getInfo.component is not None 

1047 

1048 # For a disassembled component we can validate parameters against 

1049 # the component storage class directly 

1050 if isDisassembled: 

1051 refStorageClass.validateParameters(parameters) 

1052 else: 

1053 # For an assembled composite this could be a read-only 

1054 # component derived from a real component. The validity 

1055 # of the parameters is not clear. For now validate against 

1056 # the composite storage class 

1057 getInfo.formatter.fileDescriptor.storageClass.validateParameters(parameters) 

1058 

1059 return self._read_artifact_into_memory(getInfo, ref, isComponent=isComponent) 
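    # Summary of the three read paths above (component names are illustrative):
    # a disassembled composite with no component requested reads every
    # component file and reassembles via the storage class assembler; a
    # read-only component (e.g. "bbox") of a disassembled composite is
    # forwarded to the responsible read/write component's file; anything else
    # reads a single file, flagging isComponent when slicing a composite file.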

1060 

1061 @transactional 

1062 def put(self, inMemoryDataset: Any, ref: DatasetRef) -> None: 

1063 """Write an InMemoryDataset with a given `DatasetRef` to the store. 

1064 

1065 Parameters 

1066 ---------- 

1067 inMemoryDataset : `object` 

1068 The dataset to store. 

1069 ref : `DatasetRef` 

1070 Reference to the associated Dataset. 

1071 

1072 Raises 

1073 ------ 

1074 TypeError 

1075 Supplied object and storage class are inconsistent. 

1076 DatasetTypeNotSupportedError 

1077 The associated `DatasetType` is not handled by this datastore. 

1078 

1079 Notes 

1080 ----- 

1081 If the datastore is configured to reject certain dataset types it 

1082 is possible that the put will fail and raise a 

1083 `DatasetTypeNotSupportedError`. The main use case for this is to 

1084 allow `ChainedDatastore` to put to multiple datastores without 

1085 requiring that every datastore accepts the dataset. 

1086 """ 

1087 

1088 doDisassembly = self.composites.shouldBeDisassembled(ref) 

1089 # doDisassembly = True 

1090 

1091 artifacts = [] 

1092 if doDisassembly: 

1093 components = ref.datasetType.storageClass.assembler().disassemble(inMemoryDataset) 

1094 for component, componentInfo in components.items(): 

1095 # Don't recurse because we want to take advantage of 

1096 # bulk insert -- need a new DatasetRef that refers to the 

1097 # same dataset_id but has the component DatasetType. 

1098 # DatasetType does not refer to the types of components, 

1099 # so we construct one ourselves. 

1100 compRef = ref.makeComponentRef(component) 

1101 storedInfo = self._write_in_memory_to_artifact(componentInfo.component, compRef) 

1102 artifacts.append((compRef, storedInfo)) 

1103 else: 

1104 # Write the entire thing out 

1105 storedInfo = self._write_in_memory_to_artifact(inMemoryDataset, ref) 

1106 artifacts.append((ref, storedInfo)) 

1107 

1108 self._register_datasets(artifacts) 
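    # Sketch of a disassembling put, with hypothetical component names: for a
    # composite storage class defining "image" and "mask" components,
    # ``disassemble`` yields one in-memory object per component and each is
    # written as its own artifact, all sharing the parent dataset_id:
    #
    #     datastore.put(exposure, ref)
    #     # -> artifacts registered for ref.makeComponentRef("image"),
    #     #    ref.makeComponentRef("mask"), ...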

1109 

1110 @transactional 

1111 def trash(self, ref: DatasetRef, ignore_errors: bool = True) -> None: 

1112 """Indicate to the datastore that a dataset can be removed. 

1113 

1114 Parameters 

1115 ---------- 

1116 ref : `DatasetRef` 

1117 Reference to the required Dataset. 

1118 ignore_errors : `bool` 

1119 If `True` return without error even if something went wrong. 

1120 Problems could occur if another process is simultaneously trying 

1121 to delete. 

1122 

1123 Raises 

1124 ------ 

1125 FileNotFoundError 

1126 Attempt to remove a dataset that does not exist. 

1127 """ 

1128 # Get file metadata and internal metadata 

1129 log.debug("Trashing %s in datastore %s", ref, self.name) 

1130 

1131 fileLocations = self._get_dataset_locations_info(ref) 

1132 

1133 if not fileLocations: 

1134 err_msg = f"Requested dataset to trash ({ref}) is not known to datastore {self.name}" 

1135 if ignore_errors: 

1136 log.warning(err_msg) 

1137 return 

1138 else: 

1139 raise FileNotFoundError(err_msg) 

1140 

1141 for location, storedFileInfo in fileLocations: 

1142 if not self._artifact_exists(location):

1143 err_msg = f"Dataset is known to datastore {self.name} but " \ 

1144 f"associated artifact ({location.uri}) is missing" 

1145 if ignore_errors: 

1146 log.warning(err_msg) 

1147 return 

1148 else: 

1149 raise FileNotFoundError(err_msg) 

1150 

1151 # Mark dataset as trashed 

1152 try: 

1153 self._move_to_trash_in_registry(ref) 

1154 except Exception as e: 

1155 if ignore_errors: 

1156 log.warning(f"Attempted to mark dataset ({ref}) to be trashed in datastore {self.name} " 

1157 f"but encountered an error: {e}") 

1158 pass 

1159 else: 

1160 raise 

1161 

1162 @transactional 

1163 def emptyTrash(self, ignore_errors: bool = True) -> None: 

1164 """Remove all datasets from the trash. 

1165 

1166 Parameters 

1167 ---------- 

1168 ignore_errors : `bool` 

1169 If `True` return without error even if something went wrong. 

1170 Problems could occur if another process is simultaneously trying 

1171 to delete. 

1172 """ 

1173 log.debug("Emptying trash in datastore %s", self.name) 

1174 # Context manager will empty trash iff we finish it without raising. 

1175 with self._bridge.emptyTrash() as trashed: 

1176 for ref in trashed: 

1177 fileLocations = self._get_dataset_locations_info(ref) 

1178 

1179 if not fileLocations:

1180 err_msg = f"Requested dataset ({ref}) does not exist in datastore {self.name}" 

1181 if ignore_errors: 

1182 log.warning(err_msg) 

1183 continue 

1184 else: 

1185 raise FileNotFoundError(err_msg) 

1186 

1187 for location, _ in fileLocations: 

1188 

1189 if not self._artifact_exists(location):

1190 err_msg = f"Dataset {location.uri} no longer present in datastore {self.name}" 

1191 if ignore_errors: 

1192 log.warning(err_msg) 

1193 continue 

1194 else: 

1195 raise FileNotFoundError(err_msg) 

1196 

1197 # Can only delete the artifact if there are no references 

1198 # to the file from untrashed dataset refs. 

1199 if self._can_remove_dataset_artifact(ref, location): 

1200 # Point of no return for this artifact 

1201 log.debug("Removing artifact %s from datastore %s", location.uri, self.name) 

1202 try: 

1203 self._delete_artifact(location) 

1204 except Exception as e: 

1205 if ignore_errors: 

1206 log.critical("Encountered error removing artifact %s from datastore %s: %s", 

1207 location.uri, self.name, e) 

1208 else: 

1209 raise 

1210 

1211 # Now must remove the entry from the internal registry even if 

1212 # the artifact removal failed and was ignored, 

1213 # otherwise the removal check above will never be true 

1214 try: 

1215 # There may be multiple rows associated with this ref 

1216 # depending on disassembly 

1217 self.removeStoredItemInfo(ref) 

1218 except Exception as e: 

1219 if ignore_errors: 

1220 log.warning("Error removing dataset %s (%s) from internal registry of %s: %s", 

1221 ref.id, location.uri, self.name, e) 

1222 continue 

1223 else: 

1224 raise FileNotFoundError(err_msg) 
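    # The two-phase removal implemented above, end to end (ref is a placeholder):
    #
    #     datastore.trash(ref)     # record-only: marks the dataset as trashed
    #     datastore.emptyTrash()   # deletes artifacts only when every ref that
    #                              # shares them has been trashed, then drops
    #                              # the internal records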

1225 

1226 def validateConfiguration(self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], 

1227 logFailures: bool = False) -> None: 

1228 """Validate some of the configuration for this datastore. 

1229 

1230 Parameters 

1231 ---------- 

1232 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass` 

1233 Entities to test against this configuration. Can be differing 

1234 types. 

1235 logFailures : `bool`, optional 

1236 If `True`, output a log message for every validation error 

1237 detected. 

1238 

1239 Raises 

1240 ------ 

1241 DatastoreValidationError 

1242 Raised if there is a validation problem with a configuration. 

1243 All the problems are reported in a single exception. 

1244 

1245 Notes 

1246 ----- 

1247 This method checks that all the supplied entities have valid file 

1248 templates and also have formatters defined. 

1249 """ 

1250 

1251 templateFailed = None 

1252 try: 

1253 self.templates.validateTemplates(entities, logFailures=logFailures) 

1254 except FileTemplateValidationError as e: 

1255 templateFailed = str(e) 

1256 

1257 formatterFailed = [] 

1258 for entity in entities: 

1259 try: 

1260 self.formatterFactory.getFormatterClass(entity) 

1261 except KeyError as e: 

1262 formatterFailed.append(str(e)) 

1263 if logFailures:

1264 log.fatal("Formatter failure: %s", e) 

1265 

1266 if templateFailed or formatterFailed: 

1267 messages = [] 

1268 if templateFailed:

1269 messages.append(templateFailed) 

1270 if formatterFailed:

1271 messages.append(",".join(formatterFailed)) 

1272 msg = ";\n".join(messages) 

1273 raise DatastoreValidationError(msg) 

1274 

1275 def getLookupKeys(self) -> Set[LookupKey]: 

1276 # Docstring is inherited from base class 

1277 return self.templates.getLookupKeys() | self.formatterFactory.getLookupKeys() | \ 

1278 self.constraints.getLookupKeys() 

1279 

1280 def validateKey(self, lookupKey: LookupKey, 

1281 entity: Union[DatasetRef, DatasetType, StorageClass]) -> None: 

1282 # Docstring is inherited from base class 

1283 # The key can be valid in either formatters or templates so we can 

1284 # only check the template if it exists 

1285 if lookupKey in self.templates: 

1286 try: 

1287 self.templates[lookupKey].validateTemplate(entity) 

1288 except FileTemplateValidationError as e: 

1289 raise DatastoreValidationError(e) from e 

1290 

1291 def export(self, refs: Iterable[DatasetRef], *, 

1292 directory: Optional[Union[ButlerURI, str]] = None, 

1293 transfer: Optional[str] = "auto") -> Iterable[FileDataset]: 

1294 # Docstring inherited from Datastore.export. 

1295 if transfer is not None and directory is None:

1296 raise RuntimeError(f"Cannot export using transfer mode {transfer} with no " 

1297 "export directory given") 

1298 

1299 # Force the directory to be a URI object 

1300 directoryUri: Optional[ButlerURI] = None 

1301 if directory is not None:

1302 directoryUri = ButlerURI(directory, forceDirectory=True) 

1303 

1304 if transfer is not None and directoryUri is not None:

1305 # mypy needs the second test 

1306 if not directoryUri.exists():

1307 raise FileNotFoundError(f"Export location {directory} does not exist") 

1308 

1309 for ref in refs: 

1310 fileLocations = self._get_dataset_locations_info(ref) 

1311 if not fileLocations:

1312 raise FileNotFoundError(f"Could not retrieve dataset {ref}.") 

1313 # For now we can not export disassembled datasets 

1314 if len(fileLocations) > 1: 

1315 raise NotImplementedError(f"Can not export disassembled datasets such as {ref}") 

1316 location, storedFileInfo = fileLocations[0] 

1317 if transfer is None:

1318 # TODO: do we also need to return the readStorageClass somehow? 

1319 # We will use the path in store directly 

1320 pass 

1321 else: 

1322 # mypy needs help 

1323 assert directoryUri is not None, "directoryUri must be defined to get here" 

1324 storeUri = ButlerURI(location.uri) 

1325 exportUri = directoryUri.join(location.pathInStore) 

1326 exportUri.transfer_from(storeUri, transfer=transfer) 

1327 

1328 yield FileDataset(refs=[ref], path=location.pathInStore, formatter=storedFileInfo.formatter)