1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23"""Generic file-based datastore code.""" 

24 

25__all__ = ("FileLikeDatastore", ) 

26 

27import logging 

28from abc import abstractmethod 

29 

30from sqlalchemy import BigInteger, String 

31 

32from dataclasses import dataclass 

33from typing import ( 

34 TYPE_CHECKING, 

35 Any, 

36 ClassVar, 

37 Dict, 

38 Iterable, 

39 List, 

40 Mapping, 

41 Optional, 

42 Set, 

43 Tuple, 

44 Type, 

45 Union, 

46) 

47 

48from lsst.daf.butler import ( 

49 ButlerURI, 

50 CompositesMap, 

51 Config, 

52 FileDataset, 

53 DatasetRef, 

54 DatasetType, 

55 DatasetTypeNotSupportedError, 

56 Datastore, 

57 DatastoreConfig, 

58 DatastoreValidationError, 

59 FileDescriptor, 

60 FileTemplates, 

61 FileTemplateValidationError, 

62 Formatter, 

63 FormatterFactory, 

64 Location, 

65 LocationFactory, 

66 StorageClass, 

67 StoredFileInfo, 

68) 

69 

70from lsst.daf.butler import ddl 

71from lsst.daf.butler.registry.interfaces import ( 

72 ReadOnlyDatabaseError, 

73 DatastoreRegistryBridge, 

74) 

75 

76from lsst.daf.butler.core.repoRelocation import replaceRoot 

77from lsst.daf.butler.core.utils import getInstanceOf, getClassOf, transactional 

78from .genericDatastore import GenericBaseDatastore 

79 

80if TYPE_CHECKING: 

81 from lsst.daf.butler import LookupKey 

82 from lsst.daf.butler.registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager 

83 

84log = logging.getLogger(__name__) 

85 

86# String to use when a Python None is encountered 

87NULLSTR = "__NULL_STRING__" 

88 

89 

90class _IngestPrepData(Datastore.IngestPrepData): 

91 """Helper class for FileLikeDatastore ingest implementation. 

92 

93 Parameters 

94 ---------- 

95 datasets : `list` of `FileDataset` 

96 Files to be ingested by this datastore. 

97 """ 

98 def __init__(self, datasets: List[FileDataset]): 

99 super().__init__(ref for dataset in datasets for ref in dataset.refs) 

100 self.datasets = datasets 

101 

102 

103@dataclass(frozen=True) 

104class DatastoreFileGetInformation: 

105 """Collection of useful parameters needed to retrieve a file from 

106 a Datastore. 

107 """ 

108 

109 location: Location 

110 """The location from which to read the dataset.""" 

111 

112 formatter: Formatter 

113 """The `Formatter` to use to deserialize the dataset.""" 

114 

115 info: StoredFileInfo 

116 """Stored information about this file and its formatter.""" 

117 

118 assemblerParams: Dict[str, Any] 

119 """Parameters to use for post-processing the retrieved dataset.""" 

120 

121 formatterParams: Dict[str, Any] 

122 """Parameters that were understood by the associated formatter.""" 

123 

124 component: Optional[str] 

125 """The component to be retrieved (can be `None`).""" 

126 

127 readStorageClass: StorageClass 

128 """The `StorageClass` of the dataset being read.""" 

129 

130 

131class FileLikeDatastore(GenericBaseDatastore): 

132 """Generic Datastore for file-based implementations. 

133 

134 Should always be sub-classed since key abstract methods are missing. 

135 

136 Parameters 

137 ---------- 

138 config : `DatastoreConfig` or `str` 

139 Configuration as either a `Config` object or URI to file. 

140 bridgeManager : `DatastoreRegistryBridgeManager` 

141 Object that manages the interface between `Registry` and datastores. 

142 butlerRoot : `str`, optional 

143 New datastore root to use to override the configuration value. 

144 

145 Raises 

146 ------ 

147 ValueError 

148 If root location does not exist and ``create`` is `False` in the 

149 configuration. 

150 """ 

151 

152 defaultConfigFile: ClassVar[Optional[str]] = None 

153 """Path to configuration defaults. Accessed within the ``config`` resource 

154 or relative to a search path. Can be None if no defaults specified. 

155 """ 

156 

157 root: str 

158 """Root directory or URI of this `Datastore`.""" 

159 

160 locationFactory: LocationFactory 

161 """Factory for creating locations relative to the datastore root.""" 

162 

163 formatterFactory: FormatterFactory 

164 """Factory for creating instances of formatters.""" 

165 

166 templates: FileTemplates 

167 """File templates that can be used by this `Datastore`.""" 

168 

169 composites: CompositesMap 

170 """Determines whether a dataset should be disassembled on put.""" 

171 

172 @classmethod 

173 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None: 

174 """Set any filesystem-dependent config options for this Datastore to 

175 be appropriate for a new empty repository with the given root. 

176 

177 Parameters 

178 ---------- 

179 root : `str` 

180 URI to the root of the data repository. 

181 config : `Config` 

182 A `Config` to update. Only the subset understood by 

183 this component will be updated. Will not expand 

184 defaults. 

185 full : `Config` 

186 A complete config with all defaults expanded that can be 

187 converted to a `DatastoreConfig`. Read-only and will not be 

188 modified by this method. 

189 Repository-specific options that should not be obtained 

190 from defaults when Butler instances are constructed 

191 should be copied from ``full`` to ``config``. 

192 overwrite : `bool`, optional 

193 If `False`, do not modify a value in ``config`` if the value 

194 already exists. Default is always to overwrite with the provided 

195 ``root``. 

196 

197 Notes 

198 ----- 

199 If a keyword is explicitly defined in the supplied ``config`` it 

200 will not be overridden by this method if ``overwrite`` is `False`. 

201 This allows explicit values set in external configs to be retained. 

202 """ 

203 Config.updateParameters(DatastoreConfig, config, full, 

204 toUpdate={"root": root}, 

205 toCopy=("cls", ("records", "table")), overwrite=overwrite) 
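
    # For illustration only (the repository URI here is hypothetical):
    # ``cls.setConfigRoot("file:///tmp/repo", config, full)`` would set
    # ``config["root"]`` to that URI and copy the ``cls`` and
    # ``("records", "table")`` entries from ``full`` into ``config``.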

206 

207 @classmethod 

208 def makeTableSpec(cls) -> ddl.TableSpec: 

209 return ddl.TableSpec( 

210 fields=[ 

211 ddl.FieldSpec(name="dataset_id", dtype=BigInteger, primaryKey=True), 

212 ddl.FieldSpec(name="path", dtype=String, length=256, nullable=False), 

213 ddl.FieldSpec(name="formatter", dtype=String, length=128, nullable=False), 

214 ddl.FieldSpec(name="storage_class", dtype=String, length=64, nullable=False), 

215 # Use a sentinel string (NULLSTR) to indicate no component 

216 ddl.FieldSpec(name="component", dtype=String, length=32, primaryKey=True), 

217 # TODO: should checksum be Base64Bytes instead? 

218 ddl.FieldSpec(name="checksum", dtype=String, length=128, nullable=True), 

219 ddl.FieldSpec(name="file_size", dtype=BigInteger, nullable=True), 

220 ], 

221 unique=frozenset(), 

222 ) 
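
    # For illustration, a record destined for this table (as assembled in
    # addStoredItemInfo below) might look like the following; the path,
    # formatter and storage class values are hypothetical:
    #
    #   {"dataset_id": 42,
    #    "path": "run/datasetType/file.fits",
    #    "formatter": "some.module.SomeFormatter",
    #    "storage_class": "StructuredDataDict",
    #    "component": NULLSTR,  # sentinel meaning "no component"
    #    "checksum": None,
    #    "file_size": 1024}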

223 

224 def __init__(self, config: Union[DatastoreConfig, str], 

225 bridgeManager: DatastoreRegistryBridgeManager, butlerRoot: Optional[str] = None): 

226 super().__init__(config, bridgeManager) 

227 if "root" not in self.config: 227 ↛ 228line 227 didn't jump to line 228, because the condition on line 227 was never true

228 raise ValueError("No root directory specified in configuration") 

229 

230 # Name ourselves either using an explicit name or a name 

231 # derived from the (unexpanded) root 

232 if "name" in self.config: 

233 self.name = self.config["name"] 

234 else: 

235 # We use the unexpanded root in the name to indicate that this 

236 # datastore can be moved without having to update registry. 

237 self.name = "{}@{}".format(type(self).__name__, 

238 self.config["root"]) 

239 

240 # Support repository relocation in config 

241 # Existence of self.root is checked in subclass 

242 self.root = replaceRoot(self.config["root"], butlerRoot) 

243 

244 self.locationFactory = LocationFactory(self.root) 

245 self.formatterFactory = FormatterFactory() 

246 

247 # Now associate formatters with storage classes 

248 self.formatterFactory.registerFormatters(self.config["formatters"], 

249 universe=bridgeManager.universe) 

250 

251 # Read the file naming templates 

252 self.templates = FileTemplates(self.config["templates"], 

253 universe=bridgeManager.universe) 

254 

255 # See if composites should be disassembled 

256 self.composites = CompositesMap(self.config["composites"], 

257 universe=bridgeManager.universe) 

258 

259 tableName = self.config["records", "table"] 

260 try: 

261 # Storage of paths and formatters, keyed by dataset_id 

262 self._table = bridgeManager.opaque.register(tableName, self.makeTableSpec()) 

263 # Interface to Registry. 

264 self._bridge = bridgeManager.register(self.name) 

265 except ReadOnlyDatabaseError: 

266 # If the database is read only and we just tried and failed to 

267 # create a table, it means someone is trying to create a read-only 

268 # butler client for an empty repo. That should be okay, as long 

269 # as they then try to get any datasets before some other client 

270 # creates the table. Chances are they're just validating 

271 # configuration. 

272 pass 

273 

274 # Determine whether checksums should be used 

275 self.useChecksum = self.config.get("checksum", True) 

276 

277 def __str__(self) -> str: 

278 return self.root 

279 

280 @property 

281 def bridge(self) -> DatastoreRegistryBridge: 

282 return self._bridge 

283 

284 @abstractmethod 

285 def _artifact_exists(self, location: Location) -> bool: 

286 """Check that an artifact exists in this datastore at the specified 

287 location. 

288 

289 Parameters 

290 ---------- 

291 location : `Location` 

292 Expected location of the artifact associated with this datastore. 

293 

294 Returns 

295 ------- 

296 exists : `bool` 

297 `True` if the location can be found, `False` otherwise. 

298 """ 

299 raise NotImplementedError() 

300 

301 @abstractmethod 

302 def _delete_artifact(self, location: Location) -> None: 

303 """Delete the artifact from the datastore. 

304 

305 Parameters 

306 ---------- 

307 location : `Location` 

308 Location of the artifact associated with this datastore. 

309 """ 

310 raise NotImplementedError() 

311 

312 def addStoredItemInfo(self, refs: Iterable[DatasetRef], infos: Iterable[StoredFileInfo]) -> None: 

313 # Docstring inherited from GenericBaseDatastore 

314 records = [] 

315 for ref, info in zip(refs, infos): 

316 # Component should come from ref and fall back on info 

317 component = ref.datasetType.component() 

318 if component is None and info.component is not None: 

319 component = info.component 

320 if component is None: 

321 # Use the NULLSTR sentinel since we want this to be part of the 

322 # primary key and it can not be NULL. 

323 component = NULLSTR 

324 records.append( 

325 dict(dataset_id=ref.id, formatter=info.formatter, path=info.path, 

326 storage_class=info.storageClass.name, component=component, 

327 checksum=info.checksum, file_size=info.file_size) 

328 ) 

329 self._table.insert(*records) 

330 

331 def getStoredItemsInfo(self, ref: DatasetIdRef) -> List[StoredFileInfo]: 

332 # Docstring inherited from GenericBaseDatastore 

333 

334 # Look for the dataset_id -- there might be multiple matches 

335 # if we have disassembled the dataset. 

336 records = list(self._table.fetch(dataset_id=ref.id)) 

337 

338 results = [] 

339 for record in records: 

340 # Convert name of StorageClass to instance 

341 storageClass = self.storageClassFactory.getStorageClass(record["storage_class"]) 

342 component = record["component"] if (record["component"] 

343 and record["component"] != NULLSTR) else None 

344 

345 info = StoredFileInfo(formatter=record["formatter"], 

346 path=record["path"], 

347 storageClass=storageClass, 

348 component=component, 

349 checksum=record["checksum"], 

350 file_size=record["file_size"]) 

351 results.append(info) 

352 

353 return results 

354 

355 def _registered_refs_per_artifact(self, pathInStore: str) -> Set[int]: 

356 """Return all dataset refs associated with the supplied path. 

357 

358 Parameters 

359 ---------- 

360 pathInStore : `str` 

361 Path of interest in the data store. 

362 

363 Returns 

364 ------- 

365 ids : `set` of `int` 

366 All `DatasetRef` IDs associated with this path. 

367 """ 

368 records = list(self._table.fetch(path=pathInStore)) 

369 ids = {r["dataset_id"] for r in records} 

370 return ids 

371 

372 def removeStoredItemInfo(self, ref: DatasetIdRef) -> None: 

373 # Docstring inherited from GenericBaseDatastore 

374 self._table.delete(dataset_id=ref.id) 

375 

376 def _get_dataset_locations_info(self, ref: DatasetIdRef) -> List[Tuple[Location, StoredFileInfo]]: 

377 r"""Find all the `Location`\ s of the requested dataset in the 

378 `Datastore` and the associated stored file information. 

379 

380 Parameters 

381 ---------- 

382 ref : `DatasetRef` 

383 Reference to the required `Dataset`. 

384 

385 Returns 

386 ------- 

387 results : `list` [`tuple` [`Location`, `StoredFileInfo` ]] 

388 Location of the dataset within the datastore and 

389 stored information about each file and its formatter. 

390 """ 

391 # Get the file information (this will fail if no file) 

392 records = self.getStoredItemsInfo(ref) 

393 

394 # Use the path to determine the location 

395 return [(self.locationFactory.fromPath(r.path), r) for r in records] 

396 

397 def _can_remove_dataset_artifact(self, ref: DatasetIdRef, location: Location) -> bool: 

398 """Check that there is only one dataset associated with the 

399 specified artifact. 

400 

401 Parameters 

402 ---------- 

403 ref : `DatasetRef` or `FakeDatasetRef` 

404 Dataset to be removed. 

405 location : `Location` 

406 The location of the artifact to be removed. 

407 

408 Returns 

409 ------- 

410 can_remove : `bool` 

411 True if the artifact can be safely removed. 

412 """ 

413 

414 # Get all entries associated with this path 

415 allRefs = self._registered_refs_per_artifact(location.pathInStore) 

416 if not allRefs: 

417 raise RuntimeError(f"Datastore inconsistency error. {location.pathInStore} not in registry") 

418 

419 # Remove these refs from all the refs and if there is nothing left 

420 # then we can delete 

421 remainingRefs = allRefs - {ref.id} 

422 

423 if remainingRefs: 

424 return False 

425 return True 

426 

427 def _prepare_for_get(self, ref: DatasetRef, 

428 parameters: Optional[Mapping[str, Any]] = None) -> List[DatastoreFileGetInformation]: 

429 """Check parameters for ``get`` and obtain formatter and 

430 location. 

431 

432 Parameters 

433 ---------- 

434 ref : `DatasetRef` 

435 Reference to the required Dataset. 

436 parameters : `dict` 

437 `StorageClass`-specific parameters that specify, for example, 

438 a slice of the dataset to be loaded. 

439 

440 Returns 

441 ------- 

442 getInfo : `list` [`DatastoreFileGetInformation`] 

443 Parameters needed to retrieve each file. 

444 """ 

445 log.debug("Retrieve %s from %s with parameters %s", ref, self.name, parameters) 

446 

447 # Get file metadata and internal metadata 

448 fileLocations = self._get_dataset_locations_info(ref) 

449 if not fileLocations: 

450 raise FileNotFoundError(f"Could not retrieve dataset {ref}.") 

451 

452 # The storage class we want to use eventually 

453 refStorageClass = ref.datasetType.storageClass 

454 

455 if len(fileLocations) > 1: 

456 disassembled = True 

457 else: 

458 disassembled = False 

459 

460 # Is this a component request? 

461 refComponent = ref.datasetType.component() 

462 

463 fileGetInfo = [] 

464 for location, storedFileInfo in fileLocations: 

465 

466 # The storage class used to write the file 

467 writeStorageClass = storedFileInfo.storageClass 

468 

469 # If this has been disassembled we need read to match the write 

470 if disassembled: 

471 readStorageClass = writeStorageClass 

472 else: 

473 readStorageClass = refStorageClass 

474 

475 formatter = getInstanceOf(storedFileInfo.formatter, 

476 FileDescriptor(location, readStorageClass=readStorageClass, 

477 storageClass=writeStorageClass, parameters=parameters), 

478 ref.dataId) 

479 

480 formatterParams, notFormatterParams = formatter.segregateParameters() 

481 

482 # Of the remaining parameters, extract the ones supported by 

483 # this StorageClass (for components not all will be handled) 

484 assemblerParams = readStorageClass.filterParameters(notFormatterParams) 

485 

486 # The ref itself could be a component if the dataset was 

487 # disassembled by butler, or we disassembled in datastore and 

488 # components came from the datastore records 

489 component = storedFileInfo.component if storedFileInfo.component else refComponent 

490 

491 fileGetInfo.append(DatastoreFileGetInformation(location, formatter, storedFileInfo, 

492 assemblerParams, formatterParams, 

493 component, readStorageClass)) 

494 

495 return fileGetInfo 

496 

497 def _prepare_for_put(self, inMemoryDataset: Any, ref: DatasetRef) -> Tuple[Location, Formatter]: 

498 """Check the arguments for ``put`` and obtain formatter and 

499 location. 

500 

501 Parameters 

502 ---------- 

503 inMemoryDataset : `object` 

504 The dataset to store. 

505 ref : `DatasetRef` 

506 Reference to the associated Dataset. 

507 

508 Returns 

509 ------- 

510 location : `Location` 

511 The location to write the dataset. 

512 formatter : `Formatter` 

513 The `Formatter` to use to write the dataset. 

514 

515 Raises 

516 ------ 

517 TypeError 

518 Supplied object and storage class are inconsistent. 

519 DatasetTypeNotSupportedError 

520 The associated `DatasetType` is not handled by this datastore. 

521 """ 

522 self._validate_put_parameters(inMemoryDataset, ref) 

523 

524 # Work out output file name 

525 try: 

526 template = self.templates.getTemplate(ref) 

527 except KeyError as e: 

528 raise DatasetTypeNotSupportedError(f"Unable to find template for {ref}") from e 

529 

530 # Validate the template to protect against filenames from different 

531 # dataIds returning the same and causing overwrite confusion. 

532 template.validateTemplate(ref) 

533 

534 location = self.locationFactory.fromPath(template.format(ref)) 

535 

536 # Get the formatter based on the storage class 

537 storageClass = ref.datasetType.storageClass 

538 try: 

539 formatter = self.formatterFactory.getFormatter(ref, 

540 FileDescriptor(location, 

541 storageClass=storageClass), 

542 ref.dataId) 

543 except KeyError as e: 

544 raise DatasetTypeNotSupportedError(f"Unable to find formatter for {ref} in datastore " 

545 f"{self.name}") from e 

546 

547 # Now that we know the formatter, update the location 

548 location = formatter.makeUpdatedLocation(location) 

549 

550 return location, formatter 

551 

552 @abstractmethod 

553 def _standardizeIngestPath(self, path: str, *, transfer: Optional[str] = None) -> str: 

554 """Standardize the path of a to-be-ingested file. 

555 

556 Parameters 

557 ---------- 

558 path : `str` 

559 Path of a file to be ingested. 

560 transfer : `str`, optional 

561 How (and whether) the dataset should be added to the datastore. 

562 See `ingest` for details of transfer modes. 

563 This implementation is provided only so 

564 `NotImplementedError` can be raised if the mode is not supported; 

565 actual transfers are deferred to `_extractIngestInfo`. 

566 

567 Returns 

568 ------- 

569 path : `str` 

570 New path in what the datastore considers standard form. 

571 

572 Notes 

573 ----- 

574 Subclasses of `FileLikeDatastore` should implement this method instead 

575 of `_prepIngest`. It should not modify the data repository or given 

576 file in any way. 

577 

578 Raises 

579 ------ 

580 NotImplementedError 

581 Raised if the datastore does not support the given transfer mode 

582 (including the case where ingest is not supported at all). 

583 FileNotFoundError 

584 Raised if one of the given files does not exist. 

585 """ 

586 raise NotImplementedError("Must be implemented by subclasses.") 

587 

588 @abstractmethod 

589 def _extractIngestInfo(self, path: Union[str, ButlerURI], ref: DatasetRef, *, 

590 formatter: Union[Formatter, Type[Formatter]], 

591 transfer: Optional[str] = None) -> StoredFileInfo: 

592 """Relocate (if necessary) and extract `StoredFileInfo` from a 

593 to-be-ingested file. 

594 

595 Parameters 

596 ---------- 

597 path : `str` or `ButlerURI` 

598 URI or path of a file to be ingested. 

599 ref : `DatasetRef` 

600 Reference for the dataset being ingested. Guaranteed to have 

601 ``dataset_id is not None``. 

602 formatter : `type` or `Formatter` 

603 `Formatter` subclass to use for this dataset or an instance. 

604 transfer : `str`, optional 

605 How (and whether) the dataset should be added to the datastore. 

606 See `ingest` for details of transfer modes. 

607 

608 Returns 

609 ------- 

610 info : `StoredFileInfo` 

611 Internal datastore record for this file. This will be inserted by 

612 the caller; `_extractIngestInfo` is only responsible for 

613 creating and populating the struct. 

614 

615 Raises 

616 ------ 

617 FileNotFoundError 

618 Raised if one of the given files does not exist. 

619 FileExistsError 

620 Raised if transfer is not `None` but the (internal) location the 

621 file would be moved to is already occupied. 

622 """ 

623 raise NotImplementedError("Must be implemented by subclasses.") 

624 

625 def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> _IngestPrepData: 

626 # Docstring inherited from Datastore._prepIngest. 

627 filtered = [] 

628 for dataset in datasets: 

629 acceptable = [ref for ref in dataset.refs if self.constraints.isAcceptable(ref)] 

630 if not acceptable: 

631 continue 

632 else: 

633 dataset.refs = acceptable 

634 if dataset.formatter is None: 

635 dataset.formatter = self.formatterFactory.getFormatterClass(dataset.refs[0]) 

636 else: 

637 assert isinstance(dataset.formatter, (type, str)) 

638 dataset.formatter = getClassOf(dataset.formatter) 

639 dataset.path = self._standardizeIngestPath(dataset.path, transfer=transfer) 

640 filtered.append(dataset) 

641 return _IngestPrepData(filtered) 

642 

643 @transactional 

644 def _finishIngest(self, prepData: Datastore.IngestPrepData, *, transfer: Optional[str] = None) -> None: 

645 # Docstring inherited from Datastore._finishIngest. 

646 refsAndInfos = [] 

647 for dataset in prepData.datasets: 

648 # Do ingest as if the first dataset ref is associated with the file 

649 info = self._extractIngestInfo(dataset.path, dataset.refs[0], formatter=dataset.formatter, 

650 transfer=transfer) 

651 refsAndInfos.extend([(ref, info) for ref in dataset.refs]) 

652 self._register_datasets(refsAndInfos) 
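
    # For illustration: callers normally reach _prepIngest and _finishIngest
    # through the base-class ``ingest`` entry point, e.g. (path and transfer
    # mode hypothetical):
    #
    #   datastore.ingest(FileDataset(path="data/file.fits", refs=[ref]),
    #                    transfer="copy")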

653 

654 def _calculate_ingested_datastore_name(self, srcUri: ButlerURI, ref: DatasetRef, 

655 formatter: Union[Formatter, Type[Formatter]]) -> Location: 

656 """Given a source URI and a DatasetRef, determine the name the 

657 dataset will have inside datastore. 

658 

659 Parameters 

660 ---------- 

661 srcUri : `ButlerURI` 

662 URI to the source dataset file. 

663 ref : `DatasetRef` 

664 Ref associated with the newly-ingested dataset artifact. This 

665 is used to determine the name within the datastore. 

666 formatter : `Formatter` or `type` 

667 Formatter to use for validation. Can be a class or an instance. 

668 

669 Returns 

670 ------- 

671 location : `Location` 

672 Target location for the newly-ingested dataset. 

673 """ 

674 # Ingesting a file from outside the datastore. 

675 # This involves a new name. 

676 template = self.templates.getTemplate(ref) 

677 location = self.locationFactory.fromPath(template.format(ref)) 

678 

679 # Get the extension 

680 ext = srcUri.getExtension() 

681 

682 # Update the destination to include that extension 

683 location.updateExtension(ext) 

684 

685 # Ask the formatter to validate this extension 

686 formatter.validateExtension(location) 

687 

688 return location 

689 

690 @abstractmethod 

691 def _write_in_memory_to_artifact(self, inMemoryDataset: Any, ref: DatasetRef) -> StoredFileInfo: 

692 """Write out in memory dataset to datastore. 

693 

694 Parameters 

695 ---------- 

696 inMemoryDataset : `object` 

697 Dataset to write to datastore. 

698 ref : `DatasetRef` 

699 Registry information associated with this dataset. 

700 

701 Returns 

702 ------- 

703 info : `StoredFileInfo` 

704 Information describing the artifact written to the datastore. 

705 """ 

706 raise NotImplementedError() 

707 

708 @abstractmethod 

709 def _read_artifact_into_memory(self, getInfo: DatastoreFileGetInformation, 

710 ref: DatasetRef, isComponent: bool = False) -> Any: 

711 """Read the artifact from datastore into in memory object. 

712 

713 Parameters 

714 ---------- 

715 getInfo : `DatastoreFileGetInformation` 

716 Information about the artifact within the datastore. 

717 ref : `DatasetRef` 

718 The registry information associated with this artifact. 

719 isComponent : `bool` 

720 Flag to indicate if a component is being read from this artifact. 

721 

722 Returns 

723 ------- 

724 inMemoryDataset : `object` 

725 The artifact as a python object. 

726 """ 

727 raise NotImplementedError() 

728 

729 def exists(self, ref: DatasetRef) -> bool: 

730 """Check if the dataset exists in the datastore. 

731 

732 Parameters 

733 ---------- 

734 ref : `DatasetRef` 

735 Reference to the required dataset. 

736 

737 Returns 

738 ------- 

739 exists : `bool` 

740 `True` if the entity exists in the `Datastore`. 

741 """ 

742 fileLocations = self._get_dataset_locations_info(ref) 

743 if not fileLocations: 

744 return False 

745 for location, _ in fileLocations: 

746 if not self._artifact_exists(location): 

747 return False 

748 

749 return True 

750 

751 def getURIs(self, ref: DatasetRef, 

752 predict: bool = False) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]: 

753 """Return URIs associated with dataset. 

754 

755 Parameters 

756 ---------- 

757 ref : `DatasetRef` 

758 Reference to the required dataset. 

759 predict : `bool`, optional 

760 If the datastore does not know about the dataset, should it 

761 return a predicted URI or not? 

762 

763 Returns 

764 ------- 

765 primary : `ButlerURI` 

766 The URI to the primary artifact associated with this dataset. 

767 If the dataset was disassembled within the datastore this 

768 may be `None`. 

769 components : `dict` 

770 URIs to any components associated with the dataset artifact. 

771 Can be empty if there are no components. 

772 """ 

773 

774 primary: Optional[ButlerURI] = None 

775 components: Dict[str, ButlerURI] = {} 

776 

777 # if this has never been written then we have to guess 

778 if not self.exists(ref): 

779 if not predict: 

780 raise FileNotFoundError("Dataset {} not in this datastore".format(ref)) 

781 

782 def predictLocation(thisRef: DatasetRef) -> Location: 

783 template = self.templates.getTemplate(thisRef) 

784 location = self.locationFactory.fromPath(template.format(thisRef)) 

785 storageClass = ref.datasetType.storageClass 

786 formatter = self.formatterFactory.getFormatter(thisRef, 

787 FileDescriptor(location, 

788 storageClass=storageClass)) 

789 # Try to use the extension attribute but ignore problems if the 

790 # formatter does not define one. 

791 try: 

792 location = formatter.makeUpdatedLocation(location) 

793 except Exception: 

794 # Use the default extension 

795 pass 

796 return location 

797 

798 doDisassembly = self.composites.shouldBeDisassembled(ref) 

799 

800 if doDisassembly: 

801 

802 for component, componentStorage in ref.datasetType.storageClass.components.items(): 

803 compRef = ref.makeComponentRef(component) 

804 compLocation = predictLocation(compRef) 

805 

806 # Add a URI fragment to indicate this is a guess 

807 components[component] = ButlerURI(compLocation.uri.geturl() + "#predicted") 

808 

809 else: 

810 

811 location = predictLocation(ref) 

812 

813 # Add a URI fragment to indicate this is a guess 

814 primary = ButlerURI(location.uri.geturl() + "#predicted") 

815 

816 return primary, components 

817 

818 # If this is a ref that we have written we can get the path. 

819 # Get file metadata and internal metadata 

820 fileLocations = self._get_dataset_locations_info(ref) 

821 

822 if not fileLocations: 

823 raise RuntimeError(f"Unexpectedly got no artifacts for dataset {ref}") 

824 

825 if len(fileLocations) == 1: 

826 # No disassembly so this is the primary URI 

827 primary = ButlerURI(fileLocations[0][0].uri) 

828 

829 else: 

830 for location, storedFileInfo in fileLocations: 

831 if storedFileInfo.component is None: 

832 raise RuntimeError(f"Unexpectedly got no component name for a component at {location}") 

833 components[storedFileInfo.component] = ButlerURI(location.uri) 

834 

835 return primary, components 

836 
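
    # For illustration, getURIs above returns one of two shapes (URIs and
    # component names hypothetical):
    #
    #   undisassembled: (ButlerURI("file:///repo/run/file.fits"), {})
    #   disassembled:   (None, {"data": ButlerURI("..."), "mask": ButlerURI("...")})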

837 def getURI(self, ref: DatasetRef, predict: bool = False) -> ButlerURI: 

838 """URI to the Dataset. 

839 

840 Parameters 

841 ---------- 

842 ref : `DatasetRef` 

843 Reference to the required Dataset. 

844 predict : `bool` 

845 If `True`, allow URIs to be returned of datasets that have not 

846 been written. 

847 

848 Returns 

849 ------- 

850 uri : `ButlerURI` 

851 URI pointing to the dataset within the datastore. If the 

852 dataset does not exist in the datastore, and if ``predict`` is 

853 `True`, the URI will be a prediction and will include a URI 

854 fragment "#predicted". 

855 If the datastore does not have entities that relate well 

856 to the concept of a URI the returned URI will be 

857 descriptive. The returned URI is not guaranteed to be obtainable. 

858 

859 Raises 

860 ------ 

861 FileNotFoundError 

862 Raised if a URI has been requested for a dataset that does not 

863 exist and guessing is not allowed. 

864 RuntimeError 

865 Raised if a request is made for a single URI but multiple URIs 

866 are associated with this dataset. 

867 

868 Notes 

869 ----- 

870 When a predicted URI is requested an attempt will be made to form 

871 a reasonable URI based on file templates and the expected formatter. 

872 """ 

873 primary, components = self.getURIs(ref, predict) 

874 if primary is None or components: 

875 raise RuntimeError(f"Dataset ({ref}) includes distinct URIs for components. " 

876 "Use Dataastore.getURIs() instead.") 

877 return primary 

878 

879 def get(self, ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any: 

880 """Load an InMemoryDataset from the store. 

881 

882 Parameters 

883 ---------- 

884 ref : `DatasetRef` 

885 Reference to the required Dataset. 

886 parameters : `dict` 

887 `StorageClass`-specific parameters that specify, for example, 

888 a slice of the dataset to be loaded. 

889 

890 Returns 

891 ------- 

892 inMemoryDataset : `object` 

893 Requested dataset or slice thereof as an InMemoryDataset. 

894 

895 Raises 

896 ------ 

897 FileNotFoundError 

898 Requested dataset can not be retrieved. 

899 TypeError 

900 Return value from formatter has unexpected type. 

901 ValueError 

902 Formatter failed to process the dataset. 

903 """ 

904 allGetInfo = self._prepare_for_get(ref, parameters) 

905 refComponent = ref.datasetType.component() 

906 

907 # Supplied storage class for the component being read 

908 refStorageClass = ref.datasetType.storageClass 

909 

910 # Create mapping from component name to related info 

911 allComponents = {i.component: i for i in allGetInfo} 

912 

913 # By definition the dataset is disassembled if we have more 

914 # than one record for it. 

915 isDisassembled = len(allGetInfo) > 1 

916 

917 # Look for the special case where we are disassembled but the 

918 # component is a read-only component that was not written during 

919 # disassembly. For this scenario we need to check that the 

920 # component requested is listed as a read-only component for the 

921 # composite storage class 

922 isDisassembledReadOnlyComponent = False 

923 if isDisassembled and refComponent: 

924 # The composite storage class should be accessible through 

925 # the component dataset type 

926 compositeStorageClass = ref.datasetType.parentStorageClass 

927 

928 # In the unlikely scenario where the composite storage 

929 # class is not known, we can only assume that this is a 

930 # normal component. If that assumption is wrong then the 

931 # branch below that reads a persisted component will fail 

932 # so there is no need to complain here. 

933 if compositeStorageClass is not None: 

934 isDisassembledReadOnlyComponent = refComponent in compositeStorageClass.readComponents 

935 

936 if isDisassembled and not refComponent: 

937 # This was a disassembled dataset spread over multiple files 

938 # and we need to put them all back together again. 

939 # Read into memory and then assemble 

940 

941 # Check that the supplied parameters are suitable for the type read 

942 refStorageClass.validateParameters(parameters) 

943 

944 # We want to keep track of all the parameters that were not used 

945 # by formatters. We assume that if any of the component formatters 

946 # use a parameter then we do not need to apply it again in the 

947 # assembler. 

948 usedParams = set() 

949 

950 components: Dict[str, Any] = {} 

951 for getInfo in allGetInfo: 

952 # assemblerParams are parameters not understood by the 

953 # associated formatter. 

954 usedParams.update(set(getInfo.formatterParams)) 

955 

956 component = getInfo.component 

957 

958 if component is None: 

959 raise RuntimeError(f"Internal error in datastore assembly of {ref}") 

960 

961 # We do not want the formatter to think it's reading 

962 # a component though because it is really reading a 

963 # standalone dataset -- always tell reader it is not a 

964 # component. 

965 components[component] = self._read_artifact_into_memory(getInfo, ref, isComponent=False) 

966 

967 inMemoryDataset = ref.datasetType.storageClass.assembler().assemble(components) 

968 

969 # Any unused parameters will have to be passed to the assembler 

970 if parameters: 

971 unusedParams = {k: v for k, v in parameters.items() if k not in usedParams} 

972 else: 

973 unusedParams = {} 

974 

975 # Process parameters 

976 return ref.datasetType.storageClass.assembler().handleParameters(inMemoryDataset, 

977 parameters=unusedParams) 

978 

979 elif isDisassembledReadOnlyComponent: 

980 

981 compositeStorageClass = ref.datasetType.parentStorageClass 

982 if compositeStorageClass is None: 

983 raise RuntimeError(f"Unable to retrieve read-only component '{refComponent}' since" 

984 "no composite storage class is available.") 

985 

986 if refComponent is None: 

987 # Mainly for mypy 

988 raise RuntimeError(f"Internal error in datastore {self.name}: component can not be None here") 

989 

990 # Assume that every read-only component can be calculated by 

991 # forwarding the request to a single read/write component. 

992 # Rather than guessing which rw component is the right one by 

993 # scanning each for a read-only component of the same name, 

994 # we ask the composite assembler directly which one is best to 

995 # use. 

996 compositeAssembler = compositeStorageClass.assembler() 

997 forwardedComponent = compositeAssembler.selectResponsibleComponent(refComponent, 

998 set(allComponents)) 

999 

1000 # Select the relevant component 

1001 rwInfo = allComponents[forwardedComponent] 

1002 

1003 # For now assume that read parameters are validated against 

1004 # the real component and not the requested component 

1005 forwardedStorageClass = rwInfo.formatter.fileDescriptor.readStorageClass 

1006 forwardedStorageClass.validateParameters(parameters) 

1007 

1008 # Unfortunately the FileDescriptor inside the formatter will have 

1009 # the wrong write storage class so we need to create a new one 

1010 # given the immutability constraint. 

1011 writeStorageClass = rwInfo.info.storageClass 

1012 

1013 # We may need to put some thought into parameters for read 

1014 # components but for now forward them on as is 

1015 readFormatter = type(rwInfo.formatter)(FileDescriptor(rwInfo.location, 

1016 readStorageClass=refStorageClass, 

1017 storageClass=writeStorageClass, 

1018 parameters=parameters), 

1019 ref.dataId) 

1020 

1021 # The assembler can not receive any parameter requests for a 

1022 # read-only component at this time since the assembler will 

1023 # see the storage class of the read-only component and those 

1024 # parameters will have to be handled by the formatter on the 

1025 # forwarded storage class. 

1026 assemblerParams: Dict[str, Any] = {} 

1027 

1028 # Need to create a new info that specifies the read-only 

1029 # component and associated storage class 

1030 readInfo = DatastoreFileGetInformation(rwInfo.location, readFormatter, 

1031 rwInfo.info, assemblerParams, {}, 

1032 refComponent, refStorageClass) 

1033 

1034 return self._read_artifact_into_memory(readInfo, ref, isComponent=True) 

1035 

1036 else: 

1037 # Single file request or component from that composite file 

1038 for lookup in (refComponent, None): 

1039 if lookup in allComponents: 

1040 getInfo = allComponents[lookup] 

1041 break 

1042 else: 

1043 raise FileNotFoundError(f"Component {refComponent} not found " 

1044 f"for ref {ref} in datastore {self.name}") 

1045 

1046 # Do not need the component itself if already disassembled 

1047 if isDisassembled: 

1048 isComponent = False 

1049 else: 

1050 isComponent = getInfo.component is not None 

1051 

1052 # For a disassembled component we can validate parameters against 

1053 # the component storage class directly 

1054 if isDisassembled: 

1055 refStorageClass.validateParameters(parameters) 

1056 else: 

1057 # For an assembled composite this could be a read-only 

1058 # component derived from a real component. The validity 

1059 # of the parameters is not clear. For now validate against 

1060 # the composite storage class 

1061 getInfo.formatter.fileDescriptor.storageClass.validateParameters(parameters) 

1062 

1063 return self._read_artifact_into_memory(getInfo, ref, isComponent=isComponent) 

1064 

1065 @transactional 

1066 def put(self, inMemoryDataset: Any, ref: DatasetRef) -> None: 

1067 """Write a InMemoryDataset with a given `DatasetRef` to the store. 

1068 

1069 Parameters 

1070 ---------- 

1071 inMemoryDataset : `object` 

1072 The dataset to store. 

1073 ref : `DatasetRef` 

1074 Reference to the associated Dataset. 

1075 

1076 Raises 

1077 ------ 

1078 TypeError 

1079 Supplied object and storage class are inconsistent. 

1080 DatasetTypeNotSupportedError 

1081 The associated `DatasetType` is not handled by this datastore. 

1082 

1083 Notes 

1084 ----- 

1085 If the datastore is configured to reject certain dataset types it 

1086 is possible that the put will fail and raise a 

1087 `DatasetTypeNotSupportedError`. The main use case for this is to 

1088 allow `ChainedDatastore` to put to multiple datastores without 

1089 requiring that every datastore accepts the dataset. 

1090 """ 

1091 

1092 doDisassembly = self.composites.shouldBeDisassembled(ref) 

1093 # doDisassembly = True 

1094 

1095 artifacts = [] 

1096 if doDisassembly: 

1097 components = ref.datasetType.storageClass.assembler().disassemble(inMemoryDataset) 

1098 for component, componentInfo in components.items(): 

1099 # Don't recurse because we want to take advantage of 

1100 # bulk insert -- need a new DatasetRef that refers to the 

1101 # same dataset_id but has the component DatasetType. 

1102 # DatasetType does not refer to the types of components, 

1103 # so we construct one ourselves. 

1104 compRef = ref.makeComponentRef(component) 

1105 storedInfo = self._write_in_memory_to_artifact(componentInfo.component, compRef) 

1106 artifacts.append((compRef, storedInfo)) 

1107 else: 

1108 # Write the entire thing out 

1109 storedInfo = self._write_in_memory_to_artifact(inMemoryDataset, ref) 

1110 artifacts.append((ref, storedInfo)) 

1111 

1112 self._register_datasets(artifacts) 

1113 

1114 @transactional 

1115 def trash(self, ref: DatasetRef, ignore_errors: bool = True) -> None: 

1116 """Indicate to the datastore that a dataset can be removed. 

1117 

1118 Parameters 

1119 ---------- 

1120 ref : `DatasetRef` 

1121 Reference to the required Dataset. 

1122 ignore_errors : `bool` 

1123 If `True` return without error even if something went wrong. 

1124 Problems could occur if another process is simultaneously trying 

1125 to delete. 

1126 

1127 Raises 

1128 ------ 

1129 FileNotFoundError 

1130 Attempt to remove a dataset that does not exist. 

1131 """ 

1132 # Get file metadata and internal metadata 

1133 log.debug("Trashing %s in datastore %s", ref, self.name) 

1134 

1135 fileLocations = self._get_dataset_locations_info(ref) 

1136 

1137 if not fileLocations: 

1138 err_msg = f"Requested dataset to trash ({ref}) is not known to datastore {self.name}" 

1139 if ignore_errors: 

1140 log.warning(err_msg) 

1141 return 

1142 else: 

1143 raise FileNotFoundError(err_msg) 

1144 

1145 for location, storedFileInfo in fileLocations: 

1146 if not self._artifact_exists(location): 

1147 err_msg = f"Dataset is known to datastore {self.name} but " \ 

1148 f"associated artifact ({location.uri}) is missing" 

1149 if ignore_errors: 

1150 log.warning(err_msg) 

1151 return 

1152 else: 

1153 raise FileNotFoundError(err_msg) 

1154 

1155 # Mark dataset as trashed 

1156 try: 

1157 self._move_to_trash_in_registry(ref) 

1158 except Exception as e: 

1159 if ignore_errors: 

1160 log.warning(f"Attempted to mark dataset ({ref}) to be trashed in datastore {self.name} " 

1161 f"but encountered an error: {e}") 

1162 pass 

1163 else: 

1164 raise 

1165 

1166 @transactional 

1167 def emptyTrash(self, ignore_errors: bool = True) -> None: 

1168 """Remove all datasets from the trash. 

1169 

1170 Parameters 

1171 ---------- 

1172 ignore_errors : `bool` 

1173 If `True` return without error even if something went wrong. 

1174 Problems could occur if another process is simultaneously trying 

1175 to delete. 

1176 """ 

1177 log.debug("Emptying trash in datastore %s", self.name) 

1178 # Context manager will empty trash iff we finish it without raising. 

1179 with self.bridge.emptyTrash() as trashed: 

1180 for ref in trashed: 

1181 fileLocations = self._get_dataset_locations_info(ref) 

1182 

1183 if not fileLocations: 

1184 err_msg = f"Requested dataset ({ref}) does not exist in datastore {self.name}" 

1185 if ignore_errors: 

1186 log.warning(err_msg) 

1187 continue 

1188 else: 

1189 raise FileNotFoundError(err_msg) 

1190 

1191 for location, _ in fileLocations: 

1192 

1193 if not self._artifact_exists(location): 

1194 err_msg = f"Dataset {location.uri} no longer present in datastore {self.name}" 

1195 if ignore_errors: 

1196 log.warning(err_msg) 

1197 continue 

1198 else: 

1199 raise FileNotFoundError(err_msg) 

1200 

1201 # Can only delete the artifact if there are no references 

1202 # to the file from untrashed dataset refs. 

1203 if self._can_remove_dataset_artifact(ref, location): 

1204 # Point of no return for this artifact 

1205 log.debug("Removing artifact %s from datastore %s", location.uri, self.name) 

1206 try: 

1207 self._delete_artifact(location) 

1208 except Exception as e: 

1209 if ignore_errors: 

1210 log.critical("Encountered error removing artifact %s from datastore %s: %s", 

1211 location.uri, self.name, e) 

1212 else: 

1213 raise 

1214 

1215 # Now must remove the entry from the internal registry even if 

1216 # the artifact removal failed and was ignored, 

1217 # otherwise the removal check above will never be true 

1218 try: 

1219 # There may be multiple rows associated with this ref 

1220 # depending on disassembly 

1221 self.removeStoredItemInfo(ref) 

1222 except Exception as e: 

1223 if ignore_errors: 

1224 log.warning("Error removing dataset %s (%s) from internal registry of %s: %s", 

1225 ref.id, location.uri, self.name, e) 

1226 continue 

1227 else: 

1228 raise 

1229 

1230 def validateConfiguration(self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], 

1231 logFailures: bool = False) -> None: 

1232 """Validate some of the configuration for this datastore. 

1233 

1234 Parameters 

1235 ---------- 

1236 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass` 

1237 Entities to test against this configuration. Can be differing 

1238 types. 

1239 logFailures : `bool`, optional 

1240 If `True`, output a log message for every validation error 

1241 detected. 

1242 

1243 Raises 

1244 ------ 

1245 DatastoreValidationError 

1246 Raised if there is a validation problem with a configuration. 

1247 All the problems are reported in a single exception. 

1248 

1249 Notes 

1250 ----- 

1251 This method checks that all the supplied entities have valid file 

1252 templates and also have formatters defined. 

1253 """ 

1254 

1255 templateFailed = None 

1256 try: 

1257 self.templates.validateTemplates(entities, logFailures=logFailures) 

1258 except FileTemplateValidationError as e: 

1259 templateFailed = str(e) 

1260 

1261 formatterFailed = [] 

1262 for entity in entities: 

1263 try: 

1264 self.formatterFactory.getFormatterClass(entity) 

1265 except KeyError as e: 

1266 formatterFailed.append(str(e)) 

1267 if logFailures: 

1268 log.fatal("Formatter failure: %s", e) 

1269 

1270 if templateFailed or formatterFailed: 

1271 messages = [] 

1272 if templateFailed: 

1273 messages.append(templateFailed) 

1274 if formatterFailed: 

1275 messages.append(",".join(formatterFailed)) 

1276 msg = ";\n".join(messages) 

1277 raise DatastoreValidationError(msg) 

1278 

1279 def getLookupKeys(self) -> Set[LookupKey]: 

1280 # Docstring is inherited from base class 

1281 return self.templates.getLookupKeys() | self.formatterFactory.getLookupKeys() | \ 

1282 self.constraints.getLookupKeys() 

1283 

1284 def validateKey(self, lookupKey: LookupKey, 

1285 entity: Union[DatasetRef, DatasetType, StorageClass]) -> None: 

1286 # Docstring is inherited from base class 

1287 # The key can be valid in either formatters or templates so we can 

1288 # only check the template if it exists 

1289 if lookupKey in self.templates: 

1290 try: 

1291 self.templates[lookupKey].validateTemplate(entity) 

1292 except FileTemplateValidationError as e: 

1293 raise DatastoreValidationError(e) from e 

1294 

1295 def export(self, refs: Iterable[DatasetRef], *, 

1296 directory: Optional[Union[ButlerURI, str]] = None, 

1297 transfer: Optional[str] = "auto") -> Iterable[FileDataset]: 

1298 # Docstring inherited from Datastore.export. 

1299 if transfer is not None and directory is None: 

1300 raise RuntimeError(f"Cannot export using transfer mode {transfer} with no " 

1301 "export directory given") 

1302 

1303 # Force the directory to be a URI object 

1304 directoryUri: Optional[ButlerURI] = None 

1305 if directory is not None: 

1306 directoryUri = ButlerURI(directory, forceDirectory=True) 

1307 

1308 if transfer is not None and directoryUri is not None: 

1309 # mypy needs the second test 

1310 if not directoryUri.exists(): 

1311 raise FileNotFoundError(f"Export location {directory} does not exist") 

1312 

1313 for ref in refs: 

1314 fileLocations = self._get_dataset_locations_info(ref) 

1315 if not fileLocations: 

1316 raise FileNotFoundError(f"Could not retrieve dataset {ref}.") 

1317 # For now we can not export disassembled datasets 

1318 if len(fileLocations) > 1: 

1319 raise NotImplementedError(f"Can not export disassembled datasets such as {ref}") 

1320 location, storedFileInfo = fileLocations[0] 

1321 if transfer is None: 

1322 # TODO: do we also need to return the readStorageClass somehow? 

1323 # We will use the path in store directly 

1324 pass 

1325 else: 

1326 # mypy needs help 

1327 assert directoryUri is not None, "directoryUri must be defined to get here" 

1328 storeUri = ButlerURI(location.uri) 

1329 exportUri = directoryUri.join(location.pathInStore) 

1330 exportUri.transfer_from(storeUri, transfer=transfer) 

1331 

1332 yield FileDataset(refs=[ref], path=location.pathInStore, formatter=storedFileInfo.formatter)