
1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22""" 

23Butler top level classes. 

24""" 

25from __future__ import annotations 

26 

27__all__ = ( 

28 "Butler", 

29 "ButlerValidationError", 

30 "PruneCollectionsArgsError", 

31 "PurgeWithoutUnstorePruneCollectionsError", 

32 "RunWithoutPurgePruneCollectionsError", 

33 "PurgeUnsupportedPruneCollectionsError", 

34) 

35 

36 

37from collections import defaultdict 

38import contextlib 

39import logging 

40import numbers 

41import os 

42from typing import ( 

43 Any, 

44 ClassVar, 

45 Counter, 

46 Dict, 

47 Iterable, 

48 Iterator, 

49 List, 

50 MutableMapping, 

51 Optional, 

52 Set, 

53 TextIO, 

54 Tuple, 

55 Type, 

56 Union, 

57) 

58 

59try: 

60 import boto3 

61except ImportError: 

62 boto3 = None 

63 

64from lsst.utils import doImport 

65from .core import ( 

66 AmbiguousDatasetError, 

67 ButlerURI, 

68 Config, 

69 ConfigSubset, 

70 DataCoordinate, 

71 DataId, 

72 DataIdValue, 

73 DatasetRef, 

74 DatasetType, 

75 Datastore, 

76 Dimension, 

77 DimensionConfig, 

78 FileDataset, 

79 Progress, 

80 StorageClassFactory, 

81 Timespan, 

82 ValidationError, 

83) 

84from .core.repoRelocation import BUTLER_ROOT_TAG 

85from .core.utils import transactional, getClassOf 

86from ._deferredDatasetHandle import DeferredDatasetHandle 

87from ._butlerConfig import ButlerConfig 

88from .registry import Registry, RegistryConfig, RegistryDefaults, CollectionType, ConflictingDefinitionError 

89from .registry.wildcards import CollectionSearch 

90from .transfers import RepoExportContext 

91 

92log = logging.getLogger(__name__) 

93 

94 

95class ButlerValidationError(ValidationError): 

96 """There is a problem with the Butler configuration.""" 

97 pass 

98 

99 

100class PruneCollectionsArgsError(TypeError): 

101 """Base class for errors relating to Butler.pruneCollections input 

102 arguments. 

103 """ 

104 pass 

105 

106 

107class PurgeWithoutUnstorePruneCollectionsError(PruneCollectionsArgsError): 

108 """Raised when purge and unstore are both required to be True, and 

109 purge is True but unstore is False. 

110 """ 

111 

112 def __init__(self) -> None: 

113 super().__init__("Cannot pass purge=True without unstore=True.") 

114 

115 

116class RunWithoutPurgePruneCollectionsError(PruneCollectionsArgsError): 

117 """Raised when pruning a RUN collection but purge is False.""" 

118 

119 def __init__(self, collectionType: CollectionType): 

120 self.collectionType = collectionType 

121 super().__init__(f"Cannot prune RUN collection {self.collectionType.name} without purge=True.") 

122 

123 

124class PurgeUnsupportedPruneCollectionsError(PruneCollectionsArgsError): 

125 """Raised when purge is True but is not supported for the given 

126 collection.""" 

127 

128 def __init__(self, collectionType: CollectionType): 

129 self.collectionType = collectionType 

130 super().__init__( 

131 f"Cannot prune {self.collectionType} collection {self.collectionType.name} with purge=True.") 

132 

133 

134class Butler: 

135 """Main entry point for the data access system. 

136 

137 Parameters 

138 ---------- 

139 config : `ButlerConfig`, `Config` or `str`, optional 

140 Configuration. Anything acceptable to the 

141 `ButlerConfig` constructor. If a directory path 

142 is given the configuration will be read from a ``butler.yaml`` file in 

143 that location. If `None` is given default values will be used. 

144 butler : `Butler`, optional 

145 If provided, construct a new Butler that uses the same registry and 

146 datastore as the given one, but with the given collection and run. 

147 Incompatible with the ``config``, ``searchPaths``, and ``writeable`` 

148 arguments. 

149 collections : `str` or `Iterable` [ `str` ], optional 

150 An expression specifying the collections to be searched (in order) when 

151 reading datasets. 

152 This may be a `str` collection name or an iterable thereof. 

153 See :ref:`daf_butler_collection_expressions` for more information. 

154 These collections are not registered automatically and must be 

155 manually registered before they are used by any method, but they may be 

156 manually registered after the `Butler` is initialized. 

157 run : `str`, optional 

158 Name of the `~CollectionType.RUN` collection new datasets should be 

159 inserted into. If ``collections`` is `None` and ``run`` is not `None`, 

160 ``collections`` will be set to ``[run]``. If not `None`, this 

161 collection will automatically be registered. If this is not set (and 

162 ``writeable`` is not set either), a read-only butler will be created. 

163 searchPaths : `list` of `str`, optional 

164 Directory paths to search when calculating the full Butler 

165 configuration. Not used if the supplied config is already a 

166 `ButlerConfig`. 

167 writeable : `bool`, optional 

168 Explicitly sets whether the butler supports write operations. If not 

169 provided, a read-write butler is created if ``run`` is not `None`; 

170 otherwise a read-only butler is created. 

171 inferDefaults : `bool`, optional 

172 If `True` (default) infer default data ID values from the values 

173 present in the datasets in ``collections``: if all collections have the 

174 same value (or no value) for a governor dimension, that value will be 

175 the default for that dimension. Nonexistent collections are ignored. 

176 If a default value is provided explicitly for a governor dimension via 

177 ``**kwargs``, no default will be inferred for that dimension. 

178 **kwargs : `str` 

179 Default data ID key-value pairs. These may only identify "governor" 

180 dimensions like ``instrument`` and ``skymap``. 

181 

182 Examples 

183 -------- 

184 While there are many ways to control exactly how a `Butler` interacts with 

185 the collections in its `Registry`, the most common cases are still simple. 

186 

187 For a read-only `Butler` that searches one collection, do:: 

188 

189 butler = Butler("/path/to/repo", collections=["u/alice/DM-50000"]) 

190 

191 For a read-write `Butler` that writes to and reads from a 

192 `~CollectionType.RUN` collection:: 

193 

194 butler = Butler("/path/to/repo", run="u/alice/DM-50000/a") 

195 

196 The `Butler` passed to a ``PipelineTask`` is often much more complex, 

197 because we want to write to one `~CollectionType.RUN` collection but read 

198 from several others (as well):: 

199 

200 butler = Butler("/path/to/repo", run="u/alice/DM-50000/a", 

201 collections=["u/alice/DM-50000/a", 

202 "u/bob/DM-49998", 

203 "HSC/defaults"]) 

204 

205 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``. 

206 Datasets will be read first from that run (since it appears first in the 

207 chain), and then from ``u/bob/DM-49998`` and finally ``HSC/defaults``. 

208 

209 Finally, one can always create a `Butler` with no collections:: 

210 

211 butler = Butler("/path/to/repo", writeable=True) 

212 

213 This can be extremely useful when you just want to use ``butler.registry``, 

214 e.g. for inserting dimension data or managing collections, or when the 

215 collections you want to use with the butler are not consistent. 

216 Passing ``writeable`` explicitly here is only necessary if you want to be 

217 able to make changes to the repo; usually the value for ``writeable`` can 

218 be guessed from the collection arguments provided, but it defaults to 

219 `False` when no collection arguments are given. 
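Default data ID values for governor dimensions can also be supplied at
construction time. As an illustrative sketch (the repository path,
collection, and instrument value here are placeholders, not package
defaults)::

    butler = Butler("/path/to/repo", collections=["HSC/defaults"],
                    instrument="HSC")

Subsequent calls such as ``butler.get`` may then omit the ``instrument``
key from their data IDs.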

220 """ 

221 def __init__(self, config: Union[Config, str, None] = None, *, 

222 butler: Optional[Butler] = None, 

223 collections: Any = None, 

224 run: Optional[str] = None, 

225 searchPaths: Optional[List[str]] = None, 

226 writeable: Optional[bool] = None, 

227 inferDefaults: bool = True, 

228 **kwargs: str, 

229 ): 

230 defaults = RegistryDefaults(collections=collections, run=run, infer=inferDefaults, **kwargs) 

231 # Load registry, datastore, etc. from config or existing butler. 

232 if butler is not None: 

233 if config is not None or searchPaths is not None or writeable is not None: 

234 raise TypeError("Cannot pass 'config', 'searchPaths', or 'writeable' " 

235 "arguments with 'butler' argument.") 

236 self.registry = butler.registry.copy(defaults) 

237 self.datastore = butler.datastore 

238 self.storageClasses = butler.storageClasses 

239 self._config: ButlerConfig = butler._config 

240 else: 

241 self._config = ButlerConfig(config, searchPaths=searchPaths) 

242 if "root" in self._config: 

243 butlerRoot = self._config["root"] 

244 else: 

245 butlerRoot = self._config.configDir 

246 if writeable is None: 

247 writeable = run is not None 

248 self.registry = Registry.fromConfig(self._config, butlerRoot=butlerRoot, writeable=writeable, 

249 defaults=defaults) 

250 self.datastore = Datastore.fromConfig(self._config, self.registry.getDatastoreBridgeManager(), 

251 butlerRoot=butlerRoot) 

252 self.storageClasses = StorageClassFactory() 

253 self.storageClasses.addFromConfig(self._config) 

254 if "run" in self._config or "collection" in self._config: 

255 raise ValueError("Passing a run or collection via configuration is no longer supported.") 

256 

257 GENERATION: ClassVar[int] = 3 

258 """This is a Generation 3 Butler. 

259 

260 This attribute may be removed in the future, once the Generation 2 Butler 

261 interface has been fully retired; it should only be used in transitional 

262 code. 

263 """ 

264 

265 @staticmethod 

266 def makeRepo(root: str, config: Union[Config, str, None] = None, 

267 dimensionConfig: Union[Config, str, None] = None, standalone: bool = False, 

268 searchPaths: Optional[List[str]] = None, forceConfigRoot: bool = True, 

269 outfile: Optional[str] = None, overwrite: bool = False) -> Config: 

270 """Create an empty data repository by adding a butler.yaml config 

271 to a repository root directory. 

272 

273 Parameters 

274 ---------- 

275 root : `str` or `ButlerURI` 

276 Path or URI to the root location of the new repository. Will be 

277 created if it does not exist. 

278 config : `Config` or `str`, optional 

279 Configuration to write to the repository, after setting any 

280 root-dependent Registry or Datastore config options. Can not 

281 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default 

282 configuration will be used. Root-dependent config options 

283 specified in this config are overwritten if ``forceConfigRoot`` 

284 is `True`. 

285 dimensionConfig : `Config` or `str`, optional 

286 Configuration for dimensions, will be used to initialize registry 

287 database. 

288 standalone : `bool` 

289 If True, write all expanded defaults, not just customized or 

290 repository-specific settings. 

291 This (mostly) decouples the repository from the default 

292 configuration, insulating it from changes to the defaults (which 

293 may be good or bad, depending on the nature of the changes). 

294 Future *additions* to the defaults will still be picked up when 

295 initializing a `Butler` for repos created with ``standalone=True``. 

296 searchPaths : `list` of `str`, optional 

297 Directory paths to search when calculating the full butler 

298 configuration. 

299 forceConfigRoot : `bool`, optional 

300 If `False`, any values present in the supplied ``config`` that 

301 would normally be reset are not overridden and will appear 

302 directly in the output config. This allows non-standard overrides 

303 of the root directory for a datastore or registry to be given. 

304 If this parameter is `True` the values for ``root`` will be 

305 forced into the resulting config if appropriate. 

306 outfile : `str`, optional 

307 If not-`None`, the output configuration will be written to this 

308 location rather than into the repository itself. Can be a URI 

309 string. Can refer to a directory that will be used to write 

310 ``butler.yaml``. 

311 overwrite : `bool`, optional 

312 Create a new configuration file even if one already exists 

313 in the specified output location. Default is to raise 

314 an exception. 

315 

316 Returns 

317 ------- 

318 config : `Config` 

319 The updated `Config` instance written to the repo. 

320 

321 Raises 

322 ------ 

323 ValueError 

324 Raised if a ButlerConfig or ConfigSubset is passed instead of a 

325 regular Config (as these subclasses would make it impossible to 

326 support ``standalone=False``). 

327 FileExistsError 

328 Raised if the output config file already exists. 

329 os.error 

330 Raised if the directory does not exist, exists but is not a 

331 directory, or cannot be created. 

332 

333 Notes 

334 ----- 

335 Note that when ``standalone=False`` (the default), the configuration 

336 search path (see `ConfigSubset.defaultSearchPaths`) that was used to 

337 construct the repository should also be used to construct any Butlers 

338 to avoid configuration inconsistencies. 
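Examples
--------
A minimal sketch of creating a new repository and then constructing a
`Butler` for it; the path is hypothetical and the default configuration
is used::

    config = Butler.makeRepo("/path/to/new/repo")
    butler = Butler("/path/to/new/repo", writeable=True)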

339 """ 

340 if isinstance(config, (ButlerConfig, ConfigSubset)): 

341 raise ValueError("makeRepo must be passed a regular Config without defaults applied.") 

342 

343 # Ensure that the root of the repository exists or can be made 

344 uri = ButlerURI(root, forceDirectory=True) 

345 uri.mkdir() 

346 

347 config = Config(config) 

348 

349 # If we are creating a new repo from scratch with relative roots, 

350 # do not propagate an explicit root from the config file 

351 if "root" in config: 

352 del config["root"] 

353 

354 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults 

355 datastoreClass: Type[Datastore] = doImport(full["datastore", "cls"]) 

356 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) 

357 

358 # if key exists in given config, parse it, otherwise parse the defaults 

359 # in the expanded config 

360 if config.get(("registry", "db")): 

361 registryConfig = RegistryConfig(config) 

362 else: 

363 registryConfig = RegistryConfig(full) 

364 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) 

365 if defaultDatabaseUri is not None: 

366 Config.updateParameters(RegistryConfig, config, full, 

367 toUpdate={"db": defaultDatabaseUri}, 

368 overwrite=forceConfigRoot) 

369 else: 

370 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), 

371 overwrite=forceConfigRoot) 

372 

373 if standalone: 

374 config.merge(full) 

375 else: 

376 # Always expand the registry.managers section into the per-repo 

377 # config, because after the database schema is created, it's not 

378 # allowed to change anymore. Note that in the standalone=True 

379 # branch, _everything_ in the config is expanded, so there's no 

380 # need to special case this. 

381 Config.updateParameters(RegistryConfig, config, full, toCopy=("managers",), overwrite=False) 

382 configURI: Union[str, ButlerURI] 

383 if outfile is not None: 

384 # When writing to a separate location we must include 

385 # the root of the butler repo in the config else it won't know 

386 # where to look. 

387 config["root"] = uri.geturl() 

388 configURI = outfile 

389 else: 

390 configURI = uri 

391 config.dumpToUri(configURI, overwrite=overwrite) 

392 

393 # Create Registry and populate tables 

394 registryConfig = RegistryConfig(config.get("registry")) 

395 dimensionConfig = DimensionConfig(dimensionConfig) 

396 Registry.createFromConfig(registryConfig, dimensionConfig=dimensionConfig, butlerRoot=root) 

397 

398 return config 

399 

400 @classmethod 

401 def _unpickle(cls, config: ButlerConfig, collections: Optional[CollectionSearch], run: Optional[str], 

402 defaultDataId: Dict[str, str], writeable: bool) -> Butler: 

403 """Callable used to unpickle a Butler. 

404 

405 We prefer not to use ``Butler.__init__`` directly so we can force some 

406 of its many arguments to be keyword-only (note that ``__reduce__`` 

407 can only invoke callables with positional arguments). 

408 

409 Parameters 

410 ---------- 

411 config : `ButlerConfig` 

412 Butler configuration, already coerced into a true `ButlerConfig` 

413 instance (and hence after any search paths for overrides have been 

414 utilized). 

415 collections : `CollectionSearch` 

416 Names of the default collections to read from. 

417 run : `str`, optional 

418 Name of the default `~CollectionType.RUN` collection to write to. 

419 defaultDataId : `dict` [ `str`, `str` ] 

420 Default data ID values. 

421 writeable : `bool` 

422 Whether the Butler should support write operations. 

423 

424 Returns 

425 ------- 

426 butler : `Butler` 

427 A new `Butler` instance. 

428 """ 

429 # MyPy doesn't recognize that the kwargs below are totally valid; it 

430 # seems to think ``**defaultDataId`` is a *positional* argument! 

431 return cls(config=config, collections=collections, run=run, writeable=writeable, 

432 **defaultDataId) # type: ignore 

433 

434 def __reduce__(self) -> tuple: 

435 """Support pickling. 

436 """ 

437 return (Butler._unpickle, (self._config, self.collections, self.run, 

438 self.registry.defaults.dataId.byName(), 

439 self.registry.isWriteable())) 

440 

441 def __str__(self) -> str: 

442 return "Butler(collections={}, run={}, datastore='{}', registry='{}')".format( 

443 self.collections, self.run, self.datastore, self.registry) 

444 

445 def isWriteable(self) -> bool: 

446 """Return `True` if this `Butler` supports write operations. 

447 """ 

448 return self.registry.isWriteable() 

449 

450 @contextlib.contextmanager 

451 def transaction(self) -> Iterator[None]: 

452 """Context manager supporting `Butler` transactions. 

453 

454 Transactions can be nested. 
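Examples
--------
A usage sketch, assuming ``butler`` is a writeable `Butler`; the dataset
type name and data IDs are placeholders::

    with butler.transaction():
        butler.put(obj1, "exampleDatasetType", dataId1)
        butler.put(obj2, "exampleDatasetType", dataId2)
        # If either put raises, the registry and datastore changes made
        # inside the block are rolled back together.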

455 """ 

456 with self.registry.transaction(): 

457 with self.datastore.transaction(): 

458 yield 

459 

460 def _standardizeArgs(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

461 dataId: Optional[DataId] = None, **kwds: Any 

462 ) -> Tuple[DatasetType, Optional[DataId]]: 

463 """Standardize the arguments passed to several Butler APIs. 

464 

465 Parameters 

466 ---------- 

467 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

468 When `DatasetRef` the `dataId` should be `None`. 

469 Otherwise the `DatasetType` or name thereof. 

470 dataId : `dict` or `DataCoordinate` 

471 A `dict` of `Dimension` link name, value pairs that label the 

472 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

473 should be provided as the first argument. 

474 kwds 

475 Additional keyword arguments used to augment or construct a 

476 `DataCoordinate`. See `DataCoordinate.standardize` 

477 parameters. 

478 

479 Returns 

480 ------- 

481 datasetType : `DatasetType` 

482 A `DatasetType` instance extracted from ``datasetRefOrType``. 

483 dataId : `dict` or `DataId`, optional 

484 Argument that can be used (along with ``kwds``) to construct a 

485 `DataId`. 

486 

487 Notes 

488 ----- 

489 Butler APIs that conceptually need a DatasetRef also allow passing a 

490 `DatasetType` (or the name of one) and a `DataId` (or a dict and 

491 keyword arguments that can be used to construct one) separately. This 

492 method accepts those arguments and always returns a true `DatasetType` 

493 and a `DataId` or `dict`. 

494 

495 Standardization of `dict` vs `DataId` is best handled by passing the 

496 returned ``dataId`` (and ``kwds``) to `Registry` APIs, which are 

497 generally similarly flexible. 

498 """ 

499 externalDatasetType: Optional[DatasetType] = None 

500 internalDatasetType: Optional[DatasetType] = None 

501 if isinstance(datasetRefOrType, DatasetRef): 

502 if dataId is not None or kwds: 

503 raise ValueError("DatasetRef given, cannot use dataId as well") 

504 externalDatasetType = datasetRefOrType.datasetType 

505 dataId = datasetRefOrType.dataId 

506 else: 

507 # Don't check whether DataId is provided, because Registry APIs 

508 # can usually construct a better error message when it wasn't. 

509 if isinstance(datasetRefOrType, DatasetType): 

510 externalDatasetType = datasetRefOrType 

511 else: 

512 internalDatasetType = self.registry.getDatasetType(datasetRefOrType) 

513 

514 # Check that they are self-consistent 

515 if externalDatasetType is not None: 

516 internalDatasetType = self.registry.getDatasetType(externalDatasetType.name) 

517 if externalDatasetType != internalDatasetType: 

518 raise ValueError(f"Supplied dataset type ({externalDatasetType}) inconsistent with " 

519 f"registry definition ({internalDatasetType})") 

520 

521 assert internalDatasetType is not None 

522 return internalDatasetType, dataId 

523 

524 def _findDatasetRef(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

525 dataId: Optional[DataId] = None, *, 

526 collections: Any = None, 

527 allowUnresolved: bool = False, 

528 **kwds: Any) -> DatasetRef: 

529 """Shared logic for methods that start with a search for a dataset in 

530 the registry. 

531 

532 Parameters 

533 ---------- 

534 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

535 When `DatasetRef` the `dataId` should be `None`. 

536 Otherwise the `DatasetType` or name thereof. 

537 dataId : `dict` or `DataCoordinate`, optional 

538 A `dict` of `Dimension` link name, value pairs that label the 

539 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

540 should be provided as the first argument. 

541 collections : Any, optional 

542 Collections to be searched, overriding ``self.collections``. 

543 Can be any of the types supported by the ``collections`` argument 

544 to butler construction. 

545 allowUnresolved : `bool`, optional 

546 If `True`, return an unresolved `DatasetRef` if finding a resolved 

547 one in the `Registry` fails. Defaults to `False`. 

548 kwds 

549 Additional keyword arguments used to augment or construct a 

550 `DataId`. See `DataId` parameters. 

551 

552 Returns 

553 ------- 

554 ref : `DatasetRef` 

555 A reference to the dataset identified by the given arguments. 

556 

557 Raises 

558 ------ 

559 LookupError 

560 Raised if no matching dataset exists in the `Registry` (and 

561 ``allowUnresolved is False``). 

562 ValueError 

563 Raised if a resolved `DatasetRef` was passed as an input, but it 

564 differs from the one found in the registry. 

565 TypeError 

566 Raised if no collections were provided. 

567 """ 

568 datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwds) 

569 if isinstance(datasetRefOrType, DatasetRef): 

570 idNumber = datasetRefOrType.id 

571 else: 

572 idNumber = None 

573 timespan: Optional[Timespan] = None 

574 

575 # Process dimension records that are using record information 

576 # rather than ids 

577 newDataId: Dict[str, DataIdValue] = {} 

578 byRecord: Dict[str, Dict[str, Any]] = defaultdict(dict) 

579 

580 # If the dataId comes entirely from keyword parameters we do not need 

581 # to do anything here because they cannot be of the form 

582 # exposure.obs_id; a "." is not allowed in a keyword parameter. 

583 if dataId: 

584 for k, v in dataId.items(): 

585 # If we have a Dimension we do not need to do anything 

586 # because it cannot be a compound key. 

587 if isinstance(k, str) and "." in k: 

588 # Someone is using a more human-readable dataId 

589 dimensionName, record = k.split(".", 1) 

590 byRecord[dimensionName][record] = v 

591 elif isinstance(k, Dimension): 

592 newDataId[k.name] = v 

593 else: 

594 newDataId[k] = v 

595 

596 # Go through the updated dataId and check the type in case someone is 

597 # using an alternate key. We have already filtered out the compound 

598 # keys dimensions.record format. 

599 not_dimensions = {} 

600 

601 # Will need to look in the dataId and the keyword arguments 

602 # and will remove them if they need to be fixed or are unrecognized. 

603 for dataIdDict in (newDataId, kwds): 

604 # Use a list so we can adjust the dict safely in the loop 

605 for dimensionName in list(dataIdDict): 

606 value = dataIdDict[dimensionName] 

607 try: 

608 dimension = self.registry.dimensions.getStaticDimensions()[dimensionName] 

609 except KeyError: 

610 # This is not a real dimension 

611 not_dimensions[dimensionName] = value 

612 del dataIdDict[dimensionName] 

613 continue 

614 

615 # Convert an integral type to an explicit int to simplify 

616 # comparisons here 

617 if isinstance(value, numbers.Integral): 

618 value = int(value) 

619 

620 if not isinstance(value, dimension.primaryKey.getPythonType()): 

621 for alternate in dimension.alternateKeys: 

622 if isinstance(value, alternate.getPythonType()): 

623 byRecord[dimensionName][alternate.name] = value 

624 del dataIdDict[dimensionName] 

625 log.debug("Converting dimension %s to %s.%s=%s", 

626 dimensionName, dimensionName, alternate.name, value) 

627 break 

628 else: 

629 log.warning("Type mismatch found for value '%r' provided for dimension %s. " 

630 "Could not find matching alternative (primary key has type %s) " 

631 "so attempting to use as-is.", 

632 value, dimensionName, dimension.primaryKey.getPythonType()) 

633 

634 # If we have some unrecognized dimensions we have to try to connect 

635 # them to records in other dimensions. This is made more complicated 

636 # by some dimensions having records with clashing names. A mitigation 

637 # is that we can tell by this point which dimensions are missing 

638 # for the DatasetType but this does not work for calibrations 

639 # where additional dimensions can be used to constrain the temporal 

640 # axis. 

641 if not_dimensions: 

642 # Calculate missing dimensions 

643 provided = set(newDataId) | set(kwds) | set(byRecord) 

644 missingDimensions = datasetType.dimensions.names - provided 

645 

646 # For calibrations we may well be needing temporal dimensions 

647 # so rather than always including all dimensions in the scan 

648 # restrict things a little. It is still possible for there 

649 # to be confusion over day_obs in visit vs exposure for example. 

650 # If we are not searching calibration collections things may 

651 # fail but they are going to fail anyway because of the 

652 # ambiguity of the dataId... 

653 candidateDimensions: Set[str] = set() 

654 candidateDimensions.update(missingDimensions) 

655 if datasetType.isCalibration(): 

656 for dim in self.registry.dimensions.getStaticDimensions(): 

657 if dim.temporal: 

658 candidateDimensions.add(str(dim)) 

659 

660 # Look up table for the first association with a dimension 

661 guessedAssociation: Dict[str, Dict[str, Any]] = defaultdict(dict) 

662 

663 # Keep track of whether an item is associated with multiple 

664 # dimensions. 

665 counter: Counter[str] = Counter() 

666 assigned: Dict[str, Set[str]] = defaultdict(set) 

667 

668 # Go through the missing dimensions and associate the 

669 # given names with records within those dimensions 

670 for dimensionName in candidateDimensions: 

671 dimension = self.registry.dimensions.getStaticDimensions()[dimensionName] 

672 fields = dimension.metadata.names | dimension.uniqueKeys.names 

673 for field in not_dimensions: 

674 if field in fields: 

675 guessedAssociation[dimensionName][field] = not_dimensions[field] 

676 counter[dimensionName] += 1 

677 assigned[field].add(dimensionName) 

678 

679 # There is a chance we have allocated a single dataId item 

680 # to multiple dimensions. Need to decide which should be retained. 

681 # For now assume that the most popular alternative wins. 

682 # This means that day_obs with seq_num will result in 

683 # exposure.day_obs and not visit.day_obs 

684 # Also prefer an explicitly missing dimension over an inferred 

685 # temporal dimension. 

686 for fieldName, assignedDimensions in assigned.items(): 

687 if len(assignedDimensions) > 1: 

688 # Pick the most popular (preferring mandatory dimensions) 

689 requiredButMissing = assignedDimensions.intersection(missingDimensions) 

690 if requiredButMissing: 

691 candidateDimensions = requiredButMissing 

692 else: 

693 candidateDimensions = assignedDimensions 

694 

695 # Select the relevant items and get a new restricted 

696 # counter. 

697 theseCounts = {k: v for k, v in counter.items() if k in candidateDimensions} 

698 duplicatesCounter: Counter[str] = Counter() 

699 duplicatesCounter.update(theseCounts) 

700 

701 # Choose the most common. If they are equally common 

702 # we will pick the one that was found first. 

703 # Returns a list of tuples 

704 selected = duplicatesCounter.most_common(1)[0][0] 

705 

706 log.debug("Ambiguous dataId entry '%s' associated with multiple dimensions: %s." 

707 " Removed ambiguity by choosing dimension %s.", 

708 fieldName, ", ".join(assignedDimensions), selected) 

709 

710 for candidateDimension in assignedDimensions: 

711 if candidateDimension != selected: 

712 del guessedAssociation[candidateDimension][fieldName] 

713 

714 # Update the record look up dict with the new associations 

715 for dimensionName, values in guessedAssociation.items(): 

716 if values: # A dict might now be empty 

717 log.debug("Assigned non-dimension dataId keys to dimension %s: %s", 

718 dimensionName, values) 

719 byRecord[dimensionName].update(values) 

720 

721 if byRecord: 

722 # Some record specifiers were found so we need to convert 

723 # them to the Id form 

724 for dimensionName, values in byRecord.items(): 

725 if dimensionName in newDataId: 

726 log.warning("DataId specified explicit %s dimension value of %s in addition to" 

727 " general record specifiers for it of %s. Ignoring record information.", 

728 dimensionName, newDataId[dimensionName], str(values)) 

729 continue 

730 

731 # Build up a WHERE expression -- use single quotes 

732 def quote(s: Any) -> str: 

733 if isinstance(s, str): 

734 return f"'{s}'" 

735 else: 

736 return s 

737 

738 where = " AND ".join(f"{dimensionName}.{k} = {quote(v)}" 

739 for k, v in values.items()) 

740 

741 # Hopefully we get a single record that matches 

742 records = set(self.registry.queryDimensionRecords(dimensionName, dataId=newDataId, 

743 where=where, **kwds)) 

744 

745 if len(records) != 1: 

746 if len(records) > 1: 

747 log.debug("Received %d records from constraints of %s", len(records), str(values)) 

748 for r in records: 

749 log.debug("- %s", str(r)) 

750 raise RuntimeError(f"DataId specification for dimension {dimensionName} is not" 

751 f" uniquely constrained to a single dataset by {values}." 

752 f" Got {len(records)} results.") 

753 raise RuntimeError(f"DataId specification for dimension {dimensionName} matched no" 

754 f" records when constrained by {values}") 

755 

756 # Get the primary key from the real dimension object 

757 dimension = self.registry.dimensions.getStaticDimensions()[dimensionName] 

758 if not isinstance(dimension, Dimension): 

759 raise RuntimeError( 

760 f"{dimension.name} is not a true dimension, and cannot be used in data IDs." 

761 ) 

762 newDataId[dimensionName] = getattr(records.pop(), dimension.primaryKey.name) 

763 

764 # We have modified the dataId so need to switch to it 

765 dataId = newDataId 

766 

767 if datasetType.isCalibration(): 

768 # Because this is a calibration dataset, first try to 

769 # standardize the data ID without restricting the dimensions to 

770 # those of the dataset type requested, because there may be extra 

771 # dimensions that provide temporal information for a validity-range 

772 # lookup. 

773 dataId = DataCoordinate.standardize(dataId, universe=self.registry.dimensions, 

774 defaults=self.registry.defaults.dataId, **kwds) 

775 if dataId.graph.temporal: 

776 dataId = self.registry.expandDataId(dataId) 

777 timespan = dataId.timespan 

778 else: 

779 # Standardize the data ID to just the dimensions of the dataset 

780 # type instead of letting registry.findDataset do it, so we get the 

781 # result even if no dataset is found. 

782 dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions, 

783 defaults=self.registry.defaults.dataId, **kwds) 

784 # Always lookup the DatasetRef, even if one is given, to ensure it is 

785 # present in the current collection. 

786 ref = self.registry.findDataset(datasetType, dataId, collections=collections, timespan=timespan) 

787 if ref is None: 

788 if allowUnresolved: 

789 return DatasetRef(datasetType, dataId) 

790 else: 

791 if collections is None: 

792 collections = self.registry.defaults.collections 

793 raise LookupError(f"Dataset {datasetType.name} with data ID {dataId} " 

794 f"could not be found in collections {collections}.") 

795 if idNumber is not None and idNumber != ref.id: 

796 if collections is None: 

797 collections = self.registry.defaults.collections 

798 raise ValueError(f"DatasetRef.id provided ({idNumber}) does not match " 

799 f"id ({ref.id}) in registry in collections {collections}.") 

800 return ref 

801 

802 @transactional 

803 def put(self, obj: Any, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

804 dataId: Optional[DataId] = None, *, 

805 run: Optional[str] = None, 

806 **kwds: Any) -> DatasetRef: 

807 """Store and register a dataset. 

808 

809 Parameters 

810 ---------- 

811 obj : `object` 

812 The dataset. 

813 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

814 When `DatasetRef` is provided, ``dataId`` should be `None`. 

815 Otherwise the `DatasetType` or name thereof. 

816 dataId : `dict` or `DataCoordinate` 

817 A `dict` of `Dimension` link name, value pairs that label the 

818 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

819 should be provided as the second argument. 

820 run : `str`, optional 

821 The name of the run the dataset should be added to, overriding 

822 ``self.run``. 

823 kwds 

824 Additional keyword arguments used to augment or construct a 

825 `DataCoordinate`. See `DataCoordinate.standardize` 

826 parameters. 

827 

828 Returns 

829 ------- 

830 ref : `DatasetRef` 

831 A reference to the stored dataset, updated with the correct id if 

832 given. 

833 

834 Raises 

835 ------ 

836 TypeError 

837 Raised if the butler is read-only or if no run has been provided. 
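Examples
--------
An illustrative sketch only; the run name, dataset type name, and data
ID keys are placeholders and must already be registered in the
repository for the call to succeed::

    butler = Butler("/path/to/repo", run="u/alice/example")
    ref = butler.put(my_object, "exampleDatasetType",
                     instrument="ExampleCam", exposure=123)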

838 """ 

839 log.debug("Butler put: %s, dataId=%s, run=%s", datasetRefOrType, dataId, run) 

840 if not self.isWriteable(): 

841 raise TypeError("Butler is read-only.") 

842 datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwds) 

843 if isinstance(datasetRefOrType, DatasetRef) and datasetRefOrType.id is not None: 

844 raise ValueError("DatasetRef must not be in registry, must have None id") 

845 

846 # Add Registry Dataset entry. 

847 dataId = self.registry.expandDataId(dataId, graph=datasetType.dimensions, **kwds) 

848 ref, = self.registry.insertDatasets(datasetType, run=run, dataIds=[dataId]) 

849 

850 # Add Datastore entry. 

851 self.datastore.put(obj, ref) 

852 

853 return ref 

854 

855 def getDirect(self, ref: DatasetRef, *, parameters: Optional[Dict[str, Any]] = None) -> Any: 

856 """Retrieve a stored dataset. 

857 

858 Unlike `Butler.get`, this method allows datasets outside the Butler's 

859 collection to be read as long as the `DatasetRef` that identifies them 

860 can be obtained separately. 

861 

862 Parameters 

863 ---------- 

864 ref : `DatasetRef` 

865 Resolved reference to an already stored dataset. 

866 parameters : `dict` 

867 Additional StorageClass-defined options to control reading, 

868 typically used to efficiently read only a subset of the dataset. 

869 

870 Returns 

871 ------- 

872 obj : `object` 

873 The dataset. 
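Examples
--------
A sketch of reading a dataset from a `DatasetRef` obtained via a
registry query; the dataset type and collection names are placeholders::

    ref = next(iter(butler.registry.queryDatasets(
        "exampleDatasetType", collections="u/alice/example")))
    obj = butler.getDirect(ref)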

874 """ 

875 return self.datastore.get(ref, parameters=parameters) 

876 

877 def getDirectDeferred(self, ref: DatasetRef, *, 

878 parameters: Union[dict, None] = None) -> DeferredDatasetHandle: 

879 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

880 from a resolved `DatasetRef`. 

881 

882 Parameters 

883 ---------- 

884 ref : `DatasetRef` 

885 Resolved reference to an already stored dataset. 

886 parameters : `dict` 

887 Additional StorageClass-defined options to control reading, 

888 typically used to efficiently read only a subset of the dataset. 

889 

890 Returns 

891 ------- 

892 obj : `DeferredDatasetHandle` 

893 A handle which can be used to retrieve a dataset at a later time. 

894 

895 Raises 

896 ------ 

897 AmbiguousDatasetError 

898 Raised if ``ref.id is None``, i.e. the reference is unresolved. 

899 """ 

900 if ref.id is None: 

901 raise AmbiguousDatasetError( 

902 f"Dataset of type {ref.datasetType.name} with data ID {ref.dataId} is not resolved." 

903 ) 

904 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters) 

905 

906 def getDeferred(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

907 dataId: Optional[DataId] = None, *, 

908 parameters: Union[dict, None] = None, 

909 collections: Any = None, 

910 **kwds: Any) -> DeferredDatasetHandle: 

911 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

912 after an immediate registry lookup. 

913 

914 Parameters 

915 ---------- 

916 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

917 When `DatasetRef` the `dataId` should be `None`. 

918 Otherwise the `DatasetType` or name thereof. 

919 dataId : `dict` or `DataCoordinate`, optional 

920 A `dict` of `Dimension` link name, value pairs that label the 

921 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

922 should be provided as the first argument. 

923 parameters : `dict` 

924 Additional StorageClass-defined options to control reading, 

925 typically used to efficiently read only a subset of the dataset. 

926 collections : Any, optional 

927 Collections to be searched, overriding ``self.collections``. 

928 Can be any of the types supported by the ``collections`` argument 

929 to butler construction. 

930 kwds 

931 Additional keyword arguments used to augment or construct a 

932 `DataId`. See `DataId` parameters. 

933 

934 Returns 

935 ------- 

936 obj : `DeferredDatasetHandle` 

937 A handle which can be used to retrieve a dataset at a later time. 

938 

939 Raises 

940 ------ 

941 LookupError 

942 Raised if no matching dataset exists in the `Registry` (and 

943 ``allowUnresolved is False``). 

944 ValueError 

945 Raised if a resolved `DatasetRef` was passed as an input, but it 

946 differs from the one found in the registry. 

947 TypeError 

948 Raised if no collections were provided. 
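Examples
--------
A sketch with placeholder names; the handle defers the datastore read
until its ``get`` method is called::

    handle = butler.getDeferred("exampleDatasetType", dataId,
                                collections="u/alice/example")
    # ... later, when the data are actually needed:
    obj = handle.get()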

949 """ 

950 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwds) 

951 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters) 

952 

953 def get(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

954 dataId: Optional[DataId] = None, *, 

955 parameters: Optional[Dict[str, Any]] = None, 

956 collections: Any = None, 

957 **kwds: Any) -> Any: 

958 """Retrieve a stored dataset. 

959 

960 Parameters 

961 ---------- 

962 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

963 When `DatasetRef` the `dataId` should be `None`. 

964 Otherwise the `DatasetType` or name thereof. 

965 dataId : `dict` or `DataCoordinate` 

966 A `dict` of `Dimension` link name, value pairs that label the 

967 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

968 should be provided as the first argument. 

969 parameters : `dict` 

970 Additional StorageClass-defined options to control reading, 

971 typically used to efficiently read only a subset of the dataset. 

972 collections : Any, optional 

973 Collections to be searched, overriding ``self.collections``. 

974 Can be any of the types supported by the ``collections`` argument 

975 to butler construction. 

976 kwds 

977 Additional keyword arguments used to augment or construct a 

978 `DataCoordinate`. See `DataCoordinate.standardize` 

979 parameters. 

980 

981 Returns 

982 ------- 

983 obj : `object` 

984 The dataset. 

985 

986 Raises 

987 ------ 

988 ValueError 

989 Raised if a resolved `DatasetRef` was passed as an input, but it 

990 differs from the one found in the registry. 

991 LookupError 

992 Raised if no matching dataset exists in the `Registry`. 

993 TypeError 

994 Raised if no collections were provided. 

995 

996 Notes 

997 ----- 

998 When looking up datasets in a `~CollectionType.CALIBRATION` collection, 

999 this method requires that the given data ID include temporal dimensions 

1000 beyond the dimensions of the dataset type itself, in order to find the 

1001 dataset with the appropriate validity range. For example, a "bias" 

1002 dataset with native dimensions ``{instrument, detector}`` could be 

1003 fetched with a ``{instrument, detector, exposure}`` data ID, because 

1004 ``exposure`` is a temporal dimension. 
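Examples
--------
An illustrative sketch of the calibration lookup described above; the
dataset type, collection, and data ID values are placeholders::

    bias = butler.get("bias", instrument="ExampleCam", detector=1,
                      exposure=1234, collections="calib/example")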

1005 """ 

1006 log.debug("Butler get: %s, dataId=%s, parameters=%s", datasetRefOrType, dataId, parameters) 

1007 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwds) 

1008 return self.getDirect(ref, parameters=parameters) 

1009 

1010 def getURIs(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1011 dataId: Optional[DataId] = None, *, 

1012 predict: bool = False, 

1013 collections: Any = None, 

1014 run: Optional[str] = None, 

1015 **kwds: Any) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]: 

1016 """Returns the URIs associated with the dataset. 

1017 

1018 Parameters 

1019 ---------- 

1020 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1021 When `DatasetRef` the `dataId` should be `None`. 

1022 Otherwise the `DatasetType` or name thereof. 

1023 dataId : `dict` or `DataCoordinate` 

1024 A `dict` of `Dimension` link name, value pairs that label the 

1025 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1026 should be provided as the first argument. 

1027 predict : `bool` 

1028 If `True`, allow URIs to be returned of datasets that have not 

1029 been written. 

1030 collections : Any, optional 

1031 Collections to be searched, overriding ``self.collections``. 

1032 Can be any of the types supported by the ``collections`` argument 

1033 to butler construction. 

1034 run : `str`, optional 

1035 Run to use for predictions, overriding ``self.run``. 

1036 kwds 

1037 Additional keyword arguments used to augment or construct a 

1038 `DataCoordinate`. See `DataCoordinate.standardize` 

1039 parameters. 

1040 

1041 Returns 

1042 ------- 

1043 primary : `ButlerURI` 

1044 The URI to the primary artifact associated with this dataset. 

1045 If the dataset was disassembled within the datastore this 

1046 may be `None`. 

1047 components : `dict` 

1048 URIs to any components associated with the dataset artifact. 

1049 Can be empty if there are no components. 

1050 """ 

1051 ref = self._findDatasetRef(datasetRefOrType, dataId, allowUnresolved=predict, 

1052 collections=collections, **kwds) 

1053 if ref.id is None: # only possible if predict is True 

1054 if run is None: 

1055 run = self.run 

1056 if run is None: 

1057 raise TypeError("Cannot predict location with run=None.") 

1058 # Lie about ID, because we can't guess it, and only 

1059 # Datastore.getURIs() will ever see it (and it doesn't use it). 

1060 ref = ref.resolved(id=0, run=run) 

1061 return self.datastore.getURIs(ref, predict) 

1062 

1063 def getURI(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1064 dataId: Optional[DataId] = None, *, 

1065 predict: bool = False, 

1066 collections: Any = None, 

1067 run: Optional[str] = None, 

1068 **kwds: Any) -> ButlerURI: 

1069 """Return the URI to the Dataset. 

1070 

1071 Parameters 

1072 ---------- 

1073 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1074 When `DatasetRef` the `dataId` should be `None`. 

1075 Otherwise the `DatasetType` or name thereof. 

1076 dataId : `dict` or `DataCoordinate` 

1077 A `dict` of `Dimension` link name, value pairs that label the 

1078 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1079 should be provided as the first argument. 

1080 predict : `bool` 

1081 If `True`, allow URIs to be returned of datasets that have not 

1082 been written. 

1083 collections : Any, optional 

1084 Collections to be searched, overriding ``self.collections``. 

1085 Can be any of the types supported by the ``collections`` argument 

1086 to butler construction. 

1087 run : `str`, optional 

1088 Run to use for predictions, overriding ``self.run``. 

1089 kwds 

1090 Additional keyword arguments used to augment or construct a 

1091 `DataCoordinate`. See `DataCoordinate.standardize` 

1092 parameters. 

1093 

1094 Returns 

1095 ------- 

1096 uri : `ButlerURI` 

1097 URI pointing to the Dataset within the datastore. If the 

1098 Dataset does not exist in the datastore, and if ``predict`` is 

1099 `True`, the URI will be a prediction and will include a URI 

1100 fragment "#predicted". 

1101 If the datastore does not have entities that relate well 

1102 to the concept of a URI the returned URI string will be 

1103 descriptive. The returned URI is not guaranteed to be obtainable. 

1104 

1105 Raises 

1106 ------ 

1107 LookupError 

1108 A URI has been requested for a dataset that does not exist and 

1109 guessing is not allowed. 

1110 ValueError 

1111 Raised if a resolved `DatasetRef` was passed as an input, but it 

1112 differs from the one found in the registry. 

1113 TypeError 

1114 Raised if no collections were provided. 

1115 RuntimeError 

1116 Raised if a URI is requested for a dataset that consists of 

1117 multiple artifacts. 
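Examples
--------
A sketch with placeholder names, including prediction of the URI for a
dataset that has not been written yet::

    uri = butler.getURI("exampleDatasetType", dataId,
                        collections="u/alice/example")
    future_uri = butler.getURI("exampleDatasetType", dataId,
                               predict=True, run="u/alice/example")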

1118 """ 

1119 primary, components = self.getURIs(datasetRefOrType, dataId=dataId, predict=predict, 

1120 collections=collections, run=run, **kwds) 

1121 

1122 if primary is None or components: 

1123 raise RuntimeError(f"Dataset ({datasetRefOrType}) includes distinct URIs for components. " 

1124 "Use Butler.getURIs() instead.") 

1125 return primary 

1126 

1127 def datasetExists(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1128 dataId: Optional[DataId] = None, *, 

1129 collections: Any = None, 

1130 **kwds: Any) -> bool: 

1131 """Return True if the Dataset is actually present in the Datastore. 

1132 

1133 Parameters 

1134 ---------- 

1135 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1136 When `DatasetRef` the `dataId` should be `None`. 

1137 Otherwise the `DatasetType` or name thereof. 

1138 dataId : `dict` or `DataCoordinate` 

1139 A `dict` of `Dimension` link name, value pairs that label the 

1140 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1141 should be provided as the first argument. 

1142 collections : Any, optional 

1143 Collections to be searched, overriding ``self.collections``. 

1144 Can be any of the types supported by the ``collections`` argument 

1145 to butler construction. 

1146 kwds 

1147 Additional keyword arguments used to augment or construct a 

1148 `DataCoordinate`. See `DataCoordinate.standardize` 

1149 parameters. 

1150 

1151 Raises 

1152 ------ 

1153 LookupError 

1154 Raised if the dataset is not even present in the Registry. 

1155 ValueError 

1156 Raised if a resolved `DatasetRef` was passed as an input, but it 

1157 differs from the one found in the registry. 

1158 TypeError 

1159 Raised if no collections were provided. 
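Examples
--------
A sketch with placeholder names; note that this checks the datastore,
and a `LookupError` is raised if the dataset is not even known to the
registry::

    if butler.datasetExists("exampleDatasetType", dataId,
                            collections="u/alice/example"):
        obj = butler.get("exampleDatasetType", dataId,
                         collections="u/alice/example")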

1160 """ 

1161 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwds) 

1162 return self.datastore.exists(ref) 

1163 

1164 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None: 

1165 """Remove one or more `~CollectionType.RUN` collections and the 

1166 datasets within them. 

1167 

1168 Parameters 

1169 ---------- 

1170 names : `Iterable` [ `str` ] 

1171 The names of the collections to remove. 

1172 unstore : `bool`, optional 

1173 If `True` (default), delete datasets from all datastores in which 

1174 they are present, and attempt to rollback the registry deletions if 

1175 datastore deletions fail (which may not always be possible). If 

1176 `False`, datastore records for these datasets are still removed, 

1177 but any artifacts (e.g. files) will not be. 

1178 

1179 Raises 

1180 ------ 

1181 TypeError 

1182 Raised if one or more collections are not of type 

1183 `~CollectionType.RUN`. 
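Examples
--------
A sketch removing two hypothetical `~CollectionType.RUN` collections
together with the artifacts of the datasets they contain::

    butler.removeRuns(["u/alice/run1", "u/alice/run2"])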

1184 """ 

1185 if not self.isWriteable(): 

1186 raise TypeError("Butler is read-only.") 

1187 names = list(names) 

1188 refs: List[DatasetRef] = [] 

1189 for name in names: 

1190 collectionType = self.registry.getCollectionType(name) 

1191 if collectionType is not CollectionType.RUN: 

1192 raise TypeError(f"The collection type of '{name}' is {collectionType.name}, not RUN.") 

1193 refs.extend(self.registry.queryDatasets(..., collections=name, findFirst=True)) 

1194 with self.registry.transaction(): 

1195 if unstore: 

1196 for ref in refs: 

1197 if self.datastore.exists(ref): 

1198 self.datastore.trash(ref) 

1199 else: 

1200 self.datastore.forget(refs) 

1201 for name in names: 

1202 self.registry.removeCollection(name) 

1203 if unstore: 

1204 # Point of no return for removing artifacts 

1205 self.datastore.emptyTrash() 

1206 

1207 def pruneCollection(self, name: str, purge: bool = False, unstore: bool = False) -> None: 

1208 """Remove a collection and possibly prune datasets within it. 

1209 

1210 Parameters 

1211 ---------- 

1212 name : `str` 

1213 Name of the collection to remove. If this is a 

1214 `~CollectionType.TAGGED` or `~CollectionType.CHAINED` collection, 

1215 datasets within the collection are not modified unless ``unstore`` 

1216 is `True`. If this is a `~CollectionType.RUN` collection, 

1217 ``purge`` and ``unstore`` must be `True`, and all datasets in it 

1218 are fully removed from the data repository. 

1219 purge : `bool`, optional 

1220 If `True`, permit `~CollectionType.RUN` collections to be removed, 

1221 fully removing datasets within them. Requires ``unstore=True`` as 

1222 well as an added precaution against accidental deletion. Must be 

1223 `False` (default) if the collection is not a ``RUN``. 

1224 unstore : `bool`, optional 

1225 If `True`, remove all datasets in the collection from all 

1226 datastores in which they appear. 

1227 

1228 Raises 

1229 ------ 

1230 TypeError 

1231 Raised if the butler is read-only or arguments are mutually 

1232 inconsistent. 
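Examples
--------
Illustrative sketches with placeholder collection names; removing a
`~CollectionType.RUN` collection requires both ``purge`` and
``unstore``::

    # Remove a TAGGED or CHAINED collection, leaving its datasets alone.
    butler.pruneCollection("u/alice/tagged-example")

    # Fully remove a RUN collection and every dataset within it.
    butler.pruneCollection("u/alice/run-example", purge=True, unstore=True)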

1233 """ 

1234 

1235 # See pruneDatasets comments for more information about the logic here; 

1236 # the cases are almost the same, but here we can rely on Registry to 

1237 # take care of everything but Datastore deletion when we remove the 

1238 # collection. 

1239 if not self.isWriteable(): 

1240 raise TypeError("Butler is read-only.") 

1241 collectionType = self.registry.getCollectionType(name) 

1242 if purge and not unstore: 

1243 raise PurgeWithoutUnstorePruneCollectionsError() 

1244 if collectionType is CollectionType.RUN and not purge: 

1245 raise RunWithoutPurgePruneCollectionsError(collectionType) 

1246 if collectionType is not CollectionType.RUN and purge: 

1247 raise PurgeUnsupportedPruneCollectionsError(collectionType) 

1248 

1249 with self.registry.transaction(): 

1250 if unstore: 

1251 for ref in self.registry.queryDatasets(..., collections=name, findFirst=True): 

1252 if self.datastore.exists(ref): 

1253 self.datastore.trash(ref) 

1254 self.registry.removeCollection(name) 

1255 if unstore: 

1256 # Point of no return for removing artifacts 

1257 self.datastore.emptyTrash() 

1258 

1259 def pruneDatasets(self, refs: Iterable[DatasetRef], *, 

1260 disassociate: bool = True, 

1261 unstore: bool = False, 

1262 tags: Iterable[str] = (), 

1263 purge: bool = False, 

1264 run: Optional[str] = None) -> None: 

1265 """Remove one or more datasets from a collection and/or storage. 

1266 

1267 Parameters 

1268 ---------- 

1269 refs : `~collections.abc.Iterable` of `DatasetRef` 

1270 Datasets to prune. These must be "resolved" references (not just 

1271 a `DatasetType` and data ID). 

1272 disassociate : `bool`, optional 

1273 Disassociate pruned datasets from ``tags``, or from all collections 

1274 if ``purge=True``. 

1275 unstore : `bool`, optional 

1276 If `True` (`False` is default) remove these datasets from all 

1277 datastores known to this butler. Note that this will make it 

1278 impossible to retrieve these datasets even via other collections. 

1279 Datasets that are already not stored are ignored by this option. 

1280 tags : `Iterable` [ `str` ], optional 

1281 `~CollectionType.TAGGED` collections to disassociate the datasets 

1282 from. Ignored if ``disassociate`` is `False` or ``purge`` is 

1283 `True`. 

1284 purge : `bool`, optional 

1285 If `True` (`False` is default), completely remove the dataset from 

1286 the `Registry`. To prevent accidental deletions, ``purge`` may 

1287 only be `True` if all of the following conditions are met: 

1288 

1289 - All given datasets are in the given run. 

1290 - ``disassociate`` is `True`; 

1291 - ``unstore`` is `True`. 

1292 

1293 This mode may remove provenance information from datasets other 

1294 than those provided, and should be used with extreme care. 

1295 

1296 Raises 

1297 ------ 

1298 TypeError 

1299 Raised if the butler is read-only, if no collection was provided, 

1300 or the conditions for ``purge=True`` were not met. 
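Examples
--------
A sketch that deletes the stored artifacts of some datasets while
leaving their registry entries intact; the dataset type and collection
names are placeholders::

    refs = butler.registry.queryDatasets("exampleDatasetType",
                                         collections="u/alice/example")
    butler.pruneDatasets(refs, disassociate=False, unstore=True)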

1301 """ 

1302 if not self.isWriteable(): 

1303 raise TypeError("Butler is read-only.") 

1304 if purge: 

1305 if not disassociate: 

1306 raise TypeError("Cannot pass purge=True without disassociate=True.") 

1307 if not unstore: 

1308 raise TypeError("Cannot pass purge=True without unstore=True.") 

1309 elif disassociate: 

1310 tags = tuple(tags) 

1311 if not tags: 

1312 raise TypeError("No tags provided but disassociate=True.") 

1313 for tag in tags: 

1314 collectionType = self.registry.getCollectionType(tag) 

1315 if collectionType is not CollectionType.TAGGED: 

1316 raise TypeError(f"Cannot disassociate from collection '{tag}' " 

1317 f"of non-TAGGED type {collectionType.name}.") 

1318 # Transform possibly-single-pass iterable into something we can iterate 

1319 # over multiple times. 

1320 refs = list(refs) 

1321 # Pruning a component of a DatasetRef makes no sense since registry 

1322 # doesn't know about components and datastore might not store 

1323 # components in a separate file 

1324 for ref in refs: 

1325 if ref.datasetType.component(): 

1326 raise ValueError(f"Can not prune a component of a dataset (ref={ref})") 

1327 # We don't need an unreliable Datastore transaction for this, because 

1328 # we've been extra careful to ensure that Datastore.trash only involves 

1329 # mutating the Registry (it can _look_ at Datastore-specific things, 

1330 # but shouldn't change them), and hence all operations here are 

1331 # Registry operations. 

1332 with self.registry.transaction(): 

1333 if unstore: 

1334 for ref in refs: 

1335 # There is a difference between a concrete composite 

1336 # and virtual composite. In a virtual composite the 

1337 # datastore is never given the top level DatasetRef. In 

1338 # the concrete composite the datastore knows all the 

1339 # refs and will clean up itself if asked to remove the 

1340 # parent ref. We can not check configuration for this 

1341 # since we can not trust that the configuration is the 

1342 # same. We therefore have to ask if the ref exists or 

1343 # not. This is consistent with the fact that we want 

1344 # to ignore already-removed-from-datastore datasets 

1345 # anyway. 

1346 if self.datastore.exists(ref): 

1347 self.datastore.trash(ref) 

1348 if purge: 

1349 self.registry.removeDatasets(refs) 

1350 elif disassociate: 

1351 assert tags, "Guaranteed by earlier logic in this function." 

1352 for tag in tags: 

1353 self.registry.disassociate(tag, refs) 

1354 # We've exited the Registry transaction, and apparently committed. 

1355 # (if there was an exception, everything rolled back, and it's as if 

1356 # nothing happened - and we never get here). 

1357 # Datastore artifacts are not yet gone, but they're clearly marked 

1358 # as trash, so if we fail to delete now because of (e.g.) filesystem 

1359 # problems we can try again later, and if manual administrative 

1360 # intervention is required, it's pretty clear what that should entail: 

1361 # deleting everything on disk and in private Datastore tables that is 

1362 # in the dataset_location_trash table. 

1363 if unstore: 

1364 # Point of no return for removing artifacts 

1365 self.datastore.emptyTrash() 

1366 

1367 @transactional 

1368 def ingest(self, *datasets: FileDataset, transfer: Optional[str] = "auto", run: Optional[str] = None, 

1369 ) -> None: 

1370 """Store and register one or more datasets that already exist on disk. 

1371 

1372 Parameters 

1373 ---------- 

1374 datasets : `FileDataset` 

1375 Each positional argument is a struct containing information about 

1376 a file to be ingested, including its path (either absolute or 

1377 relative to the datastore root, if applicable), a `DatasetRef`, 

1378 and optionally a formatter class or its fully-qualified string 

1379 name. If a formatter is not provided, the formatter that would be 

1380 used for `put` is assumed. On successful return, all 

1381 `FileDataset.refs` attributes will have their `DatasetRef.id`

1382 attribute populated and all `FileDataset.formatter` attributes will 

1383 be set to the formatter class used. `FileDataset.path` attributes 

1384 may be modified to put paths in whatever the datastore considers a 

1385 standardized form. 

1386 transfer : `str`, optional 

1387 If not `None`, must be one of 'auto', 'move', 'copy', 'direct', 

1388 'hardlink', 'relsymlink' or 'symlink', indicating how to transfer 

1389 the file. 

1390 run : `str`, optional 

1391 The name of the run ingested datasets should be added to, 

1392 overriding ``self.run``. 

1393 

1394 Raises 

1395 ------ 

1396 TypeError 

1397 Raised if the butler is read-only or if no run was provided. 

1398 NotImplementedError 

1399 Raised if the `Datastore` does not support the given transfer mode. 

1400 DatasetTypeNotSupportedError 

1401 Raised if one or more files to be ingested have a dataset type that 

1402 is not supported by the `Datastore`.

1403 FileNotFoundError 

1404 Raised if one of the given files does not exist. 

1405 FileExistsError 

1406 Raised if transfer is not `None` but the (internal) location the 

1407 file would be moved to is already occupied. 

1408 

1409 Notes 

1410 ----- 

1411 This operation is not fully exception safe: if a database operation 

1412 fails, the given `FileDataset` instances may be only partially updated. 

1413 

1414 It is atomic in terms of database operations (they will either all 

1415 succeed or all fail) providing the database engine implements 

1416 transactions correctly. It will attempt to be atomic in terms of 

1417 filesystem operations as well, but this cannot be implemented 

1418 rigorously for most datastores. 
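
Examples
--------
A minimal sketch (the file path, dataset type name, data ID values and
run name are hypothetical), assuming the dataset type and its
dimensions are already registered::

    datasetType = butler.registry.getDatasetType("raw")
    ref = DatasetRef(datasetType, {"instrument": "MyCam", "exposure": 1, "detector": 0})
    dataset = FileDataset(path="/data/raw_0001.fits", refs=[ref])
    butler.ingest(dataset, transfer="copy", run="MyCam/raw/all")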

1419 """ 

1420 if not self.isWriteable(): 

1421 raise TypeError("Butler is read-only.") 

1422 progress = Progress("lsst.daf.butler.Butler.ingest", level=logging.DEBUG) 

1423 # Reorganize the inputs so they're grouped by DatasetType and then 

1424 # data ID. We also include a list of DatasetRefs for each FileDataset 

1425 # to hold the resolved DatasetRefs returned by the Registry, before 

1426 # it's safe to swap them into FileDataset.refs. 

1427 # Some type annotation aliases to make that clearer: 

1428 GroupForType = Dict[DataCoordinate, Tuple[FileDataset, List[DatasetRef]]] 

1429 GroupedData = MutableMapping[DatasetType, GroupForType] 

1430 # The actual data structure: 

1431 groupedData: GroupedData = defaultdict(dict) 
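# (i.e. groupedData[datasetType][dataId] maps to a (FileDataset, resolved-refs-list) pair)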

1432 # And the nested loop that populates it: 

1433 for dataset in progress.wrap(datasets, desc="Grouping by dataset type"): 

1434 # This list is intentionally shared across the inner loop, since it's

1435 # associated with `dataset`. 

1436 resolvedRefs: List[DatasetRef] = [] 

1437 for ref in dataset.refs: 

1438 if ref.dataId in groupedData[ref.datasetType]: 

1439 raise ConflictingDefinitionError(f"Ingest conflict. Dataset {dataset.path} has the same"

1440 " DataId as other ingest dataset"

1441 f" {groupedData[ref.datasetType][ref.dataId][0].path}"

1442 f" ({ref.dataId})")

1443 groupedData[ref.datasetType][ref.dataId] = (dataset, resolvedRefs) 

1444 

1445 # Now we can bulk-insert into Registry for each DatasetType. 

1446 allResolvedRefs: List[DatasetRef] = [] 

1447 for datasetType, groupForType in progress.iter_item_chunks(groupedData.items(), 

1448 desc="Bulk-inserting datasets by type"): 

1449 refs = self.registry.insertDatasets(datasetType, 

1450 dataIds=groupForType.keys(), 

1451 run=run) 

1452 # Append those resolved DatasetRefs to the new lists we set up for 

1453 # them. 

1454 for ref, (_, resolvedRefs) in zip(refs, groupForType.values()): 

1455 resolvedRefs.append(ref) 

1456 

1457 # Go back to the original FileDatasets to replace their refs with the 

1458 # new resolved ones, and also build a big list of all refs. 

1459 allResolvedRefs = [] 

1460 for groupForType in progress.iter_chunks(groupedData.values(), 

1461 desc="Reassociating resolved dataset refs with files"): 

1462 for dataset, resolvedRefs in groupForType.values(): 

1463 dataset.refs = resolvedRefs 

1464 allResolvedRefs.extend(resolvedRefs) 

1465 

1466 # Bulk-insert everything into Datastore. 

1467 self.datastore.ingest(*datasets, transfer=transfer) 

1468 

1469 @contextlib.contextmanager 

1470 def export(self, *, directory: Optional[str] = None, 

1471 filename: Optional[str] = None, 

1472 format: Optional[str] = None, 

1473 transfer: Optional[str] = None) -> Iterator[RepoExportContext]: 

1474 """Export datasets from the repository represented by this `Butler`. 

1475 

1476 This method is a context manager that returns a helper object 

1477 (`RepoExportContext`) that is used to indicate what information from 

1478 the repository should be exported. 

1479 

1480 Parameters 

1481 ---------- 

1482 directory : `str`, optional 

1483 Directory dataset files should be written to if ``transfer`` is not 

1484 `None`. 

1485 filename : `str`, optional 

1486 Name for the file that will include database information associated 

1487 with the exported datasets. If this is not an absolute path and 

1488 ``directory`` is not `None`, it will be written to ``directory`` 

1489 instead of the current working directory. Defaults to 

1490 "export.{format}". 

1491 format : `str`, optional 

1492 File format for the database information file. If `None`, the 

1493 extension of ``filename`` will be used. 

1494 transfer : `str`, optional 

1495 Transfer mode passed to `Datastore.export`. 

1496 

1497 Raises 

1498 ------ 

1499 TypeError 

1500 Raised if the set of arguments passed is inconsistent. 

1501 

1502 Examples 

1503 -------- 

1504 Typically the `Registry.queryDataIds` and `Registry.queryDatasets` 

1505 methods are used to provide the iterables over data IDs and/or datasets 

1506 to be exported:: 

1507 

1508 with butler.export(filename="exports.yaml") as export:

1509 # Export all flats, but none of the dimension element rows 

1510 # (i.e. data ID information) associated with them. 

1511 export.saveDatasets(butler.registry.queryDatasets("flat"), 

1512 elements=()) 

1513 # Export all datasets that start with "deepCoadd_" and all of 

1514 # their associated data ID information. 

1515 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1516 """ 

1517 if directory is None and transfer is not None: 

1518 raise TypeError("Cannot transfer without providing a directory.") 

1519 if transfer == "move": 

1520 raise TypeError("Transfer may not be 'move': export is read-only") 

1521 if format is None: 

1522 if filename is None: 

1523 raise TypeError("At least one of 'filename' or 'format' must be provided.") 

1524 else: 

1525 _, format = os.path.splitext(filename) 

1526 elif filename is None: 

1527 filename = f"export.{format}" 

1528 if directory is not None: 

1529 filename = os.path.join(directory, filename) 

1530 BackendClass = getClassOf(self._config["repo_transfer_formats"][format]["export"]) 

1531 with open(filename, 'w') as stream: 

1532 backend = BackendClass(stream) 

1533 try: 

1534 helper = RepoExportContext(self.registry, self.datastore, backend=backend, 

1535 directory=directory, transfer=transfer) 

1536 yield helper 

1537 except BaseException: 

1538 raise 

1539 else: 

1540 helper._finish() 

1541 

1542 def import_(self, *, directory: Optional[str] = None, 

1543 filename: Union[str, TextIO, None] = None, 

1544 format: Optional[str] = None, 

1545 transfer: Optional[str] = None, 

1546 skip_dimensions: Optional[Set] = None) -> None: 

1547 """Import datasets into this repository that were exported from a 

1548 different butler repository via `~lsst.daf.butler.Butler.export`. 

1549 

1550 Parameters 

1551 ---------- 

1552 directory : `str`, optional 

1553 Directory containing dataset files to import from. If `None`, 

1554 ``filename`` and all dataset file paths specified therein must 

1555 be absolute. 

1556 filename : `str` or `TextIO`, optional 

1557 A stream or name of file that contains database information 

1558 associated with the exported datasets, typically generated by 

1559 `~lsst.daf.butler.Butler.export`. If this is a string (name) and

1560 is not an absolute path, does not exist in the current working 

1561 directory, and ``directory`` is not `None`, it is assumed to be in 

1562 ``directory``. Defaults to "export.{format}". 

1563 format : `str`, optional 

1564 File format for ``filename``. If `None`, the extension of 

1565 ``filename`` will be used. 

1566 transfer : `str`, optional 

1567 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. 

1568 skip_dimensions : `set`, optional 

1569 Names of dimensions that should be skipped and not imported. 

1570 

1571 Raises 

1572 ------ 

1573 TypeError 

1574 Raised if the set of arguments passed is inconsistent, or if the 

1575 butler is read-only. 
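
Examples
--------
A minimal sketch (the directory and file names are hypothetical) that
loads a previously exported repository subset and copies the referenced
files into this butler's datastore::

    butler.import_(directory="/path/to/exported", filename="export.yaml",
                   transfer="copy")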

1576 """ 

1577 if not self.isWriteable(): 

1578 raise TypeError("Butler is read-only.") 

1579 if format is None: 

1580 if filename is None: 

1581 raise TypeError("At least one of 'filename' or 'format' must be provided.") 

1582 else: 

1583 _, format = os.path.splitext(filename) # type: ignore 

1584 elif filename is None: 

1585 filename = f"export.{format}" 

1586 if isinstance(filename, str) and directory is not None and not os.path.exists(filename): 

1587 filename = os.path.join(directory, filename) 

1588 BackendClass = getClassOf(self._config["repo_transfer_formats"][format]["import"]) 

1589 

1590 def doImport(importStream: TextIO) -> None: 

1591 backend = BackendClass(importStream, self.registry) 

1592 backend.register() 

1593 with self.transaction(): 

1594 backend.load(self.datastore, directory=directory, transfer=transfer, 

1595 skip_dimensions=skip_dimensions) 

1596 

1597 if isinstance(filename, str): 

1598 with open(filename, "r") as stream: 

1599 doImport(stream) 

1600 else: 

1601 doImport(filename) 

1602 

1603 def validateConfiguration(self, logFailures: bool = False, 

1604 datasetTypeNames: Optional[Iterable[str]] = None, 

1605 ignore: Optional[Iterable[str]] = None) -> None:

1606 """Validate butler configuration. 

1607 

1608 Checks that each `DatasetType` can be stored in the `Datastore`. 

1609 

1610 Parameters 

1611 ---------- 

1612 logFailures : `bool`, optional 

1613 If `True`, output a log message for every validation error 

1614 detected. 

1615 datasetTypeNames : iterable of `str`, optional 

1616 The `DatasetType` names that should be checked. This allows 

1617 only a subset to be selected. 

1618 ignore : iterable of `str`, optional 

1619 Names of DatasetTypes to skip over. This can be used to skip 

1620 known problems. If a named `DatasetType` corresponds to a 

1621 composite, all components of that `DatasetType` will also be 

1622 ignored. 

1623 

1624 Raises 

1625 ------ 

1626 ButlerValidationError 

1627 Raised if there is some inconsistency with how this Butler 

1628 is configured. 
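
Examples
--------
A minimal sketch (the dataset type name is hypothetical) that validates
a single dataset type and logs every problem found::

    butler.validateConfiguration(logFailures=True, datasetTypeNames=["calexp"])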

1629 """ 

1630 if datasetTypeNames: 

1631 datasetTypes = [self.registry.getDatasetType(name) for name in datasetTypeNames] 

1632 else: 

1633 datasetTypes = list(self.registry.queryDatasetTypes()) 

1634 

1635 # filter out anything from the ignore list 

1636 if ignore: 

1637 ignore = set(ignore) 

1638 datasetTypes = [e for e in datasetTypes 

1639 if e.name not in ignore and e.nameAndComponent()[0] not in ignore] 

1640 else: 

1641 ignore = set() 

1642 

1643 # Find all the registered instruments 

1644 instruments = set( 

1645 record.name for record in self.registry.queryDimensionRecords("instrument") 

1646 ) 

1647 

1648 # For each datasetType that has an instrument dimension, create 

1649 # a DatasetRef for each defined instrument 

1650 datasetRefs = [] 

1651 

1652 for datasetType in datasetTypes: 

1653 if "instrument" in datasetType.dimensions: 

1654 for instrument in instruments: 

1655 datasetRef = DatasetRef(datasetType, {"instrument": instrument}, # type: ignore 

1656 conform=False) 

1657 datasetRefs.append(datasetRef) 

1658 

1659 entities: List[Union[DatasetType, DatasetRef]] = [] 

1660 entities.extend(datasetTypes) 

1661 entities.extend(datasetRefs) 

1662 

1663 datastoreErrorStr = None 

1664 try: 

1665 self.datastore.validateConfiguration(entities, logFailures=logFailures) 

1666 except ValidationError as e: 

1667 datastoreErrorStr = str(e) 

1668 

1669 # Also check that the LookupKeys used by the datastores match 

1670 # registry and storage class definitions 

1671 keys = self.datastore.getLookupKeys() 

1672 

1673 failedNames = set() 

1674 failedDataId = set() 

1675 for key in keys: 

1676 if key.name is not None: 

1677 if key.name in ignore: 

1678 continue 

1679 

1680 # skip if specific datasetType names were requested and this 

1681 # name does not match 

1682 if datasetTypeNames and key.name not in datasetTypeNames: 

1683 continue 

1684 

1685 # See if it is a StorageClass or a DatasetType 

1686 if key.name in self.storageClasses: 

1687 pass 

1688 else: 

1689 try: 

1690 self.registry.getDatasetType(key.name) 

1691 except KeyError: 

1692 if logFailures: 

1693 log.critical("Key '%s' does not correspond to a DatasetType or StorageClass", key) 

1694 failedNames.add(key) 

1695 else: 

1696 # Dimensions are checked for consistency when the Butler 

1697 # is created and rendezvoused with a universe. 

1698 pass 

1699 

1700 # Check that any data ID override only uses the instrument key and

1701 # refers to a registered instrument.

1702 if key.dataId: 

1703 dataIdKeys = set(key.dataId) 

1704 if dataIdKeys != {"instrument"}:

1705 if logFailures: 

1706 log.critical("Key '%s' has unsupported DataId override", key) 

1707 failedDataId.add(key) 

1708 elif key.dataId["instrument"] not in instruments: 

1709 if logFailures: 

1710 log.critical("Key '%s' has unknown instrument", key) 

1711 failedDataId.add(key) 

1712 

1713 messages = [] 

1714 

1715 if datastoreErrorStr: 

1716 messages.append(datastoreErrorStr) 

1717 

1718 for failed, msg in ((failedNames, "Keys without corresponding DatasetType or StorageClass entry: "), 

1719 (failedDataId, "Keys with bad DataId entries: ")): 

1720 if failed: 

1721 msg += ", ".join(str(k) for k in failed) 

1722 messages.append(msg) 

1723 

1724 if messages: 

1725 raise ValidationError(";\n".join(messages)) 

1726 

1727 @property 

1728 def collections(self) -> CollectionSearch: 

1729 """The collections to search by default, in order (`CollectionSearch`). 

1730 

1731 This is an alias for ``self.registry.defaults.collections``. It cannot 

1732 be set directly in isolation, but all defaults may be changed together 

1733 by assigning a new `RegistryDefaults` instance to 

1734 ``self.registry.defaults``. 
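
For example, to change the default collection search path and output
run together (the collection and run names are hypothetical)::

    butler.registry.defaults = RegistryDefaults(collections=["MyCam/defaults"],
                                                run="u/someone/test")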

1735 """ 

1736 return self.registry.defaults.collections 

1737 

1738 @property 

1739 def run(self) -> Optional[str]: 

1740 """Name of the run this butler writes outputs to by default (`str` or 

1741 `None`). 

1742 

1743 This is an alias for ``self.registry.defaults.run``. It cannot be set 

1744 directly in isolation, but all defaults may be changed together by 

1745 assigning a new `RegistryDefaults` instance to 

1746 ``self.registry.defaults``. 

1747 """ 

1748 return self.registry.defaults.run 

1749 

1750 registry: Registry 

1751 """The object that manages dataset metadata and relationships (`Registry`). 

1752 

1753 Most operations that don't involve reading or writing butler datasets are 

1754 accessible only via `Registry` methods. 

1755 """ 

1756 

1757 datastore: Datastore 

1758 """The object that manages actual dataset storage (`Datastore`). 

1759 

1760 Direct user access to the datastore should rarely be necessary; the primary 

1761 exception is the case where a `Datastore` implementation provides extra 

1762 functionality beyond what the base class defines. 

1763 """ 

1764 

1765 storageClasses: StorageClassFactory 

1766 """An object that maps known storage class names to objects that fully 

1767 describe them (`StorageClassFactory`). 

1768 """