# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("Formatter", "FormatterFactory", "FormatterParameter")

from abc import ABCMeta, abstractmethod
from collections.abc import Mapping
import logging
import copy
from typing import ClassVar, Set, AbstractSet, Union, Optional, Dict, Any, Tuple, Type, TYPE_CHECKING

from .configSupport import processLookupConfigs, LookupKey
from .mappingFactory import MappingFactory
from .utils import getFullTypeName
from .fileDescriptor import FileDescriptor
from .location import Location
from .config import Config
from .dimensions import DimensionUniverse
from .storageClass import StorageClass
from .datasets import DatasetType, DatasetRef

log = logging.getLogger(__name__)

# Define a new special type for functions that take "entity"
Entity = Union[DatasetType, DatasetRef, StorageClass, str]


if TYPE_CHECKING:
    from .dimensions import DataCoordinate


class Formatter(metaclass=ABCMeta):
    """Interface for reading and writing Datasets with a particular
    `StorageClass`.

    Parameters
    ----------
    fileDescriptor : `FileDescriptor`, optional
        Identifies the file to read or write, and the associated storage
        classes and parameter information. Its value can be `None` if the
        caller will never call `Formatter.read` or `Formatter.write`.
    dataId : `DataCoordinate`, optional
        Data ID associated with this formatter.
    writeParameters : `dict`, optional
        Any parameters to be hard-coded into this instance to control how
        the dataset is serialized.
    writeRecipes : `dict`, optional
        Detailed write recipes indexed by recipe name.
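
    Examples
    --------
    A minimal sketch of a concrete subclass (the class name and the file
    handling shown below are illustrative, not part of this module):

    .. code-block:: python

        class TextFormatter(Formatter):
            # Extension used by makeUpdatedLocation() when writing files.
            extension = ".txt"

            def read(self, component=None):
                # The file to read is described by the FileDescriptor.
                with open(self.fileDescriptor.location.path, "r") as fd:
                    return fd.read()

            def write(self, inMemoryDataset):
                # Apply this formatter's extension, write the file, and
                # report the path within the datastore.
                location = self.makeUpdatedLocation(self.fileDescriptor.location)
                with open(location.path, "w") as fd:
                    fd.write(inMemoryDataset)
                return location.pathInStore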

67 """ 

68 

69 unsupportedParameters: ClassVar[Optional[AbstractSet[str]]] = frozenset() 

70 """Set of read parameters not understood by this `Formatter`. An empty set 

71 means all parameters are supported. `None` indicates that no parameters 

72 are supported. These param (`frozenset`). 

73 """ 

74 

75 supportedWriteParameters: ClassVar[Optional[AbstractSet[str]]] = None 

76 """Parameters understood by this formatter that can be used to control 

77 how a dataset is serialized. `None` indicates that no parameters are 

78 supported.""" 

79 

    def __init__(self, fileDescriptor: FileDescriptor, dataId: Optional[DataCoordinate] = None,
                 writeParameters: Optional[Dict[str, Any]] = None,
                 writeRecipes: Optional[Dict[str, Any]] = None):
        if not isinstance(fileDescriptor, FileDescriptor):
            raise TypeError("File descriptor must be a FileDescriptor")
        self._fileDescriptor = fileDescriptor
        self._dataId = dataId

        # Check that the write parameters are allowed
        if writeParameters:
            if self.supportedWriteParameters is None:
                raise ValueError("This formatter does not accept any write parameters. "
                                 f"Got: {', '.join(writeParameters)}")
            else:
                given = set(writeParameters)
                unknown = given - self.supportedWriteParameters
                if unknown:
                    s = "s" if len(unknown) != 1 else ""
                    unknownStr = ", ".join(f"'{u}'" for u in unknown)
                    raise ValueError(f"This formatter does not accept parameter{s} {unknownStr}")

        self._writeParameters = writeParameters
        self._writeRecipes = self.validateWriteRecipes(writeRecipes)

    def __str__(self) -> str:
        return f"{self.name()}@{self.fileDescriptor.location.path}"

    def __repr__(self) -> str:
        return f"{self.name()}({self.fileDescriptor!r})"

    @property
    def fileDescriptor(self) -> FileDescriptor:
        """File descriptor associated with this formatter
        (`FileDescriptor`, read-only)."""
        return self._fileDescriptor

    @property
    def dataId(self) -> Optional[DataCoordinate]:
        """Data ID associated with this formatter (`DataCoordinate`)."""
        return self._dataId

    @property
    def writeParameters(self) -> Mapping[str, Any]:
        """Parameters to use when writing out datasets."""
        if self._writeParameters is not None:
            return self._writeParameters
        return {}

    @property
    def writeRecipes(self) -> Mapping[str, Any]:
        """Detailed write recipes indexed by recipe name."""
        if self._writeRecipes is not None:
            return self._writeRecipes
        return {}

    @classmethod
    def validateWriteRecipes(cls, recipes: Optional[Mapping[str, Any]]) -> Optional[Mapping[str, Any]]:
        """Validate supplied recipes for this formatter.

        The recipes are supplemented with default values where appropriate.

        Parameters
        ----------
        recipes : `dict`
            Recipes to validate.

        Returns
        -------
        validated : `dict`
            Validated recipes.

        Raises
        ------
        RuntimeError
            Raised if validation fails. The default implementation raises
            if any recipes are given.
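
        Examples
        --------
        A sketch of how a subclass might supplement recipes with defaults
        (the ``level`` key and its default are purely illustrative):

        .. code-block:: python

            class ExampleFormatter(Formatter):
                # read() and write() omitted for brevity.

                @classmethod
                def validateWriteRecipes(cls, recipes):
                    if not recipes:
                        return recipes
                    for name, recipe in recipes.items():
                        # Fill in a default compression level if none given.
                        recipe.setdefault("level", 1)
                    return recipes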

156 """ 

157 if recipes: 

158 raise RuntimeError(f"This formatter does not understand these writeRecipes: {recipes}") 

159 return recipes 

160 

161 @classmethod 

162 def name(cls) -> str: 

163 """Returns the fully qualified name of the formatter. 

164 

165 Returns 

166 ------- 

167 name : `str` 

168 Fully-qualified name of formatter class. 

169 """ 

170 return getFullTypeName(cls) 

171 

    @abstractmethod
    def read(self, component: Optional[str] = None) -> Any:
        """Read a Dataset.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested Dataset.
        """
        raise NotImplementedError("Type does not support reading")

    @abstractmethod
    def write(self, inMemoryDataset: Any) -> str:
        """Write a Dataset.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.

        Returns
        -------
        path : `str`
            The path to where the Dataset was stored within the datastore.
        """
        raise NotImplementedError("Type does not support writing")

    def fromBytes(self, serializedDataset: bytes,
                  component: Optional[str] = None) -> object:
        """Read serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the
            `StorageClass` for reading differed from the `StorageClass`
            used to write the file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.
        """
        raise NotImplementedError("Type does not support reading from bytes.")

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes, as defined by this formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.
        """
        raise NotImplementedError("Type does not support writing to bytes.")

    @classmethod
    def makeUpdatedLocation(cls, location: Location) -> Location:
        """Return a new `Location` instance updated with this formatter's
        extension.

        Parameters
        ----------
        location : `Location`
            The location to update.

        Returns
        -------
        updated : `Location`
            A new `Location` with a new file extension applied.

        Raises
        ------
        NotImplementedError
            Raised if there is no ``extension`` attribute associated with
            this formatter.

        Notes
        -----
        This method is available to all formatters, but it only works if the
        formatter defines an ``extension`` attribute containing the file
        extension used when writing files. If ``extension`` is `None` the
        supplied location will not be updated. Not all formatters write
        files, so ``extension`` is not defined in the base class.
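
        Examples
        --------
        An illustrative sketch, assuming a subclass that declares an
        ``extension`` class attribute and an existing `Location` ``loc``:

        .. code-block:: python

            class ExamplePickleFormatter(Formatter):
                # read() and write() omitted for brevity.
                extension = ".pickle"

            updated = ExamplePickleFormatter.makeUpdatedLocation(loc)
            # ``updated`` now carries the ".pickle" extension; ``loc`` is
            # unchanged because the location is deep-copied first.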

271 """ 

272 location = copy.deepcopy(location) 

273 try: 

274 # We are deliberately allowing extension to be undefined by 

275 # default in the base class and mypy complains. 

276 location.updateExtension(cls.extension) # type:ignore 

277 except AttributeError: 

278 raise NotImplementedError("No file extension registered with this formatter") from None 

279 return location 

280 

281 @classmethod 

282 def predictPathFromLocation(cls, location: Location) -> str: 

283 """Return the path that would be returned by write, without actually 

284 writing. 

285 

286 Parameters 

287 ---------- 

288 location : `Location` 

289 Location of file for which path prediction is required. 

290 

291 Returns 

292 ------- 

293 path : `str` 

294 Path within datastore that would be associated with this location. 

295 """ 

296 return cls.makeUpdatedLocation(location).pathInStore 

297 

298 def predictPath(self) -> str: 

299 """Return the path that would be returned by write, without actually 

300 writing. 

301 

302 Uses the `FileDescriptor` associated with the instance. 

303 

304 Returns 

305 ------- 

306 path : `str` 

307 Path within datastore that would be associated with the location 

308 stored in this `Formatter`. 

309 """ 

310 return self.predictPathFromLocation(self.fileDescriptor.location) 

311 

    def segregateParameters(self, parameters: Optional[Dict[str, Any]] = None) -> Tuple[Dict, Dict]:
        """Segregate the supplied parameters into those understood by the
        formatter and those not understood by the formatter.

        Any unsupported parameters are assumed to be usable by associated
        assemblers.

        Parameters
        ----------
        parameters : `dict`, optional
            Parameters with values that have been supplied by the caller
            and which might be relevant for the formatter. If `None`,
            parameters will be read from the registered `FileDescriptor`.

        Returns
        -------
        supported : `dict`
            Those parameters supported by this formatter.
        unsupported : `dict`
            Those parameters not supported by this formatter.
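
        Examples
        --------
        An illustrative sketch, assuming an instantiated formatter whose
        class declares a single unsupported read parameter:

        .. code-block:: python

            class ExampleFormatter(Formatter):
                # read() and write() omitted for brevity.
                unsupportedParameters = frozenset({"slice"})

            # With ``formatter`` an ExampleFormatter instance:
            supported, unsupported = formatter.segregateParameters(
                {"columns": ["a"], "slice": 3}
            )
            # supported == {"columns": ["a"]}
            # unsupported == {"slice": 3}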

332 """ 

333 

334 if parameters is None: 

335 parameters = self.fileDescriptor.parameters 

336 

337 if parameters is None: 

338 return {}, {} 

339 

340 if self.unsupportedParameters is None: 

341 # Support none of the parameters 

342 return {}, parameters.copy() 

343 

344 # Start by assuming all are supported 

345 supported = parameters.copy() 

346 unsupported = {} 

347 

348 # And remove any we know are not supported 

349 for p in set(supported): 

350 if p in self.unsupportedParameters: 

351 unsupported[p] = supported.pop(p) 

352 

353 return supported, unsupported 

354 

355 

class FormatterFactory:
    """Factory for `Formatter` instances.
    """

    defaultKey = LookupKey("default")
    """Configuration key associated with default write parameter settings."""

    writeRecipesKey = LookupKey("write_recipes")
    """Configuration key associated with write recipes."""

    def __init__(self) -> None:
        self._mappingFactory = MappingFactory(Formatter)

    def __contains__(self, key: Union[LookupKey, str]) -> bool:
        """Indicate whether the supplied key is present in the factory.

        Parameters
        ----------
        key : `LookupKey`, `str`, or object with a ``name`` attribute
            Key to use to look up whether a corresponding formatter is
            present in the factory.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the factory.
        """
        return key in self._mappingFactory

    def registerFormatters(self, config: Config, *, universe: DimensionUniverse) -> None:
        """Bulk register formatters from a config.

        Parameters
        ----------
        config : `Config`
            ``formatters`` section of a configuration.
        universe : `DimensionUniverse`
            Set of all known dimensions, used to expand and validate any used
            in lookup keys.

        Notes
        -----
        The configuration can include one level of hierarchy where an
        instrument-specific section can be defined to override more general
        template specifications. This is represented in YAML using a
        key of form ``instrument<name>`` which can then define templates
        that will be returned if a `DatasetRef` contains a matching instrument
        name in the data ID.

        The config is parsed using the function
        `~lsst.daf.butler.configSupport.processLookupConfigs`.

        The values for formatter entries can be either a simple string
        referring to a python type or a dict representing the formatter and
        parameters to be hard-coded into the formatter constructor. For
        the dict case the following keys are supported:

        - formatter: The python type to be used as the formatter class.
        - parameters: A further dict that is passed directly to the
          ``writeParameters`` argument of the formatter constructor to
          seed it. These parameters are validated at instance creation
          and not at configuration time.

        Additionally, a special ``default`` section can be defined that
        uses the formatter type (class) name as the keys and specifies
        default write parameters that should be used whenever an instance
        of that class is constructed.

        .. code-block:: yaml

            formatters:
              default:
                lsst.daf.butler.formatters.example.ExampleFormatter:
                  max: 10
                  min: 2
                  comment: Default comment
              calexp: lsst.daf.butler.formatters.example.ExampleFormatter
              coadd:
                formatter: lsst.daf.butler.formatters.example.ExampleFormatter
                parameters:
                  max: 5

        Any time an ``ExampleFormatter`` is constructed it will use those
        default parameters. If an explicit entry later in the configuration
        specifies a different set of parameters, the two will be merged with
        the later entry taking priority. In the example above ``calexp`` will
        use the default parameters but ``coadd`` will override the value for
        ``max``.

        Formatter configuration can also include a special section describing
        collections of write parameters that can be accessed through a
        simple label. This allows common collections of options to be
        specified in one place in the configuration and reused later.
        The ``write_recipes`` section is indexed by Formatter class name
        and each key is the label to associate with the parameters.

        .. code-block:: yaml

            formatters:
              write_recipes:
                lsst.obs.base.fitsExposureFormatter.FitsExposureFormatter:
                  lossless:
                    ...
                  noCompression:
                    ...

        By convention a formatter that uses write recipes will support a
        ``recipe`` write parameter that will refer to a recipe name in
        the ``write_recipes`` component. The `Formatter` will be constructed
        in the `FormatterFactory` with all the relevant recipes and
        will not attempt to filter by looking at ``writeParameters`` in
        advance. See the specific formatter documentation for details on
        acceptable recipe options.
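
        Examples
        --------
        An illustrative sketch, reusing the ``ExampleFormatter`` entry from
        the YAML above and assuming a default-constructed `DimensionUniverse`
        is acceptable:

        .. code-block:: python

            factory = FormatterFactory()
            config = Config(
                {"calexp": "lsst.daf.butler.formatters.example.ExampleFormatter"}
            )
            factory.registerFormatters(config, universe=DimensionUniverse())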

469 """ 

470 allowed_keys = {"formatter", "parameters"} 

471 

472 contents = processLookupConfigs(config, allow_hierarchy=True, universe=universe) 

473 

474 # Extract any default parameter settings 

475 defaultParameters = contents.get(self.defaultKey, {}) 

476 if not isinstance(defaultParameters, Mapping): 

477 raise RuntimeError("Default formatter parameters in config can not be a single string" 

478 f" (got: {type(defaultParameters)})") 

479 

480 # Extract any global write recipes -- these are indexed by 

481 # Formatter class name. 

482 writeRecipes = contents.get(self.writeRecipesKey, {}) 

483 if isinstance(writeRecipes, str): 

484 raise RuntimeError(f"The formatters.{self.writeRecipesKey} section must refer to a dict" 

485 f" not '{writeRecipes}'") 

486 

487 for key, f in contents.items(): 

488 # default is handled in a special way 

489 if key == self.defaultKey: 

490 continue 

491 if key == self.writeRecipesKey: 

492 continue 

493 

494 # Can be a str or a dict. 

495 specificWriteParameters = {} 

496 if isinstance(f, str): 

497 formatter = f 

498 elif isinstance(f, Mapping): 

499 all_keys = set(f) 

500 unexpected_keys = all_keys - allowed_keys 

501 if unexpected_keys: 

502 raise ValueError(f"Formatter {key} uses unexpected keys {unexpected_keys} in config") 

503 if "formatter" not in f: 

504 raise ValueError(f"Mandatory 'formatter' key missing for formatter key {key}") 

505 formatter = f["formatter"] 

506 if "parameters" in f: 

507 specificWriteParameters = f["parameters"] 

508 else: 

509 raise ValueError(f"Formatter for key {key} has unexpected value: '{f}'") 

510 

511 # Apply any default parameters for this formatter 

512 writeParameters = copy.deepcopy(defaultParameters.get(formatter, {})) 

513 writeParameters.update(specificWriteParameters) 

514 

515 kwargs: Dict[str, Any] = {} 

516 if writeParameters: 

517 kwargs["writeParameters"] = writeParameters 

518 

519 if formatter in writeRecipes: 

520 kwargs["writeRecipes"] = writeRecipes[formatter] 

521 

522 self.registerFormatter(key, formatter, **kwargs) 

523 

    def getLookupKeys(self) -> Set[LookupKey]:
        """Retrieve the look up keys for all the registry entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching in the registry.
        """
        return self._mappingFactory.getLookupKeys()

    def getFormatterClassWithMatch(self, entity: Entity) -> Tuple[LookupKey, Type[Formatter],
                                                                  Dict[str, Any]]:
        """Get the matching formatter class along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            The `StorageClass` will be used as a last resort if a `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if the provided `DatasetRef` has an ``instrument``
            value in its data ID.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `type`
            The class of the registered formatter.
        formatter_kwargs : `dict`
            Keyword arguments that are associated with this formatter entry.
        """
        names = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
        matchKey, formatter, formatter_kwargs = self._mappingFactory.getClassFromRegistryWithMatch(names)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter, formatter_kwargs

    def getFormatterClass(self, entity: Entity) -> Type:
        """Get the matching formatter class.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            The `StorageClass` will be used as a last resort if a `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if the provided `DatasetRef` has an ``instrument``
            value in its data ID.

        Returns
        -------
        formatter : `type`
            The class of the registered formatter.
        """
        _, formatter, _ = self.getFormatterClassWithMatch(entity)
        return formatter

    def getFormatterWithMatch(self, entity: Entity, *args: Any, **kwargs: Any) -> Tuple[LookupKey, Formatter]:
        """Get a new formatter instance along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            The `StorageClass` will be used as a last resort if a `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if the provided `DatasetRef` has an ``instrument``
            value in its data ID.
        args : `tuple`
            Positional arguments to pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to the object constructor.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        names = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
        matchKey, formatter = self._mappingFactory.getFromRegistryWithMatch(names, *args, **kwargs)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter

    def getFormatter(self, entity: Entity, *args: Any, **kwargs: Any) -> Formatter:
        """Get a new formatter instance.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            The `StorageClass` will be used as a last resort if a `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if the provided `DatasetRef` has an ``instrument``
            value in its data ID.
        args : `tuple`
            Positional arguments to pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to the object constructor.

        Returns
        -------
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        _, formatter = self.getFormatterWithMatch(entity, *args, **kwargs)
        return formatter

    def registerFormatter(self, type_: Union[LookupKey, str, StorageClass, DatasetType],
                          formatter: str, *, overwrite: bool = False,
                          **kwargs: Any) -> None:
        """Register a `Formatter`.

        Parameters
        ----------
        type_ : `LookupKey`, `str`, `StorageClass` or `DatasetType`
            Type for which this formatter is to be used. If a `LookupKey`
            is not provided, one will be constructed from the supplied string
            or by using the ``name`` property of the supplied entity.
        formatter : `str` or class of type `Formatter`
            Identifies a `Formatter` subclass to use for reading and writing
            Datasets of this type. Can be a `Formatter` class.
        overwrite : `bool`, optional
            If `True` an existing entry will be replaced by the new value.
            Default is `False`.
        kwargs : `dict`
            Keyword arguments to always pass to the object constructor when
            the formatter is retrieved.

        Raises
        ------
        ValueError
            Raised if the formatter does not name a valid formatter type and
            ``overwrite`` is `False`.
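
        Examples
        --------
        An illustrative sketch; the storage class name and the choice of
        formatter are assumptions, not requirements of this API (any
        importable `Formatter` subclass would do):

        .. code-block:: python

            factory = FormatterFactory()
            factory.registerFormatter(
                "StructuredDataDict",
                "lsst.daf.butler.formatters.json.JsonFormatter",
            )
            formatterClass = factory.getFormatterClass("StructuredDataDict")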

665 """ 

666 self._mappingFactory.placeInRegistry(type_, formatter, overwrite=overwrite, **kwargs) 

667 

668 

669# Type to use when allowing a Formatter or its class name 

670FormatterParameter = Union[str, Type[Formatter], Formatter]