Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21 

22from __future__ import annotations 

23 

24__all__ = ("Formatter", "FormatterFactory", "FormatterParameter") 

25 

26from abc import ABCMeta, abstractmethod 

27from collections.abc import Mapping 

28import logging 

29import copy 

30from typing import ClassVar, Set, AbstractSet, Union, Optional, Dict, Any, Tuple, Type, TYPE_CHECKING 

31 

32from .configSupport import processLookupConfigs, LookupKey 

33from .mappingFactory import MappingFactory 

34from .utils import getFullTypeName 

35from .fileDescriptor import FileDescriptor 

36from .location import Location 

37from .config import Config 

38from .dimensions import DimensionUniverse 

39from .storageClass import StorageClass 

40from .datasets import DatasetType, DatasetRef 

41 

# Module-level logger used by the factory methods below.
log = logging.getLogger(__name__)

# Special type for APIs that accept an "entity": any object from which a
# formatter lookup can be derived (or a plain name string).
Entity = Union[DatasetType, DatasetRef, StorageClass, str]

46 

47 

48if TYPE_CHECKING: 48 ↛ 49line 48 didn't jump to line 49, because the condition on line 48 was never true

49 from .dimensions import DataCoordinate 

50 

51 

class Formatter(metaclass=ABCMeta):
    """Interface for reading and writing Datasets with a particular
    `StorageClass`.

    Parameters
    ----------
    fileDescriptor : `FileDescriptor`, optional
        Identifies the file to read or write, and the associated storage
        classes and parameter information.  Its value can be `None` if the
        caller will never call `Formatter.read` or `Formatter.write`.
    dataId : `DataCoordinate`, optional
        Data ID associated with this formatter.
    writeParameters : `dict`, optional
        Any parameters to be hard-coded into this instance to control how
        the dataset is serialized.

    Raises
    ------
    TypeError
        Raised if ``fileDescriptor`` is not a `FileDescriptor`.
    ValueError
        Raised if ``writeParameters`` contains a parameter not listed in
        `supportedWriteParameters`.
    """

    unsupportedParameters: ClassVar[Optional[AbstractSet[str]]] = frozenset()
    """Set of read parameters not understood by this `Formatter`. An empty set
    means all parameters are supported.  `None` indicates that no parameters
    are supported (`frozenset`).
    """

    supportedWriteParameters: ClassVar[Optional[AbstractSet[str]]] = None
    """Parameters understood by this formatter that can be used to control
    how a dataset is serialized. `None` indicates that no parameters are
    supported."""

    extension: Optional[str] = None
    """File extension default provided by this formatter."""

    def __init__(self, fileDescriptor: FileDescriptor, dataId: Optional[DataCoordinate] = None,
                 writeParameters: Optional[Dict[str, Any]] = None):
        if not isinstance(fileDescriptor, FileDescriptor):
            raise TypeError("File descriptor must be a FileDescriptor")
        self._fileDescriptor = fileDescriptor
        self._dataId = dataId

        # Check that the write parameters are allowed; reject early so a
        # misconfigured formatter fails at construction, not at write time.
        if writeParameters:
            if self.supportedWriteParameters is None:
                raise ValueError("This formatter does not accept any write parameters. "
                                 f"Got: {', '.join(writeParameters)}")
            else:
                given = set(writeParameters)
                unknown = given - self.supportedWriteParameters
                if unknown:
                    s = "s" if len(unknown) != 1 else ""
                    unknownStr = ", ".join(f"'{u}'" for u in unknown)
                    raise ValueError(f"This formatter does not accept parameter{s} {unknownStr}")

        self._writeParameters = writeParameters

    def __str__(self) -> str:
        return f"{self.name()}@{self.fileDescriptor.location.path}"

    def __repr__(self) -> str:
        return f"{self.name()}({self.fileDescriptor!r})"

    @property
    def fileDescriptor(self) -> FileDescriptor:
        """FileDescriptor associated with this formatter
        (`FileDescriptor`, read-only)"""
        return self._fileDescriptor

    @property
    def dataId(self) -> Optional[DataCoordinate]:
        """DataId associated with this formatter (`DataCoordinate`)"""
        return self._dataId

    @property
    def writeParameters(self) -> Mapping:
        """Parameters to use when serializing a dataset (`Mapping`,
        read-only).  An empty mapping is returned if none were supplied
        at construction."""
        if self._writeParameters is not None:
            return self._writeParameters
        return {}

    @classmethod
    def name(cls) -> str:
        """Returns the fully qualified name of the formatter.

        Returns
        -------
        name : `str`
            Fully-qualified name of formatter class.
        """
        return getFullTypeName(cls)

    @abstractmethod
    def read(self, component: Optional[str] = None) -> Any:
        """Read a Dataset.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested Dataset.
        """
        raise NotImplementedError("Type does not support reading")

    @abstractmethod
    def write(self, inMemoryDataset: Any) -> str:
        """Write a Dataset.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.

        Returns
        -------
        path : `str`
            The path to where the Dataset was stored within the datastore.
        """
        raise NotImplementedError("Type does not support writing")

    def fromBytes(self, serializedDataset: bytes,
                  component: Optional[str] = None) -> object:
        """Reads serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.
        """
        raise NotImplementedError("Type does not support reading from bytes.")

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.
        """
        raise NotImplementedError("Type does not support writing to bytes.")

    @classmethod
    def makeUpdatedLocation(cls, location: Location) -> Location:
        """Return a new `Location` instance updated with this formatter's
        extension.

        Parameters
        ----------
        location : `Location`
            The location to update.

        Returns
        -------
        updated : `Location`
            The updated location with a new file extension applied.

        Raises
        ------
        NotImplementedError
            Raised if there is no ``extension`` attribute associated with
            this formatter.
        """
        # Work on a copy so the caller's Location is never modified.
        location = copy.deepcopy(location)
        try:
            location.updateExtension(cls.extension)
        except AttributeError:
            raise NotImplementedError("No file extension registered with this formatter") from None
        return location

    @classmethod
    def predictPathFromLocation(cls, location: Location) -> str:
        """Return the path that would be returned by write, without actually
        writing.

        Parameters
        ----------
        location : `Location`
            Location of file for which path prediction is required.

        Returns
        -------
        path : `str`
            Path within datastore that would be associated with this location.
        """
        return cls.makeUpdatedLocation(location).pathInStore

    def predictPath(self) -> str:
        """Return the path that would be returned by write, without actually
        writing.

        Uses the `FileDescriptor` associated with the instance.

        Returns
        -------
        path : `str`
            Path within datastore that would be associated with the location
            stored in this `Formatter`.
        """
        return self.predictPathFromLocation(self.fileDescriptor.location)

    def segregateParameters(self, parameters: Optional[Dict[str, Any]] = None) -> Tuple[Dict, Dict]:
        """Segregate the supplied parameters into those understood by the
        formatter and those not understood by the formatter.

        Any unsupported parameters are assumed to be usable by associated
        assemblers.

        Parameters
        ----------
        parameters : `dict`, optional
            Parameters with values that have been supplied by the caller
            and which might be relevant for the formatter.  If `None`
            parameters will be read from the registered `FileDescriptor`.

        Returns
        -------
        supported : `dict`
            Those parameters supported by this formatter.
        unsupported : `dict`
            Those parameters not supported by this formatter.
        """
        if parameters is None:
            parameters = self.fileDescriptor.parameters

        if parameters is None:
            return {}, {}

        if self.unsupportedParameters is None:
            # Support none of the parameters.
            return {}, parameters.copy()

        # Start by assuming all are supported.
        supported = parameters.copy()
        unsupported: Dict[str, Any] = {}

        # And remove any we know are not supported.
        for p in set(supported):
            if p in self.unsupportedParameters:
                unsupported[p] = supported.pop(p)

        return supported, unsupported

310 

311 

class FormatterFactory:
    """Factory for `Formatter` instances.
    """

    defaultKey = LookupKey("default")
    """Configuration key associated with default write parameter settings."""

    def __init__(self) -> None:
        # All lookup/registration is delegated to a generic mapping factory
        # constrained to Formatter subclasses.
        self._mappingFactory = MappingFactory(Formatter)

    def __contains__(self, key: Union[LookupKey, str]) -> bool:
        """Indicates whether the supplied key is present in the factory.

        Parameters
        ----------
        key : `LookupKey`, `str` or objects with ``name`` attribute
            Key to use to lookup in the factory whether a corresponding
            formatter is present.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the factory.
        """
        return key in self._mappingFactory

    def registerFormatters(self, config: Config, *, universe: DimensionUniverse) -> None:
        """Bulk register formatters from a config.

        Parameters
        ----------
        config : `Config`
            ``formatters`` section of a configuration.
        universe : `DimensionUniverse`, optional
            Set of all known dimensions, used to expand and validate any used
            in lookup keys.

        Raises
        ------
        RuntimeError
            Raised if the ``default`` section is a single string rather
            than a mapping of formatter names to parameters.
        ValueError
            Raised if a formatter entry is malformed (unexpected keys,
            missing ``formatter`` key, or an unsupported value type).

        Notes
        -----
        The configuration can include one level of hierarchy where an
        instrument-specific section can be defined to override more general
        template specifications.  This is represented in YAML using a
        key of form ``instrument<name>`` which can then define templates
        that will be returned if a `DatasetRef` contains a matching instrument
        name in the data ID.

        The config is parsed using the function
        `~lsst.daf.butler.configSubset.processLookupConfigs`.

        The values for formatter entries can be either a simple string
        referring to a python type or a dict representing the formatter and
        parameters to be hard-coded into the formatter constructor.  For
        the dict case the following keys are supported:

        - formatter: The python type to be used as the formatter class.
        - parameters: A further dict to be passed directly to the
          ``writeParameters`` Formatter constructor to seed it.
          These parameters are validated at instance creation and not at
          configuration.

        Additionally, a special ``default`` section can be defined that
        uses the formatter type (class) name as the keys and specifies
        default write parameters that should be used whenever an instance
        of that class is constructed.

        .. code-block:: yaml

           formatters:
             default:
               lsst.daf.butler.formatters.example.ExampleFormatter:
                 max: 10
                 min: 2
                 comment: Default comment
             calexp: lsst.daf.butler.formatters.example.ExampleFormatter
             coadd:
               formatter: lsst.daf.butler.formatters.example.ExampleFormatter
               parameters:
                 max: 5

        Any time an ``ExampleFormatter`` is constructed it will use those
        parameters.  If an explicit entry later in the configuration specifies
        a different set of parameters, the two will be merged with the later
        entry taking priority.  In the example above ``calexp`` will use
        the default parameters but ``coadd`` will override the value for
        ``max``.
        """
        allowed_keys = {"formatter", "parameters"}

        contents = processLookupConfigs(config, allow_hierarchy=True, universe=universe)

        # Extract any default parameter settings.
        defaultParameters = contents.get(self.defaultKey, {})
        if not isinstance(defaultParameters, Mapping):
            raise RuntimeError("Default formatter parameters in config can not be a single string"
                               f" (got: {type(defaultParameters)})")

        for key, f in contents.items():
            # default is handled in a special way.
            if key == self.defaultKey:
                continue

            # Can be a str or a dict.
            specificWriteParameters = {}
            if isinstance(f, str):
                formatter = f
            elif isinstance(f, Mapping):
                all_keys = set(f)
                unexpected_keys = all_keys - allowed_keys
                if unexpected_keys:
                    raise ValueError(f"Formatter {key} uses unexpected keys {unexpected_keys} in config")
                if "formatter" not in f:
                    raise ValueError(f"Mandatory 'formatter' key missing for formatter key {key}")
                formatter = f["formatter"]
                if "parameters" in f:
                    specificWriteParameters = f["parameters"]
            else:
                raise ValueError(f"Formatter for key {key} has unexpected value: '{f}'")

            # Apply any default parameters for this formatter.  Copy the
            # defaults into a fresh dict before merging: updating the mapping
            # returned by ``get`` in place would corrupt the shared defaults
            # for any later entry that uses the same formatter class.  The
            # copy also coerces a Config mapping to a plain dict.
            writeParameters = dict(defaultParameters.get(formatter, {}))
            writeParameters.update(specificWriteParameters)

            kwargs: Dict[str, Any] = {}
            if writeParameters:
                kwargs["writeParameters"] = writeParameters

            self.registerFormatter(key, formatter, **kwargs)

    def getLookupKeys(self) -> Set[LookupKey]:
        """Retrieve the look up keys for all the registry entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching in the registry.
        """
        return self._mappingFactory.getLookupKeys()

    def getFormatterClassWithMatch(self, entity: Entity) -> Tuple[LookupKey, Type[Formatter],
                                                                  Dict[str, Any]]:
        """Get the matching formatter class along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided.  Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `type`
            The class of the registered formatter.
        formatter_kwargs : `dict`
            Keyword arguments that are associated with this formatter entry.
        """
        # A plain string becomes a single lookup name; richer entities
        # supply their own prioritized lookup names.
        names = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
        matchKey, formatter, formatter_kwargs = self._mappingFactory.getClassFromRegistryWithMatch(names)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter, formatter_kwargs

    def getFormatterClass(self, entity: Entity) -> Type:
        """Get the matching formatter class.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided.  Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.

        Returns
        -------
        formatter : `type`
            The class of the registered formatter.
        """
        _, formatter, _ = self.getFormatterClassWithMatch(entity)
        return formatter

    def getFormatterWithMatch(self, entity: Entity, *args: Any, **kwargs: Any) -> Tuple[LookupKey, Formatter]:
        """Get a new formatter instance along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided.  Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.
        args : `tuple`
            Positional arguments to use pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to object constructor.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        names = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
        matchKey, formatter = self._mappingFactory.getFromRegistryWithMatch(names, *args, **kwargs)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter

    def getFormatter(self, entity: Entity, *args: Any, **kwargs: Any) -> Formatter:
        """Get a new formatter instance.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided.  Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.
        args : `tuple`
            Positional arguments to use pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to object constructor.

        Returns
        -------
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        _, formatter = self.getFormatterWithMatch(entity, *args, **kwargs)
        return formatter

    def registerFormatter(self, type_: Union[LookupKey, str, StorageClass, DatasetType],
                          formatter: str, *, overwrite: bool = False,
                          **kwargs: Any) -> None:
        """Register a `Formatter`.

        Parameters
        ----------
        type_ : `LookupKey`, `str`, `StorageClass` or `DatasetType`
            Type for which this formatter is to be used.  If a `LookupKey`
            is not provided, one will be constructed from the supplied string
            or by using the ``name`` property of the supplied entity.
        formatter : `str` or class of type `Formatter`
            Identifies a `Formatter` subclass to use for reading and writing
            Datasets of this type.  Can be a `Formatter` class.
        overwrite : `bool`, optional
            If `True` an existing entry will be replaced by the new value.
            Default is `False`.
        kwargs : `dict`
            Keyword arguments to always pass to object constructor when
            retrieved.

        Raises
        ------
        ValueError
            Raised if the formatter does not name a valid formatter type and
            ``overwrite`` is `False`.
        """
        self._mappingFactory.placeInRegistry(type_, formatter, overwrite=overwrite, **kwargs)

584 

585 

# Type accepted by APIs that take "a formatter": either the fully-qualified
# class name, the Formatter subclass itself, or an existing instance.
FormatterParameter = Union[str, Type[Formatter], Formatter]