Coverage for python/lsst/daf/butler/core/formatter.py : 26%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("Formatter", "FormatterFactory", "FormatterParameter")
26from abc import ABCMeta, abstractmethod
27from collections.abc import Mapping
28import logging
29import copy
30from typing import ClassVar, Set, AbstractSet, Union, Optional, Dict, Any, Tuple, Type, TYPE_CHECKING
32from .configSupport import processLookupConfigs, LookupKey
33from .mappingFactory import MappingFactory
34from .utils import getFullTypeName
35from .fileDescriptor import FileDescriptor
36from .location import Location
37from .config import Config
38from .dimensions import DimensionUniverse
39from .storageClass import StorageClass
40from .datasets import DatasetType, DatasetRef
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Define a new special type for functions that take "entity": anything the
# factory can derive lookup keys from (a dataset type, a dataset ref, a
# storage class, or a plain string name).
Entity = Union[DatasetType, DatasetRef, StorageClass, str]

# DataCoordinate is only referenced in type annotations, so it is imported
# solely for the benefit of static type checkers.
if TYPE_CHECKING:
    from .dimensions import DataCoordinate
class Formatter(metaclass=ABCMeta):
    """Interface for reading and writing Datasets with a particular
    `StorageClass`.

    Parameters
    ----------
    fileDescriptor : `FileDescriptor`
        Identifies the file to read or write, and the associated storage
        classes and parameter information.
    dataId : `DataCoordinate`, optional
        Data ID associated with this formatter.
    writeParameters : `dict`, optional
        Any parameters to be hard-coded into this instance to control how
        the dataset is serialized.
    writeRecipes : `dict`, optional
        Detailed write recipes indexed by recipe name. They are passed
        through `validateWriteRecipes` before being stored.

    Raises
    ------
    TypeError
        Raised if ``fileDescriptor`` is not a `FileDescriptor` instance.
    ValueError
        Raised if write parameters are given that are not listed in
        ``supportedWriteParameters``.
    RuntimeError
        Raised by the default `validateWriteRecipes` if any write recipes
        are given.
    """

    unsupportedParameters: ClassVar[Optional[AbstractSet[str]]] = frozenset()
    """Set of read parameters not understood by this `Formatter`. An empty set
    means all parameters are supported. `None` indicates that no parameters
    are supported (`frozenset` or `None`).
    """

    supportedWriteParameters: ClassVar[Optional[AbstractSet[str]]] = None
    """Parameters understood by this formatter that can be used to control
    how a dataset is serialized. `None` indicates that no parameters are
    supported."""

    def __init__(self, fileDescriptor: FileDescriptor, dataId: Optional[DataCoordinate] = None,
                 writeParameters: Optional[Dict[str, Any]] = None,
                 writeRecipes: Optional[Dict[str, Any]] = None):
        if not isinstance(fileDescriptor, FileDescriptor):
            raise TypeError("File descriptor must be a FileDescriptor")
        self._fileDescriptor = fileDescriptor
        self._dataId = dataId

        # Check that the write parameters are allowed
        if writeParameters:
            if self.supportedWriteParameters is None:
                raise ValueError("This formatter does not accept any write parameters. "
                                 f"Got: {', '.join(writeParameters)}")
            else:
                given = set(writeParameters)
                unknown = given - self.supportedWriteParameters
                if unknown:
                    s = "s" if len(unknown) != 1 else ""
                    # Sort so that the error message is reproducible
                    # regardless of set iteration order.
                    unknownStr = ", ".join(f"'{u}'" for u in sorted(unknown))
                    raise ValueError(f"This formatter does not accept parameter{s} {unknownStr}")

        self._writeParameters = writeParameters
        self._writeRecipes = self.validateWriteRecipes(writeRecipes)

    def __str__(self) -> str:
        return f"{self.name()}@{self.fileDescriptor.location.path}"

    def __repr__(self) -> str:
        return f"{self.name()}({self.fileDescriptor!r})"

    @property
    def fileDescriptor(self) -> FileDescriptor:
        """FileDescriptor associated with this formatter
        (`FileDescriptor`, read-only)"""
        return self._fileDescriptor

    @property
    def dataId(self) -> Optional[DataCoordinate]:
        """DataId associated with this formatter (`DataCoordinate`)"""
        return self._dataId

    @property
    def writeParameters(self) -> Mapping[str, Any]:
        """Parameters to use when writing out datasets. An empty mapping
        is returned if none were supplied."""
        if self._writeParameters is not None:
            return self._writeParameters
        return {}

    @property
    def writeRecipes(self) -> Mapping[str, Any]:
        """Detailed write Recipes indexed by recipe name. An empty mapping
        is returned if none were supplied."""
        if self._writeRecipes is not None:
            return self._writeRecipes
        return {}

    @classmethod
    def validateWriteRecipes(cls, recipes: Optional[Mapping[str, Any]]) -> Optional[Mapping[str, Any]]:
        """Validate supplied recipes for this formatter.

        The recipes are supplemented with default values where appropriate.

        Parameters
        ----------
        recipes : `dict`
            Recipes to validate. Can be `None` or empty.

        Returns
        -------
        validated : `dict`
            Validated recipes. The default implementation returns the
            (empty or `None`) input unchanged.

        Raises
        ------
        RuntimeError
            Raised if validation fails.  The default implementation raises
            if any recipes are given.
        """
        if recipes:
            raise RuntimeError(f"This formatter does not understand these writeRecipes: {recipes}")
        return recipes

    @classmethod
    def name(cls) -> str:
        """Returns the fully qualified name of the formatter.

        Returns
        -------
        name : `str`
            Fully-qualified name of formatter class.
        """
        return getFullTypeName(cls)

    @abstractmethod
    def read(self, component: Optional[str] = None) -> Any:
        """Read a Dataset.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested Dataset.
        """
        raise NotImplementedError("Type does not support reading")

    @abstractmethod
    def write(self, inMemoryDataset: Any) -> str:
        """Write a Dataset.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.

        Returns
        -------
        path : `str`
            The path to where the Dataset was stored within the datastore.
        """
        raise NotImplementedError("Type does not support writing")

    def fromBytes(self, serializedDataset: bytes,
                  component: Optional[str] = None) -> object:
        """Reads serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError("Type does not support reading from bytes.")

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError("Type does not support writing to bytes.")

    @classmethod
    def makeUpdatedLocation(cls, location: Location) -> Location:
        """Return a new `Location` instance updated with this formatter's
        extension.

        Parameters
        ----------
        location : `Location`
            The location to update. It is not modified; a deep copy is
            updated and returned.

        Returns
        -------
        updated : `Location`
            A new `Location` with a new file extension applied.

        Raises
        ------
        NotImplementedError
            Raised if there is no ``extension`` attribute associated with
            this formatter.

        Notes
        -----
        This method is available to all Formatters but might not be
        implemented by all formatters. It requires that a formatter set
        an ``extension`` attribute containing the file extension used when
        writing files.  If ``extension`` is `None` the supplied file will
        not be updated. Not all formatters write files so this is not
        defined in the base class.
        """
        # Only the attribute lookup is guarded: an AttributeError raised
        # from inside ``updateExtension`` is a genuine error and must not be
        # misreported as a missing extension.
        try:
            # We are deliberately allowing extension to be undefined by
            # default in the base class and mypy complains.
            extension = cls.extension  # type:ignore
        except AttributeError:
            raise NotImplementedError("No file extension registered with this formatter") from None

        updated = copy.deepcopy(location)
        updated.updateExtension(extension)
        return updated

    @classmethod
    def predictPathFromLocation(cls, location: Location) -> str:
        """Return the path that would be returned by write, without actually
        writing.

        Parameters
        ----------
        location : `Location`
            Location of file for which path prediction is required.

        Returns
        -------
        path : `str`
            Path within datastore that would be associated with this location.
        """
        return cls.makeUpdatedLocation(location).pathInStore

    def predictPath(self) -> str:
        """Return the path that would be returned by write, without actually
        writing.

        Uses the `FileDescriptor` associated with the instance.

        Returns
        -------
        path : `str`
            Path within datastore that would be associated with the location
            stored in this `Formatter`.
        """
        return self.predictPathFromLocation(self.fileDescriptor.location)

    def segregateParameters(self, parameters: Optional[Dict[str, Any]] = None) -> Tuple[Dict, Dict]:
        """Segregate the supplied parameters into those understood by the
        formatter and those not understood by the formatter.

        Any unsupported parameters are assumed to be usable by associated
        assemblers.

        Parameters
        ----------
        parameters : `dict`, optional
            Parameters with values that have been supplied by the caller
            and which might be relevant for the formatter.  If `None`
            parameters will be read from the registered `FileDescriptor`.

        Returns
        -------
        supported : `dict`
            Those parameters supported by this formatter.
        unsupported : `dict`
            Those parameters not supported by this formatter.
        """

        if parameters is None:
            parameters = self.fileDescriptor.parameters

        if parameters is None:
            return {}, {}

        if self.unsupportedParameters is None:
            # Support none of the parameters
            return {}, parameters.copy()

        # Start by assuming all are supported
        supported = parameters.copy()
        unsupported = {}

        # And remove any we know are not supported. Iterate over a snapshot
        # of the keys because entries are popped during the loop.
        for p in list(supported):
            if p in self.unsupportedParameters:
                unsupported[p] = supported.pop(p)

        return supported, unsupported
class FormatterFactory:
    """Factory for `Formatter` instances.

    Formatters are registered against lookup keys and later retrieved,
    either as classes or as constructed instances, by supplying an entity
    from which candidate lookup keys are derived.
    """

    defaultKey = LookupKey("default")
    """Configuration key associated with default write parameter settings."""

    writeRecipesKey = LookupKey("write_recipes")
    """Configuration key associated with write recipes."""

    def __init__(self) -> None:
        self._mappingFactory = MappingFactory(Formatter)

    def __contains__(self, key: Union[LookupKey, str]) -> bool:
        """Indicates whether the supplied key is present in the factory.

        Parameters
        ----------
        key : `LookupKey`, `str` or objects with ``name`` attribute
            Key to use to lookup in the factory whether a corresponding
            formatter is present.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the factory.
        """
        return key in self._mappingFactory

    def registerFormatters(self, config: Config, *, universe: DimensionUniverse) -> None:
        """Bulk register formatters from a config.

        Parameters
        ----------
        config : `Config`
            ``formatters`` section of a configuration.
        universe : `DimensionUniverse`
            Set of all known dimensions, used to expand and validate any used
            in lookup keys.

        Raises
        ------
        RuntimeError
            Raised if the ``default`` or ``write_recipes`` section of the
            configuration is not a mapping.
        ValueError
            Raised if a formatter entry is neither a string nor a mapping,
            uses unexpected keys, or lacks the mandatory ``formatter`` key.

        Notes
        -----
        The configuration can include one level of hierarchy where an
        instrument-specific section can be defined to override more general
        formatter specifications.  This is represented in YAML using a
        key of form ``instrument<name>`` which can then define formatters
        that will be returned if a `DatasetRef` contains a matching instrument
        name in the data ID.

        The config is parsed using the function
        `~lsst.daf.butler.configSupport.processLookupConfigs`.

        The values for formatter entries can be either a simple string
        referring to a python type or a dict representing the formatter and
        parameters to be hard-coded into the formatter constructor. For
        the dict case the following keys are supported:

        - formatter: The python type to be used as the formatter class.
        - parameters: A further dict to be passed directly to the
            ``writeParameters`` Formatter constructor to seed it.
            These parameters are validated at instance creation and not at
            configuration.

        Additionally, a special ``default`` section can be defined that
        uses the formatter type (class) name as the keys and specifies
        default write parameters that should be used whenever an instance
        of that class is constructed.

        .. code-block:: yaml

           formatters:
             default:
               lsst.daf.butler.formatters.example.ExampleFormatter:
                 max: 10
                 min: 2
                 comment: Default comment
             calexp: lsst.daf.butler.formatters.example.ExampleFormatter
             coadd:
               formatter: lsst.daf.butler.formatters.example.ExampleFormatter
               parameters:
                 max: 5

        Any time an ``ExampleFormatter`` is constructed it will use those
        parameters. If an explicit entry later in the configuration specifies
        a different set of parameters, the two will be merged with the later
        entry taking priority.  In the example above ``calexp`` will use
        the default parameters but ``coadd`` will override the value for
        ``max``.

        Formatter configuration can also include a special section describing
        collections of write parameters that can be accessed through a
        simple label.  This allows common collections of options to be
        specified in one place in the configuration and reused later.
        The ``write_recipes`` section is indexed by Formatter class name
        and each key is the label to associate with the parameters.

        .. code-block:: yaml

           formatters:
             write_recipes:
               lsst.obs.base.fitsExposureFormatter.FitsExposureFormatter:
                 lossless:
                   ...
                 noCompression:
                   ...

        By convention a formatter that uses write recipes will support a
        ``recipe`` write parameter that will refer to a recipe name in
        the ``write_recipes`` component.  The `Formatter` will be constructed
        in the `FormatterFactory` with all the relevant recipes and
        will not attempt to filter by looking at ``writeParameters`` in
        advance.  See the specific formatter documentation for details on
        acceptable recipe options.
        """
        allowed_keys = {"formatter", "parameters"}

        contents = processLookupConfigs(config, allow_hierarchy=True, universe=universe)

        # Extract any default parameter settings
        defaultParameters = contents.get(self.defaultKey, {})
        if not isinstance(defaultParameters, Mapping):
            raise RuntimeError("Default formatter parameters in config can not be a single string"
                               f" (got: {type(defaultParameters)})")

        # Extract any global write recipes -- these are indexed by
        # Formatter class name. Validated the same way as the defaults:
        # anything that is not a mapping cannot be indexed by class name.
        writeRecipes = contents.get(self.writeRecipesKey, {})
        if not isinstance(writeRecipes, Mapping):
            raise RuntimeError(f"The formatters.{self.writeRecipesKey} section must refer to a dict"
                               f" not '{writeRecipes}'")

        for key, f in contents.items():
            # default is handled in a special way
            if key == self.defaultKey:
                continue
            if key == self.writeRecipesKey:
                continue

            # Can be a str or a dict.
            specificWriteParameters = {}
            if isinstance(f, str):
                formatter = f
            elif isinstance(f, Mapping):
                all_keys = set(f)
                unexpected_keys = all_keys - allowed_keys
                if unexpected_keys:
                    raise ValueError(f"Formatter {key} uses unexpected keys {unexpected_keys} in config")
                if "formatter" not in f:
                    raise ValueError(f"Mandatory 'formatter' key missing for formatter key {key}")
                formatter = f["formatter"]
                if "parameters" in f:
                    specificWriteParameters = f["parameters"]
                    # A ``parameters`` key with no value (e.g. left empty in
                    # the config) arrives as None; treat it as "no
                    # parameters" rather than failing in dict.update().
                    if specificWriteParameters is None:
                        specificWriteParameters = {}
            else:
                raise ValueError(f"Formatter for key {key} has unexpected value: '{f}'")

            # Apply any default parameters for this formatter. Deep copy so
            # that the shared defaults are never modified by the update.
            writeParameters = copy.deepcopy(defaultParameters.get(formatter, {}))
            writeParameters.update(specificWriteParameters)

            kwargs: Dict[str, Any] = {}
            if writeParameters:
                kwargs["writeParameters"] = writeParameters

            if formatter in writeRecipes:
                kwargs["writeRecipes"] = writeRecipes[formatter]

            self.registerFormatter(key, formatter, **kwargs)

    def getLookupKeys(self) -> Set[LookupKey]:
        """Retrieve the look up keys for all the registry entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching in the registry.
        """
        return self._mappingFactory.getLookupKeys()

    def getFormatterClassWithMatch(self, entity: Entity) -> Tuple[LookupKey, Type[Formatter],
                                                                  Dict[str, Any]]:
        """Get the matching formatter class along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided.  Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `type`
            The class of the registered formatter.
        formatter_kwargs : `dict`
            Keyword arguments that are associated with this formatter entry.
        """
        # A plain string is used directly as a single lookup key; any other
        # entity supplies its own candidate keys.
        names = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
        matchKey, formatter, formatter_kwargs = self._mappingFactory.getClassFromRegistryWithMatch(names)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter, formatter_kwargs

    def getFormatterClass(self, entity: Entity) -> Type[Formatter]:
        """Get the matching formatter class.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided.  Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.

        Returns
        -------
        formatter : `type`
            The class of the registered formatter.
        """
        _, formatter, _ = self.getFormatterClassWithMatch(entity)
        return formatter

    def getFormatterWithMatch(self, entity: Entity, *args: Any, **kwargs: Any) -> Tuple[LookupKey, Formatter]:
        """Get a new formatter instance along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided.  Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.
        args : `tuple`
            Positional arguments to pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to object constructor.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        # A plain string is used directly as a single lookup key; any other
        # entity supplies its own candidate keys.
        names = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
        matchKey, formatter = self._mappingFactory.getFromRegistryWithMatch(names, *args, **kwargs)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter

    def getFormatter(self, entity: Entity, *args: Any, **kwargs: Any) -> Formatter:
        """Get a new formatter instance.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided.  Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.
        args : `tuple`
            Positional arguments to pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to object constructor.

        Returns
        -------
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        _, formatter = self.getFormatterWithMatch(entity, *args, **kwargs)
        return formatter

    def registerFormatter(self, type_: Union[LookupKey, str, StorageClass, DatasetType],
                          formatter: str, *, overwrite: bool = False,
                          **kwargs: Any) -> None:
        """Register a `Formatter`.

        Parameters
        ----------
        type_ : `LookupKey`, `str`, `StorageClass` or `DatasetType`
            Type for which this formatter is to be used.  If a `LookupKey`
            is not provided, one will be constructed from the supplied string
            or by using the ``name`` property of the supplied entity.
        formatter : `str` or class of type `Formatter`
            Identifies a `Formatter` subclass to use for reading and writing
            Datasets of this type.  Can be a `Formatter` class.
        overwrite : `bool`, optional
            If `True` an existing entry will be replaced by the new value.
            Default is `False`.
        kwargs : `dict`
            Keyword arguments to always pass to object constructor when
            retrieved.

        Raises
        ------
        ValueError
            Raised if the formatter does not name a valid formatter type and
            ``overwrite`` is `False`.
        """
        self._mappingFactory.placeInRegistry(type_, formatter, overwrite=overwrite, **kwargs)
# Type to use when allowing a Formatter or its class name: accepts the
# formatter class name as a string, a Formatter subclass, or a Formatter
# instance.
FormatterParameter = Union[str, Type[Formatter], Formatter]