Coverage for python/lsst/daf/butler/core/formatter.py : 24%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("Formatter", "FormatterFactory", "FormatterParameter")
26from abc import ABCMeta, abstractmethod
27from collections.abc import Mapping
28import logging
29import copy
30from typing import ClassVar, Set, AbstractSet, Union, Optional, Dict, Any, Tuple, Type, TYPE_CHECKING
32from .configSupport import processLookupConfigs, LookupKey
33from .mappingFactory import MappingFactory
34from .utils import getFullTypeName
35from .fileDescriptor import FileDescriptor
36from .location import Location
37from .config import Config
38from .dimensions import DimensionUniverse
39from .storageClass import StorageClass
40from .datasets import DatasetType, DatasetRef
log = logging.getLogger(__name__)

# Define a new special type for functions that take "entity"
Entity = Union[DatasetType, DatasetRef, StorageClass, str]


if TYPE_CHECKING:
    from .dimensions import DataCoordinate
class Formatter(metaclass=ABCMeta):
    """Interface for reading and writing Datasets with a particular
    `StorageClass`.

    Parameters
    ----------
    fileDescriptor : `FileDescriptor`, optional
        Identifies the file to read or write, and the associated storage
        classes and parameter information.  Its value can be `None` if the
        caller will never call `Formatter.read` or `Formatter.write`.
    dataId : `DataCoordinate`, optional
        Data ID associated with this formatter.
    writeParameters : `dict`, optional
        Any parameters to be hard-coded into this instance to control how
        the dataset is serialized.
    writeRecipes : `dict`, optional
        Detailed write recipes indexed by recipe name.  Validated during
        construction by `validateWriteRecipes`.
    """

    unsupportedParameters: ClassVar[Optional[AbstractSet[str]]] = frozenset()
    """Set of read parameters not understood by this `Formatter`. An empty set
    means all parameters are supported.  `None` indicates that no parameters
    are supported (`frozenset`).
    """

    supportedWriteParameters: ClassVar[Optional[AbstractSet[str]]] = None
    """Parameters understood by this formatter that can be used to control
    how a dataset is serialized. `None` indicates that no parameters are
    supported."""

    supportedExtensions: ClassVar[AbstractSet[str]] = frozenset()
    """Set of all extensions supported by this formatter.

    Only expected to be populated by Formatters that write files. Any
    extension assigned to the ``extension`` property will be automatically
    included in the list of supported extensions."""

    def __init__(self, fileDescriptor: FileDescriptor, dataId: Optional[DataCoordinate] = None,
                 writeParameters: Optional[Dict[str, Any]] = None,
                 writeRecipes: Optional[Dict[str, Any]] = None):
        if not isinstance(fileDescriptor, FileDescriptor):
            raise TypeError("File descriptor must be a FileDescriptor")
        self._fileDescriptor = fileDescriptor
        self._dataId = dataId

        # Check that the write parameters are allowed.
        if writeParameters:
            if self.supportedWriteParameters is None:
                raise ValueError("This formatter does not accept any write parameters. "
                                 f"Got: {', '.join(writeParameters)}")
            else:
                given = set(writeParameters)
                unknown = given - self.supportedWriteParameters
                if unknown:
                    s = "s" if len(unknown) != 1 else ""
                    unknownStr = ", ".join(f"'{u}'" for u in unknown)
                    raise ValueError(f"This formatter does not accept parameter{s} {unknownStr}")

        self._writeParameters = writeParameters
        self._writeRecipes = self.validateWriteRecipes(writeRecipes)

    def __str__(self) -> str:
        return f"{self.name()}@{self.fileDescriptor.location.path}"

    def __repr__(self) -> str:
        return f"{self.name()}({self.fileDescriptor!r})"

    @property
    def fileDescriptor(self) -> FileDescriptor:
        """FileDescriptor associated with this formatter
        (`FileDescriptor`, read-only)"""
        return self._fileDescriptor

    @property
    def dataId(self) -> Optional[DataCoordinate]:
        """DataId associated with this formatter (`DataCoordinate`)"""
        return self._dataId

    @property
    def writeParameters(self) -> Mapping[str, Any]:
        """Parameters to use when writing out datasets."""
        if self._writeParameters is not None:
            return self._writeParameters
        return {}

    @property
    def writeRecipes(self) -> Mapping[str, Any]:
        """Detailed write Recipes indexed by recipe name."""
        if self._writeRecipes is not None:
            return self._writeRecipes
        return {}

    @classmethod
    def validateWriteRecipes(cls, recipes: Optional[Mapping[str, Any]]) -> Optional[Mapping[str, Any]]:
        """Validate supplied recipes for this formatter.

        The recipes are supplemented with default values where appropriate.

        Parameters
        ----------
        recipes : `dict`
            Recipes to validate.

        Returns
        -------
        validated : `dict`
            Validated recipes.

        Raises
        ------
        RuntimeError
            Raised if validation fails.  The default implementation raises
            if any recipes are given.
        """
        if recipes:
            raise RuntimeError(f"This formatter does not understand these writeRecipes: {recipes}")
        return recipes

    @classmethod
    def name(cls) -> str:
        """Returns the fully qualified name of the formatter.

        Returns
        -------
        name : `str`
            Fully-qualified name of formatter class.
        """
        return getFullTypeName(cls)

    @abstractmethod
    def read(self, component: Optional[str] = None) -> Any:
        """Read a Dataset.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file.  Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested Dataset.
        """
        raise NotImplementedError("Type does not support reading")

    @abstractmethod
    def write(self, inMemoryDataset: Any) -> str:
        """Write a Dataset.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.

        Returns
        -------
        path : `str`
            The path to where the Dataset was stored within the datastore.
        """
        raise NotImplementedError("Type does not support writing")

    def fromBytes(self, serializedDataset: bytes,
                  component: Optional[str] = None) -> object:
        """Reads serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset.  Only used if the
            `StorageClass` for reading differed from the `StorageClass` used
            to write the file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object.  The type of object
            is controlled by the specific formatter.
        """
        raise NotImplementedError("Type does not support reading from bytes.")

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.
        """
        raise NotImplementedError("Type does not support writing to bytes.")

    def makeUpdatedLocation(self, location: Location) -> Location:
        """Return a new `Location` instance updated with this formatter's
        extension.

        Parameters
        ----------
        location : `Location`
            The location to update.

        Returns
        -------
        updated : `Location`
            A new `Location` with a new file extension applied.

        Raises
        ------
        NotImplementedError
            Raised if there is no ``extension`` attribute associated with
            this formatter.

        Notes
        -----
        This method is available to all Formatters but might not be
        implemented by all formatters. It requires that a formatter set
        an ``extension`` attribute containing the file extension used when
        writing files.  If ``extension`` is `None` the supplied file will
        not be updated. Not all formatters write files so this is not
        defined in the base class.
        """
        # Copy so the caller's Location is never mutated.
        location = copy.deepcopy(location)
        try:
            # We are deliberately allowing extension to be undefined by
            # default in the base class and mypy complains.
            location.updateExtension(self.extension)  # type:ignore
        except AttributeError:
            raise NotImplementedError("No file extension registered with this formatter") from None
        return location

    @classmethod
    def validateExtension(cls, location: Location) -> None:
        """Check that the provided location refers to a file extension that is
        understood by this formatter.

        Parameters
        ----------
        location : `Location`
            Location from which to extract a file extension.

        Raises
        ------
        NotImplementedError
            Raised if file extensions are a concept not understood by this
            formatter.
        ValueError
            Raised if the formatter does not understand this extension.

        Notes
        -----
        This method is available to all Formatters but might not be
        implemented by all formatters. It requires that a formatter set
        an ``extension`` attribute containing the file extension used when
        writing files. If ``extension`` is `None` only the set of supported
        extensions will be examined.
        """
        ext = location.getExtension()
        supported = set(cls.supportedExtensions)

        try:
            # We are deliberately allowing extension to be undefined by
            # default in the base class and mypy complains.
            default = cls.extension  # type: ignore
        except AttributeError:
            raise NotImplementedError("No file extension registered with this formatter") from None

        # The declared default extension is implicitly supported.
        if default is not None:
            supported.add(default)

        if ext in supported:
            return
        raise ValueError(f"Extension '{ext}' on '{location}' is not supported by Formatter '{cls.__name__}'")

    def predictPath(self) -> str:
        """Return the path that would be returned by write, without actually
        writing.

        Uses the `FileDescriptor` associated with the instance.

        Returns
        -------
        path : `str`
            Path within datastore that would be associated with the location
            stored in this `Formatter`.
        """
        updated = self.makeUpdatedLocation(self.fileDescriptor.location)
        return updated.pathInStore

    def segregateParameters(self, parameters: Optional[Dict[str, Any]] = None) -> Tuple[Dict, Dict]:
        """Segregate the supplied parameters into those understood by the
        formatter and those not understood by the formatter.

        Any unsupported parameters are assumed to be usable by associated
        assemblers.

        Parameters
        ----------
        parameters : `dict`, optional
            Parameters with values that have been supplied by the caller
            and which might be relevant for the formatter.  If `None`
            parameters will be read from the registered `FileDescriptor`.

        Returns
        -------
        supported : `dict`
            Those parameters supported by this formatter.
        unsupported : `dict`
            Those parameters not supported by this formatter.
        """
        if parameters is None:
            parameters = self.fileDescriptor.parameters

        if parameters is None:
            return {}, {}

        if self.unsupportedParameters is None:
            # Support none of the parameters
            return {}, parameters.copy()

        # Start by assuming all are supported
        supported = parameters.copy()
        unsupported = {}

        # And remove any we know are not supported
        for p in set(supported):
            if p in self.unsupportedParameters:
                unsupported[p] = supported.pop(p)

        return supported, unsupported
class FormatterFactory:
    """Factory for `Formatter` instances.
    """

    defaultKey = LookupKey("default")
    """Configuration key associated with default write parameter settings."""

    writeRecipesKey = LookupKey("write_recipes")
    """Configuration key associated with write recipes."""

    def __init__(self) -> None:
        self._mappingFactory = MappingFactory(Formatter)

    def __contains__(self, key: Union[LookupKey, str]) -> bool:
        """Indicates whether the supplied key is present in the factory.

        Parameters
        ----------
        key : `LookupKey`, `str` or objects with ``name`` attribute
            Key to use to lookup in the factory whether a corresponding
            formatter is present.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the factory.
        """
        return key in self._mappingFactory

    def registerFormatters(self, config: Config, *, universe: DimensionUniverse) -> None:
        """Bulk register formatters from a config.

        Parameters
        ----------
        config : `Config`
            ``formatters`` section of a configuration.
        universe : `DimensionUniverse`, optional
            Set of all known dimensions, used to expand and validate any used
            in lookup keys.

        Notes
        -----
        The configuration can include one level of hierarchy where an
        instrument-specific section can be defined to override more general
        template specifications.  This is represented in YAML using a
        key of form ``instrument<name>`` which can then define templates
        that will be returned if a `DatasetRef` contains a matching instrument
        name in the data ID.

        The config is parsed using the function
        `~lsst.daf.butler.configSubset.processLookupConfigs`.

        The values for formatter entries can be either a simple string
        referring to a python type or a dict representing the formatter and
        parameters to be hard-coded into the formatter constructor.  For
        the dict case the following keys are supported:

        - formatter: The python type to be used as the formatter class.
        - parameters: A further dict to be passed directly to the
          ``writeParameters`` Formatter constructor to seed it.
          These parameters are validated at instance creation and not at
          configuration.

        Additionally, a special ``default`` section can be defined that
        uses the formatter type (class) name as the keys and specifies
        default write parameters that should be used whenever an instance
        of that class is constructed.

        .. code-block:: yaml

           formatters:
             default:
               lsst.daf.butler.formatters.example.ExampleFormatter:
                 max: 10
                 min: 2
                 comment: Default comment
             calexp: lsst.daf.butler.formatters.example.ExampleFormatter
             coadd:
               formatter: lsst.daf.butler.formatters.example.ExampleFormatter
               parameters:
                 max: 5

        Any time an ``ExampleFormatter`` is constructed it will use those
        parameters.  If an explicit entry later in the configuration specifies
        a different set of parameters, the two will be merged with the later
        entry taking priority.  In the example above ``calexp`` will use
        the default parameters but ``coadd`` will override the value for
        ``max``.

        Formatter configuration can also include a special section describing
        collections of write parameters that can be accessed through a
        simple label.  This allows common collections of options to be
        specified in one place in the configuration and reused later.
        The ``write_recipes`` section is indexed by Formatter class name
        and each key is the label to associate with the parameters.

        .. code-block:: yaml

           formatters:
             write_recipes:
               lsst.obs.base.formatters.fitsExposure.FitsExposureFormatter:
                 lossless:
                   ...
                 noCompression:
                   ...

        By convention a formatter that uses write recipes will support a
        ``recipe`` write parameter that will refer to a recipe name in
        the ``write_recipes`` component.  The `Formatter` will be constructed
        in the `FormatterFactory` with all the relevant recipes and
        will not attempt to filter by looking at ``writeParameters`` in
        advance.  See the specific formatter documentation for details on
        acceptable recipe options.
        """
        allowed_keys = {"formatter", "parameters"}

        contents = processLookupConfigs(config, allow_hierarchy=True, universe=universe)

        # Extract any default parameter settings
        defaultParameters = contents.get(self.defaultKey, {})
        if not isinstance(defaultParameters, Mapping):
            raise RuntimeError("Default formatter parameters in config can not be a single string"
                               f" (got: {type(defaultParameters)})")

        # Extract any global write recipes -- these are indexed by
        # Formatter class name.
        writeRecipes = contents.get(self.writeRecipesKey, {})
        if isinstance(writeRecipes, str):
            raise RuntimeError(f"The formatters.{self.writeRecipesKey} section must refer to a dict"
                               f" not '{writeRecipes}'")

        for key, f in contents.items():
            # default and write_recipes are handled in a special way above.
            if key == self.defaultKey:
                continue
            if key == self.writeRecipesKey:
                continue

            # Can be a str or a dict.
            specificWriteParameters = {}
            if isinstance(f, str):
                formatter = f
            elif isinstance(f, Mapping):
                all_keys = set(f)
                unexpected_keys = all_keys - allowed_keys
                if unexpected_keys:
                    raise ValueError(f"Formatter {key} uses unexpected keys {unexpected_keys} in config")
                if "formatter" not in f:
                    raise ValueError(f"Mandatory 'formatter' key missing for formatter key {key}")
                formatter = f["formatter"]
                if "parameters" in f:
                    specificWriteParameters = f["parameters"]
            else:
                raise ValueError(f"Formatter for key {key} has unexpected value: '{f}'")

            # Apply any default parameters for this formatter; the specific
            # entry takes priority over the class-level defaults.
            writeParameters = copy.deepcopy(defaultParameters.get(formatter, {}))
            writeParameters.update(specificWriteParameters)

            kwargs: Dict[str, Any] = {}
            if writeParameters:
                kwargs["writeParameters"] = writeParameters

            if formatter in writeRecipes:
                kwargs["writeRecipes"] = writeRecipes[formatter]

            self.registerFormatter(key, formatter, **kwargs)

    def getLookupKeys(self) -> Set[LookupKey]:
        """Retrieve the look up keys for all the registry entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching in the registry.
        """
        return self._mappingFactory.getLookupKeys()

    def getFormatterClassWithMatch(self, entity: Entity) -> Tuple[LookupKey, Type[Formatter],
                                                                  Dict[str, Any]]:
        """Get the matching formatter class along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided.  Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `type`
            The class of the registered formatter.
        formatter_kwargs : `dict`
            Keyword arguments that are associated with this formatter entry.
        """
        names = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
        matchKey, formatter, formatter_kwargs = self._mappingFactory.getClassFromRegistryWithMatch(names)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter, formatter_kwargs

    def getFormatterClass(self, entity: Entity) -> Type[Formatter]:
        """Get the matching formatter class.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided.  Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.

        Returns
        -------
        formatter : `type`
            The class of the registered formatter.
        """
        _, formatter, _ = self.getFormatterClassWithMatch(entity)
        return formatter

    def getFormatterWithMatch(self, entity: Entity, *args: Any, **kwargs: Any) -> Tuple[LookupKey, Formatter]:
        """Get a new formatter instance along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided.  Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.
        args : `tuple`
            Positional arguments to use pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to object constructor.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        names = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
        matchKey, formatter = self._mappingFactory.getFromRegistryWithMatch(names, *args, **kwargs)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter

    def getFormatter(self, entity: Entity, *args: Any, **kwargs: Any) -> Formatter:
        """Get a new formatter instance.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided.  Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.
        args : `tuple`
            Positional arguments to use pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to object constructor.

        Returns
        -------
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        _, formatter = self.getFormatterWithMatch(entity, *args, **kwargs)
        return formatter

    def registerFormatter(self, type_: Union[LookupKey, str, StorageClass, DatasetType],
                          formatter: str, *, overwrite: bool = False,
                          **kwargs: Any) -> None:
        """Register a `Formatter`.

        Parameters
        ----------
        type_ : `LookupKey`, `str`, `StorageClass` or `DatasetType`
            Type for which this formatter is to be used.  If a `LookupKey`
            is not provided, one will be constructed from the supplied string
            or by using the ``name`` property of the supplied entity.
        formatter : `str` or class of type `Formatter`
            Identifies a `Formatter` subclass to use for reading and writing
            Datasets of this type.  Can be a `Formatter` class.
        overwrite : `bool`, optional
            If `True` an existing entry will be replaced by the new value.
            Default is `False`.
        kwargs : `dict`
            Keyword arguments to always pass to object constructor when
            retrieved.

        Raises
        ------
        ValueError
            Raised if the formatter does not name a valid formatter type and
            ``overwrite`` is `False`.
        """
        self._mappingFactory.placeInRegistry(type_, formatter, overwrite=overwrite, **kwargs)
# Type to use when allowing a Formatter or its class name
FormatterParameter = Union[str, Type[Formatter], Formatter]