Coverage for python/lsst/daf/butler/core/formatter.py : 24%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("Formatter", "FormatterFactory", "FormatterParameter")

from abc import ABCMeta, abstractmethod
from collections.abc import Mapping
import logging
import copy
from typing import ClassVar, Set, AbstractSet, Union, Optional, Dict, Any, Tuple, Type, TYPE_CHECKING

from .configSupport import processLookupConfigs, LookupKey
from .mappingFactory import MappingFactory
from .utils import getFullTypeName
from .fileDescriptor import FileDescriptor
from .location import Location
from .config import Config
from .dimensions import DimensionUniverse
from .storageClass import StorageClass
from .datasets import DatasetType, DatasetRef

log = logging.getLogger(__name__)

# Define a new special type for functions that take "entity"
Entity = Union[DatasetType, DatasetRef, StorageClass, str]


if TYPE_CHECKING:
    from .dimensions import DataCoordinate


class Formatter(metaclass=ABCMeta):
    """Interface for reading and writing Datasets with a particular
    `StorageClass`.

    Parameters
    ----------
    fileDescriptor : `FileDescriptor`, optional
        Identifies the file to read or write, and the associated storage
        classes and parameter information. Its value can be `None` if the
        caller will never call `Formatter.read` or `Formatter.write`.
    dataId : `DataCoordinate`, optional
        Data ID associated with this formatter.
    writeParameters : `dict`, optional
        Any parameters to be hard-coded into this instance to control how
        the dataset is serialized.
    writeRecipes : `dict`, optional
        Detailed write recipes indexed by recipe name.
    """

    unsupportedParameters: ClassVar[Optional[AbstractSet[str]]] = frozenset()
    """Set of read parameters not understood by this `Formatter`. An empty set
    means all parameters are supported. `None` indicates that no parameters
    are supported (`frozenset`).
    """

    supportedWriteParameters: ClassVar[Optional[AbstractSet[str]]] = None
    """Parameters understood by this formatter that can be used to control
    how a dataset is serialized. `None` indicates that no parameters are
    supported."""

    supportedExtensions: ClassVar[AbstractSet[str]] = frozenset()
    """Set of all extensions supported by this formatter.

    Only expected to be populated by Formatters that write files. Any extension
    assigned to the ``extension`` property will be automatically included in
    the list of supported extensions."""

    def __init__(self, fileDescriptor: FileDescriptor, dataId: Optional[DataCoordinate] = None,
                 writeParameters: Optional[Dict[str, Any]] = None,
                 writeRecipes: Optional[Dict[str, Any]] = None):
        if not isinstance(fileDescriptor, FileDescriptor):
            raise TypeError("File descriptor must be a FileDescriptor")
        self._fileDescriptor = fileDescriptor
        self._dataId = dataId

        # Check that the write parameters are allowed
        if writeParameters:
            if self.supportedWriteParameters is None:
                raise ValueError("This formatter does not accept any write parameters. "
                                 f"Got: {', '.join(writeParameters)}")
            else:
                given = set(writeParameters)
                unknown = given - self.supportedWriteParameters
                if unknown:
                    s = "s" if len(unknown) != 1 else ""
                    unknownStr = ", ".join(f"'{u}'" for u in unknown)
                    raise ValueError(f"This formatter does not accept parameter{s} {unknownStr}")

        self._writeParameters = writeParameters
        self._writeRecipes = self.validateWriteRecipes(writeRecipes)
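
    # Example (illustrative sketch, hypothetical values, not in the original
    # source): a subclass declaring
    # ``supportedWriteParameters = frozenset({"recipe"})`` accepts
    # ``writeParameters={"recipe": "lossless"}``, while the check above raises
    # ValueError for ``writeParameters={"level": 9}``; with the default of
    # `None`, any write parameter at all is rejected.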

    def __str__(self) -> str:
        return f"{self.name()}@{self.fileDescriptor.location.path}"

    def __repr__(self) -> str:
        return f"{self.name()}({self.fileDescriptor!r})"

    @property
    def fileDescriptor(self) -> FileDescriptor:
        """FileDescriptor associated with this formatter
        (`FileDescriptor`, read-only)"""
        return self._fileDescriptor

    @property
    def dataId(self) -> Optional[DataCoordinate]:
        """DataId associated with this formatter (`DataCoordinate`)"""
        return self._dataId

    @property
    def writeParameters(self) -> Mapping[str, Any]:
        """Parameters to use when writing out datasets."""
        if self._writeParameters is not None:
            return self._writeParameters
        return {}

    @property
    def writeRecipes(self) -> Mapping[str, Any]:
        """Detailed write recipes indexed by recipe name."""
        if self._writeRecipes is not None:
            return self._writeRecipes
        return {}

    @classmethod
    def validateWriteRecipes(cls, recipes: Optional[Mapping[str, Any]]) -> Optional[Mapping[str, Any]]:
        """Validate supplied recipes for this formatter.

        The recipes are supplemented with default values where appropriate.

        Parameters
        ----------
        recipes : `dict`
            Recipes to validate.

        Returns
        -------
        validated : `dict`
            Validated recipes.

        Raises
        ------
        RuntimeError
            Raised if validation fails. The default implementation raises
            if any recipes are given.
        """
        if recipes:
            raise RuntimeError(f"This formatter does not understand these writeRecipes: {recipes}")
        return recipes

    @classmethod
    def name(cls) -> str:
        """Return the fully qualified name of the formatter.

        Returns
        -------
        name : `str`
            Fully-qualified name of formatter class.
        """
        return getFullTypeName(cls)

    @abstractmethod
    def read(self, component: Optional[str] = None) -> Any:
        """Read a Dataset.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested Dataset.
        """
        raise NotImplementedError("Type does not support reading")

    @abstractmethod
    def write(self, inMemoryDataset: Any) -> str:
        """Write a Dataset.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.

        Returns
        -------
        path : `str`
            The path to where the Dataset was stored within the datastore.
        """
        raise NotImplementedError("Type does not support writing")

    def fromBytes(self, serializedDataset: bytes,
                  component: Optional[str] = None) -> object:
        """Read serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.
        """
        raise NotImplementedError("Type does not support reading from bytes.")

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.
        """
        raise NotImplementedError("Type does not support writing to bytes.")

    def makeUpdatedLocation(self, location: Location) -> Location:
        """Return a new `Location` instance updated with this formatter's
        extension.

        Parameters
        ----------
        location : `Location`
            The location to update.

        Returns
        -------
        updated : `Location`
            A new `Location` with a new file extension applied.

        Raises
        ------
        NotImplementedError
            Raised if there is no ``extension`` attribute associated with
            this formatter.

        Notes
        -----
        This method is available to all Formatters but might not be
        implemented by all formatters. It requires that a formatter set
        an ``extension`` attribute containing the file extension used when
        writing files. If ``extension`` is `None` the supplied file will
        not be updated. Not all formatters write files so this is not
        defined in the base class.
        """
        location = copy.deepcopy(location)
        try:
            # We are deliberately allowing extension to be undefined by
            # default in the base class and mypy complains.
            location.updateExtension(self.extension)  # type: ignore
        except AttributeError:
            raise NotImplementedError("No file extension registered with this formatter") from None
        return location
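
    # Example (illustrative sketch, hypothetical values, not in the original
    # source): a formatter defining ``extension = ".json"`` turns a location
    # pointing at ``datasets/pvi_v1`` into ``datasets/pvi_v1.json`` via
    # ``makeUpdatedLocation``; a formatter without an ``extension`` attribute
    # gets NotImplementedError instead.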

    @classmethod
    def validateExtension(cls, location: Location) -> None:
        """Check that the provided location refers to a file extension that is
        understood by this formatter.

        Parameters
        ----------
        location : `Location`
            Location from which to extract a file extension.

        Raises
        ------
        NotImplementedError
            Raised if file extensions are a concept not understood by this
            formatter.
        ValueError
            Raised if the formatter does not understand this extension.

        Notes
        -----
        This method is available to all Formatters but might not be
        implemented by all formatters. It requires that a formatter set
        an ``extension`` attribute containing the file extension used when
        writing files. If ``extension`` is `None` only the set of supported
        extensions will be examined.
        """
        supported = set(cls.supportedExtensions)

        try:
            # We are deliberately allowing extension to be undefined by
            # default in the base class and mypy complains.
            default = cls.extension  # type: ignore
        except AttributeError:
            raise NotImplementedError("No file extension registered with this formatter") from None

        # If extension is implemented as an instance property it won't return
        # a string when called as a class property. Assume that
        # the supported extensions class property is complete.
        if default is not None and isinstance(default, str):
            supported.add(default)

        # Get the file name from the uri
        file = location.uri.basename()

        # Check that this file name ends with one of the supported extensions.
        # This is less prone to confusion than asking the location for
        # its extension and then doing a set comparison
        for ext in supported:
            if file.endswith(ext):
                return

        raise ValueError(f"Extension '{location.getExtension()}' on '{location}' "
                         f"is not supported by Formatter '{cls.__name__}' (supports: {supported})")

    def predictPath(self) -> str:
        """Return the path that would be returned by write, without actually
        writing.

        Uses the `FileDescriptor` associated with the instance.

        Returns
        -------
        path : `str`
            Path within datastore that would be associated with the location
            stored in this `Formatter`.
        """
        updated = self.makeUpdatedLocation(self.fileDescriptor.location)
        return updated.pathInStore

    def segregateParameters(self, parameters: Optional[Dict[str, Any]] = None) -> Tuple[Dict, Dict]:
        """Segregate the supplied parameters into those understood by the
        formatter and those not understood by the formatter.

        Any unsupported parameters are assumed to be usable by associated
        assemblers.

        Parameters
        ----------
        parameters : `dict`, optional
            Parameters with values that have been supplied by the caller
            and which might be relevant for the formatter. If `None`
            parameters will be read from the registered `FileDescriptor`.

        Returns
        -------
        supported : `dict`
            Those parameters supported by this formatter.
        unsupported : `dict`
            Those parameters not supported by this formatter.
        """

        if parameters is None:
            parameters = self.fileDescriptor.parameters

        if parameters is None:
            return {}, {}

        if self.unsupportedParameters is None:
            # Support none of the parameters
            return {}, parameters.copy()

        # Start by assuming all are supported
        supported = parameters.copy()
        unsupported = {}

        # And remove any we know are not supported
        for p in set(supported):
            if p in self.unsupportedParameters:
                unsupported[p] = supported.pop(p)

        return supported, unsupported
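

# The following class is an illustrative sketch added for documentation and is
# not part of the original module: a minimal concrete Formatter might look like
# this, assuming a simple plain-text serialization. The class name, the
# ``encoding`` write parameter, and the behaviour are examples only.
class _ExampleTextFormatter(Formatter):
    """Sketch of a `Formatter` that reads and writes plain text files."""

    extension = ".txt"
    supportedWriteParameters = frozenset({"encoding"})

    def read(self, component: Optional[str] = None) -> Any:
        # Read the whole file named by the FileDescriptor; components are not
        # supported by this sketch.
        encoding = self.writeParameters.get("encoding", "utf-8")
        with open(self.fileDescriptor.location.path, "r", encoding=encoding) as fd:
            return fd.read()

    def write(self, inMemoryDataset: Any) -> str:
        # Apply this formatter's extension to the location and write the
        # string form of the dataset.
        location = self.makeUpdatedLocation(self.fileDescriptor.location)
        encoding = self.writeParameters.get("encoding", "utf-8")
        with open(location.path, "w", encoding=encoding) as fd:
            fd.write(str(inMemoryDataset))
        return location.pathInStore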


class FormatterFactory:
    """Factory for `Formatter` instances.
    """

    defaultKey = LookupKey("default")
    """Configuration key associated with default write parameter settings."""

    writeRecipesKey = LookupKey("write_recipes")
    """Configuration key associated with write recipes."""

    def __init__(self) -> None:
        self._mappingFactory = MappingFactory(Formatter)

    def __contains__(self, key: Union[LookupKey, str]) -> bool:
        """Indicate whether the supplied key is present in the factory.

        Parameters
        ----------
        key : `LookupKey`, `str` or objects with ``name`` attribute
            Key to use to lookup in the factory whether a corresponding
            formatter is present.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the factory.
        """
        return key in self._mappingFactory

    def registerFormatters(self, config: Config, *, universe: DimensionUniverse) -> None:
        """Bulk register formatters from a config.

        Parameters
        ----------
        config : `Config`
            ``formatters`` section of a configuration.
        universe : `DimensionUniverse`
            Set of all known dimensions, used to expand and validate any used
            in lookup keys.

        Notes
        -----
        The configuration can include one level of hierarchy where an
        instrument-specific section can be defined to override more general
        template specifications. This is represented in YAML using a
        key of form ``instrument<name>`` which can then define templates
        that will be returned if a `DatasetRef` contains a matching instrument
        name in the data ID.

        The config is parsed using the function
        `~lsst.daf.butler.configSupport.processLookupConfigs`.

        The values for formatter entries can be either a simple string
        referring to a python type or a dict representing the formatter and
        parameters to be hard-coded into the formatter constructor. For
        the dict case the following keys are supported:

        - formatter: The python type to be used as the formatter class.
        - parameters: A further dict to be passed directly to the
          ``writeParameters`` Formatter constructor to seed it.
          These parameters are validated at instance creation and not at
          configuration.

        Additionally, a special ``default`` section can be defined that
        uses the formatter type (class) name as the keys and specifies
        default write parameters that should be used whenever an instance
        of that class is constructed.

        .. code-block:: yaml

           formatters:
             default:
               lsst.daf.butler.formatters.example.ExampleFormatter:
                 max: 10
                 min: 2
                 comment: Default comment
             calexp: lsst.daf.butler.formatters.example.ExampleFormatter
             coadd:
               formatter: lsst.daf.butler.formatters.example.ExampleFormatter
               parameters:
                 max: 5

        Any time an ``ExampleFormatter`` is constructed it will use those
        parameters. If an explicit entry later in the configuration specifies
        a different set of parameters, the two will be merged with the later
        entry taking priority. In the example above ``calexp`` will use
        the default parameters but ``coadd`` will override the value for
        ``max``.

        Formatter configuration can also include a special section describing
        collections of write parameters that can be accessed through a
        simple label. This allows common collections of options to be
        specified in one place in the configuration and reused later.
        The ``write_recipes`` section is indexed by Formatter class name
        and each key is the label to associate with the parameters.

        .. code-block:: yaml

           formatters:
             write_recipes:
               lsst.obs.base.formatters.fitsExposure.FitsExposureFormatter:
                 lossless:
                   ...
                 noCompression:
                   ...

        By convention a formatter that uses write recipes will support a
        ``recipe`` write parameter that will refer to a recipe name in
        the ``write_recipes`` component. The `Formatter` will be constructed
        in the `FormatterFactory` with all the relevant recipes and
        will not attempt to filter by looking at ``writeParameters`` in
        advance. See the specific formatter documentation for details on
        acceptable recipe options.
        """
        allowed_keys = {"formatter", "parameters"}

        contents = processLookupConfigs(config, allow_hierarchy=True, universe=universe)

        # Extract any default parameter settings
        defaultParameters = contents.get(self.defaultKey, {})
        if not isinstance(defaultParameters, Mapping):
            raise RuntimeError("Default formatter parameters in config can not be a single string"
                               f" (got: {type(defaultParameters)})")

        # Extract any global write recipes -- these are indexed by
        # Formatter class name.
        writeRecipes = contents.get(self.writeRecipesKey, {})
        if isinstance(writeRecipes, str):
            raise RuntimeError(f"The formatters.{self.writeRecipesKey} section must refer to a dict"
                               f" not '{writeRecipes}'")

        for key, f in contents.items():
            # default is handled in a special way
            if key == self.defaultKey:
                continue
            if key == self.writeRecipesKey:
                continue

            # Can be a str or a dict.
            specificWriteParameters = {}
            if isinstance(f, str):
                formatter = f
            elif isinstance(f, Mapping):
                all_keys = set(f)
                unexpected_keys = all_keys - allowed_keys
                if unexpected_keys:
                    raise ValueError(f"Formatter {key} uses unexpected keys {unexpected_keys} in config")
                if "formatter" not in f:
                    raise ValueError(f"Mandatory 'formatter' key missing for formatter key {key}")
                formatter = f["formatter"]
                if "parameters" in f:
                    specificWriteParameters = f["parameters"]
            else:
                raise ValueError(f"Formatter for key {key} has unexpected value: '{f}'")

            # Apply any default parameters for this formatter
            writeParameters = copy.deepcopy(defaultParameters.get(formatter, {}))
            writeParameters.update(specificWriteParameters)

            kwargs: Dict[str, Any] = {}
            if writeParameters:
                kwargs["writeParameters"] = writeParameters

            if formatter in writeRecipes:
                kwargs["writeRecipes"] = writeRecipes[formatter]

            self.registerFormatter(key, formatter, **kwargs)

    def getLookupKeys(self) -> Set[LookupKey]:
        """Retrieve the look up keys for all the registry entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching in the registry.
        """
        return self._mappingFactory.getLookupKeys()

    def getFormatterClassWithMatch(self, entity: Entity) -> Tuple[LookupKey, Type[Formatter],
                                                                  Dict[str, Any]]:
        """Get the matching formatter class along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `type`
            The class of the registered formatter.
        formatter_kwargs : `dict`
            Keyword arguments that are associated with this formatter entry.
        """
        names = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
        matchKey, formatter, formatter_kwargs = self._mappingFactory.getClassFromRegistryWithMatch(names)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter, formatter_kwargs

    def getFormatterClass(self, entity: Entity) -> Type:
        """Get the matching formatter class.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.

        Returns
        -------
        formatter : `type`
            The class of the registered formatter.
        """
        _, formatter, _ = self.getFormatterClassWithMatch(entity)
        return formatter

    def getFormatterWithMatch(self, entity: Entity, *args: Any, **kwargs: Any) -> Tuple[LookupKey, Formatter]:
        """Get a new formatter instance along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.
        args : `tuple`
            Positional arguments to pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to the object constructor.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        names = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
        matchKey, formatter = self._mappingFactory.getFromRegistryWithMatch(names, *args, **kwargs)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter

    def getFormatter(self, entity: Entity, *args: Any, **kwargs: Any) -> Formatter:
        """Get a new formatter instance.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.
        args : `tuple`
            Positional arguments to pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to the object constructor.

        Returns
        -------
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        _, formatter = self.getFormatterWithMatch(entity, *args, **kwargs)
        return formatter

    def registerFormatter(self, type_: Union[LookupKey, str, StorageClass, DatasetType],
                          formatter: str, *, overwrite: bool = False,
                          **kwargs: Any) -> None:
        """Register a `Formatter`.

        Parameters
        ----------
        type_ : `LookupKey`, `str`, `StorageClass` or `DatasetType`
            Type for which this formatter is to be used. If a `LookupKey`
            is not provided, one will be constructed from the supplied string
            or by using the ``name`` property of the supplied entity.
        formatter : `str` or class of type `Formatter`
            Identifies a `Formatter` subclass to use for reading and writing
            Datasets of this type. Can be a `Formatter` class.
        overwrite : `bool`, optional
            If `True` an existing entry will be replaced by the new value.
            Default is `False`.
        kwargs : `dict`
            Keyword arguments to always pass to object constructor when
            retrieved.

        Raises
        ------
        ValueError
            Raised if the formatter does not name a valid formatter type and
            ``overwrite`` is `False`.
        """
        self._mappingFactory.placeInRegistry(type_, formatter, overwrite=overwrite, **kwargs)


# Type to use when allowing a Formatter or its class name
FormatterParameter = Union[str, Type[Formatter], Formatter]
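

# The following function is an illustrative sketch added for documentation and
# is not part of the original module. It shows direct use of FormatterFactory
# with the example formatter above and a made-up lookup key; in practice
# formatters are registered in bulk from a Datastore configuration through
# registerFormatters().
def _exampleFactoryUsage() -> Type[Formatter]:
    factory = FormatterFactory()

    # Register the sketch formatter for a hypothetical StorageClass name and
    # attach write parameters that will be passed to its constructor whenever
    # an instance is created by the factory.
    factory.registerFormatter("ExampleText", _ExampleTextFormatter,
                              writeParameters={"encoding": "utf-8"})

    # A plain string entity is turned into a LookupKey and matched against the
    # registry; the class (not an instance) is returned.
    return factory.getFormatterClass("ExampleText")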