Coverage for python/lsst/daf/butler/core/formatter.py : 28%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("Formatter", "FormatterFactory", "FormatterParameter")
26from abc import ABCMeta, abstractmethod
27from collections.abc import Mapping
28import logging
29import copy
30from typing import ClassVar, Set, AbstractSet, Union, Optional, Dict, Any, Tuple, Type, TYPE_CHECKING
32from .configSupport import processLookupConfigs, LookupKey
33from .mappingFactory import MappingFactory
34from .utils import getFullTypeName
35from .fileDescriptor import FileDescriptor
36from .location import Location
37from .config import Config
38from .dimensions import DimensionUniverse
39from .storageClass import StorageClass
40from .datasets import DatasetType, DatasetRef
# Module-level logger for this package.
log = logging.getLogger(__name__)

# Define a new special type for functions that take "entity"
Entity = Union[DatasetType, DatasetRef, StorageClass, str]

# Imported only for static type checking; not needed at runtime.
if TYPE_CHECKING:
    from .dimensions import DataCoordinate
class Formatter(metaclass=ABCMeta):
    """Interface for reading and writing Datasets with a particular
    `StorageClass`.

    Parameters
    ----------
    fileDescriptor : `FileDescriptor`, optional
        Identifies the file to read or write, and the associated storage
        classes and parameter information.  Its value can be `None` if the
        caller will never call `Formatter.read` or `Formatter.write`.
    dataId : `DataCoordinate`, optional
        Data ID associated with this formatter.
    writeParameters : `dict`, optional
        Any parameters to be hard-coded into this instance to control how
        the dataset is serialized.

    Raises
    ------
    TypeError
        Raised if ``fileDescriptor`` is not a `FileDescriptor`.
    ValueError
        Raised if ``writeParameters`` contains a parameter not listed in
        ``supportedWriteParameters``.
    """

    unsupportedParameters: ClassVar[Optional[AbstractSet[str]]] = frozenset()
    """Set of read parameters not understood by this `Formatter`. An empty set
    means all parameters are supported.  `None` indicates that no parameters
    are supported (`frozenset`).
    """

    supportedWriteParameters: ClassVar[Optional[AbstractSet[str]]] = None
    """Parameters understood by this formatter that can be used to control
    how a dataset is serialized. `None` indicates that no parameters are
    supported."""

    extension: Optional[str] = None
    """File extension default provided by this formatter."""

    def __init__(self, fileDescriptor: FileDescriptor,
                 dataId: Optional[DataCoordinate] = None,
                 writeParameters: Optional[Dict[str, Any]] = None):
        if not isinstance(fileDescriptor, FileDescriptor):
            raise TypeError("File descriptor must be a FileDescriptor")
        self._fileDescriptor = fileDescriptor
        self._dataId = dataId

        # Check that the write parameters are allowed.
        if writeParameters:
            if self.supportedWriteParameters is None:
                raise ValueError("This formatter does not accept any write parameters. "
                                 f"Got: {', '.join(writeParameters)}")
            given = set(writeParameters)
            unknown = given - self.supportedWriteParameters
            if unknown:
                # Pluralize the message depending on how many offenders.
                s = "s" if len(unknown) != 1 else ""
                unknownStr = ", ".join(f"'{u}'" for u in unknown)
                raise ValueError(f"This formatter does not accept parameter{s} {unknownStr}")

        self._writeParameters = writeParameters

    def __str__(self) -> str:
        return f"{self.name()}@{self.fileDescriptor.location.path}"

    def __repr__(self) -> str:
        return f"{self.name()}({self.fileDescriptor!r})"

    @property
    def fileDescriptor(self) -> FileDescriptor:
        """FileDescriptor associated with this formatter
        (`FileDescriptor`, read-only)"""
        return self._fileDescriptor

    @property
    def dataId(self) -> Optional[DataCoordinate]:
        """DataId associated with this formatter (`DataCoordinate`)"""
        return self._dataId

    @property
    def writeParameters(self) -> Mapping:
        """Parameters supplied to control how datasets are written by this
        formatter (`~collections.abc.Mapping`, read-only).  An empty mapping
        is returned if none were supplied at construction."""
        if self._writeParameters is not None:
            return self._writeParameters
        return {}

    @classmethod
    def name(cls) -> str:
        """Returns the fully qualified name of the formatter.

        Returns
        -------
        name : `str`
            Fully-qualified name of formatter class.
        """
        return getFullTypeName(cls)

    @abstractmethod
    def read(self, component: Optional[str] = None) -> Any:
        """Read a Dataset.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested Dataset.
        """
        raise NotImplementedError("Type does not support reading")

    @abstractmethod
    def write(self, inMemoryDataset: Any) -> str:
        """Write a Dataset.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.

        Returns
        -------
        path : `str`
            The path to where the Dataset was stored within the datastore.
        """
        raise NotImplementedError("Type does not support writing")

    def fromBytes(self, serializedDataset: bytes,
                  component: Optional[str] = None) -> object:
        """Reads serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.
        """
        raise NotImplementedError("Type does not support reading from bytes.")

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.
        """
        raise NotImplementedError("Type does not support writing to bytes.")

    @classmethod
    def makeUpdatedLocation(cls, location: Location) -> Location:
        """Return a new `Location` instance updated with this formatter's
        extension.

        Parameters
        ----------
        location : `Location`
            The location to update.

        Returns
        -------
        updated : `Location`
            The updated location with a new file extension applied.

        Raises
        ------
        NotImplementedError
            Raised if there is no ``extension`` attribute associated with
            this formatter.
        """
        # Work on a copy so the caller's Location is never mutated.
        location = copy.deepcopy(location)
        try:
            location.updateExtension(cls.extension)
        except AttributeError:
            raise NotImplementedError("No file extension registered with this formatter") from None
        return location

    @classmethod
    def predictPathFromLocation(cls, location: Location) -> str:
        """Return the path that would be returned by write, without actually
        writing.

        Parameters
        ----------
        location : `Location`
            Location of file for which path prediction is required.

        Returns
        -------
        path : `str`
            Path within datastore that would be associated with this location.
        """
        return cls.makeUpdatedLocation(location).pathInStore

    def predictPath(self) -> str:
        """Return the path that would be returned by write, without actually
        writing.

        Uses the `FileDescriptor` associated with the instance.

        Returns
        -------
        path : `str`
            Path within datastore that would be associated with the location
            stored in this `Formatter`.
        """
        return self.predictPathFromLocation(self.fileDescriptor.location)

    def segregateParameters(self, parameters: Optional[Dict[str, Any]] = None) -> Tuple[Dict, Dict]:
        """Segregate the supplied parameters into those understood by the
        formatter and those not understood by the formatter.

        Any unsupported parameters are assumed to be usable by associated
        assemblers.

        Parameters
        ----------
        parameters : `dict`, optional
            Parameters with values that have been supplied by the caller
            and which might be relevant for the formatter.  If `None`
            parameters will be read from the registered `FileDescriptor`.

        Returns
        -------
        supported : `dict`
            Those parameters supported by this formatter.
        unsupported : `dict`
            Those parameters not supported by this formatter.
        """
        if parameters is None:
            parameters = self.fileDescriptor.parameters

        if parameters is None:
            return {}, {}

        if self.unsupportedParameters is None:
            # Support none of the parameters.
            return {}, parameters.copy()

        # Start by assuming all are supported.
        supported = parameters.copy()
        unsupported = {}

        # And remove any we know are not supported.
        for p in set(supported):
            if p in self.unsupportedParameters:
                unsupported[p] = supported.pop(p)

        return supported, unsupported
class FormatterFactory:
    """Factory for `Formatter` instances.
    """

    defaultKey = LookupKey("default")
    """Configuration key associated with default write parameter settings."""

    def __init__(self) -> None:
        self._mappingFactory = MappingFactory(Formatter)

    def __contains__(self, key: Union[LookupKey, str]) -> bool:
        """Indicates whether the supplied key is present in the factory.

        Parameters
        ----------
        key : `LookupKey`, `str` or objects with ``name`` attribute
            Key to use to lookup in the factory whether a corresponding
            formatter is present.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the factory.
        """
        return key in self._mappingFactory

    def registerFormatters(self, config: Config, *, universe: DimensionUniverse) -> None:
        """Bulk register formatters from a config.

        Parameters
        ----------
        config : `Config`
            ``formatters`` section of a configuration.
        universe : `DimensionUniverse`, optional
            Set of all known dimensions, used to expand and validate any used
            in lookup keys.

        Raises
        ------
        RuntimeError
            Raised if the ``default`` section is a single string rather than
            a mapping of formatter names to parameters.
        ValueError
            Raised if a formatter entry is malformed: unexpected keys, a
            missing mandatory ``formatter`` key, or a value that is neither
            a string nor a mapping.

        Notes
        -----
        The configuration can include one level of hierarchy where an
        instrument-specific section can be defined to override more general
        template specifications.  This is represented in YAML using a
        key of form ``instrument<name>`` which can then define templates
        that will be returned if a `DatasetRef` contains a matching instrument
        name in the data ID.

        The config is parsed using the function
        `~lsst.daf.butler.configSubset.processLookupConfigs`.

        The values for formatter entries can be either a simple string
        referring to a python type or a dict representing the formatter and
        parameters to be hard-coded into the formatter constructor.  For
        the dict case the following keys are supported:

        - formatter: The python type to be used as the formatter class.
        - parameters: A further dict to be passed directly to the
          ``writeParameters`` Formatter constructor to seed it.
          These parameters are validated at instance creation and not at
          configuration.

        Additionally, a special ``default`` section can be defined that
        uses the formatter type (class) name as the keys and specifies
        default write parameters that should be used whenever an instance
        of that class is constructed.

        .. code-block:: yaml

           formatters:
             default:
               lsst.daf.butler.formatters.example.ExampleFormatter:
                 max: 10
                 min: 2
                 comment: Default comment
             calexp: lsst.daf.butler.formatters.example.ExampleFormatter
             coadd:
               formatter: lsst.daf.butler.formatters.example.ExampleFormatter
               parameters:
                 max: 5

        Any time an ``ExampleFormatter`` is constructed it will use those
        parameters.  If an explicit entry later in the configuration specifies
        a different set of parameters, the two will be merged with the later
        entry taking priority.  In the example above ``calexp`` will use
        the default parameters but ``coadd`` will override the value for
        ``max``.
        """
        allowed_keys = {"formatter", "parameters"}

        contents = processLookupConfigs(config, allow_hierarchy=True, universe=universe)

        # Extract any default parameter settings.
        defaultParameters = contents.get(self.defaultKey, {})
        if not isinstance(defaultParameters, Mapping):
            raise RuntimeError("Default formatter parameters in config can not be a single string"
                               f" (got: {type(defaultParameters)})")

        for key, f in contents.items():
            # The default section is handled in a special way above.
            if key == self.defaultKey:
                continue

            # Each entry can be a plain string (the formatter type) or a
            # dict with "formatter" and optional "parameters" keys.
            specificWriteParameters = {}
            if isinstance(f, str):
                formatter = f
            elif isinstance(f, Mapping):
                all_keys = set(f)
                unexpected_keys = all_keys - allowed_keys
                if unexpected_keys:
                    raise ValueError(f"Formatter {key} uses unexpected keys {unexpected_keys} in config")
                if "formatter" not in f:
                    raise ValueError(f"Mandatory 'formatter' key missing for formatter key {key}")
                formatter = f["formatter"]
                if "parameters" in f:
                    specificWriteParameters = f["parameters"]
            else:
                raise ValueError(f"Formatter for key {key} has unexpected value: '{f}'")

            # Apply any default parameters for this formatter.  Copy the
            # defaults into a fresh dict first: the mapping returned by
            # ``defaultParameters.get`` is shared between every entry that
            # uses the same formatter class, and updating it in place would
            # contaminate the defaults seen by later entries.
            writeParameters = dict(defaultParameters.get(formatter, {}))
            writeParameters.update(specificWriteParameters)

            kwargs: Dict[str, Any] = {}
            if writeParameters:
                kwargs["writeParameters"] = writeParameters

            self.registerFormatter(key, formatter, **kwargs)

    def getLookupKeys(self) -> Set[LookupKey]:
        """Retrieve the look up keys for all the registry entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching in the registry.
        """
        return self._mappingFactory.getLookupKeys()

    def getFormatterClassWithMatch(self, entity: Entity) -> Tuple[LookupKey, Type[Formatter],
                                                                  Dict[str, Any]]:
        """Get the matching formatter class along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided.  Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `type`
            The class of the registered formatter.
        formatter_kwargs : `dict`
            Keyword arguments that are associated with this formatter entry.
        """
        names = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
        matchKey, formatter, formatter_kwargs = self._mappingFactory.getClassFromRegistryWithMatch(names)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter, formatter_kwargs

    def getFormatterClass(self, entity: Entity) -> Type:
        """Get the matching formatter class.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided.  Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.

        Returns
        -------
        formatter : `type`
            The class of the registered formatter.
        """
        _, formatter, _ = self.getFormatterClassWithMatch(entity)
        return formatter

    def getFormatterWithMatch(self, entity: Entity, *args: Any, **kwargs: Any) -> Tuple[LookupKey, Formatter]:
        """Get a new formatter instance along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided.  Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.
        args : `tuple`
            Positional arguments to use pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to object constructor.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        names = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
        matchKey, formatter = self._mappingFactory.getFromRegistryWithMatch(names, *args, **kwargs)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter

    def getFormatter(self, entity: Entity, *args: Any, **kwargs: Any) -> Formatter:
        """Get a new formatter instance.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided.  Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.
        args : `tuple`
            Positional arguments to use pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to object constructor.

        Returns
        -------
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        _, formatter = self.getFormatterWithMatch(entity, *args, **kwargs)
        return formatter

    def registerFormatter(self, type_: Union[LookupKey, str, StorageClass, DatasetType],
                          formatter: str, *, overwrite: bool = False,
                          **kwargs: Any) -> None:
        """Register a `Formatter`.

        Parameters
        ----------
        type_ : `LookupKey`, `str`, `StorageClass` or `DatasetType`
            Type for which this formatter is to be used.  If a `LookupKey`
            is not provided, one will be constructed from the supplied string
            or by using the ``name`` property of the supplied entity.
        formatter : `str` or class of type `Formatter`
            Identifies a `Formatter` subclass to use for reading and writing
            Datasets of this type.  Can be a `Formatter` class.
        overwrite : `bool`, optional
            If `True` an existing entry will be replaced by the new value.
            Default is `False`.
        kwargs : `dict`
            Keyword arguments to always pass to object constructor when
            retrieved.

        Raises
        ------
        ValueError
            Raised if the formatter does not name a valid formatter type and
            ``overwrite`` is `False`.
        """
        self._mappingFactory.placeInRegistry(type_, formatter, overwrite=overwrite, **kwargs)
# Type to use when allowing a Formatter or its class name: accepts the
# fully-qualified name of a formatter class as a string, a Formatter
# subclass, or a Formatter instance.
FormatterParameter = Union[str, Type[Formatter], Formatter]