Coverage for python/lsst/daf/butler/core/formatter.py : 41%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("Formatter", "FormatterFactory", "FormatterParameter")

from abc import ABCMeta, abstractmethod
import logging
import copy
from typing import ClassVar, Set, FrozenSet, Union, Optional, Dict, Any, Tuple, Type, TYPE_CHECKING

from .configSupport import processLookupConfigs, LookupKey
from .mappingFactory import MappingFactory
from .utils import getFullTypeName
from .fileDescriptor import FileDescriptor
from .location import Location
from .config import Config
from .dimensions import DimensionUniverse
from .storageClass import StorageClass
from .datasets import DatasetType, DatasetRef

log = logging.getLogger(__name__)

# Define a new special type for functions that take "entity"
Entity = Union[DatasetType, DatasetRef, StorageClass, str]


if TYPE_CHECKING:
    from .dimensions import DataCoordinate


class Formatter(metaclass=ABCMeta):
    """Interface for reading and writing Datasets with a particular
    `StorageClass`.

    Parameters
    ----------
    fileDescriptor : `FileDescriptor`, optional
        Identifies the file to read or write, and the associated storage
        classes and parameter information. Its value can be `None` if the
        caller will never call `Formatter.read` or `Formatter.write`.
    """

    unsupportedParameters: ClassVar[Optional[Union[FrozenSet[str], Set[str]]]] = frozenset()
    """Set of parameters not understood by this `Formatter`. An empty set means
    all parameters are supported. `None` indicates that no parameters
    are supported (`frozenset`).
    """

    extension: Optional[str] = None
    """File extension default provided by this formatter."""

    def __init__(self, fileDescriptor: FileDescriptor, dataId: Optional[DataCoordinate] = None):
        if not isinstance(fileDescriptor, FileDescriptor):
            raise TypeError("File descriptor must be a FileDescriptor")
        self._fileDescriptor = fileDescriptor
        self._dataId = dataId

    def __str__(self) -> str:
        return f"{self.name()}@{self.fileDescriptor.location.path}"

    def __repr__(self) -> str:
        return f"{self.name()}({self.fileDescriptor!r})"

    @property
    def fileDescriptor(self) -> FileDescriptor:
        """FileDescriptor associated with this formatter
        (`FileDescriptor`, read-only)"""
        return self._fileDescriptor

    @property
    def dataId(self) -> Optional[DataCoordinate]:
        """DataId associated with this formatter (`DataCoordinate`)"""
        return self._dataId

    @classmethod
    def name(cls) -> str:
        """Returns the fully qualified name of the formatter.

        Returns
        -------
        name : `str`
            Fully-qualified name of formatter class.
        """
        return getFullTypeName(cls)

    @abstractmethod
    def read(self, component: Optional[str] = None) -> object:
        """Read a Dataset.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested Dataset.
        """
        raise NotImplementedError("Type does not support reading")

    @abstractmethod
    def write(self, inMemoryDataset: Any) -> str:
        """Write a Dataset.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.

        Returns
        -------
        path : `str`
            The path to where the Dataset was stored within the datastore.
        """
        raise NotImplementedError("Type does not support writing")

    def fromBytes(self, serializedDataset: bytes,
                  component: Optional[str] = None) -> object:
        """Reads serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.
        """
        raise NotImplementedError("Type does not support reading from bytes.")

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.
        """
        raise NotImplementedError("Type does not support writing to bytes.")

    @classmethod
    def makeUpdatedLocation(cls, location: Location) -> Location:
        """Return a new `Location` instance updated with this formatter's
        extension.

        Parameters
        ----------
        location : `Location`
            The location to update.

        Returns
        -------
        updated : `Location`
            The updated location with a new file extension applied.

        Raises
        ------
        NotImplementedError
            Raised if there is no ``extension`` attribute associated with
            this formatter.
        """
        location = copy.deepcopy(location)
        try:
            location.updateExtension(cls.extension)
        except AttributeError:
            raise NotImplementedError("No file extension registered with this formatter") from None
        return location

    @classmethod
    def predictPathFromLocation(cls, location: Location) -> str:
        """Return the path that would be returned by write, without actually
        writing.

        Parameters
        ----------
        location : `Location`
            Location of file for which path prediction is required.

        Returns
        -------
        path : `str`
            Path within datastore that would be associated with this location.
        """
        return cls.makeUpdatedLocation(location).pathInStore

    def predictPath(self) -> str:
        """Return the path that would be returned by write, without actually
        writing.

        Uses the `FileDescriptor` associated with the instance.

        Returns
        -------
        path : `str`
            Path within datastore that would be associated with the location
            stored in this `Formatter`.
        """
        return self.predictPathFromLocation(self.fileDescriptor.location)

    def segregateParameters(self, parameters: Optional[Dict[str, Any]] = None) -> Tuple[Dict, Dict]:
        """Segregate the supplied parameters into those understood by the
        formatter and those not understood by the formatter.

        Any unsupported parameters are assumed to be usable by associated
        assemblers.

        Parameters
        ----------
        parameters : `dict`, optional
            Parameters with values that have been supplied by the caller
            and which might be relevant for the formatter. If `None`
            parameters will be read from the registered `FileDescriptor`.

        Returns
        -------
        supported : `dict`
            Those parameters supported by this formatter.
        unsupported : `dict`
            Those parameters not supported by this formatter.
        """

        if parameters is None:
            parameters = self.fileDescriptor.parameters

        if parameters is None:
            return {}, {}

        if self.unsupportedParameters is None:
            # Support none of the parameters
            return {}, parameters.copy()

        # Start by assuming all are supported
        supported = parameters.copy()
        unsupported = {}

        # And remove any we know are not supported
        for p in set(supported):
            if p in self.unsupportedParameters:
                unsupported[p] = supported.pop(p)

        return supported, unsupported
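

# --- Illustrative example (not part of the original module) -----------------
# A minimal sketch of a concrete Formatter, assuming a plain-text payload and
# a local filesystem path. ``TextFormatter`` is hypothetical; real formatters
# in daf_butler follow the same pattern but also handle storage class
# conversion, components, and parameters.
class TextFormatter(Formatter):
    """Read and write `str` Datasets as plain text files (example only)."""

    extension = ".txt"

    def read(self, component: Optional[str] = None) -> object:
        # Read the file identified by the FileDescriptor back into memory.
        with open(self.fileDescriptor.location.path, "r") as fd:
            return fd.read()

    def write(self, inMemoryDataset: Any) -> str:
        # Apply this formatter's extension before writing, then report the
        # path within the datastore.
        location = self.makeUpdatedLocation(self.fileDescriptor.location)
        with open(location.path, "w") as fd:
            fd.write(str(inMemoryDataset))
        return location.pathInStore
# -----------------------------------------------------------------------------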


class FormatterFactory:
    """Factory for `Formatter` instances."""

    def __init__(self) -> None:
        self._mappingFactory = MappingFactory(Formatter)

    def __contains__(self, key: Union[LookupKey, str]) -> bool:
        """Indicates whether the supplied key is present in the factory.

        Parameters
        ----------
        key : `LookupKey`, `str` or objects with ``name`` attribute
            Key to use to look up in the factory whether a corresponding
            formatter is present.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the factory.
        """
        return key in self._mappingFactory

    def registerFormatters(self, config: Config, *, universe: DimensionUniverse) -> None:
        """Bulk register formatters from a config.

        Parameters
        ----------
        config : `Config`
            ``formatters`` section of a configuration.
        universe : `DimensionUniverse`
            Set of all known dimensions, used to expand and validate any used
            in lookup keys.

        Notes
        -----
        The configuration can include one level of hierarchy where an
        instrument-specific section can be defined to override more general
        template specifications. This is represented in YAML using a
        key of form ``instrument<name>`` which can then define templates
        that will be returned if a `DatasetRef` contains a matching instrument
        name in the data ID.

        The config is parsed using the function
        `~lsst.daf.butler.configSupport.processLookupConfigs`.
        """
        contents = processLookupConfigs(config, universe=universe)
        for key, f in contents.items():
            self.registerFormatter(key, f)
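
    # --- Illustrative example (not part of the original module) -------------
    # A hedged sketch of the one-level hierarchy described above. The storage
    # class, dataset type, and formatter module paths are hypothetical; the
    # ``instrument<HSC>`` block overrides the general ``calexp`` entry when a
    # DatasetRef's data ID has instrument "HSC".
    #
    #   formatters:
    #     TextData: my.package.formatters.TextFormatter
    #     calexp: my.package.formatters.ExposureFormatter
    #     instrument<HSC>:
    #       calexp: my.package.formatters.HscCalexpFormatter
    #
    # Assuming ``formatterConfig`` is a `Config` wrapping the mapping under
    # ``formatters`` and ``universe`` is the registry's `DimensionUniverse`,
    # bulk registration would then look roughly like:
    #
    #   factory = FormatterFactory()
    #   factory.registerFormatters(formatterConfig, universe=universe)
    # -------------------------------------------------------------------------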

    def getLookupKeys(self) -> Set[LookupKey]:
        """Retrieve the lookup keys for all the registry entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching in the registry.
        """
        return self._mappingFactory.getLookupKeys()

    def getFormatterClassWithMatch(self, entity: Entity) -> Tuple[LookupKey, Type[Formatter]]:
        """Get the matching formatter class along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `type`
            The class of the registered formatter.
        """
        names = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
        matchKey, formatter = self._mappingFactory.getClassFromRegistryWithMatch(names)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter

    def getFormatterClass(self, entity: Entity) -> Type[Formatter]:
        """Get the matching formatter class.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.

        Returns
        -------
        formatter : `type`
            The class of the registered formatter.
        """
        _, formatter = self.getFormatterClassWithMatch(entity)
        return formatter

    def getFormatterWithMatch(self, entity: Entity, *args: Any, **kwargs: Any) -> Tuple[LookupKey, Formatter]:
        """Get a new formatter instance along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.
        args : `tuple`
            Positional arguments to pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to the object constructor.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        names = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
        matchKey, formatter = self._mappingFactory.getFromRegistryWithMatch(names, *args, **kwargs)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter

    def getFormatter(self, entity: Entity, *args: Any, **kwargs: Any) -> Formatter:
        """Get a new formatter instance.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.
        args : `tuple`
            Positional arguments to pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to the object constructor.

        Returns
        -------
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        _, formatter = self.getFormatterWithMatch(entity, *args, **kwargs)
        return formatter

    def registerFormatter(self, type_: Union[LookupKey, str, StorageClass, DatasetType],
                          formatter: str, overwrite: bool = False) -> None:
        """Register a `Formatter`.

        Parameters
        ----------
        type_ : `LookupKey`, `str`, `StorageClass` or `DatasetType`
            Type for which this formatter is to be used. If a `LookupKey`
            is not provided, one will be constructed from the supplied string
            or by using the ``name`` property of the supplied entity.
        formatter : `str` or class of type `Formatter`
            Identifies a `Formatter` subclass to use for reading and writing
            Datasets of this type. Can be a `Formatter` class.
        overwrite : `bool`, optional
            If `True` an existing entry will be replaced by the new value.
            Default is `False`.

        Raises
        ------
        ValueError
            Raised if the formatter does not name a valid formatter type and
            ``overwrite`` is `False`.
        """
        self._mappingFactory.placeInRegistry(type_, formatter, overwrite=overwrite)


# Type to use when allowing a Formatter or its class name
FormatterParameter = Union[str, Type[Formatter], Formatter]
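
# --- Illustrative usage (not part of the original module) -------------------
# A minimal sketch of registering and looking up a formatter by name, using
# the hypothetical TextFormatter defined in the example above.  String
# entities are wrapped in a LookupKey, so the same name must be used for
# registration and retrieval.
#
#   factory = FormatterFactory()
#   factory.registerFormatter("TextData", TextFormatter)
#
#   "TextData" in factory                        # True, via __contains__
#   cls = factory.getFormatterClass("TextData")  # -> TextFormatter
#   key, cls = factory.getFormatterClassWithMatch("TextData")
#
# Instantiating a formatter (getFormatter / getFormatterWithMatch) would also
# require a FileDescriptor describing the target location and storage classes.
# -----------------------------------------------------------------------------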