Coverage for python/lsst/daf/butler/core/formatter.py : 38%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("Formatter", "FormatterFactory", "FormatterParameter")

from abc import ABCMeta, abstractmethod
import logging
import copy
from typing import ClassVar, Set, FrozenSet, Union, Optional, Dict, Any, Tuple, Type, TYPE_CHECKING

from .configSupport import processLookupConfigs, LookupKey
from .mappingFactory import MappingFactory
from .utils import getFullTypeName
from .fileDescriptor import FileDescriptor
from .location import Location
from .config import Config
from .dimensions import DimensionUniverse
from .storageClass import StorageClass
from .datasets import DatasetType, DatasetRef

log = logging.getLogger(__name__)

# Define a new special type for functions that take "entity"
Entity = Union[DatasetType, DatasetRef, StorageClass, str]


if TYPE_CHECKING:
    from .dimensions import DataCoordinate


class Formatter(metaclass=ABCMeta):
    """Interface for reading and writing Datasets with a particular
    `StorageClass`.

    Parameters
    ----------
    fileDescriptor : `FileDescriptor`
        Identifies the file to read or write, and the associated storage
        classes and parameter information.
    dataId : `DataCoordinate`, optional
        Data ID associated with the Dataset being read or written, if known.
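
    Examples
    --------
    A minimal sketch of a concrete subclass that stores plain text, using
    only the attributes documented in this module (the class name, the
    extension value and the use of built-in ``open`` are illustrative, not
    part of the package)::

        class TextFormatter(Formatter):
            # Extension consumed by makeUpdatedLocation/predictPath
            extension = ".txt"

            def read(self, component=None):
                # Return the entire file contents as a str
                with open(self.fileDescriptor.location.path, "r") as fd:
                    return fd.read()

            def write(self, inMemoryDataset):
                # Apply the formatter extension before writing
                location = self.makeUpdatedLocation(self.fileDescriptor.location)
                with open(location.path, "w") as fd:
                    fd.write(inMemoryDataset)
                return location.pathInStore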
61 """
63 unsupportedParameters: ClassVar[Optional[Union[FrozenSet[str], Set[str]]]] = frozenset()
64 """Set of parameters not understood by this `Formatter`. An empty set means
65 all parameters are supported. `None` indicates that no parameters
66 are supported (`frozenset`).
67 """

    def __init__(self, fileDescriptor: FileDescriptor, dataId: Optional[DataCoordinate] = None):
        if not isinstance(fileDescriptor, FileDescriptor):
            raise TypeError("File descriptor must be a FileDescriptor")
        self._fileDescriptor = fileDescriptor
        self._dataId = dataId

    def __str__(self):
        return f"{self.name()}@{self.fileDescriptor.location.path}"

    def __repr__(self):
        return f"{self.name()}({self.fileDescriptor!r})"

    @property
    def fileDescriptor(self) -> FileDescriptor:
        """FileDescriptor associated with this formatter
        (`FileDescriptor`, read-only)"""
        return self._fileDescriptor

    @property
    def dataId(self) -> Optional[DataCoordinate]:
        """Data ID associated with this formatter, or `None` if not known
        (`DataCoordinate`, read-only)."""
        return self._dataId

    @classmethod
    def name(cls) -> str:
        """Return the fully qualified name of the formatter.

        Returns
        -------
        name : `str`
            Fully-qualified name of formatter class.
        """
        return getFullTypeName(cls)

    @abstractmethod
    def read(self, component: Optional[str] = None) -> object:
        """Read a Dataset.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differs from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested Dataset.
        """
        raise NotImplementedError("Type does not support reading")

    @abstractmethod
    def write(self, inMemoryDataset: Any) -> str:
        """Write a Dataset.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.

        Returns
        -------
        path : `str`
            The path to where the Dataset was stored within the datastore.
        """
        raise NotImplementedError("Type does not support writing")

    def fromBytes(self, serializedDataset: bytes,
                  component: Optional[str] = None) -> object:
        """Read serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to deserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differs from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.
        """
        raise NotImplementedError("Type does not support reading from bytes.")

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.
        """
        raise NotImplementedError("Type does not support writing to bytes.")

    @classmethod
    def makeUpdatedLocation(cls, location: Location) -> Location:
        """Return a new `Location` instance updated with this formatter's
        extension.

        Parameters
        ----------
        location : `Location`
            The location to update.

        Returns
        -------
        updated : `Location`
            The updated location with a new file extension applied.

        Raises
        ------
        NotImplementedError
            Raised if there is no ``extension`` attribute associated with
            this formatter.
        """
        location = copy.deepcopy(location)
        try:
            location.updateExtension(cls.extension)
        except AttributeError:
            raise NotImplementedError("No file extension registered with this formatter") from None
        return location

    @classmethod
    def predictPathFromLocation(cls, location: Location) -> str:
        """Return the path that would be returned by write, without actually
        writing.

        Parameters
        ----------
        location : `Location`
            Location of file for which path prediction is required.

        Returns
        -------
        path : `str`
            Path within datastore that would be associated with this location.
        """
        return cls.makeUpdatedLocation(location).pathInStore

    def predictPath(self) -> str:
        """Return the path that would be returned by write, without actually
        writing.

        Uses the `FileDescriptor` associated with the instance.

        Returns
        -------
        path : `str`
            Path within datastore that would be associated with the location
            stored in this `Formatter`.
        """
        return self.predictPathFromLocation(self.fileDescriptor.location)

    def segregateParameters(self, parameters: Optional[Dict[str, Any]] = None) -> Tuple[Dict, Dict]:
        """Segregate the supplied parameters into those understood by the
        formatter and those it does not understand.

        Any unsupported parameters are assumed to be usable by associated
        assemblers.

        Parameters
        ----------
        parameters : `dict`, optional
            Parameters with values that have been supplied by the caller
            and which might be relevant for the formatter. If `None`,
            parameters will be read from the registered `FileDescriptor`.

        Returns
        -------
        supported : `dict`
            Those parameters supported by this formatter.
        unsupported : `dict`
            Those parameters not supported by this formatter.
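
        Examples
        --------
        A sketch for a formatter declaring, say,
        ``unsupportedParameters = frozenset({"columns"})`` (an illustrative
        value)::

            supported, unsupported = formatter.segregateParameters(
                {"columns": ["a"], "format": "json"}
            )
            # supported == {"format": "json"}
            # unsupported == {"columns": ["a"]}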
252 """
254 if parameters is None:
255 parameters = self.fileDescriptor.parameters
257 if parameters is None:
258 return {}, {}
260 if self.unsupportedParameters is None:
261 # Support none of the parameters
262 return {}, parameters.copy()
264 # Start by assuming all are supported
265 supported = parameters.copy()
266 unsupported = {}
268 # And remove any we know are not supported
269 for p in set(supported):
270 if p in self.unsupportedParameters:
271 unsupported[p] = supported.pop(p)
273 return supported, unsupported


class FormatterFactory:
    """Factory for `Formatter` instances.
    """

    def __init__(self):
        self._mappingFactory = MappingFactory(Formatter)

    def __contains__(self, key):
        """Indicate whether the supplied key is present in the factory.

        Parameters
        ----------
        key : `LookupKey`, `str` or object with a ``name`` attribute
            Key to use to look up whether a corresponding formatter is
            present in the factory.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the factory.
        """
        return key in self._mappingFactory

    def registerFormatters(self, config: Config, *, universe: DimensionUniverse) -> None:
        """Bulk register formatters from a config.

        Parameters
        ----------
        config : `Config`
            ``formatters`` section of a configuration.
        universe : `DimensionUniverse`
            Set of all known dimensions, used to expand and validate any
            dimensions used in lookup keys.

        Notes
        -----
        The configuration can include one level of hierarchy where an
        instrument-specific section can be defined to override more general
        formatter specifications. This is represented in YAML using a
        key of form ``instrument<name>`` which can then define formatters
        that will be returned if a `DatasetRef` contains a matching instrument
        name in the data ID.

        The config is parsed using the function
        `~lsst.daf.butler.configSupport.processLookupConfigs`.
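
        For example, assuming ``universe`` is the relevant `DimensionUniverse`,
        a ``formatters`` section equivalent to the following sketch (the
        formatter class paths and the ``HSC`` override are purely illustrative)
        registers one default formatter and one instrument-specific override::

            config = Config({
                "StructuredDataJson":
                    "lsst.daf.butler.formatters.jsonFormatter.JsonFormatter",
                "instrument<HSC>": {
                    "StructuredDataJson":
                        "lsst.obs.example.ExampleJsonFormatter",
                },
            })
            factory.registerFormatters(config, universe=universe)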
321 """
322 contents = processLookupConfigs(config, universe=universe)
323 for key, f in contents.items():
324 self.registerFormatter(key, f)

    def getLookupKeys(self) -> Set[LookupKey]:
        """Retrieve the lookup keys for all the registry entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching in the registry.
        """
        return self._mappingFactory.getLookupKeys()

    def getFormatterClassWithMatch(self, entity: Entity) -> Tuple[LookupKey, Type]:
        """Get the matching formatter class along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            If a `DatasetRef` or `DatasetType` instance is provided, its
            `StorageClass` will be used as a last resort. An instrument
            override is supported if a `DatasetRef` with an ``instrument``
            value in its data ID is provided.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `type`
            The class of the registered formatter.
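
        Examples
        --------
        A sketch, assuming ``ref`` is a `DatasetRef`; the returned key records
        which registry entry produced the match::

            matchKey, FormatterClass = factory.getFormatterClassWithMatch(ref)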
355 """
356 if isinstance(entity, str):
357 names = (entity,)
358 else:
359 names = entity._lookupNames()
360 matchKey, formatter = self._mappingFactory.getClassFromRegistryWithMatch(names)
361 log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
362 matchKey, entity)
364 return matchKey, formatter

    def getFormatterClass(self, entity: Entity) -> Type:
        """Get the matching formatter class.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            If a `DatasetRef` or `DatasetType` instance is provided, its
            `StorageClass` will be used as a last resort. An instrument
            override is supported if a `DatasetRef` with an ``instrument``
            value in its data ID is provided.

        Returns
        -------
        formatter : `type`
            The class of the registered formatter.
        """
        _, formatter = self.getFormatterClassWithMatch(entity)
        return formatter

    def getFormatterWithMatch(self, entity: Entity, *args, **kwargs) -> Tuple[LookupKey, Formatter]:
        """Get a new formatter instance along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            If a `DatasetRef` or `DatasetType` instance is provided, its
            `StorageClass` will be used as a last resort. An instrument
            override is supported if a `DatasetRef` with an ``instrument``
            value in its data ID is provided.
        args : `tuple`
            Positional arguments to pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to the object constructor.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        if isinstance(entity, str):
            names = (entity,)
        else:
            names = entity._lookupNames()
        matchKey, formatter = self._mappingFactory.getFromRegistryWithMatch(names, *args, **kwargs)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter

    def getFormatter(self, entity: Entity, *args, **kwargs) -> Formatter:
        """Get a new formatter instance.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            If a `DatasetRef` or `DatasetType` instance is provided, its
            `StorageClass` will be used as a last resort. An instrument
            override is supported if a `DatasetRef` with an ``instrument``
            value in its data ID is provided.
        args : `tuple`
            Positional arguments to pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to the object constructor.

        Returns
        -------
        formatter : `Formatter`
            An instance of the registered formatter.
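
        Examples
        --------
        A sketch, assuming ``ref`` is a `DatasetRef` and ``fileDescriptor``
        is a `FileDescriptor` already prepared by the caller (typically a
        datastore); the extra arguments are forwarded to the `Formatter`
        constructor::

            formatter = factory.getFormatter(ref, fileDescriptor, ref.dataId)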
440 """
441 _, formatter = self.getFormatterWithMatch(entity, *args, **kwargs)
442 return formatter

    def registerFormatter(self, type_: Union[LookupKey, str, StorageClass, DatasetType],
                          formatter: Union[str, Type[Formatter]], overwrite: bool = False) -> None:
        """Register a `Formatter`.

        Parameters
        ----------
        type_ : `LookupKey`, `str`, `StorageClass` or `DatasetType`
            Type for which this formatter is to be used. If a `LookupKey`
            is not provided, one will be constructed from the supplied string
            or by using the ``name`` property of the supplied entity.
        formatter : `str` or class of type `Formatter`
            Identifies a `Formatter` subclass to use for reading and writing
            Datasets of this type. Can be a `Formatter` class.
        overwrite : `bool`, optional
            If `True` an existing entry will be replaced by the new value.
            Default is `False`.

        Raises
        ------
        ValueError
            Raised if the formatter does not name a valid formatter type and
            ``overwrite`` is `False`.
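
        Examples
        --------
        A sketch of registering a formatter under a storage class name and
        retrieving it again (``MyFormatter`` stands in for any concrete
        `Formatter` subclass and is not part of this package)::

            factory = FormatterFactory()
            factory.registerFormatter("StructuredDataJson", MyFormatter)
            FormatterClass = factory.getFormatterClass("StructuredDataJson")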
466 """
467 self._mappingFactory.placeInRegistry(type_, formatter, overwrite=overwrite)
470# Type to use when allowing a Formatter or its class name
471FormatterParameter = Union[None, str, Type[Formatter]]