Coverage for python/lsst/daf/butler/core/storageClass.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Support for Storage Classes."""
26__all__ = ("StorageClass", "StorageClassFactory", "StorageClassConfig")
28import builtins
29import logging
31from typing import (
32 Any,
33 Collection,
34 Dict,
35 List,
36 Optional,
37 Set,
38 Sequence,
39 Tuple,
40 Type,
41 Union,
42)
44from lsst.utils import doImport
45from .utils import Singleton, getFullTypeName
46from .assembler import CompositeAssembler
47from .config import ConfigSubset, Config
48from .configSupport import LookupKey
50log = logging.getLogger(__name__)
class StorageClassConfig(ConfigSubset):
    """Configuration subset holding `StorageClass` definitions."""

    # Name of the section holding the definitions when they are embedded
    # in a larger configuration (presumably consumed by `ConfigSubset`;
    # verify against that class).
    component = "storageClasses"

    # File name of the default definitions (presumably resolved by
    # `ConfigSubset`; verify against that class).
    defaultConfigFile = "storageClasses.yaml"
class StorageClass:
    """Class describing how a label maps to a particular Python type.

    Any constructor argument left as `None` falls back to the matching
    class-level ``_cls_*`` attribute, which allows subclasses to carry
    their own defaults.

    Parameters
    ----------
    name : `str`, optional
        Name to use for this class.
    pytype : `type` or `str`, optional
        Python type (or name of type) to associate with the `StorageClass`.
        If no type is available from the argument or the class default,
        `object` is used.
    components : `dict`, optional
        `dict` mapping name of a component to another `StorageClass`.
    parameters : `~collections.abc.Sequence` or `~collections.abc.Set`, \
            optional
        Parameters understood by this `StorageClass` that can control
        reading of data from datastores.
    assembler : `str`, optional
        Fully qualified name of class supporting assembly and disassembly
        of a `pytype` instance.
    """

    # Class-level defaults consulted by the constructor.
    _cls_name: str = "BaseStorageClass"
    _cls_components: Optional[Dict[str, StorageClass]] = None
    _cls_parameters: Optional[Union[Set[str], Sequence[str]]] = None
    _cls_assembler: Optional[str] = None
    _cls_pytype: Optional[Union[Type, str]] = None

    # Assembler used when components are defined but no assembler is named.
    defaultAssembler: Type = CompositeAssembler
    defaultAssemblerName: str = getFullTypeName(defaultAssembler)

    def __init__(self, name: Optional[str] = None,
                 pytype: Optional[Union[Type, str]] = None,
                 components: Optional[Dict[str, StorageClass]] = None,
                 parameters: Optional[Union[Sequence, Set]] = None,
                 assembler: Optional[str] = None):
        # Fall back to the class-level definitions for anything not given.
        if name is None:
            name = self._cls_name
        if pytype is None:
            pytype = self._cls_pytype
        if components is None:
            components = self._cls_components
        if parameters is None:
            parameters = self._cls_parameters
        if assembler is None:
            assembler = self._cls_assembler
        self.name = name

        if pytype is None:
            pytype = object

        self._pytype: Optional[Type]
        if not isinstance(pytype, str):
            # Already have a type so store it and get the name
            self._pytypeName = getFullTypeName(pytype)
            self._pytype = pytype
        else:
            # Store the type name and defer loading of type
            self._pytypeName = pytype
            self._pytype = None

        # Take a shallow copy so that this instance never aliases the
        # class-level default dict (shared by every instance of the class)
        # or the dict owned by the caller.
        self._components = dict(components) if components is not None else {}
        self._parameters = frozenset(parameters) if parameters is not None else frozenset()

        # if the assembler is not None also set it and clear the default
        # assembler
        self._assembler: Optional[Type]
        self._assemblerClassName: Optional[str]
        if assembler is not None:
            # Only the name is stored; the class is imported on first use.
            self._assemblerClassName = assembler
            self._assembler = None
        elif components is not None:
            # We set a default assembler for composites so that a class is
            # guaranteed to support something if it is a composite.
            log.debug("Setting default assembler for %s", self.name)
            self._assembler = self.defaultAssembler
            self._assemblerClassName = self.defaultAssemblerName
        else:
            self._assembler = None
            self._assemblerClassName = None

    @property
    def components(self) -> Dict[str, StorageClass]:
        """Component names mapped to associated `StorageClass`
        """
        return self._components

    @property
    def parameters(self) -> Set[str]:
        """`set` of names of parameters supported by this `StorageClass`
        """
        # A new set is returned each time; the stored value is a frozenset.
        return set(self._parameters)

    @property
    def pytype(self) -> Type:
        """Python type associated with this `StorageClass`."""
        if self._pytype is not None:
            return self._pytype

        # Resolve the stored name on first access and cache the result.
        # Names found in the builtins module are used directly; anything
        # else is handed to doImport.
        if hasattr(builtins, self._pytypeName):
            pytype = getattr(builtins, self._pytypeName)
        else:
            pytype = doImport(self._pytypeName)
        self._pytype = pytype
        return self._pytype

    @property
    def assemblerClass(self) -> Optional[Type]:
        """Class to use to (dis)assemble an object from components."""
        if self._assembler is not None:
            return self._assembler
        if self._assemblerClassName is None:
            return None
        # Import lazily from the stored name and cache the class.
        self._assembler = doImport(self._assemblerClassName)
        return self._assembler

    def assembler(self) -> CompositeAssembler:
        """Return an instance of an assembler.

        Returns
        -------
        assembler : `CompositeAssembler`
            Instance of the assembler associated with this `StorageClass`.
            Assembler is constructed with this `StorageClass`.

        Raises
        ------
        TypeError
            This StorageClass has no associated assembler.
        """
        cls = self.assemblerClass
        if cls is None:
            raise TypeError(f"No assembler class is associated with StorageClass {self.name}")
        return cls(storageClass=self)

    def isComposite(self) -> bool:
        """Boolean indicating whether this `StorageClass` is a composite
        or not.

        Returns
        -------
        isComposite : `bool`
            `True` if this `StorageClass` has at least one component,
            `False` otherwise.
        """
        return bool(self.components)

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Keys to use when looking up this DatasetRef in a configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of a `LookupKey` using the `StorageClass` name.
        """
        return (LookupKey(name=self.name), )

    def knownParameters(self) -> Set[str]:
        """Return set of all parameters known to this `StorageClass`

        The set includes parameters understood by components of a composite.

        Returns
        -------
        known : `set`
            All parameter keys of this `StorageClass` and the component
            storage classes.
        """
        known = set(self._parameters)
        # Each component contributes its own parameters, recursively.
        for sc in self.components.values():
            known.update(sc.knownParameters())
        return known

    def validateParameters(self, parameters: Optional[Collection] = None) -> None:
        """Check that the parameters are known to this `StorageClass`

        Does not check the values.

        Parameters
        ----------
        parameters : `~collections.abc.Collection`, optional
            Collection containing the parameters. Can be `dict`-like or
            `set`-like.  The parameter values are not checked.
            If no parameters are supplied, always returns without error.

        Raises
        ------
        KeyError
            Some parameters are not understood by this `StorageClass`.
        """
        # No parameters is always okay
        if not parameters:
            return

        # Extract the important information into a set. Works for dict and
        # list.
        external = set(parameters)

        diff = external - self.knownParameters()
        if diff:
            s = "s" if len(diff) > 1 else ""
            # Sort so that the message does not depend on set iteration
            # order and is therefore reproducible between runs.
            unknown = "', '".join(sorted(diff))
            raise KeyError(f"Parameter{s} '{unknown}' not understood by StorageClass {self.name}")

    def filterParameters(self, parameters: Optional[Dict[str, Any]],
                         subset: Optional[Collection] = None) -> Dict[str, Any]:
        """Filter out parameters that are not known to this StorageClass

        Parameters
        ----------
        parameters : `dict`, optional
            Candidate parameters. Can be `None` if no parameters have
            been provided.
        subset : `~collections.abc.Collection`, optional
            Subset of supported parameters that the caller is interested
            in using.  The subset must be known to the `StorageClass`
            if specified. If `None` the supplied parameters will all
            be checked, else only the keys in this set will be checked.

        Returns
        -------
        filtered : `dict`
            Valid parameters. Empty `dict` if none are suitable.

        Raises
        ------
        ValueError
            Raised if the provided subset is not a subset of the supported
            parameters or if it is an empty set.  The subset is only
            examined when some parameters were supplied.
        """
        if not parameters:
            return {}

        known = self.knownParameters()

        if subset is not None:
            if not subset:
                raise ValueError("Specified a parameter subset but it was empty")
            subset = set(subset)
            if not subset.issubset(known):
                raise ValueError(f"Requested subset ({subset}) is not a subset of"
                                 f" known parameters ({known})")
            wanted = subset
        else:
            wanted = known

        return {k: parameters[k] for k in wanted if k in parameters}

    def validateInstance(self, instance: Any) -> bool:
        """Check that the supplied Python object has the expected Python type

        Parameters
        ----------
        instance : `object`
            Object to check.

        Returns
        -------
        isOk : `bool`
            True if the supplied instance object can be handled by this
            `StorageClass`, False otherwise.
        """
        # Accessing pytype may trigger an import of the associated type.
        return isinstance(instance, self.pytype)

    def __eq__(self, other: Any) -> bool:
        """Equality checks name, pytype name, assembler name, parameters,
        and components"""

        if not isinstance(other, StorageClass):
            return False

        if self.name != other.name:
            return False

        # We must compare pytype and assembler by name since we do not want
        # to trigger an import of external module code here
        if self._assemblerClassName != other._assemblerClassName:
            return False
        if self._pytypeName != other._pytypeName:
            return False

        # Ensure we have the same component keys in each
        if set(self.components.keys()) != set(other.components.keys()):
            return False

        # Same parameters
        if self.parameters != other.parameters:
            return False

        # Ensure that all the components have the same type
        for k in self.components:
            if self.components[k] != other.components[k]:
                return False

        # If we got to this point everything checks out
        return True

    def __hash__(self) -> int:
        # The name is one of the things compared in __eq__, so objects that
        # compare equal always share a hash.
        return hash(self.name)

    def __repr__(self) -> str:
        # Only include arguments that differ from the defaults.
        optionals: Dict[str, Any] = {}
        if self._pytypeName != "object":
            optionals["pytype"] = self._pytypeName
        if self._assemblerClassName is not None:
            optionals["assembler"] = self._assemblerClassName
        if self._parameters:
            optionals["parameters"] = self._parameters
        if self.components:
            optionals["components"] = self.components

        # order is preserved in the dict
        options = ", ".join(f"{k}={v!r}" for k, v in optionals.items())

        # Start with mandatory fields
        r = f"{self.__class__.__name__}({self.name!r}"
        if options:
            r = r + ", " + options
        r = r + ")"
        return r

    def __str__(self) -> str:
        return self.name
class StorageClassFactory(metaclass=Singleton):
    """Factory for `StorageClass` instances.

    This class is a singleton, with each instance sharing the pool of
    StorageClasses. Since code can not know whether it is the first
    time the instance has been created, callers should not depend on
    passing a configuration to the constructor.  To populate the factory
    with storage classes, a call to `~StorageClassFactory.addFromConfig()`
    should be made.

    Parameters
    ----------
    config : `StorageClassConfig` or `str`, optional
        Load configuration. In a `ButlerConfig` the relevant configuration
        is located in the ``storageClasses`` section.

    Notes
    -----
    NOTE(review): whether ``config`` is honoured on constructions after the
    first depends on the `Singleton` metaclass, which is not defined in
    this file -- confirm before relying on it.
    """

    def __init__(self, config: Optional[Union[StorageClassConfig, str]] = None):
        # Registered storage classes, indexed by StorageClass.name.
        self._storageClasses: Dict[str, StorageClass] = {}
        # Every configuration passed through addFromConfig, in order.
        self._configs: List[StorageClassConfig] = []

        # Always seed with the default config
        self.addFromConfig(StorageClassConfig())

        if config is not None:
            self.addFromConfig(config)

    def __str__(self) -> str:
        """Return summary of factory.

        Returns
        -------
        summary : `str`
            Summary of the factory status.
        """
        sep = "\n"
        return f"""Number of registered StorageClasses: {len(self._storageClasses)}

StorageClasses
--------------
{sep.join(f"{s}: {self._storageClasses[s]}" for s in self._storageClasses)}
"""

    def __contains__(self, storageClassOrName: Union[StorageClass, str]) -> bool:
        """Indicates whether the storage class exists in the factory.

        Parameters
        ----------
        storageClassOrName : `str` or `StorageClass`
            If `str` is given existence of the named StorageClass
            in the factory is checked. If `StorageClass` is given
            existence and equality are checked.

        Returns
        -------
        in : `bool`
            True if the supplied string is present, or if the supplied
            `StorageClass` is present and identical.  Always False for
            any other type of argument.

        Notes
        -----
        The two different checks (one for "key" and one for "value") based on
        the type of the given argument mean that it is possible for
        StorageClass.name to be in the factory but StorageClass to not be
        in the factory.
        """
        if isinstance(storageClassOrName, str):
            return storageClassOrName in self._storageClasses
        if isinstance(storageClassOrName, StorageClass):
            if storageClassOrName.name in self._storageClasses:
                return storageClassOrName == self._storageClasses[storageClassOrName.name]
        return False

    def addFromConfig(self, config: Union[StorageClassConfig, Config, str]) -> None:
        """Add more `StorageClass` definitions from a config file.

        Parameters
        ----------
        config : `StorageClassConfig`, `Config` or `str`
            Storage class configuration. Can contain a ``storageClasses``
            key if part of a global configuration.

        Raises
        ------
        KeyError
            A definition refers to a component or parent storage class
            that is neither registered already nor obtainable from this
            configuration (this includes circular references).
        ValueError
            A definition conflicts with a storage class of the same name
            that is already registered.
        """
        sconfig = StorageClassConfig(config)
        # NOTE(review): the object stored here is the same one that is
        # drained by ``pop`` below, so the stored config ends up without
        # the processed entries -- confirm that this is intended.
        self._configs.append(sconfig)

        def lookupDependency(depName: str, role: str, owner: str) -> StorageClass:
            # Fetch a storage class that another definition depends on,
            # turning a bare KeyError into one that says who needed it.
            try:
                return self.getStorageClass(depName)
            except KeyError:
                raise KeyError(f"StorageClass '{depName}' required as {role} of StorageClass "
                               f"'{owner}' is not registered and could not be created from "
                               "the supplied configuration") from None

        # Since we can not assume that we will get definitions of
        # components or parents before their classes are defined
        # we have a helper function that we can call recursively
        # to extract definitions from the configuration.
        def processStorageClass(name: str, sconfig: StorageClassConfig) -> None:
            # Maybe we've already processed this through recursion
            if name not in sconfig:
                return
            # Removing the entry up front marks it as handled, which also
            # stops circular references from recursing without end.
            info = sconfig.pop(name)

            # Always create the storage class so we can ensure that
            # we are not trying to overwrite with a different definition
            components = None
            if "components" in info:
                components = {}
                for cname, ctype in info["components"].items():
                    if ctype not in self:
                        processStorageClass(ctype, sconfig)
                    components[cname] = lookupDependency(ctype, f"component '{cname}'", name)

            # Extract scalar items from dict that are needed for
            # StorageClass Constructor
            storageClassKwargs = {k: info[k] for k in ("pytype", "assembler", "parameters") if k in info}

            # Fill in other items
            storageClassKwargs["components"] = components

            # Create the new storage class and register it
            baseClass = None
            if "inheritsFrom" in info:
                baseName = info["inheritsFrom"]
                if baseName not in self:
                    processStorageClass(baseName, sconfig)
                # The parent is the Python class of the registered instance.
                baseClass = type(lookupDependency(baseName, "parent", name))

            newStorageClassType = self.makeNewStorageClass(name, baseClass, **storageClassKwargs)
            newStorageClass = newStorageClassType()
            self.registerStorageClass(newStorageClass)

        # Iterate over a snapshot of the keys because processing removes
        # entries from the configuration as it goes.
        for name in list(sconfig.keys()):
            processStorageClass(name, sconfig)

    @staticmethod
    def makeNewStorageClass(name: str,
                            baseClass: Optional[Type[StorageClass]] = StorageClass,
                            **kwargs: Any) -> Type[StorageClass]:
        """Create a new Python class as a subclass of `StorageClass`.

        Parameters
        ----------
        name : `str`
            Name to use for this class.
        baseClass : `type`, optional
            Base class for this `StorageClass`. Must be either `StorageClass`
            or a subclass of `StorageClass`. If `None`, `StorageClass` will
            be used.
        **kwargs
            Defaults for the new class.  Each keyword ``key`` whose value
            is not `None` is stored on the new class as ``_cls_<key>``.
            Values for ``components`` and ``parameters`` are merged with
            those of the base class.

        Returns
        -------
        newtype : `type` subclass of `StorageClass`
            Newly created Python type.

        Raises
        ------
        ValueError
            Raised if ``baseClass`` is not a subclass of `StorageClass`.
        """

        if baseClass is None:
            baseClass = StorageClass
        if not issubclass(baseClass, StorageClass):
            raise ValueError(f"Base class must be a StorageClass not {baseClass}")

        # convert the arguments to use different internal names
        clsargs = {f"_cls_{k}": v for k, v in kwargs.items() if v is not None}
        clsargs["_cls_name"] = name

        # Some container items need to merge with the base class values
        # so that a child can inherit but override one bit.
        # lists (which you get from configs) are treated as sets for this to
        # work consistently.
        for k in ("components", "parameters"):
            classKey = f"_cls_{k}"
            if classKey in clsargs:
                baseValue = getattr(baseClass, classKey, None)
                if baseValue is not None:
                    currentValue = clsargs[classKey]
                    # Start from a copy of the base value so that the base
                    # class itself is never modified; the child's entries
                    # are applied on top.
                    if isinstance(currentValue, dict):
                        newValue = baseValue.copy()
                    else:
                        newValue = set(baseValue)
                    newValue.update(currentValue)
                    clsargs[classKey] = newValue

        # If we have parameters they should be a frozen set so that the
        # parameters in the class can not be modified.
        pk = "_cls_parameters"
        if pk in clsargs:
            clsargs[pk] = frozenset(clsargs[pk])

        return type(f"StorageClass{name}", (baseClass,), clsargs)

    def getStorageClass(self, storageClassName: str) -> StorageClass:
        """Get a StorageClass instance associated with the supplied name.

        Parameters
        ----------
        storageClassName : `str`
            Name of the storage class to retrieve.

        Returns
        -------
        instance : `StorageClass`
            Instance of the correct `StorageClass`.

        Raises
        ------
        KeyError
            The requested storage class name is not registered.
        """
        return self._storageClasses[storageClassName]

    def registerStorageClass(self, storageClass: StorageClass) -> None:
        """Store the `StorageClass` in the factory.

        Will be indexed by `StorageClass.name` and will return instances
        of the supplied `StorageClass`.  Registering a definition equal to
        the one already stored under that name does nothing.

        Parameters
        ----------
        storageClass : `StorageClass`
            Instance of `StorageClass` to register.

        Raises
        ------
        ValueError
            If a storage class has already been registered with
            storageClassName and the previous definition differs.
        """
        if storageClass.name in self._storageClasses:
            existing = self.getStorageClass(storageClass.name)
            if existing != storageClass:
                raise ValueError(f"New definition for StorageClass {storageClass.name} ({storageClass}) "
                                 f"differs from current definition ({existing})")
        else:
            self._storageClasses[storageClass.name] = storageClass

    def _unregisterStorageClass(self, storageClassName: str) -> None:
        """Remove the named StorageClass from the factory.

        Parameters
        ----------
        storageClassName : `str`
            Name of storage class to remove.

        Raises
        ------
        KeyError
            The named storage class is not registered.

        Notes
        -----
        This method is intended to simplify testing of StorageClassFactory
        functionality and it is not expected to be required for normal usage.
        """
        del self._storageClasses[storageClassName]