Coverage for python/lsst/pipe/base/_task_metadata.py: 15%
205 statements
« prev ^ index » next coverage.py v7.3.0, created at 2023-08-31 09:39 +0000
« prev ^ index » next coverage.py v7.3.0, created at 2023-08-31 09:39 +0000
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22__all__ = ["TaskMetadata"]
24import itertools
25import numbers
26import warnings
27from collections.abc import Collection, Iterator, Mapping, Sequence
28from typing import Any, Protocol
30from lsst.daf.butler._compat import _BaseModelCompat
31from lsst.utils.introspection import find_outside_stacklevel
32from pydantic import Field, StrictBool, StrictFloat, StrictInt, StrictStr
# Only these primitive types may be stored in a Task metadata field; the
# restriction keeps serialization predictable.
_ALLOWED_PRIMITIVE_TYPES: tuple[type, ...] = (str, float, int, bool)
class PropertySetLike(Protocol):
    """Structural type for objects resembling ``lsst.daf.base.PropertySet``.

    Only the part of the API that is needed to convert a ``PropertySet``
    into a `TaskMetadata` is declared here.
    """

    def paramNames(self, topLevelOnly: bool = True) -> Collection[str]:
        """Return a collection of parameter names.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Flag forwarded by callers; `TaskMetadata.from_metadata` calls
            this with `False` and uses every returned name as a key.
        """
        ...

    def getArray(self, name: str) -> Any:
        """Return the value stored under ``name``.

        Parameters
        ----------
        name : `str`
            One of the names reported by `paramNames`.

        Notes
        -----
        `TaskMetadata.from_metadata` takes ``len()`` of the result and
        indexes it with ``[0]``, so the result is expected to be sized and
        indexable.
        """
        ...
def _isListLike(v: Any) -> bool:
    """Report whether ``v`` is a sequence that is not a string.

    Parameters
    ----------
    v : `~typing.Any`
        Object to test.

    Returns
    -------
    is_list_like : `bool`
        `True` if ``v`` is an instance of `collections.abc.Sequence` other
        than `str`; `False` otherwise.
    """
    # Strings are sequences too, but they are handled as scalar values.
    if isinstance(v, str):
        return False
    return isinstance(v, Sequence)
class TaskMetadata(_BaseModelCompat):
    """Dict-like object for storing task metadata.

    Metadata can be stored at two levels: single task or task plus subtasks.
    The latter is called full metadata of a task and has a form

        topLevelTaskName:subtaskName:subsubtaskName.itemName

    Metadata item key of a task (`itemName` above) must not contain `.`,
    which serves as a separator in full metadata keys and turns
    the value into sub-dictionary. Arbitrary hierarchies are supported.

    Notes
    -----
    Values are held in three separate dictionaries selected by the kind of
    value: ``scalars`` for single primitive values, ``arrays`` for lists of
    primitives that all share one type, and ``metadata`` for nested
    `TaskMetadata`. Assigning a key with ``[]`` removes any entry of the same
    name from the other two dictionaries.
    """

    # Single primitive values, keyed by top-level name.
    scalars: dict[str, StrictFloat | StrictInt | StrictBool | StrictStr] = Field(default_factory=dict)
    # Homogeneous lists of primitive values, keyed by top-level name.
    arrays: dict[str, list[StrictFloat] | list[StrictInt] | list[StrictBool] | list[StrictStr]] = Field(
        default_factory=dict
    )
    # Nested metadata, keyed by top-level name.
    metadata: dict[str, "TaskMetadata"] = Field(default_factory=dict)

    @classmethod
    def from_dict(cls, d: Mapping[str, Any]) -> "TaskMetadata":
        """Create a TaskMetadata from a dictionary.

        Parameters
        ----------
        d : `~collections.abc.Mapping`
            Mapping to convert. Can be hierarchical. Any dictionaries
            in the hierarchy are converted to `TaskMetadata`.

        Returns
        -------
        meta : `TaskMetadata`
            Newly-constructed metadata.

        Raises
        ------
        ValueError
            Raised if any value is of a type that is not supported.
        """
        metadata = cls()
        # Item assignment validates each value and routes it to the
        # right internal dictionary.
        for k, v in d.items():
            metadata[k] = v
        return metadata

    @classmethod
    def from_metadata(cls, ps: PropertySetLike) -> "TaskMetadata":
        """Create a TaskMetadata from a PropertySet-like object.

        Parameters
        ----------
        ps : `PropertySetLike` or `TaskMetadata`
            A ``PropertySet``-like object to be transformed to a
            `TaskMetadata`. A `TaskMetadata` can be copied using this
            class method.

        Returns
        -------
        tm : `TaskMetadata`
            Newly-constructed metadata.

        Notes
        -----
        Items stored in single-element arrays in the supplied object
        will be converted to scalars in the newly-created object.
        """
        # Use hierarchical names to assign values from input to output.
        # This API exists for both PropertySet and TaskMetadata.
        # from_dict() does not work because PropertySet is not declared
        # to be a Mapping.
        # PropertySet.toDict() is not present in TaskMetadata so is best
        # avoided.
        metadata = cls()
        for key in sorted(ps.paramNames(topLevelOnly=False)):
            value = ps.getArray(key)
            if len(value) == 1:
                value = value[0]
            metadata[key] = value
        return metadata

    def to_dict(self) -> dict[str, Any]:
        """Convert the class to a simple dictionary.

        Returns
        -------
        d : `dict`
            Simple dictionary that can contain scalar values, array values
            or other dictionary values.

        Notes
        -----
        Unlike `dict()`, this method hides the model layout and combines
        scalars, arrays, and other metadata in the same dictionary. Can be
        used when a simple dictionary is needed. Use
        `TaskMetadata.from_dict()` to convert it back.

        The array values in the returned dictionary are the same list
        objects that are stored in this object; they are not copied.
        """
        d: dict[str, Any] = {}
        d.update(self.scalars)
        d.update(self.arrays)
        for k, v in self.metadata.items():
            d[k] = v.to_dict()
        return d

    def add(self, name: str, value: Any) -> None:
        """Store a new value, adding to a list if one already exists.

        Parameters
        ----------
        name : `str`
            Name of the metadata property. Can be dot-separated
            hierarchical.
        value
            Metadata property value.

        Raises
        ------
        ValueError
            Raised if the value is neither a supported primitive nor a
            sequence of them, if ``name`` refers to a nested `TaskMetadata`,
            or if the type of the new value differs from the type of the
            values already stored under ``name``.
        KeyError
            Raised if ``name`` is hierarchical and its leading component
            is not an existing nested `TaskMetadata`.
        """
        keys = self._getKeys(name)
        key0 = keys.pop(0)
        if len(keys) == 0:
            # If add() is being used, always store the value in the arrays
            # property as a list. It's likely there will be another call.
            slot_type, value = self._validate_value(value)
            if slot_type == "array":
                pass
            elif slot_type == "scalar":
                value = [value]
            else:
                raise ValueError("add() can only be used for primitive types or sequences of those types.")

            if key0 in self.metadata:
                raise ValueError(f"Can not add() to key '{name}' since that is a TaskMetadata")

            if key0 in self.scalars:
                # Convert scalar to array.
                # MyPy should be able to figure out that List[Union[T1, T2]] is
                # compatible with Union[List[T1], List[T2]] if the list has
                # only one element, but it can't.
                self.arrays[key0] = [self.scalars.pop(key0)]  # type: ignore

            if key0 in self.arrays:
                # Check that the type is not changing.
                if (curtype := type(self.arrays[key0][0])) is not (newtype := type(value[0])):
                    raise ValueError(f"Type mismatch in add() -- currently {curtype} but adding {newtype}")
                self.arrays[key0].extend(value)
            else:
                self.arrays[key0] = value

            return

        # Hierarchical name: delegate to the nested metadata. The nested
        # metadata must already exist; it is not created here.
        self.metadata[key0].add(".".join(keys), value)

    def getScalar(self, key: str) -> str | int | float | bool:
        """Retrieve a scalar item even if the item is a list.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        value : `str`, `int`, `float`, or `bool`
            Either the value associated with the key or, if the key
            corresponds to a list, the last item in the list.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        # Used in pipe_tasks.
        # getScalar() is the default behavior for __getitem__.
        return self[key]

    def getArray(self, key: str) -> list[Any]:
        """Retrieve an item as a list even if it is a scalar.

        Parameters
        ----------
        key : `str`
            Item to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        values : `list` of any
            A list containing the value or values associated with this item.
            If the item is stored as an array the stored list itself is
            returned, not a copy.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            if key0 in self.arrays:
                return self.arrays[key0]
            elif key0 in self.scalars:
                return [self.scalars[key0]]
            elif key0 in self.metadata:
                return [self.metadata[key0]]
            raise KeyError(f"'{key}' not found")

        try:
            return self.metadata[key0].getArray(".".join(keys))
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def names(self, topLevelOnly: bool | None = None) -> set[str]:
        """Return the hierarchical keys from the metadata.

        Parameters
        ----------
        topLevelOnly : `bool` or `None`, optional
            This parameter is deprecated and will be removed in the future.
            If given it can only be `False`. All names in the hierarchy are
            always returned.

        Returns
        -------
        names : `set` of `str`
            A set of all keys, including those from the hierarchy and the
            top-level hierarchy.

        Raises
        ------
        RuntimeError
            Raised if ``topLevelOnly`` is given a true value.
        """
        if topLevelOnly:
            raise RuntimeError(
                "The topLevelOnly parameter is no longer supported and can not have a True value."
            )

        if topLevelOnly is False:
            warnings.warn(
                "The topLevelOnly parameter is deprecated and is always assumed to be False."
                " It will be removed completely after v26.",
                category=FutureWarning,
                stacklevel=find_outside_stacklevel("lsst.pipe.base"),
            )

        names: set[str] = set()
        for k, v in self.items():
            names.add(k)  # Always include the current level
            if isinstance(v, TaskMetadata):
                names.update({k + "." + item for item in v.names()})
        return names

    def paramNames(self, topLevelOnly: bool = True) -> set[str]:
        """Return hierarchical names.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Control whether only top-level items are returned or items
            from the hierarchy. Defaults to `True`, matching the signature
            declared by `PropertySetLike`.

        Returns
        -------
        paramNames : `set` of `str`
            If ``topLevelOnly`` is `True`, returns any keys that are not
            part of a hierarchy. If `False` also returns fully-qualified
            names from the hierarchy. Keys associated with the top
            of a hierarchy are never returned.
        """
        # Currently used by the verify package.
        paramNames: set[str] = set()
        for k, v in self.items():
            if isinstance(v, TaskMetadata):
                if not topLevelOnly:
                    paramNames.update({k + "." + item for item in v.paramNames(topLevelOnly=topLevelOnly)})
            else:
                paramNames.add(k)
        return paramNames

    @staticmethod
    def _getKeys(key: str) -> list[str]:
        """Return the key hierarchy.

        Parameters
        ----------
        key : `str`
            The key to analyze. Can be dot-separated.

        Returns
        -------
        keys : `list` of `str`
            The key hierarchy that has been split on ``.``.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        try:
            keys = key.split(".")
        except (AttributeError, TypeError):
            # AttributeError: the object has no split() method.
            # TypeError: split() exists but rejects a str separator (bytes).
            raise KeyError(f"Invalid key '{key}': only string keys are allowed") from None
        return keys

    def keys(self) -> tuple[str, ...]:
        """Return the top-level keys."""
        return tuple(self)

    def items(self) -> Iterator[tuple[str, Any]]:
        """Yield the top-level keys and values."""
        yield from itertools.chain(self.scalars.items(), self.arrays.items(), self.metadata.items())

    def __len__(self) -> int:
        """Return the number of items."""
        return len(self.scalars) + len(self.arrays) + len(self.metadata)

    # This is actually a Liskov substitution violation, because
    # pydantic.BaseModel says __iter__ should return something else. But the
    # pydantic docs say to do exactly this in order to make a mapping-like
    # BaseModel, so that's what we do.
    def __iter__(self) -> Iterator[str]:  # type: ignore
        """Return an iterator over each key."""
        # The order of keys is not preserved since items can move
        # from scalar to array.
        return itertools.chain(iter(self.scalars), iter(self.arrays), iter(self.metadata))

    def __getitem__(self, key: str) -> Any:
        """Retrieve the item associated with the key.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. For compatibility with ``PropertySet``, if the key
            refers to an array, the final element is returned and not the
            array itself.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            if key0 in self.scalars:
                return self.scalars[key0]
            if key0 in self.metadata:
                return self.metadata[key0]
            if key0 in self.arrays:
                return self.arrays[key0][-1]
            raise KeyError(f"'{key}' not found")
        # Hierarchical lookup so the top key can only be in the metadata
        # property. Trap KeyError and reraise so that the correct key
        # in the hierarchy is reported.
        try:
            # And forward request to that metadata.
            return self.metadata[key0][".".join(keys)]
        except KeyError:
            raise KeyError(f"'{key}' not found") from None

    def get(self, key: str, default: Any = None) -> Any:
        """Retrieve the item associated with the key or a default.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.
        default
            The value to return if the key does not exist.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. If the key refers to an array, the final element
            is returned and not the array itself; this is consistent with
            `__getitem__` and `PropertySet.get`, but not ``to_dict().get``.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def __setitem__(self, key: str, item: Any) -> None:
        """Store the given item.

        Parameters
        ----------
        key : `str`
            The key to assign to. Can be dot-separated hierarchical, in
            which case missing intermediate `TaskMetadata` are created.
        item
            The value to store. It replaces any existing entry of the same
            name regardless of whether that entry was a scalar, an array
            or a nested `TaskMetadata`.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        ValueError
            Raised if the value is of a type that is not supported.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            slots: dict[str, dict[str, Any]] = {
                "array": self.arrays,
                "scalar": self.scalars,
                "metadata": self.metadata,
            }
            slot_type, item = self._validate_value(item)
            # Remove the target from the mapping so that only the other
            # two dictionaries remain to be cleaned up below.
            primary = slots.pop(slot_type, None)
            if primary is None:
                raise AssertionError(f"Unknown slot type returned from validator: {slot_type}")

            # Assign the value to the right place.
            primary[key0] = item
            for other in slots.values():
                # Remove any other entries.
                other.pop(key0, None)
            return

        # This must be hierarchical so forward to the child TaskMetadata.
        if key0 not in self.metadata:
            self.metadata[key0] = TaskMetadata()
        self.metadata[key0][".".join(keys)] = item

        # Ensure we have cleared out anything with the same name elsewhere.
        self.scalars.pop(key0, None)
        self.arrays.pop(key0, None)

    def __contains__(self, key: str) -> bool:
        """Determine if the key exists.

        Parameters
        ----------
        key : `str`
            The key to look for. Can be dot-separated hierarchical.

        Returns
        -------
        found : `bool`
            `True` if the key refers to a stored item.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            return key0 in self.scalars or key0 in self.arrays or key0 in self.metadata

        if key0 in self.metadata:
            return ".".join(keys) in self.metadata[key0]
        return False

    def __delitem__(self, key: str) -> None:
        """Remove the specified item.

        Parameters
        ----------
        key : `str`
            The key to remove. Can be dot-separated hierarchical.

        Raises
        ------
        KeyError
            Raised if the item is not present.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            # MyPy can't figure out that this way to combine the types in the
            # tuple is the one that matters, and annotating a local variable
            # helps it out.
            properties: tuple[dict[str, Any], ...] = (self.scalars, self.arrays, self.metadata)
            for store in properties:
                if key0 in store:
                    del store[key0]
                    return
            raise KeyError(f"'{key}' not found")

        try:
            del self.metadata[key0][".".join(keys)]
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def _validate_value(self, value: Any) -> tuple[str, Any]:
        """Validate the given value.

        Parameters
        ----------
        value : Any
            Value to check.

        Returns
        -------
        slot_type : `str`
            The type of value given. Options are "scalar", "array", "metadata".
        item : Any
            The item that was given but possibly modified to conform to
            the slot type.

        Raises
        ------
        ValueError
            Raised if the value is not a recognized type, if a sequence is
            empty, or if the elements of a sequence do not all have the
            same type.
        """
        # Test the simplest option first.
        value_type = type(value)
        if value_type in _ALLOWED_PRIMITIVE_TYPES:
            return "scalar", value

        if isinstance(value, TaskMetadata):
            return "metadata", value
        if isinstance(value, Mapping):
            return "metadata", self.from_dict(value)

        if _isListLike(value):
            # For model consistency, need to check that every item in the
            # list has the same type.
            value = list(value)

            if not value:
                # The element type is taken from the first element, so an
                # empty sequence can not be classified.
                raise ValueError(
                    "Supplied list is empty. TaskMetadata requires at least one"
                    " element to determine the element type."
                )

            type0 = type(value[0])
            for i in value:
                if type(i) is not type0:
                    raise ValueError(
                        "Type mismatch in supplied list. TaskMetadata requires all"
                        f" elements have same type but see {type(i)} and {type0}."
                    )

            if type0 not in _ALLOWED_PRIMITIVE_TYPES:
                # Must check to see if we got numpy floats or something.
                type_cast: type
                if isinstance(value[0], numbers.Integral):
                    type_cast = int
                elif isinstance(value[0], numbers.Real):
                    type_cast = float
                else:
                    raise ValueError(
                        f"Supplied list has element of type '{type0}'. "
                        "TaskMetadata can only accept primitive types in lists."
                    )

                value = [type_cast(v) for v in value]

            return "array", value

        # Sometimes a numpy number is given.
        if isinstance(value, numbers.Integral):
            value = int(value)
            return "scalar", value
        if isinstance(value, numbers.Real):
            value = float(value)
            return "scalar", value

        raise ValueError(f"TaskMetadata does not support values of type {type(value)!r}.")


# Needed because a TaskMetadata can contain a TaskMetadata.
TaskMetadata.model_rebuild()