Coverage for python/lsst/pipe/base/_task_metadata.py: 16%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22__all__ = ["TaskMetadata"]
24import itertools
25import numbers
26import warnings
27from collections.abc import Sequence
28from typing import Any, Collection, Dict, Iterator, List, Mapping, Optional, Protocol, Set, Tuple, Union
30from deprecated.sphinx import deprecated
31from pydantic import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr
# Text and version shared by every deprecation notice issued in this module.
_DEPRECATION_REASON = "Will be removed after v25."
_DEPRECATION_VERSION = "v24"

# Only these primitive types may be stored in a Task metadata field;
# keeping the set small makes serialization predictable.
_ALLOWED_PRIMITIVE_TYPES = (str, float, int, bool)
class PropertySetLike(Protocol):
    """Structural type for objects resembling ``lsst.daf.base.PropertySet``.

    Only the part of the API required to convert a ``PropertySet`` into a
    `TaskMetadata` is declared here.
    """

    def paramNames(self, topLevelOnly: bool = True) -> Collection[str]:
        """Return the names of the stored parameters."""
        ...

    def getArray(self, name: str) -> Any:
        """Return the values stored under ``name`` as an array."""
        ...
def _isListLike(v: Any) -> bool:
    """Report whether ``v`` is a sequence other than a string.

    Parameters
    ----------
    v : `object`
        Value to classify.

    Returns
    -------
    isListLike : `bool`
        `True` if ``v`` is a `collections.abc.Sequence` that is not a `str`.
    """
    # Strings are sequences too, but are treated as scalars here.
    if isinstance(v, str):
        return False
    return isinstance(v, Sequence)
class TaskMetadata(BaseModel):
    """Dict-like object for storing task metadata.

    Metadata can be stored at two levels: single task or task plus subtasks.
    The latter is called full metadata of a task and has a form

        topLevelTaskName:subtaskName:subsubtaskName.itemName

    Metadata item key of a task (`itemName` above) must not contain `.`,
    which serves as a separator in full metadata keys and turns
    the value into sub-dictionary. Arbitrary hierarchies are supported.

    Deprecated methods are for compatibility with
    the predecessor containers.
    """

    # Single primitive values, keyed by top-level name.
    scalars: Dict[str, Union[StrictFloat, StrictInt, StrictBool, StrictStr]] = Field(default_factory=dict)
    # Homogeneous lists of primitive values, keyed by top-level name.
    arrays: Dict[str, Union[List[StrictFloat], List[StrictInt], List[StrictBool], List[StrictStr]]] = Field(
        default_factory=dict
    )
    # Nested metadata; this is what implements the dot-separated hierarchy.
    metadata: Dict[str, "TaskMetadata"] = Field(default_factory=dict)

    @classmethod
    def from_dict(cls, d: Mapping[str, Any]) -> "TaskMetadata":
        """Create a TaskMetadata from a dictionary.

        Parameters
        ----------
        d : `Mapping`
            Mapping to convert. Can be hierarchical. Any dictionaries
            in the hierarchy are converted to `TaskMetadata`.

        Returns
        -------
        meta : `TaskMetadata`
            Newly-constructed metadata.
        """
        metadata = cls()
        # Item assignment validates each value and routes it to the
        # scalars, arrays or metadata slot.
        for k, v in d.items():
            metadata[k] = v
        return metadata

    @classmethod
    def from_metadata(cls, ps: PropertySetLike) -> "TaskMetadata":
        """Create a TaskMetadata from a PropertySet-like object.

        Parameters
        ----------
        ps : `PropertySetLike` or `TaskMetadata`
            A ``PropertySet``-like object to be transformed to a
            `TaskMetadata`. A `TaskMetadata` can be copied using this
            class method.

        Returns
        -------
        tm : `TaskMetadata`
            Newly-constructed metadata.

        Notes
        -----
        Items stored in single-element arrays in the supplied object
        will be converted to scalars in the newly-created object.
        """
        # Use hierarchical names to assign values from input to output.
        # This API exists for both PropertySet and TaskMetadata.
        # from_dict() does not work because PropertySet is not declared
        # to be a Mapping.
        # PropertySet.toDict() is not present in TaskMetadata so is best
        # avoided.
        metadata = cls()
        for key in sorted(ps.paramNames(topLevelOnly=False)):
            value = ps.getArray(key)
            if len(value) == 1:
                value = value[0]
            metadata[key] = value
        return metadata

    def to_dict(self) -> Dict[str, Any]:
        """Convert the class to a simple dictionary.

        Returns
        -------
        d : `dict`
            Simple dictionary that can contain scalar values, array values
            or other dictionary values.

        Notes
        -----
        Unlike `dict()`, this method hides the model layout and combines
        scalars, arrays, and other metadata in the same dictionary. Can be
        used when a simple dictionary is needed. Use
        `TaskMetadata.from_dict()` to convert it back.
        """
        d: Dict[str, Any] = {}
        d.update(self.scalars)
        d.update(self.arrays)
        # Nested metadata is converted recursively.
        for k, v in self.metadata.items():
            d[k] = v.to_dict()
        return d

    def add(self, name: str, value: Any) -> None:
        """Store a new value, adding to a list if one already exists.

        Parameters
        ----------
        name : `str`
            Name of the metadata property. Can be dot-separated
            hierarchical.
        value
            Metadata property value.

        Raises
        ------
        ValueError
            Raised if the value is not a primitive type or a sequence of
            primitive types, if ``name`` refers to a nested `TaskMetadata`,
            or if the type of the value differs from that of the values
            already stored under ``name``.
        KeyError
            Raised if ``name`` is hierarchical and the parent level does
            not exist; unlike item assignment, this method does not create
            missing levels.
        """
        keys = self._getKeys(name)
        key0 = keys.pop(0)
        if len(keys) == 0:
            # If add() is being used, always store the value in the arrays
            # property as a list. It's likely there will be another call.
            slot_type, value = self._validate_value(value)
            if slot_type == "array":
                pass
            elif slot_type == "scalar":
                value = [value]
            else:
                raise ValueError("add() can only be used for primitive types or sequences of those types.")

            if key0 in self.metadata:
                raise ValueError(f"Can not add() to key '{name}' since that is a TaskMetadata")

            if key0 in self.scalars:
                # Convert scalar to array.
                # MyPy should be able to figure out that List[Union[T1, T2]] is
                # compatible with Union[List[T1], List[T2]] if the list has
                # only one element, but it can't.
                self.arrays[key0] = [self.scalars.pop(key0)]  # type: ignore

            if key0 in self.arrays:
                # Check that the type is not changing.
                if (curtype := type(self.arrays[key0][0])) is not (newtype := type(value[0])):
                    raise ValueError(f"Type mismatch in add() -- currently {curtype} but adding {newtype}")
                self.arrays[key0].extend(value)
            else:
                self.arrays[key0] = value

            return

        # Hierarchical name: forward the remainder to the child metadata.
        self.metadata[key0].add(".".join(keys), value)

    @deprecated(
        reason="Cast the return value to float explicitly. " + _DEPRECATION_REASON,
        version=_DEPRECATION_VERSION,
        category=FutureWarning,
    )
    def getAsDouble(self, key: str) -> float:
        """Return the value cast to a `float`.

        Parameters
        ----------
        key : `str`
            Item to return. Can be dot-separated hierarchical.

        Returns
        -------
        value : `float`
            The value cast to a `float`.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        return float(self[key])

    def getScalar(self, key: str) -> Union[str, int, float, bool]:
        """Retrieve a scalar item even if the item is a list.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        value : `str`, `int`, `float`, or `bool`
            Either the value associated with the key or, if the key
            corresponds to a list, the last item in the list.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        # Used in pipe_tasks.
        # getScalar() is the default behavior for __getitem__.
        return self[key]

    def getArray(self, key: str) -> List[Any]:
        """Retrieve an item as a list even if it is a scalar.

        Parameters
        ----------
        key : `str`
            Item to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        values : `list` of any
            A list containing the value or values associated with this item.
            For an item stored as an array this is the stored list itself,
            not a copy.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            if key0 in self.arrays:
                return self.arrays[key0]
            elif key0 in self.scalars:
                return [self.scalars[key0]]
            elif key0 in self.metadata:
                return [self.metadata[key0]]
            raise KeyError(f"'{key}' not found")

        try:
            return self.metadata[key0].getArray(".".join(keys))
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def names(self, topLevelOnly: bool = True) -> Set[str]:
        """Return the hierarchical keys from the metadata.

        Parameters
        ----------
        topLevelOnly : `bool`
            If true, return top-level keys, otherwise full metadata item keys.

        Returns
        -------
        names : `set` of `str`
            A set of top-level keys or full metadata item keys, including
            the top-level keys.

        Notes
        -----
        Should never be called in new code with ``topLevelOnly`` set to `True`
        -- this is equivalent to asking for the keys and is the default
        when iterating through the task metadata. In this case a deprecation
        message will be issued and the ability will raise an exception
        in a future release.

        When ``topLevelOnly`` is `False` all keys, including those from the
        hierarchy and the top-level hierarchy, are returned.
        """
        if topLevelOnly:
            # stacklevel=2 attributes the warning to the calling code.
            warnings.warn("Use keys() instead. " + _DEPRECATION_REASON, FutureWarning, stacklevel=2)
            return set(self.keys())
        else:
            names = set()
            for k, v in self.items():
                names.add(k)  # Always include the current level
                if isinstance(v, TaskMetadata):
                    names.update({k + "." + item for item in v.names(topLevelOnly=topLevelOnly)})
            return names

    def paramNames(self, topLevelOnly: bool = True) -> Set[str]:
        """Return hierarchical names.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Control whether only top-level items are returned or items
            from the hierarchy. Defaults to `True`, matching the signature
            declared by `PropertySetLike`.

        Returns
        -------
        paramNames : `set` of `str`
            If ``topLevelOnly`` is `True`, returns any keys that are not
            part of a hierarchy. If `False` also returns fully-qualified
            names from the hierarchy. Keys associated with the top
            of a hierarchy are never returned.
        """
        # Currently used by the verify package.
        paramNames = set()
        for k, v in self.items():
            if isinstance(v, TaskMetadata):
                if not topLevelOnly:
                    paramNames.update({k + "." + item for item in v.paramNames(topLevelOnly=topLevelOnly)})
            else:
                paramNames.add(k)
        return paramNames

    @deprecated(
        reason="Use standard assignment syntax. " + _DEPRECATION_REASON,
        version=_DEPRECATION_VERSION,
        category=FutureWarning,
    )
    def set(self, key: str, item: Any) -> None:
        """Set the value of the supplied key."""
        self[key] = item

    @deprecated(
        reason="Use standard del dict syntax. " + _DEPRECATION_REASON,
        version=_DEPRECATION_VERSION,
        category=FutureWarning,
    )
    def remove(self, key: str) -> None:
        """Remove the item without raising if absent."""
        try:
            del self[key]
        except KeyError:
            # The PropertySet.remove() should always work.
            pass

    @staticmethod
    def _getKeys(key: str) -> List[str]:
        """Return the key hierarchy.

        Parameters
        ----------
        key : `str`
            The key to analyze. Can be dot-separated.

        Returns
        -------
        keys : `list` of `str`
            The key hierarchy that has been split on ``.``.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        try:
            keys = key.split(".")
        except Exception:
            # Deliberately broad: any failure to split on "." means the
            # key cannot be used, whatever the underlying error was.
            raise KeyError(f"Invalid key '{key}': only string keys are allowed") from None
        return keys

    def keys(self) -> Tuple[str, ...]:
        """Return the top-level keys."""
        return tuple(self)

    def items(self) -> Iterator[Tuple[str, Any]]:
        """Yield the top-level keys and values."""
        yield from itertools.chain(self.scalars.items(), self.arrays.items(), self.metadata.items())

    def __len__(self) -> int:
        """Return the number of items."""
        return len(self.scalars) + len(self.arrays) + len(self.metadata)

    # This is actually a Liskov substitution violation, because
    # pydantic.BaseModel says __iter__ should return something else. But the
    # pydantic docs say to do exactly this in order to make a mapping-like
    # BaseModel, so that's what we do.
    def __iter__(self) -> Iterator[str]:  # type: ignore
        """Return an iterator over each key."""
        # The order of keys is not preserved since items can move
        # from scalar to array.
        return itertools.chain(iter(self.scalars), iter(self.arrays), iter(self.metadata))

    def __getitem__(self, key: str) -> Any:
        """Retrieve the item associated with the key.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. For compatibility with ``PropertySet``, if the key
            refers to an array, the final element is returned and not the
            array itself.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            if key0 in self.scalars:
                return self.scalars[key0]
            if key0 in self.metadata:
                return self.metadata[key0]
            if key0 in self.arrays:
                return self.arrays[key0][-1]
            raise KeyError(f"'{key}' not found")
        # Hierarchical lookup so the top key can only be in the metadata
        # property. Trap KeyError and reraise so that the correct key
        # in the hierarchy is reported.
        try:
            # And forward request to that metadata.
            return self.metadata[key0][".".join(keys)]
        except KeyError:
            raise KeyError(f"'{key}' not found") from None

    def __setitem__(self, key: str, item: Any) -> None:
        """Store the given item.

        Parameters
        ----------
        key : `str`
            The key to assign to. Can be dot-separated hierarchical, in
            which case missing intermediate levels are created.
        item
            The value to store. Any entry previously stored under the same
            top-level name in a different slot is removed.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        ValueError
            Raised if the value is not of a supported type.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            slots: Dict[str, Dict[str, Any]] = {
                "array": self.arrays,
                "scalar": self.scalars,
                "metadata": self.metadata,
            }
            slot_type, item = self._validate_value(item)
            # Popping leaves only the slots that must NOT hold this key.
            primary: Optional[Dict[str, Any]] = slots.pop(slot_type, None)
            if primary is None:
                raise AssertionError(f"Unknown slot type returned from validator: {slot_type}")

            # Assign the value to the right place.
            primary[key0] = item
            for other in slots.values():
                # Remove any other entries.
                other.pop(key0, None)
            return

        # This must be hierarchical so forward to the child TaskMetadata.
        if key0 not in self.metadata:
            self.metadata[key0] = TaskMetadata()
        self.metadata[key0][".".join(keys)] = item

        # Ensure we have cleared out anything with the same name elsewhere.
        self.scalars.pop(key0, None)
        self.arrays.pop(key0, None)

    def __contains__(self, key: str) -> bool:
        """Determine if the key exists.

        Parameters
        ----------
        key : `str`
            The key to look for. Can be dot-separated hierarchical.

        Returns
        -------
        found : `bool`
            `True` if the key is present.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            return key0 in self.scalars or key0 in self.arrays or key0 in self.metadata

        if key0 in self.metadata:
            return ".".join(keys) in self.metadata[key0]
        return False

    def __delitem__(self, key: str) -> None:
        """Remove the specified item.

        Parameters
        ----------
        key : `str`
            The key to remove. Can be dot-separated hierarchical.

        Raises
        ------
        KeyError
            Raised if the item is not present.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            # MyPy can't figure out that this way to combine the types in the
            # tuple is the one that matters, and annotating a local variable
            # helps it out.
            slots: Tuple[Dict[str, Any], ...] = (self.scalars, self.arrays, self.metadata)
            for slot in slots:
                if key0 in slot:
                    del slot[key0]
                    return
            raise KeyError(f"'{key}' not found")

        try:
            del self.metadata[key0][".".join(keys)]
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def _validate_value(self, value: Any) -> Tuple[str, Any]:
        """Validate the given value.

        Parameters
        ----------
        value : Any
            Value to check.

        Returns
        -------
        slot_type : `str`
            The type of value given. Options are "scalar", "array", "metadata".
        item : Any
            The item that was given but possibly modified to conform to
            the slot type.

        Raises
        ------
        ValueError
            Raised if the value is not a recognized type, if it is an empty
            sequence, or if it is a sequence whose elements do not all have
            the same type.
        """
        # Test the simplest option first.
        value_type = type(value)
        if value_type in _ALLOWED_PRIMITIVE_TYPES:
            return "scalar", value

        if isinstance(value, TaskMetadata):
            return "metadata", value
        if isinstance(value, Mapping):
            return "metadata", self.from_dict(value)

        if _isListLike(value):
            # For model consistency, need to check that every item in the
            # list has the same type.
            value = list(value)

            # The element type is taken from the first item, so an empty
            # sequence cannot be classified. Reject it with the documented
            # exception instead of leaking an IndexError.
            if not value:
                raise ValueError("TaskMetadata does not support empty sequences as values.")

            type0 = type(value[0])
            for i in value:
                if type(i) is not type0:
                    raise ValueError(
                        "Type mismatch in supplied list. TaskMetadata requires all"
                        f" elements have same type but see {type(i)} and {type0}."
                    )

            if type0 not in _ALLOWED_PRIMITIVE_TYPES:
                # Must check to see if we got numpy floats or something.
                type_cast: type
                if isinstance(value[0], numbers.Integral):
                    type_cast = int
                elif isinstance(value[0], numbers.Real):
                    type_cast = float
                else:
                    raise ValueError(
                        f"Supplied list has element of type '{type0}'. "
                        "TaskMetadata can only accept primitive types in lists."
                    )

                value = [type_cast(v) for v in value]

            return "array", value

        # Sometimes a numpy number is given.
        if isinstance(value, numbers.Integral):
            value = int(value)
            return "scalar", value
        if isinstance(value, numbers.Real):
            value = float(value)
            return "scalar", value

        raise ValueError(f"TaskMetadata does not support values of type {type(value)!r}.")


# Needed because a TaskMetadata can contain a TaskMetadata.
TaskMetadata.update_forward_refs()