Coverage for python/lsst/pipe/base/_task_metadata.py: 13%
206 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-09 09:17 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-09 09:17 +0000
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Public API of this module.
__all__ = ["TaskMetadata"]

import itertools
import numbers
import warnings
from collections.abc import Sequence
from typing import Any, Collection, Dict, Iterator, List, Mapping, Optional, Protocol, Set, Tuple, Union

from pydantic import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr
# Text appended to deprecation warnings issued by this module.
_DEPRECATION_REASON = "Will be removed after v25."
# Release in which the deprecated APIs were first flagged.
_DEPRECATION_VERSION = "v24"

# The types allowed in a Task metadata field are restricted
# to allow predictable serialization.
_ALLOWED_PRIMITIVE_TYPES = (str, float, int, bool)
class PropertySetLike(Protocol):
    """Protocol that looks like a ``lsst.daf.base.PropertySet``.

    Enough of the API is specified to support conversion of a
    ``PropertySet`` to a `TaskMetadata`.
    """

    def paramNames(self, topLevelOnly: bool = True) -> Collection[str]:
        """Return the names of the stored parameters.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Whether to restrict the result to top-level names.

        Returns
        -------
        names : `~collections.abc.Collection` of `str`
            The parameter names.
        """
        ...

    def getArray(self, name: str) -> Any:
        """Return the values stored under ``name``.

        Parameters
        ----------
        name : `str`
            Name of the parameter to retrieve.

        Returns
        -------
        values : `typing.Any`
            The stored values; `TaskMetadata.from_metadata` calls `len`
            on the result and indexes it, so it is expected to be
            sequence-like.
        """
        ...
def _isListLike(v: Any) -> bool:
    """Report whether a value should be treated as a list of values.

    Parameters
    ----------
    v : `typing.Any`
        Value to test.

    Returns
    -------
    isListLike : `bool`
        `True` if ``v`` is a `~collections.abc.Sequence` other than a
        `str`; `False` otherwise.
    """
    return isinstance(v, Sequence) and not isinstance(v, str)
class TaskMetadata(BaseModel):
    """Dict-like object for storing task metadata.

    Metadata can be stored at two levels: single task or task plus subtasks.
    The latter is called full metadata of a task and has a form

        topLevelTaskName:subtaskName:subsubtaskName.itemName

    Metadata item key of a task (`itemName` above) must not contain `.`,
    which serves as a separator in full metadata keys and turns
    the value into sub-dictionary. Arbitrary hierarchies are supported.
    """

    # Single primitive values, keyed by item name.
    scalars: Dict[str, Union[StrictFloat, StrictInt, StrictBool, StrictStr]] = Field(default_factory=dict)
    # Homogeneous lists of primitive values, keyed by item name.
    arrays: Dict[str, Union[List[StrictFloat], List[StrictInt], List[StrictBool], List[StrictStr]]] = Field(
        default_factory=dict
    )
    # Nested metadata (one level of the hierarchy), keyed by item name.
    metadata: Dict[str, "TaskMetadata"] = Field(default_factory=dict)

    @classmethod
    def from_dict(cls, d: Mapping[str, Any]) -> "TaskMetadata":
        """Create a TaskMetadata from a dictionary.

        Parameters
        ----------
        d : `Mapping`
            Mapping to convert. Can be hierarchical. Any dictionaries
            in the hierarchy are converted to `TaskMetadata`.

        Returns
        -------
        meta : `TaskMetadata`
            Newly-constructed metadata.
        """
        metadata = cls()
        for k, v in d.items():
            metadata[k] = v
        return metadata

    @classmethod
    def from_metadata(cls, ps: PropertySetLike) -> "TaskMetadata":
        """Create a TaskMetadata from a PropertySet-like object.

        Parameters
        ----------
        ps : `PropertySetLike` or `TaskMetadata`
            A ``PropertySet``-like object to be transformed to a
            `TaskMetadata`. A `TaskMetadata` can be copied using this
            class method.

        Returns
        -------
        tm : `TaskMetadata`
            Newly-constructed metadata.

        Notes
        -----
        Items stored in single-element arrays in the supplied object
        will be converted to scalars in the newly-created object.
        """
        # Use hierarchical names to assign values from input to output.
        # This API exists for both PropertySet and TaskMetadata.
        # from_dict() does not work because PropertySet is not declared
        # to be a Mapping.
        # PropertySet.toDict() is not present in TaskMetadata so is best
        # avoided.
        metadata = cls()
        for key in sorted(ps.paramNames(topLevelOnly=False)):
            value = ps.getArray(key)
            if len(value) == 1:
                value = value[0]
            metadata[key] = value
        return metadata

    def to_dict(self) -> Dict[str, Any]:
        """Convert the class to a simple dictionary.

        Returns
        -------
        d : `dict`
            Simple dictionary that can contain scalar values, array values
            or other dictionary values.

        Notes
        -----
        Unlike `dict()`, this method hides the model layout and combines
        scalars, arrays, and other metadata in the same dictionary. Can be
        used when a simple dictionary is needed. Use
        `TaskMetadata.from_dict()` to convert it back.
        """
        d: Dict[str, Any] = {}
        d.update(self.scalars)
        d.update(self.arrays)
        for k, v in self.metadata.items():
            d[k] = v.to_dict()
        return d

    def add(self, name: str, value: Any) -> None:
        """Store a new value, adding to a list if one already exists.

        Parameters
        ----------
        name : `str`
            Name of the metadata property. Can be dot-separated
            hierarchical.
        value
            Metadata property value.

        Raises
        ------
        ValueError
            Raised if the value is not a primitive type or a sequence of
            primitive types, if ``name`` refers to a nested `TaskMetadata`,
            or if the type of the new value differs from the type of the
            values already stored under ``name``.
        KeyError
            Raised if ``name`` is hierarchical and its top-level component
            is not an existing nested `TaskMetadata`.
        """
        keys = self._getKeys(name)
        key0 = keys.pop(0)
        if len(keys) == 0:
            # If add() is being used, always store the value in the arrays
            # property as a list. It's likely there will be another call.
            slot_type, value = self._validate_value(value)
            if slot_type == "scalar":
                value = [value]
            elif slot_type != "array":
                raise ValueError("add() can only be used for primitive types or sequences of those types.")

            if key0 in self.metadata:
                raise ValueError(f"Can not add() to key '{name}' since that is a TaskMetadata")

            # Work out what is currently stored so that the type can be
            # checked before anything is modified; a rejected add() must
            # leave the existing content untouched.
            in_scalars = key0 in self.scalars
            existing: List[Any] = [self.scalars[key0]] if in_scalars else self.arrays.get(key0, [])
            if existing:
                # Check that the type is not changing.
                curtype = type(existing[0])
                newtype = type(value[0])
                if curtype is not newtype:
                    raise ValueError(f"Type mismatch in add() -- currently {curtype} but adding {newtype}")

            if in_scalars:
                # Convert scalar to array.
                # MyPy should be able to figure out that List[Union[T1, T2]]
                # is compatible with Union[List[T1], List[T2]] if the list
                # has only one element, but it can't.
                self.arrays[key0] = [self.scalars.pop(key0), *value]  # type: ignore
            elif existing:
                self.arrays[key0].extend(value)
            else:
                self.arrays[key0] = value

            return

        # Hierarchical name: delegate to the (already existing) child.
        self.metadata[key0].add(".".join(keys), value)

    def getScalar(self, key: str) -> Union[str, int, float, bool]:
        """Retrieve a scalar item even if the item is a list.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        value : `str`, `int`, `float`, or `bool`
            Either the value associated with the key or, if the key
            corresponds to a list, the last item in the list.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        # Used in pipe_tasks.
        # getScalar() is the default behavior for __getitem__.
        return self[key]

    def getArray(self, key: str) -> List[Any]:
        """Retrieve an item as a list even if it is a scalar.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        values : `list` of any
            A list containing the value or values associated with this item.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            if key0 in self.arrays:
                return self.arrays[key0]
            elif key0 in self.scalars:
                return [self.scalars[key0]]
            elif key0 in self.metadata:
                return [self.metadata[key0]]
            raise KeyError(f"'{key}' not found")

        try:
            return self.metadata[key0].getArray(".".join(keys))
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def names(self, topLevelOnly: bool = True) -> Set[str]:
        """Return the hierarchical keys from the metadata.

        Parameters
        ----------
        topLevelOnly : `bool`
            If true, return top-level keys, otherwise full metadata item keys.

        Returns
        -------
        names : `set` of `str`
            A set of top-level keys or full metadata item keys, including
            the top-level keys.

        Notes
        -----
        Should never be called in new code with ``topLevelOnly`` set to `True`
        -- this is equivalent to asking for the keys and is the default
        when iterating through the task metadata. In this case a deprecation
        message will be issued and the ability will raise an exception
        in a future release.

        When ``topLevelOnly`` is `False` all keys, including those from the
        hierarchy and the top-level hierarchy, are returned.
        """
        if topLevelOnly:
            # stacklevel=2 attributes the warning to the calling code.
            warnings.warn("Use keys() instead. " + _DEPRECATION_REASON, FutureWarning, stacklevel=2)
            return set(self.keys())

        names: Set[str] = set()
        for k, v in self.items():
            names.add(k)  # Always include the current level
            if isinstance(v, TaskMetadata):
                names.update(f"{k}.{item}" for item in v.names(topLevelOnly=False))
        return names

    def paramNames(self, topLevelOnly: bool = True) -> Set[str]:
        """Return hierarchical names.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Control whether only top-level items are returned or items
            from the hierarchy.

        Returns
        -------
        paramNames : `set` of `str`
            If ``topLevelOnly`` is `True`, returns any keys that are not
            part of a hierarchy. If `False` also returns fully-qualified
            names from the hierarchy. Keys associated with the top
            of a hierarchy are never returned.
        """
        # Currently used by the verify package.
        paramNames: Set[str] = set()
        for k, v in self.items():
            if isinstance(v, TaskMetadata):
                if not topLevelOnly:
                    paramNames.update(f"{k}.{item}" for item in v.paramNames(topLevelOnly=topLevelOnly))
            else:
                paramNames.add(k)
        return paramNames

    @staticmethod
    def _getKeys(key: str) -> List[str]:
        """Return the key hierarchy.

        Parameters
        ----------
        key : `str`
            The key to analyze. Can be dot-separated.

        Returns
        -------
        keys : `list` of `str`
            The key hierarchy that has been split on ``.``.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        try:
            keys = key.split(".")
        except (AttributeError, TypeError):
            # AttributeError: no split() at all; TypeError: e.g. bytes,
            # whose split() rejects a str separator.
            raise KeyError(f"Invalid key '{key}': only string keys are allowed") from None
        return keys

    def keys(self) -> Tuple[str, ...]:
        """Return the top-level keys."""
        return tuple(self)

    def items(self) -> Iterator[Tuple[str, Any]]:
        """Yield the top-level keys and values."""
        yield from itertools.chain(self.scalars.items(), self.arrays.items(), self.metadata.items())

    def __len__(self) -> int:
        """Return the number of items."""
        return len(self.scalars) + len(self.arrays) + len(self.metadata)

    # This is actually a Liskov substitution violation, because
    # pydantic.BaseModel says __iter__ should return something else. But the
    # pydantic docs say to do exactly this in order to make a mapping-like
    # BaseModel, so that's what we do.
    def __iter__(self) -> Iterator[str]:  # type: ignore
        """Return an iterator over each key."""
        # The order of keys is not preserved since items can move
        # from scalar to array.
        return itertools.chain(iter(self.scalars), iter(self.arrays), iter(self.metadata))

    def __getitem__(self, key: str) -> Any:
        """Retrieve the item associated with the key.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. For compatibility with ``PropertySet``, if the key
            refers to an array, the final element is returned and not the
            array itself.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            if key0 in self.scalars:
                return self.scalars[key0]
            if key0 in self.metadata:
                return self.metadata[key0]
            if key0 in self.arrays:
                return self.arrays[key0][-1]
            raise KeyError(f"'{key}' not found")
        # Hierarchical lookup so the top key can only be in the metadata
        # property. Trap KeyError and reraise so that the correct key
        # in the hierarchy is reported.
        try:
            # And forward request to that metadata.
            return self.metadata[key0][".".join(keys)]
        except KeyError:
            raise KeyError(f"'{key}' not found") from None

    def get(self, key: str, default: Any = None) -> Any:
        """Retrieve the item associated with the key or a default.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.
        default
            The value to return if the key does not exist.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. If the key refers to an array, the final element
            is returned and not the array itself; this is consistent with
            `__getitem__` and `PropertySet.get`, but not ``to_dict().get``.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def __setitem__(self, key: str, item: Any) -> None:
        """Store the given item.

        Parameters
        ----------
        key : `str`
            The key to assign to. Can be dot-separated hierarchical, in
            which case intermediate `TaskMetadata` levels are created as
            needed.
        item
            The value to store. Any existing entry with the same key is
            replaced, whatever its kind.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        ValueError
            Raised if the value is not of a supported type.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            slot_type, item = self._validate_value(item)
            slots: Dict[str, Dict[str, Any]] = {
                "array": self.arrays,
                "scalar": self.scalars,
                "metadata": self.metadata,
            }
            primary = slots.pop(slot_type, None)
            if primary is None:
                raise AssertionError(f"Unknown slot type returned from validator: {slot_type}")

            # Assign the value to the right place.
            primary[key0] = item
            for other in slots.values():
                # Remove any other entries.
                other.pop(key0, None)
            return

        # This must be hierarchical so forward to the child TaskMetadata.
        subkey = ".".join(keys)
        child = self.metadata.get(key0)
        if child is None:
            # Fill a detached child first and attach it only on success, so
            # that a rejected value does not leave an empty TaskMetadata
            # behind in the hierarchy.
            child = TaskMetadata()
            child[subkey] = item
            self.metadata[key0] = child
        else:
            child[subkey] = item

        # Ensure we have cleared out anything with the same name elsewhere.
        self.scalars.pop(key0, None)
        self.arrays.pop(key0, None)

    def __contains__(self, key: str) -> bool:
        """Determine if the key exists."""
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            return key0 in self.scalars or key0 in self.arrays or key0 in self.metadata

        if key0 in self.metadata:
            return ".".join(keys) in self.metadata[key0]
        return False

    def __delitem__(self, key: str) -> None:
        """Remove the specified item.

        Parameters
        ----------
        key : `str`
            The key to remove. Can be dot-separated hierarchical.

        Raises
        ------
        KeyError
            Raised if the item is not present.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            # MyPy can't figure out that this way to combine the types in the
            # tuple is the one that matters, and annotating a local variable
            # helps it out.
            stores: Tuple[Dict[str, Any], ...] = (self.scalars, self.arrays, self.metadata)
            for store in stores:
                if key0 in store:
                    del store[key0]
                    return
            raise KeyError(f"'{key}' not found")

        try:
            del self.metadata[key0][".".join(keys)]
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def _validate_value(self, value: Any) -> Tuple[str, Any]:
        """Validate the given value.

        Parameters
        ----------
        value : Any
            Value to check.

        Returns
        -------
        slot_type : `str`
            The type of value given. Options are "scalar", "array", "metadata".
        item : Any
            The item that was given but possibly modified to conform to
            the slot type.

        Raises
        ------
        ValueError
            Raised if the value is not a recognized type, if it is an empty
            sequence, or if it is a sequence with elements of differing or
            unsupported types.
        """
        # Test the simplest option first.
        value_type = type(value)
        if value_type in _ALLOWED_PRIMITIVE_TYPES:
            return "scalar", value

        if isinstance(value, TaskMetadata):
            return "metadata", value
        if isinstance(value, Mapping):
            return "metadata", self.from_dict(value)

        if _isListLike(value):
            # For model consistency, need to check that every item in the
            # list has the same type.
            value = list(value)
            if not value:
                # Without an element the type of the array is unknown, and
                # element access elsewhere assumes non-empty arrays.
                raise ValueError(
                    "TaskMetadata does not support empty sequences since the element type is undefined."
                )

            type0 = type(value[0])
            for i in value:
                if type(i) is not type0:
                    raise ValueError(
                        "Type mismatch in supplied list. TaskMetadata requires all"
                        f" elements have same type but see {type(i)} and {type0}."
                    )

            if type0 not in _ALLOWED_PRIMITIVE_TYPES:
                # Must check to see if we got numpy floats or something.
                type_cast: type
                if isinstance(value[0], numbers.Integral):
                    type_cast = int
                elif isinstance(value[0], numbers.Real):
                    type_cast = float
                else:
                    raise ValueError(
                        f"Supplied list has element of type '{type0}'. "
                        "TaskMetadata can only accept primitive types in lists."
                    )

                value = [type_cast(v) for v in value]

            return "array", value

        # Sometimes a numpy number is given.
        if isinstance(value, numbers.Integral):
            value = int(value)
            return "scalar", value
        if isinstance(value, numbers.Real):
            value = float(value)
            return "scalar", value

        raise ValueError(f"TaskMetadata does not support values of type {value_type!r} (got {value!r}).")
# Needed because a TaskMetadata can contain a TaskMetadata: the "TaskMetadata"
# string annotation on the ``metadata`` field can only be resolved once the
# class itself has been defined.
TaskMetadata.update_forward_refs()