Coverage for python/lsst/pipe/base/_task_metadata.py: 14%
205 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-15 02:49 -0700
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-15 02:49 -0700
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22__all__ = ["TaskMetadata"]
24import itertools
25import numbers
26import warnings
27from collections.abc import Collection, Iterator, Mapping, Sequence
28from typing import Any, Protocol
30from pydantic import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr
# Text appended to the deprecation warning issued by `TaskMetadata.names`.
_DEPRECATION_REASON = "Will be removed after v25."
# NOTE(review): not referenced anywhere else in the visible file; presumably
# the release in which the deprecation started -- confirm before removing.
_DEPRECATION_VERSION = "v24"

# The types allowed in a Task metadata field are restricted
# to allow predictable serialization. Membership is tested with the exact
# type of a value (``type(value) in ...``), not with ``isinstance``.
_ALLOWED_PRIMITIVE_TYPES = (str, float, int, bool)
class PropertySetLike(Protocol):
    """Protocol describing the subset of ``lsst.daf.base.PropertySet`` used here.

    Only the two methods needed to convert a ``PropertySet`` into a
    `TaskMetadata` are declared; any object providing them satisfies this
    protocol structurally.
    """

    def paramNames(self, topLevelOnly: bool = True) -> Collection[str]:
        """Return the names of the stored items.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Flag selecting between top-level names only and names from
            the full hierarchy.

        Returns
        -------
        names : `~collections.abc.Collection` [`str`]
            Names that can be passed to `getArray`.
        """
        ...

    def getArray(self, name: str) -> Any:
        """Return the values stored under ``name``.

        Parameters
        ----------
        name : `str`
            Name of the item to retrieve.

        Returns
        -------
        values : `typing.Any`
            The stored values. `TaskMetadata.from_metadata` takes ``len()``
            of the result and indexes it, so it is expected to be
            sequence-like.
        """
        ...
54def _isListLike(v: Any) -> bool:
55 return isinstance(v, Sequence) and not isinstance(v, str)
class TaskMetadata(BaseModel):
    """Dict-like object for storing task metadata.

    Metadata can be stored at two levels: single task or task plus subtasks.
    The latter is called full metadata of a task and has a form

        topLevelTaskName:subtaskName:subsubtaskName.itemName

    Metadata item key of a task (`itemName` above) must not contain `.`,
    which serves as a separator in full metadata keys and turns
    the value into sub-dictionary. Arbitrary hierarchies are supported.
    """

    # Single primitive values, keyed by item name.
    scalars: dict[str, StrictFloat | StrictInt | StrictBool | StrictStr] = Field(default_factory=dict)
    # Lists of primitive values; each list holds a single element type.
    arrays: dict[str, list[StrictFloat] | list[StrictInt] | list[StrictBool] | list[StrictStr]] = Field(
        default_factory=dict
    )
    # Nested metadata, keyed by the first component of a dotted key.
    metadata: dict[str, "TaskMetadata"] = Field(default_factory=dict)

    @classmethod
    def from_dict(cls, d: Mapping[str, Any]) -> "TaskMetadata":
        """Create a TaskMetadata from a dictionary.

        Parameters
        ----------
        d : `~collections.abc.Mapping`
            Mapping to convert. Can be hierarchical. Any dictionaries
            in the hierarchy are converted to `TaskMetadata`.

        Returns
        -------
        meta : `TaskMetadata`
            Newly-constructed metadata.

        Raises
        ------
        ValueError
            Raised if a value has a type that can not be stored.
        """
        metadata = cls()
        # Item assignment validates each value and routes it to the
        # scalars, arrays or metadata slot.
        for k, v in d.items():
            metadata[k] = v
        return metadata

    @classmethod
    def from_metadata(cls, ps: PropertySetLike) -> "TaskMetadata":
        """Create a TaskMetadata from a PropertySet-like object.

        Parameters
        ----------
        ps : `PropertySetLike` or `TaskMetadata`
            A ``PropertySet``-like object to be transformed to a
            `TaskMetadata`. A `TaskMetadata` can be copied using this
            class method.

        Returns
        -------
        tm : `TaskMetadata`
            Newly-constructed metadata.

        Notes
        -----
        Items stored in single-element arrays in the supplied object
        will be converted to scalars in the newly-created object.
        """
        # Use hierarchical names to assign values from input to output.
        # This API exists for both PropertySet and TaskMetadata.
        # from_dict() does not work because PropertySet is not declared
        # to be a Mapping.
        # PropertySet.toDict() is not present in TaskMetadata so is best
        # avoided.
        metadata = cls()
        for key in sorted(ps.paramNames(topLevelOnly=False)):
            value = ps.getArray(key)
            if len(value) == 1:
                value = value[0]
            metadata[key] = value
        return metadata

    def to_dict(self) -> dict[str, Any]:
        """Convert the class to a simple dictionary.

        Returns
        -------
        d : `dict`
            Simple dictionary that can contain scalar values, array values
            or other dictionary values.

        Notes
        -----
        Unlike `dict()`, this method hides the model layout and combines
        scalars, arrays, and other metadata in the same dictionary. Can be
        used when a simple dictionary is needed.  Use
        `TaskMetadata.from_dict()` to convert it back.

        The array values in the result are the same list objects that are
        held by this object; they are not copied.
        """
        d: dict[str, Any] = {**self.scalars, **self.arrays}
        d.update((k, v.to_dict()) for k, v in self.metadata.items())
        return d

    def add(self, name: str, value: Any) -> None:
        """Store a new value, adding to a list if one already exists.

        Parameters
        ----------
        name : `str`
            Name of the metadata property. Can be dot-separated, in which
            case the request is forwarded to the child `TaskMetadata`.
        value
            Metadata property value.

        Raises
        ------
        ValueError
            Raised if the value is not a primitive or a sequence of
            primitives, if ``name`` refers to a nested `TaskMetadata`, or if
            the type of the new value differs from the type already stored.
        KeyError
            Raised if ``name`` is hierarchical and the parent does not
            exist as nested metadata.

        Notes
        -----
        Nothing is modified when an exception is raised.
        """
        keys = self._getKeys(name)
        key0 = keys.pop(0)
        if keys:
            # Hierarchical name: the child handles the rest of the key.
            self.metadata[key0].add(".".join(keys), value)
            return

        # If add() is being used, always store the value in the arrays
        # property as a list. It's likely there will be another call.
        slot_type, value = self._validate_value(value)
        if slot_type == "scalar":
            value = [value]
        elif slot_type != "array":
            raise ValueError("add() can only be used for primitive types or sequences of those types.")

        if key0 in self.metadata:
            raise ValueError(f"Can not add() to key '{name}' since that is a TaskMetadata")

        # Work out what is stored already without modifying anything, so
        # that a type mismatch leaves this object untouched.
        existing: list[Any] | None
        if key0 in self.scalars:
            existing = [self.scalars[key0]]
        else:
            existing = self.arrays.get(key0)

        if existing is None:
            self.arrays[key0] = value
            return

        # Check that the type is not changing. An empty stored list has no
        # element type to compare against.
        if existing:
            curtype, newtype = type(existing[0]), type(value[0])
            if curtype is not newtype:
                raise ValueError(f"Type mismatch in add() -- currently {curtype} but adding {newtype}")

        existing.extend(value)
        # MyPy should be able to figure out that List[Union[T1, T2]] is
        # compatible with Union[List[T1], List[T2]] if the list has
        # a single element type, but it can't.
        self.arrays[key0] = existing  # type: ignore
        # A scalar that has been promoted to an array must not linger.
        self.scalars.pop(key0, None)

    def getScalar(self, key: str) -> str | int | float | bool:
        """Retrieve a scalar item even if the item is a list.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        value : `str`, `int`, `float`, or `bool`
            Either the value associated with the key or, if the key
            corresponds to a list, the last item in the list.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        # Used in pipe_tasks.
        # getScalar() is the default behavior for __getitem__.
        return self[key]

    def getArray(self, key: str) -> list[Any]:
        """Retrieve an item as a list even if it is a scalar.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        values : `list` of any
            A list containing the value or values associated with this item.
            For an array item this is the stored list itself, not a copy.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if keys:
            try:
                return self.metadata[key0].getArray(".".join(keys))
            except KeyError:
                # Report the correct key.
                raise KeyError(f"'{key}' not found") from None

        if key0 in self.arrays:
            return self.arrays[key0]
        if key0 in self.scalars:
            return [self.scalars[key0]]
        if key0 in self.metadata:
            return [self.metadata[key0]]
        raise KeyError(f"'{key}' not found")

    def names(self, topLevelOnly: bool = True) -> set[str]:
        """Return the hierarchical keys from the metadata.

        Parameters
        ----------
        topLevelOnly : `bool`
            If true, return top-level keys, otherwise full metadata item keys.

        Returns
        -------
        names : `set` [`str`]
            A set of top-level keys or full metadata item keys, including
            the top-level keys.

        Notes
        -----
        Should never be called in new code with ``topLevelOnly`` set to `True`
        -- this is equivalent to asking for the keys and is the default
        when iterating through the task metadata. In this case a deprecation
        message will be issued and the ability will raise an exception
        in a future release.

        When ``topLevelOnly`` is `False` all keys, including those from the
        hierarchy and the top-level hierarchy, are returned.
        """
        if topLevelOnly:
            # stacklevel=2 attributes the warning to the calling code.
            warnings.warn("Use keys() instead. " + _DEPRECATION_REASON, FutureWarning, stacklevel=2)
            return set(self.keys())

        names = set()
        for k, v in self.items():
            names.add(k)  # Always include the current level
            if isinstance(v, TaskMetadata):
                names.update(f"{k}.{item}" for item in v.names(topLevelOnly=False))
        return names

    def paramNames(self, topLevelOnly: bool = True) -> set[str]:
        """Return hierarchical names.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Control whether only top-level items are returned or items
            from the hierarchy. Defaults to `True`, matching the signature
            declared by `PropertySetLike`.

        Returns
        -------
        paramNames : `set` of `str`
            If ``topLevelOnly`` is `True`, returns any keys that are not
            part of a hierarchy. If `False` also returns fully-qualified
            names from the hierarchy. Keys associated with the top
            of a hierarchy are never returned.
        """
        # Currently used by the verify package.
        paramNames = set()
        for k, v in self.items():
            if not isinstance(v, TaskMetadata):
                paramNames.add(k)
            elif not topLevelOnly:
                paramNames.update(f"{k}.{item}" for item in v.paramNames(topLevelOnly=False))
        return paramNames

    @staticmethod
    def _getKeys(key: str) -> list[str]:
        """Return the key hierarchy.

        Parameters
        ----------
        key : `str`
            The key to analyze. Can be dot-separated.

        Returns
        -------
        keys : `list` of `str`
            The key hierarchy that has been split on ``.``. Always contains
            at least one element.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        try:
            return key.split(".")
        except (AttributeError, TypeError):
            # AttributeError: no split() method at all (e.g. int, None).
            # TypeError: a split() that rejects a str separator (e.g. bytes).
            raise KeyError(f"Invalid key '{key}': only string keys are allowed") from None

    def keys(self) -> tuple[str, ...]:
        """Return the top-level keys."""
        return tuple(self)

    def items(self) -> Iterator[tuple[str, Any]]:
        """Yield the top-level keys and values."""
        yield from itertools.chain(self.scalars.items(), self.arrays.items(), self.metadata.items())

    def __len__(self) -> int:
        """Return the number of items."""
        return len(self.scalars) + len(self.arrays) + len(self.metadata)

    # This is actually a Liskov substitution violation, because
    # pydantic.BaseModel says __iter__ should return something else.  But the
    # pydantic docs say to do exactly this to in order to make a mapping-like
    # BaseModel, so that's what we do.
    def __iter__(self) -> Iterator[str]:  # type: ignore
        """Return an iterator over each key."""
        # The order of keys is not preserved since items can move
        # from scalar to array.
        return itertools.chain(iter(self.scalars), iter(self.arrays), iter(self.metadata))

    def __getitem__(self, key: str) -> Any:
        """Retrieve the item associated with the key.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. For compatibility with ``PropertySet``, if the key
            refers to an array, the final element is returned and not the
            array itself.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if keys:
            # Hierarchical lookup so the top key can only be in the metadata
            # property. Trap KeyError and reraise so that the correct key
            # in the hierarchy is reported.
            try:
                # And forward request to that metadata.
                return self.metadata[key0][".".join(keys)]
            except KeyError:
                raise KeyError(f"'{key}' not found") from None

        if key0 in self.scalars:
            return self.scalars[key0]
        if key0 in self.metadata:
            return self.metadata[key0]
        if key0 in self.arrays:
            return self.arrays[key0][-1]
        raise KeyError(f"'{key}' not found")

    def get(self, key: str, default: Any = None) -> Any:
        """Retrieve the item associated with the key or a default.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.
        default
            The value to return if the key does not exist.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. If the key refers to an array, the final element
            is returned and not the array itself; this is consistent with
            `__getitem__` and `PropertySet.get`, but not ``to_dict().get``.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def __setitem__(self, key: str, item: Any) -> None:
        """Store the given item.

        Parameters
        ----------
        key : `str`
            The key to assign. Can be dot-separated hierarchical, in which
            case intermediate `TaskMetadata` objects are created as needed.
        item
            Value to store. Any existing entry with the same top-level name
            is replaced, whatever its kind.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        ValueError
            Raised if the value has a type that can not be stored. In that
            case this object is left unchanged.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if keys:
            # This must be hierarchical so forward to the child TaskMetadata.
            # A newly-created child is attached only once the nested
            # assignment has succeeded, so a rejected value does not leave
            # an empty child behind.
            child = self.metadata.get(key0)
            if child is None:
                child = TaskMetadata()
            child[".".join(keys)] = item
            self.metadata[key0] = child

            # Ensure we have cleared out anything with the same name elsewhere.
            self.scalars.pop(key0, None)
            self.arrays.pop(key0, None)
            return

        slots: dict[str, dict[str, Any]] = {
            "array": self.arrays,
            "scalar": self.scalars,
            "metadata": self.metadata,
        }
        slot_type, item = self._validate_value(item)
        primary = slots.pop(slot_type, None)
        if primary is None:
            raise AssertionError(f"Unknown slot type returned from validator: {slot_type}")

        # Assign the value to the right place.
        primary[key0] = item
        # Remove any entries of the same name from the other slots.
        for other in slots.values():
            other.pop(key0, None)

    def __contains__(self, key: str) -> bool:
        """Determine if the key exists.

        Parameters
        ----------
        key : `str`
            The key to look for. Can be dot-separated hierarchical.

        Returns
        -------
        present : `bool`
            `True` if an item is stored under this key.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            return key0 in self.scalars or key0 in self.arrays or key0 in self.metadata

        child = self.metadata.get(key0)
        if child is None:
            return False
        return ".".join(keys) in child

    def __delitem__(self, key: str) -> None:
        """Remove the specified item.

        Parameters
        ----------
        key : `str`
            The key to remove. Can be dot-separated hierarchical.

        Raises
        ------
        KeyError
            Raised if the item is not present.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if keys:
            try:
                del self.metadata[key0][".".join(keys)]
            except KeyError:
                # Report the correct key.
                raise KeyError(f"'{key}' not found") from None
            return

        # MyPy can't figure out that this way to combine the types in the
        # tuple is the one that matters, and annotating a local variable
        # helps it out.
        slots: tuple[dict[str, Any], ...] = (self.scalars, self.arrays, self.metadata)
        for slot in slots:
            if key0 in slot:
                del slot[key0]
                return
        raise KeyError(f"'{key}' not found")

    def _validate_value(self, value: Any) -> tuple[str, Any]:
        """Validate the given value.

        Parameters
        ----------
        value : Any
            Value to check.

        Returns
        -------
        slot_type : `str`
            The type of value given. Options are "scalar", "array", "metadata".
        item : Any
            The item that was given but possibly modified to conform to
            the slot type.

        Raises
        ------
        ValueError
            Raised if the value is not a recognized type, if a sequence is
            empty, or if a sequence mixes element types.
        """
        # Test the simplest option first. The exact type is compared so that
        # subclasses of the primitives fall through to the conversions below.
        if type(value) in _ALLOWED_PRIMITIVE_TYPES:
            return "scalar", value

        if isinstance(value, TaskMetadata):
            return "metadata", value
        if isinstance(value, Mapping):
            return "metadata", self.from_dict(value)

        if _isListLike(value):
            value = list(value)
            if not value:
                # There is no element from which to determine the type and
                # item lookup returns the final element of an array.
                raise ValueError("TaskMetadata can not store an empty sequence.")

            # For model consistency, need to check that every item in the
            # list has the same type.
            type0 = type(value[0])
            for i in value:
                if type(i) is not type0:
                    raise ValueError(
                        "Type mismatch in supplied list. TaskMetadata requires all"
                        f" elements have same type but see {type(i)} and {type0}."
                    )

            if type0 not in _ALLOWED_PRIMITIVE_TYPES:
                # Must check to see if we got numpy floats or something.
                type_cast: type
                if isinstance(value[0], numbers.Integral):
                    type_cast = int
                elif isinstance(value[0], numbers.Real):
                    type_cast = float
                else:
                    raise ValueError(
                        f"Supplied list has element of type '{type0}'. "
                        "TaskMetadata can only accept primitive types in lists."
                    )

                value = [type_cast(v) for v in value]

            return "array", value

        # Sometimes a numpy number is given.
        if isinstance(value, numbers.Integral):
            return "scalar", int(value)
        if isinstance(value, numbers.Real):
            return "scalar", float(value)

        raise ValueError(f"TaskMetadata does not support values of type {type(value)}.")
# Needed because a TaskMetadata can contain a TaskMetadata: the ``metadata``
# field is annotated with the string "TaskMetadata", which can only be
# resolved once the class itself has been defined.
TaskMetadata.update_forward_refs()