Coverage for python/lsst/pipe/base/_task_metadata.py: 16%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Public API of this module: only the metadata container is exported.
__all__ = ["TaskMetadata"]
24import itertools
25import numbers
26import warnings
27from collections.abc import Sequence
28from typing import Any, Collection, Dict, List, Mapping, Protocol, Union
30from deprecated.sphinx import deprecated
31from pydantic import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr
# Text appended to every deprecation message issued by this module, and the
# version string passed to the ``deprecated`` decorator.
_DEPRECATION_REASON = "Will be removed after v25."
_DEPRECATION_VERSION = "v24"

# The types allowed in a Task metadata field are restricted
# to allow predictable serialization.
# Membership is tested with the exact ``type()`` of a value, so subclasses
# of these types do not match.
_ALLOWED_PRIMITIVE_TYPES = (str, float, int, bool)
class PropertySetLike(Protocol):
    """Structural type for objects resembling ``lsst.daf.base.PropertySet``.

    Only the two methods needed to convert such an object into a
    `TaskMetadata` are declared here.
    """

    def paramNames(self, topLevelOnly: bool = True) -> Collection[str]:
        """Return the names of the stored parameters.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Whether to restrict the result to top-level names.
        """

    def getArray(self, name: str) -> Any:
        """Return the values stored under ``name``.

        Parameters
        ----------
        name : `str`
            Name of the parameter to retrieve.
        """
def _isListLike(v):
    """Report whether ``v`` should be treated as a list of values.

    Any `collections.abc.Sequence` qualifies except `str`, which is a
    sequence of characters but is stored as a single scalar.
    """
    if isinstance(v, str):
        return False
    return isinstance(v, Sequence)
class TaskMetadata(BaseModel):
    """Dict-like object for storing task metadata.

    Metadata can be stored at two levels: single task or task plus subtasks.
    The latter is called full metadata of a task and has a form

        topLevelTaskName:subtaskName:subsubtaskName.itemName

    Metadata item key of a task (`itemName` above) must not contain `.`,
    which serves as a separator in full metadata keys and turns
    the value into sub-dictionary. Arbitrary hierarchies are supported.

    Deprecated methods are for compatibility with
    the predecessor containers.

    Notes
    -----
    Values are held in three separate mappings depending on their kind:
    ``scalars`` for single primitive values, ``arrays`` for homogeneous
    lists of primitives and ``metadata`` for nested `TaskMetadata`. A given
    top-level key is expected to live in only one of them at a time.
    """

    scalars: Dict[str, Union[StrictFloat, StrictInt, StrictBool, StrictStr]] = Field(default_factory=dict)
    arrays: Dict[str, Union[List[StrictFloat], List[StrictInt], List[StrictBool], List[StrictStr]]] = Field(
        default_factory=dict
    )
    metadata: Dict[str, "TaskMetadata"] = Field(default_factory=dict)

    @classmethod
    def from_dict(cls, d: Mapping[str, Any]) -> "TaskMetadata":
        """Create a TaskMetadata from a dictionary.

        Parameters
        ----------
        d : `Mapping`
            Mapping to convert. Can be hierarchical. Any dictionaries
            in the hierarchy are converted to `TaskMetadata`.

        Returns
        -------
        meta : `TaskMetadata`
            Newly-constructed metadata.
        """
        metadata = cls()
        # Item assignment validates each value and routes it to the
        # right internal mapping.
        for k, v in d.items():
            metadata[k] = v
        return metadata

    @classmethod
    def from_metadata(cls, ps: PropertySetLike) -> "TaskMetadata":
        """Create a TaskMetadata from a PropertySet-like object.

        Parameters
        ----------
        ps : `PropertySetLike` or `TaskMetadata`
            A ``PropertySet``-like object to be transformed to a
            `TaskMetadata`. A `TaskMetadata` can be copied using this
            class method.

        Returns
        -------
        tm : `TaskMetadata`
            Newly-constructed metadata.

        Notes
        -----
        Items stored in single-element arrays in the supplied object
        will be converted to scalars in the newly-created object.
        """
        # Use hierarchical names to assign values from input to output.
        # This API exists for both PropertySet and TaskMetadata.
        # from_dict() does not work because PropertySet is not declared
        # to be a Mapping.
        # PropertySet.toDict() is not present in TaskMetadata so is best
        # avoided.
        metadata = cls()
        for key in sorted(ps.paramNames(topLevelOnly=False)):
            value = ps.getArray(key)
            if len(value) == 1:
                value = value[0]
            metadata[key] = value
        return metadata

    def to_dict(self) -> Dict[str, Any]:
        """Convert the class to a simple dictionary.

        Returns
        -------
        d : `dict`
            Simple dictionary that can contain scalar values, array values
            or other dictionary values.

        Notes
        -----
        Unlike `dict()`, this method hides the model layout and combines
        scalars, arrays, and other metadata in the same dictionary. Can be
        used when a simple dictionary is needed. Use
        `TaskMetadata.from_dict()` to convert it back.
        """
        d: Dict[str, Any] = {}
        d.update(self.scalars)
        d.update(self.arrays)
        for k, v in self.metadata.items():
            d[k] = v.to_dict()
        return d

    def add(self, name: str, value: Any) -> None:
        """Store a new value, adding to a list if one already exists.

        Parameters
        ----------
        name : `str`
            Name of the metadata property. Can be dot-separated
            hierarchical; missing levels of the hierarchy are created.
        value
            Metadata property value.

        Raises
        ------
        ValueError
            Raised if the value is not a primitive or a sequence of
            primitives, if its type differs from the values already stored
            under this name, or if the name (or a parent in the hierarchy)
            refers to an item of an incompatible kind.
        """
        keys = self._getKeys(name)
        key0 = keys.pop(0)

        if keys:
            # Hierarchical name: delegate to the child, creating it on
            # demand in the same way that item assignment does.
            subkey = ".".join(keys)
            child = self.metadata.get(key0)
            if child is not None:
                child.add(subkey, value)
                return
            if key0 in self.scalars or key0 in self.arrays:
                raise ValueError(f"Can not add() to key '{name}' since '{key0}' is not a TaskMetadata")
            # Attach the new child only once the nested add() has worked
            # so that a rejected value does not leave an empty child behind.
            child = TaskMetadata()
            child.add(subkey, value)
            self.metadata[key0] = child
            return

        # If add() is being used, always store the value in the arrays
        # property as a list. It's likely there will be another call.
        slot_type, value = self._validate_value(value)
        if slot_type == "scalar":
            value = [value]
        elif slot_type != "array":
            raise ValueError("add() can only be used for primitive types or sequences of those types.")

        if key0 in self.metadata:
            raise ValueError(f"Can not add() to key '{name}' since that is a TaskMetadata")

        # Work out what is already stored without modifying anything, so
        # that a type mismatch leaves the container untouched.
        if key0 in self.scalars:
            current = [self.scalars[key0]]
        else:
            current = self.arrays.get(key0)

        # Check that the type is not changing. An existing empty array has
        # no element type to compare against.
        if current and (curtype := type(current[0])) is not (newtype := type(value[0])):
            raise ValueError(f"Type mismatch in add() -- currently {curtype} but adding {newtype}")

        if current is None:
            current = value
        else:
            # Extends an existing array in place.
            current.extend(value)
        self.scalars.pop(key0, None)
        self.arrays[key0] = current

    @deprecated(
        reason="Cast the return value to float explicitly. " + _DEPRECATION_REASON,
        version=_DEPRECATION_VERSION,
        category=FutureWarning,
    )
    def getAsDouble(self, key: str) -> float:
        """Return the value cast to a `float`.

        Parameters
        ----------
        key : `str`
            Item to return. Can be dot-separated hierarchical.

        Returns
        -------
        value : `float`
            The value cast to a `float`.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        return float(self[key])

    def getScalar(self, key: str) -> Any:
        """Retrieve a scalar item even if the item is a list.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        value : Any
            Either the value associated with the key or, if the key
            corresponds to a list, the last item in the list.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        # Used in pipe_tasks.
        # getScalar() is the default behavior for __getitem__.
        return self[key]

    def getArray(self, key: str) -> List[Any]:
        """Retrieve an item as a list even if it is a scalar.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        values : `list` of any
            A list containing the value or values associated with this item.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            if key0 in self.arrays:
                return self.arrays[key0]
            if key0 in self.scalars:
                return [self.scalars[key0]]
            if key0 in self.metadata:
                return [self.metadata[key0]]
            raise KeyError(f"'{key}' not found")

        try:
            return self.metadata[key0].getArray(".".join(keys))
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def names(self, topLevelOnly: bool = True) -> Collection[str]:
        """Return the hierarchical keys from the metadata.

        Parameters
        ----------
        topLevelOnly : `bool`
            If true, return top-level keys, otherwise full metadata item keys.

        Returns
        -------
        names : `collections.abc.Set`
            A set of top-level keys or full metadata item keys, including
            the top-level keys.

        Notes
        -----
        Should never be called in new code with ``topLevelOnly`` set to `True`
        -- this is equivalent to asking for the keys and is the default
        when iterating through the task metadata. In this case a deprecation
        message will be issued and the ability will raise an exception
        in a future release.

        When ``topLevelOnly`` is `False` all keys, including those from the
        hierarchy and the top-level hierarchy, are returned.
        """
        if topLevelOnly:
            # stacklevel=2 attributes the warning to the calling code.
            warnings.warn("Use keys() instead. " + _DEPRECATION_REASON, FutureWarning, stacklevel=2)
            return set(self.keys())

        names = set()
        for k, v in self.items():
            names.add(k)  # Always include the current level
            if isinstance(v, TaskMetadata):
                names.update({k + "." + item for item in v.names(topLevelOnly=topLevelOnly)})
        return names

    def paramNames(self, topLevelOnly: bool = True) -> Collection[str]:
        """Return hierarchical names.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Control whether only top-level items are returned or items
            from the hierarchy. Defaults to `True`, matching the
            `PropertySetLike` protocol.

        Returns
        -------
        paramNames : `set` of `str`
            If ``topLevelOnly`` is `True`, returns any keys that are not
            part of a hierarchy. If `False` also returns fully-qualified
            names from the hierarchy. Keys associated with the top
            of a hierarchy are never returned.
        """
        # Currently used by the verify package.
        paramNames = set()
        for k, v in self.items():
            if isinstance(v, TaskMetadata):
                if not topLevelOnly:
                    paramNames.update({k + "." + item for item in v.paramNames(topLevelOnly=topLevelOnly)})
            else:
                paramNames.add(k)
        return paramNames

    @deprecated(
        reason="Use standard assignment syntax. " + _DEPRECATION_REASON,
        version=_DEPRECATION_VERSION,
        category=FutureWarning,
    )
    def set(self, key: str, item: Any) -> None:
        """Set the value of the supplied key."""
        self[key] = item

    @deprecated(
        reason="Use standard del dict syntax. " + _DEPRECATION_REASON,
        version=_DEPRECATION_VERSION,
        category=FutureWarning,
    )
    def remove(self, key: str) -> None:
        """Remove the item without raising if absent."""
        try:
            del self[key]
        except KeyError:
            # The PropertySet.remove() should always work.
            pass

    @staticmethod
    def _getKeys(key: str) -> List[str]:
        """Return the key hierarchy.

        Parameters
        ----------
        key : `str`
            The key to analyze. Can be dot-separated.

        Returns
        -------
        keys : `list` of `str`
            The key hierarchy that has been split on ``.``.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        # Test the type explicitly rather than trapping whatever
        # exception a failed split() happens to raise.
        if not isinstance(key, str):
            raise KeyError(f"Invalid key '{key}': only string keys are allowed")
        return key.split(".")

    def keys(self):
        """Return the top-level keys."""
        return tuple(self)

    def items(self):
        """Yield the top-level keys and values."""
        yield from itertools.chain(self.scalars.items(), self.arrays.items(), self.metadata.items())

    def __len__(self) -> int:
        """Return the number of items."""
        return len(self.scalars) + len(self.arrays) + len(self.metadata)

    def __iter__(self):
        """Return an iterator over each key."""
        # The order of keys is not preserved since items can move
        # from scalar to array.
        return itertools.chain(iter(self.scalars), iter(self.arrays), iter(self.metadata))

    def __getitem__(self, key: str) -> Any:
        """Retrieve the item associated with the key.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. For compatibility with ``PropertySet``, if the key
            refers to an array, the final element is returned and not the
            array itself.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            if key0 in self.scalars:
                return self.scalars[key0]
            if key0 in self.metadata:
                return self.metadata[key0]
            if key0 in self.arrays:
                return self.arrays[key0][-1]
            raise KeyError(f"'{key}' not found")

        # Hierarchical lookup so the top key can only be in the metadata
        # property. Trap KeyError and reraise so that the correct key
        # in the hierarchy is reported.
        try:
            # And forward request to that metadata.
            return self.metadata[key0][".".join(keys)]
        except KeyError:
            raise KeyError(f"'{key}' not found") from None

    def __setitem__(self, key: str, item: Any) -> None:
        """Store the given item.

        Parameters
        ----------
        key : `str`
            The key to assign to. Can be dot-separated hierarchical, in
            which case missing levels of the hierarchy are created.
        item : Any
            Value to store. Any existing item of a different kind with the
            same name is replaced.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        ValueError
            Raised if the value is not of a supported type.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)

        if keys:
            # This must be hierarchical so forward to the child TaskMetadata.
            child = self.metadata.get(key0)
            is_new = child is None
            if is_new:
                child = TaskMetadata()
            child[".".join(keys)] = item
            # Attach a newly-created child only after the assignment has
            # succeeded so a rejected value leaves no empty child behind.
            if is_new:
                self.metadata[key0] = child

            # Ensure we have cleared out anything with the same name elsewhere.
            self.scalars.pop(key0, None)
            self.arrays.pop(key0, None)
            return

        slot_type, item = self._validate_value(item)
        slots = {"array": self.arrays, "scalar": self.scalars, "metadata": self.metadata}
        primary = slots.pop(slot_type, None)
        if primary is None:
            raise AssertionError(f"Unknown slot type returned from validator: {slot_type}")

        # Assign the value to the right place.
        primary[key0] = item
        for other in slots.values():
            # Remove any other entries.
            other.pop(key0, None)

    def __contains__(self, key: str) -> bool:
        """Determine if the key exists.

        Parameters
        ----------
        key : `str`
            The key to look for. Can be dot-separated hierarchical.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            return key0 in self.scalars or key0 in self.arrays or key0 in self.metadata

        if key0 in self.metadata:
            return ".".join(keys) in self.metadata[key0]
        return False

    def __delitem__(self, key: str) -> None:
        """Remove the specified item.

        Raises
        ------
        KeyError
            Raised if the item is not present.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            for slot in (self.scalars, self.arrays, self.metadata):
                if key0 in slot:
                    del slot[key0]
                    return
            raise KeyError(f"'{key}' not found")

        try:
            del self.metadata[key0][".".join(keys)]
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def _validate_value(self, value: Any):
        """Validate the given value.

        Parameters
        ----------
        value : Any
            Value to check.

        Returns
        -------
        slot_type : `str`
            The type of value given. Options are "scalar", "array", "metadata".
        item : Any
            The item that was given but possibly modified to conform to
            the slot type.

        Raises
        ------
        ValueError
            Raised if the value is not a recognized type, if a sequence is
            empty, or if a sequence mixes element types.
        """
        # Test the simplest option first.
        value_type = type(value)
        if value_type in _ALLOWED_PRIMITIVE_TYPES:
            return "scalar", value

        if isinstance(value, TaskMetadata):
            return "metadata", value
        if isinstance(value, Mapping):
            return "metadata", self.from_dict(value)

        if _isListLike(value):
            # For model consistency, need to check that every item in the
            # list has the same type.
            value = list(value)
            if not value:
                # There is no element from which to determine the type
                # and retrieval assumes arrays have at least one element.
                raise ValueError("TaskMetadata can not store an empty sequence.")

            type0 = type(value[0])
            for i in value:
                if type(i) is not type0:
                    raise ValueError(
                        "Type mismatch in supplied list. TaskMetadata requires all"
                        f" elements have same type but see {type(i)} and {type0}."
                    )

            if type0 not in _ALLOWED_PRIMITIVE_TYPES:
                # Must check to see if we got numpy floats or something.
                if isinstance(value[0], numbers.Integral):
                    type_cast = int
                elif isinstance(value[0], numbers.Real):
                    type_cast = float
                else:
                    raise ValueError(
                        f"Supplied list has element of type '{type0}'. "
                        "TaskMetadata can only accept primitive types in lists."
                    )

                value = [type_cast(v) for v in value]

            return "array", value

        # Sometimes a numpy number is given.
        if isinstance(value, numbers.Integral):
            return "scalar", int(value)
        if isinstance(value, numbers.Real):
            return "scalar", float(value)

        raise ValueError(f"TaskMetadata does not support values of type {value_type!r}.")
# Needed because a TaskMetadata can contain a TaskMetadata.
# The ``metadata`` field is annotated with the string "TaskMetadata", which
# can only be resolved once the class definition is complete.
TaskMetadata.update_forward_refs()