Coverage for python/lsst/pipe/base/_task_metadata.py: 16%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22__all__ = ["TaskMetadata"]
24import itertools
25import numbers
26import warnings
27from collections.abc import Sequence
28from typing import Any, Collection, Dict, List, Mapping, Protocol, Union
30from deprecated.sphinx import deprecated
31from pydantic import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr
# Shared pieces of the deprecation notice attached to every deprecated
# TaskMetadata method in this module.
_DEPRECATION_VERSION = "v24"
_DEPRECATION_REASON = "Will be removed after v25."

# Only these primitive types may be stored in a Task metadata field;
# restricting the set keeps serialization predictable.
_ALLOWED_PRIMITIVE_TYPES = (str, float, int, bool)
class PropertySetLike(Protocol):
    """Structural type describing a ``lsst.daf.base.PropertySet``.

    Only the part of the API needed to convert a ``PropertySet``
    into a `TaskMetadata` is declared here.
    """

    def paramNames(self, topLevelOnly: bool = True) -> Collection[str]:
        # Protocol stub: implementations return the names of stored items.
        ...

    def getArray(self, name: str) -> Any:
        # Protocol stub: implementations return the values stored
        # under ``name``.
        ...
def _isListLike(v):
    """Report whether ``v`` is a sequence other than a `str`.

    Parameters
    ----------
    v : Any
        Object to test.

    Returns
    -------
    isListLike : `bool`
        `True` if ``v`` is a `collections.abc.Sequence` that is not a
        `str`, `False` otherwise.
    """
    # A str is itself a Sequence, so rule it out before the general test.
    if isinstance(v, str):
        return False
    return isinstance(v, Sequence)
class TaskMetadata(BaseModel):
    """Dict-like object for storing task metadata.

    Metadata can be stored at two levels: single task or task plus subtasks.
    The latter is called full metadata of a task and has a form

        topLevelTaskName:subtaskName:subsubtaskName.itemName

    Metadata item key of a task (`itemName` above) must not contain `.`,
    which serves as a separator in full metadata keys and turns
    the value into sub-dictionary. Arbitrary hierarchies are supported.

    Deprecated methods are for compatibility with
    the predecessor containers.
    """

    # Single primitive values keyed by item name.
    scalars: Dict[str, Union[StrictFloat, StrictInt, StrictBool, StrictStr]] = Field(default_factory=dict)
    # Homogeneous lists of primitive values keyed by item name.
    arrays: Dict[str, Union[List[StrictFloat], List[StrictInt], List[StrictBool], List[StrictStr]]] = Field(
        default_factory=dict
    )
    # Nested metadata (one level of the hierarchy) keyed by item name.
    metadata: Dict[str, "TaskMetadata"] = Field(default_factory=dict)

    @classmethod
    def from_dict(cls, d: Mapping[str, Any]) -> "TaskMetadata":
        """Create a TaskMetadata from a dictionary.

        Parameters
        ----------
        d : `Mapping`
            Mapping to convert. Can be hierarchical. Any dictionaries
            in the hierarchy are converted to `TaskMetadata`.

        Returns
        -------
        meta : `TaskMetadata`
            Newly-constructed metadata.
        """
        metadata = cls()
        for k, v in d.items():
            metadata[k] = v
        return metadata

    @classmethod
    def from_metadata(cls, ps: PropertySetLike) -> "TaskMetadata":
        """Create a TaskMetadata from a PropertySet-like object.

        Parameters
        ----------
        ps : `lsst.daf.base.PropertySet` or `TaskMetadata`
            A ``PropertySet``-like object to be transformed to a
            `TaskMetadata`. A `TaskMetadata` can be copied using this
            class method.

        Returns
        -------
        tm : `TaskMetadata`
            Newly-constructed metadata.

        Notes
        -----
        Items stored in single-element arrays in the supplied object
        will be converted to scalars in the newly-created object.
        """
        # Use hierarchical names to assign values from input to output.
        # This API exists for both PropertySet and TaskMetadata.
        # from_dict() does not work because PropertySet is not declared
        # to be a Mapping.
        # PropertySet.toDict() is not present in TaskMetadata so is best
        # avoided.
        metadata = cls()
        for key in sorted(ps.paramNames(topLevelOnly=False)):
            value = ps.getArray(key)
            if len(value) == 1:
                value = value[0]
            metadata[key] = value
        return metadata

    def add(self, name: str, value: Any) -> None:
        """Store a new value, adding to a list if one already exists.

        Parameters
        ----------
        name : `str`
            Name of the metadata property. Can be dot-separated
            hierarchical.
        value
            Metadata property value.

        Raises
        ------
        ValueError
            Raised if the value is not a primitive type or a sequence of
            primitive types, if the key already refers to a `TaskMetadata`,
            or if the type of the new value differs from the type of the
            values already stored.
        KeyError
            Raised if ``name`` is hierarchical and the parent level is not
            already present.
        """
        keys = self._getKeys(name)
        key0 = keys.pop(0)
        if keys:
            # Hierarchical name: delegate to the child metadata. The child
            # must already exist, otherwise the lookup raises KeyError.
            self.metadata[key0].add(".".join(keys), value)
            return

        # If add() is being used, always store the value in the arrays
        # property as a list. It's likely there will be another call.
        slot_type, value = self._validate_value(value)
        if slot_type == "scalar":
            value = [value]
        elif slot_type != "array":
            raise ValueError("add() can only be used for primitive types or sequences of those types.")

        if key0 in self.metadata:
            raise ValueError(f"Can not add() to key '{name}' since that is a TaskMetadata")

        if key0 in self.scalars:
            # Convert scalar to array.
            self.arrays[key0] = [self.scalars.pop(key0)]

        if key0 in self.arrays:
            # Check that the type is not changing.
            if (curtype := type(self.arrays[key0][0])) is not (newtype := type(value[0])):
                raise ValueError(f"Type mismatch in add() -- currently {curtype} but adding {newtype}")
            self.arrays[key0].extend(value)
        else:
            self.arrays[key0] = value

    @deprecated(
        reason="Cast the return value to float explicitly. " + _DEPRECATION_REASON,
        version=_DEPRECATION_VERSION,
        category=FutureWarning,
    )
    def getAsDouble(self, key: str) -> float:
        """Return the value cast to a `float`.

        Parameters
        ----------
        key : `str`
            Item to return. Can be dot-separated hierarchical.

        Returns
        -------
        value : `float`
            The value cast to a `float`.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        return float(self[key])

    def getScalar(self, key: str) -> Any:
        """Retrieve a scalar item even if the item is a list.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        value : Any
            Either the value associated with the key or, if the key
            corresponds to a list, the last item in the list.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        # Used in pipe_tasks.
        # getScalar() is the default behavior for __getitem__.
        return self[key]

    def getArray(self, key: str) -> List[Any]:
        """Retrieve an item as a list even if it is a scalar.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        values : `list` of any
            A list containing the value or values associated with this item.
            For an item stored as an array the stored list itself is
            returned, not a copy.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            if key0 in self.arrays:
                return self.arrays[key0]
            if key0 in self.scalars:
                return [self.scalars[key0]]
            if key0 in self.metadata:
                return [self.metadata[key0]]
            raise KeyError(f"'{key}' not found")

        try:
            return self.metadata[key0].getArray(".".join(keys))
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def names(self, topLevelOnly: bool = True):
        """Return the hierarchical keys from the metadata.

        Parameters
        ----------
        topLevelOnly : `bool`
            If true, return top-level keys, otherwise full metadata item keys.

        Returns
        -------
        names : `set` of `str`
            A set of top-level keys or full metadata item keys, including
            the top-level keys.

        Notes
        -----
        Should never be called in new code with ``topLevelOnly`` set to `True`
        -- this is equivalent to asking for the keys and is the default
        when iterating through the task metadata. In this case a deprecation
        message will be issued and the ability will raise an exception
        in a future release.

        When ``topLevelOnly`` is `False` all keys, including those from the
        hierarchy and the top-level hierarchy, are returned.
        """
        if topLevelOnly:
            # stacklevel=2 attributes the warning to the calling code.
            warnings.warn("Use keys() instead. " + _DEPRECATION_REASON, FutureWarning, stacklevel=2)
            return set(self.keys())

        names = set()
        for k, v in self.items():
            names.add(k)  # Always include the current level
            if isinstance(v, TaskMetadata):
                names.update({k + "." + item for item in v.names(topLevelOnly=topLevelOnly)})
        return names

    def paramNames(self, topLevelOnly: bool = True):
        """Return hierarchical names.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Control whether only top-level items are returned or items
            from the hierarchy. Defaults to `True`, matching the
            ``PropertySetLike`` protocol.

        Returns
        -------
        paramNames : `set` of `str`
            If ``topLevelOnly`` is `True`, returns any keys that are not
            part of a hierarchy. If `False` also returns fully-qualified
            names from the hierarchy. Keys associated with the top
            of a hierarchy are never returned.
        """
        # Currently used by the verify package.
        paramNames = set()
        for k, v in self.items():
            if isinstance(v, TaskMetadata):
                if not topLevelOnly:
                    paramNames.update({k + "." + item for item in v.paramNames(topLevelOnly=topLevelOnly)})
            else:
                paramNames.add(k)
        return paramNames

    @deprecated(
        reason="Use standard assignment syntax. " + _DEPRECATION_REASON,
        version=_DEPRECATION_VERSION,
        category=FutureWarning,
    )
    def set(self, key: str, item: Any) -> None:
        """Set the value of the supplied key."""
        self[key] = item

    @deprecated(
        reason="Use standard del dict syntax. " + _DEPRECATION_REASON,
        version=_DEPRECATION_VERSION,
        category=FutureWarning,
    )
    def remove(self, key: str) -> None:
        """Remove the item without raising if absent."""
        try:
            del self[key]
        except KeyError:
            # The PropertySet.remove() should always work.
            pass

    @staticmethod
    def _getKeys(key: str) -> List[str]:
        """Return the key hierarchy.

        Parameters
        ----------
        key : `str`
            The key to analyze. Can be dot-separated.

        Returns
        -------
        keys : `list` of `str`
            The key hierarchy that has been split on ``.``.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        try:
            keys = key.split(".")
        except Exception:
            # Anything that can not be split this way is not a valid key.
            raise KeyError(f"Invalid key '{key}': only string keys are allowed") from None
        return keys

    def keys(self):
        """Return the top-level keys."""
        return tuple(self)

    def items(self):
        """Yield the top-level keys and values."""
        yield from itertools.chain(self.scalars.items(), self.arrays.items(), self.metadata.items())

    def __len__(self) -> int:
        """Return the number of items."""
        return len(self.scalars) + len(self.arrays) + len(self.metadata)

    def __iter__(self):
        """Return an iterator over each key."""
        # The order of keys is not preserved since items can move
        # from scalar to array.
        return itertools.chain(iter(self.scalars), iter(self.arrays), iter(self.metadata))

    def __getitem__(self, key: str) -> Any:
        """Retrieve the item associated with the key.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. For compatibility with ``PropertySet``, if the key
            refers to an array, the final element is returned and not the
            array itself.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            if key0 in self.scalars:
                return self.scalars[key0]
            if key0 in self.metadata:
                return self.metadata[key0]
            if key0 in self.arrays:
                return self.arrays[key0][-1]
            raise KeyError(f"'{key}' not found")
        # Hierarchical lookup so the top key can only be in the metadata
        # property. Trap KeyError and reraise so that the correct key
        # in the hierarchy is reported.
        try:
            # And forward request to that metadata.
            return self.metadata[key0][".".join(keys)]
        except KeyError:
            raise KeyError(f"'{key}' not found") from None

    def __setitem__(self, key: str, item: Any) -> None:
        """Store the given item.

        Parameters
        ----------
        key : `str`
            The key to assign to. Can be dot-separated hierarchical, in
            which case intermediate levels are created as needed.
        item : Any
            The value to store. Any entry of a different kind (scalar,
            array, nested metadata) previously stored under the same
            top-level key is removed.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        ValueError
            Raised if the value is not of a supported type.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            slots = {"array": self.arrays, "scalar": self.scalars, "metadata": self.metadata}
            slot_type, item = self._validate_value(item)
            primary = slots.pop(slot_type, None)
            if primary is None:
                raise AssertionError(f"Unknown slot type returned from validator: {slot_type}")

            # Assign the value to the right place.
            primary[key0] = item
            for other in slots.values():
                # Remove any other entries.
                other.pop(key0, None)
            return

        # This must be hierarchical so forward to the child TaskMetadata.
        if key0 not in self.metadata:
            self.metadata[key0] = TaskMetadata()
        self.metadata[key0][".".join(keys)] = item

        # Ensure we have cleared out anything with the same name elsewhere.
        self.scalars.pop(key0, None)
        self.arrays.pop(key0, None)

    def __contains__(self, key: str) -> bool:
        """Determine if the key exists.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            return key0 in self.scalars or key0 in self.arrays or key0 in self.metadata

        if key0 in self.metadata:
            return ".".join(keys) in self.metadata[key0]
        return False

    def __delitem__(self, key: str) -> None:
        """Remove the specified item.

        Raises
        ------
        KeyError
            Raised if the item is not present.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            for slot in (self.scalars, self.arrays, self.metadata):
                if key0 in slot:
                    del slot[key0]
                    return
            raise KeyError(f"'{key}' not found")

        try:
            del self.metadata[key0][".".join(keys)]
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def _validate_value(self, value: Any):
        """Validate the given value.

        Parameters
        ----------
        value : Any
            Value to check.

        Returns
        -------
        slot_type : `str`
            The type of value given. Options are "scalar", "array", "metadata".
        item : Any
            The item that was given but possibly modified to conform to
            the slot type.

        Raises
        ------
        ValueError
            Raised if the value is not a recognized type, if a sequence is
            empty, or if a sequence contains elements of differing or
            unsupported types.
        """
        # Test the simplest option first.
        value_type = type(value)
        if value_type in _ALLOWED_PRIMITIVE_TYPES:
            return "scalar", value

        if isinstance(value, TaskMetadata):
            return "metadata", value
        if isinstance(value, Mapping):
            return "metadata", self.from_dict(value)

        if _isListLike(value):
            # For model consistency, need to check that every item in the
            # list has the same type.
            value = list(value)

            # The element type is taken from the first element, so an empty
            # sequence can not be validated; report that explicitly instead
            # of failing with an IndexError below.
            if not value:
                raise ValueError(
                    "TaskMetadata does not support empty lists since the element type can not be determined."
                )

            type0 = type(value[0])
            for i in value:
                if type(i) is not type0:
                    raise ValueError(
                        "Type mismatch in supplied list. TaskMetadata requires all"
                        f" elements have same type but see {type(i)} and {type0}."
                    )

            if type0 not in _ALLOWED_PRIMITIVE_TYPES:
                # Must check to see if we got numpy floats or something.
                if isinstance(value[0], numbers.Integral):
                    type_cast = int
                elif isinstance(value[0], numbers.Real):
                    type_cast = float
                else:
                    raise ValueError(
                        f"Supplied list has element of type '{type0}'. "
                        "TaskMetadata can only accept primitive types in lists."
                    )

                value = [type_cast(v) for v in value]

            return "array", value

        # Sometimes a numpy number is given.
        if isinstance(value, numbers.Integral):
            return "scalar", int(value)
        if isinstance(value, numbers.Real):
            return "scalar", float(value)

        raise ValueError(f"TaskMetadata does not support values of type {value_type} (got {value!r}).")


# Needed because a TaskMetadata can contain a TaskMetadata.
TaskMetadata.update_forward_refs()