Coverage for python/lsst/pipe/base/_task_metadata.py: 16%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22__all__ = ["TaskMetadata"]
24import numbers
25import itertools
26import warnings
27from collections.abc import Sequence
28from deprecated.sphinx import deprecated
30from typing import Dict, List, Union, Any, Mapping, Protocol, Collection
31from pydantic import BaseModel, StrictInt, StrictFloat, StrictBool, StrictStr, Field
33_DEPRECATION_REASON = "Will be removed after v25."
34_DEPRECATION_VERSION = "v24"
36# The types allowed in a Task metadata field are restricted
37# to allow predictable serialization.
38_ALLOWED_PRIMITIVE_TYPES = (str, float, int, bool)
class PropertySetLike(Protocol):
    """Structural type resembling a ``lsst.daf.base.PropertySet``.

    Only the methods needed to convert a ``PropertySet`` into a
    `TaskMetadata` are declared here.
    """

    def paramNames(self, topLevelOnly: bool = True) -> Collection[str]:
        """Return the names of the stored parameters.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Whether to restrict the result to top-level names.

        Returns
        -------
        names : `Collection` of `str`
            The parameter names.
        """
        ...

    def getArray(self, name: str) -> Any:
        """Return the value or values stored under ``name``.

        Parameters
        ----------
        name : `str`
            Name of the parameter to retrieve.

        Returns
        -------
        values : Any
            The stored content; callers in this module treat it as a sized,
            indexable collection.
        """
        ...
55def _isListLike(v):
56 return isinstance(v, Sequence) and not isinstance(v, str)
class TaskMetadata(BaseModel):
    """Dict-like object for storing task metadata.

    Metadata can be stored at two levels: single task or task plus subtasks.
    The latter is called full metadata of a task and has a form

        topLevelTaskName:subtaskName:subsubtaskName.itemName

    Metadata item key of a task (`itemName` above) must not contain `.`,
    which serves as a separator in full metadata keys and turns
    the value into sub-dictionary. Arbitrary hierarchies are supported.

    Deprecated methods are for compatibility with
    the predecessor containers.

    Notes
    -----
    Values are kept in three separate dictionaries depending on their kind:
    ``scalars`` for single primitive values, ``arrays`` for homogeneous lists
    of primitives and ``metadata`` for nested `TaskMetadata`. A given
    top-level key is expected to live in only one of the three.
    """

    scalars: Dict[str, Union[StrictFloat, StrictInt, StrictBool, StrictStr]] = Field(default_factory=dict)
    arrays: Dict[str, Union[List[StrictFloat], List[StrictInt], List[StrictBool],
                            List[StrictStr]]] = Field(default_factory=dict)
    metadata: Dict[str, "TaskMetadata"] = Field(default_factory=dict)

    @classmethod
    def from_dict(cls, d: Mapping[str, Any]) -> "TaskMetadata":
        """Create a TaskMetadata from a dictionary.

        Parameters
        ----------
        d : `Mapping`
            Mapping to convert. Can be hierarchical. Any dictionaries
            in the hierarchy are converted to `TaskMetadata`.

        Returns
        -------
        meta : `TaskMetadata`
            Newly-constructed metadata.
        """
        metadata = cls()
        for k, v in d.items():
            metadata[k] = v
        return metadata

    @classmethod
    def from_metadata(cls, ps: PropertySetLike) -> "TaskMetadata":
        """Create a TaskMetadata from a PropertySet-like object.

        Parameters
        ----------
        ps : `lsst.daf.base.PropertySet` or `TaskMetadata`
            A ``PropertySet``-like object to be transformed to a
            `TaskMetadata`. A `TaskMetadata` can be copied using this
            class method.

        Returns
        -------
        tm : `TaskMetadata`
            Newly-constructed metadata.

        Notes
        -----
        Items stored in single-element arrays in the supplied object
        will be converted to scalars in the newly-created object.
        """
        # Use hierarchical names to assign values from input to output.
        # This API exists for both PropertySet and TaskMetadata.
        # from_dict() does not work because PropertySet is not declared
        # to be a Mapping.
        # PropertySet.toDict() is not present in TaskMetadata so is best
        # avoided.
        metadata = cls()
        for key in sorted(ps.paramNames(topLevelOnly=False)):
            value = ps.getArray(key)
            if len(value) == 1:
                value = value[0]
            metadata[key] = value
        return metadata

    def add(self, name: str, value: Any) -> None:
        """Store a new value, adding to a list if one already exists.

        Parameters
        ----------
        name : `str`
            Name of the metadata property. Can be dot-separated
            hierarchical; missing intermediate levels are created.
        value
            Metadata property value.

        Raises
        ------
        KeyError
            Raised if ``name`` is not a string.
        ValueError
            Raised if the value is not a primitive type or a sequence of
            primitive types, if its type differs from the values already
            stored under that name, or if the name (or one of its parent
            levels) refers to an existing item of an incompatible kind.
        """
        keys = self._getKeys(name)
        key0 = keys.pop(0)

        if keys:
            # Hierarchical name: delegate to the child, creating it on
            # demand in the same way that item assignment does.
            remainder = ".".join(keys)
            child = self.metadata.get(key0)
            if child is not None:
                child.add(remainder, value)
                return
            if key0 in self.scalars or key0 in self.arrays:
                raise ValueError(f"Can not add() to key '{name}' since '{key0}' is not a TaskMetadata")
            # Attach the new level only once the value has been accepted so
            # that a failure does not leave an empty hierarchy behind.
            child = TaskMetadata()
            child.add(remainder, value)
            self.metadata[key0] = child
            return

        # If add() is being used, always store the value in the arrays
        # property as a list. It's likely there will be another call.
        slot_type, value = self._validate_value(value)
        if slot_type == "scalar":
            value = [value]
        elif slot_type != "array":
            raise ValueError("add() can only be used for primitive types or sequences of those types.")

        if key0 in self.metadata:
            raise ValueError(f"Can not add() to key '{name}' since that is a TaskMetadata")

        # Work out what is currently stored without modifying anything, so
        # that a type mismatch leaves the existing content untouched.
        if key0 in self.scalars:
            current = [self.scalars[key0]]
        else:
            current = self.arrays.get(key0)

        # Check that the type is not changing.
        if current and (curtype := type(current[0])) is not (newtype := type(value[0])):
            raise ValueError(f"Type mismatch in add() -- currently {curtype} but adding {newtype}")

        # Any existing scalar is promoted to an array.
        self.scalars.pop(key0, None)
        if current is None:
            self.arrays[key0] = value
        else:
            current.extend(value)
            self.arrays[key0] = current

    @deprecated(reason="Cast the return value to float explicitly. " + _DEPRECATION_REASON,
                version=_DEPRECATION_VERSION, category=FutureWarning)
    def getAsDouble(self, key: str) -> float:
        """Return the value cast to a `float`.

        Parameters
        ----------
        key : `str`
            Item to return. Can be dot-separated hierarchical.

        Returns
        -------
        value : `float`
            The value cast to a `float`.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        return float(self[key])

    def getScalar(self, key: str) -> Any:
        """Retrieve a scalar item even if the item is a list.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        value : Any
            Either the value associated with the key or, if the key
            corresponds to a list, the last item in the list.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        # Used in pipe_tasks.
        # getScalar() is the default behavior for __getitem__.
        return self[key]

    def getArray(self, key: str) -> List[Any]:
        """Retrieve an item as a list even if it is a scalar.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        values : `list` of any
            A list containing the value or values associated with this item.
            For an item stored as an array this is the stored list itself,
            not a copy.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            if key0 in self.arrays:
                return self.arrays[key0]
            if key0 in self.scalars:
                return [self.scalars[key0]]
            if key0 in self.metadata:
                return [self.metadata[key0]]
            raise KeyError(f"'{key}' not found")

        try:
            return self.metadata[key0].getArray(".".join(keys))
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def names(self, topLevelOnly: bool = True):
        """Return the hierarchical keys from the metadata.

        Parameters
        ----------
        topLevelOnly : `bool`
            If true, return top-level keys, otherwise full metadata item keys.

        Returns
        -------
        names : `collections.abc.Set`
            A set of top-level keys or full metadata item keys, including
            the top-level keys.

        Notes
        -----
        Should never be called in new code with ``topLevelOnly`` set to `True`
        -- this is equivalent to asking for the keys and is the default
        when iterating through the task metadata. In this case a deprecation
        message will be issued and the ability will raise an exception
        in a future release.

        When ``topLevelOnly`` is `False` all keys, including those from the
        hierarchy and the top-level hierarchy, are returned.
        """
        if topLevelOnly:
            # stacklevel=2 attributes the warning to the calling code.
            warnings.warn("Use keys() instead. " + _DEPRECATION_REASON, FutureWarning, stacklevel=2)
            return set(self.keys())

        names = set()
        for k, v in self.items():
            names.add(k)  # Always include the current level
            if isinstance(v, TaskMetadata):
                names.update({k + '.' + item for item in v.names(topLevelOnly=False)})
        return names

    def paramNames(self, topLevelOnly: bool = True):
        """Return hierarchical names.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Control whether only top-level items are returned or items
            from the hierarchy. Defaults to `True`, matching the
            `PropertySetLike` protocol.

        Returns
        -------
        paramNames : `set` of `str`
            If ``topLevelOnly`` is `True`, returns any keys that are not
            part of a hierarchy. If `False` also returns fully-qualified
            names from the hierarchy. Keys associated with the top
            of a hierarchy are never returned.
        """
        # Currently used by the verify package.
        paramNames = set()
        for k, v in self.items():
            if isinstance(v, TaskMetadata):
                if not topLevelOnly:
                    paramNames.update({k + "." + item for item in v.paramNames(topLevelOnly=topLevelOnly)})
            else:
                paramNames.add(k)
        return paramNames

    @deprecated(reason="Use standard assignment syntax. " + _DEPRECATION_REASON,
                version=_DEPRECATION_VERSION, category=FutureWarning)
    def set(self, key: str, item: Any) -> None:
        """Set the value of the supplied key."""
        self[key] = item

    @deprecated(reason="Use standard del dict syntax. " + _DEPRECATION_REASON,
                version=_DEPRECATION_VERSION, category=FutureWarning)
    def remove(self, key: str) -> None:
        """Remove the item without raising if absent."""
        try:
            del self[key]
        except KeyError:
            # The PropertySet.remove() should always work.
            pass

    @staticmethod
    def _getKeys(key: str) -> List[str]:
        """Return the key hierarchy.

        Parameters
        ----------
        key : `str`
            The key to analyze. Can be dot-separated.

        Returns
        -------
        keys : `list` of `str`
            The key hierarchy that has been split on ``.``.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        # Test the type explicitly rather than trapping any exception from
        # split(); other objects can provide an unrelated split() method.
        if not isinstance(key, str):
            raise KeyError(f"Invalid key '{key}': only string keys are allowed")
        return key.split('.')

    def keys(self):
        """Return the top-level keys."""
        return tuple(self)

    def items(self):
        """Yield the top-level keys and values."""
        yield from itertools.chain(self.scalars.items(), self.arrays.items(), self.metadata.items())

    def __len__(self) -> int:
        """Return the number of items."""
        return len(self.scalars) + len(self.arrays) + len(self.metadata)

    def __iter__(self):
        """Return an iterator over each key."""
        # The order of keys is not preserved since items can move
        # from scalar to array.
        return itertools.chain(iter(self.scalars), iter(self.arrays), iter(self.metadata))

    def __getitem__(self, key: str) -> Any:
        """Retrieve the item associated with the key.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. For compatibility with ``PropertySet``, if the key
            refers to an array, the final element is returned and not the
            array itself.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            if key0 in self.scalars:
                return self.scalars[key0]
            if key0 in self.metadata:
                return self.metadata[key0]
            if key0 in self.arrays:
                return self.arrays[key0][-1]
            raise KeyError(f"'{key}' not found")
        # Hierarchical lookup so the top key can only be in the metadata
        # property. Trap KeyError and reraise so that the correct key
        # in the hierarchy is reported.
        try:
            # And forward request to that metadata.
            return self.metadata[key0][".".join(keys)]
        except KeyError:
            raise KeyError(f"'{key}' not found") from None

    def __setitem__(self, key: str, item: Any) -> None:
        """Store the given item.

        Parameters
        ----------
        key : `str`
            The key to assign to. Can be dot-separated hierarchical, in
            which case missing intermediate levels are created.
        item : Any
            Value to store. Any previous content with the same name is
            replaced, whatever its kind.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        ValueError
            Raised if the value is not of a supported type.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            # Validate before touching any state.
            slot_type, item = self._validate_value(item)
            slots = {"array": self.arrays, "scalar": self.scalars, "metadata": self.metadata}
            primary = slots.pop(slot_type, None)
            if primary is None:
                raise AssertionError(f"Unknown slot type returned from validator: {slot_type}")

            # Assign the value to the right place.
            primary[key0] = item
            for other in slots.values():
                # Remove any other entries.
                other.pop(key0, None)
            return

        # This must be hierarchical so forward to the child TaskMetadata.
        remainder = ".".join(keys)
        child = self.metadata.get(key0)
        if child is None:
            # Attach a new child only after the assignment succeeded so a
            # rejected value does not leave an empty hierarchy behind.
            child = TaskMetadata()
            child[remainder] = item
            self.metadata[key0] = child
        else:
            child[remainder] = item

        # Ensure we have cleared out anything with the same name elsewhere.
        self.scalars.pop(key0, None)
        self.arrays.pop(key0, None)

    def __contains__(self, key: str) -> bool:
        """Determine if the key exists.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            return key0 in self.scalars or key0 in self.arrays or key0 in self.metadata

        if key0 in self.metadata:
            return ".".join(keys) in self.metadata[key0]
        return False

    def __delitem__(self, key: str) -> None:
        """Remove the specified item.

        Raises
        ------
        KeyError
            Raised if the item is not present.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            for slot in (self.scalars, self.arrays, self.metadata):
                if key0 in slot:
                    del slot[key0]
                    return
            raise KeyError(f"'{key}' not found")

        try:
            del self.metadata[key0][".".join(keys)]
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def _validate_value(self, value: Any):
        """Validate the given value.

        Parameters
        ----------
        value : Any
            Value to check.

        Returns
        -------
        slot_type : `str`
            The type of value given. Options are "scalar", "array", "metadata".
        item : Any
            The item that was given but possibly modified to conform to
            the slot type.

        Raises
        ------
        ValueError
            Raised if the value is not a recognized type, or if it is a
            sequence that is empty, has elements of differing types, or has
            elements that cannot be stored as primitives.
        """
        # Test the simplest option first.
        if type(value) in _ALLOWED_PRIMITIVE_TYPES:
            return "scalar", value

        if isinstance(value, TaskMetadata):
            return "metadata", value
        if isinstance(value, Mapping):
            return "metadata", self.from_dict(value)

        if _isListLike(value):
            # For model consistency, need to check that every item in the
            # list has the same type.
            value = list(value)
            if not value:
                # There is no element from which to determine the type and
                # item retrieval returns the final element of an array.
                raise ValueError("TaskMetadata does not support empty lists.")

            type0 = type(value[0])
            for i in value:
                if type(i) is not type0:
                    raise ValueError("Type mismatch in supplied list. TaskMetadata requires all"
                                     f" elements have same type but see {type(i)} and {type0}.")

            if type0 not in _ALLOWED_PRIMITIVE_TYPES:
                # Must check to see if we got numpy floats or something.
                if isinstance(value[0], numbers.Integral):
                    type_cast = int
                elif isinstance(value[0], numbers.Real):
                    type_cast = float
                else:
                    raise ValueError(f"Supplied list has element of type '{type0}'. "
                                     "TaskMetadata can only accept primitive types in lists.")

                value = [type_cast(v) for v in value]

            return "array", value

        # Sometimes a numpy number is given.
        if isinstance(value, numbers.Integral):
            return "scalar", int(value)
        if isinstance(value, numbers.Real):
            return "scalar", float(value)

        raise ValueError(f"TaskMetadata does not support values of type {type(value)!r}.")


# Needed because a TaskMetadata can contain a TaskMetadata.
TaskMetadata.update_forward_refs()