Coverage for python/lsst/pipe/base/_task_metadata.py: 15%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Names exported by a star-import of this module.
__all__ = ["TaskMetadata"]
24import numbers
25import itertools
26import warnings
27from collections.abc import Sequence
28from deprecated.sphinx import deprecated
30from typing import Dict, List, Union, Any, Mapping
31from pydantic import BaseModel, StrictInt, StrictFloat, StrictBool, StrictStr, Field
# Text appended to the message of every deprecation warning issued below.
_DEPRECATION_REASON = "Will be removed after v25."
# Version string handed to the ``deprecated`` decorator on deprecated methods.
_DEPRECATION_VERSION = "v24"

# The types allowed in a Task metadata field are restricted
# to allow predictable serialization.
_ALLOWED_PRIMITIVE_TYPES = (str, float, int, bool)
def _isListLike(v):
    """Report whether ``v`` is a sequence that is not a `str`.

    Parameters
    ----------
    v : Any
        Object to test.

    Returns
    -------
    isListLike : `bool`
        `True` if ``v`` is a `collections.abc.Sequence` other than `str`.
    """
    # Strings are sequences too, but are treated as scalar values here.
    if isinstance(v, str):
        return False
    return isinstance(v, Sequence)
class TaskMetadata(BaseModel):
    """Dict-like object for storing task metadata.

    Metadata can be stored at two levels: single task or task plus subtasks.
    The latter is called full metadata of a task and has a form

        topLevelTaskName:subtaskName:subsubtaskName.itemName

    Metadata item key of a task (`itemName` above) must not contain `.`,
    which serves as a separator in full metadata keys and turns
    the value into sub-dictionary. Arbitrary hierarchies are supported.

    Deprecated methods are for compatibility with
    the predecessor containers.
    """

    # Single-valued items, keyed by item name.
    scalars: Dict[str, Union[StrictFloat, StrictInt, StrictBool, StrictStr]] = Field(default_factory=dict)
    # List-valued items, keyed by item name.
    arrays: Dict[str, Union[List[StrictFloat], List[StrictInt], List[StrictBool],
                            List[StrictStr]]] = Field(default_factory=dict)
    # Child metadata, keyed by the first component of a dot-separated key.
    metadata: Dict[str, "TaskMetadata"] = Field(default_factory=dict)

    @classmethod
    def from_dict(cls, d: Mapping[str, Any]) -> "TaskMetadata":
        """Create a TaskMetadata from a dictionary.

        Parameters
        ----------
        d : `Mapping`
            Mapping to convert. Can be hierarchical. Any dictionaries
            in the hierarchy are converted to `TaskMetadata`.

        Returns
        -------
        meta : `TaskMetadata`
            Newly-constructed metadata.
        """
        metadata = cls()
        # Item assignment validates each value and converts nested mappings.
        for k, v in d.items():
            metadata[k] = v
        return metadata

    def add(self, name, value):
        """Store a new value, adding to a list if one already exists.

        Parameters
        ----------
        name : `str`
            Name of the metadata property. Can be dot-separated
            hierarchical.
        value
            Metadata property value.

        Raises
        ------
        ValueError
            Raised if the value is not a primitive or a sequence of
            primitives, if the name refers to a `TaskMetadata`, or if the
            type of the new value differs from the values already stored.
        KeyError
            Raised if the name is not a string or if a parent in a
            hierarchical name does not exist.
        """
        keys = self._getKeys(name)
        key0 = keys.pop(0)
        if keys:
            # Hierarchical name: let the child metadata handle the rest.
            self.metadata[key0].add(".".join(keys), value)
            return

        # If add() is being used, always store the value in the arrays
        # property as a list. It's likely there will be another call.
        slot_type, value = self._validate_value(value)
        if slot_type == "scalar":
            value = [value]
        elif slot_type != "array":
            raise ValueError("add() can only be used for primitive types or sequences of those types.")

        if key0 in self.metadata:
            raise ValueError(f"Can not add() to key '{name}' since that is a TaskMetadata")

        if key0 in self.scalars:
            # An existing scalar becomes the first element of the array.
            current = [self.scalars[key0]]
        elif key0 in self.arrays:
            current = self.arrays[key0]
        else:
            self.arrays[key0] = value
            return

        # Check that the type is not changing. This is done before any
        # state is modified so a rejected add() leaves the object untouched.
        if current and (curtype := type(current[0])) is not (newtype := type(value[0])):
            raise ValueError(f"Type mismatch in add() -- currently {curtype} but adding {newtype}")

        current.extend(value)
        self.scalars.pop(key0, None)
        self.arrays[key0] = current

    @deprecated(reason="Cast the return value to float explicitly. " + _DEPRECATION_REASON,
                version=_DEPRECATION_VERSION, category=FutureWarning)
    def getAsDouble(self, key):
        """Return the value cast to a `float`.

        Parameters
        ----------
        key : `str`
            Item to return. Can be dot-separated hierarchical.

        Returns
        -------
        value : `float`
            The value cast to a `float`.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        return float(self[key])

    def getScalar(self, key):
        """Retrieve a scalar item even if the item is a list.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        value : Any
            Either the value associated with the key or, if the key
            corresponds to a list, the last item in the list.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        # Used in pipe_tasks.
        # getScalar() is the default behavior for __getitem__.
        return self[key]

    def getArray(self, key):
        """Retrieve an item as a list even if it is a scalar.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        values : `list` of any
            A list containing the value or values associated with this item.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            if key0 in self.arrays:
                return self.arrays[key0]
            if key0 in self.scalars:
                return [self.scalars[key0]]
            if key0 in self.metadata:
                return [self.metadata[key0]]
            raise KeyError(f"'{key}' not found")

        try:
            return self.metadata[key0].getArray(".".join(keys))
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def names(self, topLevelOnly: bool = True):
        """Return the hierarchical keys from the metadata.

        Parameters
        ----------
        topLevelOnly : `bool`
            If true, return top-level keys, otherwise full metadata item keys.

        Returns
        -------
        names : `collections.abc.Set`
            A set of top-level keys or full metadata item keys, including
            the top-level keys.

        Notes
        -----
        Should never be called in new code with ``topLevelOnly`` set to `True`
        -- this is equivalent to asking for the keys and is the default
        when iterating through the task metadata. In this case a deprecation
        message will be issued and the ability will raise an exception
        in a future release.

        When ``topLevelOnly`` is `False` all keys, including those from the
        hierarchy and the top-level hierarchy, are returned.
        """
        if topLevelOnly:
            # stacklevel=2 attributes the warning to the calling code.
            warnings.warn("Use keys() instead. " + _DEPRECATION_REASON, FutureWarning, stacklevel=2)
            return set(self.keys())

        names = set()
        for k, v in self.items():
            names.add(k)  # Always include the current level
            if isinstance(v, TaskMetadata):
                names.update(k + "." + item for item in v.names(topLevelOnly=topLevelOnly))
        return names

    def paramNames(self, topLevelOnly):
        """Return hierarchical names.

        Parameters
        ----------
        topLevelOnly : `bool`
            Control whether only top-level items are returned or items
            from the hierarchy.

        Returns
        -------
        paramNames : `set` of `str`
            If ``topLevelOnly`` is `True`, returns any keys that are not
            part of a hierarchy. If `False` also returns fully-qualified
            names from the hierarchy. Keys associated with the top
            of a hierarchy are never returned.
        """
        # Currently used by the verify package.
        paramNames = set()
        for k, v in self.items():
            if isinstance(v, TaskMetadata):
                if not topLevelOnly:
                    paramNames.update(k + "." + item for item in v.paramNames(topLevelOnly=topLevelOnly))
            else:
                paramNames.add(k)
        return paramNames

    @deprecated(reason="Use standard assignment syntax. " + _DEPRECATION_REASON,
                version=_DEPRECATION_VERSION, category=FutureWarning)
    def set(self, key, item):
        """Set the value of the supplied key."""
        self[key] = item

    @deprecated(reason="Use standard del dict syntax. " + _DEPRECATION_REASON,
                version=_DEPRECATION_VERSION, category=FutureWarning)
    def remove(self, key):
        """Remove the item without raising if absent."""
        try:
            del self[key]
        except KeyError:
            # The PropertySet.remove() should always work.
            pass

    @staticmethod
    def _getKeys(key):
        """Return the key hierarchy.

        Parameters
        ----------
        key : `str`
            The key to analyze. Can be dot-separated.

        Returns
        -------
        keys : `list` of `str`
            The key hierarchy that has been split on ``.``.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        if not isinstance(key, str):
            raise KeyError(f"Invalid key '{key}': only string keys are allowed")
        return key.split(".")

    def keys(self):
        """Return the top-level keys."""
        return tuple(self)

    def items(self):
        """Yield the top-level keys and values."""
        yield from itertools.chain(self.scalars.items(), self.arrays.items(), self.metadata.items())

    def __len__(self):
        """Return the number of items."""
        return len(self.scalars) + len(self.arrays) + len(self.metadata)

    def __iter__(self):
        """Return an iterator over each key."""
        # The order of keys is not preserved since items can move
        # from scalar to array.
        return itertools.chain(iter(self.scalars), iter(self.arrays), iter(self.metadata))

    def __getitem__(self, key):
        """Retrieve the item associated with the key.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. For compatibility with ``PropertySet``, if the key
            refers to an array, the final element is returned and not the
            array itself.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            if key0 in self.scalars:
                return self.scalars[key0]
            if key0 in self.metadata:
                return self.metadata[key0]
            if key0 in self.arrays:
                return self.arrays[key0][-1]
            raise KeyError(f"'{key}' not found")

        # Hierarchical lookup so the top key can only be in the metadata
        # property. Trap KeyError and reraise so that the correct key
        # in the hierarchy is reported.
        try:
            # And forward request to that metadata.
            return self.metadata[key0][".".join(keys)]
        except KeyError:
            raise KeyError(f"'{key}' not found") from None

    def __setitem__(self, key, item):
        """Store the given item.

        Parameters
        ----------
        key : `str`
            The key to assign to. Can be dot-separated hierarchical, in
            which case missing intermediate `TaskMetadata` are created.
        item : Any
            Value to store. Any entry already stored under the same
            top-level name in a different slot is removed.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        ValueError
            Raised if the item is not of a supported type.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            slots = {"array": self.arrays, "scalar": self.scalars, "metadata": self.metadata}
            slot_type, item = self._validate_value(item)
            primary = slots.pop(slot_type, None)
            if primary is None:
                raise AssertionError(f"Unknown slot type returned from validator: {slot_type}")

            # Assign the value to the right place.
            primary[key0] = item
            for other in slots.values():
                # Remove any other entries.
                other.pop(key0, None)
            return

        # This must be hierarchical so forward to the child TaskMetadata.
        # A newly created child is attached only after the assignment has
        # succeeded, so a rejected value does not leave an empty child behind.
        child = self.metadata.get(key0)
        if child is None:
            child = TaskMetadata()
        child[".".join(keys)] = item
        self.metadata[key0] = child

        # Ensure we have cleared out anything with the same name elsewhere.
        self.scalars.pop(key0, None)
        self.arrays.pop(key0, None)

    def __contains__(self, key):
        """Determine if the key exists."""
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            return key0 in self.scalars or key0 in self.arrays or key0 in self.metadata

        if key0 in self.metadata:
            return ".".join(keys) in self.metadata[key0]
        return False

    def __delitem__(self, key):
        """Remove the specified item.

        Raises
        ------
        KeyError
            Raised if the item is not present.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            for slot in (self.scalars, self.arrays, self.metadata):
                if key0 in slot:
                    del slot[key0]
                    return
            raise KeyError(f"'{key}' not found")

        try:
            del self.metadata[key0][".".join(keys)]
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def _validate_value(self, value):
        """Validate the given value.

        Parameters
        ----------
        value : Any
            Value to check.

        Returns
        -------
        slot_type : `str`
            The type of value given. Options are "scalar", "array", "metadata".
        item : Any
            The item that was given but possibly modified to conform to
            the slot type.

        Raises
        ------
        ValueError
            Raised if the value is not a recognized type, if a sequence
            is empty, or if a sequence mixes element types.
        """
        # Test the simplest option first.
        if type(value) in _ALLOWED_PRIMITIVE_TYPES:
            return "scalar", value

        if isinstance(value, TaskMetadata):
            return "metadata", value
        if isinstance(value, Mapping):
            return "metadata", self.from_dict(value)

        if _isListLike(value):
            # For model consistency, need to check that every item in the
            # list has the same type.
            value = list(value)
            if not value:
                # The element type of an empty sequence is undefined.
                raise ValueError("TaskMetadata can not store an empty list.")

            type0 = type(value[0])
            for i in value:
                if type(i) is not type0:
                    raise ValueError("Type mismatch in supplied list. TaskMetadata requires all"
                                     f" elements have same type but see {type(i)} and {type0}.")

            if type0 not in _ALLOWED_PRIMITIVE_TYPES:
                # Must check to see if we got numpy floats or something.
                if isinstance(value[0], numbers.Integral):
                    type_cast = int
                elif isinstance(value[0], numbers.Real):
                    type_cast = float
                else:
                    raise ValueError(f"Supplied list has element of type '{type0}'. "
                                     "TaskMetadata can only accept primitive types in lists.")

                value = [type_cast(v) for v in value]

            return "array", value

        # Sometimes a numpy number is given.
        if isinstance(value, numbers.Integral):
            return "scalar", int(value)
        if isinstance(value, numbers.Real):
            return "scalar", float(value)

        raise ValueError(f"TaskMetadata does not support values of type {type(value)!r}.")
# Needed because a TaskMetadata can contain a TaskMetadata: the ``metadata``
# field is annotated with the string "TaskMetadata", which refers to the
# class before its definition is complete.
TaskMetadata.update_forward_refs()