Coverage for python/lsst/pipe/base/_task_metadata.py: 15%

205 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-08-23 10:31 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

# Names exported by ``from ... import *``.
__all__ = ["TaskMetadata"]

23 

24import itertools 

25import numbers 

26import warnings 

27from collections.abc import Collection, Iterator, Mapping, Sequence 

28from typing import Any, Protocol 

29 

30from lsst.daf.butler._compat import _BaseModelCompat 

31from lsst.utils.introspection import find_outside_stacklevel 

32from pydantic import Field, StrictBool, StrictFloat, StrictInt, StrictStr 

33 

34# The types allowed in a Task metadata field are restricted 

35# to allow predictable serialization. 

36_ALLOWED_PRIMITIVE_TYPES = (str, float, int, bool) 

37 

38 

class PropertySetLike(Protocol):
    """Protocol that looks like a ``lsst.daf.base.PropertySet``.

    Enough of the API is specified to support conversion of a
    ``PropertySet`` to a `TaskMetadata`.
    """

    def paramNames(self, topLevelOnly: bool = True) -> Collection[str]:
        """Return the names of the stored parameters.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Whether to restrict the result to top-level names.

        Returns
        -------
        names : `~collections.abc.Collection` of `str`
            The parameter names.
        """
        ...

    def getArray(self, name: str) -> Any:
        """Return the values associated with ``name``.

        Parameters
        ----------
        name : `str`
            Name of the item to retrieve.

        Returns
        -------
        values : `~typing.Any`
            The stored values. `TaskMetadata.from_metadata` calls `len` on
            the result and indexes it, so it is used as a sized sequence.
        """
        ...

51 

52 

53def _isListLike(v: Any) -> bool: 

54 return isinstance(v, Sequence) and not isinstance(v, str) 

55 

56 

class TaskMetadata(_BaseModelCompat):
    """Dict-like object for storing task metadata.

    Metadata can be stored at two levels: single task or task plus subtasks.
    The latter is called full metadata of a task and has a form

        topLevelTaskName:subtaskName:subsubtaskName.itemName

    Metadata item key of a task (`itemName` above) must not contain `.`,
    which serves as a separator in full metadata keys and turns
    the value into sub-dictionary. Arbitrary hierarchies are supported.
    """

    # Values are split over three dictionaries according to their kind; a
    # given top-level key lives in exactly one of them after an assignment.
    scalars: dict[str, StrictFloat | StrictInt | StrictBool | StrictStr] = Field(default_factory=dict)
    arrays: dict[str, list[StrictFloat] | list[StrictInt] | list[StrictBool] | list[StrictStr]] = Field(
        default_factory=dict
    )
    metadata: dict[str, "TaskMetadata"] = Field(default_factory=dict)

    @classmethod
    def from_dict(cls, d: Mapping[str, Any]) -> "TaskMetadata":
        """Create a TaskMetadata from a dictionary.

        Parameters
        ----------
        d : `~collections.abc.Mapping`
            Mapping to convert. Can be hierarchical. Any dictionaries
            in the hierarchy are converted to `TaskMetadata`.

        Returns
        -------
        meta : `TaskMetadata`
            Newly-constructed metadata.
        """
        metadata = cls()
        for k, v in d.items():
            metadata[k] = v
        return metadata

    @classmethod
    def from_metadata(cls, ps: PropertySetLike) -> "TaskMetadata":
        """Create a TaskMetadata from a PropertySet-like object.

        Parameters
        ----------
        ps : `PropertySetLike` or `TaskMetadata`
            A ``PropertySet``-like object to be transformed to a
            `TaskMetadata`. A `TaskMetadata` can be copied using this
            class method.

        Returns
        -------
        tm : `TaskMetadata`
            Newly-constructed metadata.

        Notes
        -----
        Items stored in single-element arrays in the supplied object
        will be converted to scalars in the newly-created object.
        """
        # Use hierarchical names to assign values from input to output.
        # This API exists for both PropertySet and TaskMetadata.
        # from_dict() does not work because PropertySet is not declared
        # to be a Mapping.
        # PropertySet.toDict() is not present in TaskMetadata so is best
        # avoided.
        metadata = cls()
        for key in sorted(ps.paramNames(topLevelOnly=False)):
            value = ps.getArray(key)
            if len(value) == 1:
                value = value[0]
            metadata[key] = value
        return metadata

    def to_dict(self) -> dict[str, Any]:
        """Convert the class to a simple dictionary.

        Returns
        -------
        d : `dict`
            Simple dictionary that can contain scalar values, array values
            or other dictionary values.

        Notes
        -----
        Unlike `dict()`, this method hides the model layout and combines
        scalars, arrays, and other metadata in the same dictionary. Can be
        used when a simple dictionary is needed.  Use
        `TaskMetadata.from_dict()` to convert it back.
        """
        d: dict[str, Any] = {}
        d.update(self.scalars)
        d.update(self.arrays)
        for k, v in self.metadata.items():
            d[k] = v.to_dict()
        return d

    def add(self, name: str, value: Any) -> None:
        """Store a new value, adding to a list if one already exists.

        Parameters
        ----------
        name : `str`
            Name of the metadata property.
        value
            Metadata property value.

        Raises
        ------
        ValueError
            Raised if the value is not a primitive or a sequence of
            primitives, if ``name`` refers to a nested `TaskMetadata`, or
            if the type of the new value differs from the stored values.
        KeyError
            Raised if ``name`` is hierarchical and its parent does not
            already exist as a nested `TaskMetadata`.
        """
        keys = self._getKeys(name)
        key0 = keys.pop(0)
        if len(keys) == 0:
            # If add() is being used, always store the value in the arrays
            # property as a list. It's likely there will be another call.
            slot_type, value = self._validate_value(value)
            if slot_type == "array":
                pass
            elif slot_type == "scalar":
                value = [value]
            else:
                raise ValueError("add() can only be used for primitive types or sequences of those types.")

            if key0 in self.metadata:
                raise ValueError(f"Can not add() to key '{name}' since that is a TaskMetadata")

            if key0 in self.scalars:
                # Convert scalar to array.
                # MyPy should be able to figure out that List[Union[T1, T2]] is
                # compatible with Union[List[T1], List[T2]] if the list has
                # only one element, but it can't.
                self.arrays[key0] = [self.scalars.pop(key0)]  # type: ignore

            if key0 in self.arrays:
                # Check that the type is not changing.
                if (curtype := type(self.arrays[key0][0])) is not (newtype := type(value[0])):
                    raise ValueError(f"Type mismatch in add() -- currently {curtype} but adding {newtype}")
                self.arrays[key0].extend(value)
            else:
                self.arrays[key0] = value

            return

        # Hierarchical name: delegate to the child, which must already exist.
        self.metadata[key0].add(".".join(keys), value)

    def getScalar(self, key: str) -> str | int | float | bool:
        """Retrieve a scalar item even if the item is a list.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        value : `str`, `int`, `float`, or `bool`
            Either the value associated with the key or, if the key
            corresponds to a list, the last item in the list.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        # Used in pipe_tasks.
        # getScalar() is the default behavior for __getitem__.
        return self[key]

    def getArray(self, key: str) -> list[Any]:
        """Retrieve an item as a list even if it is a scalar.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        values : `list` of any
            A list containing the value or values associated with this item.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            if key0 in self.arrays:
                return self.arrays[key0]
            elif key0 in self.scalars:
                return [self.scalars[key0]]
            elif key0 in self.metadata:
                return [self.metadata[key0]]
            raise KeyError(f"'{key}' not found")

        try:
            return self.metadata[key0].getArray(".".join(keys))
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def names(self, topLevelOnly: bool | None = None) -> set[str]:
        """Return the hierarchical keys from the metadata.

        Parameters
        ----------
        topLevelOnly : `bool` or `None`, optional
            This parameter is deprecated and will be removed in the future.
            If given it can only be `False`. All names in the hierarchy are
            always returned.

        Returns
        -------
        names : `collections.abc.Set`
            A set of all keys, including those from the hierarchy and the
            top-level hierarchy.

        Raises
        ------
        RuntimeError
            Raised if ``topLevelOnly`` is true.
        """
        if topLevelOnly:
            raise RuntimeError(
                "The topLevelOnly parameter is no longer supported and can not have a True value."
            )

        if topLevelOnly is False:
            warnings.warn(
                "The topLevelOnly parameter is deprecated and is always assumed to be False."
                " It will be removed completely after v26.",
                category=FutureWarning,
                stacklevel=find_outside_stacklevel("lsst.pipe.base"),
            )

        names = set()
        for k, v in self.items():
            names.add(k)  # Always include the current level
            if isinstance(v, TaskMetadata):
                names.update({k + "." + item for item in v.names()})
        return names

    def paramNames(self, topLevelOnly: bool) -> set[str]:
        """Return hierarchical names.

        Parameters
        ----------
        topLevelOnly : `bool`
            Control whether only top-level items are returned or items
            from the hierarchy.

        Returns
        -------
        paramNames : `set` of `str`
            If ``topLevelOnly`` is `True`, returns any keys that are not
            part of a hierarchy. If `False` also returns fully-qualified
            names from the hierarchy. Keys associated with the top
            of a hierarchy are never returned.
        """
        # Currently used by the verify package.
        paramNames = set()
        for k, v in self.items():
            if isinstance(v, TaskMetadata):
                if not topLevelOnly:
                    paramNames.update({k + "." + item for item in v.paramNames(topLevelOnly=topLevelOnly)})
            else:
                paramNames.add(k)
        return paramNames

    @staticmethod
    def _getKeys(key: str) -> list[str]:
        """Return the key hierarchy.

        Parameters
        ----------
        key : `str`
            The key to analyze. Can be dot-separated.

        Returns
        -------
        keys : `list` of `str`
            The key hierarchy that has been split on ``.``.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        try:
            keys = key.split(".")
        except Exception:
            # Anything that can not be split like a string is reported as a
            # bad key so that mapping-style callers see a KeyError.
            raise KeyError(f"Invalid key '{key}': only string keys are allowed") from None
        return keys

    def keys(self) -> tuple[str, ...]:
        """Return the top-level keys."""
        return tuple(self)

    def items(self) -> Iterator[tuple[str, Any]]:
        """Yield the top-level keys and values."""
        yield from itertools.chain(self.scalars.items(), self.arrays.items(), self.metadata.items())

    def __len__(self) -> int:
        """Return the number of items."""
        return len(self.scalars) + len(self.arrays) + len(self.metadata)

    # This is actually a Liskov substitution violation, because
    # pydantic.BaseModel says __iter__ should return something else.  But the
    # pydantic docs say to do exactly this in order to make a mapping-like
    # BaseModel, so that's what we do.
    def __iter__(self) -> Iterator[str]:  # type: ignore
        """Return an iterator over each key."""
        # The order of keys is not preserved since items can move
        # from scalar to array.
        return itertools.chain(iter(self.scalars), iter(self.arrays), iter(self.metadata))

    def __getitem__(self, key: str) -> Any:
        """Retrieve the item associated with the key.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. For compatibility with ``PropertySet``, if the key
            refers to an array, the final element is returned and not the
            array itself.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            if key0 in self.scalars:
                return self.scalars[key0]
            if key0 in self.metadata:
                return self.metadata[key0]
            if key0 in self.arrays:
                return self.arrays[key0][-1]
            raise KeyError(f"'{key}' not found")
        # Hierarchical lookup so the top key can only be in the metadata
        # property. Trap KeyError and reraise so that the correct key
        # in the hierarchy is reported.
        try:
            # And forward request to that metadata.
            return self.metadata[key0][".".join(keys)]
        except KeyError:
            raise KeyError(f"'{key}' not found") from None

    def get(self, key: str, default: Any = None) -> Any:
        """Retrieve the item associated with the key or a default.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.
        default
            The value to return if the key does not exist.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. If the key refers to an array, the final element
            is returned and not the array itself; this is consistent with
            `__getitem__` and `PropertySet.get`, but not ``to_dict().get``.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def __setitem__(self, key: str, item: Any) -> None:
        """Store the given item.

        Parameters
        ----------
        key : `str`
            The key to assign. Can be dot-separated hierarchical, in which
            case intermediate `TaskMetadata` levels are created as needed.
        item
            The value to store. Any existing entry of the same name is
            replaced, whatever its kind.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        ValueError
            Raised if the value is not a supported type.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            slots: dict[str, dict[str, Any]] = {
                "array": self.arrays,
                "scalar": self.scalars,
                "metadata": self.metadata,
            }
            # Validate before touching any state so that a rejected value
            # leaves this object unchanged.
            slot_type, item = self._validate_value(item)
            primary = slots.pop(slot_type, None)
            if primary is None:
                raise AssertionError(f"Unknown slot type returned from validator: {slot_type}")

            # Assign the value to the right place.
            primary[key0] = item
            for other in slots.values():
                # Remove any other entries.
                other.pop(key0, None)
            return

        # This must be hierarchical so forward to the child TaskMetadata.
        # A newly-created child is only attached once the nested assignment
        # has succeeded, so an invalid value does not leave an empty child
        # behind.
        child = self.metadata.get(key0)
        if child is None:
            child = TaskMetadata()
        child[".".join(keys)] = item
        self.metadata[key0] = child

        # Ensure we have cleared out anything with the same name elsewhere.
        self.scalars.pop(key0, None)
        self.arrays.pop(key0, None)

    def __contains__(self, key: str) -> bool:
        """Determine if the key exists."""
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            return key0 in self.scalars or key0 in self.arrays or key0 in self.metadata

        if key0 in self.metadata:
            return ".".join(keys) in self.metadata[key0]
        return False

    def __delitem__(self, key: str) -> None:
        """Remove the specified item.

        Raises
        ------
        KeyError
            Raised if the item is not present.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            # MyPy can't figure out that this way to combine the types in the
            # tuple is the one that matters, and annotating a local variable
            # helps it out.
            properties: tuple[dict[str, Any], ...] = (self.scalars, self.arrays, self.metadata)
            for slot in properties:
                if key0 in slot:
                    del slot[key0]
                    return
            raise KeyError(f"'{key}' not found")

        try:
            del self.metadata[key0][".".join(keys)]
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def _validate_value(self, value: Any) -> tuple[str, Any]:
        """Validate the given value.

        Parameters
        ----------
        value : Any
            Value to check.

        Returns
        -------
        slot_type : `str`
            The type of value given. Options are "scalar", "array", "metadata".
        item : Any
            The item that was given but possibly modified to conform to
            the slot type.

        Raises
        ------
        ValueError
            Raised if the value is not a recognized type, if a sequence is
            empty, or if a sequence mixes element types.
        """
        # Test the simplest option first.
        value_type = type(value)
        if value_type in _ALLOWED_PRIMITIVE_TYPES:
            return "scalar", value

        if isinstance(value, TaskMetadata):
            return "metadata", value
        if isinstance(value, Mapping):
            return "metadata", self.from_dict(value)

        if _isListLike(value):
            # For model consistency, need to check that every item in the
            # list has the same type.
            value = list(value)

            # The element type is taken from the first item, so an empty
            # sequence can not be classified; report it as an invalid value
            # rather than letting an IndexError escape.
            if not value:
                raise ValueError("TaskMetadata can not store an empty list since its element type is unknown.")

            type0 = type(value[0])
            for i in value:
                if type(i) is not type0:
                    raise ValueError(
                        "Type mismatch in supplied list. TaskMetadata requires all"
                        f" elements have same type but see {type(i)} and {type0}."
                    )

            if type0 not in _ALLOWED_PRIMITIVE_TYPES:
                # Must check to see if we got numpy floats or something.
                type_cast: type
                if isinstance(value[0], numbers.Integral):
                    type_cast = int
                elif isinstance(value[0], numbers.Real):
                    type_cast = float
                else:
                    raise ValueError(
                        f"Supplied list has element of type '{type0}'. "
                        "TaskMetadata can only accept primitive types in lists."
                    )

                value = [type_cast(v) for v in value]

            return "array", value

        # Sometimes a numpy number is given.
        if isinstance(value, numbers.Integral):
            value = int(value)
            return "scalar", value
        if isinstance(value, numbers.Real):
            value = float(value)
            return "scalar", value

        raise ValueError(f"TaskMetadata does not support values of type {value_type!r}.")

568 

569 

# Needed because a TaskMetadata can contain a TaskMetadata: the ``metadata``
# field refers to the class by name, which can only be resolved once the
# class definition is complete.
TaskMetadata.model_rebuild()