Coverage for python/lsst/pipe/base/_task_metadata.py: 14%

205 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-11 02:00 -0700

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = ["TaskMetadata"] 

23 

24import itertools 

25import numbers 

26import warnings 

27from collections.abc import Collection, Iterator, Mapping, Sequence 

28from typing import Any, Protocol 

29 

30from pydantic import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr 

31 

# Text appended to the deprecation warnings issued by this module.
_DEPRECATION_REASON = "Will be removed after v25."
# Version label kept alongside the deprecation reason
# (presumably the release that introduced the deprecation -- TODO confirm).
_DEPRECATION_VERSION = "v24"

# Only these primitive types may be stored in a Task metadata field;
# restricting the set keeps serialization predictable.
_ALLOWED_PRIMITIVE_TYPES = (str, float, int, bool)

38 

39 

class PropertySetLike(Protocol):
    """Structural type resembling ``lsst.daf.base.PropertySet``.

    Only the part of the API needed to convert a ``PropertySet`` into a
    `TaskMetadata` is declared here.
    """

    def paramNames(self, topLevelOnly: bool = True) -> Collection[str]:
        """Return the names of the stored items.

        Each returned name is expected to be usable as the argument
        to `getArray`.
        """

    def getArray(self, name: str) -> Any:
        """Return the values stored under ``name`` as a sized, indexable
        collection.
        """

52 

53 

def _isListLike(v: Any) -> bool:
    """Report whether ``v`` is a sequence other than a string."""
    # Strings are sequences too, but they are treated as scalars here.
    if isinstance(v, str):
        return False
    return isinstance(v, Sequence)

56 

57 

class TaskMetadata(BaseModel):
    """Dict-like object for storing task metadata.

    Metadata can be stored at two levels: single task or task plus subtasks.
    The latter is called full metadata of a task and has a form

        topLevelTaskName:subtaskName:subsubtaskName.itemName

    Metadata item key of a task (`itemName` above) must not contain `.`,
    which serves as a separator in full metadata keys and turns
    the value into sub-dictionary. Arbitrary hierarchies are supported.
    """

    # A given top-level key lives in exactly one of the following three
    # dictionaries; the mutators below maintain that invariant.
    scalars: dict[str, StrictFloat | StrictInt | StrictBool | StrictStr] = Field(default_factory=dict)
    arrays: dict[str, list[StrictFloat] | list[StrictInt] | list[StrictBool] | list[StrictStr]] = Field(
        default_factory=dict
    )
    metadata: dict[str, "TaskMetadata"] = Field(default_factory=dict)

    @classmethod
    def from_dict(cls, d: Mapping[str, Any]) -> "TaskMetadata":
        """Create a TaskMetadata from a dictionary.

        Parameters
        ----------
        d : `~collections.abc.Mapping`
            Mapping to convert. Can be hierarchical. Any dictionaries
            in the hierarchy are converted to `TaskMetadata`.

        Returns
        -------
        meta : `TaskMetadata`
            Newly-constructed metadata.

        Raises
        ------
        ValueError
            Raised if any value is of an unsupported type.
        """
        metadata = cls()
        for k, v in d.items():
            metadata[k] = v
        return metadata

    @classmethod
    def from_metadata(cls, ps: PropertySetLike) -> "TaskMetadata":
        """Create a TaskMetadata from a PropertySet-like object.

        Parameters
        ----------
        ps : `PropertySetLike` or `TaskMetadata`
            A ``PropertySet``-like object to be transformed to a
            `TaskMetadata`. A `TaskMetadata` can be copied using this
            class method.

        Returns
        -------
        tm : `TaskMetadata`
            Newly-constructed metadata.

        Notes
        -----
        Items stored in single-element arrays in the supplied object
        will be converted to scalars in the newly-created object.
        """
        # Use hierarchical names to assign values from input to output.
        # This API exists for both PropertySet and TaskMetadata.
        # from_dict() does not work because PropertySet is not declared
        # to be a Mapping.
        # PropertySet.toDict() is not present in TaskMetadata so is best
        # avoided.
        metadata = cls()
        for key in sorted(ps.paramNames(topLevelOnly=False)):
            value = ps.getArray(key)
            if len(value) == 1:
                value = value[0]
            metadata[key] = value
        return metadata

    def to_dict(self) -> dict[str, Any]:
        """Convert the class to a simple dictionary.

        Returns
        -------
        d : `dict`
            Simple dictionary that can contain scalar values, array values
            or other dictionary values.

        Notes
        -----
        Unlike `dict()`, this method hides the model layout and combines
        scalars, arrays, and other metadata in the same dictionary. Can be
        used when a simple dictionary is needed. Use
        `TaskMetadata.from_dict()` to convert it back.
        """
        d: dict[str, Any] = {}
        d.update(self.scalars)
        d.update(self.arrays)
        for k, v in self.metadata.items():
            d[k] = v.to_dict()
        return d

    def add(self, name: str, value: Any) -> None:
        """Store a new value, adding to a list if one already exists.

        Parameters
        ----------
        name : `str`
            Name of the metadata property. Can be dot-separated
            hierarchical; a missing parent level is created.
        value
            Metadata property value.

        Raises
        ------
        ValueError
            Raised if the value is not a primitive type or a sequence of
            those types, if ``name`` refers to a `TaskMetadata`, or if the
            type of the value differs from that of the values already
            stored. The stored content is left unchanged in that case.
        KeyError
            Raised if a parent level of a hierarchical ``name`` refers to
            a scalar or an array.
        """
        keys = self._getKeys(name)
        key0 = keys.pop(0)
        if keys:
            remainder = ".".join(keys)
            # Compare with None: an empty TaskMetadata is falsy.
            child = self.metadata.get(key0)
            if child is not None:
                child.add(remainder, value)
                return
            if key0 in self.scalars or key0 in self.arrays:
                raise KeyError(f"Can not add() to key '{name}' since '{key0}' is not a TaskMetadata")
            # Attach the new level only once the value has been accepted
            # so that a rejected value does not leave an empty level behind.
            child = TaskMetadata()
            child.add(remainder, value)
            self.metadata[key0] = child
            return

        # If add() is being used, always store the value in the arrays
        # property as a list. It's likely there will be another call.
        slot_type, value = self._validate_value(value)
        if slot_type == "scalar":
            value = [value]
        elif slot_type != "array":
            raise ValueError("add() can only be used for primitive types or sequences of those types.")

        if key0 in self.metadata:
            raise ValueError(f"Can not add() to key '{name}' since that is a TaskMetadata")

        existing: list[Any] | None
        if key0 in self.scalars:
            # An existing scalar becomes the first element of the array.
            existing = [self.scalars[key0]]
        else:
            existing = self.arrays.get(key0)

        if existing is None:
            self.arrays[key0] = value
            return

        # Check that the type is not changing before modifying anything.
        if existing and (curtype := type(existing[0])) is not (newtype := type(value[0])):
            raise ValueError(f"Type mismatch in add() -- currently {curtype} but adding {newtype}")

        existing.extend(value)
        self.arrays[key0] = existing
        self.scalars.pop(key0, None)

    def getScalar(self, key: str) -> str | int | float | bool:
        """Retrieve a scalar item even if the item is a list.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        value : `str`, `int`, `float`, or `bool`
            Either the value associated with the key or, if the key
            corresponds to a list, the last item in the list.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        # Used in pipe_tasks.
        # getScalar() is the default behavior for __getitem__.
        return self[key]

    def getArray(self, key: str) -> list[Any]:
        """Retrieve an item as a list even if it is a scalar.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        values : `list` of any
            A list containing the value or values associated with this item.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            if key0 in self.arrays:
                return self.arrays[key0]
            if key0 in self.scalars:
                return [self.scalars[key0]]
            if key0 in self.metadata:
                return [self.metadata[key0]]
            raise KeyError(f"'{key}' not found")

        try:
            return self.metadata[key0].getArray(".".join(keys))
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def names(self, topLevelOnly: bool = True) -> set[str]:
        """Return the hierarchical keys from the metadata.

        Parameters
        ----------
        topLevelOnly : `bool`
            If true, return top-level keys, otherwise full metadata item keys.

        Returns
        -------
        names : `set` of `str`
            A set of top-level keys or full metadata item keys, including
            the top-level keys.

        Notes
        -----
        Should never be called in new code with ``topLevelOnly`` set to `True`
        -- this is equivalent to asking for the keys and is the default
        when iterating through the task metadata. In this case a deprecation
        message will be issued and the ability will raise an exception
        in a future release.

        When ``topLevelOnly`` is `False` all keys, including those from the
        hierarchy and the top-level hierarchy, are returned.
        """
        if topLevelOnly:
            # stacklevel=2 attributes the warning to the calling code.
            warnings.warn("Use keys() instead. " + _DEPRECATION_REASON, FutureWarning, stacklevel=2)
            return set(self.keys())

        names = set()
        for k, v in self.items():
            names.add(k)  # Always include the current level
            if isinstance(v, TaskMetadata):
                names.update({k + "." + item for item in v.names(topLevelOnly=False)})
        return names

    def paramNames(self, topLevelOnly: bool) -> set[str]:
        """Return hierarchical names.

        Parameters
        ----------
        topLevelOnly : `bool`
            Control whether only top-level items are returned or items
            from the hierarchy.

        Returns
        -------
        paramNames : `set` of `str`
            If ``topLevelOnly`` is `True`, returns any keys that are not
            part of a hierarchy. If `False` also returns fully-qualified
            names from the hierarchy. Keys associated with the top
            of a hierarchy are never returned.
        """
        # Currently used by the verify package.
        paramNames = set()
        for k, v in self.items():
            if isinstance(v, TaskMetadata):
                if not topLevelOnly:
                    paramNames.update({k + "." + item for item in v.paramNames(topLevelOnly=topLevelOnly)})
            else:
                paramNames.add(k)
        return paramNames

    @staticmethod
    def _getKeys(key: str) -> list[str]:
        """Return the key hierarchy.

        Parameters
        ----------
        key : `str`
            The key to analyze. Can be dot-separated.

        Returns
        -------
        keys : `list` of `str`
            The key hierarchy that has been split on ``.``.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        try:
            keys = key.split(".")
        except (AttributeError, TypeError):
            # No split() method at all, or one that rejects a str
            # separator (e.g. bytes).
            raise KeyError(f"Invalid key '{key}': only string keys are allowed") from None
        return keys

    def keys(self) -> tuple[str, ...]:
        """Return the top-level keys."""
        return tuple(self)

    def items(self) -> Iterator[tuple[str, Any]]:
        """Yield the top-level keys and values."""
        yield from itertools.chain(self.scalars.items(), self.arrays.items(), self.metadata.items())

    def __len__(self) -> int:
        """Return the number of items."""
        return len(self.scalars) + len(self.arrays) + len(self.metadata)

    # This is actually a Liskov substitution violation, because
    # pydantic.BaseModel says __iter__ should return something else. But the
    # pydantic docs say to do exactly this in order to make a mapping-like
    # BaseModel, so that's what we do.
    def __iter__(self) -> Iterator[str]:  # type: ignore
        """Return an iterator over each key."""
        # The order of keys is not preserved since items can move
        # from scalar to array.
        return itertools.chain(iter(self.scalars), iter(self.arrays), iter(self.metadata))

    def __getitem__(self, key: str) -> Any:
        """Retrieve the item associated with the key.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. For compatibility with ``PropertySet``, if the key
            refers to an array, the final element is returned and not the
            array itself.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            if key0 in self.scalars:
                return self.scalars[key0]
            if key0 in self.metadata:
                return self.metadata[key0]
            if key0 in self.arrays:
                return self.arrays[key0][-1]
            raise KeyError(f"'{key}' not found")
        # Hierarchical lookup so the top key can only be in the metadata
        # property. Trap KeyError and reraise so that the correct key
        # in the hierarchy is reported.
        try:
            # And forward request to that metadata.
            return self.metadata[key0][".".join(keys)]
        except KeyError:
            raise KeyError(f"'{key}' not found") from None

    def get(self, key: str, default: Any = None) -> Any:
        """Retrieve the item associated with the key or a default.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.
        default
            The value to return if the key does not exist.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. If the key refers to an array, the final element
            is returned and not the array itself; this is consistent with
            `__getitem__` and `PropertySet.get`, but not ``to_dict().get``.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def __setitem__(self, key: str, item: Any) -> None:
        """Store the given item.

        Parameters
        ----------
        key : `str`
            The key to assign. Can be dot-separated hierarchical, in which
            case missing levels are created.
        item
            The value to store. Any entry previously stored under the same
            top-level key is replaced.

        Raises
        ------
        ValueError
            Raised if the value is not a recognized type. The stored
            content is left unchanged in that case.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            slots: dict[str, dict[str, Any]] = {
                "array": self.arrays,
                "scalar": self.scalars,
                "metadata": self.metadata,
            }
            slot_type, item = self._validate_value(item)
            primary = slots.pop(slot_type, None)
            if primary is None:
                raise AssertionError(f"Unknown slot type returned from validator: {slot_type}")

            # Assign the value to the right place.
            primary[key0] = item
            for other in slots.values():
                # Remove any other entries.
                other.pop(key0, None)
            return

        # This must be hierarchical so forward to the child TaskMetadata.
        remainder = ".".join(keys)
        # Compare with None: an empty TaskMetadata is falsy.
        child = self.metadata.get(key0)
        if child is None:
            # Attach the new level only once the assignment has succeeded
            # so that a rejected value does not leave an empty level behind.
            child = TaskMetadata()
            child[remainder] = item
            self.metadata[key0] = child
        else:
            child[remainder] = item

        # Ensure we have cleared out anything with the same name elsewhere.
        self.scalars.pop(key0, None)
        self.arrays.pop(key0, None)

    def __contains__(self, key: str) -> bool:
        """Determine if the key exists."""
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            return key0 in self.scalars or key0 in self.arrays or key0 in self.metadata

        if key0 in self.metadata:
            return ".".join(keys) in self.metadata[key0]
        return False

    def __delitem__(self, key: str) -> None:
        """Remove the specified item.

        Raises
        ------
        KeyError
            Raised if the item is not present.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            # MyPy can't figure out that this way to combine the types in the
            # tuple is the one that matters, and annotating a local variable
            # helps it out.
            slots: tuple[dict[str, Any], ...] = (self.scalars, self.arrays, self.metadata)
            for slot in slots:
                if key0 in slot:
                    del slot[key0]
                    return
            raise KeyError(f"'{key}' not found")

        try:
            del self.metadata[key0][".".join(keys)]
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def _validate_value(self, value: Any) -> tuple[str, Any]:
        """Validate the given value.

        Parameters
        ----------
        value : Any
            Value to check.

        Returns
        -------
        slot_type : `str`
            The type of value given. Options are "scalar", "array", "metadata".
        item : Any
            The item that was given but possibly modified to conform to
            the slot type.

        Raises
        ------
        ValueError
            Raised if the value is not a recognized type, or if it is an
            empty sequence or a sequence of mixed types.
        """
        # Test the simplest option first. The exact type is required so
        # that subclasses of the primitive types are not stored as-is.
        if type(value) in _ALLOWED_PRIMITIVE_TYPES:
            return "scalar", value

        if isinstance(value, TaskMetadata):
            return "metadata", value
        if isinstance(value, Mapping):
            return "metadata", self.from_dict(value)

        if _isListLike(value):
            # For model consistency, need to check that every item in the
            # list has the same type.
            value = list(value)

            if not value:
                # The element type can not be determined, and the accessors
                # returning the final element assume non-empty arrays.
                raise ValueError("TaskMetadata does not support empty sequences.")

            type0 = type(value[0])
            for i in value:
                if type(i) is not type0:
                    raise ValueError(
                        "Type mismatch in supplied list. TaskMetadata requires all"
                        f" elements have same type but see {type(i)} and {type0}."
                    )

            if type0 not in _ALLOWED_PRIMITIVE_TYPES:
                # Must check to see if we got numpy floats or something.
                type_cast: type
                if isinstance(value[0], numbers.Integral):
                    type_cast = int
                elif isinstance(value[0], numbers.Real):
                    type_cast = float
                else:
                    raise ValueError(
                        f"Supplied list has element of type '{type0}'. "
                        "TaskMetadata can only accept primitive types in lists."
                    )

                value = [type_cast(v) for v in value]

            return "array", value

        # Sometimes a numpy number is given.
        if isinstance(value, numbers.Integral):
            return "scalar", int(value)
        if isinstance(value, numbers.Real):
            return "scalar", float(value)

        raise ValueError(f"TaskMetadata does not support values of type {type(value)!r} (got {value!r}).")


# Needed because a TaskMetadata can contain a TaskMetadata.
TaskMetadata.update_forward_refs()