Coverage for python/lsst/pipe/base/_task_metadata.py: 13%

206 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-11 10:21 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

# Public API of this module: only the TaskMetadata class is exported.
__all__ = ["TaskMetadata"]

23 

24import itertools 

25import numbers 

26import warnings 

27from collections.abc import Sequence 

28from typing import Any, Collection, Dict, Iterator, List, Mapping, Optional, Protocol, Set, Tuple, Union 

29 

30from pydantic import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr 

31 

# Text appended to the deprecation warnings issued by this module.
_DEPRECATION_REASON = "Will be removed after v25."
# Version associated with the deprecation.
# NOTE(review): not referenced anywhere in the visible source -- confirm
# whether it is still needed.
_DEPRECATION_VERSION = "v24"

# The types allowed in a Task metadata field are restricted
# to allow predictable serialization.
_ALLOWED_PRIMITIVE_TYPES = (str, float, int, bool)

38 

39 

class PropertySetLike(Protocol):
    """Protocol that looks like a ``lsst.daf.base.PropertySet``.

    Enough of the API is specified to support conversion of a
    ``PropertySet`` to a `TaskMetadata`.
    """

    def paramNames(self, topLevelOnly: bool = True) -> Collection[str]:
        """Return the names of the stored parameters.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Control whether only top-level names are returned or names
            from the full hierarchy.

        Returns
        -------
        names : `~collections.abc.Collection` of `str`
            The parameter names.
        """
        ...

    def getArray(self, name: str) -> Any:
        """Return the values stored under the given name.

        Parameters
        ----------
        name : `str`
            Name of the parameter to retrieve.

        Returns
        -------
        values : `typing.Any`
            The stored values; presumably a sized, indexable sequence --
            confirm against the concrete implementation.
        """
        ...

52 

53 

def _isListLike(v: Any) -> bool:
    """Report whether a value should be treated as a list of values.

    Parameters
    ----------
    v : `typing.Any`
        Value to test.

    Returns
    -------
    is_list_like : `bool`
        `True` if ``v`` is a `collections.abc.Sequence` other than a `str`.
        Strings are excluded because they are stored as scalars.
    """
    return isinstance(v, Sequence) and not isinstance(v, str)

56 

57 

class TaskMetadata(BaseModel):
    """Dict-like object for storing task metadata.

    Metadata can be stored at two levels: single task or task plus subtasks.
    The latter is called full metadata of a task and has the form

        topLevelTaskName:subtaskName:subsubtaskName.itemName

    The metadata item key of a task (``itemName`` above) must not contain
    ``.``, which serves as a separator in full metadata keys and turns
    the value into a sub-dictionary. Arbitrary hierarchies are supported.
    """

    # Single primitive values, keyed by top-level name.
    scalars: Dict[str, Union[StrictFloat, StrictInt, StrictBool, StrictStr]] = Field(default_factory=dict)
    # Homogeneous lists of primitive values, keyed by top-level name.
    arrays: Dict[str, Union[List[StrictFloat], List[StrictInt], List[StrictBool], List[StrictStr]]] = Field(
        default_factory=dict
    )
    # Nested metadata (one level of the hierarchy), keyed by top-level name.
    metadata: Dict[str, "TaskMetadata"] = Field(default_factory=dict)

    @classmethod
    def from_dict(cls, d: Mapping[str, Any]) -> "TaskMetadata":
        """Create a TaskMetadata from a dictionary.

        Parameters
        ----------
        d : `Mapping`
            Mapping to convert. Can be hierarchical. Any dictionaries
            in the hierarchy are converted to `TaskMetadata`.

        Returns
        -------
        meta : `TaskMetadata`
            Newly-constructed metadata.
        """
        metadata = cls()
        for k, v in d.items():
            metadata[k] = v
        return metadata

    @classmethod
    def from_metadata(cls, ps: PropertySetLike) -> "TaskMetadata":
        """Create a TaskMetadata from a PropertySet-like object.

        Parameters
        ----------
        ps : `PropertySetLike` or `TaskMetadata`
            A ``PropertySet``-like object to be transformed to a
            `TaskMetadata`. A `TaskMetadata` can be copied using this
            class method.

        Returns
        -------
        tm : `TaskMetadata`
            Newly-constructed metadata.

        Notes
        -----
        Items stored in single-element arrays in the supplied object
        will be converted to scalars in the newly-created object.
        """
        # Use hierarchical names to assign values from input to output.
        # This API exists for both PropertySet and TaskMetadata.
        # from_dict() does not work because PropertySet is not declared
        # to be a Mapping.
        # PropertySet.toDict() is not present in TaskMetadata so is best
        # avoided.
        metadata = cls()
        for key in sorted(ps.paramNames(topLevelOnly=False)):
            value = ps.getArray(key)
            if len(value) == 1:
                value = value[0]
            metadata[key] = value
        return metadata

    def to_dict(self) -> Dict[str, Any]:
        """Convert the class to a simple dictionary.

        Returns
        -------
        d : `dict`
            Simple dictionary that can contain scalar values, array values
            or other dictionary values.

        Notes
        -----
        Unlike `dict()`, this method hides the model layout and combines
        scalars, arrays, and other metadata in the same dictionary. Can be
        used when a simple dictionary is needed. Use
        `TaskMetadata.from_dict()` to convert it back.
        """
        d: Dict[str, Any] = {**self.scalars, **self.arrays}
        for k, v in self.metadata.items():
            d[k] = v.to_dict()
        return d

    def add(self, name: str, value: Any) -> None:
        """Store a new value, adding to a list if one already exists.

        Parameters
        ----------
        name : `str`
            Name of the metadata property. Can be dot-separated
            hierarchical, in which case the parent hierarchy must already
            exist.
        value
            Metadata property value.

        Raises
        ------
        ValueError
            Raised if the value is not a primitive type or a sequence of
            primitive types, if the name refers to a `TaskMetadata`, or if
            the type of the new value differs from that of the values
            already stored. The metadata is left unmodified in this case.
        KeyError
            Raised if the name is hierarchical and the parent does not exist.
        """
        keys = self._getKeys(name)
        key0 = keys.pop(0)
        if keys:
            # Hierarchical name: forward to the existing child metadata.
            self.metadata[key0].add(".".join(keys), value)
            return

        # If add() is being used, always store the value in the arrays
        # property as a list. It's likely there will be another call.
        slot_type, value = self._validate_value(value)
        if slot_type == "scalar":
            value = [value]
        elif slot_type != "array":
            raise ValueError("add() can only be used for primitive types or sequences of those types.")

        if key0 in self.metadata:
            raise ValueError(f"Can not add() to key '{name}' since that is a TaskMetadata")

        # Gather the values already stored, without modifying anything yet so
        # that a failure below leaves the metadata untouched.
        current: Optional[List[Any]]
        if key0 in self.scalars:
            current = [self.scalars[key0]]
        else:
            current = self.arrays.get(key0)

        if current is None:
            self.arrays[key0] = value
            return

        # Check that the type is not changing. An existing empty array has
        # no element type to compare against.
        if current and (curtype := type(current[0])) is not (newtype := type(value[0])):
            raise ValueError(f"Type mismatch in add() -- currently {curtype} but adding {newtype}")

        current.extend(value)
        # MyPy can not tell that the combined list is homogeneous.
        self.arrays[key0] = current  # type: ignore
        # A scalar of the same name has now been promoted to an array.
        self.scalars.pop(key0, None)

    def getScalar(self, key: str) -> Union[str, int, float, bool]:
        """Retrieve a scalar item even if the item is a list.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        value : `str`, `int`, `float`, or `bool`
            Either the value associated with the key or, if the key
            corresponds to a list, the last item in the list.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        # Used in pipe_tasks.
        # getScalar() is the default behavior for __getitem__.
        return self[key]

    def getArray(self, key: str) -> List[Any]:
        """Retrieve an item as a list even if it is a scalar.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        values : `list` of any
            A list containing the value or values associated with this item.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            if key0 in self.arrays:
                return self.arrays[key0]
            if key0 in self.scalars:
                return [self.scalars[key0]]
            if key0 in self.metadata:
                return [self.metadata[key0]]
            raise KeyError(f"'{key}' not found")

        try:
            return self.metadata[key0].getArray(".".join(keys))
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def names(self, topLevelOnly: bool = True) -> Set[str]:
        """Return the hierarchical keys from the metadata.

        Parameters
        ----------
        topLevelOnly : `bool`
            If true, return top-level keys, otherwise full metadata item keys.

        Returns
        -------
        names : `collections.abc.Set`
            A set of top-level keys or full metadata item keys, including
            the top-level keys.

        Notes
        -----
        Should never be called in new code with ``topLevelOnly`` set to `True`
        -- this is equivalent to asking for the keys and is the default
        when iterating through the task metadata. In this case a deprecation
        message will be issued and the ability will raise an exception
        in a future release.

        When ``topLevelOnly`` is `False` all keys, including those from the
        hierarchy and the top-level hierarchy, are returned.
        """
        if topLevelOnly:
            # stacklevel=2 attributes the warning to the calling code.
            warnings.warn("Use keys() instead. " + _DEPRECATION_REASON, FutureWarning, stacklevel=2)
            return set(self.keys())

        names: Set[str] = set()
        for k, v in self.items():
            names.add(k)  # Always include the current level
            if isinstance(v, TaskMetadata):
                names.update({k + "." + item for item in v.names(topLevelOnly=False)})
        return names

    def paramNames(self, topLevelOnly: bool = True) -> Set[str]:
        """Return hierarchical names.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Control whether only top-level items are returned or items
            from the hierarchy. Defaults to `True`, matching the
            `PropertySetLike` protocol.

        Returns
        -------
        paramNames : `set` of `str`
            If ``topLevelOnly`` is `True`, returns any keys that are not
            part of a hierarchy. If `False` also returns fully-qualified
            names from the hierarchy. Keys associated with the top
            of a hierarchy are never returned.
        """
        # Currently used by the verify package.
        paramNames: Set[str] = set()
        for k, v in self.items():
            if isinstance(v, TaskMetadata):
                if not topLevelOnly:
                    paramNames.update({k + "." + item for item in v.paramNames(topLevelOnly=topLevelOnly)})
            else:
                paramNames.add(k)
        return paramNames

    @staticmethod
    def _getKeys(key: str) -> List[str]:
        """Return the key hierarchy.

        Parameters
        ----------
        key : `str`
            The key to analyze. Can be dot-separated.

        Returns
        -------
        keys : `list` of `str`
            The key hierarchy that has been split on ``.``.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        if not isinstance(key, str):
            raise KeyError(f"Invalid key '{key}': only string keys are allowed")
        return key.split(".")

    def keys(self) -> Tuple[str, ...]:
        """Return the top-level keys."""
        return tuple(self)

    def items(self) -> Iterator[Tuple[str, Any]]:
        """Yield the top-level keys and values."""
        yield from itertools.chain(self.scalars.items(), self.arrays.items(), self.metadata.items())

    def __len__(self) -> int:
        """Return the number of items."""
        return len(self.scalars) + len(self.arrays) + len(self.metadata)

    # This is actually a Liskov substitution violation, because
    # pydantic.BaseModel says __iter__ should return something else. But the
    # pydantic docs say to do exactly this in order to make a mapping-like
    # BaseModel, so that's what we do.
    def __iter__(self) -> Iterator[str]:  # type: ignore
        """Return an iterator over each key."""
        # The order of keys is not preserved since items can move
        # from scalar to array.
        return itertools.chain(iter(self.scalars), iter(self.arrays), iter(self.metadata))

    def __getitem__(self, key: str) -> Any:
        """Retrieve the item associated with the key.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. For compatibility with ``PropertySet``, if the key
            refers to an array, the final element is returned and not the
            array itself.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            if key0 in self.scalars:
                return self.scalars[key0]
            if key0 in self.metadata:
                return self.metadata[key0]
            if key0 in self.arrays:
                return self.arrays[key0][-1]
            raise KeyError(f"'{key}' not found")
        # Hierarchical lookup so the top key can only be in the metadata
        # property. Trap KeyError and reraise so that the correct key
        # in the hierarchy is reported.
        try:
            # And forward request to that metadata.
            return self.metadata[key0][".".join(keys)]
        except KeyError:
            raise KeyError(f"'{key}' not found") from None

    def get(self, key: str, default: Any = None) -> Any:
        """Retrieve the item associated with the key or a default.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.
        default
            The value to return if the key does not exist.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. If the key refers to an array, the final element
            is returned and not the array itself; this is consistent with
            `__getitem__` and `PropertySet.get`, but not ``to_dict().get``.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def __setitem__(self, key: str, item: Any) -> None:
        """Store the given item.

        Parameters
        ----------
        key : `str`
            The key to assign. Can be dot-separated hierarchical, in which
            case intermediate `TaskMetadata` levels are created as needed.
        item
            The value to store. Any entry of the same top-level name held
            in a different slot (scalar, array or metadata) is removed.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        ValueError
            Raised if the value is not a supported type.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            slots: Dict[str, Dict[str, Any]] = {
                "array": self.arrays,
                "scalar": self.scalars,
                "metadata": self.metadata,
            }
            slot_type, item = self._validate_value(item)
            primary = slots.pop(slot_type, None)
            if primary is None:
                raise AssertionError(f"Unknown slot type returned from validator: {slot_type}")

            # Assign the value to the right place.
            primary[key0] = item
            for other in slots.values():
                # Remove any other entries.
                other.pop(key0, None)
            return

        # This must be hierarchical so forward to the child TaskMetadata.
        # A new child is only attached once the assignment has succeeded so
        # that a rejected value does not leave an empty child behind.
        child = self.metadata.get(key0)
        if child is None:
            child = TaskMetadata()
        child[".".join(keys)] = item
        self.metadata[key0] = child

        # Ensure we have cleared out anything with the same name elsewhere.
        self.scalars.pop(key0, None)
        self.arrays.pop(key0, None)

    def __contains__(self, key: str) -> bool:
        """Determine if the key exists.

        Parameters
        ----------
        key : `str`
            The key to look for. Can be dot-separated hierarchical.

        Returns
        -------
        found : `bool`
            `True` if the key is present.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            return key0 in self.scalars or key0 in self.arrays or key0 in self.metadata

        if key0 in self.metadata:
            return ".".join(keys) in self.metadata[key0]
        return False

    def __delitem__(self, key: str) -> None:
        """Remove the specified item.

        Parameters
        ----------
        key : `str`
            The key to remove. Can be dot-separated hierarchical.

        Raises
        ------
        KeyError
            Raised if the item is not present.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            # MyPy can't figure out that this way to combine the types in the
            # tuple is the one that matters, and annotating a local variable
            # helps it out.
            stores: Tuple[Dict[str, Any], ...] = (self.scalars, self.arrays, self.metadata)
            for store in stores:
                if key0 in store:
                    del store[key0]
                    return
            raise KeyError(f"'{key}' not found")

        try:
            del self.metadata[key0][".".join(keys)]
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def _validate_value(self, value: Any) -> Tuple[str, Any]:
        """Validate the given value.

        Parameters
        ----------
        value : Any
            Value to check.

        Returns
        -------
        slot_type : `str`
            The type of value given. Options are "scalar", "array", "metadata".
        item : Any
            The item that was given but possibly modified to conform to
            the slot type.

        Raises
        ------
        ValueError
            Raised if the value is not a recognized type, if a sequence is
            empty, or if a sequence contains elements of differing types.
        """
        # Test the simplest option first. The exact type is required, so
        # subclasses of the primitive types fall through to the checks below.
        if type(value) in _ALLOWED_PRIMITIVE_TYPES:
            return "scalar", value

        if isinstance(value, TaskMetadata):
            return "metadata", value
        if isinstance(value, Mapping):
            return "metadata", self.from_dict(value)

        if _isListLike(value):
            # For model consistency, need to check that every item in the
            # list has the same type.
            value = list(value)
            if not value:
                # Without an element there is no type to validate against.
                raise ValueError(
                    "TaskMetadata does not support empty sequences since the element type is unknown."
                )

            type0 = type(value[0])
            for element in value:
                if type(element) is not type0:
                    raise ValueError(
                        "Type mismatch in supplied list. TaskMetadata requires all"
                        f" elements have same type but see {type(element)} and {type0}."
                    )

            if type0 not in _ALLOWED_PRIMITIVE_TYPES:
                # Must check to see if we got numpy floats or something.
                type_cast: type
                if isinstance(value[0], numbers.Integral):
                    type_cast = int
                elif isinstance(value[0], numbers.Real):
                    type_cast = float
                else:
                    raise ValueError(
                        f"Supplied list has element of type '{type0}'. "
                        "TaskMetadata can only accept primitive types in lists."
                    )

                value = [type_cast(v) for v in value]

            return "array", value

        # Sometimes a numpy number is given.
        if isinstance(value, numbers.Integral):
            return "scalar", int(value)
        if isinstance(value, numbers.Real):
            return "scalar", float(value)

        raise ValueError(f"TaskMetadata does not support values of type {type(value)}.")

570 

571 

# Needed because a TaskMetadata can contain a TaskMetadata: the ``metadata``
# field is annotated with the string "TaskMetadata", which can only be
# resolved once the class itself has been defined.
TaskMetadata.update_forward_refs()