Coverage for python/lsst/pipe/base/_task_metadata.py: 16%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

213 statements  

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = ["TaskMetadata"] 

23 

24import itertools 

25import numbers 

26import warnings 

27from collections.abc import Sequence 

28from typing import Any, Collection, Dict, List, Mapping, Protocol, Union 

29 

30from deprecated.sphinx import deprecated 

31from pydantic import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr 

32 

# Text and version shared by the deprecation decorators and warnings
# used in this module.
_DEPRECATION_REASON = "Will be removed after v25."
_DEPRECATION_VERSION = "v24"

# Only these primitive types may be stored in a task metadata field;
# restricting them keeps serialization predictable.
_ALLOWED_PRIMITIVE_TYPES = (str, float, int, bool)

39 

40 

class PropertySetLike(Protocol):
    """Structural type describing a ``lsst.daf.base.PropertySet``.

    Only the part of the API required to convert a ``PropertySet``
    into a `TaskMetadata` is declared here.
    """

    def paramNames(self, topLevelOnly: bool = True) -> Collection[str]:
        """Return the names of the stored parameters."""
        ...

    def getArray(self, name: str) -> Any:
        """Return the value stored under ``name``."""
        ...

53 

54 

def _isListLike(v):
    """Report whether ``v`` is a sequence other than a `str`.

    Parameters
    ----------
    v : Any
        Object to test.

    Returns
    -------
    isListLike : `bool`
        `True` if ``v`` is a `collections.abc.Sequence` that is not
        a `str`, `False` otherwise.
    """
    # A string is itself a Sequence but must be handled as a scalar.
    if isinstance(v, str):
        return False
    return isinstance(v, Sequence)

57 

58 

class TaskMetadata(BaseModel):
    """Dict-like object for storing task metadata.

    Metadata can be stored at two levels: single task or task plus subtasks.
    The latter is called full metadata of a task and has a form

        topLevelTaskName:subtaskName:subsubtaskName.itemName

    Metadata item key of a task (`itemName` above) must not contain `.`,
    which serves as a separator in full metadata keys and turns
    the value into sub-dictionary. Arbitrary hierarchies are supported.

    Deprecated methods are for compatibility with
    the predecessor containers.
    """

    # Values are partitioned by kind into three dictionaries: primitive
    # scalars, homogeneous lists of primitives, and nested TaskMetadata.
    # Item assignment stores a top-level key in exactly one of them and
    # clears it from the other two.
    scalars: Dict[str, Union[StrictFloat, StrictInt, StrictBool, StrictStr]] = Field(default_factory=dict)
    arrays: Dict[str, Union[List[StrictFloat], List[StrictInt], List[StrictBool], List[StrictStr]]] = Field(
        default_factory=dict
    )
    metadata: Dict[str, "TaskMetadata"] = Field(default_factory=dict)

    @classmethod
    def from_dict(cls, d: Mapping[str, Any]) -> "TaskMetadata":
        """Create a TaskMetadata from a dictionary.

        Parameters
        ----------
        d : `Mapping`
            Mapping to convert. Can be hierarchical. Any dictionaries
            in the hierarchy are converted to `TaskMetadata`.

        Returns
        -------
        meta : `TaskMetadata`
            Newly-constructed metadata.

        Raises
        ------
        ValueError
            Raised if a value in the mapping has an unsupported type.
        KeyError
            Raised if a key is not a string.
        """
        metadata = cls()
        # Item assignment validates each value and files it in the
        # matching slot.
        for k, v in d.items():
            metadata[k] = v
        return metadata

    @classmethod
    def from_metadata(cls, ps: PropertySetLike) -> "TaskMetadata":
        """Create a TaskMetadata from a PropertySet-like object.

        Parameters
        ----------
        ps : `PropertySetLike` or `TaskMetadata`
            A ``PropertySet``-like object to be transformed to a
            `TaskMetadata`. A `TaskMetadata` can be copied using this
            class method.

        Returns
        -------
        tm : `TaskMetadata`
            Newly-constructed metadata.

        Notes
        -----
        Items stored in single-element arrays in the supplied object
        will be converted to scalars in the newly-created object.
        """
        # Use hierarchical names to assign values from input to output.
        # This API exists for both PropertySet and TaskMetadata.
        # from_dict() does not work because PropertySet is not declared
        # to be a Mapping.
        # PropertySet.toDict() is not present in TaskMetadata so is best
        # avoided.
        metadata = cls()
        for key in sorted(ps.paramNames(topLevelOnly=False)):
            value = ps.getArray(key)
            if len(value) == 1:
                value = value[0]
            metadata[key] = value
        return metadata

    def to_dict(self) -> Dict[str, Any]:
        """Convert the class to a simple dictionary.

        Returns
        -------
        d : `dict`
            Simple dictionary that can contain scalar values, array values
            or other dictionary values.

        Notes
        -----
        Unlike `dict()`, this method hides the model layout and combines
        scalars, arrays, and other metadata in the same dictionary. Can be
        used when a simple dictionary is needed.  Use
        `TaskMetadata.from_dict()` to convert it back.
        """
        d: Dict[str, Any] = {}
        d.update(self.scalars)
        d.update(self.arrays)
        # Nested metadata is flattened recursively.
        for k, v in self.metadata.items():
            d[k] = v.to_dict()
        return d

    def add(self, name: str, value: Any) -> None:
        """Store a new value, adding to a list if one already exists.

        Parameters
        ----------
        name : `str`
            Name of the metadata property. Can be dot-separated
            hierarchical; missing levels of the hierarchy are created.
        value
            Metadata property value.

        Raises
        ------
        ValueError
            Raised if the value is not a primitive or a sequence of
            primitives, if its type differs from the values already stored,
            or if ``name`` refers to a `TaskMetadata`.
        KeyError
            Raised if ``name`` is not a string, or if it is hierarchical
            and its leading component refers to a scalar or an array.
        """
        keys = self._getKeys(name)
        key0 = keys.pop(0)
        if keys:
            subname = ".".join(keys)
            if key0 in self.metadata or key0 in self.scalars or key0 in self.arrays:
                # An existing parent must already be a TaskMetadata; the
                # lookup raises KeyError if it is a scalar or an array.
                self.metadata[key0].add(subname, value)
            else:
                # Create the missing level of the hierarchy, but only
                # attach it once the value has been accepted so that a
                # failure does not leave an empty child behind.
                child = TaskMetadata()
                child.add(subname, value)
                self.metadata[key0] = child
            return

        # If add() is being used, always store the value in the arrays
        # property as a list. It's likely there will be another call.
        slot_type, value = self._validate_value(value)
        if slot_type == "scalar":
            value = [value]
        elif slot_type != "array":
            raise ValueError("add() can only be used for primitive types or sequences of those types.")

        if key0 in self.metadata:
            raise ValueError(f"Can not add() to key '{name}' since that is a TaskMetadata")

        if key0 in self.scalars:
            # Convert scalar to array.
            self.arrays[key0] = [self.scalars.pop(key0)]

        if key0 in self.arrays:
            existing = self.arrays[key0]
            # Check that the type is not changing. An empty stored list
            # has no element type to compare against.
            if existing and (curtype := type(existing[0])) is not (newtype := type(value[0])):
                raise ValueError(f"Type mismatch in add() -- currently {curtype} but adding {newtype}")
            existing.extend(value)
        else:
            self.arrays[key0] = value

    @deprecated(
        reason="Cast the return value to float explicitly. " + _DEPRECATION_REASON,
        version=_DEPRECATION_VERSION,
        category=FutureWarning,
    )
    def getAsDouble(self, key: str) -> float:
        """Return the value cast to a `float`.

        Parameters
        ----------
        key : `str`
            Item to return. Can be dot-separated hierarchical.

        Returns
        -------
        value : `float`
            The value cast to a `float`.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        return float(self[key])

    def getScalar(self, key: str) -> Any:
        """Retrieve a scalar item even if the item is a list.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        value : Any
            Either the value associated with the key or, if the key
            corresponds to a list, the last item in the list.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        # Used in pipe_tasks.
        # getScalar() is the default behavior for __getitem__.
        return self[key]

    def getArray(self, key: str) -> Any:
        """Retrieve an item as a list even if it is a scalar.

        Parameters
        ----------
        key : `str`
            Item to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        values : `list` of any
            A list containing the value or values associated with this item.
            A scalar or a nested `TaskMetadata` is returned wrapped in a
            single-element list.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            if key0 in self.arrays:
                return self.arrays[key0]
            elif key0 in self.scalars:
                return [self.scalars[key0]]
            elif key0 in self.metadata:
                return [self.metadata[key0]]
            raise KeyError(f"'{key}' not found")

        try:
            return self.metadata[key0].getArray(".".join(keys))
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def names(self, topLevelOnly: bool = True):
        """Return the hierarchical keys from the metadata.

        Parameters
        ----------
        topLevelOnly  : `bool`
            If true, return top-level keys, otherwise full metadata item keys.

        Returns
        -------
        names : `collections.abc.Set`
            A set of top-level keys or full metadata item keys, including
            the top-level keys.

        Notes
        -----
        Should never be called in new code with ``topLevelOnly`` set to `True`
        -- this is equivalent to asking for the keys and is the default
        when iterating through the task metadata. In this case a deprecation
        message will be issued and the ability will raise an exception
        in a future release.

        When ``topLevelOnly`` is `False` all keys, including those from the
        hierarchy and the top-level hierarchy, are returned.
        """
        if topLevelOnly:
            # stacklevel=2 attributes the warning to the caller.
            warnings.warn("Use keys() instead. " + _DEPRECATION_REASON, FutureWarning, stacklevel=2)
            return set(self.keys())

        names = set()
        for k, v in self.items():
            names.add(k)  # Always include the current level
            if isinstance(v, TaskMetadata):
                names.update({k + "." + item for item in v.names(topLevelOnly=topLevelOnly)})
        return names

    def paramNames(self, topLevelOnly: bool = True):
        """Return hierarchical names.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Control whether only top-level items are returned or items
            from the hierarchy.

        Returns
        -------
        paramNames : `set` of `str`
            If ``topLevelOnly`` is `True`, returns any keys that are not
            part of a hierarchy. If `False` also returns fully-qualified
            names from the hierarchy. Keys associated with the top
            of a hierarchy are never returned.
        """
        # Currently used by the verify package.
        paramNames = set()
        for k, v in self.items():
            if isinstance(v, TaskMetadata):
                if not topLevelOnly:
                    paramNames.update({k + "." + item for item in v.paramNames(topLevelOnly=topLevelOnly)})
            else:
                paramNames.add(k)
        return paramNames

    @deprecated(
        reason="Use standard assignment syntax. " + _DEPRECATION_REASON,
        version=_DEPRECATION_VERSION,
        category=FutureWarning,
    )
    def set(self, key: str, item: Any) -> None:
        """Set the value of the supplied key."""
        self[key] = item

    @deprecated(
        reason="Use standard del dict syntax. " + _DEPRECATION_REASON,
        version=_DEPRECATION_VERSION,
        category=FutureWarning,
    )
    def remove(self, key: str) -> None:
        """Remove the item without raising if absent."""
        try:
            del self[key]
        except KeyError:
            # The PropertySet.remove() should always work.
            pass

    @staticmethod
    def _getKeys(key: str) -> List[str]:
        """Return the key hierarchy.

        Parameters
        ----------
        key : `str`
            The key to analyze. Can be dot-separated.

        Returns
        -------
        keys : `list` of `str`
            The key hierarchy that has been split on ``.``.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        try:
            keys = key.split(".")
        except Exception:
            raise KeyError(f"Invalid key '{key}': only string keys are allowed") from None
        return keys

    def keys(self):
        """Return the top-level keys."""
        return tuple(self)

    def items(self):
        """Yield the top-level keys and values."""
        yield from itertools.chain(self.scalars.items(), self.arrays.items(), self.metadata.items())

    def __len__(self) -> int:
        """Return the number of items."""
        return len(self.scalars) + len(self.arrays) + len(self.metadata)

    def __iter__(self):
        """Return an iterator over each key."""
        # The order of keys is not preserved since items can move
        # from scalar to array.
        return itertools.chain(iter(self.scalars), iter(self.arrays), iter(self.metadata))

    def __getitem__(self, key: str) -> Any:
        """Retrieve the item associated with the key.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. For compatibility with ``PropertySet``, if the key
            refers to an array, the final element is returned and not the
            array itself.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            if key0 in self.scalars:
                return self.scalars[key0]
            if key0 in self.metadata:
                return self.metadata[key0]
            if key0 in self.arrays:
                return self.arrays[key0][-1]
            raise KeyError(f"'{key}' not found")
        # Hierarchical lookup so the top key can only be in the metadata
        # property. Trap KeyError and reraise so that the correct key
        # in the hierarchy is reported.
        try:
            # And forward request to that metadata.
            return self.metadata[key0][".".join(keys)]
        except KeyError:
            raise KeyError(f"'{key}' not found") from None

    def __setitem__(self, key: str, item: Any) -> None:
        """Store the given item.

        Parameters
        ----------
        key : `str`
            The key to assign to. Can be dot-separated hierarchical, in
            which case missing levels of the hierarchy are created.
        item : Any
            The value to store. Any existing entry of the same name is
            replaced, whatever its kind.

        Raises
        ------
        ValueError
            Raised if the item is not a supported type.
        KeyError
            Raised if the key is not a string.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            slots = {"array": self.arrays, "scalar": self.scalars, "metadata": self.metadata}
            slot_type, item = self._validate_value(item)
            primary = slots.pop(slot_type, None)
            if primary is None:
                raise AssertionError(f"Unknown slot type returned from validator: {slot_type}")

            # Assign the value to the right place.
            primary[key0] = item
            for other in slots.values():
                # Remove any other entries.
                other.pop(key0, None)
            return

        # This must be hierarchical so forward to the child TaskMetadata.
        # A newly created child is only attached after the nested
        # assignment succeeded, so a rejected value leaves no empty child.
        child = self.metadata.get(key0)
        if child is None:
            child = TaskMetadata()
        child[".".join(keys)] = item
        self.metadata[key0] = child

        # Ensure we have cleared out anything with the same name elsewhere.
        self.scalars.pop(key0, None)
        self.arrays.pop(key0, None)

    def __contains__(self, key: str) -> bool:
        """Determine if the key exists."""
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            return key0 in self.scalars or key0 in self.arrays or key0 in self.metadata

        if key0 in self.metadata:
            return ".".join(keys) in self.metadata[key0]
        return False

    def __delitem__(self, key: str) -> None:
        """Remove the specified item.

        Raises
        ------
        KeyError
            Raised if the item is not present.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if len(keys) == 0:
            for slot in (self.scalars, self.arrays, self.metadata):
                if key0 in slot:
                    del slot[key0]
                    return
            raise KeyError(f"'{key}' not found")

        try:
            del self.metadata[key0][".".join(keys)]
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def _validate_value(self, value: Any):
        """Validate the given value.

        Parameters
        ----------
        value : Any
            Value to check.

        Returns
        -------
        slot_type : `str`
            The type of value given. Options are "scalar", "array", "metadata".
        item : Any
            The item that was given but possibly modified to conform to
            the slot type.

        Raises
        ------
        ValueError
            Raised if the value is not a recognized type, or if it is an
            empty sequence or a sequence of mixed types.
        """
        # Test the simplest option first.
        value_type = type(value)
        if value_type in _ALLOWED_PRIMITIVE_TYPES:
            return "scalar", value

        if isinstance(value, TaskMetadata):
            return "metadata", value
        if isinstance(value, Mapping):
            return "metadata", self.from_dict(value)

        if _isListLike(value):
            # For model consistency, need to check that every item in the
            # list has the same type.
            value = list(value)

            # The checks below rely on a first element, so reject an empty
            # sequence explicitly rather than failing with IndexError.
            if not value:
                raise ValueError(
                    "TaskMetadata does not support empty sequences since the element type"
                    " can not be determined."
                )

            type0 = type(value[0])
            for i in value:
                if type(i) is not type0:
                    raise ValueError(
                        "Type mismatch in supplied list. TaskMetadata requires all"
                        f" elements have same type but see {type(i)} and {type0}."
                    )

            if type0 not in _ALLOWED_PRIMITIVE_TYPES:
                # Must check to see if we got numpy floats or something.
                if isinstance(value[0], numbers.Integral):
                    type_cast = int
                elif isinstance(value[0], numbers.Real):
                    type_cast = float
                else:
                    raise ValueError(
                        f"Supplied list has element of type '{type0}'. "
                        "TaskMetadata can only accept primitive types in lists."
                    )

                value = [type_cast(v) for v in value]

            return "array", value

        # Sometimes a numpy number is given.
        if isinstance(value, numbers.Integral):
            value = int(value)
            return "scalar", value
        if isinstance(value, numbers.Real):
            value = float(value)
            return "scalar", value

        raise ValueError(f"TaskMetadata does not support values of type {type(value)!r}.")


# Needed because a TaskMetadata can contain a TaskMetadata.
TaskMetadata.update_forward_refs()