Coverage for python/lsst/pipe/base/_task_metadata.py: 16%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

206 statements  

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = ["TaskMetadata"] 

23 

24import itertools 

25import numbers 

26import warnings 

27from collections.abc import Sequence 

28from typing import Any, Collection, Dict, List, Mapping, Protocol, Union 

29 

30from deprecated.sphinx import deprecated 

31from pydantic import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr 

32 

33_DEPRECATION_REASON = "Will be removed after v25." 

34_DEPRECATION_VERSION = "v24" 

35 

36# The types allowed in a Task metadata field are restricted 

37# to allow predictable serialization. 

38_ALLOWED_PRIMITIVE_TYPES = (str, float, int, bool) 

39 

40 

class PropertySetLike(Protocol):
    """Structural type resembling ``lsst.daf.base.PropertySet``.

    Only the two methods needed to convert a ``PropertySet`` into a
    `TaskMetadata` are declared here.
    """

    def paramNames(self, topLevelOnly: bool = True) -> Collection[str]:
        """Return the names of the stored parameters."""
        ...

    def getArray(self, name: str) -> Any:
        """Return the values stored under ``name``."""
        ...

53 

54 

55def _isListLike(v): 

56 return isinstance(v, Sequence) and not isinstance(v, str) 

57 

58 

class TaskMetadata(BaseModel):
    """Dict-like object for storing task metadata.

    Metadata can be stored at two levels: single task or task plus subtasks.
    The latter is called full metadata of a task and has a form

        topLevelTaskName:subtaskName:subsubtaskName.itemName

    Metadata item key of a task (`itemName` above) must not contain `.`,
    which serves as a separator in full metadata keys and turns
    the value into sub-dictionary. Arbitrary hierarchies are supported.

    Deprecated methods are for compatibility with
    the predecessor containers.
    """

    # Top-level values are partitioned by kind into three dicts.  Item
    # assignment stores a key in exactly one of them and removes it from
    # the other two.
    scalars: Dict[str, Union[StrictFloat, StrictInt, StrictBool, StrictStr]] = Field(default_factory=dict)
    arrays: Dict[str, Union[List[StrictFloat], List[StrictInt], List[StrictBool], List[StrictStr]]] = Field(
        default_factory=dict
    )
    metadata: Dict[str, "TaskMetadata"] = Field(default_factory=dict)

    @classmethod
    def from_dict(cls, d: Mapping[str, Any]) -> "TaskMetadata":
        """Create a TaskMetadata from a dictionary.

        Parameters
        ----------
        d : `Mapping`
            Mapping to convert. Can be hierarchical. Any dictionaries
            in the hierarchy are converted to `TaskMetadata`.

        Returns
        -------
        meta : `TaskMetadata`
            Newly-constructed metadata.
        """
        metadata = cls()
        # Item assignment validates each value and converts nested mappings.
        for k, v in d.items():
            metadata[k] = v
        return metadata

    @classmethod
    def from_metadata(cls, ps: PropertySetLike) -> "TaskMetadata":
        """Create a TaskMetadata from a PropertySet-like object.

        Parameters
        ----------
        ps : `lsst.daf.base.PropertySet` or `TaskMetadata`
            A ``PropertySet``-like object to be transformed to a
            `TaskMetadata`. A `TaskMetadata` can be copied using this
            class method.

        Returns
        -------
        tm : `TaskMetadata`
            Newly-constructed metadata.

        Notes
        -----
        Items stored in single-element arrays in the supplied object
        will be converted to scalars in the newly-created object.
        """
        # Use hierarchical names to assign values from input to output.
        # This API exists for both PropertySet and TaskMetadata.
        # from_dict() does not work because PropertySet is not declared
        # to be a Mapping.
        # PropertySet.toDict() is not present in TaskMetadata so is best
        # avoided.
        metadata = cls()
        for key in sorted(ps.paramNames(topLevelOnly=False)):
            value = ps.getArray(key)
            if len(value) == 1:
                value = value[0]
            metadata[key] = value
        return metadata

    def add(self, name: str, value: Any) -> None:
        """Store a new value, adding to a list if one already exists.

        Parameters
        ----------
        name : `str`
            Name of the metadata property. Can be dot-separated
            hierarchical.
        value
            Metadata property value.

        Raises
        ------
        ValueError
            Raised if the value is not a primitive type or a sequence of
            primitive types, if ``name`` refers to a `TaskMetadata`, or if
            the type of the new value differs from the values already
            stored.
        KeyError
            Raised if ``name`` is hierarchical and the parent level does
            not exist.
        """
        keys = self._getKeys(name)
        key0 = keys.pop(0)
        if keys:
            # Hierarchical name: delegate to the child.  Unlike item
            # assignment, a missing intermediate level is not created, so
            # this lookup raises KeyError in that case.
            self.metadata[key0].add(".".join(keys), value)
            return

        # If add() is being used, always store the value in the arrays
        # property as a list. It's likely there will be another call.
        slot_type, value = self._validate_value(value)
        if slot_type == "scalar":
            value = [value]
        elif slot_type != "array":
            raise ValueError("add() can only be used for primitive types or sequences of those types.")

        if key0 in self.metadata:
            raise ValueError(f"Can not add() to key '{name}' since that is a TaskMetadata")

        # Work out what is already stored without modifying anything, so
        # that a type mismatch leaves the object untouched.
        if key0 in self.scalars:
            current = [self.scalars[key0]]
        elif key0 in self.arrays:
            current = self.arrays[key0]
        else:
            self.arrays[key0] = value
            return

        # Check that the type is not changing.
        if (curtype := type(current[0])) is not (newtype := type(value[0])):
            raise ValueError(f"Type mismatch in add() -- currently {curtype} but adding {newtype}")

        # Safe to commit: an existing scalar is promoted to an array.
        self.scalars.pop(key0, None)
        current.extend(value)
        self.arrays[key0] = current

    @deprecated(
        reason="Cast the return value to float explicitly. " + _DEPRECATION_REASON,
        version=_DEPRECATION_VERSION,
        category=FutureWarning,
    )
    def getAsDouble(self, key: str) -> float:
        """Return the value cast to a `float`.

        Parameters
        ----------
        key : `str`
            Item to return. Can be dot-separated hierarchical.

        Returns
        -------
        value : `float`
            The value cast to a `float`.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        return float(self[key])

    def getScalar(self, key: str) -> Any:
        """Retrieve a scalar item even if the item is a list.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        value : Any
            Either the value associated with the key or, if the key
            corresponds to a list, the last item in the list.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        # Used in pipe_tasks.
        # getScalar() is the default behavior for __getitem__.
        return self[key]

    def getArray(self, key: str) -> List[Any]:
        """Retrieve an item as a list even if it is a scalar.

        Parameters
        ----------
        key : `str`
            Item to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        values : `list` of any
            A list containing the value or values associated with this item.
            A stored array is returned directly, not as a copy.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            if key0 in self.arrays:
                return self.arrays[key0]
            if key0 in self.scalars:
                return [self.scalars[key0]]
            if key0 in self.metadata:
                return [self.metadata[key0]]
            raise KeyError(f"'{key}' not found")

        try:
            return self.metadata[key0].getArray(".".join(keys))
        except KeyError:
            # Report the full key, not the fragment seen by the child.
            raise KeyError(f"'{key}' not found") from None

    def names(self, topLevelOnly: bool = True) -> Collection[str]:
        """Return the hierarchical keys from the metadata.

        Parameters
        ----------
        topLevelOnly : `bool`
            If true, return top-level keys, otherwise full metadata item keys.

        Returns
        -------
        names : `collections.abc.Set`
            A set of top-level keys or full metadata item keys, including
            the top-level keys.

        Notes
        -----
        Should never be called in new code with ``topLevelOnly`` set to `True`
        -- this is equivalent to asking for the keys and is the default
        when iterating through the task metadata. In this case a deprecation
        message will be issued and the ability will raise an exception
        in a future release.

        When ``topLevelOnly`` is `False` all keys, including those from the
        hierarchy and the top-level hierarchy, are returned.
        """
        if topLevelOnly:
            # stacklevel=2 attributes the warning to the calling code.
            warnings.warn("Use keys() instead. " + _DEPRECATION_REASON, FutureWarning, stacklevel=2)
            return set(self.keys())

        names = set()
        for k, v in self.items():
            names.add(k)  # Always include the current level
            if isinstance(v, TaskMetadata):
                names.update(f"{k}.{item}" for item in v.names(topLevelOnly=False))
        return names

    def paramNames(self, topLevelOnly: bool = True) -> Collection[str]:
        """Return hierarchical names.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Control whether only top-level items are returned or items
            from the hierarchy.

        Returns
        -------
        paramNames : `set` of `str`
            If ``topLevelOnly`` is `True`, returns any keys that are not
            part of a hierarchy. If `False` also returns fully-qualified
            names from the hierarchy. Keys associated with the top
            of a hierarchy are never returned.
        """
        # Currently used by the verify package.
        paramNames = set()
        for k, v in self.items():
            if not isinstance(v, TaskMetadata):
                paramNames.add(k)
            elif not topLevelOnly:
                paramNames.update(f"{k}.{item}" for item in v.paramNames(topLevelOnly=False))
        return paramNames

    @deprecated(
        reason="Use standard assignment syntax. " + _DEPRECATION_REASON,
        version=_DEPRECATION_VERSION,
        category=FutureWarning,
    )
    def set(self, key: str, item: Any) -> None:
        """Set the value of the supplied key."""
        self[key] = item

    @deprecated(
        reason="Use standard del dict syntax. " + _DEPRECATION_REASON,
        version=_DEPRECATION_VERSION,
        category=FutureWarning,
    )
    def remove(self, key: str) -> None:
        """Remove the item without raising if absent."""
        try:
            del self[key]
        except KeyError:
            # The PropertySet.remove() should always work.
            pass

    @staticmethod
    def _getKeys(key: str) -> List[str]:
        """Return the key hierarchy.

        Parameters
        ----------
        key : `str`
            The key to analyze. Can be dot-separated.

        Returns
        -------
        keys : `list` of `str`
            The key hierarchy that has been split on ``.``.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        try:
            return key.split(".")
        except (AttributeError, TypeError):
            # AttributeError: the object has no split().
            # TypeError: split() exists but rejects a str separator (bytes).
            raise KeyError(f"Invalid key '{key}': only string keys are allowed") from None

    def keys(self) -> tuple:
        """Return the top-level keys."""
        return tuple(self)

    def items(self):
        """Yield the top-level keys and values."""
        yield from itertools.chain(self.scalars.items(), self.arrays.items(), self.metadata.items())

    def __len__(self) -> int:
        """Return the number of items."""
        return len(self.scalars) + len(self.arrays) + len(self.metadata)

    def __iter__(self):
        """Return an iterator over each key."""
        # The order of keys is not preserved since items can move
        # from scalar to array.
        return itertools.chain(self.scalars, self.arrays, self.metadata)

    def __getitem__(self, key: str) -> Any:
        """Retrieve the item associated with the key.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. For compatibility with ``PropertySet``, if the key
            refers to an array, the final element is returned and not the
            array itself.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            if key0 in self.scalars:
                return self.scalars[key0]
            if key0 in self.metadata:
                return self.metadata[key0]
            if key0 in self.arrays:
                return self.arrays[key0][-1]
            raise KeyError(f"'{key}' not found")

        # Hierarchical lookup so the top key can only be in the metadata
        # property. Trap KeyError and reraise so that the correct key
        # in the hierarchy is reported.
        try:
            return self.metadata[key0][".".join(keys)]
        except KeyError:
            raise KeyError(f"'{key}' not found") from None

    def __setitem__(self, key: str, item: Any) -> None:
        """Store the given item.

        Parameters
        ----------
        key : `str`
            The key to assign. Can be dot-separated hierarchical, in which
            case missing intermediate levels are created.
        item : Any
            Value to store. It is validated and may be converted before
            being stored.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        ValueError
            Raised if the value is not of a supported type.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            slots = {"array": self.arrays, "scalar": self.scalars, "metadata": self.metadata}
            slot_type, item = self._validate_value(item)
            primary = slots.pop(slot_type, None)
            if primary is None:
                raise AssertionError(f"Unknown slot type returned from validator: {slot_type}")

            # Assign the value to the right place and remove any entry
            # of the same name from the other two containers.
            primary[key0] = item
            for other in slots.values():
                other.pop(key0, None)
            return

        # This must be hierarchical so forward to the child TaskMetadata.
        # A newly created child is attached only after the nested
        # assignment succeeds, so an invalid value does not leave an
        # empty child behind.
        child = self.metadata.get(key0)
        if child is None:
            child = TaskMetadata()
        child[".".join(keys)] = item
        self.metadata[key0] = child

        # Ensure we have cleared out anything with the same name elsewhere.
        self.scalars.pop(key0, None)
        self.arrays.pop(key0, None)

    def __contains__(self, key: str) -> bool:
        """Determine if the key exists.

        Hierarchical (dot-separated) keys are resolved through the nested
        `TaskMetadata` objects.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            return key0 in self.scalars or key0 in self.arrays or key0 in self.metadata

        if key0 in self.metadata:
            return ".".join(keys) in self.metadata[key0]
        return False

    def __delitem__(self, key: str) -> None:
        """Remove the specified item.

        Raises
        ------
        KeyError
            Raised if the item is not present.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            for container in (self.scalars, self.arrays, self.metadata):
                if key0 in container:
                    del container[key0]
                    return
            raise KeyError(f"'{key}' not found")

        try:
            del self.metadata[key0][".".join(keys)]
        except KeyError:
            # Report the full key, not the fragment seen by the child.
            raise KeyError(f"'{key}' not found") from None

    def _validate_value(self, value: Any):
        """Validate the given value.

        Parameters
        ----------
        value : Any
            Value to check.

        Returns
        -------
        slot_type : `str`
            The type of value given. Options are "scalar", "array", "metadata".
        item : Any
            The item that was given but possibly modified to conform to
            the slot type.

        Raises
        ------
        ValueError
            Raised if the value is not a recognized type, if a sequence is
            empty, or if the elements of a sequence do not all share the
            same type.
        """
        # Test the simplest option first.  This is an exact type match;
        # other numeric types are handled by the conversions further down.
        if type(value) in _ALLOWED_PRIMITIVE_TYPES:
            return "scalar", value

        if isinstance(value, TaskMetadata):
            return "metadata", value
        if isinstance(value, Mapping):
            return "metadata", self.from_dict(value)

        if _isListLike(value):
            value = list(value)
            if not value:
                # Without a first element there is no element type to check.
                raise ValueError(
                    "TaskMetadata does not support empty sequences since the element type cannot be determined."
                )

            # For model consistency, need to check that every item in the
            # list has the same type.
            type0 = type(value[0])
            for i in value:
                if type(i) is not type0:
                    raise ValueError(
                        "Type mismatch in supplied list. TaskMetadata requires all"
                        f" elements have same type but see {type(i)} and {type0}."
                    )

            if type0 not in _ALLOWED_PRIMITIVE_TYPES:
                # Must check to see if we got numpy floats or something.
                if isinstance(value[0], numbers.Integral):
                    type_cast = int
                elif isinstance(value[0], numbers.Real):
                    type_cast = float
                else:
                    raise ValueError(
                        f"Supplied list has element of type '{type0}'. "
                        "TaskMetadata can only accept primitive types in lists."
                    )

                value = [type_cast(v) for v in value]

            return "array", value

        # Sometimes a numpy number is given.
        if isinstance(value, numbers.Integral):
            return "scalar", int(value)
        if isinstance(value, numbers.Real):
            return "scalar", float(value)

        raise ValueError(f"TaskMetadata does not support values of type {type(value)!r}.")

561 

562 

# The ``metadata`` field is annotated with the string "TaskMetadata" because
# a TaskMetadata can contain a TaskMetadata; resolve that forward reference
# now that the class exists.
TaskMetadata.update_forward_refs()