Coverage for python/lsst/pipe/base/_task_metadata.py: 16%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

206 statements  

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = ["TaskMetadata"] 

23 

24import numbers 

25import itertools 

26import warnings 

27from collections.abc import Sequence 

28from deprecated.sphinx import deprecated 

29 

30from typing import Dict, List, Union, Any, Mapping, Protocol, Collection 

31from pydantic import BaseModel, StrictInt, StrictFloat, StrictBool, StrictStr, Field 

32 

33_DEPRECATION_REASON = "Will be removed after v25." 

34_DEPRECATION_VERSION = "v24" 

35 

36# The types allowed in a Task metadata field are restricted 

37# to allow predictable serialization. 

38_ALLOWED_PRIMITIVE_TYPES = (str, float, int, bool) 

39 

40 

class PropertySetLike(Protocol):
    """Structural type for objects resembling ``lsst.daf.base.PropertySet``.

    Only the part of the interface that is required to convert a
    ``PropertySet`` into a `TaskMetadata` is declared here.
    """

    def paramNames(self, topLevelOnly: bool = True) -> Collection[str]:
        """Return the names of the stored items.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Whether only top-level names are wanted rather than names
            from the full hierarchy.
        """
        ...

    def getArray(self, name: str) -> Any:
        """Return the values stored under ``name`` as a sized sequence."""
        ...

53 

54 

55def _isListLike(v): 

56 return isinstance(v, Sequence) and not isinstance(v, str) 

57 

58 

class TaskMetadata(BaseModel):
    """Dict-like object for storing task metadata.

    Metadata can be stored at two levels: single task or task plus subtasks.
    The latter is called full metadata of a task and has a form

        topLevelTaskName:subtaskName:subsubtaskName.itemName

    Metadata item key of a task (`itemName` above) must not contain `.`,
    which serves as a separator in full metadata keys and turns
    the value into sub-dictionary. Arbitrary hierarchies are supported.

    Deprecated methods are for compatibility with
    the predecessor containers.
    """

    # Top-level items holding a single primitive value.
    scalars: Dict[str, Union[StrictFloat, StrictInt, StrictBool, StrictStr]] = Field(default_factory=dict)
    # Top-level items holding a homogeneous list of primitive values.
    arrays: Dict[str, Union[List[StrictFloat], List[StrictInt], List[StrictBool],
                            List[StrictStr]]] = Field(default_factory=dict)
    # Nested metadata; the key is the first component of a dotted name.
    metadata: Dict[str, "TaskMetadata"] = Field(default_factory=dict)

    @classmethod
    def from_dict(cls, d: Mapping[str, Any]) -> "TaskMetadata":
        """Create a TaskMetadata from a dictionary.

        Parameters
        ----------
        d : `Mapping`
            Mapping to convert. Can be hierarchical. Any dictionaries
            in the hierarchy are converted to `TaskMetadata`.

        Returns
        -------
        meta : `TaskMetadata`
            Newly-constructed metadata.
        """
        metadata = cls()
        for k, v in d.items():
            metadata[k] = v
        return metadata

    @classmethod
    def from_metadata(cls, ps: PropertySetLike) -> "TaskMetadata":
        """Create a TaskMetadata from a PropertySet-like object.

        Parameters
        ----------
        ps : `lsst.daf.base.PropertySet` or `TaskMetadata`
            A ``PropertySet``-like object to be transformed to a
            `TaskMetadata`. A `TaskMetadata` can be copied using this
            class method.

        Returns
        -------
        tm : `TaskMetadata`
            Newly-constructed metadata.

        Notes
        -----
        Items stored in single-element arrays in the supplied object
        will be converted to scalars in the newly-created object.
        """
        # Use hierarchical names to assign values from input to output.
        # This API exists for both PropertySet and TaskMetadata.
        # from_dict() does not work because PropertySet is not declared
        # to be a Mapping.
        # PropertySet.toDict() is not present in TaskMetadata so is best
        # avoided.
        metadata = cls()
        for key in sorted(ps.paramNames(topLevelOnly=False)):
            value = ps.getArray(key)
            if len(value) == 1:
                value = value[0]
            metadata[key] = value
        return metadata

    def add(self, name: str, value: Any) -> None:
        """Store a new value, adding to a list if one already exists.

        Parameters
        ----------
        name : `str`
            Name of the metadata property. Can be dot-separated
            hierarchical, in which case every parent level must already
            exist as a `TaskMetadata`.
        value
            Metadata property value.

        Raises
        ------
        KeyError
            Raised if ``name`` is not a string or if a parent level of a
            hierarchical name is not an existing `TaskMetadata`.
        ValueError
            Raised if the value is not a primitive type or a sequence of
            them, if the name refers to a `TaskMetadata`, or if the type
            of the new value differs from the values already stored.
        """
        *parents, leaf = self._getKeys(name)

        # Walk down to the level that owns the leaf so that any error can
        # report the full name that the caller supplied.
        target = self
        for parent in parents:
            try:
                target = target.metadata[parent]
            except KeyError:
                raise KeyError(
                    f"Can not add() to key '{name}' since '{parent}' is not an existing TaskMetadata"
                ) from None

        # If add() is being used, always store the value in the arrays
        # property as a list. It's likely there will be another call.
        slot_type, value = self._validate_value(value)
        if slot_type == "scalar":
            value = [value]
        elif slot_type != "array":
            raise ValueError("add() can only be used for primitive types or sequences of those types.")

        if leaf in target.metadata:
            raise ValueError(f"Can not add() to key '{name}' since that is a TaskMetadata")

        if leaf in target.scalars:
            # An existing scalar becomes the first element of the array.
            current = [target.scalars[leaf]]
        elif leaf in target.arrays:
            current = target.arrays[leaf]
        else:
            target.arrays[leaf] = value
            return

        # Check that the type is not changing. This is done before any
        # state is modified so a failed add() leaves the metadata untouched.
        if current and (curtype := type(current[0])) is not (newtype := type(value[0])):
            raise ValueError(f"Type mismatch in add() -- currently {curtype} but adding {newtype}")

        current.extend(value)
        target.arrays[leaf] = current
        target.scalars.pop(leaf, None)

    @deprecated(reason="Cast the return value to float explicitly. " + _DEPRECATION_REASON,
                version=_DEPRECATION_VERSION, category=FutureWarning)
    def getAsDouble(self, key: str) -> float:
        """Return the value cast to a `float`.

        Parameters
        ----------
        key : `str`
            Item to return. Can be dot-separated hierarchical.

        Returns
        -------
        value : `float`
            The value cast to a `float`.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        return float(self[key])

    def getScalar(self, key: str) -> Any:
        """Retrieve a scalar item even if the item is a list.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        value : Any
            Either the value associated with the key or, if the key
            corresponds to a list, the last item in the list.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        # Used in pipe_tasks.
        # getScalar() is the default behavior for __getitem__.
        return self[key]

    def getArray(self, key: str) -> List[Any]:
        """Retrieve an item as a list even if it is a scalar.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        values : `list` of any
            A list containing the value or values associated with this item.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            if key0 in self.arrays:
                return self.arrays[key0]
            if key0 in self.scalars:
                return [self.scalars[key0]]
            if key0 in self.metadata:
                return [self.metadata[key0]]
            raise KeyError(f"'{key}' not found")

        try:
            return self.metadata[key0].getArray(".".join(keys))
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def names(self, topLevelOnly: bool = True) -> Collection[str]:
        """Return the hierarchical keys from the metadata.

        Parameters
        ----------
        topLevelOnly : `bool`
            If true, return top-level keys, otherwise full metadata item keys.

        Returns
        -------
        names : `collections.abc.Set`
            A set of top-level keys or full metadata item keys, including
            the top-level keys.

        Notes
        -----
        Should never be called in new code with ``topLevelOnly`` set to `True`
        -- this is equivalent to asking for the keys and is the default
        when iterating through the task metadata. In this case a deprecation
        message will be issued and the ability will raise an exception
        in a future release.

        When ``topLevelOnly`` is `False` all keys, including those from the
        hierarchy and the top-level hierarchy, are returned.
        """
        if topLevelOnly:
            # stacklevel=2 attributes the warning to the calling code.
            warnings.warn("Use keys() instead. " + _DEPRECATION_REASON, FutureWarning, stacklevel=2)
            return set(self.keys())

        names = set()
        for k, v in self.items():
            names.add(k)  # Always include the current level
            if isinstance(v, TaskMetadata):
                names.update(f"{k}.{item}" for item in v.names(topLevelOnly=False))
        return names

    def paramNames(self, topLevelOnly: bool = True) -> Collection[str]:
        """Return hierarchical names.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            Control whether only top-level items are returned or items
            from the hierarchy. Defaults to `True` to agree with the
            signature declared by `PropertySetLike`.

        Returns
        -------
        paramNames : `set` of `str`
            If ``topLevelOnly`` is `True`, returns any keys that are not
            part of a hierarchy. If `False` also returns fully-qualified
            names from the hierarchy. Keys associated with the top
            of a hierarchy are never returned.
        """
        # Currently used by the verify package.
        paramNames = set()
        for k, v in self.items():
            if isinstance(v, TaskMetadata):
                if not topLevelOnly:
                    paramNames.update(f"{k}.{item}" for item in v.paramNames(topLevelOnly=False))
            else:
                paramNames.add(k)
        return paramNames

    @deprecated(reason="Use standard assignment syntax. " + _DEPRECATION_REASON,
                version=_DEPRECATION_VERSION, category=FutureWarning)
    def set(self, key: str, item: Any) -> None:
        """Set the value of the supplied key."""
        self[key] = item

    @deprecated(reason="Use standard del dict syntax. " + _DEPRECATION_REASON,
                version=_DEPRECATION_VERSION, category=FutureWarning)
    def remove(self, key: str) -> None:
        """Remove the item without raising if absent."""
        try:
            del self[key]
        except KeyError:
            # The PropertySet.remove() should always work.
            pass

    @staticmethod
    def _getKeys(key: str) -> List[str]:
        """Return the key hierarchy.

        Parameters
        ----------
        key : `str`
            The key to analyze. Can be dot-separated.

        Returns
        -------
        keys : `list` of `str`
            The key hierarchy that has been split on ``.``.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        try:
            return key.split(".")
        except (AttributeError, TypeError):
            # No split() method at all, or one that does not accept a
            # string separator (for example `bytes`).
            raise KeyError(f"Invalid key '{key}': only string keys are allowed") from None

    def keys(self) -> tuple:
        """Return the top-level keys."""
        return tuple(self)

    def items(self):
        """Yield the top-level keys and values."""
        yield from itertools.chain(self.scalars.items(), self.arrays.items(), self.metadata.items())

    def __len__(self) -> int:
        """Return the number of items."""
        return len(self.scalars) + len(self.arrays) + len(self.metadata)

    def __iter__(self):
        """Return an iterator over each key."""
        # The order of keys is not preserved since items can move
        # from scalar to array.
        return itertools.chain(iter(self.scalars), iter(self.arrays), iter(self.metadata))

    def __getitem__(self, key: str) -> Any:
        """Retrieve the item associated with the key.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. For compatibility with ``PropertySet``, if the key
            refers to an array, the final element is returned and not the
            array itself.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            if key0 in self.scalars:
                return self.scalars[key0]
            if key0 in self.metadata:
                return self.metadata[key0]
            if key0 in self.arrays:
                return self.arrays[key0][-1]
            raise KeyError(f"'{key}' not found")

        # Hierarchical lookup so the top key can only be in the metadata
        # property. Trap KeyError and reraise so that the correct key
        # in the hierarchy is reported.
        try:
            # And forward request to that metadata.
            return self.metadata[key0][".".join(keys)]
        except KeyError:
            raise KeyError(f"'{key}' not found") from None

    def __setitem__(self, key: str, item: Any) -> None:
        """Store the given item.

        Parameters
        ----------
        key : `str`
            The key to assign to. Can be dot-separated hierarchical, in
            which case missing parent levels are created.
        item : Any
            Value to store. Any existing entry with the same key is
            replaced, whatever its previous kind.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        ValueError
            Raised if the value is not of a supported type.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            slots = {"array": self.arrays, "scalar": self.scalars, "metadata": self.metadata}
            slot_type, item = self._validate_value(item)
            primary = slots.pop(slot_type, None)
            if primary is None:
                raise AssertionError(f"Unknown slot type returned from validator: {slot_type}")

            # Assign the value to the right place.
            primary[key0] = item
            for other in slots.values():
                # Remove any other entries.
                other.pop(key0, None)
            return

        # This must be hierarchical so forward to the child TaskMetadata.
        # An empty TaskMetadata is falsy (it defines __len__), hence the
        # explicit comparison with None.
        child = self.metadata.get(key0)
        if child is None:
            child = TaskMetadata()
        # Assign before attaching a new child so that a rejected value does
        # not leave an empty TaskMetadata behind.
        child[".".join(keys)] = item
        self.metadata[key0] = child

        # Ensure we have cleared out anything with the same name elsewhere.
        self.scalars.pop(key0, None)
        self.arrays.pop(key0, None)

    def __contains__(self, key: str) -> bool:
        """Determine if the key exists."""
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            return key0 in self.scalars or key0 in self.arrays or key0 in self.metadata

        if key0 in self.metadata:
            return ".".join(keys) in self.metadata[key0]
        return False

    def __delitem__(self, key: str) -> None:
        """Remove the specified item.

        Raises
        ------
        KeyError
            Raised if the item is not present.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            for slot in (self.scalars, self.arrays, self.metadata):
                if key0 in slot:
                    del slot[key0]
                    return
            raise KeyError(f"'{key}' not found")

        try:
            del self.metadata[key0][".".join(keys)]
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def _validate_value(self, value: Any):
        """Validate the given value.

        Parameters
        ----------
        value : Any
            Value to check.

        Returns
        -------
        slot_type : `str`
            The type of value given. Options are "scalar", "array", "metadata".
        item : Any
            The item that was given but possibly modified to conform to
            the slot type.

        Raises
        ------
        ValueError
            Raised if the value is not a recognized type, if a sequence is
            empty, or if the elements of a sequence do not all share the
            same type.
        """
        # Test the simplest option first.
        value_type = type(value)
        if value_type in _ALLOWED_PRIMITIVE_TYPES:
            return "scalar", value

        if isinstance(value, TaskMetadata):
            return "metadata", value
        if isinstance(value, Mapping):
            return "metadata", self.from_dict(value)

        if _isListLike(value):
            # For model consistency, need to check that every item in the
            # list has the same type.
            value = list(value)
            if not value:
                # The element type can not be determined, and element
                # access on the stored array would fail later.
                raise ValueError("TaskMetadata does not support empty sequences.")

            type0 = type(value[0])
            for i in value:
                if type(i) is not type0:
                    raise ValueError("Type mismatch in supplied list. TaskMetadata requires all"
                                     f" elements have same type but see {type(i)} and {type0}.")

            if type0 not in _ALLOWED_PRIMITIVE_TYPES:
                # Must check to see if we got numpy floats or something.
                if isinstance(value[0], numbers.Integral):
                    type_cast = int
                elif isinstance(value[0], numbers.Real):
                    type_cast = float
                else:
                    raise ValueError(f"Supplied list has element of type '{type0}'. "
                                     "TaskMetadata can only accept primitive types in lists.")

                value = [type_cast(v) for v in value]

            return "array", value

        # Sometimes a numpy number is given.
        if isinstance(value, numbers.Integral):
            return "scalar", int(value)
        if isinstance(value, numbers.Real):
            return "scalar", float(value)

        raise ValueError(f"TaskMetadata does not support values of type {value_type!r}.")

547 

548 

# The ``metadata`` field of TaskMetadata is annotated with the class's own
# name, so the forward reference has to be resolved once the class exists.
TaskMetadata.update_forward_refs()