Coverage for python/lsst/pipe/base/_task_metadata.py: 15%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

192 statements  

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

# Names exported by ``from <module> import *``.
__all__ = ["TaskMetadata"]

23 

24import numbers 

25import itertools 

26import warnings 

27from collections.abc import Sequence 

28from deprecated.sphinx import deprecated 

29 

30from typing import Dict, List, Union, Any, Mapping 

31from pydantic import BaseModel, StrictInt, StrictFloat, StrictBool, StrictStr, Field 

32 

# Shared pieces of the deprecation notices attached to the deprecated
# compatibility methods of TaskMetadata.
_DEPRECATION_VERSION = "v24"
_DEPRECATION_REASON = "Will be removed after v25."

# Only these primitive types may be stored in a task metadata field, which
# keeps serialization predictable.
_ALLOWED_PRIMITIVE_TYPES = (str, float, int, bool)

39 

40 

def _isListLike(v):
    """Report whether ``v`` is a sequence other than a `str`.

    Parameters
    ----------
    v : Any
        Object to test.

    Returns
    -------
    isListLike : `bool`
        `True` if ``v`` is a `collections.abc.Sequence` that is not a
        `str`; `False` otherwise.
    """
    # Strings are sequences too, but they are treated as scalar values.
    if isinstance(v, str):
        return False
    return isinstance(v, Sequence)

43 

44 

class TaskMetadata(BaseModel):
    """Dict-like object for storing task metadata.

    Metadata can be stored at two levels: single task or task plus subtasks.
    The latter is called full metadata of a task and has a form

        topLevelTaskName:subtaskName:subsubtaskName.itemName

    Metadata item key of a task (`itemName` above) must not contain `.`,
    which serves as a separator in full metadata keys and turns
    the value into sub-dictionary. Arbitrary hierarchies are supported.

    Deprecated methods are for compatibility with
    the predecessor containers.
    """

    # Single primitive values, keyed by top-level name.
    scalars: Dict[str, Union[StrictFloat, StrictInt, StrictBool, StrictStr]] = Field(default_factory=dict)
    # Homogeneous lists of primitive values, keyed by top-level name.
    arrays: Dict[str, Union[List[StrictFloat], List[StrictInt], List[StrictBool],
                            List[StrictStr]]] = Field(default_factory=dict)
    # Nested metadata, keyed by top-level name.
    metadata: Dict[str, "TaskMetadata"] = Field(default_factory=dict)

    @classmethod
    def from_dict(cls, d: Mapping[str, Any]) -> "TaskMetadata":
        """Create a TaskMetadata from a dictionary.

        Parameters
        ----------
        d : `Mapping`
            Mapping to convert. Can be hierarchical. Any dictionaries
            in the hierarchy are converted to `TaskMetadata`.

        Returns
        -------
        meta : `TaskMetadata`
            Newly-constructed metadata.
        """
        metadata = cls()
        for k, v in d.items():
            metadata[k] = v
        return metadata

    def add(self, name, value):
        """Store a new value, adding to a list if one already exists.

        Parameters
        ----------
        name : `str`
            Name of the metadata property. Can be dot-separated
            hierarchical; missing intermediate levels are created.
        value
            Metadata property value.

        Raises
        ------
        ValueError
            Raised if the value is not a primitive or a sequence of
            primitives, if the key refers to a `TaskMetadata`, or if the
            type of the value differs from the type already stored.
        KeyError
            Raised if the name is not a string or if a hierarchical name
            passes through a key that holds a scalar or array.

        Notes
        -----
        The container is left unmodified if an exception is raised.
        """
        keys = self._getKeys(name)
        key0 = keys.pop(0)

        if keys:
            # Hierarchical name: delegate to the child metadata.
            subkey = ".".join(keys)
            child = self.metadata.get(key0)
            # Compare against None explicitly; an empty TaskMetadata is
            # falsy because __len__ is defined.
            if child is not None:
                child.add(subkey, value)
                return
            if key0 in self.scalars or key0 in self.arrays:
                raise KeyError(f"Can not add() to key '{name}' since '{key0}' is not a TaskMetadata")
            # Only attach the new child once the nested add() succeeded so
            # that a rejected value does not leave an empty level behind.
            child = TaskMetadata()
            child.add(subkey, value)
            self.metadata[key0] = child
            return

        # If add() is being used, always store the value in the arrays
        # property as a list. It's likely there will be another call.
        slot_type, value = self._validate_value(value)
        if slot_type == "scalar":
            value = [value]
        elif slot_type != "array":
            raise ValueError("add() can only be used for primitive types or sequences of those types.")

        if key0 in self.metadata:
            raise ValueError(f"Can not add() to key '{name}' since that is a TaskMetadata")

        if key0 in self.scalars:
            # An existing scalar will be promoted to an array.
            existing = [self.scalars[key0]]
        elif key0 in self.arrays:
            existing = self.arrays[key0]
        else:
            self.arrays[key0] = value
            return

        # Check that the type is not changing. This is done before any
        # state is modified so that a failed add() has no side effects.
        if existing:
            curtype, newtype = type(existing[0]), type(value[0])
            if curtype is not newtype:
                raise ValueError(f"Type mismatch in add() -- currently {curtype} but adding {newtype}")

        existing.extend(value)
        self.scalars.pop(key0, None)
        self.arrays[key0] = existing

    @deprecated(reason="Cast the return value to float explicitly. " + _DEPRECATION_REASON,
                version=_DEPRECATION_VERSION, category=FutureWarning)
    def getAsDouble(self, key):
        """Return the value cast to a `float`.

        Parameters
        ----------
        key : `str`
            Item to return. Can be dot-separated hierarchical.

        Returns
        -------
        value : `float`
            The value cast to a `float`.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        return float(self[key])

    def getScalar(self, key):
        """Retrieve a scalar item even if the item is a list.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        value : Any
            Either the value associated with the key or, if the key
            corresponds to a list, the last item in the list.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        # Used in pipe_tasks.
        # getScalar() is the default behavior for __getitem__.
        return self[key]

    def getArray(self, key):
        """Retrieve an item as a list even if it is a scalar.

        Parameters
        ----------
        key : `str`
            Item to retrieve.

        Returns
        -------
        values : `list` of any
            A list containing the value or values associated with this item.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            if key0 in self.arrays:
                return self.arrays[key0]
            if key0 in self.scalars:
                return [self.scalars[key0]]
            if key0 in self.metadata:
                return [self.metadata[key0]]
            raise KeyError(f"'{key}' not found")

        try:
            return self.metadata[key0].getArray(".".join(keys))
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def names(self, topLevelOnly: bool = True):
        """Return the hierarchical keys from the metadata.

        Parameters
        ----------
        topLevelOnly : `bool`
            If true, return top-level keys, otherwise full metadata item keys.

        Returns
        -------
        names : `collections.abc.Set`
            A set of top-level keys or full metadata item keys, including
            the top-level keys.

        Notes
        -----
        Should never be called in new code with ``topLevelOnly`` set to `True`
        -- this is equivalent to asking for the keys and is the default
        when iterating through the task metadata. In this case a deprecation
        message will be issued and the ability will raise an exception
        in a future release.

        When ``topLevelOnly`` is `False` all keys, including those from the
        hierarchy and the top-level hierarchy, are returned.
        """
        if topLevelOnly:
            # stacklevel=2 attributes the warning to the calling code.
            warnings.warn("Use keys() instead. " + _DEPRECATION_REASON, FutureWarning, stacklevel=2)
            return set(self.keys())

        names = set()
        for k, v in self.items():
            names.add(k)  # Always include the current level
            if isinstance(v, TaskMetadata):
                names.update({k + '.' + item for item in v.names(topLevelOnly=topLevelOnly)})
        return names

    def paramNames(self, topLevelOnly):
        """Return hierarchical names.

        Parameters
        ----------
        topLevelOnly : `bool`
            Control whether only top-level items are returned or items
            from the hierarchy.

        Returns
        -------
        paramNames : `set` of `str`
            If ``topLevelOnly`` is `True`, returns any keys that are not
            part of a hierarchy. If `False` also returns fully-qualified
            names from the hierarchy. Keys associated with the top
            of a hierarchy are never returned.
        """
        # Currently used by the verify package.
        paramNames = set()
        for k, v in self.items():
            if isinstance(v, TaskMetadata):
                if not topLevelOnly:
                    paramNames.update({k + "." + item for item in v.paramNames(topLevelOnly=topLevelOnly)})
            else:
                paramNames.add(k)
        return paramNames

    @deprecated(reason="Use standard assignment syntax. " + _DEPRECATION_REASON,
                version=_DEPRECATION_VERSION, category=FutureWarning)
    def set(self, key, item):
        """Set the value of the supplied key."""
        self[key] = item

    @deprecated(reason="Use standard del dict syntax. " + _DEPRECATION_REASON,
                version=_DEPRECATION_VERSION, category=FutureWarning)
    def remove(self, key):
        """Remove the item without raising if absent."""
        try:
            del self[key]
        except KeyError:
            # The PropertySet.remove() should always work.
            pass

    @staticmethod
    def _getKeys(key):
        """Return the key hierarchy.

        Parameters
        ----------
        key : `str`
            The key to analyze. Can be dot-separated.

        Returns
        -------
        keys : `list` of `str`
            The key hierarchy that has been split on ``.``.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        """
        if not isinstance(key, str):
            raise KeyError(f"Invalid key '{key}': only string keys are allowed")
        return key.split('.')

    def keys(self):
        """Return the top-level keys."""
        return tuple(self)

    def items(self):
        """Yield the top-level keys and values."""
        yield from itertools.chain(self.scalars.items(), self.arrays.items(), self.metadata.items())

    def __len__(self):
        """Return the number of items."""
        return len(self.scalars) + len(self.arrays) + len(self.metadata)

    def __iter__(self):
        """Return an iterator over each key."""
        # The order of keys is not preserved since items can move
        # from scalar to array.
        return itertools.chain(iter(self.scalars), iter(self.arrays), iter(self.metadata))

    def __getitem__(self, key):
        """Retrieve the item associated with the key.

        Parameters
        ----------
        key : `str`
            The key to retrieve. Can be dot-separated hierarchical.

        Returns
        -------
        value : `TaskMetadata`, `float`, `int`, `bool`, `str`
            A scalar value. For compatibility with ``PropertySet``, if the key
            refers to an array, the final element is returned and not the
            array itself.

        Raises
        ------
        KeyError
            Raised if the item is not found.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            if key0 in self.scalars:
                return self.scalars[key0]
            if key0 in self.metadata:
                return self.metadata[key0]
            if key0 in self.arrays:
                return self.arrays[key0][-1]
            raise KeyError(f"'{key}' not found")
        # Hierarchical lookup so the top key can only be in the metadata
        # property. Trap KeyError and reraise so that the correct key
        # in the hierarchy is reported.
        try:
            # And forward request to that metadata.
            return self.metadata[key0][".".join(keys)]
        except KeyError:
            raise KeyError(f"'{key}' not found") from None

    def __setitem__(self, key, item):
        """Store the given item.

        Parameters
        ----------
        key : `str`
            The key to assign to. Can be dot-separated hierarchical, in
            which case missing intermediate levels are created.
        item : Any
            Value to store. Any entry with the same top-level name in
            another slot is replaced.

        Raises
        ------
        KeyError
            Raised if the key is not a string.
        ValueError
            Raised if the item is not a supported type.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            slots = {"array": self.arrays, "scalar": self.scalars, "metadata": self.metadata}
            slot_type, item = self._validate_value(item)
            primary = slots.pop(slot_type, None)
            if primary is None:
                raise AssertionError(f"Unknown slot type returned from validator: {slot_type}")

            # Assign the value to the right place.
            primary[key0] = item
            for other in slots.values():
                # Remove any other entries.
                other.pop(key0, None)
            return

        # This must be hierarchical so forward to the child TaskMetadata.
        subkey = ".".join(keys)
        child = self.metadata.get(key0)
        # Compare against None explicitly; an empty TaskMetadata is falsy.
        if child is None:
            # Attach the new level only after the assignment succeeded so a
            # rejected value does not leave an empty child behind.
            child = TaskMetadata()
            child[subkey] = item
            self.metadata[key0] = child
        else:
            child[subkey] = item

        # Ensure we have cleared out anything with the same name elsewhere.
        self.scalars.pop(key0, None)
        self.arrays.pop(key0, None)

    def __contains__(self, key):
        """Determine if the key exists."""
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            return key0 in self.scalars or key0 in self.arrays or key0 in self.metadata

        if key0 in self.metadata:
            return ".".join(keys) in self.metadata[key0]
        return False

    def __delitem__(self, key):
        """Remove the specified item.

        Raises
        ------
        KeyError
            Raised if the item is not present.
        """
        keys = self._getKeys(key)
        key0 = keys.pop(0)
        if not keys:
            for slot in (self.scalars, self.arrays, self.metadata):
                if key0 in slot:
                    del slot[key0]
                    return
            raise KeyError(f"'{key}' not found")

        try:
            del self.metadata[key0][".".join(keys)]
        except KeyError:
            # Report the correct key.
            raise KeyError(f"'{key}' not found") from None

    def _validate_value(self, value):
        """Validate the given value.

        Parameters
        ----------
        value : Any
            Value to check.

        Returns
        -------
        slot_type : `str`
            The type of value given. Options are "scalar", "array", "metadata".
        item : Any
            The item that was given but possibly modified to conform to
            the slot type.

        Raises
        ------
        ValueError
            Raised if the value is not a recognized type, if a supplied
            sequence is empty, or if its elements differ in type.
        """
        # Test the simplest option first.
        value_type = type(value)
        if value_type in _ALLOWED_PRIMITIVE_TYPES:
            return "scalar", value

        if isinstance(value, TaskMetadata):
            return "metadata", value
        if isinstance(value, Mapping):
            return "metadata", self.from_dict(value)

        if _isListLike(value):
            # For model consistency, need to check that every item in the
            # list has the same type.
            value = list(value)

            if not value:
                # The element type is taken from the first element, so an
                # empty sequence can not be validated.
                raise ValueError("TaskMetadata does not support empty lists: "
                                 "the element type can not be determined.")

            type0 = type(value[0])
            for i in value:
                if type(i) is not type0:
                    raise ValueError("Type mismatch in supplied list. TaskMetadata requires all"
                                     f" elements have same type but see {type(i)} and {type0}.")

            if type0 not in _ALLOWED_PRIMITIVE_TYPES:
                # Must check to see if we got numpy floats or something.
                if isinstance(value[0], numbers.Integral):
                    type_cast = int
                elif isinstance(value[0], numbers.Real):
                    type_cast = float
                else:
                    raise ValueError(f"Supplied list has element of type '{type0}'. "
                                     "TaskMetadata can only accept primitive types in lists.")

                value = [type_cast(v) for v in value]

            return "array", value

        # Sometimes a numpy number is given.
        if isinstance(value, numbers.Integral):
            return "scalar", int(value)
        if isinstance(value, numbers.Real):
            return "scalar", float(value)

        raise ValueError(f"TaskMetadata does not support values of type {value_type!r} (got {value!r}).")

498 

499 

# TaskMetadata refers to itself by name in the annotation of its
# ``metadata`` field, so that forward reference has to be resolved once
# the class has been defined.
TaskMetadata.update_forward_refs()