Coverage for python/lsst/obs/base/gen2to3/translators.py: 32%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

165 statements  

1# This file is part of obs_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

# Names exported by ``from ... import *``.  PatchKeyHandler and
# SkyMapKeyHandler are defined in this module but are deliberately absent
# from this tuple.
__all__ = (
    "Translator",
    "TranslatorFactory",
    "KeyHandler",
    "CopyKeyHandler",
    "ConstantKeyHandler",
    "BandToPhysicalFilterKeyHandler",
    "PhysicalFilterToBandKeyHandler",
)

33 

34import itertools 

35import logging 

36from abc import ABCMeta, abstractmethod 

37from typing import Any, Dict, FrozenSet, Iterable, List, Optional, Tuple 

38 

39from lsst.skymap import BaseSkyMap 

40 

41 

class KeyHandler(metaclass=ABCMeta):
    """Abstract helper that fills in exactly one key of a Gen3 data ID.

    Subclasses implement `extract`; `translate` stores the extracted value
    in the Gen3 data ID under ``dimension``.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension (data ID key) populated by
        this handler (e.g. "visit" or "band").
    """

    __slots__ = ("dimension",)

    def __init__(self, dimension: str):
        self.dimension = dimension

    def __repr__(self):
        clsName = type(self).__name__
        return f"{clsName}({self.dimension}, ...)"

    def translate(
        self,
        gen2id: dict,
        gen3id: dict,
        skyMap: Optional[BaseSkyMap],
        skyMapName: Optional[str],
        datasetTypeName: str,
    ):
        """Insert this handler's key-value pair into a Gen3 data ID.

        The value is obtained from `extract`; subclasses are expected to
        customize that method rather than this one.

        Parameters
        ----------
        gen2id : `dict`
            Gen2 data ID to read from.
        gen3id : `dict`
            Gen3 data ID, modified in place: ``gen3id[self.dimension]`` is
            set to the extracted value.
        skyMap : `BaseSkyMap`, optional
            SkyMap that defines the tracts and patches used in the Gen2 data
            ID, if any.
        skyMapName : `str`, optional
            Name of the Gen3 skymap dimension that defines the tracts and
            patches used in the Gen3 data ID.
        datasetTypeName : `str`
            Name of the dataset type.
        """
        value = self.extract(
            gen2id,
            skyMap=skyMap,
            skyMapName=skyMapName,
            datasetTypeName=datasetTypeName,
        )
        gen3id[self.dimension] = value

    @abstractmethod
    def extract(
        self,
        gen2id: dict,
        skyMap: Optional[BaseSkyMap],
        skyMapName: Optional[str],
        datasetTypeName: str,
    ) -> Any:
        """Compute the Gen3 data ID value for this handler's dimension.

        Parameters
        ----------
        gen2id : `dict`
            Gen2 data ID to read from.
        skyMap : `BaseSkyMap`, optional
            SkyMap that defines the tracts and patches used in the Gen2 data
            ID, if any.
        skyMapName : `str`, optional
            Name of the Gen3 skymap dimension that defines the tracts and
            patches used in the Gen3 data ID.
        datasetTypeName : `str`
            Name of the dataset type.

        Returns
        -------
        value : `object`
            Value to store under ``self.dimension`` in the Gen3 data ID.
        """
        raise NotImplementedError()

114 

115 

class ConstantKeyHandler(KeyHandler):
    """A KeyHandler whose Gen3 value is fixed at construction and does not
    depend on the Gen2 data ID.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension (data ID key) populated by
        this handler (e.g. "visit" or "band").
    value : `object`
        Data ID value returned by every call to `extract`.
    """

    __slots__ = ("value",)

    def __init__(self, dimension: str, value: Any):
        super().__init__(dimension)
        self.value = value

    def extract(
        self,
        gen2id: dict,
        skyMap: Optional[BaseSkyMap],
        skyMapName: Optional[str],
        datasetTypeName: str,
    ) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # All arguments are ignored; the stored constant is the answer.
        return self.value

139 

140 

class CopyKeyHandler(KeyHandler):
    """A KeyHandler that copies a value out of the Gen2 data ID, optionally
    converting it to a given type.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension produced by this handler.
    gen2key : `str`, optional
        Key to look up in the Gen2 data ID.  If `None`, ``dimension`` is
        used as the Gen2 key as well.
    dtype : `type`, optional
        If not `None`, a callable type applied to the Gen2 value to produce
        the Gen3 value.
    """

    __slots__ = ("gen2key", "dtype")

    def __init__(self, dimension: str, gen2key: Optional[str] = None, dtype: Optional[type] = None):
        super().__init__(dimension)
        if gen2key is None:
            gen2key = dimension
        self.gen2key = gen2key
        self.dtype = dtype

    def __str__(self):
        clsName = type(self).__name__
        return f"{clsName}({self.gen2key}, {self.dtype})"

    def extract(
        self,
        gen2id: dict,
        skyMap: Optional[BaseSkyMap],
        skyMapName: Optional[str],
        datasetTypeName: str,
    ) -> Any:
        # Docstring inherited from KeyHandler.extract.
        raw = gen2id[self.gen2key]
        if self.dtype is None:
            # No conversion requested; pass the Gen2 value through unchanged.
            return raw
        try:
            return self.dtype(raw)
        except ValueError as err:
            # A failed conversion is reported as TypeError, chained to the
            # original ValueError.
            raise TypeError(
                f"'{raw}' is not a valid value for {self.dimension}; "
                f"expected {self.dtype.__name__}, got {type(raw).__name__}."
            ) from err

177 

178 

class PatchKeyHandler(KeyHandler):
    """A KeyHandler for skymap patches.

    Populates the Gen3 ``patch`` dimension from the Gen2 ``tract`` and
    ``patch`` keys, using the skymap passed to `extract`.
    """

    __slots__ = ()

    def __init__(self):
        super().__init__("patch")

    def extract(
        self,
        gen2id: dict,
        skyMap: Optional[BaseSkyMap],
        skyMapName: Optional[str],
        datasetTypeName: str,
    ) -> Any:
        # Docstring inherited from KeyHandler.extract.
        tractInfo = skyMap[gen2id["tract"]]
        # The Gen2 patch value is a string of two comma-separated integers.
        xText, yText = gen2id["patch"].split(",")
        patchIndex = (int(xText), int(yText))
        return tractInfo.getSequentialPatchIndex(tractInfo[patchIndex])

196 

197 

class SkyMapKeyHandler(KeyHandler):
    """A KeyHandler for skymaps.

    Populates the Gen3 ``skymap`` dimension with the skymap name passed to
    `extract`; the Gen2 data ID itself is not consulted.
    """

    __slots__ = ()

    def __init__(self):
        super().__init__("skymap")

    def extract(
        self,
        gen2id: dict,
        skyMap: Optional[BaseSkyMap],
        skyMapName: Optional[str],
        datasetTypeName: str,
    ) -> Any:
        # Docstring inherited from KeyHandler.extract.
        return skyMapName

211 

212 

class PhysicalFilterToBandKeyHandler(KeyHandler):
    """KeyHandler for gen2 ``filter`` keys that match ``physical_filter``
    keys in gen3 but should be mapped to ``band``.

    Note that multiple physical filters can potentially map to one abstract
    filter, so be careful to only use this translator on obs packages where
    there is a one-to-one mapping.

    Parameters
    ----------
    filterDefinitions : iterable
        Objects with ``physical_filter`` and ``band`` attributes; entries
        whose ``physical_filter`` is `None` are skipped.
    """

    __slots__ = ("_map",)

    def __init__(self, filterDefinitions):
        super().__init__("band")
        physicalToBand = {}
        for definition in filterDefinitions:
            if definition.physical_filter is not None:
                physicalToBand[definition.physical_filter] = definition.band
        self._map = physicalToBand

    def extract(self, gen2id, *args, **kwargs):
        # Docstring inherited from KeyHandler.extract.
        gen2filter = gen2id["filter"]
        # Filters with no known mapping are passed through unchanged.
        return self._map.get(gen2filter, gen2filter)

231 

232 

class BandToPhysicalFilterKeyHandler(KeyHandler):
    """KeyHandler for gen2 ``filter`` keys that match ``band``
    keys in gen3 but should be mapped to ``physical_filter``.

    Note that one abstract filter can potentially map to multiple physical
    filters, so be careful to only use this translator on obs packages where
    there is a one-to-one mapping.

    Parameters
    ----------
    filterDefinitions : iterable
        Objects with ``band`` and ``physical_filter`` attributes; entries
        whose ``band`` is `None` are skipped.
    """

    __slots__ = ("_map",)

    def __init__(self, filterDefinitions):
        super().__init__("physical_filter")
        bandToPhysical = {}
        for definition in filterDefinitions:
            if definition.band is not None:
                bandToPhysical[definition.band] = definition.physical_filter
        self._map = bandToPhysical

    def extract(self, gen2id, *args, **kwargs):
        # Docstring inherited from KeyHandler.extract.
        gen2filter = gen2id["filter"]
        # Filters with no known mapping are passed through unchanged.
        return self._map.get(gen2filter, gen2filter)

251 

252 

class TranslatorFactory:
    """A class that manages a system of rules for translating Gen2 data IDs
    to Gen3 data IDs, and uses these to construct translators for particular
    dataset types.

    Rules are grouped by instrument name and then by dataset type name
    (either of which may be `None`, meaning "match any"), and are kept in the
    order in which they were added.

    Parameters
    ----------
    log : `logging.Logger`, optional
        A logger for diagnostic messages.  If `None`, the logger named after
        this module is used.
    """

    def __init__(self, log: Optional[logging.Logger] = None):
        # The rules used to match KeyHandlers when constructing a Translator.
        self._rules: Dict[
            Optional[str],  # instrument name (or None to match any)
            Dict[
                Optional[str],  # dataset type name (or None to match any)
                # (gen2keys required to match, handler, gen2keys consumed)
                List[Tuple[FrozenSet[str], KeyHandler, FrozenSet[str]]],
            ],
        ] = {None: {None: []}}
        self._addDefaultRules()
        if log is None:
            log = logging.getLogger(__name__)
        self.log = log

    def __str__(self):
        """Return a multi-line listing of every registered rule, grouped by
        instrument and dataset type.
        """
        lines = []
        for instrumentName, nested in self._rules.items():
            if instrumentName is None:
                instrumentName = "[any instrument]"
            for datasetTypeName, rules in nested.items():
                if datasetTypeName is None:
                    datasetTypeName = "[any dataset type]"
                lines.append(f"{instrumentName} + {datasetTypeName}:")
                for gen2keys, handler, consume in rules:
                    consumed = " (consumed)" if consume else ""
                    lines.append(f" {gen2keys}{consumed}: {handler}")
        return "\n".join(lines)

    def addRule(
        self,
        handler: KeyHandler,
        instrument: Optional[str] = None,
        datasetTypeName: Optional[str] = None,
        gen2keys: Iterable[str] = (),
        consume: bool | Iterable[str] = True,
    ):
        """Add a KeyHandler and an associated matching rule.

        Parameters
        ----------
        handler : `KeyHandler`
            A KeyHandler instance to add to a Translator when this rule
            matches.
        instrument : `str`, optional
            Gen3 instrument name the Gen2 repository must be associated with
            for this rule to match, or `None` to match any instrument.
        datasetTypeName : `str`, optional
            Name of the DatasetType this rule matches, or `None` to match any
            DatasetType.
        gen2keys : iterable of `str`, optional
            Gen2 data ID keys that must all be present for this rule to
            match.  Any iterable is accepted, including a single-pass
            iterator; it is read exactly once.
        consume : `bool` or iterable of `str`, optional
            If `True` (default), remove all entries in ``gen2keys`` from the
            set of keys being matched to in order to prevent less-specific
            handlers from matching them.  If falsy, consume nothing.
            May also be an iterable (e.g. a `tuple`) listing only the keys
            to consume.
        """
        # Materialize the keys exactly once.  ``gen2keys`` may be a one-shot
        # iterator, and it feeds both the match set and (when ``consume`` is
        # True) the consumed set; iterating it twice would leave the second
        # set empty and make the rule match every data ID.
        requiredKeys = frozenset(gen2keys)
        # Normalize ``consume`` to a frozenset so it can be processed
        # uniformly from here on.
        if consume is True:
            consumedKeys = requiredKeys
        elif consume:
            consumedKeys = frozenset(consume)
        else:
            consumedKeys = frozenset()
        # Find the rules for this instrument, or if we haven't seen it before,
        # add a nested dictionary that matches any DatasetType name, and then
        # append this rule.
        rulesForInstrument = self._rules.setdefault(instrument, {None: []})
        rulesForInstrumentAndDatasetType = rulesForInstrument.setdefault(datasetTypeName, [])
        rulesForInstrumentAndDatasetType.append((requiredKeys, handler, consumedKeys))

    def _addDefaultRules(self):
        """Add translator rules that should always be present, and don't depend
        at all on the instrument whose datasets are being converted.

        This is called by `TranslatorFactory` construction.
        """
        # Add "skymap" to Gen3 ID if Gen2 ID has a "tract" key.
        self.addRule(SkyMapKeyHandler(), gen2keys=("tract",), consume=False)

        # Add "skymap" to Gen3 ID if DatasetType is one of a few specific ones
        for coaddName in ("deep", "goodSeeing", "psfMatched", "dcr"):
            self.addRule(SkyMapKeyHandler(), datasetTypeName=f"{coaddName}Coadd_skyMap")

        # Translate Gen2 str patch IDs to Gen3 sequential integers.
        self.addRule(PatchKeyHandler(), gen2keys=("patch",))

        # Translate any "filter" values that appear alongside "tract" to
        # "band".  This is _not_ the right choice for instruments
        # that use "physical_filter" values for coadds in Gen2 (like HSC);
        # those will need to add a rule that invokes
        # PhysicalFilterToBandKeyHandler instead for just that instrument, but
        # the same criteria otherwise.  That will override this one, because
        # instrument-specific rules match first, and that rule will consume
        # the Gen2 "filter" key before this rule has a chance to fire.
        self.addRule(CopyKeyHandler("band", "filter"), gen2keys=("filter", "tract"), consume=("filter",))

        # Copy Gen2 "tract" to Gen3 "tract".
        self.addRule(CopyKeyHandler("tract", dtype=int), gen2keys=("tract",))

        # Translate Gen2 pixel_id to Gen3 skypix.
        #
        # TODO: For now, we just assume that the refcat indexer uses htm7,
        # since that's what we have generated most of our refcats at.
        # Eventually that may have to change, but it's not clear enough how to
        # do that for us to have a ticket yet.  If you found this note because
        # you've run into this limitation, please let the middleware team know
        # that it's time to make this a priority.
        self.addRule(CopyKeyHandler("htm7", gen2key="pixel_id", dtype=int), gen2keys=("pixel_id",))

    def addGenericInstrumentRules(
        self,
        instrumentName: str,
        calibFilterType: str = "physical_filter",
        detectorKey: str = "ccd",
        exposureKey: str = "visit",
    ):
        """Add translation rules that depend on some properties of the
        instrument but are otherwise generic.

        Parameters
        ----------
        instrumentName : `str`
            The short (dimension) name of the instrument that conversion is
            going to be run on.
        calibFilterType : `str`, optional
            One of ``physical_filter`` or ``band``, indicating which
            of those the gen2 calibRegistry uses as the ``filter`` key.
        detectorKey : `str`, optional
            The gen2 key used to identify what in gen3 is `detector`.
        exposureKey : `str`, optional
            The gen2 key used to identify what in gen3 is `exposure`.
        """
        # Add instrument to Gen3 data ID if Gen2 contains exposureKey,
        # detectorKey, "calibDate", or "visit".  (Multiple rules may match, so
        # we'll actually set instrument in the same dict more than once.)
        # The order of this tuple is the order in which the rules are added.
        for gen2key in (exposureKey, detectorKey, "calibDate", "visit"):
            self.addRule(
                ConstantKeyHandler("instrument", instrumentName),
                instrument=instrumentName,
                gen2keys=(gen2key,),
                consume=False,
            )

        # Copy Gen2 exposureKey to Gen3 'exposure' for raw only.  Also consume
        # filter, since that's implied by 'exposure' in Gen3.
        self.addRule(
            CopyKeyHandler("exposure", exposureKey),
            instrument=instrumentName,
            datasetTypeName="raw",
            gen2keys=(exposureKey,),
            consume=(exposureKey, "filter"),
        )

        # Copy Gen2 'visit' to Gen3 'visit' otherwise.  Also consume filter.
        self.addRule(
            CopyKeyHandler("visit"),
            instrument=instrumentName,
            gen2keys=("visit",),
            consume=("visit", "filter"),
        )

        # Copy Gen2 detectorKey ('ccd' by default) to Gen3 'detector'.
        self.addRule(
            CopyKeyHandler("detector", detectorKey),
            instrument=instrumentName,
            gen2keys=(detectorKey,),
        )

        # Add instrument for transmission curve datasets (transmission_sensor
        # is already handled by the above rules).
        for transmissionType in ("transmission_optics", "transmission_atmosphere", "transmission_filter"):
            self.addRule(
                ConstantKeyHandler("instrument", instrumentName),
                instrument=instrumentName,
                datasetTypeName=transmissionType,
            )
        self.addRule(
            CopyKeyHandler("physical_filter", "filter"),
            instrument=instrumentName,
            datasetTypeName="transmission_filter",
        )

        # Add calibration mapping for filter dependent types
        for calibType in ("flat", "sky", "fringe"):
            self.addRule(
                CopyKeyHandler(calibFilterType, "filter"),
                instrument=instrumentName,
                datasetTypeName=calibType,
            )

    def makeMatching(
        self,
        datasetTypeName: str,
        gen2keys: Dict[str, type],
        instrument: Optional[str] = None,
        skyMap: Optional[BaseSkyMap] = None,
        skyMapName: Optional[str] = None,
    ) -> Translator:
        """Construct a Translator appropriate for instances of the given
        dataset.

        Candidate rules are tried from most to least specific: this
        instrument and dataset type, this instrument and any dataset type,
        any instrument and this dataset type, then any instrument and any
        dataset type.  A rule matches when all of its keys are still present
        in the set of unconsumed Gen2 keys; a matching rule then removes the
        keys it consumes from that set.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type.
        gen2keys : `dict`
            Keys of a Gen2 data ID for this dataset.
        instrument : `str`, optional
            Name of the Gen3 instrument dimension for translated data IDs.
        skyMap : `~lsst.skymap.BaseSkyMap`, optional
            The skymap instance that defines any tract/patch data IDs.
        skyMapName : `str`, optional
            Gen3 SkyMap Dimension name to be associated with any tract or patch
            Dimensions.

        Returns
        -------
        translator : `Translator`
            A translator whose translate() method can be used to transform Gen2
            data IDs to Gen3 dataIds.
        """
        if instrument is not None:
            rulesForInstrument = self._rules.get(instrument, {None: []})
        else:
            # Without an instrument, only the any-instrument rules apply; use
            # an empty placeholder so they are not chained twice.
            rulesForInstrument = {None: []}
        rulesForAnyInstrument = self._rules[None]
        candidateRules = itertools.chain(
            rulesForInstrument.get(datasetTypeName, []),  # this instrument, this DatasetType
            rulesForInstrument[None],  # this instrument, any DatasetType
            rulesForAnyInstrument.get(datasetTypeName, []),  # any instrument, this DatasetType
            rulesForAnyInstrument[None],  # any instrument, any DatasetType
        )
        matchedHandlers = []
        targetKeys = set(gen2keys)
        self.log.debug(
            "Constructing data ID translator for %s with Gen2 keys %s...", datasetTypeName, gen2keys
        )
        for ruleKeys, ruleHandler, consume in candidateRules:
            if ruleKeys.issubset(targetKeys):
                matchedHandlers.append(ruleHandler)
                targetKeys -= consume
        self.log.debug(
            "...matched %d handlers: %s, with %s unmatched.",
            len(matchedHandlers),
            matchedHandlers,
            targetKeys,
        )
        return Translator(
            matchedHandlers,
            skyMap=skyMap,
            skyMapName=skyMapName,
            datasetTypeName=datasetTypeName,
            log=self.log,
        )

547 

548 

class Translator:
    """Callable object that translates Gen2 Data IDs to Gen3 Data IDs for a
    particular DatasetType.

    Translators should usually be constructed via
    `TranslatorFactory.makeMatching`.

    Parameters
    ----------
    handlers : `list`
        A list of KeyHandlers this Translator should use.
    skyMap : `BaseSkyMap`, optional
        SkyMap instance used to define any tract or patch Dimensions.
    skyMapName : `str`, optional
        Gen3 SkyMap Dimension name to be associated with any tract or patch
        Dimensions.
    datasetTypeName : `str`
        Name of the dataset type whose data IDs this translator handles.
    log : `logging.Logger`
        Logger used for diagnostic messages.
    """

    __slots__ = ("handlers", "skyMap", "skyMapName", "datasetTypeName", "log")

    def __init__(
        self,
        handlers: List[KeyHandler],
        skyMap: Optional[BaseSkyMap],
        skyMapName: Optional[str],
        datasetTypeName: str,
        log: logging.Logger,
    ):
        self.handlers = handlers
        self.skyMap = skyMap
        self.skyMapName = skyMapName
        self.datasetTypeName = datasetTypeName
        self.log = log

    def __str__(self):
        handlerText = ",".join(map(str, self.handlers))
        return f"{type(self).__name__}(dtype={self.datasetTypeName}, handlers=[{handlerText}])"

    def __call__(self, gen2id: Dict[str, Any], *, partial: bool = False) -> Tuple[dict, Optional[str]]:
        """Return a Gen3 data ID that corresponds to the given Gen2 data ID.

        Parameters
        ----------
        gen2id : `dict`
            Gen2 data ID to translate.
        partial : `bool`, optional
            If `True`, a handler that raises `KeyError` is skipped (with a
            debug log message) and the remaining handlers still run.  If
            `False` (default), the `KeyError` propagates.

        Returns
        -------
        gen3id : `dict`
            Gen3 data ID built by applying every handler in order.
        calibDate : `str` or `None`
            Value of the ``calibDate`` key of ``gen2id``, or `None` if that
            key is absent.
        """
        calibDate = gen2id.get("calibDate", None)
        gen3id = {}
        for handler in self.handlers:
            try:
                handler.translate(
                    gen2id,
                    gen3id,
                    skyMap=self.skyMap,
                    skyMapName=self.skyMapName,
                    datasetTypeName=self.datasetTypeName,
                )
            except KeyError:
                if not partial:
                    raise
                self.log.debug(
                    "Failed to translate %s from %s (this may not be an error).",
                    handler.dimension,
                    gen2id,
                )
        return gen3id, calibDate

    @property
    def dimensionNames(self) -> FrozenSet[str]:
        """The names of the dimensions populated by this Translator
        (`frozenset`).
        """
        return frozenset({handler.dimension for handler in self.handlers})