Coverage for python/lsst/summit/utils/butlerUtils.py: 13%

256 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-11 05:38 -0700

1# This file is part of summit_utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import copy 

23import itertools 

24from collections.abc import Iterable 

25from typing import Any 

26 

27from deprecated.sphinx import deprecated 

28 

29import lsst.daf.butler as dafButler 

30from lsst.summit.utils.utils import getSite 

31 

# Public API of this module. NOTE: getLatissDefaultCollections was public and
# documented but missing from __all__; it is now exported.
__all__ = [
    "makeDefaultLatissButler",
    "getLatissDefaultCollections",
    "updateDataId",
    "sanitizeDayObs",
    "getMostRecentDayObs",
    "getSeqNumsForDayObs",
    "getMostRecentDataId",
    "getDatasetRefForDataId",
    "getDayObs",
    "getSeqNum",
    "getExpId",
    "datasetExists",
    "sortRecordsByDayObsThenSeqNum",
    "getDaysWithData",
    "getExpIdFromDayObsSeqNum",
    "updateDataIdOrDataCord",
    "fillDataId",
    "getExpRecordFromDataId",
    "getDayObsSeqNumFromExposureId",
    "removeDataProduct",
    "getLatissOnSkyDataIds",
    "getExpRecord",
]

# Baseline input collections for LATISS butlers; extended per-site by
# getLatissDefaultCollections().
_LATISS_DEFAULT_COLLECTIONS = ["LATISS/raw/all", "LATISS/calib", "LATISS/runs/quickLook"]

# RECENT_DAY must be in the past *and have data* (otherwise some tests are
# no-ops), to speed up queries by restricting them significantly,
# but data must definitely been taken since. Should
# also not be more than 2 months in the past due to 60 day lookback time on the
# summit. All this means it should be updated by an informed human.
RECENT_DAY = 20220503

64 

65 

def _configureForSite() -> None:
    """Adjust module-level defaults for the site the code is running at.

    Currently this only lowers ``RECENT_DAY`` for the Tucson test stand,
    which carries a limited, static dataset.
    """
    try:
        site = getSite()
    except ValueError:
        # this method is run automatically on module import, so
        # don't fail for k8s where this cannot yet be determined
        print("WARNING: failed to automatically determine site")
        site = None

    if site == "tucson":
        global RECENT_DAY
        RECENT_DAY = 20211104  # TTS has limited data, so use this day


# Run at import time so RECENT_DAY is correct before any queries are made.
_configureForSite()

81 

82 

def getLatissDefaultCollections() -> list[str]:
    """Get the default set of LATISS collections, updated for the site at
    which the code is being run.

    Returns
    -------
    collections : `list` of `str`
        The default collections for the site.
    """
    # Copy the module-level default: the previous implementation aliased it,
    # so appending the site-specific collection (or callers extending the
    # returned list) permanently mutated _LATISS_DEFAULT_COLLECTIONS, growing
    # it on every call.
    collections = list(_LATISS_DEFAULT_COLLECTIONS)
    try:
        site = getSite()
    except ValueError:
        site = ""

    # Both the summit and the Tucson test stand carry the test dataset.
    if site in ("tucson", "summit"):
        collections.append("LATISS-test-data")
    return collections

105 

106 

def _update_RECENT_DAY(day: int) -> None:
    """Opportunistically bump RECENT_DAY when a newer day is seen for free."""
    global RECENT_DAY
    candidate = day - 1
    if candidate > RECENT_DAY:
        RECENT_DAY = candidate

111 

112 

def makeDefaultLatissButler(
    *,
    extraCollections: list[str] | None = None,
    writeable: bool = False,
    embargo: bool = False,
) -> dafButler.Butler:
    """Create a butler for LATISS using the default collections.

    Parameters
    ----------
    extraCollections : `list` of `str`, optional
        Extra input collections to supply to the butler init.
    writeable : `bool`, optional
        Whether to make a writeable butler.
    embargo : `bool`, optional
        Use the embargo repo instead of the main one. Needed to access
        embargoed data.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        The butler.

    Raises
    ------
    FileNotFoundError
        Raised if the repo could not be instantiated; the underlying
        exception is chained as the cause.
    """
    # TODO: Add logging to which collections are going in
    collections = getLatissDefaultCollections()
    if extraCollections:
        collections.extend(extraCollections)
    repoString = "LATISS" if not embargo else "/repo/embargo"
    try:
        butler = dafButler.Butler(
            repoString, collections=collections, writeable=writeable, instrument="LATISS"
        )
    except (FileNotFoundError, RuntimeError) as e:
        # Depending on the value of DAF_BUTLER_REPOSITORY_INDEX and whether
        # it is present and blank, or just not set, both these exception
        # types can be raised, see tests/test_butlerUtils.py:ButlerInitTestCase
        # for details and tests which confirm these have not changed.
        # Chain the original exception so the real cause is not lost when
        # unifying the exception type.
        raise FileNotFoundError(f"Failed to instantiate butler for repo {repoString!r}") from e
    return butler

152 

153 

@deprecated(
    # Fixed typo in the user-facing message: "datasExists" -> "datasetExists"
    reason="datasetExists has been replaced by Butler.exists(). Will be removed after v26.0.",
    version="v26.0",
    category=FutureWarning,
)
def datasetExists(butler: dafButler.Butler, dataProduct: str, dataId: dict, **kwargs: Any) -> bool:
    """Collapse the tri-state behaviour of butler.datasetExists to a boolean.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler
    dataProduct : `str`
        The type of data product to check for
    dataId : `dict`
        The dataId of the dataProduct to check for
    **kwargs : `dict`
        Extra keyword arguments forwarded to ``Butler.exists``.

    Returns
    -------
    exists : `bool`
        True if the dataProduct exists for the dataProduct and can be retrieved
        else False.
    """
    return butler.exists(dataProduct, dataId, **kwargs)

178 

179 

def updateDataId(dataId, **kwargs: Any) -> dict | dafButler.DataCoordinate:
    """Update a DataCoordinate or dataId dict with kwargs.

    Provides a single interface for adding the detector key (or others) to a
    dataId whether it's a DataCoordinate or a dict

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate`
        The dataId to update.
    kwargs : `dict`
        The keys and values to add to the dataId.

    Returns
    -------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate`
        The updated dataId, with the same type as the input.

    Raises
    ------
    ValueError
        Raised if the dataId is neither a dict nor a DataCoordinate.
    """
    if isinstance(dataId, dafButler.DataCoordinate):
        return dafButler.DataCoordinate.standardize(dataId, **kwargs)
    if isinstance(dataId, dict):
        return {**dataId, **kwargs}
    raise ValueError(f"Unknown dataId type {type(dataId)}")

205 

206 

207def sanitizeDayObs(day_obs: int | str) -> int: 

208 """Take string or int day_obs and turn it into the int version. 

209 

210 Parameters 

211 ---------- 

212 day_obs : `str` or `int` 

213 The day_obs to sanitize. 

214 

215 Returns 

216 ------- 

217 day_obs : `int` 

218 The sanitized day_obs. 

219 

220 Raises 

221 ------ 

222 ValueError 

223 Raised if the day_obs fails to translate for any reason. 

224 """ 

225 if isinstance(day_obs, int): 

226 return day_obs 

227 elif isinstance(day_obs, str): 

228 try: 

229 return int(day_obs.replace("-", "")) 

230 except Exception: 

231 ValueError(f"Failed to sanitize {day_obs!r} to a day_obs") 

232 raise ValueError(f"Cannot sanitize {day_obs!r} to a day_obs") 

233 

234 

def getMostRecentDayObs(butler: dafButler.Butler) -> int:
    """Get the most recent day_obs for which there is data.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.

    Returns
    -------
    day_obs : `int`
        The day_obs.
    """
    # Restrict the query to recent days to keep it fast.
    records = butler.registry.queryDimensionRecords(
        "exposure",
        where="exposure.day_obs>=RECENT_DAY",
        datasets="raw",
        bind={"RECENT_DAY": RECENT_DAY},
    )
    mostRecent = max(record.day_obs for record in records)
    _update_RECENT_DAY(mostRecent)  # keep the cached lower bound fresh
    return mostRecent

255 

256 

def getSeqNumsForDayObs(butler: dafButler.Butler, day_obs: int, extraWhere: str = "") -> list[int]:
    """Get a list of all seq_nums taken on a given day_obs.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    day_obs : `int` or `str`
        The day_obs for which the seq_nums are desired.
    extraWhere : `str`
        Any extra where conditions to add to the queryDimensionRecords call.

    Returns
    -------
    seq_nums : `list` of `int`
        The seq_nums taken on the corresponding day_obs in ascending numerical
        order.
    """
    day_obs = sanitizeDayObs(day_obs)
    clauses = ["exposure.day_obs=dayObs"]
    if extraWhere:
        # registry where-clauses use single quotes for string literals
        clauses.append(extraWhere.replace('"', "'"))
    records = butler.registry.queryDimensionRecords(
        "exposure", where=" and ".join(clauses), bind={"dayObs": day_obs}, datasets="raw"
    )
    return sorted(record.seq_num for record in records)

284 

285 

def sortRecordsByDayObsThenSeqNum(
    records: list[dafButler.DimensionRecord],
) -> list[dafButler.DimensionRecord]:
    """Sort a set of records by dayObs, then seqNum to get the order in which
    they were taken.

    Parameters
    ----------
    records : `list` of `dafButler.DimensionRecord`
        The records to be sorted.

    Returns
    -------
    sortedRecords : `list` of `dafButler.DimensionRecord`
        The sorted records

    Raises
    ------
    ValueError
        Raised if the recordSet contains duplicate records, or if it contains
        (dayObs, seqNum) collisions.
    """
    records = list(records)  # materialize in case a generator was supplied
    if len(set(records)) != len(records):
        raise ValueError("Record set contains duplicate records and therefore cannot be sorted unambiguously")

    sortKeys = [(record.day_obs, record.seq_num) for record in records]
    if len(set(sortKeys)) != len(sortKeys):
        raise ValueError(
            "Record set contains dayObs/seqNum collisions, and therefore cannot be sorted unambiguously"
        )

    return sorted(records, key=lambda record: (record.day_obs, record.seq_num))

321 

322 

def getDaysWithData(butler: dafButler.Butler, datasetType: str = "raw") -> list[int]:
    """Get all the days for which LATISS has taken data on the mountain.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    datasetType : `str`
        The datasetType to query.

    Returns
    -------
    days : `list` of `int`
        A sorted list of the day_obs values for which mountain-top data exists.
    """
    # 20200101 is a day between shipping LATISS and going on sky.
    # We used to constrain on exposure.seq_num<50 to massively reduce the
    # number of returned records whilst being large enough to ensure that no
    # days are missed because early seq_nums were skipped. However, because
    # we have test datasets like LATISS-test-data-tts where we only kept
    # seqNums from 950 on one day, we can no longer assume this so don't be
    # tempted to add such a constraint back in here for speed.
    records = butler.registry.queryDimensionRecords(
        "exposure", where="exposure.day_obs>20200101", datasets=datasetType
    )
    return sorted({record.day_obs for record in records})

348 

349 

def getMostRecentDataId(butler: dafButler.Butler) -> dict:
    """Get the dataId for the most recent observation.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.

    Returns
    -------
    dataId : `dict`
        The dataId of the most recent exposure.
    """
    lastDay = getMostRecentDayObs(butler)
    lastSeqNum = max(getSeqNumsForDayObs(butler, lastDay))
    dataId = {"day_obs": lastDay, "seq_num": lastSeqNum, "detector": 0}
    # add the exposure id so both addressing schemes are present
    return {**dataId, **getExpIdFromDayObsSeqNum(butler, dataId)}

368 

369 

def getExpIdFromDayObsSeqNum(butler: dafButler.Butler, dataId: dict) -> dict:
    """Get the exposure id for the dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    dataId : `dict`
        The dataId for which to return the exposure id.

    Returns
    -------
    dataId : `dict`
        A dict containing only the exposure id.
    """
    record = getExpRecordFromDataId(butler, dataId)
    return {"exposure": record.id}

387 

388 

def updateDataIdOrDataCord(dataId: dict, **updateKwargs: Any) -> dict:
    """Add key, value pairs to a dataId or data coordinate.

    Parameters
    ----------
    dataId : `dict`
        The dataId for which to return the exposure id.
    updateKwargs : `dict`
        The key value pairs add to the dataId or dataCoord.

    Returns
    -------
    dataId : `dict`
        The updated dataId.

    Notes
    -----
    Always returns a dict, so note that if a data coordinate is supplied, a
    dict is returned, changing the type.
    """
    # copy first so the caller's dataId is never mutated
    asDict = _assureDict(copy.copy(dataId))
    return {**asDict, **updateKwargs}

413 

414 

def fillDataId(butler: dafButler.Butler, dataId: dict) -> dict:
    """Given a dataId, fill it with values for all available dimensions.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dict`
        The dataId to fill.

    Returns
    -------
    dataId : `dict`
        The filled dataId.

    Notes
    -----
    This function is *slow*! Running this on 20,000 dataIds takes approximately
    7 minutes. Virtually all the slowdown is in the
    butler.registry.expandDataId() call though, so this wrapper is not to blame
    here, and might speed up in future with butler improvements.
    """
    # ensure it's a dict to deal with records etc
    dataId = _assureDict(dataId)

    # this removes extraneous keys that would trip up the registry call
    # using _rewrite_data_id is perhaps ever so slightly slower than popping
    # the bad keys, or making a minimal dataId by hand, but is more
    # reliable/general, so we choose that over the other approach here
    # NOTE(review): _rewrite_data_id is a private Butler API and could change
    # without notice — confirm against the pinned daf_butler version.
    dataId, _ = butler._rewrite_data_id(dataId, butler.get_dataset_type("raw"))

    # now expand and turn back to a dict
    dataId = butler.registry.expandDataId(dataId, detector=0).full  # this call is VERY slow
    dataId = _assureDict(dataId)

    # record which of the three addressing keys were absent *before* we start
    # filling, so we only query for what was actually missing
    missingExpId = getExpId(dataId) is None
    missingDayObs = getDayObs(dataId) is None
    missingSeqNum = getSeqNum(dataId) is None

    if missingDayObs or missingSeqNum:
        dayObsSeqNum = getDayObsSeqNumFromExposureId(butler, dataId)
        dataId.update(dayObsSeqNum)

    if missingExpId:
        # day_obs/seq_num are guaranteed present by this point, either
        # originally or via the update just above
        expId = getExpIdFromDayObsSeqNum(butler, dataId)
        dataId.update(expId)

    return dataId

463 

464 

def _assureDict(dataId: dict | dafButler.dimensions.DataCoordinate | dafButler.DimensionRecord) -> dict:
    """Turn any data-identifier-like object into a dict.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.dimensions.DataCoordinate` or
             `lsst.daf.butler.dimensions.DimensionRecord`
        The data identifier.

    Returns
    -------
    dataId : `dict`
        The data identifier as a dict.

    Raises
    ------
    RuntimeError
        Raised if the object cannot be coerced to a dict.
    """
    if isinstance(dataId, dict):
        return dataId
    if hasattr(dataId, "items"):  # DataCoordinate behaves like a mapping
        pairs = dataId.items()
    elif hasattr(dataId, "dataId"):  # DimensionRecord carries its own dataId
        pairs = dataId.dataId.items()
    else:
        raise RuntimeError(f"Failed to coerce {type(dataId)} to dict")
    # str() required because full data coordinates can yield non-str keys
    return {str(key): value for key, value in pairs}

487 

488 

def getExpRecordFromDataId(butler: dafButler.Butler, dataId: dict) -> dafButler.DimensionRecord:
    """Get the exposure record for a given dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dict`
        The dataId.

    Returns
    -------
    expRecord : `lsst.daf.butler.dimensions.DimensionRecord`
        The exposure record.

    Raises
    ------
    RuntimeError
        Raised if neither an exposure id nor day_obs+seq_num can be found.
    LookupError
        Raised if no exposure record matches the dataId.
    """
    dataId = _assureDict(dataId)
    assert isinstance(dataId, dict), f"dataId must be a dict or DimensionRecord, got {type(dataId)}"

    expId = getExpId(dataId)
    if expId:
        # address by exposure id directly
        expRecords = butler.registry.queryDimensionRecords(
            "exposure", where="exposure.id=expId", bind={"expId": expId}, datasets="raw"
        )
    else:
        # fall back to day_obs + seq_num addressing
        dayObs = getDayObs(dataId)
        seqNum = getSeqNum(dataId)
        if not (dayObs and seqNum):
            raise RuntimeError(f"Failed to find either expId or day_obs and seq_num in dataId {dataId}")
        expRecords = butler.registry.queryDimensionRecords(
            "exposure",
            where="exposure.day_obs=dayObs AND exposure.seq_num=seq_num",
            bind={"dayObs": dayObs, "seq_num": seqNum},
            datasets="raw",
        )

    uniqueRecords = set(expRecords)  # the query may return duplicates
    if not uniqueRecords:
        raise LookupError(f"No exposure records found for {dataId}")
    assert len(uniqueRecords) == 1, f"Found {len(uniqueRecords)} exposure records for {dataId}"
    return uniqueRecords.pop()

528 

529 

def getDayObsSeqNumFromExposureId(butler: dafButler.Butler, dataId: dict) -> dict[str, int]:
    """Get the day_obs and seq_num for an exposure id.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dict` or `int`
        The dataId containing the exposure id, or a bare exposure id.

    Returns
    -------
    dataId : `dict`
        A dict containing only the day_obs and seq_num.

    Raises
    ------
    RuntimeError
        Raised if no exposure id could be found in the dataId.
    LookupError
        Raised if no exposure record exists for the exposure id.
    """
    # Coerce a bare exposure id to a dict *before* probing for keys: the
    # previous ordering called getDayObs() first, which attempts dataId.keys()
    # and raised AttributeError for int inputs, making the int-support branch
    # unreachable.
    if isinstance(dataId, int):
        dataId = {"exposure": dataId}
    else:
        dataId = _assureDict(dataId)
    assert isinstance(dataId, dict)

    # nothing to do if the answer is already present
    if (dayObs := getDayObs(dataId)) and (seqNum := getSeqNum(dataId)):
        return {"day_obs": dayObs, "seq_num": seqNum}

    if not (expId := getExpId(dataId)):
        raise RuntimeError(f"Failed to find exposure id in {dataId}")

    expRecords = set(  # the query may return duplicates
        butler.registry.queryDimensionRecords(
            "exposure", where="exposure.id=expId", bind={"expId": expId}, datasets="raw"
        )
    )
    if not expRecords:
        raise LookupError(f"No exposure records found for {dataId}")
    assert len(expRecords) == 1, f"Found {len(expRecords)} exposure records for {dataId}"
    record = expRecords.pop()
    return {"day_obs": record.day_obs, "seq_num": record.seq_num}

567 

568 

def getDatasetRefForDataId(
    butler: dafButler.Butler, datasetType: str | dafButler.DatasetType, dataId: dict
) -> dafButler.DatasetRef:
    """Get the datasetReference for a dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    datasetType : `str` or `datasetType`
        The dataset type.
    dataId : `dict`
        The dataId.

    Returns
    -------
    datasetRef : `lsst.daf.butler.dimensions.DatasetReference`
        The dataset reference.
    """
    if not _expid_present(dataId):
        # resolve the exposure id from day_obs+seq_num, which must be present
        assert _dayobs_present(dataId) and _seqnum_present(dataId)
        dataId.update(getExpIdFromDayObsSeqNum(butler, dataId))
    return butler.find_dataset(datasetType, dataId)

594 

595 

def removeDataProduct(
    butler: dafButler.Butler, datasetType: str | dafButler.DatasetType, dataId: dict
) -> None:
    """Remove a data product from the registry. Use with caution.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    datasetType : `str` or `datasetType`
        The dataset type.
    dataId : `dict`
        The dataId.

    Raises
    ------
    RuntimeError
        Raised if an attempt is made to delete a raw.
    """
    # raws are sacrosanct - refuse to delete them
    if datasetType == "raw":
        raise RuntimeError("I'm sorry, Dave, I'm afraid I can't do that.")
    ref = getDatasetRefForDataId(butler, datasetType, dataId)
    butler.pruneDatasets([ref], disassociate=True, unstore=True, purge=True)

616 

617 

def _dayobs_present(dataId: dict) -> bool:
    """Return True if any day_obs-like key is present in the dataId."""
    return _get_dayobs_key(dataId) is not None

620 

621 

def _seqnum_present(dataId: dict) -> bool:
    """Return True if any seq_num-like key is present in the dataId."""
    return _get_seqnum_key(dataId) is not None

624 

625 

def _expid_present(dataId: dict) -> bool:
    """Return True if an exposure-id key is present in the dataId."""
    return _get_expid_key(dataId) is not None

628 

629 

630def _get_dayobs_key(dataId: dict) -> str | None: 

631 """Return the key for day_obs if present, else None""" 

632 keys = [k for k in dataId.keys() if k.find("day_obs") != -1] 

633 if not keys: 

634 return None 

635 return keys[0] 

636 

637 

638def _get_seqnum_key(dataId: dict) -> str | None: 

639 """Return the key for seq_num if present, else None""" 

640 keys = [k for k in dataId.keys() if k.find("seq_num") != -1] 

641 if not keys: 

642 return None 

643 return keys[0] 

644 

645 

646def _get_expid_key(dataId: dict) -> str | None: 

647 """Return the key for expId if present, else None""" 

648 if "exposure.id" in dataId: 

649 return "exposure.id" 

650 elif "exposure" in dataId: 

651 return "exposure" 

652 return None 

653 

654 

def getDayObs(dataId: dict | dafButler.DimensionRecord) -> int | None:
    """Get the day_obs from a dataId.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    day_obs : `int` or `None`
        The day_obs value if present, else None.
    """
    # DimensionRecords expose day_obs as an attribute
    if hasattr(dataId, "day_obs"):
        return dataId.day_obs
    if not _dayobs_present(dataId):
        return None
    key = "day_obs" if "day_obs" in dataId else "exposure.day_obs"
    return dataId[key]

673 

674 

def getSeqNum(dataId: dict | dafButler.DimensionRecord) -> int | None:
    """Get the seq_num from a dataId.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    seq_num : `int` or `None`
        The seq_num value if present, else None.
    """
    # DimensionRecords expose seq_num as an attribute
    if hasattr(dataId, "seq_num"):
        return dataId.seq_num
    if not _seqnum_present(dataId):
        return None
    key = "seq_num" if "seq_num" in dataId else "exposure.seq_num"
    return dataId[key]

693 

694 

def getExpId(dataId: dict | dafButler.DimensionRecord) -> int | None:
    """Get the expId from a dataId.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    expId : `int` or `None`
        The expId value if present, else None.
    """
    # DimensionRecords expose the exposure id as the "id" attribute
    if hasattr(dataId, "id"):
        return dataId.id
    if not _expid_present(dataId):
        return None
    key = "exposure" if "exposure" in dataId else "exposure.id"
    return dataId[key]

713 

714 

def getLatissOnSkyDataIds(
    butler: dafButler.Butler,
    skipTypes: Iterable[str] = ("bias", "dark", "flat"),
    checkObject: bool = True,
    full: bool = True,
    startDate: int | None = None,
    endDate: int | None = None,
) -> list[dict]:
    """Get a list of all on-sky dataIds taken.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    skipTypes : `list` of `str`
        Image types to exclude.
    checkObject : `bool`
        Check if the value of target_name (formerly OBJECT) is set and exclude
        if it is not.
    full : `bool`
        Return filled dataIds. Required for some analyses, but runs much
        (~30x) slower.
    startDate : `int`
        The day_obs to start at, inclusive.
    endDate : `int`
        The day_obs to end at, inclusive.

    Returns
    -------
    dataIds : `list` or `dataIds`
        The dataIds.
    """

    def isOnSky(expRecord):
        # an exposure counts as on-sky when its image type is not in
        # skipTypes and (optionally) its target was actually set
        imageType = expRecord.observation_type
        obj = expRecord.target_name
        if checkObject and obj == "NOTSET":
            return False
        if imageType not in skipTypes:
            return True
        return False

    recordSets = []
    days = getDaysWithData(butler)
    # apply the inclusive date window, if given
    if startDate:
        days = [d for d in days if d >= startDate]
    if endDate:
        days = [d for d in days if d <= endDate]
    days = sorted(set(days))

    where = "exposure.day_obs=dayObs"
    for day in days:
        # queryDataIds would be better here, but it's then hard/impossible
        # to do the filtering for which is on sky, so just take the dataIds
        records = butler.registry.queryDimensionRecords(
            "exposure", where=where, bind={"dayObs": day}, datasets="raw"
        )
        recordSets.append(sortRecordsByDayObsThenSeqNum(records))

    # flatten the per-day record sets and keep only on-sky exposures
    dataIds = [r.dataId for r in filter(isOnSky, itertools.chain(*recordSets))]
    if full:
        # expand each dataId (slow) and coerce back to a plain dict
        expandedIds = [
            updateDataIdOrDataCord(butler.registry.expandDataId(dataId, detector=0).full)
            for dataId in dataIds
        ]
        filledIds = [fillDataId(butler, dataId) for dataId in expandedIds]
        return filledIds
    else:
        return [updateDataIdOrDataCord(dataId, detector=0) for dataId in dataIds]

784 

785 

def getExpRecord(
    butler: dafButler.Butler,
    instrument: str,
    expId: int | None = None,
    dayObs: int | None = None,
    seqNum: int | None = None,
) -> dafButler.DimensionRecord:
    """Get the exposure record for a given exposure ID or dayObs+seqNum.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    instrument : `str`
        The instrument name, e.g. 'LSSTCam'.
    expId : `int`, optional
        The exposure ID. Either this or both ``dayObs`` and ``seqNum`` must
        be supplied.
    dayObs : `int`, optional
        The day_obs, used together with ``seqNum``.
    seqNum : `int`, optional
        The seq_num, used together with ``dayObs``.

    Returns
    -------
    expRecord : `lsst.daf.butler.DimensionRecord`
        The exposure record.

    Raises
    ------
    ValueError
        Raised if neither expId nor (dayObs AND seqNum) are supplied.
    RuntimeError
        Raised if no unique matching exposure record is found.
    """
    if expId is None and (dayObs is None or seqNum is None):
        raise ValueError("Must supply either expId or (dayObs AND seqNum)")

    where = "instrument=inst"  # Note you can't use =instrument as bind-strings can't clash with dimensions
    bind: "dict[str, str | int]" = {"inst": instrument}
    # Use `is not None` rather than truthiness so that the constraints applied
    # here always match the validation above.
    if expId is not None:
        where += " AND exposure.id=expId"
        bind.update({"expId": expId})
    if dayObs is not None and seqNum is not None:
        where += " AND exposure.day_obs=dayObs AND exposure.seq_num=seqNum"
        bind.update({"dayObs": dayObs, "seqNum": seqNum})

    expRecords = butler.registry.queryDimensionRecords("exposure", where=where, bind=bind, datasets="raw")
    expRecords = list(set(expRecords))  # must call set as this may contain many duplicates
    if len(expRecords) != 1:
        raise RuntimeError(
            f"Failed to find unique exposure record for {instrument=} with"
            f" {expId=}, {dayObs=}, {seqNum=}, got {len(expRecords)} records"
        )
    return expRecords[0]