# This file is part of summit_utils.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import copy
import itertools
from collections.abc import Iterable
from typing import Any

from deprecated.sphinx import deprecated

import lsst.daf.butler as dafButler
from lsst.summit.utils.utils import getSite

__all__ = [
    "makeDefaultLatissButler",
    "updateDataId",
    "sanitizeDayObs",
    "getMostRecentDayObs",
    "getSeqNumsForDayObs",
    "getMostRecentDataId",
    "getDatasetRefForDataId",
    "getDayObs",
    "getSeqNum",
    "getExpId",
    "datasetExists",
    "sortRecordsByDayObsThenSeqNum",
    "getDaysWithData",
    "getExpIdFromDayObsSeqNum",
    "updateDataIdOrDataCord",
    "fillDataId",
    "getExpRecordFromDataId",
    "getDayObsSeqNumFromExposureId",
    "removeDataProduct",
    "getLatissOnSkyDataIds",
    "getExpRecord",
]

_LATISS_DEFAULT_COLLECTIONS = ["LATISS/raw/all", "LATISS/calib", "LATISS/runs/quickLook"]

# RECENT_DAY must be in the past *and have data* (otherwise some tests are
# no-ops). It is used to speed up queries by restricting them significantly,
# so data must definitely have been taken on or since this day. It should
# also not be more than 2 months in the past, due to the 60 day lookback
# time on the summit. All this means it should be updated by an informed
# human.
RECENT_DAY = 20220503


def _configureForSite() -> None:
    try:
        site = getSite()
    except ValueError:
        # this method is run automatically on module import, so
        # don't fail for k8s where this cannot yet be determined
        print("WARNING: failed to automatically determine site")
        site = None

    if site == "tucson":
        global RECENT_DAY
        RECENT_DAY = 20211104  # TTS has limited data, so use this day


_configureForSite()


def getLatissDefaultCollections() -> list[str]:
    """Get the default set of LATISS collections, updated for the site at
    which the code is being run.

    Returns
    -------
    collections : `list` of `str`
        The default collections for the site.
    """
    collections = list(_LATISS_DEFAULT_COLLECTIONS)  # copy, so repeated calls don't grow the default
    try:
        site = getSite()
    except ValueError:
        site = ""

    if site in ("tucson", "summit"):
        collections.append("LATISS-test-data")
    return collections


def _update_RECENT_DAY(day: int) -> None:
    """Update the value for RECENT_DAY once we have a value for free."""
    global RECENT_DAY
    RECENT_DAY = max(day - 1, RECENT_DAY)


def makeDefaultLatissButler(
    *,
    extraCollections: list[str] | None = None,
    writeable: bool = False,
    embargo: bool = False,
) -> dafButler.Butler:
    """Create a butler for LATISS using the default collections.

    Parameters
    ----------
    extraCollections : `list` of `str`, optional
        Extra input collections to supply to the butler init.
    writeable : `bool`, optional
        Whether to make a writeable butler.
    embargo : `bool`, optional
        Use the embargo repo instead of the main one. Needed to access
        embargoed data.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        The butler.
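
    Examples
    --------
    A minimal usage sketch, assuming the environment provides a butler
    repository index that resolves the "LATISS" repo label (the extra
    collection name here is purely illustrative)::

        butler = makeDefaultLatissButler(extraCollections=["u/someuser/myRun"])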

    """
    # TODO: Add logging of which collections are going in
    collections = getLatissDefaultCollections()
    if extraCollections:
        collections.extend(extraCollections)
    try:
        repoString = "LATISS" if not embargo else "/repo/embargo"
        butler = dafButler.Butler(
            repoString, collections=collections, writeable=writeable, instrument="LATISS"
        )
    except (FileNotFoundError, RuntimeError):
        # Depending on the value of DAF_BUTLER_REPOSITORY_INDEX and whether
        # it is present and blank, or just not set, both these exception
        # types can be raised, see tests/test_butlerUtils.py:ButlerInitTestCase
        # for details and tests which confirm these have not changed
        raise FileNotFoundError  # unify exception type
    return butler


@deprecated(
    reason="datasetExists has been replaced by Butler.exists(). Will be removed after v26.0.",
    version="v26.0",
    category=FutureWarning,
)
def datasetExists(butler: dafButler.Butler, dataProduct: str, dataId: dict, **kwargs: Any) -> bool:
    """Collapse the tri-state behaviour of butler.datasetExists to a boolean.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataProduct : `str`
        The type of data product to check for.
    dataId : `dict`
        The dataId of the dataProduct to check for.

    Returns
    -------
    exists : `bool`
        True if the dataProduct exists for the dataId and can be retrieved,
        else False.
    """
    return bool(butler.exists(dataProduct, dataId, **kwargs))


def updateDataId(
    dataId: dict | dafButler.DataCoordinate, **kwargs: Any
) -> dict | dafButler.DataCoordinate:
    """Update a DataCoordinate or dataId dict with kwargs.

    Provides a single interface for adding the detector key (or others) to a
    dataId, whether it's a DataCoordinate or a dict.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate`
        The dataId to update.
    kwargs : `dict`
        The keys and values to add to the dataId.

    Returns
    -------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate`
        The updated dataId, with the same type as the input.
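
    Examples
    --------
    For a plain dict the update is a simple merge:

    >>> updateDataId({"day_obs": 20220503, "seq_num": 123}, detector=0)
    {'day_obs': 20220503, 'seq_num': 123, 'detector': 0}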

    """

    match dataId:
        case dafButler.DataCoordinate():
            return dafButler.DataCoordinate.standardize(dataId, **kwargs)
        case dict():
            return dict(dataId, **kwargs)
    raise ValueError(f"Unknown dataId type {type(dataId)}")


def sanitizeDayObs(day_obs: int | str) -> int:
    """Take a string or int day_obs and turn it into the int version.

    Parameters
    ----------
    day_obs : `str` or `int`
        The day_obs to sanitize.

    Returns
    -------
    day_obs : `int`
        The sanitized day_obs.

    Raises
    ------
    ValueError
        Raised if the day_obs fails to translate for any reason.
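
    Examples
    --------
    Both accepted input forms, for illustration:

    >>> sanitizeDayObs("2022-05-03")
    20220503
    >>> sanitizeDayObs(20220503)
    20220503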

    """
    if isinstance(day_obs, int):
        return day_obs
    elif isinstance(day_obs, str):
        try:
            return int(day_obs.replace("-", ""))
        except Exception as e:
            raise ValueError(f"Failed to sanitize {day_obs!r} to a day_obs") from e
    raise ValueError(f"Cannot sanitize {day_obs!r} to a day_obs")


def getMostRecentDayObs(butler: dafButler.Butler) -> int:
    """Get the most recent day_obs for which there is data.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.

    Returns
    -------
    day_obs : `int`
        The day_obs.
    """
    where = "exposure.day_obs>=RECENT_DAY"
    records = butler.registry.queryDimensionRecords(
        "exposure", where=where, datasets="raw", bind={"RECENT_DAY": RECENT_DAY}
    )
    recentDay = max(r.day_obs for r in records)
    _update_RECENT_DAY(recentDay)
    return recentDay


def getSeqNumsForDayObs(butler: dafButler.Butler, day_obs: int | str, extraWhere: str = "") -> list[int]:
    """Get a list of all seq_nums taken on a given day_obs.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    day_obs : `int` or `str`
        The day_obs for which the seq_nums are desired.
    extraWhere : `str`, optional
        Any extra where conditions to add to the queryDimensionRecords call.

    Returns
    -------
    seq_nums : `list` of `int`
        The seq_nums taken on the corresponding day_obs, in ascending
        numerical order.
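
    Examples
    --------
    Assuming a butler for a repo containing LATISS raws, get the science
    frames for a night (the extra where clause is illustrative)::

        seqNums = getSeqNumsForDayObs(
            butler, 20220503, extraWhere="exposure.observation_type='science'"
        )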

    """
    day_obs = sanitizeDayObs(day_obs)
    where = "exposure.day_obs=dayObs"
    if extraWhere:
        extraWhere = extraWhere.replace('"', "'")
        where += f" and {extraWhere}"
    records = butler.registry.queryDimensionRecords(
        "exposure", where=where, bind={"dayObs": day_obs}, datasets="raw"
    )
    return sorted(r.seq_num for r in records)


def sortRecordsByDayObsThenSeqNum(
    records: list[dafButler.DimensionRecord],
) -> list[dafButler.DimensionRecord]:
    """Sort a set of records by dayObs, then seqNum, to get the order in
    which they were taken.

    Parameters
    ----------
    records : `list` of `dafButler.DimensionRecord`
        The records to be sorted.

    Returns
    -------
    sortedRecords : `list` of `dafButler.DimensionRecord`
        The sorted records.

    Raises
    ------
    ValueError
        Raised if the recordSet contains duplicate records, or if it contains
        (dayObs, seqNum) collisions.
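
    Examples
    --------
    A sketch of typical usage, assuming a butler for a repo containing
    LATISS raws::

        records = butler.registry.queryDimensionRecords(
            "exposure", where="exposure.day_obs=20220503", datasets="raw"
        )
        ordered = sortRecordsByDayObsThenSeqNum(records)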

    """
    records = list(records)  # must call list in case we have a generator
    recordSet = set(records)
    if len(records) != len(recordSet):
        raise ValueError("Record set contains duplicate records and therefore cannot be sorted unambiguously")

    daySeqTuples = [(r.day_obs, r.seq_num) for r in records]
    if len(daySeqTuples) != len(set(daySeqTuples)):
        raise ValueError(
            "Record set contains dayObs/seqNum collisions, and therefore cannot be sorted unambiguously"
        )

    records.sort(key=lambda r: (r.day_obs, r.seq_num))
    return records


def getDaysWithData(butler: dafButler.Butler, datasetType: str = "raw") -> list[int]:
    """Get all the days for which LATISS has taken data on the mountain.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    datasetType : `str`, optional
        The datasetType to query.

    Returns
    -------
    days : `list` of `int`
        A sorted list of the day_obs values for which mountain-top data exists.
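
    Examples
    --------
    A sketch of typical usage, assuming a LATISS butler (the non-default
    dataset type is illustrative)::

        rawDays = getDaysWithData(butler)
        quickLookDays = getDaysWithData(butler, datasetType="quickLookExp")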

    """
    # 20200101 is a day between shipping LATISS and going on sky.
    # We used to constrain on exposure.seq_num<50 to massively reduce the
    # number of returned records whilst being large enough to ensure that no
    # days are missed because early seq_nums were skipped. However, because
    # we have test datasets like LATISS-test-data-tts, where we only kept
    # seqNums from 950 on one day, we can no longer assume this, so don't be
    # tempted to add such a constraint back in here for speed.
    where = "exposure.day_obs>20200101"
    records = butler.registry.queryDimensionRecords("exposure", where=where, datasets=datasetType)
    return sorted(set(r.day_obs for r in records))


def getMostRecentDataId(butler: dafButler.Butler) -> dict:
    """Get the dataId for the most recent observation.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.

    Returns
    -------
    dataId : `dict`
        The dataId of the most recent exposure.
    """
    lastDay = getMostRecentDayObs(butler)
    seqNum = getSeqNumsForDayObs(butler, lastDay)[-1]
    dataId = {"day_obs": lastDay, "seq_num": seqNum, "detector": 0}
    dataId.update(getExpIdFromDayObsSeqNum(butler, dataId))
    return dataId


def getExpIdFromDayObsSeqNum(butler: dafButler.Butler, dataId: dict) -> dict:
    """Get the exposure id for the dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    dataId : `dict`
        The dataId for which to return the exposure id.

    Returns
    -------
    dataId : `dict`
        A dict containing only the exposure id, keyed by "exposure".
    """
    expRecord = getExpRecordFromDataId(butler, dataId)
    return {"exposure": expRecord.id}


def updateDataIdOrDataCord(dataId: dict, **updateKwargs: Any) -> dict:
    """Add key, value pairs to a dataId or data coordinate.

    Parameters
    ----------
    dataId : `dict`
        The dataId to update.
    updateKwargs : `dict`
        The key, value pairs to add to the dataId or dataCoord.

    Returns
    -------
    dataId : `dict`
        The updated dataId.

    Notes
    -----
    Always returns a dict, so note that if a data coordinate is supplied, a
    dict is returned, changing the type.
    """
    newId = copy.copy(dataId)
    newId = _assureDict(newId)
    newId.update(updateKwargs)
    return newId


def fillDataId(butler: dafButler.Butler, dataId: dict) -> dict:
    """Given a dataId, fill it with values for all available dimensions.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dict`
        The dataId to fill.

    Returns
    -------
    dataId : `dict`
        The filled dataId.

    Notes
    -----
    This function is *slow*! Running it on 20,000 dataIds takes approximately
    7 minutes. Virtually all the slowdown is in the
    butler.registry.expandDataId() call, so this wrapper is not to blame, and
    it might speed up in future with butler improvements.
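
    Examples
    --------
    Assuming a LATISS butler, a minimal dataId is expanded to contain the
    exposure id, the day_obs/seq_num pair, and all other available dimension
    values::

        dataId = fillDataId(butler, {"day_obs": 20220503, "seq_num": 123})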

    """
    # ensure it's a dict to deal with records etc
    dataId = _assureDict(dataId)

    # this removes extraneous keys that would trip up the registry call.
    # Using _rewrite_data_id is perhaps ever so slightly slower than popping
    # the bad keys, or making a minimal dataId by hand, but is more
    # reliable/general, so we choose that over the other approach here
    dataId, _ = butler._rewrite_data_id(dataId, butler.get_dataset_type("raw"))

    # now expand and turn back to a dict
    dataId = butler.registry.expandDataId(dataId, detector=0).mapping  # this call is VERY slow
    dataId = _assureDict(dataId)

    missingExpId = getExpId(dataId) is None
    missingDayObs = getDayObs(dataId) is None
    missingSeqNum = getSeqNum(dataId) is None

    if missingDayObs or missingSeqNum:
        dayObsSeqNum = getDayObsSeqNumFromExposureId(butler, dataId)
        dataId.update(dayObsSeqNum)

    if missingExpId:
        expId = getExpIdFromDayObsSeqNum(butler, dataId)
        dataId.update(expId)

    return dataId


def _assureDict(dataId: dict | dafButler.dimensions.DataCoordinate | dafButler.DimensionRecord) -> dict:
    """Turn any data-identifier-like object into a dict.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.dimensions.DataCoordinate` or
             `lsst.daf.butler.DimensionRecord`
        The data identifier.

    Returns
    -------
    dataId : `dict`
        The data identifier as a dict.
    """
    if isinstance(dataId, dict):
        return dataId
    elif hasattr(dataId, "mapping"):  # dafButler.dimensions.DataCoordinate
        return {str(k): v for k, v in dataId.mapping.items()}
    elif hasattr(dataId, "items"):  # other dict-like objects
        return {str(k): v for k, v in dataId.items()}  # str() required due to full names
    elif hasattr(dataId, "dataId"):  # dafButler.DimensionRecord
        return {str(k): v for k, v in dataId.dataId.mapping.items()}
    else:
        raise RuntimeError(f"Failed to coerce {type(dataId)} to dict")


def getExpRecordFromDataId(butler: dafButler.Butler, dataId: dict) -> dafButler.DimensionRecord:
    """Get the exposure record for a given dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dict`
        The dataId.

    Returns
    -------
    expRecord : `lsst.daf.butler.DimensionRecord`
        The exposure record.
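
    Examples
    --------
    A sketch of typical usage, assuming a LATISS butler; either an exposure
    id or a day_obs/seq_num pair identifies the record::

        expRecord = getExpRecordFromDataId(butler, {"day_obs": 20220503, "seq_num": 123})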

    """
    dataId = _assureDict(dataId)
    assert isinstance(dataId, dict), f"dataId must be a dict or DimensionRecord, got {type(dataId)}"

    if expId := getExpId(dataId):
        where = "exposure.id=expId"
        expRecords = butler.registry.queryDimensionRecords(
            "exposure", where=where, bind={"expId": expId}, datasets="raw"
        )
    else:
        dayObs = getDayObs(dataId)
        seqNum = getSeqNum(dataId)
        if not (dayObs and seqNum):
            raise RuntimeError(f"Failed to find either expId or day_obs and seq_num in dataId {dataId}")
        where = "exposure.day_obs=dayObs AND exposure.seq_num=seq_num"
        expRecords = butler.registry.queryDimensionRecords(
            "exposure", where=where, bind={"dayObs": dayObs, "seq_num": seqNum}, datasets="raw"
        )

    expRecords = set(expRecords)
    if not expRecords:
        raise LookupError(f"No exposure records found for {dataId}")
    assert len(expRecords) == 1, f"Found {len(expRecords)} exposure records for {dataId}"
    return expRecords.pop()


def getDayObsSeqNumFromExposureId(butler: dafButler.Butler, dataId: dict | int) -> dict[str, int]:
    """Get the day_obs and seq_num for an exposure id.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dict` or `int`
        The dataId containing the exposure id, or the bare exposure id as an
        int.

    Returns
    -------
    dataId : `dict`
        A dict containing only the day_obs and seq_num.
    """
    if isinstance(dataId, int):
        dataId = {"exposure": dataId}

    if (dayObs := getDayObs(dataId)) and (seqNum := getSeqNum(dataId)):
        return {"day_obs": dayObs, "seq_num": seqNum}

    dataId = _assureDict(dataId)
    assert isinstance(dataId, dict)

    if not (expId := getExpId(dataId)):
        raise RuntimeError(f"Failed to find exposure id in {dataId}")

    where = "exposure.id=expId"
    expRecords = butler.registry.queryDimensionRecords(
        "exposure", where=where, bind={"expId": expId}, datasets="raw"
    )
    expRecords = set(expRecords)
    if not expRecords:
        raise LookupError(f"No exposure records found for {dataId}")
    assert len(expRecords) == 1, f"Found {len(expRecords)} exposure records for {dataId}"
    record = expRecords.pop()
    return {"day_obs": record.day_obs, "seq_num": record.seq_num}


def getDatasetRefForDataId(
    butler: dafButler.Butler, datasetType: str | dafButler.DatasetType, dataId: dict
) -> dafButler.DatasetRef:
    """Get the dataset reference for a dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    datasetType : `str` or `lsst.daf.butler.DatasetType`
        The dataset type.
    dataId : `dict`
        The dataId.

    Returns
    -------
    datasetRef : `lsst.daf.butler.DatasetRef`
        The dataset reference.
    """
    if not _expid_present(dataId):
        assert _dayobs_present(dataId) and _seqnum_present(dataId)
        dataId.update(getExpIdFromDayObsSeqNum(butler, dataId))

    dRef = butler.find_dataset(datasetType, dataId)
    return dRef


def removeDataProduct(
    butler: dafButler.Butler, datasetType: str | dafButler.DatasetType, dataId: dict
) -> None:
    """Remove a data product from the registry. Use with caution.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    datasetType : `str` or `lsst.daf.butler.DatasetType`
        The dataset type.
    dataId : `dict`
        The dataId.
    """
    if datasetType == "raw":
        raise RuntimeError("I'm sorry, Dave, I'm afraid I can't do that.")
    dRef = getDatasetRefForDataId(butler, datasetType, dataId)
    butler.pruneDatasets([dRef], disassociate=True, unstore=True, purge=True)


def _dayobs_present(dataId: dict) -> bool:
    return _get_dayobs_key(dataId) is not None


def _seqnum_present(dataId: dict) -> bool:
    return _get_seqnum_key(dataId) is not None


def _expid_present(dataId: dict) -> bool:
    return _get_expid_key(dataId) is not None


def _get_dayobs_key(dataId: dict) -> str | None:
    """Return the key for day_obs if present, else None."""
    keys = [k for k in dataId.keys() if "day_obs" in k]
    if not keys:
        return None
    return keys[0]


def _get_seqnum_key(dataId: dict) -> str | None:
    """Return the key for seq_num if present, else None."""
    keys = [k for k in dataId.keys() if "seq_num" in k]
    if not keys:
        return None
    return keys[0]


def _get_expid_key(dataId: dict) -> str | None:
    """Return the key for expId if present, else None."""
    if "exposure.id" in dataId:
        return "exposure.id"
    elif "exposure" in dataId:
        return "exposure"
    return None


def getDayObs(dataId: dict | dafButler.DimensionRecord) -> int | None:
    """Get the day_obs from a dataId.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    day_obs : `int` or `None`
        The day_obs value if present, else None.
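
    Examples
    --------
    >>> getDayObs({"day_obs": 20220503, "seq_num": 123})
    20220503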

    """
    if hasattr(dataId, "day_obs"):
        return dataId.day_obs
    if not _dayobs_present(dataId):
        return None
    return dataId["day_obs"] if "day_obs" in dataId else dataId["exposure.day_obs"]


def getSeqNum(dataId: dict | dafButler.DimensionRecord) -> int | None:
    """Get the seq_num from a dataId.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    seq_num : `int` or `None`
        The seq_num value if present, else None.
    """
    if hasattr(dataId, "seq_num"):
        return dataId.seq_num
    if not _seqnum_present(dataId):
        return None
    return dataId["seq_num"] if "seq_num" in dataId else dataId["exposure.seq_num"]


def getExpId(dataId: dict | dafButler.DimensionRecord) -> int | None:
    """Get the expId from a dataId.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    expId : `int` or `None`
        The expId value if present, else None.
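
    Examples
    --------
    >>> getExpId({"exposure": 2022050300123})  # the exposure id value is illustrative
    2022050300123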

    """
    if hasattr(dataId, "id"):
        return dataId.id
    if not _expid_present(dataId):
        return None
    return dataId["exposure"] if "exposure" in dataId else dataId["exposure.id"]


def getLatissOnSkyDataIds(
    butler: dafButler.Butler,
    skipTypes: Iterable[str] = ("bias", "dark", "flat"),
    checkObject: bool = True,
    full: bool = True,
    startDate: int | None = None,
    endDate: int | None = None,
) -> list[dict]:
    """Get a list of all on-sky dataIds taken.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    skipTypes : `iterable` of `str`, optional
        Image types to exclude.
    checkObject : `bool`, optional
        Check if the value of target_name (formerly OBJECT) is set, and
        exclude the exposure if it is not.
    full : `bool`, optional
        Return filled dataIds. Required for some analyses, but runs much
        (~30x) slower.
    startDate : `int`, optional
        The day_obs to start at, inclusive.
    endDate : `int`, optional
        The day_obs to end at, inclusive.

    Returns
    -------
    dataIds : `list` of `dict`
        The dataIds.
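
    Examples
    --------
    Assuming a LATISS butler, get unfilled on-sky dataIds for a two-week
    window::

        dataIds = getLatissOnSkyDataIds(
            butler, full=False, startDate=20220501, endDate=20220514
        )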

    """

    def isOnSky(expRecord):
        imageType = expRecord.observation_type
        obj = expRecord.target_name
        if checkObject and obj == "NOTSET":
            return False
        if imageType not in skipTypes:
            return True
        return False

    recordSets = []
    days = getDaysWithData(butler)
    if startDate:
        days = [d for d in days if d >= startDate]
    if endDate:
        days = [d for d in days if d <= endDate]
    days = sorted(set(days))

    where = "exposure.day_obs=dayObs"
    for day in days:
        # queryDataIds would be better here, but it's then hard/impossible
        # to do the filtering for which is on sky, so just take the dataIds
        records = butler.registry.queryDimensionRecords(
            "exposure", where=where, bind={"dayObs": day}, datasets="raw"
        )
        recordSets.append(sortRecordsByDayObsThenSeqNum(records))

    dataIds = [r.dataId for r in filter(isOnSky, itertools.chain(*recordSets))]
    if full:
        expandedIds = [
            updateDataIdOrDataCord(butler.registry.expandDataId(dataId, detector=0).mapping)
            for dataId in dataIds
        ]
        filledIds = [fillDataId(butler, dataId) for dataId in expandedIds]
        return filledIds
    else:
        return [updateDataIdOrDataCord(dataId, detector=0) for dataId in dataIds]


def getExpRecord(
    butler: dafButler.Butler,
    instrument: str,
    expId: int | None = None,
    dayObs: int | None = None,
    seqNum: int | None = None,
) -> dafButler.DimensionRecord:
    """Get the exposure record for a given exposure ID or dayObs+seqNum.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    instrument : `str`
        The instrument name, e.g. 'LSSTCam'.
    expId : `int`, optional
        The exposure ID. Must be supplied if dayObs and seqNum are not.
    dayObs : `int`, optional
        The day_obs. Must be supplied, along with seqNum, if expId is not.
    seqNum : `int`, optional
        The seq_num. Must be supplied, along with dayObs, if expId is not.

    Returns
    -------
    expRecord : `lsst.daf.butler.DimensionRecord`
        The exposure record.
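
    Examples
    --------
    A sketch of the two equivalent lookups, assuming a butler with LATISS
    raws (the exposure id is illustrative)::

        expRecord = getExpRecord(butler, "LATISS", expId=2022050300123)
        expRecord = getExpRecord(butler, "LATISS", dayObs=20220503, seqNum=123)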

    """
    if expId is None and (dayObs is None or seqNum is None):
        raise ValueError("Must supply either expId or (dayObs AND seqNum)")

    # note: bind names must not clash with dimension names, so "instrument"
    # itself cannot be used as the bind key here
    where = "instrument=inst"
    bind: dict[str, str | int] = {"inst": instrument}
    if expId:
        where += " AND exposure.id=expId"
        bind.update({"expId": expId})
    if dayObs and seqNum:
        where += " AND exposure.day_obs=dayObs AND exposure.seq_num=seqNum"
        bind.update({"dayObs": dayObs, "seqNum": seqNum})

    expRecords = butler.registry.queryDimensionRecords("exposure", where=where, bind=bind, datasets="raw")
    expRecords = list(set(expRecords))  # must call set as this may contain many duplicates
    if len(expRecords) != 1:
        raise RuntimeError(
            f"Failed to find unique exposure record for {instrument=} with"
            f" {expId=}, {dayObs=}, {seqNum=}, got {len(expRecords)} records"
        )
    return expRecords[0]