Coverage for python/lsst/summit/utils/butlerUtils.py: 13%

254 statements  


# This file is part of summit_utils.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import copy
import itertools

from deprecated.sphinx import deprecated

import lsst.daf.butler as dafButler
from lsst.summit.utils.utils import getSite

__all__ = [
    "makeDefaultLatissButler",
    "updateDataId",
    "sanitizeDayObs",
    "getMostRecentDayObs",
    "getSeqNumsForDayObs",
    "getMostRecentDataId",
    "getDatasetRefForDataId",
    "getDayObs",
    "getSeqNum",
    "getExpId",
    "datasetExists",
    "sortRecordsByDayObsThenSeqNum",
    "getDaysWithData",
    "getExpIdFromDayObsSeqNum",
    "updateDataIdOrDataCord",
    "fillDataId",
    "getExpRecordFromDataId",
    "getDayObsSeqNumFromExposureId",
    "removeDataProduct",
    "getLatissOnSkyDataIds",
    "getExpRecord",
]

_LATISS_DEFAULT_COLLECTIONS = ["LATISS/raw/all", "LATISS/calib", "LATISS/runs/quickLook"]

# RECENT_DAY must be a day in the past on which data was taken (otherwise
# some tests are no-ops). It exists to speed up queries by restricting them
# significantly, but data must definitely have been taken since it. It should
# also not be more than 2 months in the past, due to the 60 day lookback time
# on the summit. All this means it should be updated by an informed human.
RECENT_DAY = 20220503


def _configureForSite():
    try:
        site = getSite()
    except ValueError:
        # this function is run automatically on module import, so don't
        # fail for k8s, where the site cannot yet be determined
        print("WARNING: failed to automatically determine site")
        site = None

    if site == "tucson":
        global RECENT_DAY
        RECENT_DAY = 20211104  # TTS has limited data, so use this day


_configureForSite()


def getLatissDefaultCollections():
    """Get the default set of LATISS collections, updated for the site at
    which the code is being run.

    Returns
    -------
    collections : `list` of `str`
        The default collections for the site.
    """
    # take a copy so that repeated calls cannot mutate the module-level list
    collections = list(_LATISS_DEFAULT_COLLECTIONS)
    try:
        site = getSite()
    except ValueError:
        site = ""

    if site in ("tucson", "summit"):
        collections.append("LATISS-test-data")
    return collections
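
# Example usage of getLatissDefaultCollections (an illustrative sketch, not
# part of the original module). Away from the summit/TTS, where getSite()
# returns another site or raises, only the defaults are returned:
#
#     >>> getLatissDefaultCollections()
#     ['LATISS/raw/all', 'LATISS/calib', 'LATISS/runs/quickLook']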


def _update_RECENT_DAY(day):
    """Update the value of RECENT_DAY when a newer value is obtained for free."""
    global RECENT_DAY
    RECENT_DAY = max(day - 1, RECENT_DAY)


def makeDefaultLatissButler(*, extraCollections=None, writeable=False, embargo=False):
    """Create a butler for LATISS using the default collections.

    Parameters
    ----------
    extraCollections : `list` of `str`, optional
        Extra input collections to supply to the butler init.
    writeable : `bool`, optional
        Whether to make a writeable butler.
    embargo : `bool`, optional
        Use the embargo repo instead of the main one. Needed to access
        embargoed data.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        The butler.
    """
    # TODO: Add logging of which collections are going in
    collections = getLatissDefaultCollections()
    if extraCollections:
        collections.extend(extraCollections)
    try:
        repoString = "LATISS" if not embargo else "/repo/embargo"
        butler = dafButler.Butler(
            repoString, collections=collections, writeable=writeable, instrument="LATISS"
        )
    except (FileNotFoundError, RuntimeError):
        # Depending on the value of DAF_BUTLER_REPOSITORY_INDEX, and whether
        # it is present and blank, or just not set, both these exception
        # types can be raised; see tests/test_butlerUtils.py:ButlerInitTestCase
        # for details, and for tests which confirm these have not changed
        raise FileNotFoundError  # unify exception type
    return butler
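
# Example usage of makeDefaultLatissButler (an illustrative sketch; assumes a
# repository named "LATISS" is resolvable, e.g. via DAF_BUTLER_REPOSITORY_INDEX,
# and the collection "u/someuser/scratch" is hypothetical):
#
#     >>> butler = makeDefaultLatissButler(extraCollections=["u/someuser/scratch"])
#     >>> raw = butler.get("raw", day_obs=RECENT_DAY, seq_num=1, detector=0)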


@deprecated(
    reason="datasetExists has been replaced by Butler.exists(). Will be removed after v26.0.",
    version="v26.0",
    category=FutureWarning,
)
def datasetExists(butler, dataProduct, dataId, **kwargs):
    """Collapse the tri-state behaviour of butler.datasetExists to a boolean.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataProduct : `str`
        The type of data product to check for.
    dataId : `dict`
        The dataId of the dataProduct to check for.

    Returns
    -------
    exists : `bool`
        True if the dataProduct exists for the dataId and can be retrieved,
        else False.
    """
    return butler.exists(dataProduct, dataId, **kwargs)
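
# Since datasetExists is deprecated, new code should call Butler.exists()
# directly. A minimal sketch (assumes a butler such as one returned by
# makeDefaultLatissButler; the dataId values are hypothetical):
#
#     >>> dataId = {"day_obs": 20220503, "seq_num": 42, "detector": 0}
#     >>> bool(butler.exists("raw", dataId))
#     True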


def updateDataId(dataId, **kwargs):
    """Update a DataCoordinate or dataId dict with the supplied kwargs.

    Provides a single interface for adding the detector key (or others) to a
    dataId, whether it is a DataCoordinate or a dict.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate`
        The dataId to update.
    kwargs : `dict`
        The keys and values to add to the dataId.

    Returns
    -------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate`
        The updated dataId, with the same type as the input.
    """
    match dataId:
        case dafButler.DataCoordinate():
            return dafButler.DataCoordinate.standardize(dataId, **kwargs)
        case dict():
            return dict(dataId, **kwargs)
    raise ValueError(f"Unknown dataId type {type(dataId)}")
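
# Example usage of updateDataId (a sketch): the return type follows the
# input type, so dicts stay dicts and DataCoordinates stay DataCoordinates:
#
#     >>> updateDataId({"day_obs": 20220503, "seq_num": 42}, detector=0)
#     {'day_obs': 20220503, 'seq_num': 42, 'detector': 0}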


def sanitizeDayObs(day_obs):
    """Take a string or int day_obs and turn it into the int version.

    Parameters
    ----------
    day_obs : `str` or `int`
        The day_obs to sanitize.

    Returns
    -------
    day_obs : `int`
        The sanitized day_obs.

    Raises
    ------
    ValueError
        Raised if the day_obs fails to translate for any reason.
    """
    if isinstance(day_obs, int):
        return day_obs
    elif isinstance(day_obs, str):
        try:
            return int(day_obs.replace("-", ""))
        except Exception:
            raise ValueError(f"Failed to sanitize {day_obs!r} to a day_obs")
    else:
        raise ValueError(f"Cannot sanitize {day_obs!r} to a day_obs")
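
# Example usage of sanitizeDayObs (a sketch): both dashed strings and ints
# normalize to the int form:
#
#     >>> sanitizeDayObs("2022-05-03")
#     20220503
#     >>> sanitizeDayObs(20220503)
#     20220503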


def getMostRecentDayObs(butler):
    """Get the most recent day_obs for which there is data.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.

    Returns
    -------
    day_obs : `int`
        The day_obs.
    """
    where = "exposure.day_obs>=RECENT_DAY"
    records = butler.registry.queryDimensionRecords(
        "exposure", where=where, datasets="raw", bind={"RECENT_DAY": RECENT_DAY}
    )
    recentDay = max(r.day_obs for r in records)
    _update_RECENT_DAY(recentDay)
    return recentDay


def getSeqNumsForDayObs(butler, day_obs, extraWhere=""):
    """Get a list of all seq_nums taken on a given day_obs.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    day_obs : `int` or `str`
        The day_obs for which the seq_nums are desired.
    extraWhere : `str`, optional
        Any extra where conditions to add to the queryDimensionRecords call.

    Returns
    -------
    seq_nums : `list` of `int`
        The seq_nums taken on the corresponding day_obs, in ascending
        numerical order.
    """
    day_obs = sanitizeDayObs(day_obs)
    where = "exposure.day_obs=dayObs"
    if extraWhere:
        extraWhere = extraWhere.replace('"', "'")
        where += f" and {extraWhere}"
    records = butler.registry.queryDimensionRecords(
        "exposure", where=where, bind={"dayObs": day_obs}, datasets="raw"
    )
    return sorted([r.seq_num for r in records])
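
# Example usage of getSeqNumsForDayObs (a sketch; assumes a butler from
# makeDefaultLatissButler and that data exists for the day). The extraWhere
# string is appended to the registry query:
#
#     >>> seqNums = getSeqNumsForDayObs(butler, "2022-05-03",
#     ...                               extraWhere="exposure.observation_type='science'")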


def sortRecordsByDayObsThenSeqNum(records):
    """Sort a set of records by dayObs, then seqNum, to get the order in
    which they were taken.

    Parameters
    ----------
    records : `list` of `lsst.daf.butler.DimensionRecord`
        The records to be sorted.

    Returns
    -------
    sortedRecords : `list` of `lsst.daf.butler.DimensionRecord`
        The sorted records.

    Raises
    ------
    ValueError
        Raised if the record set contains duplicate records, or if it
        contains (dayObs, seqNum) collisions.
    """
    records = list(records)  # must call list() in case we have a generator
    recordSet = set(records)
    if len(records) != len(recordSet):
        raise ValueError("Record set contains duplicate records and therefore cannot be sorted unambiguously")

    daySeqTuples = [(r.day_obs, r.seq_num) for r in records]
    if len(daySeqTuples) != len(set(daySeqTuples)):
        raise ValueError(
            "Record set contains dayObs/seqNum collisions, and therefore cannot be sorted unambiguously"
        )

    records.sort(key=lambda r: (r.day_obs, r.seq_num))
    return records
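
# Example usage of sortRecordsByDayObsThenSeqNum (a sketch; records as
# returned by registry.queryDimensionRecords for the exposure dimension):
#
#     >>> records = butler.registry.queryDimensionRecords("exposure", datasets="raw")
#     >>> ordered = sortRecordsByDayObsThenSeqNum(records)
#     >>> (ordered[0].day_obs, ordered[0].seq_num)  # earliest exposure first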


def getDaysWithData(butler, datasetType="raw"):
    """Get all the days for which LATISS has taken data on the mountain.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    datasetType : `str`, optional
        The datasetType to query.

    Returns
    -------
    days : `list` of `int`
        A sorted list of the day_obs values for which mountain-top data
        exists.
    """
    # 20200101 is a day between shipping LATISS and going on sky.
    # We used to constrain on exposure.seq_num<50 to massively reduce the
    # number of returned records, whilst being large enough to ensure that no
    # days were missed because early seq_nums were skipped. However, because
    # we have test datasets like LATISS-test-data-tts, where we only kept
    # seqNums from 950 on one day, we can no longer assume this, so don't be
    # tempted to add such a constraint back in here for speed.
    where = "exposure.day_obs>20200101"
    records = butler.registry.queryDimensionRecords("exposure", where=where, datasets=datasetType)
    return sorted(set(r.day_obs for r in records))


def getMostRecentDataId(butler):
    """Get the dataId for the most recent observation.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.

    Returns
    -------
    dataId : `dict`
        The dataId of the most recent exposure.
    """
    lastDay = getMostRecentDayObs(butler)
    seqNum = getSeqNumsForDayObs(butler, lastDay)[-1]
    dataId = {"day_obs": lastDay, "seq_num": seqNum, "detector": 0}
    dataId.update(getExpIdFromDayObsSeqNum(butler, dataId))
    return dataId


def getExpIdFromDayObsSeqNum(butler, dataId):
    """Get the exposure id for a dataId containing a day_obs and seq_num.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    dataId : `dict`
        The dataId for which to return the exposure id.

    Returns
    -------
    dataId : `dict`
        A dict containing the exposure id, keyed by "exposure".
    """
    expRecord = getExpRecordFromDataId(butler, dataId)
    return {"exposure": expRecord.id}


def updateDataIdOrDataCord(dataId, **updateKwargs):
    """Add key-value pairs to a dataId or data coordinate.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate`
        The dataId to update.
    updateKwargs : `dict`
        The key-value pairs to add to the dataId or data coordinate.

    Returns
    -------
    dataId : `dict`
        The updated dataId.

    Notes
    -----
    Always returns a dict, so note that if a data coordinate is supplied, a
    dict is returned, changing the type.
    """
    newId = copy.copy(dataId)
    newId = _assureDict(newId)
    newId.update(updateKwargs)
    return newId


def fillDataId(butler, dataId):
    """Given a dataId, fill it with values for all available dimensions.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dict`
        The dataId to fill.

    Returns
    -------
    dataId : `dict`
        The filled dataId.

    Notes
    -----
    This function is *slow*! Running it on 20,000 dataIds takes approximately
    7 minutes. Virtually all of the slowdown is in the
    butler.registry.expandDataId() call, though, so this wrapper is not to
    blame, and it might speed up in future with butler improvements.
    """
    # ensure it's a dict, to deal with records etc.
    dataId = _assureDict(dataId)

    # this removes extraneous keys that would trip up the registry call:
    # using _rewrite_data_id is perhaps ever so slightly slower than popping
    # the bad keys, or making a minimal dataId by hand, but it is more
    # reliable/general, so we choose that over the other approaches here
    dataId, _ = butler._rewrite_data_id(dataId, butler.get_dataset_type("raw"))

    # now expand, and turn back into a dict
    dataId = butler.registry.expandDataId(dataId, detector=0).full  # this call is VERY slow
    dataId = _assureDict(dataId)

    missingExpId = getExpId(dataId) is None
    missingDayObs = getDayObs(dataId) is None
    missingSeqNum = getSeqNum(dataId) is None

    if missingDayObs or missingSeqNum:
        dayObsSeqNum = getDayObsSeqNumFromExposureId(butler, dataId)
        dataId.update(dayObsSeqNum)

    if missingExpId:
        expId = getExpIdFromDayObsSeqNum(butler, dataId)
        dataId.update(expId)

    return dataId
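
# Example usage of fillDataId (a sketch; slow, see the Notes above). A
# partial dataId comes back with the exposure id, day_obs, seq_num etc.
# all present:
#
#     >>> dataId = fillDataId(butler, {"day_obs": 20220503, "seq_num": 42})
#     >>> all(k in dataId for k in ("exposure", "day_obs", "seq_num"))
#     True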


def _assureDict(dataId):
    """Turn any data-identifier-like object into a dict.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate` or
             `lsst.daf.butler.DimensionRecord`
        The data identifier.

    Returns
    -------
    dataId : `dict`
        The data identifier as a dict.
    """
    if isinstance(dataId, dict):
        return dataId
    elif hasattr(dataId, "items"):  # dafButler.DataCoordinate
        return {str(k): v for k, v in dataId.items()}  # str() required due to full names
    elif hasattr(dataId, "dataId"):  # dafButler.DimensionRecord
        return {str(k): v for k, v in dataId.dataId.items()}
    else:
        raise RuntimeError(f"Failed to coerce {type(dataId)} to dict")


def getExpRecordFromDataId(butler, dataId):
    """Get the exposure record for a given dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dict`
        The dataId.

    Returns
    -------
    expRecord : `lsst.daf.butler.DimensionRecord`
        The exposure record.
    """
    dataId = _assureDict(dataId)
    assert isinstance(dataId, dict), f"dataId must be a dict or DimensionRecord, got {type(dataId)}"

    if expId := getExpId(dataId):
        where = "exposure.id=expId"
        expRecords = butler.registry.queryDimensionRecords(
            "exposure", where=where, bind={"expId": expId}, datasets="raw"
        )
    else:
        dayObs = getDayObs(dataId)
        seqNum = getSeqNum(dataId)
        if not (dayObs and seqNum):
            raise RuntimeError(f"Failed to find either expId or day_obs and seq_num in dataId {dataId}")
        where = "exposure.day_obs=dayObs AND exposure.seq_num=seq_num"
        expRecords = butler.registry.queryDimensionRecords(
            "exposure", where=where, bind={"dayObs": dayObs, "seq_num": seqNum}, datasets="raw"
        )

    expRecords = set(expRecords)
    if not expRecords:
        raise LookupError(f"No exposure records found for {dataId}")
    assert len(expRecords) == 1, f"Found {len(expRecords)} exposure records for {dataId}"
    return expRecords.pop()


def getDayObsSeqNumFromExposureId(butler, dataId):
    """Get the day_obs and seq_num for an exposure id.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dict` or `int`
        The dataId containing the exposure id, or the exposure id itself.

    Returns
    -------
    dataId : `dict`
        A dict containing only the day_obs and seq_num.
    """
    if isinstance(dataId, int):  # check first, as the accessors need a dict-like dataId
        dataId = {"exposure": dataId}
    elif (dayObs := getDayObs(dataId)) and (seqNum := getSeqNum(dataId)):
        return {"day_obs": dayObs, "seq_num": seqNum}
    else:
        dataId = _assureDict(dataId)
    assert isinstance(dataId, dict)

    if not (expId := getExpId(dataId)):
        raise RuntimeError(f"Failed to find exposure id in {dataId}")

    where = "exposure.id=expId"
    expRecords = butler.registry.queryDimensionRecords(
        "exposure", where=where, bind={"expId": expId}, datasets="raw"
    )
    expRecords = set(expRecords)
    if not expRecords:
        raise LookupError(f"No exposure records found for {dataId}")
    assert len(expRecords) == 1, f"Found {len(expRecords)} exposure records for {dataId}"
    record = expRecords.pop()
    return {"day_obs": record.day_obs, "seq_num": record.seq_num}
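
# Example usage of getDayObsSeqNumFromExposureId (a sketch; the exposure id
# shown is hypothetical):
#
#     >>> getDayObsSeqNumFromExposureId(butler, 2022050300042)
#     {'day_obs': 20220503, 'seq_num': 42}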


def getDatasetRefForDataId(butler, datasetType, dataId):
    """Get the dataset reference for a dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    datasetType : `str` or `lsst.daf.butler.DatasetType`
        The dataset type.
    dataId : `dict`
        The dataId.

    Returns
    -------
    datasetRef : `lsst.daf.butler.DatasetRef`
        The dataset reference.
    """
    if not _expid_present(dataId):
        assert _dayobs_present(dataId) and _seqnum_present(dataId)
        dataId.update(getExpIdFromDayObsSeqNum(butler, dataId))

    dRef = butler.find_dataset(datasetType, dataId)
    return dRef


def removeDataProduct(butler, datasetType, dataId):
    """Remove a data product from the registry. Use with caution.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    datasetType : `str` or `lsst.daf.butler.DatasetType`
        The dataset type.
    dataId : `dict`
        The dataId.
    """
    if datasetType == "raw":
        raise RuntimeError("I'm sorry, Dave, I'm afraid I can't do that.")
    dRef = getDatasetRefForDataId(butler, datasetType, dataId)
    butler.pruneDatasets([dRef], disassociate=True, unstore=True, purge=True)
    return


def _dayobs_present(dataId):
    return _get_dayobs_key(dataId) is not None


def _seqnum_present(dataId):
    return _get_seqnum_key(dataId) is not None


def _expid_present(dataId):
    return _get_expid_key(dataId) is not None


def _get_dayobs_key(dataId):
    """Return the key for day_obs if present, else None."""
    keys = [k for k in dataId.keys() if k.find("day_obs") != -1]
    if not keys:
        return None
    return keys[0]


def _get_seqnum_key(dataId):
    """Return the key for seq_num if present, else None."""
    keys = [k for k in dataId.keys() if k.find("seq_num") != -1]
    if not keys:
        return None
    return keys[0]


def _get_expid_key(dataId):
    """Return the key for the exposure id if present, else None."""
    if "exposure.id" in dataId:
        return "exposure.id"
    elif "exposure" in dataId:
        return "exposure"
    return None


def getDayObs(dataId):
    """Get the day_obs from a dataId.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    day_obs : `int` or `None`
        The day_obs value if present, else None.
    """
    if hasattr(dataId, "day_obs"):
        return dataId.day_obs
    if not _dayobs_present(dataId):
        return None
    return dataId["day_obs"] if "day_obs" in dataId else dataId["exposure.day_obs"]


def getSeqNum(dataId):
    """Get the seq_num from a dataId.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    seq_num : `int` or `None`
        The seq_num value if present, else None.
    """
    if hasattr(dataId, "seq_num"):
        return dataId.seq_num
    if not _seqnum_present(dataId):
        return None
    return dataId["seq_num"] if "seq_num" in dataId else dataId["exposure.seq_num"]


def getExpId(dataId):
    """Get the expId from a dataId.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    expId : `int` or `None`
        The expId value if present, else None.
    """
    if hasattr(dataId, "id"):
        return dataId.id
    if not _expid_present(dataId):
        return None
    return dataId["exposure"] if "exposure" in dataId else dataId["exposure.id"]
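
# Example usage of the getDayObs/getSeqNum/getExpId accessors (a sketch):
# they accept dicts with plain or dotted keys, and return None when absent:
#
#     >>> getDayObs({"exposure.day_obs": 20220503})
#     20220503
#     >>> getSeqNum({"seq_num": 42})
#     42
#     >>> getExpId({"day_obs": 20220503}) is None  # no exposure id present
#     True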


def getLatissOnSkyDataIds(
    butler, skipTypes=("bias", "dark", "flat"), checkObject=True, full=True, startDate=None, endDate=None
):
    """Get a list of all on-sky dataIds taken.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    skipTypes : `list` of `str`, optional
        Image types to exclude.
    checkObject : `bool`, optional
        Check whether the value of target_name (formerly OBJECT) is set, and
        exclude the exposure if it is not.
    full : `bool`, optional
        Return filled dataIds. Required for some analyses, but runs much
        (~30x) slower.
    startDate : `int`, optional
        The day_obs to start at, inclusive.
    endDate : `int`, optional
        The day_obs to end at, inclusive.

    Returns
    -------
    dataIds : `list` of `dict`
        The dataIds.
    """

    def isOnSky(expRecord):
        imageType = expRecord.observation_type
        obj = expRecord.target_name
        if checkObject and obj == "NOTSET":
            return False
        if imageType not in skipTypes:
            return True
        return False

    recordSets = []
    days = getDaysWithData(butler)
    if startDate:
        days = [d for d in days if d >= startDate]
    if endDate:
        days = [d for d in days if d <= endDate]
    days = sorted(set(days))

    where = "exposure.day_obs=dayObs"
    for day in days:
        # queryDataIds would be better here, but it's then hard/impossible
        # to do the filtering for which exposures are on sky, so just take
        # the dataIds from the records
        records = butler.registry.queryDimensionRecords(
            "exposure", where=where, bind={"dayObs": day}, datasets="raw"
        )
        recordSets.append(sortRecordsByDayObsThenSeqNum(records))

    dataIds = [r.dataId for r in filter(isOnSky, itertools.chain(*recordSets))]
    if full:
        expandedIds = [
            updateDataIdOrDataCord(butler.registry.expandDataId(dataId, detector=0).full)
            for dataId in dataIds
        ]
        filledIds = [fillDataId(butler, dataId) for dataId in expandedIds]
        return filledIds
    else:
        return [updateDataIdOrDataCord(dataId, detector=0) for dataId in dataIds]
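
# Example usage of getLatissOnSkyDataIds (a sketch; full=False is much
# faster when filled dataIds are not needed):
#
#     >>> dataIds = getLatissOnSkyDataIds(butler, full=False,
#     ...                                 startDate=20220101, endDate=20220503)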


def getExpRecord(butler, instrument, expId=None, dayObs=None, seqNum=None):
    """Get the exposure record for a given exposure id or dayObs+seqNum.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    instrument : `str`
        The instrument name, e.g. 'LSSTCam'.
    expId : `int`, optional
        The exposure id. Must be supplied if dayObs and seqNum are not.
    dayObs : `int`, optional
        The day_obs. Must be supplied, along with seqNum, if expId is not.
    seqNum : `int`, optional
        The seq_num. Must be supplied, along with dayObs, if expId is not.

    Returns
    -------
    expRecord : `lsst.daf.butler.DimensionRecord`
        The exposure record.
    """
    if expId is None and (dayObs is None or seqNum is None):
        raise ValueError("Must supply either expId or (dayObs AND seqNum)")

    # note: the bind name can't be "instrument" itself, as bind strings
    # can't clash with dimension names
    where = "instrument=inst"
    bind = {"inst": instrument}
    if expId:
        where += " AND exposure.id=expId"
        bind.update({"expId": expId})
    if dayObs and seqNum:
        where += " AND exposure.day_obs=dayObs AND exposure.seq_num=seqNum"
        bind.update({"dayObs": dayObs, "seqNum": seqNum})

    expRecords = butler.registry.queryDimensionRecords("exposure", where=where, bind=bind, datasets="raw")
    expRecords = list(set(expRecords))  # must call set() as this may contain many duplicates
    if len(expRecords) != 1:
        raise RuntimeError(
            f"Failed to find unique exposure record for {instrument=} with"
            f" {expId=}, {dayObs=}, {seqNum=}, got {len(expRecords)} records"
        )
    return expRecords[0]
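
# Example usage of getExpRecord (a sketch; either an exposure id or a
# dayObs+seqNum pair identifies the exposure):
#
#     >>> record = getExpRecord(butler, "LATISS", dayObs=20220503, seqNum=42)
#     >>> record.id == getExpRecord(butler, "LATISS", expId=record.id).id
#     True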