Coverage for python/lsst/summit/utils/butlerUtils.py: 13% (254 statements)


# This file is part of summit_utils.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import copy
import itertools

import lsst.daf.butler as dafButler
from deprecated.sphinx import deprecated

from lsst.summit.utils.utils import getSite


__all__ = [
    "makeDefaultLatissButler",
    "updateDataId",
    "sanitizeDayObs",
    "getMostRecentDayObs",
    "getSeqNumsForDayObs",
    "getMostRecentDataId",
    "getDatasetRefForDataId",
    "getDayObs",
    "getSeqNum",
    "getExpId",
    "datasetExists",
    "sortRecordsByDayObsThenSeqNum",
    "getDaysWithData",
    "getExpIdFromDayObsSeqNum",
    "updateDataIdOrDataCord",
    "fillDataId",
    "getExpRecordFromDataId",
    "getDayObsSeqNumFromExposureId",
    "removeDataProduct",
    "getLatissOnSkyDataIds",
    "getExpRecord",
]

_LATISS_DEFAULT_COLLECTIONS = ['LATISS/raw/all', 'LATISS/calib', "LATISS/runs/quickLook"]

# RECENT_DAY must be in the past *and have data* (otherwise some tests are
# no-ops). It speeds up queries by restricting them significantly, but data
# must definitely have been taken since it. It should also not be more than
# 2 months in the past, due to the 60 day lookback time on the summit. All
# this means it should be updated by an informed human.
RECENT_DAY = 20220503


def _configureForSite():
    try:
        site = getSite()
    except ValueError:
        # this method is run automatically on module import, so
        # don't fail for k8s where this cannot yet be determined
        print("WARNING: failed to automatically determine site")
        site = None

    if site == 'tucson':
        global RECENT_DAY
        RECENT_DAY = 20211104  # TTS has limited data, so use this day


_configureForSite()


def getLatissDefaultCollections():
    """Get the default set of LATISS collections, updated for the site at
    which the code is being run.

    Returns
    -------
    collections : `list` of `str`
        The default collections for the site.
    """
    # copy the module-level default so that appending below cannot mutate it
    collections = list(_LATISS_DEFAULT_COLLECTIONS)
    try:
        site = getSite()
    except ValueError:
        site = ''

    if site in ('tucson', 'summit'):
        collections.append("LATISS-test-data")
    return collections


def _update_RECENT_DAY(day):
    """Update RECENT_DAY when a more recent day with data is seen for free."""
    global RECENT_DAY
    RECENT_DAY = max(day - 1, RECENT_DAY)


def makeDefaultLatissButler(*, extraCollections=None, writeable=False, embargo=False):
    """Create a butler for LATISS using the default collections.

    Parameters
    ----------
    extraCollections : `list` of `str`, optional
        Extra input collections to supply to the butler init.
    writeable : `bool`, optional
        Whether to make a writeable butler.
    embargo : `bool`, optional
        Use the embargo repo instead of the main one. Needed to access
        embargoed data.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        The butler.
    """
    # TODO: add logging of which collections are being used
    collections = getLatissDefaultCollections()
    if extraCollections:
        collections.extend(extraCollections)
    try:
        repoString = "LATISS" if not embargo else "/repo/embargo"
        butler = dafButler.Butler(repoString,
                                  collections=collections,
                                  writeable=writeable,
                                  instrument='LATISS')
    except (FileNotFoundError, RuntimeError):
        # Depending on the value of DAF_BUTLER_REPOSITORY_INDEX, and whether
        # it is present and blank, or just not set, both these exception
        # types can be raised; see tests/test_butlerUtils.py:ButlerInitTestCase
        # for details, and for tests which confirm these have not changed.
        raise FileNotFoundError  # unify the exception type
    return butler

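# A minimal usage sketch for the factory above. It assumes a LATISS repo is
# resolvable at this site (e.g. via DAF_BUTLER_REPOSITORY_INDEX), and the
# dataId values are hypothetical:
#
#     butler = makeDefaultLatissButler()
#     exp = butler.get('raw', {'day_obs': 20220503, 'seq_num': 42, 'detector': 0})
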

@deprecated(
    reason="datasetExists has been replaced by Butler.exists(). Will be removed after v26.0.",
    version="v26.0",
    category=FutureWarning,
)
def datasetExists(butler, dataProduct, dataId, **kwargs):
    """Collapse the tri-state behaviour of butler.datasetExists to a boolean.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataProduct : `str`
        The type of data product to check for.
    dataId : `dict`
        The dataId of the dataProduct to check for.

    Returns
    -------
    exists : `bool`
        True if the dataProduct exists for the dataId and can be retrieved,
        else False.
    """
    return butler.exists(dataProduct, dataId, **kwargs)


def updateDataId(dataId, **kwargs):
    """Update a DataCoordinate or dataId dict with kwargs.

    Provides a single interface for adding the detector key (or others) to a
    dataId, whether it is a DataCoordinate or a dict.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate`
        The dataId to update.
    kwargs : `dict`
        The keys and values to add to the dataId.

    Returns
    -------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate`
        The updated dataId, with the same type as the input.
    """
    match dataId:
        case dafButler.DataCoordinate():
            return dafButler.DataCoordinate.standardize(dataId, **kwargs)
        case dict():
            return dict(dataId, **kwargs)
        case _:
            raise ValueError(f"Unknown dataId type {type(dataId)}")

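# A sketch of the type-preserving behaviour of updateDataId (the values are
# hypothetical):
#
#     updateDataId({'day_obs': 20220503, 'seq_num': 42}, detector=0)
#     # -> {'day_obs': 20220503, 'seq_num': 42, 'detector': 0}
#
# Passing a DataCoordinate instead returns a standardized DataCoordinate.
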

def sanitizeDayObs(day_obs):
    """Take a string or int day_obs and turn it into the int version.

    Parameters
    ----------
    day_obs : `str` or `int`
        The day_obs to sanitize.

    Returns
    -------
    day_obs : `int`
        The sanitized day_obs.

    Raises
    ------
    ValueError
        Raised if the day_obs fails to translate for any reason.
    """
    if isinstance(day_obs, int):
        return day_obs
    elif isinstance(day_obs, str):
        try:
            return int(day_obs.replace('-', ''))
        except Exception:
            raise ValueError(f'Failed to sanitize {day_obs!r} to a day_obs')
    else:
        raise ValueError(f'Cannot sanitize {day_obs!r} to a day_obs')

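# For example, each of the following returns the int 20220503:
#
#     sanitizeDayObs(20220503)
#     sanitizeDayObs('20220503')
#     sanitizeDayObs('2022-05-03')
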

def getMostRecentDayObs(butler):
    """Get the most recent day_obs for which there is data.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.

    Returns
    -------
    day_obs : `int`
        The day_obs.
    """
    where = "exposure.day_obs>=RECENT_DAY"
    records = butler.registry.queryDimensionRecords('exposure', where=where, datasets='raw',
                                                    bind={'RECENT_DAY': RECENT_DAY})
    recentDay = max(r.day_obs for r in records)
    _update_RECENT_DAY(recentDay)
    return recentDay


def getSeqNumsForDayObs(butler, day_obs, extraWhere=''):
    """Get a list of all seq_nums taken on a given day_obs.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    day_obs : `int` or `str`
        The day_obs for which the seq_nums are desired.
    extraWhere : `str`, optional
        Any extra where conditions to add to the queryDimensionRecords call.

    Returns
    -------
    seq_nums : `list` of `int`
        The seq_nums taken on the corresponding day_obs, in ascending
        numerical order.
    """
    day_obs = sanitizeDayObs(day_obs)
    where = "exposure.day_obs=day_obs"
    if extraWhere:
        extraWhere = extraWhere.replace('"', '\'')
        where += f" and {extraWhere}"
    records = butler.registry.queryDimensionRecords("exposure",
                                                    where=where,
                                                    bind={'day_obs': day_obs},
                                                    datasets='raw')
    return sorted([r.seq_num for r in records])

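# A sketch of the extraWhere hook (the day_obs value is hypothetical):
#
#     seqNums = getSeqNumsForDayObs(butler, 20220503,
#                                   extraWhere="exposure.observation_type='science'")
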

def sortRecordsByDayObsThenSeqNum(records):
    """Sort a set of records by dayObs, then seqNum, to get the order in
    which they were taken.

    Parameters
    ----------
    records : `list` of `lsst.daf.butler.DimensionRecord`
        The records to be sorted.

    Returns
    -------
    sortedRecords : `list` of `lsst.daf.butler.DimensionRecord`
        The sorted records.

    Raises
    ------
    ValueError
        Raised if the record set contains duplicate records, or if it
        contains (dayObs, seqNum) collisions.
    """
    records = list(records)  # must call list in case we have a generator
    recordSet = set(records)
    if len(records) != len(recordSet):
        raise ValueError("Record set contains duplicate records and therefore cannot be sorted unambiguously")

    daySeqTuples = [(r.day_obs, r.seq_num) for r in records]
    if len(daySeqTuples) != len(set(daySeqTuples)):
        raise ValueError("Record set contains dayObs/seqNum collisions, and therefore cannot be sorted "
                         "unambiguously")

    records.sort(key=lambda r: (r.day_obs, r.seq_num))
    return records


def getDaysWithData(butler, datasetType='raw'):
    """Get all the days for which LATISS has taken data on the mountain.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    datasetType : `str`, optional
        The datasetType to query.

    Returns
    -------
    days : `list` of `int`
        A sorted list of the day_obs values for which mountain-top data
        exists.
    """
    # 20200101 is a day between shipping LATISS and going on sky.
    # We used to constrain on exposure.seq_num<50 to massively reduce the
    # number of returned records whilst being large enough to ensure that no
    # days are missed because early seq_nums were skipped. However, because
    # we have test datasets like LATISS-test-data-tts, where we only kept
    # seqNums from 950 on one day, we can no longer assume this, so don't be
    # tempted to add such a constraint back in here for speed.
    where = "exposure.day_obs>20200101"
    records = butler.registry.queryDimensionRecords("exposure", where=where, datasets=datasetType)
    return sorted(set(r.day_obs for r in records))


def getMostRecentDataId(butler):
    """Get the dataId for the most recent observation.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.

    Returns
    -------
    dataId : `dict`
        The dataId of the most recent exposure.
    """
    lastDay = getMostRecentDayObs(butler)
    seqNum = getSeqNumsForDayObs(butler, lastDay)[-1]
    dataId = {'day_obs': lastDay, 'seq_num': seqNum, 'detector': 0}
    dataId.update(getExpIdFromDayObsSeqNum(butler, dataId))
    return dataId

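# These query helpers chain together naturally, e.g. (assuming a butler from
# makeDefaultLatissButler()):
#
#     day = getMostRecentDayObs(butler)
#     seqNums = getSeqNumsForDayObs(butler, day)
#     dataId = getMostRecentDataId(butler)  # equivalent to taking the last seqNum
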

def getExpIdFromDayObsSeqNum(butler, dataId):
    """Get the exposure id for the dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    dataId : `dict`
        The dataId for which to return the exposure id.

    Returns
    -------
    dataId : `dict`
        A dict containing only the exposure id, keyed by 'exposure'.
    """
    expRecord = getExpRecordFromDataId(butler, dataId)
    return {'exposure': expRecord.id}


def updateDataIdOrDataCord(dataId, **updateKwargs):
    """Add key, value pairs to a dataId or data coordinate.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate`
        The dataId to update.
    updateKwargs : `dict`
        The key, value pairs to add to the dataId or dataCoord.

    Returns
    -------
    dataId : `dict`
        The updated dataId.

    Notes
    -----
    Always returns a dict, so note that if a data coordinate is supplied, a
    dict is returned, changing the type.
    """
    newId = copy.copy(dataId)
    newId = _assureDict(newId)
    newId.update(updateKwargs)
    return newId


def fillDataId(butler, dataId):
    """Given a dataId, fill it with values for all available dimensions.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dict`
        The dataId to fill.

    Returns
    -------
    dataId : `dict`
        The filled dataId.

    Notes
    -----
    This function is *slow*! Running it on 20,000 dataIds takes approximately
    7 minutes. Virtually all of the slowdown is in the
    butler.registry.expandDataId() call, so this wrapper is not to blame, and
    the function might speed up in the future with butler improvements.
    """
    # ensure it's a dict, to deal with records etc
    dataId = _assureDict(dataId)

    # This removes extraneous keys that would trip up the registry call.
    # Using _rewrite_data_id is perhaps ever so slightly slower than popping
    # the bad keys, or making a minimal dataId by hand, but it is more
    # reliable/general, so we choose it over the other approaches here.
    dataId, _ = butler._rewrite_data_id(dataId, butler.get_dataset_type('raw'))

    # now expand, and turn back into a dict
    dataId = butler.registry.expandDataId(dataId, detector=0).full  # this call is VERY slow
    dataId = _assureDict(dataId)

    missingExpId = getExpId(dataId) is None
    missingDayObs = getDayObs(dataId) is None
    missingSeqNum = getSeqNum(dataId) is None

    if missingDayObs or missingSeqNum:
        dayObsSeqNum = getDayObsSeqNumFromExposureId(butler, dataId)
        dataId.update(dayObsSeqNum)

    if missingExpId:
        expId = getExpIdFromDayObsSeqNum(butler, dataId)
        dataId.update(expId)

    return dataId

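# A sketch of what filling does (the exposure id is hypothetical): given only
# an exposure id, fillDataId returns a dict that also carries day_obs,
# seq_num, and the other available dimension values.
#
#     dataId = fillDataId(butler, {'exposure': 2022050300042})
#     dataId['day_obs'], dataId['seq_num']  # now both present
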

def _assureDict(dataId):
    """Turn any data-identifier-like object into a dict.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.dimensions.DataCoordinate` or
             `lsst.daf.butler.dimensions.DimensionRecord`
        The data identifier.

    Returns
    -------
    dataId : `dict`
        The data identifier as a dict.
    """
    if isinstance(dataId, dict):
        return dataId
    elif hasattr(dataId, 'items'):  # dafButler.dimensions.DataCoordinate
        return {str(k): v for k, v in dataId.items()}  # str() required due to full names
    elif hasattr(dataId, 'dataId'):  # dafButler.dimensions.DimensionRecord
        return {str(k): v for k, v in dataId.dataId.items()}
    else:
        raise RuntimeError(f'Failed to coerce {type(dataId)} to dict')


def getExpRecordFromDataId(butler, dataId):
    """Get the exposure record for a given dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dict`
        The dataId.

    Returns
    -------
    expRecord : `lsst.daf.butler.DimensionRecord`
        The exposure record.
    """
    dataId = _assureDict(dataId)
    assert isinstance(dataId, dict), f'dataId must be a dict or DimensionRecord, got {type(dataId)}'

    if expId := getExpId(dataId):
        where = "exposure.id=expId"
        expRecords = butler.registry.queryDimensionRecords("exposure",
                                                           where=where,
                                                           bind={'expId': expId},
                                                           datasets='raw')
    else:
        dayObs = getDayObs(dataId)
        seqNum = getSeqNum(dataId)
        if not (dayObs and seqNum):
            raise RuntimeError(f'Failed to find either expId or day_obs and seq_num in dataId {dataId}')
        where = "exposure.day_obs=day_obs AND exposure.seq_num=seq_num"
        expRecords = butler.registry.queryDimensionRecords("exposure",
                                                           where=where,
                                                           bind={'day_obs': dayObs, 'seq_num': seqNum},
                                                           datasets='raw')

    expRecords = set(expRecords)
    if not expRecords:
        raise LookupError(f"No exposure records found for {dataId}")
    assert len(expRecords) == 1, f'Found {len(expRecords)} exposure records for {dataId}'
    return expRecords.pop()

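# Either style of dataId works here, e.g. (the values are hypothetical):
#
#     record = getExpRecordFromDataId(butler, {'exposure': 2022050300042})
#     record = getExpRecordFromDataId(butler, {'day_obs': 20220503, 'seq_num': 42})
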

def getDayObsSeqNumFromExposureId(butler, dataId):
    """Get the day_obs and seq_num for an exposure id.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dict` or `int`
        The dataId containing the exposure id, or the bare exposure id.

    Returns
    -------
    dataId : `dict`
        A dict containing only the day_obs and seq_num.
    """
    # promote a bare exposure id to a dataId first, as the helpers below
    # require something dict-like
    if isinstance(dataId, int):
        dataId = {'exposure': dataId}

    if (dayObs := getDayObs(dataId)) and (seqNum := getSeqNum(dataId)):
        return {'day_obs': dayObs, 'seq_num': seqNum}

    dataId = _assureDict(dataId)
    assert isinstance(dataId, dict)

    if not (expId := getExpId(dataId)):
        raise RuntimeError(f'Failed to find exposure id in {dataId}')

    where = "exposure.id=expId"
    expRecords = butler.registry.queryDimensionRecords("exposure",
                                                       where=where,
                                                       bind={'expId': expId},
                                                       datasets='raw')
    expRecords = set(expRecords)
    if not expRecords:
        raise LookupError(f"No exposure records found for {dataId}")
    assert len(expRecords) == 1, f'Found {len(expRecords)} exposure records for {dataId}'
    record = expRecords.pop()
    return {'day_obs': record.day_obs, 'seq_num': record.seq_num}


def getDatasetRefForDataId(butler, datasetType, dataId):
    """Get the dataset reference for a dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    datasetType : `str` or `lsst.daf.butler.DatasetType`
        The dataset type.
    dataId : `dict`
        The dataId.

    Returns
    -------
    datasetRef : `lsst.daf.butler.DatasetRef`
        The dataset reference.
    """
    if not _expid_present(dataId):
        assert _dayobs_present(dataId) and _seqnum_present(dataId)
        dataId.update(getExpIdFromDayObsSeqNum(butler, dataId))

    dRef = butler.find_dataset(datasetType, dataId)
    return dRef


def removeDataProduct(butler, datasetType, dataId):
    """Remove a data product from the registry. Use with caution.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    datasetType : `str` or `lsst.daf.butler.DatasetType`
        The dataset type.
    dataId : `dict`
        The dataId.
    """
    if datasetType == 'raw':
        raise RuntimeError("I'm sorry, Dave, I'm afraid I can't do that.")
    dRef = getDatasetRefForDataId(butler, datasetType, dataId)
    butler.pruneDatasets([dRef], disassociate=True, unstore=True, purge=True)


def _dayobs_present(dataId):
    return _get_dayobs_key(dataId) is not None


def _seqnum_present(dataId):
    return _get_seqnum_key(dataId) is not None


def _expid_present(dataId):
    return _get_expid_key(dataId) is not None


def _get_dayobs_key(dataId):
    """Return the key for day_obs if present, else None."""
    keys = [k for k in dataId.keys() if 'day_obs' in k]
    if not keys:
        return None
    return keys[0]


def _get_seqnum_key(dataId):
    """Return the key for seq_num if present, else None."""
    keys = [k for k in dataId.keys() if 'seq_num' in k]
    if not keys:
        return None
    return keys[0]


def _get_expid_key(dataId):
    """Return the key for expId if present, else None."""
    if 'exposure.id' in dataId:
        return 'exposure.id'
    elif 'exposure' in dataId:
        return 'exposure'
    return None


def getDayObs(dataId):
    """Get the day_obs from a dataId.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    day_obs : `int` or `None`
        The day_obs value if present, else None.
    """
    if hasattr(dataId, 'day_obs'):
        return dataId.day_obs
    if not _dayobs_present(dataId):
        return None
    return dataId['day_obs'] if 'day_obs' in dataId else dataId['exposure.day_obs']


def getSeqNum(dataId):
    """Get the seq_num from a dataId.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    seq_num : `int` or `None`
        The seq_num value if present, else None.
    """
    if hasattr(dataId, 'seq_num'):
        return dataId.seq_num
    if not _seqnum_present(dataId):
        return None
    return dataId['seq_num'] if 'seq_num' in dataId else dataId['exposure.seq_num']


def getExpId(dataId):
    """Get the expId from a dataId.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    expId : `int` or `None`
        The expId value if present, else None.
    """
    if hasattr(dataId, 'id'):
        return dataId.id
    if not _expid_present(dataId):
        return None
    return dataId['exposure'] if 'exposure' in dataId else dataId['exposure.id']

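# These getters accept plain dicts and DimensionRecords alike, and cope with
# both bare and fully-qualified key names, e.g.:
#
#     getDayObs({'day_obs': 20220503})           # -> 20220503
#     getDayObs({'exposure.day_obs': 20220503})  # -> 20220503
#     getSeqNum({'exposure': 2022050300042})     # -> None
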

def getLatissOnSkyDataIds(butler, skipTypes=('bias', 'dark', 'flat'), checkObject=True, full=True,
                          startDate=None, endDate=None):
    """Get a list of all on-sky dataIds taken.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    skipTypes : `list` or `tuple` of `str`, optional
        Image types to exclude.
    checkObject : `bool`, optional
        Check if the value of target_name (formerly OBJECT) is set, and
        exclude the exposure if it is not.
    full : `bool`, optional
        Return filled dataIds. Required for some analyses, but runs much
        (~30x) slower.
    startDate : `int`, optional
        The day_obs to start at, inclusive.
    endDate : `int`, optional
        The day_obs to end at, inclusive.

    Returns
    -------
    dataIds : `list` of `dict`
        The dataIds.
    """
    def isOnSky(expRecord):
        imageType = expRecord.observation_type
        obj = expRecord.target_name
        if checkObject and obj == 'NOTSET':
            return False
        if imageType not in skipTypes:
            return True
        return False

    recordSets = []
    days = getDaysWithData(butler)
    if startDate:
        days = [d for d in days if d >= startDate]
    if endDate:
        days = [d for d in days if d <= endDate]
    days = sorted(set(days))

    where = "exposure.day_obs=day_obs"
    for day in days:
        # queryDataIds would be better here, but it's then hard/impossible
        # to do the filtering for which is on sky, so just take the dataIds
        records = butler.registry.queryDimensionRecords("exposure",
                                                        where=where,
                                                        bind={'day_obs': day},
                                                        datasets='raw')
        recordSets.append(sortRecordsByDayObsThenSeqNum(records))

    dataIds = [r.dataId for r in filter(isOnSky, itertools.chain(*recordSets))]
    if full:
        expandedIds = [updateDataIdOrDataCord(butler.registry.expandDataId(dataId, detector=0).full)
                       for dataId in dataIds]
        filledIds = [fillDataId(butler, dataId) for dataId in expandedIds]
        return filledIds
    else:
        return [updateDataIdOrDataCord(dataId, detector=0) for dataId in dataIds]

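# For example, to gather one week of on-sky dataIds (the dates are
# hypothetical), skipping the slow dimension-filling step:
#
#     dataIds = getLatissOnSkyDataIds(butler, full=False,
#                                     startDate=20220501, endDate=20220507)
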

def getExpRecord(butler, instrument, expId=None, dayObs=None, seqNum=None):
    """Get the exposure record for a given exposure id or dayObs+seqNum.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    instrument : `str`
        The instrument name, e.g. 'LSSTCam'.
    expId : `int`, optional
        The exposure id.
    dayObs : `int`, optional
        The day_obs. Must be supplied along with seqNum if expId is not.
    seqNum : `int`, optional
        The seq_num. Must be supplied along with dayObs if expId is not.

    Returns
    -------
    expRecord : `lsst.daf.butler.DimensionRecord`
        The exposure record.
    """
    if expId is None and (dayObs is None or seqNum is None):
        raise ValueError('Must supply either expId or (dayObs AND seqNum)')

    where = "instrument=inst"  # note you can't use =instrument, as bind-strings can't clash with dimensions
    bind = {'inst': instrument}
    if expId:
        where += ' AND exposure.id=expId'
        bind.update({'expId': expId})
    if dayObs and seqNum:
        where += ' AND exposure.day_obs=dayObs AND exposure.seq_num=seqNum'
        bind.update({'dayObs': dayObs, 'seqNum': seqNum})

    expRecords = butler.registry.queryDimensionRecords("exposure",
                                                       where=where,
                                                       bind=bind,
                                                       datasets='raw')
    expRecords = list(set(expRecords))  # must call set, as this may contain many duplicates
    if len(expRecords) != 1:
        raise RuntimeError(f'Failed to find unique exposure record for {instrument=} with'
                           f' {expId=}, {dayObs=}, {seqNum=}, got {len(expRecords)} records')
    return expRecords[0]
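
# A final usage sketch (the expId and dayObs/seqNum values are hypothetical):
#
#     record = getExpRecord(butler, 'LATISS', dayObs=20220503, seqNum=42)
#     record = getExpRecord(butler, 'LATISS', expId=2022050300042)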