Coverage for python/lsst/summit/utils/butlerUtils.py: 14%

281 statements  

coverage.py v7.13.5, created at 2026-05-04 17:50 +0000

1# This file is part of summit_utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import copy 

23import itertools 

24import logging 

25from collections.abc import Iterable, Mapping 

26from typing import Any 

27 

28from deprecated.sphinx import deprecated 

29 

30import lsst.daf.butler as dafButler 

31from lsst.daf.butler.direct_butler import DirectButler 

32from lsst.summit.utils.utils import getSite 

33from lsst.utils.iteration import ensure_iterable 

34 

35__all__ = [ 

36 "makeDefaultLatissButler", # deprecated 

37 "makeDefaultButler", 

38 "updateDataId", 

39 "sanitizeDayObs", 

40 "getMostRecentDayObs", 

41 "getSeqNumsForDayObs", 

42 "getMostRecentDataId", 

43 "getDatasetRefForDataId", 

44 "getDayObs", 

45 "getSeqNum", 

46 "getExpId", 

47 "datasetExists", # deprecated 

48 "sortRecordsByDayObsThenSeqNum", 

49 "getDaysWithData", 

50 "getExpIdFromDayObsSeqNum", 

51 "updateDataIdOrDataCord", 

52 "fillDataId", 

53 "getExpRecordFromDataId", 

54 "getDayObsSeqNumFromExposureId", 

55 "removeDataProduct", 

56 "getLatissOnSkyDataIds", 

57 "getExpRecord", 

58] 

59 

60_LATISS_DEFAULT_COLLECTIONS = ["LATISS/raw/all", "LATISS/calib", "LATISS/runs/quickLook"] 

61 

62# RECENT_DAY must be in the past *and have data* (otherwise some tests are 

63# no-ops), to speed up queries by restricting them significantly, 

64# but data must definitely have been taken since. Should 

65# also not be more than 2 months in the past due to 60 day lookback time on the 

66# summit. All this means it should be updated by an informed human. 

67RECENT_DAY = 20220503 

68 

69 

70def _configureForSite() -> None: 

71 try: 

72 site = getSite() 

73 except ValueError: 

74 # this method is run automatically on module import, so 

75 # don't fail for k8s where this cannot yet be determined 

76 print("WARNING: failed to automatically determine site") 

77 site = None 

78 if site == "tucson": 

79 global RECENT_DAY 

80 RECENT_DAY = 20211104 # TTS has limited data, so use this day 

81 

82 

83_configureForSite() 

84 

85 

86def getLatissDefaultCollections() -> list[str]: 

87 """Get the default set of LATISS collections, updated for the site at 

88 which the code is being run. 

89 

90 Returns 

91 ------- 

92 collections : `list` of `str` 

93 The default collections for the site. 

94 """ 

95 collections = list(_LATISS_DEFAULT_COLLECTIONS) # copy, so repeated calls don't grow the module-level default 

96 try: 

97 site = getSite() 

98 except ValueError: 

99 site = "" 

100 

101 if site == "tucson": 

102 collections.append("LATISS-test-data") 

103 return collections 

104 if site == "summit": 

105 collections.append("LATISS-test-data") 

106 return collections 

107 return collections 

108 

109 

110def _update_RECENT_DAY(day: int) -> None: 

111 """Update the value for RECENT_DAY once we have a value for free.""" 

112 global RECENT_DAY 

113 RECENT_DAY = max(day - 1, RECENT_DAY) 

114 

115 

116@deprecated( 

117 reason="Use the more generic makeDefaultButler('LATISS'). Will be removed after v28.0.", 

118 version="v27.0", 

119 category=FutureWarning, 

120) 

121def makeDefaultLatissButler( 

122 *, 

123 extraCollections: list[str] | None = None, 

124 writeable: bool = False, 

125 embargo: bool = False, 

126) -> dafButler.Butler: 

127 """Create a butler for LATISS using the default collections. 

128 

129 Parameters 

130 ---------- 

131 extraCollections : `list` of `str` 

132 Extra input collections to supply to the butler init. 

133 writeable : `bool`, optional 

134 Whether to make a writable butler. 

135 embargo : `bool`, optional 

136 Use the embargo repo instead of the main one. Needed to access 

137 embargoed data. 

138 

139 Returns 

140 ------- 

141 butler : `lsst.daf.butler.Butler` 

142 The butler. 

143 """ 

144 return makeDefaultButler( 

145 "LATISS", extraCollections=extraCollections, writeable=writeable, embargo=embargo 

146 ) 

147 

148 

149def makeDefaultButler( 

150 instrument: str, 

151 *, 

152 extraCollections: list[str] | None = None, 

153 writeable: bool = False, 

154 embargo: bool = True, 

155) -> dafButler.Butler: 

156 """Create a butler for the instrument using default collections, regardless 

157 of the location. 

158 

159 Parameters 

160 ---------- 

161 extraCollections : `list` of `str` 

162 Extra input collections to supply to the butler init. 

163 writeable : `bool`, optional 

164 Whether to make a writable butler. 

165 embargo : `bool`, optional 

166 Use the embargo repo instead of the main one. Needed to access 

167 embargoed data if not at a summit-like location. 

168 

169 Returns 

170 ------- 

171 butler : `lsst.daf.butler.Butler` 

172 The butler. 

173 

174 Raises 

175 ------ 

176 FileNotFoundError 

177 Raised if the butler cannot be created, because this is the error 

178 when the DAF_BUTLER_REPOSITORY_INDEX is not set correctly. 

179 """ 

180 SUPPORTED_SITES = [ 

181 "summit", 

182 "tucson", 

183 "base", 

184 "staff-rsp", 

185 "rubin-devl", 

186 "usdf-k8s", 

187 ] 

188 

189 site = getSite() 

190 if site not in SUPPORTED_SITES: 

191 # This might look like a slightly weird error to raise, but this is 

192 # the same error that's raised when the DAF_BUTLER_REPOSITORY_INDEX 

193 # isn't set, so it's the most appropriate error to raise here: it is 

194 # what would eventually be raised if this function were allowed to 

195 # continue, just with a less confusing message. 

196 raise FileNotFoundError(f"Default butler creation only available at: {SUPPORTED_SITES}, got {site=}") 

197 

198 summitLike = site in ["summit", "tucson", "base"] 

199 if summitLike: 

200 if embargo is True: 

201 logger = logging.getLogger(__name__) 

202 logger.debug("embargo option is irrelevant on the summit, ignoring") 

203 embargo = False # there's only one repo there, so this also keeps the code simpler 

204 

205 collections: list[str] = [f"{instrument}/defaults"] 

206 raCollection = ( 

207 [f"{instrument}/runs/quickLook"] if summitLike else [f"{instrument}/runs/nightlyValidation"] 

208 ) 

209 collections.extend(raCollection) 

210 if instrument == "LSSTCam": 

211 collections.append("LSSTCam/raw/guider") 

212 

213 repo = instrument if embargo is False else "embargo" 

214 

215 if extraCollections is not None: 

216 assert extraCollections is not None # just for mypy 

217 extraCollectionsList = ensure_iterable(extraCollections) 

218 collections.extend(extraCollectionsList) 

219 

220 try: 

221 butler = dafButler.Butler.from_config( 

222 repo, collections=collections, writeable=writeable, instrument=instrument 

223 ) 

224 except (FileNotFoundError, RuntimeError): 

225 # Depending on the value of DAF_BUTLER_REPOSITORY_INDEX and whether 

226 # it is present and blank, or just not set, both these exception 

227 # types can be raised, see tests/test_butlerUtils.py:ButlerInitTestCase 

228 # for details and tests which confirm these have not changed 

229 raise FileNotFoundError # unify exception type 

230 return butler 

231 
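# --- Editorial example (not part of the original module): a minimal sketch of
# creating a default butler and using it with the helpers below. It assumes the
# code runs at one of the SUPPORTED_SITES with DAF_BUTLER_REPOSITORY_INDEX
# configured; the instrument name and extra collection are illustrative only.
def _example_makeDefaultButler() -> None:
    butler = makeDefaultButler("LATISS", extraCollections=["u/someuser/example"], embargo=True)
    days = getDaysWithData(butler)  # defined later in this module
    print(f"Most recent day with raw data: {days[-1] if days else 'none'}")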

232 

233@deprecated( 

234 reason="datasetExists has been replaced by Butler.exists(). Will be removed after v26.0.", 

235 version="v26.0", 

236 category=FutureWarning, 

237) 

238def datasetExists( 

239 butler: dafButler.Butler, dataProduct: str, dataId: dafButler.DataId, **kwargs: Any 

240) -> bool: 

241 """Collapse the tri-state behaviour of butler.datasetExists to a boolean. 

242 

243 Parameters 

244 ---------- 

245 butler : `lsst.daf.butler.Butler` 

246 The butler 

247 dataProduct : `str` 

248 The type of data product to check for 

249 dataId : `dafButler.DataId` 

250 The dataId of the dataProduct to check for 

251 

252 Returns 

253 ------- 

254 exists : `bool` 

255 True if the dataProduct exists for the dataId and can be retrieved, 

256 else False. 

257 """ 

258 return bool(butler.exists(dataProduct, dataId, **kwargs)) 

259 

260 

261def updateDataId(dataId: dafButler.DataId, **kwargs: Any) -> dafButler.DataId: 

262 """Update a DataCoordinate or dataId dict with kwargs. 

263 

264 Provides a single interface for adding the detector key (or others) to a 

265 dataId whether it's a DataCoordinate or a dict 

266 

267 Parameters 

268 ---------- 

269 dataId : `dafButler.DataId` 

270 The dataId to update. 

271 kwargs : `dict` 

272 The keys and values to add to the dataId. 

273 

274 Returns 

275 ------- 

276 dataId : `dict` or `lsst.daf.butler.DataCoordinate` 

277 The updated dataId, with the same type as the input. 

278 """ 

279 

280 match dataId: 

281 case dafButler.DataCoordinate(): 

282 return dafButler.DataCoordinate.standardize(dataId, **kwargs) 

283 case dict() as dataId: 

284 return dict(dataId, **kwargs) 

285 raise ValueError(f"Unknown dataId type {type(dataId)}") 

286 
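# --- Editorial example (not part of the original module): updateDataId
# preserves the input type, so a dict stays a dict and a DataCoordinate stays a
# DataCoordinate. The values below are illustrative only.
def _example_updateDataId() -> None:
    dataId = {"day_obs": 20220503, "seq_num": 42}
    updated = updateDataId(dataId, detector=0)  # adds the detector key to a new dict
    assert updated == {"day_obs": 20220503, "seq_num": 42, "detector": 0}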

287 

288def sanitizeDayObs(day_obs: int | str) -> int: 

289 """Take string or int day_obs and turn it into the int version. 

290 

291 Parameters 

292 ---------- 

293 day_obs : `str` or `int` 

294 The day_obs to sanitize. 

295 

296 Returns 

297 ------- 

298 day_obs : `int` 

299 The sanitized day_obs. 

300 

301 Raises 

302 ------ 

303 ValueError 

304 Raised if the day_obs fails to translate for any reason. 

305 """ 

306 if isinstance(day_obs, int): 

307 return day_obs 

308 elif isinstance(day_obs, str): 

309 try: 

310 return int(day_obs.replace("-", "")) 

311 except Exception: 

312 raise ValueError(f"Failed to sanitize {day_obs!r} to a day_obs") 

313 raise ValueError(f"Cannot sanitize {day_obs!r} to a day_obs") 

314 
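# --- Editorial example (not part of the original module): both accepted
# spellings of a day_obs map to the same integer form.
def _example_sanitizeDayObs() -> None:
    assert sanitizeDayObs(20220503) == 20220503
    assert sanitizeDayObs("2022-05-03") == 20220503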

315 

316def getMostRecentDayObs(butler: dafButler.Butler) -> int: 

317 """Get the most recent day_obs for which there is data. 

318 

319 Parameters 

320 ---------- 

321 butler : `lsst.daf.butler.Butler` 

322 The butler to query. 

323 

324 Returns 

325 ------- 

326 day_obs : `int` 

327 The day_obs. 

328 """ 

329 where = "exposure.day_obs>=RECENT_DAY" 

330 records = butler.registry.queryDimensionRecords( 

331 "exposure", where=where, datasets="raw", bind={"RECENT_DAY": RECENT_DAY} 

332 ) 

333 recentDay = max(r.day_obs for r in records) 

334 _update_RECENT_DAY(recentDay) 

335 return recentDay 

336 

337 

338def getSeqNumsForDayObs(butler: dafButler.Butler, day_obs: int, extraWhere: str = "") -> list[int]: 

339 """Get a list of all seq_nums taken on a given day_obs. 

340 

341 Parameters 

342 ---------- 

343 butler : `lsst.daf.butler.Butler` 

344 The butler to query. 

345 day_obs : `int` or `str` 

346 The day_obs for which the seq_nums are desired. 

347 extraWhere : `str` 

348 Any extra where conditions to add to the queryDimensionRecords call. 

349 

350 Returns 

351 ------- 

352 seq_nums : `list` of `int` 

353 The seq_nums taken on the corresponding day_obs in ascending numerical 

354 order. 

355 """ 

356 day_obs = sanitizeDayObs(day_obs) 

357 where = "exposure.day_obs=dayObs" 

358 if extraWhere: 

359 extraWhere = extraWhere.replace('"', "'") 

360 where += f" and {extraWhere}" 

361 records = butler.registry.queryDimensionRecords( 

362 "exposure", where=where, bind={"dayObs": day_obs}, datasets="raw" 

363 ) 

364 return sorted(set([r.seq_num for r in records])) 

365 
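# --- Editorial example (not part of the original module): a sketch chaining
# the two queries above, assuming a butler holding "raw" data (e.g. one from
# makeDefaultButler). The extraWhere clause is illustrative only.
def _example_daySummary(butler: dafButler.Butler) -> None:
    dayObs = getMostRecentDayObs(butler)
    seqNums = getSeqNumsForDayObs(
        butler, dayObs, extraWhere="exposure.observation_type = 'science'"
    )
    print(f"{len(seqNums)} science exposures were taken on {dayObs}")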

366 

367def sortRecordsByDayObsThenSeqNum( 

368 records: dafButler.registry.queries.DimensionRecordQueryResults | list[dafButler.DimensionRecord], 

369) -> list[dafButler.DimensionRecord]: 

370 """Sort a set of records by dayObs, then seqNum to get the order in which 

371 they were taken. 

372 

373 Parameters 

374 ---------- 

375 records : `list` of `dafButler.DimensionRecord` 

376 The records to be sorted. 

377 

378 Returns 

379 ------- 

380 sortedRecords : `list` of `dafButler.DimensionRecord` 

381 The sorted records 

382 

383 Raises 

384 ------ 

385 ValueError 

386 Raised if the recordSet contains duplicate records, or if it contains 

387 (dayObs, seqNum) collisions. 

388 """ 

389 records = list(records) # must call list in case we have a generator 

390 recordSet = set(records) 

391 if len(records) != len(recordSet): 

392 raise ValueError("Record set contains duplicate records and therefore cannot be sorted unambiguously") 

393 

394 daySeqTuples = [(r.day_obs, r.seq_num) for r in records] 

395 if len(daySeqTuples) != len(set(daySeqTuples)): 

396 raise ValueError( 

397 "Record set contains dayObs/seqNum collisions, and therefore cannot be sorted " "unambiguously" 

398 ) 

399 

400 records.sort(key=lambda r: (r.day_obs, r.seq_num)) 

401 return records 

402 
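# --- Editorial example (not part of the original module): sorting exposure
# records from a registry query into acquisition order. The set() call dedupes
# records, since a dataset-constrained query can return one row per detector.
def _example_sortRecords(butler: dafButler.Butler, dayObs: int) -> None:
    records = butler.registry.queryDimensionRecords(
        "exposure", where="exposure.day_obs=dayObs", bind={"dayObs": dayObs}, datasets="raw"
    )
    for record in sortRecordsByDayObsThenSeqNum(set(records)):
        print(record.day_obs, record.seq_num, record.observation_type)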

403 

404def getDaysWithData(butler: dafButler.Butler, datasetType: str = "raw") -> list[int]: 

405 """Get all the days for which LATISS has taken data on the mountain. 

406 

407 Parameters 

408 ---------- 

409 butler : `lsst.daf.butler.Butler` 

410 The butler to query. 

411 datasetType : `str` 

412 The datasetType to query. 

413 

414 Returns 

415 ------- 

416 days : `list` of `int` 

417 A sorted list of the day_obs values for which mountain-top data exists. 

418 """ 

419 # 20200101 is a day between shipping LATISS and going on sky 

420 # We used to constrain on exposure.seq_num<50 to massively reduce the 

421 # number of returned records whilst being large enough to ensure that no 

422 # days are missed because early seq_nums were skipped. However, because 

423 # we have test datasets like LATISS-test-data-tts where we only kept 

424 # seqNums from 950 on one day, we can no longer assume this so don't be 

425 # tempted to add such a constraint back in here for speed. 

426 where = "exposure.day_obs>20200101" 

427 records = butler.registry.queryDimensionRecords("exposure", where=where, datasets=datasetType) 

428 return sorted(set([r.day_obs for r in records])) 

429 

430 

431def getMostRecentDataId(butler: dafButler.Butler) -> dict[str, Any]: 

432 """Get the dataId for the most recent observation. 

433 

434 Parameters 

435 ---------- 

436 butler : `lsst.daf.butler.Butler` 

437 The butler to query. 

438 

439 Returns 

440 ------- 

441 dataId : `dict[str, Any]` 

442 The dataId of the most recent exposure. 

443 """ 

444 lastDay = getMostRecentDayObs(butler) 

445 seqNum = getSeqNumsForDayObs(butler, lastDay)[-1] 

446 dataId = {"day_obs": lastDay, "seq_num": seqNum, "detector": 0} 

447 dataId.update(getExpIdFromDayObsSeqNum(butler, dataId)) 

448 return dataId 

449 
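# --- Editorial example (not part of the original module): fetching the most
# recent raw exposure via its dataId, assuming a butler created with
# makeDefaultButler so that instrument and collection defaults are set.
def _example_getMostRecent(butler: dafButler.Butler) -> None:
    dataId = getMostRecentDataId(butler)
    exposure = butler.get("raw", dataId)
    print(f"Retrieved {dataId} -> {type(exposure).__name__}")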

450 

451def getExpIdFromDayObsSeqNum(butler: dafButler.Butler, dataId: dafButler.DataId) -> dict[str, int]: 

452 """Get the exposure id for the dataId. 

453 

454 Parameters 

455 ---------- 

456 butler : `lsst.daf.butler.Butler` 

457 The butler to query. 

458 dataId : `dafButler.DataId` 

459 The dataId for which to return the exposure id. 

460 

461 Returns 

462 ------- 

463 dataId : `dict[str, int]` 

464 A dict containing only the exposure id, keyed as "exposure". 

465 """ 

466 expRecord = getExpRecordFromDataId(butler, dataId) 

467 return {"exposure": expRecord.id} 

468 

469 

470def updateDataIdOrDataCord(dataId: dafButler.DataId, **updateKwargs: Any) -> Mapping[str, Any]: 

471 """Add key, value pairs to a dataId or data coordinate. 

472 

473 Parameters 

474 ---------- 

475 dataId : `dafButler.DataId` 

476 The dataId for which to return the exposure id. 

477 updateKwargs : `dict[str, Any]` 

478 The key-value pairs to add to the dataId or dataCoord. 

479 

480 Returns 

481 ------- 

482 dataId : `Mapping[str, Any]` 

483 The updated dataId. 

484 

485 Notes 

486 ----- 

487 Always returns a dict, so note that if a data coordinate is supplied, a 

488 dict is returned, changing the type. 

489 """ 

490 newId = copy.copy(dataId) 

491 newId = _assureDict(newId) 

492 newId.update(updateKwargs) 

493 return newId 

494 

495 

496def fillDataId(butler: DirectButler, dataId: dafButler.DataId) -> Mapping[str, Any]: 

497 """Given a dataId, fill it with values for all available dimensions. 

498 

499 Parameters 

500 ---------- 

501 butler : `lsst.daf.butler.direct_butler.DirectButler` 

502 The butler. 

503 dataId : `dafButler.DataId` 

504 The dataId to fill. 

505 

506 Returns 

507 ------- 

508 dataId : `Mapping[str, Any]` 

509 The filled dataId. 

510 

511 Notes 

512 ----- 

513 This function is *slow*! Running this on 20,000 dataIds takes approximately 

514 7 minutes. Virtually all the slowdown is in the 

515 butler.registry.expandDataId() call though, so this wrapper is not to blame 

516 here, and might speed up in future with butler improvements. 

517 """ 

518 # ensure it's a dict to deal with records etc 

519 dictId = _assureDict(dataId) 

520 

521 # this removes extraneous keys that would trip up the registry call 

522 # using _rewrite_data_id is perhaps ever so slightly slower than popping 

523 # the bad keys, or making a minimal dataId by hand, but is more 

524 # reliable/general, so we choose that over the other approach here 

525 realDataId, _ = butler._rewrite_data_id(dictId, butler.get_dataset_type("raw")) 

526 

527 # now expand and turn back to a dict - this call is VERY slow 

528 expandedDictDataId = dict(butler.registry.expandDataId(realDataId, detector=0).mapping) 

529 

530 missingExpId = getExpId(expandedDictDataId) is None 

531 missingDayObs = getDayObs(expandedDictDataId) is None 

532 missingSeqNum = getSeqNum(expandedDictDataId) is None 

533 

534 if missingDayObs or missingSeqNum: 

535 dayObsSeqNum = getDayObsSeqNumFromExposureId(butler, expandedDictDataId) 

536 expandedDictDataId.update(dayObsSeqNum) 

537 

538 if missingExpId: 

539 expId = getExpIdFromDayObsSeqNum(butler, expandedDictDataId) 

540 expandedDictDataId.update(expId) 

541 

542 return expandedDictDataId 

543 
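# --- Editorial example (not part of the original module): fillDataId expands a
# minimal day_obs/seq_num identifier into a dict holding every dimension value.
# It needs a DirectButler (the default at the supported sites); the values
# below are illustrative, and as noted above the call is slow on large lists.
def _example_fillDataId(butler: DirectButler) -> None:
    partial = {"day_obs": 20220503, "seq_num": 123}
    full = fillDataId(butler, partial)
    print(getExpId(full), getDayObs(full), getSeqNum(full))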

544 

545def _assureDict( 

546 dataId: dafButler.DataId | dafButler.DimensionRecord, 

547) -> dict[str, Any]: 

548 """Turn any data-identifier-like object into a dict. 

549 

550 Parameters 

551 ---------- 

552 dataId : `dafButler.DataId` or 

553 `lsst.daf.butler.dimensions.DimensionRecord` 

554 The data identifier. 

555 

556 Returns 

557 ------- 

558 dataId : `dict[str, Any]` 

559 The data identifier as a dict. 

560 """ 

561 if isinstance(dataId, dict): 

562 return dataId 

563 elif hasattr(dataId, "mapping"): # dafButler.DataCoordinate 

564 return {str(k): v for k, v in dataId.mapping.items()} 

565 elif hasattr(dataId, "dataId"): # dafButler.DimensionRecord 

566 return {str(k): v for k, v in dataId.dataId.mapping.items()} 

567 elif hasattr(dataId, "keys"): # some other mapping, assume str keys 

568 return dict(dataId) 

569 else: 

570 raise RuntimeError(f"Failed to coerce {type(dataId)} to dict") 

571 

572 

573def getExpRecordFromDataId(butler: dafButler.Butler, dataId: dafButler.DataId) -> dafButler.DimensionRecord: 

574 """Get the exposure record for a given dataId. 

575 

576 Parameters 

577 ---------- 

578 butler : `lsst.daf.butler.Butler` 

579 The butler. 

580 dataId : `dafButler.DataId` 

581 The dataId. 

582 

583 Returns 

584 ------- 

585 expRecord : `lsst.daf.butler.dimensions.DimensionRecord` 

586 The exposure record. 

587 """ 

588 dataId = _assureDict(dataId) 

589 assert isinstance(dataId, dict), f"dataId must be a dict or DimensionRecord, got {type(dataId)}" 

590 

591 if expId := getExpId(dataId): 

592 where = "exposure.id=expId" 

593 expRecords = butler.registry.queryDimensionRecords( 

594 "exposure", where=where, bind={"expId": expId}, datasets="raw" 

595 ) 

596 

597 else: 

598 dayObs = getDayObs(dataId) 

599 seqNum = getSeqNum(dataId) 

600 if not (dayObs and seqNum): 

601 raise RuntimeError(f"Failed to find either expId or day_obs and seq_num in dataId {dataId}") 

602 where = "exposure.day_obs=dayObs AND exposure.seq_num=seq_num" 

603 expRecords = butler.registry.queryDimensionRecords( 

604 "exposure", where=where, bind={"dayObs": dayObs, "seq_num": seqNum}, datasets="raw" 

605 ) 

606 

607 filteredExpRecords = set(expRecords) 

608 if not filteredExpRecords: 

609 raise LookupError(f"No exposure records found for {dataId}") 

610 assert len(filteredExpRecords) == 1, f"Found {len(filteredExpRecords)} exposure records for {dataId}" 

611 return filteredExpRecords.pop() 

612 

613 

614def getDayObsSeqNumFromExposureId(butler: dafButler.Butler, dataId: Mapping[str, Any]) -> dict[str, int]: 

615 """Get the day_obs and seq_num for an exposure id. 

616 

617 Parameters 

618 ---------- 

619 butler : `lsst.daf.butler.Butler` 

620 The butler. 

621 dataId : `Mapping[str, Any]` 

622 The dataId containing the exposure id. 

623 

624 Returns 

625 ------- 

626 dataId : `dict[str, int]` 

627 A dict containing only the day_obs and seq_num. 

628 """ 

629 if (dayObs := getDayObs(dataId)) and (seqNum := getSeqNum(dataId)): 

630 return {"day_obs": dayObs, "seq_num": seqNum} 

631 

632 if isinstance(dataId, int): 

633 dataId = {"exposure": dataId} 

634 else: 

635 dataId = _assureDict(dataId) 

636 assert isinstance(dataId, dict) 

637 

638 if not (expId := getExpId(dataId)): 

639 raise RuntimeError(f"Failed to find exposure id in {dataId}") 

640 

641 where = "exposure.id=expId" 

642 expRecords = list( 

643 butler.registry.queryDimensionRecords("exposure", where=where, bind={"expId": expId}, datasets="raw") 

644 ) 

645 uniqueExpRecords = set(expRecords) 

646 if not uniqueExpRecords: 

647 raise LookupError(f"No exposure records found for {dataId}") 

648 assert len(uniqueExpRecords) == 1, f"Found {len(uniqueExpRecords)} exposure records for {dataId}" 

649 record = uniqueExpRecords.pop() 

650 return {"day_obs": record.day_obs, "seq_num": record.seq_num} 

651 

652 

653def getDatasetRefForDataId( 

654 butler: dafButler.Butler, datasetType: str | dafButler.DatasetType, dataId: dict[str, Any] 

655) -> dafButler.DatasetRef | None: 

656 """Get the datasetReference for a dataId. 

657 

658 Parameters 

659 ---------- 

660 butler : `lsst.daf.butler.Butler` 

661 The butler. 

662 datasetType : `str` or `lsst.daf.butler.DatasetType` 

663 The dataset type. 

664 dataId : `dict[str, Any]` 

665 The dataId. 

666 

667 Returns 

668 ------- 

669 datasetRef : `lsst.daf.butler.DatasetRef` or `None` 

670 The dataset reference, or None if it is not found. 

671 """ 

672 if not _expid_present(dataId): 

673 assert _dayobs_present(dataId) and _seqnum_present(dataId) 

674 dataId.update(getExpIdFromDayObsSeqNum(butler, dataId)) 

675 

676 dRef = butler.find_dataset(datasetType, dataId) 

677 return dRef 

678 

679 

680def removeDataProduct( 

681 butler: dafButler.Butler, datasetType: str | dafButler.DatasetType, dataId: dict[str, Any] 

682) -> None: 

683 """Remove a data prodcut from the registry. Use with caution. 

684 

685 Parameters 

686 ---------- 

687 butler : `lsst.daf.butler.Butler` 

688 The butler. 

689 datasetType : `str` or `lsst.daf.butler.DatasetType` 

690 The dataset type. 

691 dataId : `dict[str, Any]` 

692 The dataId. 

693 

694 """ 

695 if datasetType == "raw": 

696 raise RuntimeError("I'm sorry, Dave, I'm afraid I can't do that.") 

697 dRef = getDatasetRefForDataId(butler, datasetType, dataId) 

698 if dRef is None: 

699 return 

700 butler.pruneDatasets([dRef], disassociate=True, unstore=True, purge=True) 

701 return 

702 
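# --- Editorial example (not part of the original module): looking up a dataset
# reference and pruning the corresponding product. The dataset type
# "quickLookExp" and the dataId values are illustrative only, and the butler
# must be writeable for the removal step to succeed.
def _example_removeProduct(butler: dafButler.Butler) -> None:
    dataId = {"day_obs": 20220503, "seq_num": 123, "detector": 0}
    ref = getDatasetRefForDataId(butler, "quickLookExp", dataId)
    if ref is not None:
        removeDataProduct(butler, "quickLookExp", dataId)  # use with caution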

703 

704def _dayobs_present(dataId: Mapping[str, Any]) -> bool: 

705 return _get_dayobs_key(dataId) is not None 

706 

707 

708def _seqnum_present(dataId: Mapping[str, Any]) -> bool: 

709 return _get_seqnum_key(dataId) is not None 

710 

711 

712def _expid_present(dataId: Mapping[str, Any]) -> bool: 

713 return _get_expid_key(dataId) is not None 

714 

715 

716def _get_dayobs_key(dataId: Mapping[str, Any]) -> str | None: 

717 """Return the key for day_obs if present, else None""" 

718 keys = [k for k in dataId.keys() if k.find("day_obs") != -1] 

719 if not keys: 

720 return None 

721 return keys[0] 

722 

723 

724def _get_seqnum_key(dataId: Mapping[str, Any]) -> str | None: 

725 """Return the key for seq_num if present, else None""" 

726 keys = [k for k in dataId.keys() if k.find("seq_num") != -1] 

727 if not keys: 

728 return None 

729 return keys[0] 

730 

731 

732def _get_expid_key(dataId: Mapping[str, Any]) -> str | None: 

733 """Return the key for expId if present, else None""" 

734 if "exposure.id" in dataId: 

735 return "exposure.id" 

736 elif "exposure" in dataId: 

737 return "exposure" 

738 return None 

739 

740 

741def getDayObs(dataId: Mapping[str, Any] | dafButler.DimensionRecord) -> int | None: 

742 """Get the day_obs from a dataId. 

743 

744 Parameters 

745 ---------- 

746 dataId : `Mapping[str, Any]` or `lsst.daf.butler.DimensionRecord` 

747 The dataId. 

748 

749 Returns 

750 ------- 

751 day_obs : `int` or `None` 

752 The day_obs value if present, else None. 

753 """ 

754 if hasattr(dataId, "day_obs"): 

755 return getattr(dataId, "day_obs") 

756 if not _dayobs_present(dataId): 

757 return None 

758 return dataId["day_obs"] if "day_obs" in dataId else dataId["exposure.day_obs"] 

759 

760 

761def getSeqNum(dataId: Mapping[str, Any] | dafButler.DimensionRecord) -> int | None: 

762 """Get the seq_num from a dataId. 

763 

764 Parameters 

765 ---------- 

766 dataId : `Mapping[str, Any]` or `lsst.daf.butler.DimensionRecord` 

767 The dataId. 

768 

769 Returns 

770 ------- 

771 seq_num : `int` or `None` 

772 The seq_num value if present, else None. 

773 """ 

774 if hasattr(dataId, "seq_num"): 

775 return getattr(dataId, "seq_num") 

776 if not _seqnum_present(dataId): 

777 return None 

778 return dataId["seq_num"] if "seq_num" in dataId else dataId["exposure.seq_num"] 

779 

780 

781def getExpId(dataId: Mapping[str, Any] | dafButler.DimensionRecord) -> int | None: 

782 """Get the expId from a dataId. 

783 

784 Parameters 

785 ---------- 

786 dataId : `Mapping[str, Any]` or `lsst.daf.butler.DimensionRecord` 

787 The dataId. 

788 

789 Returns 

790 ------- 

791 expId : `int` or `None` 

792 The expId value if present, else None. 

793 """ 

794 if hasattr(dataId, "id"): 

795 return getattr(dataId, "id") 

796 if not _expid_present(dataId): 

797 return None 

798 return dataId["exposure"] if "exposure" in dataId else dataId["exposure.id"] 

799 
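# --- Editorial example (not part of the original module): the three getters
# above accept dicts keyed either by the plain dimension names or by their
# "exposure."-prefixed forms, as well as exposure records. Values are
# illustrative only.
def _example_getters() -> None:
    dataId = {"exposure.day_obs": 20220503, "exposure.seq_num": 31, "exposure": 2022050300031}
    assert getDayObs(dataId) == 20220503
    assert getSeqNum(dataId) == 31
    assert getExpId(dataId) == 2022050300031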

800 

801def getLatissOnSkyDataIds( 

802 butler: DirectButler, 

803 skipTypes: Iterable[str] = ("bias", "dark", "flat"), 

804 checkObject: bool = True, 

805 full: bool = True, 

806 startDate: int | None = None, 

807 endDate: int | None = None, 

808) -> list[Mapping[str, int | str | None]]: 

809 """Get a list of all on-sky dataIds taken. 

810 

811 Parameters 

812 ---------- 

813 butler : `lsst.daf.butler.Butler` 

814 The butler. 

815 skipTypes : `list` of `str` 

816 Image types to exclude. 

817 checkObject : `bool` 

818 Check if the value of target_name (formerly OBJECT) is set and exclude 

819 if it is not. 

820 full : `bool` 

821 Return filled dataIds. Required for some analyses, but runs much 

822 (~30x) slower. 

823 startDate : `int` 

824 The day_obs to start at, inclusive. 

825 endDate : `int` 

826 The day_obs to end at, inclusive. 

827 

828 Returns 

829 ------- 

830 dataIds : `list` of `dict` 

831 The dataIds. 

832 """ 

833 

834 def isOnSky(expRecord: dafButler.DimensionRecord) -> bool: 

835 imageType = expRecord.observation_type 

836 obj = expRecord.target_name 

837 if checkObject and obj == "NOTSET": 

838 return False 

839 if imageType not in skipTypes: 

840 return True 

841 return False 

842 

843 recordSets = [] 

844 days = getDaysWithData(butler) 

845 if startDate: 

846 days = [d for d in days if d >= startDate] 

847 if endDate: 

848 days = [d for d in days if d <= endDate] 

849 days = sorted(set(days)) 

850 

851 where = "exposure.day_obs=dayObs" 

852 for day in days: 

853 # queryDataIds would be better here, but it's then hard/impossible 

854 # to do the filtering for which is on sky, so just take the dataIds 

855 records = butler.registry.queryDimensionRecords( 

856 "exposure", where=where, bind={"dayObs": day}, datasets="raw" 

857 ) 

858 recordSets.append(sortRecordsByDayObsThenSeqNum(records)) 

859 

860 dataIds = [r.dataId for r in filter(isOnSky, itertools.chain(*recordSets))] 

861 if full: 

862 expandedIds = [ 

863 updateDataIdOrDataCord(butler.registry.expandDataId(dataId, detector=0).mapping) 

864 for dataId in dataIds 

865 ] 

866 filledIds = [fillDataId(butler, dataId) for dataId in expandedIds] 

867 return filledIds 

868 else: 

869 return [updateDataIdOrDataCord(dataId, detector=0) for dataId in dataIds] 

870 

871 

872def getExpRecord( 

873 butler: dafButler.Butler, 

874 instrument: str, 

875 expId: int | None = None, 

876 dayObs: int | None = None, 

877 seqNum: int | None = None, 

878) -> dafButler.DimensionRecord: 

879 """Get the exposure record for a given exposure ID or dayObs+seqNum. 

880 

881 Parameters 

882 ---------- 

883 butler : `lsst.daf.butler.Butler` 

884 The butler. 

885 instrument : `str` 

886 The instrument name, e.g. 'LSSTCam'. 

887 expId : `int`, optional 

888 The exposure ID. Supply either this, or both dayObs and seqNum. 

889 

890 Returns 

891 ------- 

892 expRecord : `lsst.daf.butler.DimensionRecord` 

893 The exposure record. 

894 """ 

895 if expId is None and (dayObs is None or seqNum is None): 

896 raise ValueError("Must supply either expId or (dayObs AND seqNum)") 

897 

898 where = "instrument=inst" # note: can't use "instrument" as the bind name, since bind names can't clash with dimensions 

899 bind: "dict[str, str | int]" = {"inst": instrument} 

900 if expId: 

901 where += " AND exposure.id=expId" 

902 bind.update({"expId": expId}) 

903 if dayObs and seqNum: 

904 where += " AND exposure.day_obs=dayObs AND exposure.seq_num=seqNum" 

905 bind.update({"dayObs": dayObs, "seqNum": seqNum}) 

906 

907 expRecords = butler.registry.queryDimensionRecords("exposure", where=where, bind=bind, datasets="raw") 

908 filteredExpRecords = list(set(expRecords)) # must call set as this may contain many duplicates 

909 if len(filteredExpRecords) != 1: 

910 raise RuntimeError( 

911 f"Failed to find unique exposure record for {instrument=} with" 

912 f" {expId=}, {dayObs=}, {seqNum=}, got {len(filteredExpRecords)} records" 

913 ) 

914 return filteredExpRecords[0]
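# --- Editorial example (not part of the original module): the two equivalent
# ways of looking up a single exposure record; the instrument and identifiers
# are illustrative only.
def _example_getExpRecord(butler: dafButler.Butler) -> None:
    byDaySeq = getExpRecord(butler, "LATISS", dayObs=20220503, seqNum=31)
    byExpId = getExpRecord(butler, "LATISS", expId=byDaySeq.id)
    assert byDaySeq.id == byExpId.id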