# This file is part of summit_utils.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import copy
import itertools

import lsst.daf.butler as dafButler

from lsst.summit.utils.utils import getSite


__all__ = ["makeDefaultLatissButler",
           "updateDataId",
           "sanitize_day_obs",
           "getMostRecentDayObs",
           "getSeqNumsForDayObs",
           "getMostRecentDataId",
           "getDatasetRefForDataId",
           "getDayObs",
           "getSeqNum",
           "getExpId",
           "datasetExists",
           "sortRecordsByDayObsThenSeqNum",
           "getDaysWithData",
           "getExpIdFromDayObsSeqNum",
           "updateDataIdOrDataCord",
           "fillDataId",
           "getExpRecordFromDataId",
           "getDayObsSeqNumFromExposureId",
           "removeDataProduct",
           "getLatissOnSkyDataIds",
           ]

_LATISS_DEFAULT_COLLECTIONS = ['LATISS/raw/all', 'LATISS/calib', "LATISS/runs/quickLook"]

# RECENT_DAY must be in the past *and have data* (otherwise some tests are
# no-ops). It is used to speed up queries by restricting them significantly,
# but data must definitely have been taken since this day. It should also not
# be more than 2 months in the past, due to the 60 day lookback time on the
# summit. All this means it should be updated by an informed human.
RECENT_DAY = 20220503


def _configureForSite():
    try:
        site = getSite()
    except ValueError:
        # this function is run automatically on module import, so don't
        # fail for k8s, where the site cannot yet be determined
        print("WARNING: failed to automatically determine site")
        site = None

    if site == 'tucson':
        global RECENT_DAY
        RECENT_DAY = 20211104  # TTS has limited data, so use this day


_configureForSite()


def getLatissDefaultCollections():
    """Get the default set of LATISS collections, updated for the site at
    which the code is being run.

    Returns
    -------
    collections : `list` of `str`
        The default collections for the site.
    """
    # take a copy, so appends here can't mutate the module-level default
    collections = list(_LATISS_DEFAULT_COLLECTIONS)
    try:
        site = getSite()
    except ValueError:
        site = ''

    if site in ('tucson', 'summit'):
        collections.append("LATISS-test-data")
    return collections


def _update_RECENT_DAY(day):
    """Update the value for RECENT_DAY once we have a value for free."""
    global RECENT_DAY
    RECENT_DAY = max(day - 1, RECENT_DAY)


def makeDefaultLatissButler(*, extraCollections=None, writeable=False, embargo=False):
    """Create a butler for LATISS using the default collections.

    Parameters
    ----------
    extraCollections : `list` of `str`, optional
        Extra input collections to supply to the butler init.
    writeable : `bool`, optional
        Whether to make a writeable butler.
    embargo : `bool`, optional
        Use the embargo repo instead of the main one. Needed to access
        embargoed data.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        The butler.
    """
    # TODO: Add logging of which collections are going in
    collections = getLatissDefaultCollections()
    if extraCollections:
        collections.extend(extraCollections)
    try:
        repoString = "LATISS" if not embargo else "/repo/embargo"
        butler = dafButler.Butler(repoString,
                                  collections=collections,
                                  writeable=writeable,
                                  instrument='LATISS')
    except (FileNotFoundError, RuntimeError) as e:
        # Depending on the value of DAF_BUTLER_REPOSITORY_INDEX, and whether
        # it is present and blank or just not set, both these exception
        # types can be raised; see tests/test_butlerUtils.py:ButlerInitTestCase
        # for details, and for tests which confirm these have not changed.
        raise FileNotFoundError from e  # unify the exception type
    return butler
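
# Example usage (a minimal sketch; assumes this site has a LATISS repo the
# butler can resolve, and that it contains at least one raw):
#
#     butler = makeDefaultLatissButler()
#     dataId = getMostRecentDataId(butler)  # has day_obs, seq_num, detector and exposure keys
#     exp = butler.get('raw', dataId)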


# TODO: DM-32940 can remove this whole function once this ticket merges.
def datasetExists(butler, dataProduct, dataId, **kwargs):
    """Collapse the tri-state behaviour of butler.datasetExists to a boolean.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataProduct : `str`
        The type of data product to check for.
    dataId : `dict`
        The dataId of the dataProduct to check for.

    Returns
    -------
    exists : `bool`
        True if the dataProduct exists for the dataId and can be retrieved,
        else False.
    """
    try:
        exists = butler.datasetExists(dataProduct, dataId, **kwargs)
        return exists
    except (LookupError, RuntimeError):
        return False
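
# Example usage (a sketch; 'quickLookExp' is illustrative and need not exist
# in a given repo):
#
#     if datasetExists(butler, 'quickLookExp', dataId):
#         exp = butler.get('quickLookExp', dataId)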


def updateDataId(dataId, **kwargs):
    """Update a DataCoordinate or dataId dict with kwargs.

    Provides a single interface for adding the detector key (or others) to a
    dataId, whether it is a DataCoordinate or a dict.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate`
        The dataId to update.
    kwargs : `dict`
        The keys and values to add to the dataId.

    Returns
    -------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate`
        The updated dataId, with the same type as the input.
    """
    match dataId:
        case dafButler.DataCoordinate():
            return dafButler.DataCoordinate.standardize(dataId, **kwargs)
        case dict():
            return dict(dataId, **kwargs)
    raise ValueError(f"Unknown dataId type {type(dataId)}")
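
# Example usage (a sketch; the return type always matches the input type):
#
#     dataId = {'day_obs': 20220503, 'seq_num': 42}
#     dataId = updateDataId(dataId, detector=0)  # still a dict
#
# whereas a DataCoordinate input comes back as a new, standardized
# DataCoordinate.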


def sanitize_day_obs(day_obs):
    """Take a string or int day_obs and turn it into the int version.

    Parameters
    ----------
    day_obs : `str` or `int`
        The day_obs to sanitize.

    Returns
    -------
    day_obs : `int`
        The sanitized day_obs.

    Raises
    ------
    ValueError
        Raised if the day_obs fails to translate for any reason.
    """
    if isinstance(day_obs, int):
        return day_obs
    elif isinstance(day_obs, str):
        try:
            return int(day_obs.replace('-', ''))
        except Exception as e:
            raise ValueError(f'Failed to sanitize {day_obs!r} to a day_obs') from e
    else:
        raise ValueError(f'Cannot sanitize {day_obs!r} to a day_obs')
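
# Example usage (a sketch):
#
#     sanitize_day_obs('2022-05-03')  # returns 20220503
#     sanitize_day_obs(20220503)      # returned unchanged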


def getMostRecentDayObs(butler):
    """Get the most recent day_obs for which there is data.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.

    Returns
    -------
    day_obs : `int`
        The day_obs.
    """
    where = "exposure.day_obs>=RECENT_DAY"
    records = butler.registry.queryDimensionRecords('exposure', where=where, datasets='raw',
                                                    bind={'RECENT_DAY': RECENT_DAY})
    recentDay = max(r.day_obs for r in records)
    _update_RECENT_DAY(recentDay)
    return recentDay


def getSeqNumsForDayObs(butler, day_obs, extraWhere=''):
    """Get a list of all seq_nums taken on a given day_obs.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    day_obs : `int` or `str`
        The day_obs for which the seq_nums are desired.
    extraWhere : `str`, optional
        Any extra where conditions to add to the queryDimensionRecords call.

    Returns
    -------
    seq_nums : `list` of `int`
        The seq_nums taken on the corresponding day_obs, in ascending
        numerical order.
    """
    day_obs = sanitize_day_obs(day_obs)
    where = "exposure.day_obs=day_obs"
    if extraWhere:
        extraWhere = extraWhere.replace('"', '\'')
        where += f" and {extraWhere}"
    records = butler.registry.queryDimensionRecords("exposure",
                                                    where=where,
                                                    bind={'day_obs': day_obs},
                                                    datasets='raw')
    return sorted(r.seq_num for r in records)
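
# Example usage (a sketch; the extraWhere clause is illustrative and assumes
# the standard exposure dimension schema):
#
#     seqNums = getSeqNumsForDayObs(butler, '2022-05-03')
#     darks = getSeqNumsForDayObs(butler, 20220503,
#                                 extraWhere="exposure.observation_type='dark'")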


def sortRecordsByDayObsThenSeqNum(records):
    """Sort a set of records by dayObs, then seqNum, to get the order in which
    they were taken.

    Parameters
    ----------
    records : `iterable` of `lsst.daf.butler.DimensionRecord`
        The records to be sorted.

    Returns
    -------
    sortedRecords : `list` of `lsst.daf.butler.DimensionRecord`
        The sorted records.

    Raises
    ------
    ValueError
        Raised if the record set contains duplicate records, or if it contains
        (dayObs, seqNum) collisions.
    """
    records = list(records)  # must call list in case we have a generator
    recordSet = set(records)
    if len(records) != len(recordSet):
        raise ValueError("Record set contains duplicate records and therefore cannot be sorted unambiguously")

    daySeqTuples = [(r.day_obs, r.seq_num) for r in records]
    if len(daySeqTuples) != len(set(daySeqTuples)):
        raise ValueError("Record set contains dayObs/seqNum collisions, and therefore cannot be sorted "
                         "unambiguously")

    records.sort(key=lambda r: (r.day_obs, r.seq_num))
    return records
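
# Example usage (a sketch; assumes the records come from querying the
# exposure dimension, so that each has day_obs and seq_num):
#
#     records = butler.registry.queryDimensionRecords('exposure', datasets='raw')
#     ordered = sortRecordsByDayObsThenSeqNum(records)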


def getDaysWithData(butler, datasetType='raw'):
    """Get all the days for which LATISS has taken data on the mountain.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    datasetType : `str`, optional
        The datasetType to query.

    Returns
    -------
    days : `list` of `int`
        A sorted list of the day_obs values for which mountain-top data exists.
    """
    # 20200101 is a day between shipping LATISS and going on sky.
    # We used to constrain on exposure.seq_num<50 to massively reduce the
    # number of returned records whilst being large enough to ensure that no
    # days are missed because early seq_nums were skipped. However, because
    # we have test datasets like LATISS-test-data-tts, where we only kept
    # seqNums from 950 on one day, we can no longer assume this, so don't be
    # tempted to add such a constraint back in here for speed.
    where = "exposure.day_obs>20200101"
    records = butler.registry.queryDimensionRecords("exposure", where=where, datasets=datasetType)
    return sorted({r.day_obs for r in records})


def getMostRecentDataId(butler):
    """Get the dataId for the most recent observation.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.

    Returns
    -------
    dataId : `dict`
        The dataId of the most recent exposure.
    """
    lastDay = getMostRecentDayObs(butler)
    seqNum = getSeqNumsForDayObs(butler, lastDay)[-1]
    dataId = {'day_obs': lastDay, 'seq_num': seqNum, 'detector': 0}
    dataId.update(getExpIdFromDayObsSeqNum(butler, dataId))
    return dataId


def getExpIdFromDayObsSeqNum(butler, dataId):
    """Get the exposure id for the dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    dataId : `dict`
        The dataId for which to return the exposure id.

    Returns
    -------
    expIdDict : `dict`
        A dict containing only the exposure id, keyed by 'exposure'.
    """
    expRecord = getExpRecordFromDataId(butler, dataId)
    return {'exposure': expRecord.id}


def updateDataIdOrDataCord(dataId, **updateKwargs):
    """Add key, value pairs to a dataId or data coordinate.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate`
        The dataId to update.
    updateKwargs : `dict`
        The key, value pairs to add to the dataId or dataCoord.

    Returns
    -------
    dataId : `dict`
        The updated dataId.

    Notes
    -----
    Always returns a dict, so note that if a data coordinate is supplied, a
    dict is returned, changing the type.
    """
    newId = copy.copy(dataId)
    newId = _assureDict(newId)
    newId.update(updateKwargs)
    return newId


def fillDataId(butler, dataId):
    """Given a dataId, fill it with values for all available dimensions.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dict`
        The dataId to fill.

    Returns
    -------
    dataId : `dict`
        The filled dataId.

    Notes
    -----
    This function is *slow*! Running it on 20,000 dataIds takes approximately
    7 minutes. Virtually all the slowdown is in the
    butler.registry.expandDataId() call, though, so this wrapper is not to
    blame, and it might speed up in future with butler improvements.
    """
    # ensure it's a dict, to deal with records etc.
    dataId = _assureDict(dataId)

    # This removes extraneous keys that would trip up the registry call.
    # Using _rewrite_data_id is perhaps ever so slightly slower than popping
    # the bad keys, or making a minimal dataId by hand, but is more
    # reliable/general, so we choose that over the other approach here.
    dataId, _ = butler._rewrite_data_id(dataId, butler.registry.getDatasetType('raw'))

    # now expand and turn back into a dict
    dataId = butler.registry.expandDataId(dataId, detector=0).full  # this call is VERY slow
    dataId = _assureDict(dataId)

    missingExpId = getExpId(dataId) is None
    missingDayObs = getDayObs(dataId) is None
    missingSeqNum = getSeqNum(dataId) is None

    if missingDayObs or missingSeqNum:
        dayObsSeqNum = getDayObsSeqNumFromExposureId(butler, dataId)
        dataId.update(dayObsSeqNum)

    if missingExpId:
        expId = getExpIdFromDayObsSeqNum(butler, dataId)
        dataId.update(expId)

    return dataId
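
# Example usage (a sketch; given how slow this is, prefer filling only the
# dataIds you actually need):
#
#     dataId = fillDataId(butler, {'day_obs': 20220503, 'seq_num': 42})
#     # dataId now also contains e.g. the exposure and detector keys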


def _assureDict(dataId):
    """Turn any data-identifier-like object into a dict.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate` or
             `lsst.daf.butler.dimensions.DimensionRecord`
        The data identifier.

    Returns
    -------
    dataId : `dict`
        The data identifier as a dict.
    """
    if isinstance(dataId, dict):
        return dataId
    elif hasattr(dataId, 'items'):  # dafButler.DataCoordinate
        return {str(k): v for k, v in dataId.items()}  # str() required due to full names
    elif hasattr(dataId, 'dataId'):  # dafButler.dimensions.DimensionRecord
        return {str(k): v for k, v in dataId.dataId.items()}
    else:
        raise RuntimeError(f'Failed to coerce {type(dataId)} to dict')


def getExpRecordFromDataId(butler, dataId):
    """Get the exposure record for a given dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dict`
        The dataId.

    Returns
    -------
    expRecord : `lsst.daf.butler.dimensions.ExposureRecord`
        The exposure record.
    """
    dataId = _assureDict(dataId)
    assert isinstance(dataId, dict), f'dataId must be a dict or DimensionRecord, got {type(dataId)}'

    if expId := getExpId(dataId):
        where = "exposure.id=expId"
        expRecords = butler.registry.queryDimensionRecords("exposure",
                                                           where=where,
                                                           bind={'expId': expId},
                                                           datasets='raw')
    else:
        dayObs = getDayObs(dataId)
        seqNum = getSeqNum(dataId)
        if dayObs is None or seqNum is None:
            raise RuntimeError(f'Failed to find either expId or day_obs and seq_num in dataId {dataId}')
        where = "exposure.day_obs=day_obs AND exposure.seq_num=seq_num"
        expRecords = butler.registry.queryDimensionRecords("exposure",
                                                           where=where,
                                                           bind={'day_obs': dayObs, 'seq_num': seqNum},
                                                           datasets='raw')

    expRecords = set(expRecords)
    if not expRecords:
        raise LookupError(f"No exposure records found for {dataId}")
    assert len(expRecords) == 1, f'Found {len(expRecords)} exposure records for {dataId}'
    return expRecords.pop()
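
# Example usage (a sketch; either an exposure id, or a day_obs and seq_num
# pair, identifies the exposure):
#
#     record = getExpRecordFromDataId(butler, {'day_obs': 20220503, 'seq_num': 42})
#     record.id, record.observation_type, record.target_name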


def getDayObsSeqNumFromExposureId(butler, dataId):
    """Get the day_obs and seq_num for an exposure id.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dict` or `int`
        The dataId containing the exposure id, or the exposure id itself.

    Returns
    -------
    dataId : `dict`
        A dict containing only the day_obs and seq_num.
    """
    if isinstance(dataId, int):  # must come first, as ints can't be probed for keys
        dataId = {'exposure': dataId}

    if (dayObs := getDayObs(dataId)) and (seqNum := getSeqNum(dataId)):
        return {'day_obs': dayObs, 'seq_num': seqNum}

    dataId = _assureDict(dataId)
    assert isinstance(dataId, dict)

    if not (expId := getExpId(dataId)):
        raise RuntimeError(f'Failed to find exposure id in {dataId}')

    where = "exposure.id=expId"
    expRecords = butler.registry.queryDimensionRecords("exposure",
                                                       where=where,
                                                       bind={'expId': expId},
                                                       datasets='raw')
    expRecords = set(expRecords)
    if not expRecords:
        raise LookupError(f"No exposure records found for {dataId}")
    assert len(expRecords) == 1, f'Found {len(expRecords)} exposure records for {dataId}'
    record = expRecords.pop()
    return {'day_obs': record.day_obs, 'seq_num': record.seq_num}


def getDatasetRefForDataId(butler, datasetType, dataId):
    """Get the datasetRef for a dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    datasetType : `str` or `lsst.daf.butler.DatasetType`
        The dataset type.
    dataId : `dict`
        The dataId.

    Returns
    -------
    datasetRef : `lsst.daf.butler.DatasetRef`
        The dataset reference.
    """
    if not _expid_present(dataId):
        assert _dayobs_present(dataId) and _seqnum_present(dataId)
        dataId.update(getExpIdFromDayObsSeqNum(butler, dataId))

    dRef = butler.registry.findDataset(datasetType, dataId)
    return dRef
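
# Example usage (a sketch):
#
#     dRef = getDatasetRefForDataId(butler, 'raw',
#                                   {'day_obs': 20220503, 'seq_num': 42, 'detector': 0})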


def removeDataProduct(butler, datasetType, dataId):
    """Remove a data product from the registry. Use with caution.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    datasetType : `str` or `lsst.daf.butler.DatasetType`
        The dataset type.
    dataId : `dict`
        The dataId.
    """
    if datasetType == 'raw':
        raise RuntimeError("I'm sorry, Dave, I'm afraid I can't do that.")
    dRef = getDatasetRefForDataId(butler, datasetType, dataId)
    butler.pruneDatasets([dRef], disassociate=True, unstore=True, purge=True)
    return


def _dayobs_present(dataId):
    return _get_dayobs_key(dataId) is not None


def _seqnum_present(dataId):
    return _get_seqnum_key(dataId) is not None


def _expid_present(dataId):
    return _get_expid_key(dataId) is not None


def _get_dayobs_key(dataId):
    """Return the key for day_obs if present, else None."""
    keys = [k for k in dataId.keys() if 'day_obs' in k]
    if not keys:
        return None
    return keys[0]


def _get_seqnum_key(dataId):
    """Return the key for seq_num if present, else None."""
    keys = [k for k in dataId.keys() if 'seq_num' in k]
    if not keys:
        return None
    return keys[0]


def _get_expid_key(dataId):
    """Return the key for expId if present, else None."""
    if 'exposure.id' in dataId:
        return 'exposure.id'
    elif 'exposure' in dataId:
        return 'exposure'
    return None


def getDayObs(dataId):
    """Get the day_obs from a dataId.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    day_obs : `int` or `None`
        The day_obs value if present, else None.
    """
    if hasattr(dataId, 'day_obs'):
        return dataId.day_obs
    if not _dayobs_present(dataId):
        return None
    return dataId['day_obs'] if 'day_obs' in dataId else dataId['exposure.day_obs']


def getSeqNum(dataId):
    """Get the seq_num from a dataId.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    seq_num : `int` or `None`
        The seq_num value if present, else None.
    """
    if hasattr(dataId, 'seq_num'):
        return dataId.seq_num
    if not _seqnum_present(dataId):
        return None
    return dataId['seq_num'] if 'seq_num' in dataId else dataId['exposure.seq_num']


def getExpId(dataId):
    """Get the expId from a dataId.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    expId : `int` or `None`
        The expId value if present, else None.
    """
    if hasattr(dataId, 'id'):
        return dataId.id
    if not _expid_present(dataId):
        return None
    return dataId['exposure'] if 'exposure' in dataId else dataId['exposure.id']


def getLatissOnSkyDataIds(butler, skipTypes=('bias', 'dark', 'flat'), checkObject=True, full=True,
                          startDate=None, endDate=None):
    """Get a list of all on-sky dataIds taken.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    skipTypes : `iterable` of `str`, optional
        Image types to exclude.
    checkObject : `bool`, optional
        Check that the value of target_name (formerly OBJECT) is set, and
        exclude the exposure if it is not.
    full : `bool`, optional
        Return filled dataIds. Required for some analyses, but runs much
        (~30x) slower.
    startDate : `int`, optional
        The day_obs to start at, inclusive.
    endDate : `int`, optional
        The day_obs to end at, inclusive.

    Returns
    -------
    dataIds : `list` of `dict`
        The dataIds.
    """
    def isOnSky(expRecord):
        imageType = expRecord.observation_type
        obj = expRecord.target_name
        if checkObject and obj == 'NOTSET':
            return False
        if imageType not in skipTypes:
            return True
        return False

    recordSets = []
    days = getDaysWithData(butler)
    if startDate:
        days = [d for d in days if d >= startDate]
    if endDate:
        days = [d for d in days if d <= endDate]
    days = sorted(set(days))

    where = "exposure.day_obs=day_obs"
    for day in days:
        # queryDataIds would be better here, but it's then hard/impossible
        # to do the filtering for which is on sky, so just take the dataIds
        records = butler.registry.queryDimensionRecords("exposure",
                                                        where=where,
                                                        bind={'day_obs': day},
                                                        datasets='raw')
        recordSets.append(sortRecordsByDayObsThenSeqNum(records))

    dataIds = [r.dataId for r in filter(isOnSky, itertools.chain(*recordSets))]
    if full:
        expandedIds = [updateDataIdOrDataCord(butler.registry.expandDataId(dataId, detector=0).full)
                       for dataId in dataIds]
        filledIds = [fillDataId(butler, dataId) for dataId in expandedIds]
        return filledIds
    else:
        return [updateDataIdOrDataCord(dataId, detector=0) for dataId in dataIds]
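
# Example usage (a sketch; full=False is much faster when filled dataIds are
# not needed):
#
#     onSkyIds = getLatissOnSkyDataIds(butler, full=False,
#                                      startDate=20220101, endDate=20220503)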