# This file is part of summit_utils.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import copy
import itertools

import lsst.daf.butler as dafButler

from lsst.summit.utils.utils import getSite


__all__ = [
    "makeDefaultLatissButler",
    "updateDataId",
    "sanitize_day_obs",
    "getMostRecentDayObs",
    "getSeqNumsForDayObs",
    "getMostRecentDataId",
    "getDatasetRefForDataId",
    "getDayObs",
    "getSeqNum",
    "getExpId",
    "datasetExists",
    "sortRecordsByDayObsThenSeqNum",
    "getDaysWithData",
    "getExpIdFromDayObsSeqNum",
    "updateDataIdOrDataCord",
    "fillDataId",
    "getExpRecordFromDataId",
    "getDayObsSeqNumFromExposureId",
    "removeDataProduct",
    "getLatissOnSkyDataIds",
]

_LATISS_DEFAULT_COLLECTIONS = ['LATISS/raw/all', 'LATISS/calib', "LATISS/runs/quickLook"]

# RECENT_DAY is used to restrict queries to speed them up significantly. It
# must be a day in the past on which data was taken (otherwise some tests are
# no-ops), and data must definitely have been taken since. It should also not
# be more than two months in the past, due to the 60 day lookback time on the
# summit. All this means it should be updated by an informed human.
RECENT_DAY = 20220503


def _configureForSite():
    try:
        site = getSite()
    except ValueError:
        # this function runs automatically on module import, so don't fail
        # on k8s, where the site cannot yet be determined
        print("WARNING: failed to automatically determine site")
        site = None

    if site == 'tucson':
        global RECENT_DAY
        RECENT_DAY = 20211104  # TTS has limited data, so use this day


_configureForSite()


def getLatissDefaultCollections():
    """Get the default set of LATISS collections, updated for the site at
    which the code is being run.

    Returns
    -------
    collections : `list` of `str`
        The default collections for the site.
    """
    collections = list(_LATISS_DEFAULT_COLLECTIONS)  # copy so repeated calls never mutate the default
    try:
        site = getSite()
    except ValueError:
        site = ''

    if site == 'tucson':
        collections.append("LATISS-test-data-tts")
    elif site == 'summit':
        collections.append("LATISS_test_data")
    return collections


def _update_RECENT_DAY(day):
    """Update the value of RECENT_DAY when we get a more recent one for free."""
    global RECENT_DAY
    RECENT_DAY = max(day - 1, RECENT_DAY)


def makeDefaultLatissButler(*, extraCollections=None, writeable=False, embargo=False):
    """Create a butler for LATISS using the default collections.

    Parameters
    ----------
    extraCollections : `list` of `str`, optional
        Extra input collections to supply to the butler init.
    writeable : `bool`, optional
        Whether to make a writeable butler.
    embargo : `bool`, optional
        Use the embargo repo instead of the main one. Needed to access
        embargoed data.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        The butler.
    """
    # TODO: Add logging of which collections are being used
    collections = getLatissDefaultCollections()
    if extraCollections:
        collections.extend(extraCollections)
    try:
        repoString = "LATISS" if not embargo else "/repo/embargo"
        butler = dafButler.Butler(repoString,
                                  collections=collections,
                                  writeable=writeable,
                                  instrument='LATISS')
    except (FileNotFoundError, RuntimeError):
        # Depending on the value of DAF_BUTLER_REPOSITORY_INDEX, and whether
        # it is present and blank or just not set, either of these exception
        # types can be raised; see tests/test_butlerUtils.py:ButlerInitTestCase
        # for details, and for the tests which confirm these have not changed.
        raise FileNotFoundError  # unify the exception type
    return butler


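# A minimal usage sketch (comment only, not executed on import). It assumes
# it is run at a site where a LATISS repo is configured; the extra collection
# name is a placeholder:
#
#     butler = makeDefaultLatissButler(extraCollections=['u/username/test'])
#     dataId = getMostRecentDataId(butler)
#     exp = butler.get('raw', dataId)
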

# TODO: DM-32940 can remove this whole function once that ticket merges.
def datasetExists(butler, dataProduct, dataId, **kwargs):
    """Collapse the tri-state behaviour of butler.datasetExists to a boolean.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataProduct : `str`
        The type of data product to check for.
    dataId : `dict`
        The dataId of the dataProduct to check for.

    Returns
    -------
    exists : `bool`
        True if the dataProduct exists for the dataId and can be retrieved,
        else False.
    """
    try:
        return butler.datasetExists(dataProduct, dataId, **kwargs)
    except (LookupError, RuntimeError):
        return False


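# Comment-only sketch of the boolean collapse in use (placeholder values):
#
#     dataId = {'day_obs': 20220503, 'seq_num': 123, 'detector': 0}
#     if datasetExists(butler, 'raw', dataId):
#         exp = butler.get('raw', dataId)
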

def updateDataId(dataId, **kwargs):
    """Update a DataCoordinate or dataId dict with the supplied kwargs.

    Provides a single interface for adding the detector key (or others) to a
    dataId, whether it is a DataCoordinate or a dict.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate`
        The dataId to update.
    kwargs : `dict`
        The keys and values to add to the dataId.

    Returns
    -------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate`
        The updated dataId, with the same type as the input.
    """
    match dataId:
        case dafButler.DataCoordinate():
            return dafButler.DataCoordinate.standardize(dataId, **kwargs)
        case dict():
            return dict(dataId, **kwargs)
    raise ValueError(f"Unknown dataId type {type(dataId)}")


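# A sketch of the type-preserving behaviour (comment only; values are
# placeholders):
#
#     updateDataId({'day_obs': 20220503, 'seq_num': 123}, detector=0)
#     # -> {'day_obs': 20220503, 'seq_num': 123, 'detector': 0}
#
# and passing a DataCoordinate returns a (standardized) DataCoordinate.
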

def sanitize_day_obs(day_obs):
    """Take a string or int day_obs and turn it into the int version.

    Parameters
    ----------
    day_obs : `str` or `int`
        The day_obs to sanitize.

    Returns
    -------
    day_obs : `int`
        The sanitized day_obs.

    Raises
    ------
    ValueError
        Raised if the day_obs fails to translate for any reason.
    """
    if isinstance(day_obs, int):
        return day_obs
    elif isinstance(day_obs, str):
        try:
            return int(day_obs.replace('-', ''))
        except Exception:
            raise ValueError(f'Failed to sanitize {day_obs!r} to a day_obs')
    else:
        raise ValueError(f'Cannot sanitize {day_obs!r} to a day_obs')


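# Expected behaviour, as a comment-only sketch:
#
#     sanitize_day_obs(20220503)      # -> 20220503
#     sanitize_day_obs('2022-05-03')  # -> 20220503
#     sanitize_day_obs(3.14)          # raises ValueError
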

def getMostRecentDayObs(butler):
    """Get the most recent day_obs for which there is data.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.

    Returns
    -------
    day_obs : `int`
        The day_obs.
    """
    where = "exposure.day_obs>=RECENT_DAY"
    records = butler.registry.queryDimensionRecords('exposure', where=where, datasets='raw',
                                                    bind={'RECENT_DAY': RECENT_DAY})
    recentDay = max(r.day_obs for r in records)
    _update_RECENT_DAY(recentDay)
    return recentDay


def getSeqNumsForDayObs(butler, day_obs, extraWhere=''):
    """Get a list of all seq_nums taken on a given day_obs.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    day_obs : `int` or `str`
        The day_obs for which the seq_nums are desired.
    extraWhere : `str`, optional
        Any extra where conditions to add to the queryDimensionRecords call.

    Returns
    -------
    seq_nums : `list` of `int`
        The seq_nums taken on the corresponding day_obs, in ascending
        numerical order.
    """
    day_obs = sanitize_day_obs(day_obs)
    where = "exposure.day_obs=day_obs"
    if extraWhere:
        extraWhere = extraWhere.replace('"', '\'')
        where += f" and {extraWhere}"
    records = butler.registry.queryDimensionRecords("exposure",
                                                    where=where,
                                                    bind={'day_obs': day_obs},
                                                    datasets='raw')
    return sorted(r.seq_num for r in records)


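# Sketch of typical use, including an extra where clause (comment only; the
# observation_type constraint is an illustrative example):
#
#     seqNums = getSeqNumsForDayObs(butler, '2022-05-03')
#     biases = getSeqNumsForDayObs(butler, 20220503,
#                                  extraWhere="exposure.observation_type='bias'")
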

def sortRecordsByDayObsThenSeqNum(records):
    """Sort a set of records by dayObs, then seqNum, to get the order in
    which they were taken.

    Parameters
    ----------
    records : `list` of `lsst.daf.butler.DimensionRecord`
        The records to be sorted.

    Returns
    -------
    sortedRecords : `list` of `lsst.daf.butler.DimensionRecord`
        The sorted records.

    Raises
    ------
    ValueError
        Raised if the record set contains duplicate records, or if it
        contains (dayObs, seqNum) collisions.
    """
    records = list(records)  # must call list() in case we have a generator
    recordSet = set(records)
    if len(records) != len(recordSet):
        raise ValueError("Record set contains duplicate records and therefore cannot be sorted unambiguously")

    daySeqTuples = [(r.day_obs, r.seq_num) for r in records]
    if len(daySeqTuples) != len(set(daySeqTuples)):
        raise ValueError("Record set contains dayObs/seqNum collisions, and therefore cannot be sorted "
                         "unambiguously")

    records.sort(key=lambda r: (r.day_obs, r.seq_num))
    return records


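# Comment-only sketch: records spanning several days come back from the
# registry in arbitrary order; this puts them into acquisition order:
#
#     records = butler.registry.queryDimensionRecords("exposure",
#                                                     where="exposure.day_obs>20220501",
#                                                     datasets='raw')
#     ordered = sortRecordsByDayObsThenSeqNum(records)
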

def getDaysWithData(butler):
    """Get all the days for which LATISS has taken data on the mountain.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.

    Returns
    -------
    days : `list` of `int`
        A sorted list of the day_obs values for which mountain-top data
        exists.
    """
    # 20200101 is a day between shipping LATISS and going on sky.
    # We used to constrain on exposure.seq_num<50 to massively reduce the
    # number of returned records, whilst being large enough to ensure that no
    # days were missed because early seq_nums were skipped. However, because
    # we have test datasets like LATISS-test-data-tts, where on one day we
    # only kept seqNums from 950, we can no longer assume this, so don't be
    # tempted to add such a constraint back in here for speed.
    where = "exposure.day_obs>20200101"
    records = butler.registry.queryDimensionRecords("exposure", where=where, datasets='raw')
    return sorted({r.day_obs for r in records})


def getMostRecentDataId(butler):
    """Get the dataId for the most recent observation.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.

    Returns
    -------
    dataId : `dict`
        The dataId of the most recent exposure.
    """
    lastDay = getMostRecentDayObs(butler)
    seqNum = getSeqNumsForDayObs(butler, lastDay)[-1]
    dataId = {'day_obs': lastDay, 'seq_num': seqNum, 'detector': 0}
    dataId.update(getExpIdFromDayObsSeqNum(butler, dataId))
    return dataId


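# Comment-only sketch of how the pieces chain together to fetch the most
# recent image ('raw' here is just an example dataset type):
#
#     dataId = getMostRecentDataId(butler)
#     exp = butler.get('raw', dataId)
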

def getExpIdFromDayObsSeqNum(butler, dataId):
    """Get the exposure id for the dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    dataId : `dict`
        The dataId for which to return the exposure id.

    Returns
    -------
    dataId : `dict`
        A dict containing the exposure id, keyed by 'exposure'.
    """
    expRecord = getExpRecordFromDataId(butler, dataId)
    return {'exposure': expRecord.id}


def updateDataIdOrDataCord(dataId, **updateKwargs):
    """Add key-value pairs to a dataId or data coordinate.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate`
        The dataId to update.
    updateKwargs : `dict`
        The key-value pairs to add to the dataId or dataCoord.

    Returns
    -------
    dataId : `dict`
        The updated dataId.

    Notes
    -----
    This always returns a dict, so note that if a data coordinate is
    supplied, a dict is returned, changing the type.
    """
    newId = copy.copy(dataId)
    newId = _assureDict(newId)
    newId.update(updateKwargs)
    return newId


def fillDataId(butler, dataId):
    """Given a dataId, fill it with values for all available dimensions.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dict`
        The dataId to fill.

    Returns
    -------
    dataId : `dict`
        The filled dataId.

    Notes
    -----
    This function is *slow*! Running this on 20,000 dataIds takes
    approximately 7 minutes. Virtually all the slowdown is in the
    butler.registry.expandDataId() call though, so this wrapper is not to
    blame here, and might speed up in future with butler improvements.
    """
    # ensure it's a dict, to deal with records etc.
    dataId = _assureDict(dataId)

    # This removes extraneous keys that would trip up the registry call.
    # Using _rewrite_data_id is perhaps ever so slightly slower than popping
    # the bad keys, or making a minimal dataId by hand, but it is more
    # reliable/general, so we choose that over the other approach here.
    dataId, _ = butler._rewrite_data_id(dataId, butler.registry.getDatasetType('raw'))

    # now expand, and turn back into a dict
    dataId = butler.registry.expandDataId(dataId, detector=0).full  # this call is VERY slow
    dataId = _assureDict(dataId)

    missingExpId = getExpId(dataId) is None
    missingDayObs = getDayObs(dataId) is None
    missingSeqNum = getSeqNum(dataId) is None

    if missingDayObs or missingSeqNum:
        dayObsSeqNum = getDayObsSeqNumFromExposureId(butler, dataId)
        dataId.update(dayObsSeqNum)

    if missingExpId:
        expId = getExpIdFromDayObsSeqNum(butler, dataId)
        dataId.update(expId)

    return dataId


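# Comment-only sketch: a minimal dataId goes in, and a dict filled with the
# available dimension values comes back (day_obs/seq_num are placeholders):
#
#     filled = fillDataId(butler, {'day_obs': 20220503, 'seq_num': 123})
#     # filled now also contains e.g. 'exposure', 'instrument' and 'detector'
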

def _assureDict(dataId):
    """Turn any data-identifier-like object into a dict.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.dimensions.DataCoordinate` or
             `lsst.daf.butler.dimensions.DimensionRecord`
        The data identifier.

    Returns
    -------
    dataId : `dict`
        The data identifier as a dict.
    """
    if isinstance(dataId, dict):
        return dataId
    elif hasattr(dataId, 'items'):  # dafButler.dimensions.DataCoordinate
        return {str(k): v for k, v in dataId.items()}  # str() required due to full names
    elif hasattr(dataId, 'dataId'):  # dafButler.dimensions.DimensionRecord
        return {str(k): v for k, v in dataId.dataId.items()}
    else:
        raise RuntimeError(f'Failed to coerce {type(dataId)} to dict')


def getExpRecordFromDataId(butler, dataId):
    """Get the exposure record for a given dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dict`
        The dataId.

    Returns
    -------
    expRecord : `lsst.daf.butler.dimensions.ExposureRecord`
        The exposure record.
    """
    dataId = _assureDict(dataId)
    assert isinstance(dataId, dict), f'dataId must be a dict or DimensionRecord, got {type(dataId)}'

    if expId := getExpId(dataId):
        where = "exposure.id=expId"
        expRecords = butler.registry.queryDimensionRecords("exposure",
                                                           where=where,
                                                           bind={'expId': expId},
                                                           datasets='raw')
    else:
        dayObs = getDayObs(dataId)
        seqNum = getSeqNum(dataId)
        if not (dayObs and seqNum):
            raise RuntimeError(f'Failed to find either expId or day_obs and seq_num in dataId {dataId}')
        where = "exposure.day_obs=day_obs AND exposure.seq_num=seq_num"
        expRecords = butler.registry.queryDimensionRecords("exposure",
                                                           where=where,
                                                           bind={'day_obs': dayObs, 'seq_num': seqNum},
                                                           datasets='raw')

    expRecords = set(expRecords)
    if not expRecords:
        raise LookupError(f"No exposure records found for {dataId}")
    assert len(expRecords) == 1, f'Found {len(expRecords)} exposure records for {dataId}'
    return expRecords.pop()


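# Comment-only sketch: the returned record carries the exposure metadata this
# module leans on elsewhere (day_obs, seq_num, observation_type, target_name):
#
#     record = getExpRecordFromDataId(butler, {'day_obs': 20220503, 'seq_num': 123})
#     print(record.id, record.observation_type, record.target_name)
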

def getDayObsSeqNumFromExposureId(butler, dataId):
    """Get the day_obs and seq_num for an exposure id.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dict` or `int`
        The dataId containing the exposure id, or the exposure id itself.

    Returns
    -------
    dataId : `dict`
        A dict containing only the day_obs and seq_num.
    """
    # check for an int first, as the getters below only accept dict-like
    # dataIds and records
    if isinstance(dataId, int):
        dataId = {'exposure': dataId}

    if (dayObs := getDayObs(dataId)) and (seqNum := getSeqNum(dataId)):
        return {'day_obs': dayObs, 'seq_num': seqNum}

    dataId = _assureDict(dataId)
    assert isinstance(dataId, dict)

    if not (expId := getExpId(dataId)):
        raise RuntimeError(f'Failed to find exposure id in {dataId}')

    where = "exposure.id=expId"
    expRecords = butler.registry.queryDimensionRecords("exposure",
                                                       where=where,
                                                       bind={'expId': expId},
                                                       datasets='raw')
    expRecords = set(expRecords)
    if not expRecords:
        raise LookupError(f"No exposure records found for {dataId}")
    assert len(expRecords) == 1, f'Found {len(expRecords)} exposure records for {dataId}'
    record = expRecords.pop()
    return {'day_obs': record.day_obs, 'seq_num': record.seq_num}


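# Comment-only sketch of the expId <-> (day_obs, seq_num) round trip (the
# exposure id value is a placeholder):
#
#     getDayObsSeqNumFromExposureId(butler, 2022050300123)
#     # -> {'day_obs': 20220503, 'seq_num': 123}
#     getExpIdFromDayObsSeqNum(butler, {'day_obs': 20220503, 'seq_num': 123})
#     # -> {'exposure': 2022050300123}
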

def getDatasetRefForDataId(butler, datasetType, dataId):
    """Get the DatasetRef for a dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    datasetType : `str` or `datasetType`
        The dataset type.
    dataId : `dict`
        The dataId.

    Returns
    -------
    datasetRef : `lsst.daf.butler.DatasetRef`
        The dataset reference.
    """
    if not _expid_present(dataId):
        assert _dayobs_present(dataId) and _seqnum_present(dataId)
        dataId.update(getExpIdFromDayObsSeqNum(butler, dataId))

    dRef = butler.registry.findDataset(datasetType, dataId)
    return dRef


def removeDataProduct(butler, datasetType, dataId):
    """Remove a data product from the registry. Use with caution.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    datasetType : `str` or `datasetType`
        The dataset type.
    dataId : `dict`
        The dataId.
    """
    if datasetType == 'raw':
        raise RuntimeError("I'm sorry, Dave, I'm afraid I can't do that.")
    dRef = getDatasetRefForDataId(butler, datasetType, dataId)
    butler.pruneDatasets([dRef], disassociate=True, unstore=True, purge=True)
    return


def _dayobs_present(dataId):
    return _get_dayobs_key(dataId) is not None


def _seqnum_present(dataId):
    return _get_seqnum_key(dataId) is not None


def _expid_present(dataId):
    return _get_expid_key(dataId) is not None


def _get_dayobs_key(dataId):
    """Return the key for day_obs if present, else None."""
    keys = [k for k in dataId.keys() if 'day_obs' in k]
    return keys[0] if keys else None


def _get_seqnum_key(dataId):
    """Return the key for seq_num if present, else None."""
    keys = [k for k in dataId.keys() if 'seq_num' in k]
    return keys[0] if keys else None


def _get_expid_key(dataId):
    """Return the key for expId if present, else None."""
    if 'exposure.id' in dataId:
        return 'exposure.id'
    elif 'exposure' in dataId:
        return 'exposure'
    return None


def getDayObs(dataId):
    """Get the day_obs from a dataId.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    day_obs : `int` or `None`
        The day_obs value if present, else None.
    """
    if hasattr(dataId, 'day_obs'):
        return dataId.day_obs
    if not _dayobs_present(dataId):
        return None
    return dataId['day_obs'] if 'day_obs' in dataId else dataId['exposure.day_obs']


def getSeqNum(dataId):
    """Get the seq_num from a dataId.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    seq_num : `int` or `None`
        The seq_num value if present, else None.
    """
    if hasattr(dataId, 'seq_num'):
        return dataId.seq_num
    if not _seqnum_present(dataId):
        return None
    return dataId['seq_num'] if 'seq_num' in dataId else dataId['exposure.seq_num']


def getExpId(dataId):
    """Get the expId from a dataId.

    Parameters
    ----------
    dataId : `dict` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    expId : `int` or `None`
        The expId value if present, else None.
    """
    if hasattr(dataId, 'id'):
        return dataId.id
    if not _expid_present(dataId):
        return None
    return dataId['exposure'] if 'exposure' in dataId else dataId['exposure.id']


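# Comment-only sketch: these getters tolerate the various ways the keys can
# be spelled, and accept DimensionRecords too (the expId is a placeholder):
#
#     getDayObs({'day_obs': 20220503})           # -> 20220503
#     getDayObs({'exposure.day_obs': 20220503})  # -> 20220503
#     getSeqNum(expRecord)                       # attribute access on records
#     getExpId({'exposure': 2022050300123})      # -> 2022050300123
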

def getLatissOnSkyDataIds(butler, skipTypes=('bias', 'dark', 'flat'), checkObject=True, full=True,
                          startDate=None, endDate=None):
    """Get a list of all on-sky dataIds taken.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    skipTypes : `iterable` of `str`
        Image types to exclude.
    checkObject : `bool`
        Check whether the value of target_name (formerly OBJECT) is set, and
        exclude the exposure if it is not.
    full : `bool`
        Return filled dataIds. Required for some analyses, but runs much
        (~30x) slower.
    startDate : `int`, optional
        The day_obs to start at, inclusive.
    endDate : `int`, optional
        The day_obs to end at, inclusive.

    Returns
    -------
    dataIds : `list` of `dict`
        The dataIds.
    """
    def isOnSky(expRecord):
        imageType = expRecord.observation_type
        obj = expRecord.target_name
        if checkObject and obj == 'NOTSET':
            return False
        return imageType not in skipTypes

    recordSets = []
    days = getDaysWithData(butler)
    if startDate:
        days = [d for d in days if d >= startDate]
    if endDate:
        days = [d for d in days if d <= endDate]
    days = sorted(set(days))

    where = "exposure.day_obs=day_obs"
    for day in days:
        # queryDataIds would be better here, but it's then hard/impossible
        # to do the filtering for which exposures are on sky, so just take
        # the dataIds from the exposure records instead
        records = butler.registry.queryDimensionRecords("exposure",
                                                        where=where,
                                                        bind={'day_obs': day},
                                                        datasets='raw')
        recordSets.append(sortRecordsByDayObsThenSeqNum(records))

    dataIds = [r.dataId for r in filter(isOnSky, itertools.chain(*recordSets))]
    if full:
        expandedIds = [updateDataIdOrDataCord(butler.registry.expandDataId(dataId, detector=0).full)
                       for dataId in dataIds]
        filledIds = [fillDataId(butler, dataId) for dataId in expandedIds]
        return filledIds
    else:
        return [updateDataIdOrDataCord(dataId, detector=0) for dataId in dataIds]
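

# Comment-only usage sketch: gather all on-sky exposures for a date range,
# without the slow dataId filling (the dates are placeholders):
#
#     dataIds = getLatissOnSkyDataIds(butler, full=False,
#                                     startDate=20220501, endDate=20220531)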