Coverage for python / lsst / summit / utils / butlerUtils.py: 14%
281 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 09:02 +0000
1# This file is part of summit_utils.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22import copy
23import itertools
24import logging
25from collections.abc import Iterable, Mapping
26from typing import Any
28from deprecated.sphinx import deprecated
30import lsst.daf.butler as dafButler
31from lsst.daf.butler.direct_butler import DirectButler
32from lsst.summit.utils.utils import getSite
33from lsst.utils.iteration import ensure_iterable
# Public API of this module; entries marked "deprecated" emit FutureWarnings
# and are slated for removal.
__all__ = [
    "makeDefaultLatissButler",  # deprecated
    "makeDefaultButler",
    "updateDataId",
    "sanitizeDayObs",
    "getMostRecentDayObs",
    "getSeqNumsForDayObs",
    "getMostRecentDataId",
    "getDatasetRefForDataId",
    "getDayObs",
    "getSeqNum",
    "getExpId",
    "datasetExists",  # deprecated
    "sortRecordsByDayObsThenSeqNum",
    "getDaysWithData",
    "getExpIdFromDayObsSeqNum",
    "updateDataIdOrDataCord",
    "fillDataId",
    "getExpRecordFromDataId",
    "getDayObsSeqNumFromExposureId",
    "removeDataProduct",
    "getLatissOnSkyDataIds",
    "getExpRecord",
]
# Default input collections for LATISS; site-specific additions are made in
# getLatissDefaultCollections().
_LATISS_DEFAULT_COLLECTIONS = ["LATISS/raw/all", "LATISS/calib", "LATISS/runs/quickLook"]

# RECENT_DAY must be in the past *and have data* (otherwise some tests are
# no-ops), to speed up queries by restricting them significantly,
# but data must definitely have been taken since. Should
# also not be more than 2 months in the past due to 60 day lookback time on the
# summit. All this means it should be updated by an informed human.
RECENT_DAY = 20220503
def _configureForSite() -> None:
    """Adjust module-level configuration for the site the code runs at.

    Currently this only lowers ``RECENT_DAY`` for the Tucson test stand,
    which has limited data.
    """
    try:
        site = getSite()
    except ValueError:
        # this function is run automatically on module import, so
        # don't fail for k8s where the site cannot yet be determined;
        # use the logging framework rather than a bare print()
        logging.getLogger(__name__).warning("Failed to automatically determine site")
        site = None
    if site == "tucson":
        global RECENT_DAY
        RECENT_DAY = 20211104  # TTS has limited data, so use this day


_configureForSite()
def getLatissDefaultCollections() -> list[str]:
    """Get the default set of LATISS collections, updated for the site at
    which the code is being run.

    Returns
    -------
    collections : `list` of `str`
        The default collections for the site.
    """
    # Take a copy: the original code aliased the module-level list, so the
    # append() below mutated _LATISS_DEFAULT_COLLECTIONS itself, growing it
    # on every call at the summit/TTS.
    collections = list(_LATISS_DEFAULT_COLLECTIONS)
    try:
        site = getSite()
    except ValueError:
        site = ""

    # both the summit and the Tucson test stand carry the test dataset
    if site in ("tucson", "summit"):
        collections.append("LATISS-test-data")
    return collections
def _update_RECENT_DAY(day: int) -> None:
    """Update the value for RECENT_DAY once we have a value for free."""
    global RECENT_DAY
    # Only ever move the bound forward, never backwards.
    if day - 1 > RECENT_DAY:
        RECENT_DAY = day - 1
@deprecated(
    reason="Use the more generic makeDefaultButler('LATISS'). Will be removed after v28.0.",
    version="v27.0",
    category=FutureWarning,
)
def makeDefaultLatissButler(
    *,
    extraCollections: list[str] | None = None,
    writeable: bool = False,
    embargo: bool = False,
) -> dafButler.Butler:
    """Create a butler for LATISS using the default collections.

    Deprecated thin wrapper around `makeDefaultButler` with
    ``instrument='LATISS'``.

    Parameters
    ----------
    extraCollections : `list` of `str`, optional
        Extra input collections to supply to the butler init.
    writeable : `bool`, optional
        Whether to make a writeable butler.
    embargo : `bool`, optional
        Use the embargo repo instead of the main one. Needed to access
        embargoed data.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        The butler.
    """
    return makeDefaultButler(
        "LATISS", extraCollections=extraCollections, writeable=writeable, embargo=embargo
    )
def makeDefaultButler(
    instrument: str,
    *,
    extraCollections: list[str] | None = None,
    writeable: bool = False,
    embargo: bool = True,
) -> dafButler.Butler:
    """Create a butler for the instrument using default collections, regardless
    of the location.

    Parameters
    ----------
    instrument : `str`
        The instrument name, e.g. 'LATISS' or 'LSSTCam'.
    extraCollections : `list` of `str`, optional
        Extra input collections to supply to the butler init.
    writeable : `bool`, optional
        Whether to make a writeable butler.
    embargo : `bool`, optional
        Use the embargo repo instead of the main one. Needed to access
        embargoed data if not at a summit-like location. Ignored at
        summit-like sites, which only have a single repo.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        The butler.

    Raises
    ------
    FileNotFoundError
        Raised if the butler cannot be created, because this is the error
        when the DAF_BUTLER_REPOSITORY_INDEX is not set correctly.
    """
    SUPPORTED_SITES = [
        "summit",
        "tucson",
        "base",
        "staff-rsp",
        "rubin-devl",
        "usdf-k8s",
    ]

    site = getSite()
    if site not in SUPPORTED_SITES:
        # This might look like a slightly weird error to raise, but this is
        # the same error that's raised when the DAF_BUTLER_REPOSITORY_INDEX
        # isn't set, so it's the most appropriate error to raise here, i.e.
        # this is what would be raised if this function was allowed to continue
        # only this is a less confusing version of it.
        raise FileNotFoundError(f"Default butler creation only available at: {SUPPORTED_SITES}, got {site=}")

    summitLike = site in ["summit", "tucson", "base"]
    if summitLike and embargo:
        logger = logging.getLogger(__name__)
        logger.debug("embargo option is irrelevant on the summit, ignoring")
        embargo = False  # there's only one repo too, so this makes the code more simple too

    collections: list[str] = [f"{instrument}/defaults"]
    # summit-like sites serve quickLook outputs; elsewhere use nightlyValidation
    raCollection = (
        [f"{instrument}/runs/quickLook"] if summitLike else [f"{instrument}/runs/nightlyValidation"]
    )
    collections.extend(raCollection)
    if instrument == "LSSTCam":
        collections.append("LSSTCam/raw/guider")

    repo = instrument if embargo is False else "embargo"

    if extraCollections is not None:
        collections.extend(ensure_iterable(extraCollections))

    try:
        butler = dafButler.Butler.from_config(
            repo, collections=collections, writeable=writeable, instrument=instrument
        )
    except (FileNotFoundError, RuntimeError) as e:
        # Depending on the value of DAF_BUTLER_REPOSITORY_INDEX and whether
        # it is present and blank, or just not set, both these exception
        # types can be raised, see tests/test_butlerUtils.py:ButlerInitTestCase
        # for details and tests which confirm these have not changed.
        # Chain the cause so the original failure is not lost.
        raise FileNotFoundError(f"Failed to create butler for {repo=}") from e  # unify exception type
    return butler
@deprecated(
    reason="datasetExists has been replaced by Butler.exists(). Will be removed after v26.0.",
    version="v26.0",
    category=FutureWarning,
)
def datasetExists(
    butler: dafButler.Butler, dataProduct: str, dataId: dafButler.DataId, **kwargs: Any
) -> bool:
    """Collapse the tri-state behaviour of butler.datasetExists to a boolean.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataProduct : `str`
        The type of data product to check for.
    dataId : `dafButler.DataId`
        The dataId of the dataProduct to check for.
    kwargs : `Any`
        Extra keyword arguments forwarded to `Butler.exists`.

    Returns
    -------
    exists : `bool`
        True if the dataProduct exists for the dataId and can be retrieved,
        else False.
    """
    # DatasetExistence is truthy only when the dataset is fully present
    return bool(butler.exists(dataProduct, dataId, **kwargs))
def updateDataId(dataId: dafButler.DataId, **kwargs: Any) -> dafButler.DataId:
    """Update a DataCoordinate or dataId dict with kwargs.

    Provides a single interface for adding the detector key (or others) to a
    dataId whether it's a DataCoordinate or a dict.

    Parameters
    ----------
    dataId : `dafButler.DataId`
        The dataId to update.
    kwargs : `dict`
        The keys and values to add to the dataId.

    Returns
    -------
    dataId : `dict` or `lsst.daf.butler.DataCoordinate`
        The updated dataId, with the same type as the input.

    Raises
    ------
    ValueError
        Raised if the dataId is neither a dict nor a DataCoordinate.
    """
    # Preserve the input type: DataCoordinate in, DataCoordinate out.
    if isinstance(dataId, dafButler.DataCoordinate):
        return dafButler.DataCoordinate.standardize(dataId, **kwargs)
    if isinstance(dataId, dict):
        return dict(dataId, **kwargs)
    raise ValueError(f"Unknown dataId type {type(dataId)}")
288def sanitizeDayObs(day_obs: int | str) -> int:
289 """Take string or int day_obs and turn it into the int version.
291 Parameters
292 ----------
293 day_obs : `str` or `int`
294 The day_obs to sanitize.
296 Returns
297 -------
298 day_obs : `int`
299 The sanitized day_obs.
301 Raises
302 ------
303 ValueError
304 Raised if the day_obs fails to translate for any reason.
305 """
306 if isinstance(day_obs, int):
307 return day_obs
308 elif isinstance(day_obs, str):
309 try:
310 return int(day_obs.replace("-", ""))
311 except Exception:
312 ValueError(f"Failed to sanitize {day_obs!r} to a day_obs")
313 raise ValueError(f"Cannot sanitize {day_obs!r} to a day_obs")
def getMostRecentDayObs(butler: dafButler.Butler) -> int:
    """Get the most recent day_obs for which there is data.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.

    Returns
    -------
    day_obs : `int`
        The most recent day_obs with raw data.
    """
    # restrict the query to recent days for speed; RECENT_DAY is a
    # module-level lower bound which is bumped forward opportunistically
    where = "exposure.day_obs>=RECENT_DAY"
    records = butler.registry.queryDimensionRecords(
        "exposure", where=where, datasets="raw", bind={"RECENT_DAY": RECENT_DAY}
    )
    # NOTE(review): max() raises ValueError if no records match, i.e. if
    # RECENT_DAY is ahead of the newest data in this repo.
    recentDay = max(r.day_obs for r in records)
    # cache the freshly-discovered day so later queries use a tighter bound
    _update_RECENT_DAY(recentDay)
    return recentDay
def getSeqNumsForDayObs(butler: dafButler.Butler, day_obs: int, extraWhere: str = "") -> list[int]:
    """Get a list of all seq_nums taken on a given day_obs.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    day_obs : `int` or `str`
        The day_obs for which the seq_nums are desired.
    extraWhere : `str`
        Any extra where conditions to add to the queryDimensionRecords call.

    Returns
    -------
    seq_nums : `list` of `int`
        The seq_nums taken on the corresponding day_obs in ascending numerical
        order.
    """
    day_obs = sanitizeDayObs(day_obs)
    clauses = ["exposure.day_obs=dayObs"]
    if extraWhere:
        # the registry query language wants single-quoted strings
        clauses.append(extraWhere.replace('"', "'"))
    records = butler.registry.queryDimensionRecords(
        "exposure", where=" and ".join(clauses), bind={"dayObs": day_obs}, datasets="raw"
    )
    return sorted({record.seq_num for record in records})
def sortRecordsByDayObsThenSeqNum(
    records: dafButler.registry.queries.DimensionRecordQueryResults | list[dafButler.DimensionRecord],
) -> list[dafButler.DimensionRecord]:
    """Sort a set of records by dayObs, then seqNum to get the order in which
    they were taken.

    Parameters
    ----------
    records : `list` of `dafButler.DimensionRecord`
        The records to be sorted.

    Returns
    -------
    sortedRecords : `list` of `dafButler.DimensionRecord`
        The sorted records.

    Raises
    ------
    ValueError
        Raised if the recordSet contains duplicate records, or if it contains
        (dayObs, seqNum) collisions.
    """
    recordList = list(records)  # materialize in case we were handed a generator
    if len(set(recordList)) != len(recordList):
        raise ValueError("Record set contains duplicate records and therefore cannot be sorted unambiguously")

    # two distinct records sharing a (dayObs, seqNum) pair would make the
    # sort order ambiguous, so refuse to proceed
    sortKeys = [(record.day_obs, record.seq_num) for record in recordList]
    if len(set(sortKeys)) != len(sortKeys):
        raise ValueError(
            "Record set contains dayObs/seqNum collisions, and therefore cannot be sorted unambiguously"
        )

    return sorted(recordList, key=lambda record: (record.day_obs, record.seq_num))
def getDaysWithData(butler: dafButler.Butler, datasetType: str = "raw") -> list[int]:
    """Get all the days for which LATISS has taken data on the mountain.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    datasetType : `str`
        The datasetType to query.

    Returns
    -------
    days : `list` of `int`
        A sorted list of the day_obs values for which mountain-top data exists.
    """
    # 20200101 is a day between shipping LATISS and going on sky.
    # We used to constrain on exposure.seq_num<50 to massively reduce the
    # number of returned records whilst being large enough to ensure that no
    # days are missed because early seq_nums were skipped. However, because
    # we have test datasets like LATISS-test-data-tts where we only kept
    # seqNums from 950 on one day, we can no longer assume this so don't be
    # tempted to add such a constraint back in here for speed.
    records = butler.registry.queryDimensionRecords(
        "exposure", where="exposure.day_obs>20200101", datasets=datasetType
    )
    return sorted({record.day_obs for record in records})
def getMostRecentDataId(butler: dafButler.Butler) -> dict[str, Any]:
    """Get the dataId for the most recent observation.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.

    Returns
    -------
    dataId : `dict[str, Any]`
        The dataId of the most recent exposure, including its exposure id.
    """
    day = getMostRecentDayObs(butler)
    # getSeqNumsForDayObs returns an ascending list, so max() is the latest
    latestSeqNum = max(getSeqNumsForDayObs(butler, day))
    dataId: dict[str, Any] = {"day_obs": day, "seq_num": latestSeqNum, "detector": 0}
    dataId.update(getExpIdFromDayObsSeqNum(butler, dataId))
    return dataId
def getExpIdFromDayObsSeqNum(butler: dafButler.Butler, dataId: dafButler.DataId) -> dict[str, int]:
    """Get the exposure id for the dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to query.
    dataId : `dafButler.DataId`
        The dataId for which to return the exposure id.

    Returns
    -------
    dataId : `dict[str, int]`
        A dict containing only the exposure id, keyed by "exposure".
    """
    record = getExpRecordFromDataId(butler, dataId)
    return {"exposure": record.id}
def updateDataIdOrDataCord(dataId: dafButler.DataId, **updateKwargs: Any) -> Mapping[str, Any]:
    """Add key, value pairs to a dataId or data coordinate.

    Parameters
    ----------
    dataId : `dafButler.DataId`
        The dataId to update.
    updateKwargs : `dict[str, Any]`
        The key value pairs to add to the dataId or dataCoord.

    Returns
    -------
    dataId : `Mapping[str, Any]`
        The updated dataId.

    Notes
    -----
    Always returns a dict, so note that if a data coordinate is supplied, a
    dict is returned, changing the type.
    """
    # copy first so the caller's dataId is never mutated
    updated = _assureDict(copy.copy(dataId))
    updated.update(updateKwargs)
    return updated
def fillDataId(butler: DirectButler, dataId: dafButler.DataId) -> Mapping[str, Any]:
    """Given a dataId, fill it with values for all available dimensions.

    Parameters
    ----------
    butler : `lsst.daf.butler.direct_butler.DirectButler`
        The butler. Must be a DirectButler, as the private
        ``_rewrite_data_id`` API is used below.
    dataId : `dafButler.DataId`
        The dataId to fill.

    Returns
    -------
    dataId : `Mapping[str, Any]`
        The filled dataId.

    Notes
    -----
    This function is *slow*! Running this on 20,000 dataIds takes approximately
    7 minutes. Virtually all the slowdown is in the
    butler.registry.expandDataId() call though, so this wrapper is not to blame
    here, and might speed up in future with butler improvements.
    """
    # ensure it's a dict to deal with records etc
    dictId = _assureDict(dataId)

    # this removes extraneous keys that would trip up the registry call
    # using _rewrite_data_id is perhaps ever so slightly slower than popping
    # the bad keys, or making a minimal dataId by hand, but is more
    # reliable/general, so we choose that over the other approach here
    realDataId, _ = butler._rewrite_data_id(dictId, butler.get_dataset_type("raw"))

    # now expand and turn back to a dict - this call is VERY slow
    expandedDictDataId = dict(butler.registry.expandDataId(realDataId, detector=0).mapping)

    # expansion does not guarantee all three identifiers are present, so note
    # which are missing and look them up individually below
    missingExpId = getExpId(expandedDictDataId) is None
    missingDayObs = getDayObs(expandedDictDataId) is None
    missingSeqNum = getSeqNum(expandedDictDataId) is None

    if missingDayObs or missingSeqNum:
        dayObsSeqNum = getDayObsSeqNumFromExposureId(butler, expandedDictDataId)
        expandedDictDataId.update(dayObsSeqNum)

    if missingExpId:
        expId = getExpIdFromDayObsSeqNum(butler, expandedDictDataId)
        expandedDictDataId.update(expId)

    return expandedDictDataId
def _assureDict(
    dataId: dafButler.DataId | dafButler.DimensionRecord,
) -> dict[str, Any]:
    """Turn any data-identifier-like object into a dict.

    Parameters
    ----------
    dataId : `dafButler.DataId` or
        `lsst.daf.butler.dimensions.DimensionRecord`
        The data identifier.

    Returns
    -------
    dataId : `dict[str, Any]`
        The data identifier as a dict.

    Raises
    ------
    RuntimeError
        Raised if the object cannot be coerced to a dict.
    """
    if isinstance(dataId, dict):
        return dataId
    # DataCoordinate-like: expose the underlying mapping with str keys
    if hasattr(dataId, "mapping"):
        return {str(key): value for key, value in dataId.mapping.items()}
    # DimensionRecord-like: go via its embedded dataId
    if hasattr(dataId, "dataId"):
        return {str(key): value for key, value in dataId.dataId.mapping.items()}
    # some other mapping; assume the keys are already strings
    if hasattr(dataId, "keys"):
        return dict(dataId)
    raise RuntimeError(f"Failed to coerce {type(dataId)} to dict")
def getExpRecordFromDataId(butler: dafButler.Butler, dataId: dafButler.DataId) -> dafButler.DimensionRecord:
    """Get the exposure record for a given dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `dafButler.DataId`
        The dataId.

    Returns
    -------
    expRecord : `lsst.daf.butler.dimensions.DimensionRecord`
        The exposure record.

    Raises
    ------
    LookupError
        Raised if no exposure record is found for the dataId.
    RuntimeError
        Raised if the dataId contains neither an exposure id nor a
        (day_obs, seq_num) pair.
    """
    dataId = _assureDict(dataId)
    assert isinstance(dataId, dict), f"dataId must be a dict or DimensionRecord, got {type(dataId)}"

    # build the query constraint from whichever identifiers are available,
    # preferring the exposure id
    expId = getExpId(dataId)
    if expId:
        where = "exposure.id=expId"
        bind: dict[str, Any] = {"expId": expId}
    else:
        dayObs = getDayObs(dataId)
        seqNum = getSeqNum(dataId)
        if not (dayObs and seqNum):
            raise RuntimeError(f"Failed to find either expId or day_obs and seq_num in dataId {dataId}")
        where = "exposure.day_obs=dayObs AND exposure.seq_num=seq_num"
        bind = {"dayObs": dayObs, "seq_num": seqNum}

    expRecords = butler.registry.queryDimensionRecords("exposure", where=where, bind=bind, datasets="raw")

    uniqueRecords = set(expRecords)  # the query can return duplicates
    if not uniqueRecords:
        raise LookupError(f"No exposure records found for {dataId}")
    assert len(uniqueRecords) == 1, f"Found {len(uniqueRecords)} exposure records for {dataId}"
    return uniqueRecords.pop()
def getDayObsSeqNumFromExposureId(butler: dafButler.Butler, dataId: Mapping[str, Any]) -> dict[str, int]:
    """Get the day_obs and seq_num for an exposure id.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    dataId : `Mapping[str, Any]` or `int`
        The dataId containing the exposure id, or a bare exposure id.

    Returns
    -------
    dataId : `dict[str, int]`
        A dict containing only the day_obs and seq_num.

    Raises
    ------
    RuntimeError
        Raised if no exposure id can be found in the dataId.
    LookupError
        Raised if no exposure record exists for the exposure id.
    """
    # Normalize a bare exposure id to a dict *before* anything else: the
    # day_obs/seq_num shortcut below does mapping-style access, which raised
    # AttributeError for int input when this check came later.
    if isinstance(dataId, int):
        dataId = {"exposure": dataId}

    # if the answer is already in the dataId, no registry lookup is needed
    if (dayObs := getDayObs(dataId)) and (seqNum := getSeqNum(dataId)):
        return {"day_obs": dayObs, "seq_num": seqNum}

    dataId = _assureDict(dataId)
    assert isinstance(dataId, dict)

    if not (expId := getExpId(dataId)):
        raise RuntimeError(f"Failed to find exposure id in {dataId}")

    where = "exposure.id=expId"
    expRecords = list(
        butler.registry.queryDimensionRecords("exposure", where=where, bind={"expId": expId}, datasets="raw")
    )
    uniqueExpRecords = set(expRecords)  # the query can return duplicates
    if not uniqueExpRecords:
        raise LookupError(f"No exposure records found for {dataId}")
    assert len(uniqueExpRecords) == 1, f"Found {len(uniqueExpRecords)} exposure records for {dataId}"
    record = uniqueExpRecords.pop()
    return {"day_obs": record.day_obs, "seq_num": record.seq_num}
def getDatasetRefForDataId(
    butler: dafButler.Butler, datasetType: str | dafButler.DatasetType, dataId: dict[str, Any]
) -> dafButler.DatasetRef | None:
    """Get the datasetReference for a dataId.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    datasetType : `str` or `datasetType`
        The dataset type.
    dataId : `dict[str, Any]`
        The dataId.

    Returns
    -------
    datasetRef : `lsst.daf.butler.dimensions.DatasetReference` or `None`
        The dataset reference, or None if the dataset is not found.

    Notes
    -----
    If the dataId lacks an exposure id it is looked up from day_obs/seq_num
    and added to the dataId *in place*, i.e. the caller's dict is mutated.
    """
    if not _expid_present(dataId):
        # NOTE(review): assert is stripped under python -O; this relies on
        # callers always supplying day_obs+seq_num when the expId is absent
        assert _dayobs_present(dataId) and _seqnum_present(dataId)
        # in-place update: the exposure id becomes visible to the caller too
        dataId.update(getExpIdFromDayObsSeqNum(butler, dataId))

    dRef = butler.find_dataset(datasetType, dataId)
    return dRef
def removeDataProduct(
    butler: dafButler.Butler, datasetType: str | dafButler.DatasetType, dataId: dict[str, Any]
) -> None:
    """Remove a data product from the registry. Use with caution.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    datasetType : `str` or `datasetType`
        The dataset type.
    dataId : `dict[str, Any]`
        The dataId.
    """
    # never allow deletion of raw data
    if datasetType == "raw":
        raise RuntimeError("I'm sorry, Dave, I'm afraid I can't do that.")
    dRef = getDatasetRefForDataId(butler, datasetType, dataId)
    if dRef is not None:
        butler.pruneDatasets([dRef], disassociate=True, unstore=True, purge=True)
def _dayobs_present(dataId: Mapping[str, Any]) -> bool:
    """Return True if any day_obs-style key is present in the dataId."""
    return _get_dayobs_key(dataId) is not None
def _seqnum_present(dataId: Mapping[str, Any]) -> bool:
    """Return True if any seq_num-style key is present in the dataId."""
    return _get_seqnum_key(dataId) is not None
def _expid_present(dataId: Mapping[str, Any]) -> bool:
    """Return True if an exposure-id key is present in the dataId."""
    return _get_expid_key(dataId) is not None
716def _get_dayobs_key(dataId: Mapping[str, Any]) -> str | None:
717 """Return the key for day_obs if present, else None"""
718 keys = [k for k in dataId.keys() if k.find("day_obs") != -1]
719 if not keys:
720 return None
721 return keys[0]
724def _get_seqnum_key(dataId: Mapping[str, Any]) -> str | None:
725 """Return the key for seq_num if present, else None"""
726 keys = [k for k in dataId.keys() if k.find("seq_num") != -1]
727 if not keys:
728 return None
729 return keys[0]
732def _get_expid_key(dataId: Mapping[str, Any]) -> str | None:
733 """Return the key for expId if present, else None"""
734 if "exposure.id" in dataId:
735 return "exposure.id"
736 elif "exposure" in dataId:
737 return "exposure"
738 return None
def getDayObs(dataId: Mapping[str, Any] | dafButler.DimensionRecord) -> int | None:
    """Get the day_obs from a dataId.

    Parameters
    ----------
    dataId : `Mapping[str, Any]` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    day_obs : `int` or `None`
        The day_obs value if present, else None.
    """
    # DimensionRecord-like objects carry day_obs as an attribute
    if hasattr(dataId, "day_obs"):
        return dataId.day_obs
    if _dayobs_present(dataId):
        # prefer the plain key, falling back to the dotted form
        if "day_obs" in dataId:
            return dataId["day_obs"]
        return dataId["exposure.day_obs"]
    return None
def getSeqNum(dataId: Mapping[str, Any] | dafButler.DimensionRecord) -> int | None:
    """Get the seq_num from a dataId.

    Parameters
    ----------
    dataId : `Mapping[str, Any]` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    seq_num : `int` or `None`
        The seq_num value if present, else None.
    """
    # DimensionRecord-like objects carry seq_num as an attribute
    if hasattr(dataId, "seq_num"):
        return dataId.seq_num
    if _seqnum_present(dataId):
        # prefer the plain key, falling back to the dotted form
        if "seq_num" in dataId:
            return dataId["seq_num"]
        return dataId["exposure.seq_num"]
    return None
def getExpId(dataId: Mapping[str, Any] | dafButler.DimensionRecord) -> int | None:
    """Get the expId from a dataId.

    Parameters
    ----------
    dataId : `Mapping[str, Any]` or `lsst.daf.butler.DimensionRecord`
        The dataId.

    Returns
    -------
    expId : `int` or `None`
        The expId value if present, else None.
    """
    # DimensionRecord-like objects carry the exposure id as .id
    if hasattr(dataId, "id"):
        return dataId.id
    if _expid_present(dataId):
        # prefer the plain key, falling back to the dotted form
        if "exposure" in dataId:
            return dataId["exposure"]
        return dataId["exposure.id"]
    return None
def getLatissOnSkyDataIds(
    butler: DirectButler,
    skipTypes: Iterable[str] = ("bias", "dark", "flat"),
    checkObject: bool = True,
    full: bool = True,
    startDate: int | None = None,
    endDate: int | None = None,
) -> list[Mapping[str, int | str | None]]:
    """Get a list of all on-sky dataIds taken.

    Parameters
    ----------
    butler : `lsst.daf.butler.direct_butler.DirectButler`
        The butler. Must be a DirectButler when ``full=True``, as fillDataId
        uses private butler APIs.
    skipTypes : `list` of `str`
        Image types to exclude.
    checkObject : `bool`
        Check if the value of target_name (formerly OBJECT) is set and exclude
        if it is not.
    full : `bool`
        Return filled dataIds. Required for some analyses, but runs much
        (~30x) slower.
    startDate : `int`
        The day_obs to start at, inclusive.
    endDate : `int`
        The day_obs to end at, inclusive.

    Returns
    -------
    dataIds : `list` or `dataIds`
        The dataIds.
    """

    def isOnSky(expRecord: dafButler.DimensionRecord) -> bool:
        # on-sky means: not an excluded calib type and, when checkObject is
        # set, the target name was actually filled in
        imageType = expRecord.observation_type
        obj = expRecord.target_name
        if checkObject and obj == "NOTSET":
            return False
        if imageType not in skipTypes:
            return True
        return False

    recordSets = []
    days = getDaysWithData(butler)
    if startDate:
        days = [d for d in days if d >= startDate]
    if endDate:
        days = [d for d in days if d <= endDate]
    days = sorted(set(days))

    where = "exposure.day_obs=dayObs"
    for day in days:
        # queryDataIds would be better here, but it's then hard/impossible
        # to do the filtering for which is on sky, so just take the dataIds
        records = butler.registry.queryDimensionRecords(
            "exposure", where=where, bind={"dayObs": day}, datasets="raw"
        )
        recordSets.append(sortRecordsByDayObsThenSeqNum(records))

    dataIds = [r.dataId for r in filter(isOnSky, itertools.chain(*recordSets))]
    if full:
        # expanding + filling each dataId is what makes full=True ~30x slower
        expandedIds = [
            updateDataIdOrDataCord(butler.registry.expandDataId(dataId, detector=0).mapping)
            for dataId in dataIds
        ]
        filledIds = [fillDataId(butler, dataId) for dataId in expandedIds]
        return filledIds
    else:
        return [updateDataIdOrDataCord(dataId, detector=0) for dataId in dataIds]
def getExpRecord(
    butler: dafButler.Butler,
    instrument: str,
    expId: int | None = None,
    dayObs: int | None = None,
    seqNum: int | None = None,
) -> dafButler.DimensionRecord:
    """Get the exposure record for a given exposure ID or dayObs+seqNum.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler.
    instrument : `str`
        The instrument name, e.g. 'LSSTCam'.
    expId : `int`, optional
        The exposure ID.
    dayObs : `int`, optional
        The day_obs; must be supplied together with ``seqNum``.
    seqNum : `int`, optional
        The seq_num; must be supplied together with ``dayObs``.

    Returns
    -------
    expRecord : `lsst.daf.butler.DimensionRecord`
        The exposure record.

    Raises
    ------
    ValueError
        Raised if neither expId nor a full (dayObs, seqNum) pair is given.
    RuntimeError
        Raised if no unique exposure record matches the query.
    """
    if expId is None and (dayObs is None or seqNum is None):
        raise ValueError("Must supply either expId or (dayObs AND seqNum)")

    # Note you can't use =instrument as bind-strings can't clash with dimensions
    where = "instrument=inst"
    bind: dict[str, str | int] = {"inst": instrument}
    # use "is not None" rather than truthiness, matching the validation above
    if expId is not None:
        where += " AND exposure.id=expId"
        bind["expId"] = expId
    if dayObs is not None and seqNum is not None:
        where += " AND exposure.day_obs=dayObs AND exposure.seq_num=seqNum"
        bind.update({"dayObs": dayObs, "seqNum": seqNum})

    expRecords = butler.registry.queryDimensionRecords("exposure", where=where, bind=bind, datasets="raw")
    filteredExpRecords = list(set(expRecords))  # must call set as this may contain many duplicates
    if len(filteredExpRecords) != 1:
        raise RuntimeError(
            f"Failed to find unique exposure record for {instrument=} with"
            f" {expId=}, {dayObs=}, {seqNum=}, got {len(filteredExpRecords)} records"
        )
    return filteredExpRecords[0]