Coverage for python/lsst/daf/butler/script/exportCalibs.py: 13% (57 statements)
coverage.py v7.4.4, created at 2024-04-05 09:58 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

import logging
import os
from collections.abc import Iterable
from typing import TYPE_CHECKING

from astropy.table import Table

from .._butler import Butler
from ..registry import CollectionType

if TYPE_CHECKING:
    from lsst.daf.butler import DatasetRef, DatasetType, Registry

log = logging.getLogger(__name__)


def parseCalibrationCollection(
    registry: Registry, collection: str, datasetTypes: Iterable[DatasetType]
) -> tuple[list[str], list[DatasetRef]]:
    """Search a calibration collection for calibration datasets.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry to use.
    collection : `str`
        Collection to search. This should be a CALIBRATION
        collection.
    datasetTypes : `~collections.abc.Iterable` [`lsst.daf.butler.DatasetType`]
        Calibration dataset types to search for.

    Returns
    -------
    exportCollections : `list` [`str`]
        List of collections to save on export.
    exportDatasets : `list` [`lsst.daf.butler.DatasetRef`]
        Datasets to save on export.

    Raises
    ------
    RuntimeError
        Raised if the collection to search is not a CALIBRATION collection.
    """
    if registry.getCollectionType(collection) != CollectionType.CALIBRATION:
        raise RuntimeError(f"Collection {collection} is not a CALIBRATION collection.")

    exportCollections = []
    exportDatasets = []
    for calibType in datasetTypes:
        associations = registry.queryDatasetAssociations(
            calibType, collections=collection, collectionTypes=[CollectionType.CALIBRATION]
        )
        for result in associations:
            # Need an expanded dataId in case file templates will be used
            # in the transfer.
            dataId = registry.expandDataId(result.ref.dataId)
            ref = result.ref.expanded(dataId)
            exportDatasets.append(ref)
            assert ref.run is not None, "These refs must all be resolved."
            exportCollections.append(ref.run)
    return exportCollections, exportDatasets
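

# A minimal usage sketch for parseCalibrationCollection, kept as a comment so
# it does not run on import. The repo path and collection name here are
# hypothetical; substitute values from your own repository:
#
#     butler = Butler.from_config("/repo/main", writeable=False)
#     calibTypes = [
#         dt for dt in butler.registry.queryDatasetTypes(...) if dt.isCalibration()
#     ]
#     runs, refs = parseCalibrationCollection(butler.registry, "LSSTCam/calib", calibTypes)
#     # ``runs`` lists the RUN collections backing the certified datasets;
#     # ``refs`` are the expanded DatasetRefs ready for export.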


def exportCalibs(
    repo: str, directory: str, collections: Iterable[str], dataset_type: Iterable[str], transfer: str
) -> Table:
    """Export calibrations from a butler repository.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file
        describing the repo and its location.
    directory : `str`
        URI string of the directory to write the exported
        calibrations.
    collections : `~collections.abc.Iterable` [`str`]
        Data collections to pull calibrations from. These must be
        existing `~CollectionType.CHAINED` or
        `~CollectionType.CALIBRATION` collections.
    dataset_type : `~collections.abc.Iterable` [`str`]
        The dataset types to export. Default is to export all.
    transfer : `str`
        The transfer mode to use for exporting.

    Returns
    -------
    datasetTable : `astropy.table.Table`
        A table containing relevant information about the calibrations
        exported.

    Raises
    ------
    RuntimeError
        Raised if the output directory already exists.
    """
    butler = Butler.from_config(repo, writeable=False)

    # ``...`` is the registry wildcard meaning "match everything"; fall back
    # to it when no explicit selection was given.
    dataset_type_query = dataset_type or ...
    collections_query = collections or ...

    calibTypes = [
        datasetType
        for datasetType in butler.registry.queryDatasetTypes(dataset_type_query)
        if datasetType.isCalibration()
    ]

    collectionsToExport = []
    datasetsToExport = []

    for collection in butler.registry.queryCollections(
        collections_query,
        flattenChains=True,
        includeChains=True,
        collectionTypes={CollectionType.CALIBRATION, CollectionType.CHAINED},
    ):
        log.info("Checking collection: %s", collection)

        # Get collection information.
        collectionsToExport.append(collection)
        collectionType = butler.registry.getCollectionType(collection)
        if collectionType == CollectionType.CALIBRATION:
            exportCollections, exportDatasets = parseCalibrationCollection(
                butler.registry, collection, calibTypes
            )
            collectionsToExport.extend(exportCollections)
            datasetsToExport.extend(exportDatasets)

    if os.path.exists(directory):
        raise RuntimeError(f"Export directory exists: {directory}")
    os.makedirs(directory)
    with butler.export(directory=directory, format="yaml", transfer=transfer) as export:
        collectionsToExport = list(set(collectionsToExport))
        datasetsToExport = list(set(datasetsToExport))

        for exportable in collectionsToExport:
            try:
                export.saveCollection(exportable)
            except Exception as e:
                log.warning("Did not save collection %s due to %s.", exportable, e)

        log.info("Saving %d dataset(s)", len(datasetsToExport))
        export.saveDatasets(datasetsToExport)

    sortedDatasets = sorted(datasetsToExport, key=lambda x: x.datasetType.name)

    requiredDimensions: set[str] = set()
    for ref in sortedDatasets:
        requiredDimensions.update(ref.dimensions.names)
    dimensionColumns = {
        dimensionName: [ref.dataId.get(dimensionName, "") for ref in sortedDatasets]
        for dimensionName in requiredDimensions
    }

    return Table(
        {
            "calibrationType": [ref.datasetType.name for ref in sortedDatasets],
            "run": [ref.run for ref in sortedDatasets],
            **dimensionColumns,
        }
    )
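

# A minimal end-to-end sketch of exportCalibs, again as a comment so the
# module stays import-safe. The repo path, export directory, collection
# names, and dataset types below are hypothetical:
#
#     table = exportCalibs(
#         repo="/repo/main",
#         directory="/tmp/calib-export",
#         collections=["LSSTCam/calib"],
#         dataset_type=["bias", "dark", "flat"],
#         transfer="copy",
#     )
#     table.pprint_all()  # astropy Table summary of the exported datasets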