Coverage for python/lsst/daf/butler/script/exportCalibs.py: 13% of 57 statements (coverage.py v7.3.2, created at 2023-10-25 15:13 +0000)
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

import logging
import os
from collections.abc import Iterable
from typing import TYPE_CHECKING

from astropy.table import Table

from .._butler import Butler
from ..registry import CollectionType

if TYPE_CHECKING:
    from lsst.daf.butler import DatasetRef, DatasetType, Registry

log = logging.getLogger(__name__)


def parseCalibrationCollection(
    registry: Registry, collection: str, datasetTypes: Iterable[DatasetType]
) -> tuple[list[str], list[DatasetRef]]:
    """Search a calibration collection for calibration datasets.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry to use.
    collection : `str`
        Collection to search. This should be a CALIBRATION
        collection.
    datasetTypes : `list` [`lsst.daf.butler.DatasetType`]
        List of calibration dataset types.

    Returns
    -------
    exportCollections : `list` [`str`]
        List of collections to save on export.
    exportDatasets : `list` [`lsst.daf.butler.DatasetRef`]
        Datasets to save on export.

    Raises
    ------
    RuntimeError
        Raised if the collection to search is not a CALIBRATION collection.
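
    Examples
    --------
    A minimal usage sketch; the repository path and collection name here
    are illustrative placeholders, not a real repository:

    >>> from lsst.daf.butler import Butler
    >>> butler = Butler("/path/to/repo", writeable=False)  # hypothetical repo
    >>> calibTypes = [
    ...     dt
    ...     for dt in butler.registry.queryDatasetTypes(...)
    ...     if dt.isCalibration()
    ... ]
    >>> runs, refs = parseCalibrationCollection(
    ...     butler.registry, "LATISS/calib", calibTypes  # hypothetical collection
    ... )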
65 """
    if registry.getCollectionType(collection) != CollectionType.CALIBRATION:
        raise RuntimeError(f"Collection {collection} is not a CALIBRATION collection.")

    exportCollections = []
    exportDatasets = []
    for calibType in datasetTypes:
        associations = registry.queryDatasetAssociations(
            calibType, collections=collection, collectionTypes=[CollectionType.CALIBRATION]
        )
        for result in associations:
            # Need an expanded dataId in case file templates will be used
            # in the transfer.
            dataId = registry.expandDataId(result.ref.dataId)
            ref = result.ref.expanded(dataId)
            exportDatasets.append(ref)
            assert ref.run is not None, "These refs must all be resolved."
            exportCollections.append(ref.run)
    return exportCollections, exportDatasets


def exportCalibs(
    repo: str, directory: str, collections: Iterable[str], dataset_type: Iterable[str], transfer: str
) -> Table:
    """Export calibrations from the butler for import elsewhere.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file
        describing the repo and its location.
    directory : `str`
        URI string of the directory to write the exported
        calibrations.
    collections : `list` [`str`]
        Data collections to pull calibrations from. Must be an
        existing `~CollectionType.CHAINED` or
        `~CollectionType.CALIBRATION` collection.
    dataset_type : `tuple` [`str`]
        The dataset types to export. Default is to export all.
    transfer : `str`
        The transfer mode to use for exporting.

    Returns
    -------
    datasetTable : `astropy.table.Table`
        A table containing relevant information about the calibrations
        exported.

    Raises
    ------
    RuntimeError
        Raised if the output directory already exists.
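
    Examples
    --------
    A minimal usage sketch; the paths and collection name below are
    placeholders for a real repository:

    >>> table = exportCalibs(
    ...     "/path/to/repo",  # hypothetical repository
    ...     "/path/to/export-dir",  # must not already exist
    ...     collections=["LATISS/calib"],  # hypothetical collection
    ...     dataset_type=("bias", "dark"),
    ...     transfer="auto",
    ... )
    >>> print(table)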
118 """
    butler = Butler(repo, writeable=False)
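
    # ``...`` (Ellipsis) is the registry-query wildcard for "match
    # everything", so omitting dataset types or collections exports
    # from all of them.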
    dataset_type_query = dataset_type or ...
    collections_query = collections or ...

    calibTypes = [
        datasetType
        for datasetType in butler.registry.queryDatasetTypes(dataset_type_query)
        if datasetType.isCalibration()
    ]

    collectionsToExport = []
    datasetsToExport = []
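
    # Walk the requested collections; flattenChains=True follows CHAINED
    # collections down to their members so nested CALIBRATION collections
    # are found, and includeChains=True keeps the chains themselves so
    # they are exported too.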
    for collection in butler.registry.queryCollections(
        collections_query,
        flattenChains=True,
        includeChains=True,
        collectionTypes={CollectionType.CALIBRATION, CollectionType.CHAINED},
    ):
        log.info("Checking collection: %s", collection)

        # Get collection information.
        collectionsToExport.append(collection)
        collectionType = butler.registry.getCollectionType(collection)
        if collectionType == CollectionType.CALIBRATION:
            exportCollections, exportDatasets = parseCalibrationCollection(
                butler.registry, collection, calibTypes
            )
            collectionsToExport.extend(exportCollections)
            datasetsToExport.extend(exportDatasets)

    if os.path.exists(directory):
        raise RuntimeError(f"Export directory exists: {directory}")
    os.makedirs(directory)
    with butler.export(directory=directory, format="yaml", transfer=transfer) as export:
        collectionsToExport = list(set(collectionsToExport))
        datasetsToExport = list(set(datasetsToExport))

        for exportable in collectionsToExport:
            try:
                export.saveCollection(exportable)
            except Exception as e:
                log.warning("Did not save collection %s due to %s.", exportable, e)

        log.info("Saving %d dataset(s)", len(datasetsToExport))
        export.saveDatasets(datasetsToExport)

    sortedDatasets = sorted(datasetsToExport, key=lambda x: x.datasetType.name)

    requiredDimensions: set[str] = set()
    for ref in sortedDatasets:
        requiredDimensions.update(ref.dimensions.names)
    dimensionColumns = {
        dimensionName: [ref.dataId.get(dimensionName, "") for ref in sortedDatasets]
        for dimensionName in requiredDimensions
    }
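
    # One row per exported dataset; which dimension columns appear (e.g.
    # instrument or detector) depends on the dataset types exported.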
    return Table(
        {
            "calibrationType": [ref.datasetType.name for ref in sortedDatasets],
            "run": [ref.run for ref in sortedDatasets],
            **dimensionColumns,
        }
    )