Coverage for python / lsst / daf / butler / script / queryCollections.py: 7%
123 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-26 08:49 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30from collections.abc import Iterable
31from fnmatch import fnmatch
32from typing import Literal
34from astropy.table import Column, Table, hstack, vstack
36from .._butler import Butler
37from .._butler_collections import CollectionInfo
38from .._collection_type import CollectionType
41def _parseDatasetTypes(dataset_types: frozenset[str] | list[str] | None) -> list[str]:
42 """Parse dataset types from a collection info object or a list of strings.
44 Parameters
45 ----------
46 dataset_types : `frozenset` [`str`] | `list` [`str`] | `None`
47 The dataset types to parse. If `None`, an empty list is returned.
48 If a `frozenset` or `list` is provided, it is returned as a list.
50 Returns
51 -------
52 dataset_types : `list` [`str`]
53 The parsed dataset types.
54 """
55 return [""] if not dataset_types else list(dataset_types)
def _getTable(
    repo: str,
    glob: Iterable[str],
    collection_type: Iterable[CollectionType],
    inverse: bool,
    show_dataset_types: bool = False,
    exclude_dataset_types: Iterable[str] | None = None,
) -> Table:
    """Run queryCollections and return the results in Table form.

    Only lists the first child (or parent if `inverse` is `True`) in the
    description column.

    Parameters
    ----------
    repo : `str`
        The Butler repository location.
    glob : `collections.abc.Iterable` of `str`
        Wildcard to pass to ``queryCollections``.
    collection_type : `~collections.abc.Iterable` [ `CollectionType` ]
        Same as `queryCollections`.
    inverse : `bool`
        True if parent CHAINED collections of each collection should be listed
        in the description column, False if children of CHAINED collections
        should be listed.
    show_dataset_types : `bool`, optional
        If `True`, also show the dataset types present within each collection.
    exclude_dataset_types : `~collections.abc.Iterable` [ `str` ], optional
        A glob-style iterable of dataset types to exclude.
        Only has an effect if `show_dataset_types` is True.

    Returns
    -------
    collections : `astropy.table.Table`
        Same as `queryCollections`.
    """
    typeCol = "Type"
    descriptionCol = "Parents" if inverse else "Children"
    table = Table(
        names=("Name", typeCol, descriptionCol),
        dtype=(str, str, str),
    )
    if show_dataset_types:
        table.add_column(Column(name="Dataset Types", dtype=str))

    # Collection queries only touch the registry, so skip datastore
    # initialization (consistent with _getTree).
    with Butler.from_config(repo, without_datastore=True) as butler:

        def addDatasetTypes(collection_table: Table, collection: str, dataset_types: list[str]) -> Table:
            # A [""] placeholder means the summary was not pre-fetched for
            # this collection; query it now.
            if dataset_types[0] == "":
                cinfo = butler.collections.get_info(collection, include_summary=True)
                dataset_types = _parseDatasetTypes(cinfo.dataset_types)
            if exclude_dataset_types:
                dataset_types = [
                    dt
                    for dt in dataset_types
                    if not any(fnmatch(dt, pattern) for pattern in exclude_dataset_types)
                ]
                # Restore the [""] placeholder if filtering removed everything.
                dataset_types = _parseDatasetTypes(dataset_types)
            types_table = Table({"Dataset Types": sorted(dataset_types)}, dtype=(str,))
            # hstack pads the shorter (one-row) side with masked cells;
            # filled("") turns those into empty strings.
            collection_table = hstack([collection_table, types_table]).filled("")
            return collection_table

        def addCollection(info: CollectionInfo, relation: str) -> None:
            try:
                info_relatives = getattr(info, relation)
            except AttributeError:
                info_relatives = []
            # Parent results can be returned in a non-deterministic order, so
            # sort them to make the output deterministic.
            if relation == "parents":
                info_relatives = sorted(info_relatives)
            if info_relatives:
                collection_table = Table([[info.name], [info.type.name]], names=("Name", typeCol))
                description_table = Table(names=(descriptionCol,), dtype=(str,))
                for info_relative in info_relatives:
                    relative_table = Table([[info_relative]], names=(descriptionCol,))
                    if show_dataset_types:
                        relative_table = addDatasetTypes(relative_table, info_relative, [""])
                    description_table = vstack([description_table, relative_table])
                # Name/type appear only in the first row; masked padding
                # becomes "" so the relatives line up underneath.
                collection_table = hstack([collection_table, description_table]).filled("")
                for row in collection_table:
                    table.add_row(row)
            else:
                collection_table = Table(
                    [[info.name], [info.type.name], [""]], names=("Name", typeCol, descriptionCol)
                )
                if show_dataset_types:
                    collection_table = addDatasetTypes(collection_table, info.name, [""])
                for row in collection_table:
                    table.add_row(row)

        collections = sorted(
            butler.collections.query_info(
                glob or "*",
                collection_types=frozenset(collection_type),
                include_parents=inverse,
                include_summary=show_dataset_types,
            )
        )
        if inverse:
            for info in collections:
                addCollection(info, "parents")
            # If none of the collections has a parent collection then remove
            # the description column.
            if not any(c for c in table[descriptionCol]):
                del table[descriptionCol]
        else:
            for info in collections:
                if info.type == CollectionType.CHAINED:
                    addCollection(info, "children")
                else:
                    addCollection(info, "self")
            # If there aren't any CHAINED collections in the results then
            # remove the description column.
            if not any(columnVal == CollectionType.CHAINED.name for columnVal in table[typeCol]):
                del table[descriptionCol]

        return table
def _getTree(
    repo: str,
    glob: Iterable[str],
    collection_type: Iterable[CollectionType],
    inverse: bool,
    show_dataset_types: bool = False,
    exclude_dataset_types: Iterable[str] | None = None,
) -> Table:
    """Run queryCollections and return the results in a table representing tree
    form.

    Recursively lists children (or parents if `inverse` is `True`), with each
    nesting level indicated by indentation of the collection name.

    Parameters
    ----------
    repo : `str`
        Butler repository location.
    glob : `collections.abc.Iterable` of `str`
        Wildcards to pass to ``queryCollections``.
    collection_type
        Same as `queryCollections`
    inverse : `bool`
        True if parent CHAINED datasets of each dataset should be listed in the
        description column, False if children of CHAINED datasets should be
        listed.
    show_dataset_types : `bool`, optional
        If `True`, also show the dataset types present within each collection.
    exclude_dataset_types : `~collections.abc.Iterable` [ `str` ], optional
        A glob-style iterable of dataset types to exclude.
        Only has an effect if `show_dataset_types` is True.

    Returns
    -------
    collections : `astropy.table.Table`
        Same as `queryCollections`
    """
    table = Table(
        names=("Name", "Type"),
        dtype=(str, str),
    )
    if show_dataset_types:
        table.add_column(Column(name="Dataset Types", dtype=str))

    # Only registry queries are made here, so the datastore is not needed.
    with Butler.from_config(repo, without_datastore=True) as butler:

        # Recursively append `info` and its relatives to `table`, indenting
        # the name by one space per nesting level.
        def addCollection(info: CollectionInfo, level: int = 0) -> None:
            collection_table = Table([[" " * level + info.name], [info.type.name]], names=["Name", "Type"])
            if show_dataset_types:
                if info.type == CollectionType.CHAINED:
                    # CHAINED collections get an empty dataset-types cell;
                    # the pad keeps the column lengths consistent for hstack.
                    collection_table = hstack(
                        [collection_table, Table([[""] * len(collection_table)], names=["Dataset Types"])]
                    )
                else:
                    dataset_types = _parseDatasetTypes(info.dataset_types)
                    if exclude_dataset_types:
                        dataset_types = [
                            dt
                            for dt in dataset_types
                            if not any(fnmatch(dt, pattern) for pattern in exclude_dataset_types)
                        ]
                        # Restore the [""] placeholder if everything was
                        # filtered out.
                        dataset_types = _parseDatasetTypes(dataset_types)
                    dataset_types_table = Table({"Dataset Types": sorted(dataset_types)}, dtype=(str,))
                    # hstack pads the one-row side with masked cells, which
                    # filled("") converts to empty strings.
                    collection_table = hstack([collection_table, dataset_types_table]).filled("")
            for row in collection_table:
                table.add_row(row)

            if inverse:
                assert info.parents is not None  # For mypy.
                # Sorted for deterministic output.
                for pname in sorted(info.parents):
                    pinfo = butler.collections.get_info(
                        pname, include_parents=inverse, include_summary=show_dataset_types
                    )
                    addCollection(pinfo, level + 1)
            else:
                if info.type == CollectionType.CHAINED:
                    for name in info.children:
                        cinfo = butler.collections.get_info(name, include_summary=show_dataset_types)
                        addCollection(cinfo, level + 1)

        collections = butler.collections.query_info(
            glob or "*",
            collection_types=frozenset(collection_type),
            include_parents=inverse,
            include_summary=show_dataset_types,
        )
        for collection in sorted(collections):
            addCollection(collection)
        return table
def _getList(
    repo: str,
    glob: Iterable[str],
    collection_type: Iterable[CollectionType],
    flatten_chains: bool,
    show_dataset_types: bool = False,
    exclude_dataset_types: Iterable[str] | None = None,
) -> Table:
    """Return collection results as a table representing a flat list of
    collections.

    Parameters
    ----------
    repo : `str`
        Butler repository location.
    glob : `collections.abc.Iterable` of `str`
        Wildcards to pass to ``queryCollections``.
    collection_type : `~collections.abc.Iterable` [ `CollectionType` ]
        Same as `queryCollections`.
    flatten_chains : `bool`
        If `True`, flatten the tree of CHAINED collections.
    show_dataset_types : `bool`, optional
        If `True`, also show the dataset types present within each collection.
    exclude_dataset_types : `~collections.abc.Iterable` [ `str` ], optional
        A glob-style iterable of dataset types to exclude.
        Only has an effect if `show_dataset_types` is True.

    Returns
    -------
    collections : `astropy.table.Table`
        Same as `queryCollections`.
    """
    table = Table(
        names=("Name", "Type"),
        dtype=(str, str),
    )
    if show_dataset_types:
        table.add_column(Column(name="Dataset Types", dtype=str))

    # Listing collections only touches the registry, so skip datastore
    # initialization (consistent with _getTree).
    with Butler.from_config(repo, without_datastore=True) as butler:

        def addCollection(info: CollectionInfo) -> None:
            # One row per collection; expanded below when dataset types are
            # shown.
            collection_table = Table([[info.name], [info.type.name]], names=["Name", "Type"])
            if show_dataset_types:
                dataset_types = _parseDatasetTypes(info.dataset_types)
                if exclude_dataset_types:
                    dataset_types = [
                        dt
                        for dt in dataset_types
                        if not any(fnmatch(dt, pattern) for pattern in exclude_dataset_types)
                    ]
                    # Restore the [""] placeholder if everything was excluded.
                    dataset_types = _parseDatasetTypes(dataset_types)
                dataset_types_table = Table({"Dataset Types": sorted(dataset_types)}, dtype=(str,))
                # hstack pads the one-row side with masked cells; filled("")
                # makes name/type appear only on the first row.
                collection_table = hstack([collection_table, dataset_types_table]).filled("")
            for row in collection_table:
                table.add_row(row)

        collections = list(
            butler.collections.query_info(
                glob or "*",
                collection_types=frozenset(collection_type),
                flatten_chains=flatten_chains,
                include_summary=show_dataset_types,
            )
        )
        for collection in collections:
            addCollection(collection)

    return table
def queryCollections(
    repo: str,
    glob: Iterable[str],
    collection_type: Iterable[CollectionType],
    chains: Literal["INVERSE-TABLE", "TABLE", "TREE", "INVERSE-TREE", "FLATTEN", "NO-CHILDREN"],
    show_dataset_types: bool = False,
    exclude_dataset_types: Iterable[str] | None = None,
) -> Table:
    """Get the collections whose names match an expression.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file describing the
        repo and its location.
    glob : `~collections.abc.Iterable` [`str`]
        A list of glob-style search string that fully or partially identify
        the dataset type names to search for.
    collection_type : `~collections.abc.Iterable` [ `CollectionType` ], \
            optional
        If provided, only return collections of these types.
    chains : `str`
        Affects contents and formatting of results, see
        ``cli.commands.query_collections``.
    show_dataset_types : `bool`, optional
        If `True`, include the dataset types present within each collection.
    exclude_dataset_types : `~collections.abc.Iterable` [ `str` ], optional
        A glob-style iterable of dataset types to exclude.
        Only has an effect if `show_dataset_types` is True.

    Returns
    -------
    collections : `astropy.table.Table`
        A table containing information about collections.
    """
    # Dispatch on the requested output format; the INVERSE-* variants list
    # parents instead of children.
    if chains in ("TABLE", "INVERSE-TABLE"):
        return _getTable(
            repo, glob, collection_type, chains == "INVERSE-TABLE", show_dataset_types, exclude_dataset_types
        )
    if chains in ("TREE", "INVERSE-TREE"):
        return _getTree(
            repo, glob, collection_type, chains == "INVERSE-TREE", show_dataset_types, exclude_dataset_types
        )
    if chains in ("FLATTEN", "NO-CHILDREN"):
        return _getList(
            repo, glob, collection_type, chains == "FLATTEN", show_dataset_types, exclude_dataset_types
        )
    raise RuntimeError(f"Value for --chains not recognized: {chains}")