Coverage for python/lsst/daf/butler/script/queryDatasets.py : 16%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23from astropy.table import Table as AstropyTable
24from collections import defaultdict, namedtuple
25from typing import Any, Dict
26import numpy as np
28from .. import Butler
29from ..core.utils import globToRegex
30from ..cli.utils import sortAstropyTable
33_RefInfo = namedtuple("_RefInfo", ["datasetRef", "uri"])
36class _Table:
37 """Aggregates rows for a single dataset type, and creates an astropy table
38 with the aggregated data. Eliminates duplicate rows.
39 """
41 def __init__(self):
42 self.datasetRefs = set()
44 def add(self, datasetRef, uri=None):
45 """Add a row of information to the table.
47 ``uri`` is optional but must be the consistent; provided or not, for
48 every call to a ``_Table`` instance.
50 Parameters
51 ----------
52 datasetRef : ``DatasetRef``
53 A dataset ref that will be added as a row in the table.
54 uri : ``ButlerURI``, optional
55 The URI to show as a file location in the table, by default None
56 """
57 if uri:
58 uri = str(uri)
59 self.datasetRefs.add(_RefInfo(datasetRef, uri))
61 def getAstropyTable(self, datasetTypeName):
62 """Get the table as an astropy table.
64 Parameters
65 ----------
66 datasetTypeName : `str`
67 The dataset type name to show in the ``type`` column of the table.
69 Returns
70 -------
71 table : `astropy.table._Table`
72 The table with the provided column names and rows.
73 """
74 # Should never happen; adding a dataset should be the action that
75 # causes a _Table to be created.
76 if not self.datasetRefs:
77 raise RuntimeError(f"No DatasetRefs were provided for dataset type {datasetTypeName}")
79 refInfo = next(iter(self.datasetRefs))
80 dimensions = list(refInfo.datasetRef.dataId.full.keys())
81 columnNames = ["type", "run", "id",
82 *[str(item) for item in dimensions]]
84 # Need to hint the column types for numbers since the per-row
85 # constructor of Table does not work this out on its own and sorting
86 # will not work properly without.
87 typeMap = {float: np.float, int: np.int64}
88 columnTypes = [None, None, np.int64,
89 *[typeMap.get(type(value)) for value in refInfo.datasetRef.dataId.full.values()]]
90 if refInfo.uri:
91 columnNames.append("URI")
92 columnTypes.append(None)
94 rows = []
95 for refInfo in self.datasetRefs:
96 row = [datasetTypeName,
97 refInfo.datasetRef.run,
98 refInfo.datasetRef.id,
99 *[value for value in refInfo.datasetRef.dataId.full.values()]]
100 if refInfo.uri:
101 row.append(refInfo.uri)
102 rows.append(row)
104 dataset_table = AstropyTable(np.array(rows), names=columnNames, dtype=columnTypes)
105 return sortAstropyTable(dataset_table, dimensions, ["type", "run"])
def queryDatasets(repo, glob, collections, where, find_first, show_uri):
    """Query a repository for dataset refs and tabulate them per dataset type.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file describing the
        repo and its location.
    glob : iterable [`str`]
        Glob-style search strings that fully or partially identify the
        dataset type names to search for. If empty, all dataset types are
        searched.
    collections : iterable [`str`]
        Glob-style search strings that fully or partially identify the
        collections to search for. If empty, all collections are searched.
    where : `str`
        A string expression similar to a SQL WHERE clause. May involve any
        column of a dimension table or (as a shortcut for the primary key
        column of a dimension table) dimension name.
    find_first : `bool`
        For each result data ID, only yield one DatasetRef of each DatasetType,
        from the first collection in which a dataset of that dataset type
        appears (according to the order of ``collections`` passed in). If
        used, ``collections`` must specify at least one expression and must
        not contain wildcards; in that case the collection names are passed
        to the registry as given rather than converted to regular
        expressions.
    show_uri : `bool`
        If True, include the dataset URI in the output.

    Returns
    -------
    datasetTables : `list` [`astropy.table.Table`]
        A list of astropy tables, one for each dataset type.
    """
    butler = Butler(repo)

    # An empty expression list means "match everything", spelled ``...``.
    dataset: Any = globToRegex(glob) or ...

    if not collections:
        collections = ...
    elif not find_first:
        # Only expand globs when wildcards are permitted (not find-first).
        collections = globToRegex(collections)

    refs = butler.registry.queryDatasets(
        datasetType=dataset,
        collections=collections,
        where=where,
        findFirst=find_first,
    )

    # One _Table per dataset type name, created on first use.
    tables: Dict[str, _Table] = defaultdict(_Table)

    for ref in refs:
        if show_uri:
            primary, components = butler.getURIs(ref, collections=ref.run)
            if primary:
                tables[ref.datasetType.name].add(ref, primary)
            # Each component URI is listed under its component dataset type
            # name.
            for componentName, componentURI in components.items():
                componentTypeName = ref.datasetType.componentTypeName(componentName)
                tables[componentTypeName].add(ref, componentURI)
        else:
            tables[ref.datasetType.name].add(ref)

    return [tbl.getAstropyTable(typeName) for typeName, tbl in tables.items()]