Coverage for python/lsst/daf/butler/script/queryDatasets.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from astropy.table import Table as AstropyTable
23from collections import defaultdict, namedtuple
24import numpy as np
26from .. import Butler
27from ..core.utils import globToRegex
28from ..cli.utils import sortAstropyTable
# Lightweight hashable record pairing a dataset ref with the (stringified)
# URI shown for it; hashability is what lets ``_Table`` de-duplicate rows.
_RefInfo = namedtuple("RefInfo", "datasetRef uri")


class _Table:
    """Aggregates rows for a single dataset type, and creates an astropy table
    with the aggregated data. Eliminates duplicate rows.
    """

    def __init__(self):
        # A set of ``_RefInfo``: adding the same (datasetRef, uri) pair more
        # than once leaves a single row.
        self.datasetRefs = set()

    def add(self, datasetRef, uri=None):
        """Add a row of information to the table.

        ``uri`` is optional but must be used consistently: either provided
        for every call to a given ``_Table`` instance, or for none of them.

        Parameters
        ----------
        datasetRef : ``DatasetRef``
            A dataset ref that will be added as a row in the table.
        uri : ``ButlerURI``, optional
            The URI to show as a file location in the table, by default None
        """
        if uri:
            # Store the string form so the row holds plain, printable data.
            uri = str(uri)
        self.datasetRefs.add(_RefInfo(datasetRef, uri))

    @staticmethod
    def _columnDtypes(values):
        """Return a dtype hint for each value, or `None` where no hint is
        needed.

        Parameters
        ----------
        values : iterable
            Sample values, one per column.

        Returns
        -------
        dtypes : `list`
            ``np.float64`` for `float` values, ``np.int64`` for `int` values
            and `None` for every other type (matched on the exact type).
        """
        # ``np.float`` was only an alias of the builtin ``float`` and was
        # removed in NumPy 1.24; ``np.float64`` is the dtype it resolved to.
        typeMap = {float: np.float64, int: np.int64}
        return [typeMap.get(type(value)) for value in values]

    def getAstropyTable(self, datasetTypeName):
        """Get the table as an astropy table.

        Parameters
        ----------
        datasetTypeName : `str`
            The dataset type name to show in the ``type`` column of the table.

        Returns
        -------
        table : `astropy.table.Table`
            A table with ``type``, ``run`` and ``id`` columns, one column per
            data ID dimension and, when URIs were added, a ``URI`` column;
            passed through ``sortAstropyTable`` before being returned.

        Raises
        ------
        RuntimeError
            Raised if no dataset refs have been added to this table.
        """
        # Should never happen; adding a dataset should be the action that
        # causes a _Table to be created.
        if not self.datasetRefs:
            raise RuntimeError(f"No DatasetRefs were provided for dataset type {datasetTypeName}")

        # Any one entry serves as the template for column names and types;
        # all refs in a table are expected to share the same dimensions.
        refInfo = next(iter(self.datasetRefs))
        fullDataId = refInfo.datasetRef.dataId.full
        dimensions = list(fullDataId.keys())
        columnNames = ["type", "run", "id",
                       *[str(item) for item in dimensions]]

        # Need to hint the column types for numbers since the per-row
        # constructor of Table does not work this out on its own and sorting
        # will not work properly without.
        columnTypes = [None, None, np.int64,
                       *self._columnDtypes(fullDataId.values())]
        if refInfo.uri:
            columnNames.append("URI")
            columnTypes.append(None)

        rows = []
        for refInfo in self.datasetRefs:
            row = [datasetTypeName,
                   refInfo.datasetRef.run,
                   refInfo.datasetRef.id,
                   *refInfo.datasetRef.dataId.full.values()]
            if refInfo.uri:
                row.append(refInfo.uri)
            rows.append(row)

        dataset_table = AstropyTable(np.array(rows), names=columnNames, dtype=columnTypes)
        return sortAstropyTable(dataset_table, dimensions, ["type", "run"])
def queryDatasets(repo, glob, collections, where, find_first, show_uri):
    """Query a repository for datasets and tabulate them by dataset type.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file describing the
        repo and its location.
    glob : iterable [`str`]
        Glob-style search strings that fully or partially identify the
        dataset type names to search for. When this yields no expression,
        all dataset types are searched.
    collections : iterable [`str`]
        Glob-style search strings that fully or partially identify the
        collections to search. When empty, all collections are searched.
    where : `str`
        A string expression similar to a SQL WHERE clause. May involve any
        column of a dimension table or (as a shortcut for the primary key
        column of a dimension table) dimension name.
    find_first : `bool`
        For each result data ID, only yield one DatasetRef of each DatasetType,
        from the first collection in which a dataset of that dataset type
        appears (according to the order of `collections` passed in). If used,
        `collections` must specify at least one expression and must not contain
        wildcards.
    show_uri : `bool`
        If True, include the dataset URI in the output.

    Returns
    -------
    datasetTables : `list` [``astropy.table.Table``]
        A list of astropy tables, one for each dataset type.
    """
    butler = Butler(repo)

    # An empty expression means "everything", which the registry spells as
    # ``...``.
    datasetTypeExpr = globToRegex(glob) or ...

    if not collections:
        collections = ...
    elif not find_first:
        # With find_first the collection names are passed through untouched
        # (wildcards are not allowed there), so only convert otherwise.
        collections = globToRegex(collections)

    foundRefs = butler.registry.queryDatasets(
        datasetType=datasetTypeExpr,
        collections=collections,
        where=where,
        findFirst=find_first,
    )

    # One _Table per dataset type name (component URIs are filed under the
    # component's type name), created on first use.
    tablesByType = defaultdict(_Table)

    for ref in foundRefs:
        if show_uri:
            primary, components = butler.getURIs(ref, collections=ref.run)
            if primary:
                tablesByType[ref.datasetType.name].add(ref, primary)
            for componentName, componentURI in components.items():
                componentTypeName = ref.datasetType.componentTypeName(componentName)
                tablesByType[componentTypeName].add(ref, componentURI)
        else:
            tablesByType[ref.datasetType.name].add(ref)

    results = []
    for typeName, table in tablesByType.items():
        results.append(table.getAstropyTable(typeName))
    return results