Coverage for python/lsst/daf/butler/script/queryDatasets.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from astropy.table import Table as AstropyTable
23from collections import defaultdict, namedtuple
24from numpy import array
26from .. import Butler
27from ..core.utils import globToRegex
# Pairs a dataset ref with the (already stringified) URI to show for it, if
# any. Being a tuple it is hashable, so identical pairs collapse to a single
# entry when stored in a set.
_RefInfo = namedtuple("RefInfo", "datasetRef uri")


class _Table:
    """Aggregates rows for a single dataset type, and creates an astropy table
    with the aggregated data. Eliminates duplicate rows.
    """

    def __init__(self):
        # A set, so that adding the same (datasetRef, uri) pair more than once
        # produces a single row.
        self.datasetRefs = set()

    def add(self, datasetRef, uri=None):
        """Add a row of information to the table.

        ``uri`` is optional but should be used consistently (always provided
        or never provided) for every call to a given ``_Table`` instance.

        Parameters
        ----------
        datasetRef : ``DatasetRef``
            A dataset ref that will be added as a row in the table.
        uri : ``ButlerURI``, optional
            The URI to show as a file location in the table, by default None.
        """
        # Store the URI as a string so the stored pair is what gets displayed.
        if uri:
            uri = str(uri)
        self.datasetRefs.add(_RefInfo(datasetRef, uri))

    def getAstropyTable(self, datasetTypeName):
        """Get the table as an astropy table.

        Parameters
        ----------
        datasetTypeName : `str`
            The dataset type name to show in the ``type`` column of the table.

        Returns
        -------
        table : `astropy.table.Table`
            A table with the columns ``type``, ``run``, ``id``, one column per
            data ID key, and a trailing ``URI`` column if any added row had a
            URI. Rows are emitted in ``sorted`` order of the stored entries.

        Raises
        ------
        RuntimeError
            Raised if no rows were added to this table.
        """
        # Should never happen; adding a dataset should be the action that
        # causes a _Table to be created.
        if not self.datasetRefs:
            raise RuntimeError(f"No DatasetRefs were provided for dataset type {datasetTypeName}")

        # The data ID keys of one (arbitrary) entry name the data ID columns.
        # NOTE(review): assumes every ref in this table has the same data ID
        # keys - confirm against callers.
        firstRef = next(iter(self.datasetRefs)).datasetRef
        columnNames = ["type", "run", "id",
                       *[str(item) for item in firstRef.dataId.keys()]]

        # Decide once, for the whole table, whether there is a URI column.
        # Deciding per row would produce rows of differing lengths if URIs
        # were supplied for only some of the entries.
        showURI = any(refInfo.uri for refInfo in self.datasetRefs)
        if showURI:
            columnNames.append("URI")

        rows = []
        for refInfo in sorted(self.datasetRefs):
            row = [datasetTypeName,
                   refInfo.datasetRef.run,
                   refInfo.datasetRef.id,
                   *[str(value) for value in refInfo.datasetRef.dataId.values()]]
            if showURI:
                # Pad entries that have no URI so every row has the same
                # number of columns.
                row.append(refInfo.uri or "")
            rows.append(row)

        return AstropyTable(array(rows), names=columnNames)
def queryDatasets(repo, glob, collections, where, find_first, show_uri):
    """Query a repository for datasets and tabulate them by dataset type.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file describing the
        repo and its location.
    glob : iterable [`str`]
        Glob-style search strings that fully or partially identify the
        dataset type names to search for. If empty, all dataset types are
        searched.
    collections : iterable [`str`]
        Glob-style search strings that fully or partially identify the
        collections to search for. If empty, all collections are searched.
    where : `str`
        A string expression similar to a SQL WHERE clause. May involve any
        column of a dimension table or (as a shortcut for the primary key
        column of a dimension table) dimension name.
    find_first : `bool`
        For each result data ID, only yield one DatasetRef of each
        DatasetType, from the first collection in which a dataset of that
        dataset type appears (according to the order of ``collections``
        passed in). If used, ``collections`` must specify at least one
        expression and must not contain wildcards.
    show_uri : `bool`
        If True, include the dataset URI in the output.

    Returns
    -------
    datasetTables : `list` [`astropy.table.Table`]
        A list of astropy tables, one for each dataset type.
    """
    butler = Butler(repo)

    # An empty dataset-type expression means "match everything".
    datasetTypeExpr = globToRegex(glob) or ...

    # Collections are only expanded as globs when not doing a find-first
    # search; in that case they are passed through as given.
    if not collections:
        collectionExpr = ...
    elif find_first:
        collectionExpr = collections
    else:
        collectionExpr = globToRegex(collections)

    matchedRefs = butler.registry.queryDatasets(
        datasetType=datasetTypeExpr,
        collections=collectionExpr,
        where=where,
        findFirst=find_first,
    )

    # One _Table per dataset type name, created on first use.
    tablesByType = defaultdict(_Table)

    for ref in matchedRefs:
        typeName = ref.datasetType.name
        if show_uri:
            primary, components = butler.getURIs(ref, collections=ref.run)
            if primary:
                tablesByType[typeName].add(ref, primary)
            # Each component URI is listed under its component dataset type
            # name rather than under the parent type.
            for componentName, componentURI in components.items():
                componentTypeName = ref.datasetType.componentTypeName(componentName)
                tablesByType[componentTypeName].add(ref, componentURI)
        else:
            tablesByType[typeName].add(ref)

    return [table.getAstropyTable(name) for name, table in tablesByType.items()]