Coverage for python/lsst/daf/butler/script/queryDataIds.py: 12%
50 statements
coverage.py v6.5.0, created at 2022-10-29 02:20 -0700
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import logging

import numpy as np
from astropy.table import Table as AstropyTable

from .._butler import Butler
from ..cli.utils import sortAstropyTable

_LOG = logging.getLogger(__name__)


class _Table:
    """Aggregates DataIds and creates an astropy table with one DataId per
    row. Eliminates duplicate rows.

    Parameters
    ----------
    dataIds : `iterable` [ ``DataId`` ]
        The DataIds to add to the table.
    """

    def __init__(self, dataIds):
        # Use a dict with the dataIds as keys (values unused) to eliminate
        # duplicates while preserving insertion order.
        self.dataIds = dict.fromkeys(dataIds)

    def getAstropyTable(self, order):
        """Get the table as an astropy table.

        Parameters
        ----------
        order : `bool`
            If `True` then order rows based on DataIds.

        Returns
        -------
        table : `astropy.table.Table`
            The dataIds, sorted by spatial and temporal columns first, and
            then the rest of the columns, with duplicate dataIds removed.
        """
        # Should never happen; adding a dataId should be the action that
        # causes a _Table to be created.
        if not self.dataIds:
            raise RuntimeError("No DataIds were provided.")

        dataId = next(iter(self.dataIds))
        dimensions = list(dataId.full.keys())
        columnNames = [str(item) for item in dimensions]

        # Need to hint the column types for numbers, since the per-row
        # constructor of Table does not work this out on its own and sorting
        # will not work properly without it.
        typeMap = {float: np.float64, int: np.int64}
        columnTypes = [typeMap.get(type(value)) for value in dataId.full.values()]

        rows = [[value for value in dataId.full.values()] for dataId in self.dataIds]

        table = AstropyTable(np.array(rows), names=columnNames, dtype=columnTypes)
        if order:
            table = sortAstropyTable(table, dimensions)
        return table
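

# A minimal usage sketch for _Table (illustrative only; ``results`` here is
# assumed to be an iterable of data IDs, e.g. the query results returned by
# Registry.queryDataIds):
#
#     table = _Table(results).getAstropyTable(order=True)
#     table.pprint()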


def queryDataIds(repo, dimensions, datasets, where, collections, order_by, limit, offset):
    # Docstring for supported parameters is the same as Registry.queryDataIds.

    butler = Butler(repo)

    if datasets and collections and not dimensions:
        # Determine the dimensions relevant to all given dataset types.
        # Since we are going to AND together all dimensions, we cannot
        # seed the result with an empty set.
        graph = None
        dataset_types = list(butler.registry.queryDatasetTypes(datasets))
        for dataset_type in dataset_types:
            if graph is None:
                # Seed with dimensions of the first dataset type.
                graph = dataset_type.dimensions
            else:
                # Only retain dimensions that are in the current
                # set AND the set from this dataset type.
                graph = graph.intersection(dataset_type.dimensions)
            _LOG.debug("Dimensions now %s from %s", set(graph.names), dataset_type.name)

            # Break out of the loop early. No additional dimensions
            # can be added to an empty set when using AND.
            if not graph:
                break

        if not graph:
            names = [d.name for d in dataset_types]
            return None, f"No dimensions in common for specified dataset types ({names})"
        dimensions = set(graph.names)
        _LOG.info("Determined dimensions %s from datasets option %s", dimensions, datasets)
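
        # Illustrative example of the intersection above: dataset types with
        # dimension sets {instrument, exposure, detector} and
        # {instrument, visit, detector} would be reduced to the dimensions
        # common to both, e.g. {instrument, detector} (together with any
        # implied dimensions they share).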

    results = butler.registry.queryDataIds(
        dimensions, datasets=datasets, where=where, collections=collections
    )

    if order_by:
        results.order_by(*order_by)
    if limit > 0:
        if offset <= 0:
            offset = None
        results.limit(limit, offset)

    if results.count() > 0 and len(results.graph) > 0:
        table = _Table(results)
        return table.getAstropyTable(not order_by), None
    else:
        return None, "\n".join(results.explain_no_results())
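

# A minimal, hypothetical usage sketch (the repo path, dataset type, and
# collection name below are placeholders; this function is typically driven
# by the ``butler query-data-ids`` command line):
#
#     table, reason = queryDataIds(
#         repo="/path/to/repo",
#         dimensions=("visit", "detector"),
#         datasets="raw",
#         where="",
#         collections=("my/collection",),
#         order_by=(),
#         limit=0,
#         offset=0,
#     )
#     if table is not None:
#         table.pprint()
#     else:
#         print(reason)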