Coverage for python/lsst/daf/butler/script/queryDataIds.py: 15% (61 statements)
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

import logging
from collections.abc import Iterable
from types import EllipsisType
from typing import TYPE_CHECKING

import numpy as np
from astropy.table import Table as AstropyTable

from .._butler import Butler
from ..cli.utils import sortAstropyTable
from ..dimensions import DataCoordinate

if TYPE_CHECKING:
    from lsst.daf.butler import DimensionGroup

_LOG = logging.getLogger(__name__)


class _Table:
    """Aggregates DataIds and creates an astropy table with one DataId per
    row. Eliminates duplicate rows.

    Parameters
    ----------
    dataIds : `~collections.abc.Iterable` [ `DataCoordinate` ]
        The DataIds to add to the table.
    """

    def __init__(self, dataIds: Iterable[DataCoordinate]):
        # Use a dict (rather than a set) so that duplicate data IDs are
        # dropped while insertion order is preserved.
        self.dataIds = dict.fromkeys(dataIds)

    def getAstropyTable(self, order: bool) -> AstropyTable:
        """Get the table as an astropy table.

        Parameters
        ----------
        order : `bool`
            If `True` then order rows based on DataIds.

        Returns
        -------
        table : `astropy.table.Table`
            The dataIds, sorted by spatial and temporal columns first, and
            then the rest of the columns, with duplicate dataIds removed.
        """
        # Should never happen; a _Table should only be constructed when
        # there is at least one data ID to add to it.
        if not self.dataIds:
            raise RuntimeError("No DataIds were provided.")

        dataId = next(iter(self.dataIds))
        dimensions = [dataId.universe.dimensions[k] for k in dataId.dimensions.data_coordinate_keys]
        columnNames = [str(item) for item in dimensions]

        # Need to hint the column types for numbers since the per-row
        # constructor of Table does not work this out on its own and sorting
        # will not work properly without it.
        typeMap = {float: np.float64, int: np.int64}
        columnTypes = [typeMap.get(type(value)) for value in dataId.full_values]
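        # (Illustrative note: an `int` data ID value maps to np.int64 and
        # a `float` to np.float64; any other type maps to None, leaving
        # astropy to infer that column's dtype.)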

        rows = [dataId.full_values for dataId in self.dataIds]

        table = AstropyTable(np.array(rows), names=columnNames, dtype=columnTypes)
        if order:
            table = sortAstropyTable(table, dimensions)
        return table
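
# A minimal usage sketch for _Table (hypothetical repository path; assumes
# the query matches at least one data ID):
#
#     butler = Butler.from_config("/path/to/repo", without_datastore=True)
#     ids = butler.registry.queryDataIds(["instrument", "visit"])
#     print(_Table(ids).getAstropyTable(order=True))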


def queryDataIds(
    repo: str,
    dimensions: Iterable[str],
    datasets: tuple[str, ...],
    where: str,
    collections: Iterable[str],
    order_by: tuple[str, ...],
    limit: int,
    offset: int,
) -> tuple[AstropyTable | None, str | None]:
    """Query for data IDs.

    Parameters
    ----------
    repo : `str`
        Butler repository location.
    dimensions : `~collections.abc.Iterable` of `str`
        Dimensions to use for the query.
    datasets : `tuple` of `str`
        Dataset types to restrict the query by.
    where : `str`
        Query string.
    collections : `~collections.abc.Iterable` of `str`
        Collections to search.
    order_by : `tuple` of `str`
        Columns to order results by.
    limit : `int`
        Maximum number of results.
    offset : `int`
        Offset into the results.

    Returns
    -------
    table : `astropy.table.Table` or `None`
        A table of the matching data IDs, or `None` if there are none to
        report.
    message : `str` or `None`
        An explanation of why no table was returned, or `None` on success.

    Notes
    -----
    The supported parameters are the same as for
    `~lsst.daf.butler.Registry.queryDataIds`; see that method's docstring
    for details.
    """
    butler = Butler.from_config(repo, without_datastore=True)

    if datasets and collections and not dimensions:
        # Determine the dimensions relevant to all given dataset types.
        # Since we are going to AND together all dimensions, we cannot
        # seed the result with an empty set.
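        # (Hypothetical example: dataset types with dimensions
        # {instrument, visit, detector} and {instrument, visit} intersect
        # to {instrument, visit}.)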
        dataset_type_dimensions: DimensionGroup | None = None
        dataset_types = list(butler.registry.queryDatasetTypes(datasets))
        for dataset_type in dataset_types:
            if dataset_type_dimensions is None:
                # Seed with dimensions of first dataset type.
                dataset_type_dimensions = dataset_type.dimensions.as_group()
            else:
                # Only retain dimensions that are in the current
                # set AND the set from this dataset type.
                dataset_type_dimensions = dataset_type_dimensions.intersection(
                    dataset_type.dimensions.as_group()
                )
            _LOG.debug("Dimensions now %s from %s", set(dataset_type_dimensions.names), dataset_type.name)

            # Break out of the loop early. Once the set is empty, further
            # intersections cannot add dimensions back.
            if not dataset_type_dimensions:
                break

        if not dataset_type_dimensions:
            names = [d.name for d in dataset_types]
            return None, f"No dimensions in common for specified dataset types ({names})"
        dimensions = set(dataset_type_dimensions.names)
        _LOG.info("Determined dimensions %s from datasets option %s", dimensions, datasets)

    query_collections: Iterable[str] | EllipsisType | None = None
    if datasets:
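        # Passing ``...`` (Ellipsis) tells the registry to search all
        # collections when none were given explicitly.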
        query_collections = collections or ...
    results = butler.registry.queryDataIds(
        dimensions, datasets=datasets, where=where, collections=query_collections
    )

    if order_by:
        results = results.order_by(*order_by)
    if limit > 0:
        new_offset = offset if offset > 0 else None
        results = results.limit(limit, new_offset)

    if results.any(exact=False):
        if results.dimensions:
            table = _Table(results)
            if not table.dataIds:
                return None, "Post-query region filtering removed all rows, since nothing overlapped."
            return table.getAstropyTable(not order_by), None
        else:
            return None, "Result has one logical row but no columns because no dimensions were requested."
    else:
        return None, "\n".join(results.explain_no_results())
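
# A minimal usage sketch for queryDataIds (hypothetical repository path,
# instrument name, and query):
#
#     table, message = queryDataIds(
#         repo="/path/to/repo",
#         dimensions=["visit", "detector"],
#         datasets=(),
#         where="instrument = 'HSC'",
#         collections=[],
#         order_by=(),
#         limit=0,
#         offset=0,
#     )
#     if table is not None:
#         table.pprint()
#     else:
#         print(message)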