Coverage for python/lsst/daf/butler/script/queryDataIds.py: 15%
60 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-02 02:16 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23import logging
24from collections.abc import Iterable
25from typing import TYPE_CHECKING
27import numpy as np
28from astropy.table import Table as AstropyTable
29from lsst.utils.ellipsis import Ellipsis, EllipsisType
31from .._butler import Butler, DataCoordinate
32from ..cli.utils import sortAstropyTable
34if TYPE_CHECKING:
35 from lsst.daf.butler import DimensionGraph
37_LOG = logging.getLogger(__name__)
class _Table:
    """Collect DataIds and render them as an astropy table, one DataId per
    row, with duplicates removed.

    Parameters
    ----------
    dataIds : `iterable` [ ``DataId`` ]
        The DataIds to add to the table.
    """

    def __init__(self, dataIds: Iterable[DataCoordinate]):
        # A dict keyed by dataId deduplicates while preserving insertion
        # order (keys of a dict are ordered and unique).
        self.dataIds = dict.fromkeys(dataIds)

    def getAstropyTable(self, order: bool) -> AstropyTable:
        """Get the table as an astropy table.

        Parameters
        ----------
        order : `bool`
            If True then order rows based on DataIds.

        Returns
        -------
        table : `astropy.table.Table`
            The dataIds, sorted by spatial and temporal columns first, and
            then the rest of the columns, with duplicate dataIds removed.

        Raises
        ------
        RuntimeError
            Raised if no DataIds were stored; callers are expected to only
            construct a `_Table` once there is something to show.
        """
        if not self.dataIds:
            raise RuntimeError("No DataIds were provided.")

        # Use the first dataId as the template for column names and types.
        first = next(iter(self.dataIds))
        dimensions = list(first.full.keys())
        columnNames = list(map(str, dimensions))

        # Numeric columns need explicit dtypes: the per-row Table
        # constructor does not infer them, and sorting misbehaves without.
        numericDtypes = {float: np.float64, int: np.int64}
        columnTypes = [numericDtypes.get(type(v)) for v in first.full.values()]

        rows = [list(d.full.values()) for d in self.dataIds]

        table = AstropyTable(np.array(rows), names=columnNames, dtype=columnTypes)
        return sortAstropyTable(table, dimensions) if order else table
def queryDataIds(
    repo: str,
    dimensions: Iterable[str],
    datasets: tuple[str, ...],
    where: str,
    collections: Iterable[str],
    order_by: tuple[str, ...],
    limit: int,
    offset: int,
) -> tuple[AstropyTable | None, str | None]:
    # Docstring for supported parameters is the same as Registry.queryDataIds
    butler = Butler(repo)

    if datasets and collections and not dimensions:
        # No dimensions were given explicitly, so derive them by ANDing the
        # dimensions of every matching dataset type.  The accumulator is
        # seeded from the first type rather than an empty set, since an
        # empty seed would make every intersection empty.
        common: DimensionGraph | None = None
        matched_types = list(butler.registry.queryDatasetTypes(datasets))
        for dataset_type in matched_types:
            if common is None:
                # Seed with dimensions of first dataset type.
                common = dataset_type.dimensions
            else:
                # Only retain dimensions present in both sets.
                common = common.intersection(dataset_type.dimensions)
                _LOG.debug("Dimensions now %s from %s", set(common.names), dataset_type.name)

            # Once the intersection is empty it can never grow again, so
            # stop early.
            if not common:
                break

        if not common:
            names = [d.name for d in matched_types]
            return None, f"No dimensions in common for specified dataset types ({names})"
        dimensions = set(common.names)
        _LOG.info("Determined dimensions %s from datasets option %s", dimensions, datasets)

    # Collections only constrain the query when datasets are involved;
    # with datasets but no explicit collections, search everything.
    query_collections: Iterable[str] | EllipsisType | None = None
    if datasets:
        query_collections = collections or Ellipsis

    results = butler.registry.queryDataIds(
        dimensions, datasets=datasets, where=where, collections=query_collections
    )

    if order_by:
        results = results.order_by(*order_by)
    if limit > 0:
        results = results.limit(limit, offset if offset > 0 else None)

    # Guard clauses: report why there is nothing to show, otherwise build
    # the table.
    if not results.any(exact=False):
        return None, "\n".join(results.explain_no_results())
    if not results.graph:
        return None, "Result has one logical row but no columns because no dimensions were requested."
    table = _Table(results)
    if not table.dataIds:
        return None, "Post-query region filtering removed all rows, since nothing overlapped."
    return table.getAstropyTable(not order_by), None