Coverage for python/lsst/daf/butler/script/queryDataIds.py: 15% (61 statements)


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

import logging
from collections.abc import Iterable
from types import EllipsisType
from typing import TYPE_CHECKING

import numpy as np
from astropy.table import Table as AstropyTable

from .._butler import Butler
from ..cli.utils import sortAstropyTable
from ..dimensions import DataCoordinate

if TYPE_CHECKING:
    from lsst.daf.butler import DimensionGroup

_LOG = logging.getLogger(__name__)


class _Table:
    """Aggregate DataIds into an astropy table with one DataId per row.

    Duplicate rows are eliminated.

    Parameters
    ----------
    dataIds : `~collections.abc.Iterable` [ `DataCoordinate` ]
        The DataIds to add to the table.
    """

    def __init__(self, dataIds: Iterable[DataCoordinate]):
        # Use a dict with the dataIds as keys: this drops duplicates while
        # preserving insertion order.
        self.dataIds = dict.fromkeys(dataIds)

    def getAstropyTable(self, order: bool) -> AstropyTable:
        """Get the table as an astropy table.

        Parameters
        ----------
        order : `bool`
            If `True` then order rows based on DataIds.

        Returns
        -------
        table : `astropy.table.Table`
            The dataIds, sorted by spatial and temporal columns first, and
            then the rest of the columns, with duplicate dataIds removed.
        """
        # Should not happen: callers are expected to check that there are
        # dataIds before requesting a table.
        if not self.dataIds:
            raise RuntimeError("No DataIds were provided.")

        dataId = next(iter(self.dataIds))
        dimensions = [dataId.universe.dimensions[k] for k in dataId.dimensions.data_coordinate_keys]
        columnNames = [str(item) for item in dimensions]

        # Need to hint the column types for numbers since the per-row
        # constructor of Table does not work this out on its own and sorting
        # will not work properly without it.
        typeMap = {float: np.float64, int: np.int64}
        columnTypes = [typeMap.get(type(value)) for value in dataId.full_values]
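        # (Without these hints, ``np.array(rows)`` below coerces the mixed
        # per-row tuples to a common string dtype, and numeric columns would
        # then sort lexicographically.)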

        rows = [dataId.full_values for dataId in self.dataIds]

        table = AstropyTable(np.array(rows), names=columnNames, dtype=columnTypes)
        if order:
            table = sortAstropyTable(table, dimensions)
        return table
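
# A minimal sketch of how ``_Table`` is used (an assumption for illustration:
# ``results`` is an iterable of `DataCoordinate`, such as the result of a
# data ID query):
#
#     table = _Table(results)
#     if table.dataIds:
#         print(table.getAstropyTable(order=True))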

def queryDataIds(
    repo: str,
    dimensions: Iterable[str],
    datasets: tuple[str, ...],
    where: str,
    collections: Iterable[str],
    order_by: tuple[str, ...],
    limit: int,
    offset: int,
) -> tuple[AstropyTable | None, str | None]:
    """Query for data IDs.

    Parameters
    ----------
    repo : `str`
        Location of the butler repository.
    dimensions : `~collections.abc.Iterable` of `str`
        Dimensions to use for the query.
    datasets : `tuple` of `str`
        Dataset types to restrict the query by. When given together with
        ``collections`` and no ``dimensions``, the dimensions common to all
        the dataset types are used.
    where : `str`
        User expression used to constrain the query.
    collections : `~collections.abc.Iterable` of `str`
        Collections to search.
    order_by : `tuple` of `str`
        Columns to order the results by.
    limit : `int`
        Maximum number of results to return; values of zero or less mean
        no limit.
    offset : `int`
        Offset into the results; only used when positive and a ``limit``
        is applied.

    Returns
    -------
    table : `astropy.table.Table` or `None`
        A table of the matching data IDs, or `None` if there is nothing
        to show.
    reason : `str` or `None`
        An explanation of why no table was returned, or `None` on success.

    Notes
    -----
    The supported parameters are the same as for
    `~lsst.daf.butler.Registry.queryDataIds`.
    """

    butler = Butler.from_config(repo, without_datastore=True)

    if datasets and collections and not dimensions:
        # Determine the dimensions relevant to all given dataset types.
        # Since we are going to AND together all dimensions, we cannot
        # seed the result with an empty set.
        dataset_type_dimensions: DimensionGroup | None = None
        dataset_types = list(butler.registry.queryDatasetTypes(datasets))
        for dataset_type in dataset_types:
            if dataset_type_dimensions is None:
                # Seed with the dimensions of the first dataset type.
                dataset_type_dimensions = dataset_type.dimensions.as_group()
            else:
                # Only retain dimensions that are in the current set AND
                # the set from this dataset type.
                dataset_type_dimensions = dataset_type_dimensions.intersection(
                    dataset_type.dimensions.as_group()
                )
            _LOG.debug("Dimensions now %s from %s", set(dataset_type_dimensions.names), dataset_type.name)

            # Break out of the loop early. No additional dimensions can be
            # added to an empty set when using AND.
            if not dataset_type_dimensions:
                break

        if not dataset_type_dimensions:
            names = [d.name for d in dataset_types]
            return None, f"No dimensions in common for specified dataset types ({names})"
        dimensions = set(dataset_type_dimensions.names)
        _LOG.info("Determined dimensions %s from datasets option %s", dimensions, datasets)
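        # Illustrative example (the dataset types and dimension sets here are
        # hypothetical): a type with dimensions {instrument, visit, detector}
        # intersected with one having {instrument, exposure, detector} leaves
        # {instrument, detector}; intersecting further with {skymap, tract}
        # empties the set and triggers the "no dimensions in common" error.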

    query_collections: Iterable[str] | EllipsisType | None = None
    if datasets:
        query_collections = collections or ...
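        # When dataset types are given but no collections, fall back to
        # ``...`` (Ellipsis), which the registry query APIs treat as
        # "search all collections".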

    results = butler.registry.queryDataIds(
        dimensions, datasets=datasets, where=where, collections=query_collections
    )

    if order_by:
        results = results.order_by(*order_by)
    if limit > 0:
        new_offset = offset if offset > 0 else None
        results = results.limit(limit, new_offset)

    if results.any(exact=False):
        if results.dimensions:
            table = _Table(results)
            if not table.dataIds:
                return None, "Post-query region filtering removed all rows, since nothing overlapped."
            return table.getAstropyTable(not order_by), None
        else:
            return None, "Result has one logical row but no columns because no dimensions were requested."
    else:
        return None, "\n".join(results.explain_no_results())
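
# A minimal usage sketch (the argument values below are illustrative
# assumptions, not part of this module):
#
#     table, reason = queryDataIds(
#         repo="/path/to/repo",
#         dimensions=["visit", "detector"],
#         datasets=(),
#         where="",
#         collections=(),
#         order_by=("visit",),
#         limit=0,
#         offset=0,
#     )
#     if table is None:
#         print(reason)
#     else:
#         print(table)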