Coverage for python/lsst/daf/butler/script/queryDataIds.py: 15%

61 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-01 11:00 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29import logging 

30from collections.abc import Iterable 

31from types import EllipsisType 

32from typing import TYPE_CHECKING 

33 

34import numpy as np 

35from astropy.table import Table as AstropyTable 

36 

37from .._butler import Butler 

38from ..cli.utils import sortAstropyTable 

39from ..dimensions import DataCoordinate 

40 

41if TYPE_CHECKING: 

42 from lsst.daf.butler import DimensionGroup 

43 

44_LOG = logging.getLogger(__name__) 

45 

46 

47class _Table: 

48 """Aggregates DataIds and creates an astropy table with one DataId per 

49 row. Eliminates duplicate rows. 

50 

51 Parameters 

52 ---------- 

53 dataIds : `iterable` [ ``DataId`` ] 

54 The DataIds to add to the table. 

55 """ 

56 

57 def __init__(self, dataIds: Iterable[DataCoordinate]): 

58 # use dict to store dataIds as keys to preserve ordering 

59 self.dataIds = dict.fromkeys(dataIds) 

60 

61 def getAstropyTable(self, order: bool) -> AstropyTable: 

62 """Get the table as an astropy table. 

63 

64 Parameters 

65 ---------- 

66 order : `bool` 

67 If True then order rows based on DataIds. 

68 

69 Returns 

70 ------- 

71 table : `astropy.table.Table` 

72 The dataIds, sorted by spatial and temporal columns first, and then 

73 the rest of the columns, with duplicate dataIds removed. 

74 """ 

75 # Should never happen; adding a dataset should be the action that 

76 # causes a _Table to be created. 

77 if not self.dataIds: 

78 raise RuntimeError("No DataIds were provided.") 

79 

80 dataId = next(iter(self.dataIds)) 

81 dimensions = [dataId.universe.dimensions[k] for k in dataId.dimensions.data_coordinate_keys] 

82 columnNames = [str(item) for item in dimensions] 

83 

84 # Need to hint the column types for numbers since the per-row 

85 # constructor of Table does not work this out on its own and sorting 

86 # will not work properly without. 

87 typeMap = {float: np.float64, int: np.int64} 

88 columnTypes = [typeMap.get(type(value)) for value in dataId.full_values] 

89 

90 rows = [dataId.full_values for dataId in self.dataIds] 

91 

92 table = AstropyTable(np.array(rows), names=columnNames, dtype=columnTypes) 

93 if order: 

94 table = sortAstropyTable(table, dimensions) 

95 return table 

96 

97 

def queryDataIds(
    repo: str,
    dimensions: Iterable[str],
    datasets: tuple[str, ...],
    where: str,
    collections: Iterable[str],
    order_by: tuple[str, ...],
    limit: int,
    offset: int,
) -> tuple[AstropyTable | None, str | None]:
    """Query for data IDs.

    Docstring for supported parameters is the same as
    `~lsst.daf.butler.Registry.queryDataIds`.
    """
    butler = Butler.from_config(repo, without_datastore=True)

    if datasets and collections and not dimensions:
        # No dimensions were given explicitly, so derive them as the
        # intersection of the dimensions of every requested dataset type.
        # Everything is ANDed together, so the result must be seeded from
        # the first dataset type rather than from an empty set.
        dataset_types = list(butler.registry.queryDatasetTypes(datasets))
        common_dims: DimensionGroup | None = None
        for dataset_type in dataset_types:
            group = dataset_type.dimensions.as_group()
            if common_dims is None:
                # Seed with dimensions of first dataset type.
                common_dims = group
            else:
                # Only retain dimensions present in both the running set
                # and this dataset type's set.
                common_dims = common_dims.intersection(group)
                _LOG.debug("Dimensions now %s from %s", set(common_dims.names), dataset_type.name)

            # An empty intersection can never grow again under AND, so
            # stop scanning dataset types early.
            if not common_dims:
                break

        if not common_dims:
            names = [d.name for d in dataset_types]
            return None, f"No dimensions in common for specified dataset types ({names})"
        dimensions = set(common_dims.names)
        _LOG.info("Determined dimensions %s from datasets option %s", dimensions, datasets)

    # When dataset types constrain the query, search the named collections,
    # falling back to all collections if none were given.
    query_collections: Iterable[str] | EllipsisType | None = (collections or ...) if datasets else None
    results = butler.registry.queryDataIds(
        dimensions, datasets=datasets, where=where, collections=query_collections
    )

    if order_by:
        results = results.order_by(*order_by)
    if limit > 0:
        # An offset is only meaningful together with a positive limit.
        results = results.limit(limit, offset if offset > 0 else None)

    if not results.any(exact=False):
        return None, "\n".join(results.explain_no_results())
    if not results.dimensions:
        return None, "Result has one logical row but no columns because no dimensions were requested."
    table = _Table(results)
    if not table.dataIds:
        return None, "Post-query region filtering removed all rows, since nothing overlapped."
    # Rows were not explicitly ordered, so ask the table to sort them.
    return table.getAstropyTable(not order_by), None