Coverage for python/lsst/daf/butler/script/queryDataIds.py: 15%

60 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-10-02 08:00 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29import logging 

30from collections.abc import Iterable 

31from types import EllipsisType 

32from typing import TYPE_CHECKING 

33 

34import numpy as np 

35from astropy.table import Table as AstropyTable 

36 

37from .._butler import Butler, DataCoordinate 

38from ..cli.utils import sortAstropyTable 

39 

40if TYPE_CHECKING: 

41 from lsst.daf.butler import DimensionGraph 

42 

43_LOG = logging.getLogger(__name__) 

44 

45 

class _Table:
    """Accumulate data IDs and render them as an astropy table with one
    data ID per row, dropping duplicates.

    Parameters
    ----------
    dataIds : `iterable` [ ``DataId`` ]
        The DataIds to add to the table.
    """

    def __init__(self, dataIds: Iterable[DataCoordinate]):
        # A dict (rather than a set) both deduplicates and preserves the
        # insertion order of the incoming data IDs.
        self.dataIds = dict.fromkeys(dataIds)

    def getAstropyTable(self, order: bool) -> AstropyTable:
        """Get the table as an astropy table.

        Parameters
        ----------
        order : `bool`
            If True then order rows based on DataIds.

        Returns
        -------
        table : `astropy.table.Table`
            The dataIds, sorted by spatial and temporal columns first, and then
            the rest of the columns, with duplicate dataIds removed.
        """
        if not self.dataIds:
            # Should never happen; a _Table is only created once there is
            # something to put in it.
            raise RuntimeError("No DataIds were provided.")

        first = next(iter(self.dataIds))
        dimensions = list(first.full.keys())
        columnNames = [str(dim) for dim in dimensions]

        # Explicitly hint numeric column dtypes (taken from the first row):
        # the per-row Table constructor does not infer them on its own, and
        # sorting will not work properly without them.
        dtypeHints = {float: np.float64, int: np.int64}
        columnTypes = [dtypeHints.get(type(value)) for value in first.full.values()]

        rows = [list(dataId.full.values()) for dataId in self.dataIds]
        table = AstropyTable(np.array(rows), names=columnNames, dtype=columnTypes)
        return sortAstropyTable(table, dimensions) if order else table

95 

96 

def queryDataIds(
    repo: str,
    dimensions: Iterable[str],
    datasets: tuple[str, ...],
    where: str,
    collections: Iterable[str],
    order_by: tuple[str, ...],
    limit: int,
    offset: int,
) -> tuple[AstropyTable | None, str | None]:
    """Query for data IDs.

    Docstring for supported parameters is the same as
    `~lsst.daf.butler.Registry.queryDataIds`.
    """
    butler = Butler(repo, without_datastore=True)

    if datasets and collections and not dimensions:
        # No dimensions were given explicitly: derive them by intersecting
        # the dimension sets of all requested dataset types.  Seed with
        # None rather than an empty set, since an AND over sets can never
        # grow from empty.
        graph: DimensionGraph | None = None
        dataset_types = list(butler.registry.queryDatasetTypes(datasets))
        for dataset_type in dataset_types:
            graph = (
                # First dataset type seeds the running intersection.
                dataset_type.dimensions
                if graph is None
                # Otherwise keep only dimensions common to both sets.
                else graph.intersection(dataset_type.dimensions)
            )
            _LOG.debug("Dimensions now %s from %s", set(graph.names), dataset_type.name)
            if not graph:
                # The intersection is empty and can only stay empty; no
                # point examining further dataset types.
                break

        if not graph:
            names = [d.name for d in dataset_types]
            return None, f"No dimensions in common for specified dataset types ({names})"
        dimensions = set(graph.names)
        _LOG.info("Determined dimensions %s from datasets option %s", dimensions, datasets)

    # Constrain by collections only when datasets were requested; with
    # datasets but no collections, search everywhere (...).
    query_collections: Iterable[str] | EllipsisType | None = None
    if datasets:
        query_collections = collections or ...
    results = butler.registry.queryDataIds(
        dimensions, datasets=datasets, where=where, collections=query_collections
    )

    if order_by:
        results = results.order_by(*order_by)
    if limit > 0:
        # NOTE(review): offset is only honored when a positive limit is
        # also given — presumably the CLI always supplies one; confirm.
        results = results.limit(limit, offset if offset > 0 else None)

    # Guard-clause style: handle each "no table" outcome up front.
    if not results.any(exact=False):
        return None, "\n".join(results.explain_no_results())
    if not results.graph:
        return None, "Result has one logical row but no columns because no dimensions were requested."
    table = _Table(results)
    if not table.dataIds:
        return None, "Post-query region filtering removed all rows, since nothing overlapped."
    return table.getAstropyTable(not order_by), None