Coverage for python/lsst/daf/butler/script/queryDataIds.py: 15%

60 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-18 09:13 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import logging
from collections.abc import Iterable
from typing import TYPE_CHECKING

import numpy as np
from astropy.table import Table as AstropyTable
from lsst.utils.ellipsis import Ellipsis, EllipsisType

from .._butler import Butler, DataCoordinate
from ..cli.utils import sortAstropyTable

if TYPE_CHECKING:
    from lsst.daf.butler import DimensionGraph

_LOG = logging.getLogger(__name__)

38 

39 

class _Table:
    """Aggregates DataIds and creates an astropy table with one DataId per
    row. Eliminates duplicate rows.

    Parameters
    ----------
    dataIds : `iterable` [ ``DataId`` ]
        The DataIds to add to the table.
    """

    def __init__(self, dataIds: Iterable[DataCoordinate]):
        # A dict keyed on the dataIds both deduplicates and preserves the
        # order in which they were first seen.
        self.dataIds = dict.fromkeys(dataIds)

    def getAstropyTable(self, order: bool) -> AstropyTable:
        """Get the table as an astropy table.

        Parameters
        ----------
        order : `bool`
            If True then order rows based on DataIds.

        Returns
        -------
        table : `astropy.table.Table`
            The dataIds, sorted by spatial and temporal columns first, and then
            the rest of the columns, with duplicate dataIds removed.

        Raises
        ------
        RuntimeError
            Raised if no DataIds were stored; should never happen because
            adding a dataId is what triggers creation of a _Table.
        """
        if not self.dataIds:
            raise RuntimeError("No DataIds were provided.")

        # Use the first dataId as the template for column names and types.
        first = next(iter(self.dataIds))
        dimensions = list(first.full.keys())
        columnNames = [str(dim) for dim in dimensions]

        # The per-row Table constructor cannot infer numeric column types on
        # its own, and sorting misbehaves without them, so hint them here.
        typeMap = {float: np.float64, int: np.int64}
        columnTypes = [typeMap.get(type(value)) for value in first.full.values()]

        rows = [list(dataId.full.values()) for dataId in self.dataIds]

        table = AstropyTable(np.array(rows), names=columnNames, dtype=columnTypes)
        return sortAstropyTable(table, dimensions) if order else table

89 

90 

def queryDataIds(
    repo: str,
    dimensions: Iterable[str],
    datasets: tuple[str, ...],
    where: str,
    collections: Iterable[str],
    order_by: tuple[str, ...],
    limit: int,
    offset: int,
) -> tuple[AstropyTable | None, str | None]:
    # Docstring for supported parameters is the same as Registry.queryDataIds

    butler = Butler(repo)

    if datasets and collections and not dimensions:
        # Infer the dimensions as the intersection of the dimensions of all
        # the given dataset types.  The accumulator is seeded lazily from the
        # first dataset type, because seeding an AND with the empty set would
        # make every intersection empty.
        dataset_types = list(butler.registry.queryDatasetTypes(datasets))
        graph: DimensionGraph | None = None
        for dataset_type in dataset_types:
            if graph is None:
                graph = dataset_type.dimensions
            else:
                # Keep only dimensions common to the running set and this
                # dataset type.
                graph = graph.intersection(dataset_type.dimensions)
            _LOG.debug("Dimensions now %s from %s", set(graph.names), dataset_type.name)

            if not graph:
                # Once empty, an AND can never add dimensions back; stop.
                break

        if not graph:
            names = [d.name for d in dataset_types]
            return None, f"No dimensions in common for specified dataset types ({names})"
        dimensions = set(graph.names)
        _LOG.info("Determined dimensions %s from datasets option %s", dimensions, datasets)

    # Only constrain collections when dataset types were given; an empty
    # collections option then means "search everywhere" (Ellipsis).
    query_collections: Iterable[str] | EllipsisType | None = (
        (collections or Ellipsis) if datasets else None
    )
    results = butler.registry.queryDataIds(
        dimensions, datasets=datasets, where=where, collections=query_collections
    )

    if order_by:
        results = results.order_by(*order_by)
    if limit > 0:
        results = results.limit(limit, offset if offset > 0 else None)

    # Guard clauses for the empty / degenerate result cases.
    if not results.any(exact=False):
        return None, "\n".join(results.explain_no_results())
    if not results.graph:
        return None, "Result has one logical row but no columns because no dimensions were requested."
    table = _Table(results)
    if not table.dataIds:
        return None, "Post-query region filtering removed all rows, since nothing overlapped."
    # Sort here only when the database was not asked to order the rows.
    return table.getAstropyTable(not order_by), None