Coverage for python/lsst/daf/butler/script/queryDataIds.py: 12%

50 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-10-26 02:02 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22import logging 

23 

24import numpy as np 

25from astropy.table import Table as AstropyTable 

26 

27from .._butler import Butler 

28from ..cli.utils import sortAstropyTable 

29 

30_LOG = logging.getLogger(__name__) 

31 

32 

33class _Table: 

34 """Aggregates DataIds and creates an astropy table with one DataId per 

35 row. Eliminates duplicate rows. 

36 

37 Parameters 

38 ---------- 

39 dataIds : `iterable` [ ``DataId`` ] 

40 The DataIds to add to the table. 

41 """ 

42 

43 def __init__(self, dataIds): 

44 # use dict to store dataIds as keys to preserve ordering 

45 self.dataIds = dict.fromkeys(dataIds) 

46 

47 def getAstropyTable(self, order): 

48 """Get the table as an astropy table. 

49 

50 Returns 

51 ------- 

52 table : `astropy.table.Table` 

53 The dataIds, sorted by spatial and temporal columns first, and then 

54 the rest of the columns, with duplicate dataIds removed. 

55 order : `bool` 

56 If True then order rows based on DataIds. 

57 """ 

58 # Should never happen; adding a dataset should be the action that 

59 # causes a _Table to be created. 

60 if not self.dataIds: 

61 raise RuntimeError("No DataIds were provided.") 

62 

63 dataId = next(iter(self.dataIds)) 

64 dimensions = list(dataId.full.keys()) 

65 columnNames = [str(item) for item in dimensions] 

66 

67 # Need to hint the column types for numbers since the per-row 

68 # constructor of Table does not work this out on its own and sorting 

69 # will not work properly without. 

70 typeMap = {float: np.float64, int: np.int64} 

71 columnTypes = [typeMap.get(type(value)) for value in dataId.full.values()] 

72 

73 rows = [[value for value in dataId.full.values()] for dataId in self.dataIds] 

74 

75 table = AstropyTable(np.array(rows), names=columnNames, dtype=columnTypes) 

76 if order: 

77 table = sortAstropyTable(table, dimensions) 

78 return table 

79 

80 

def queryDataIds(repo, dimensions, datasets, where, collections, order_by, limit, offset):
    """Query a repository for data IDs and return them as a table.

    The ``dimensions``, ``datasets``, ``where`` and ``collections``
    arguments are forwarded to ``Registry.queryDataIds``; see that method
    for their supported values.

    Parameters
    ----------
    repo
        Passed to the `Butler` constructor to open the repository.
    dimensions
        Dimensions of the data IDs to return. If this is empty and both
        ``datasets`` and ``collections`` are given, the dimensions common
        to all matching dataset types are used instead.
    datasets
        Dataset type expression used to constrain the query.
    where
        Query constraint expression.
    collections
        Collections to search.
    order_by
        Iterable of arguments forwarded to the results' ``order_by``
        method. If non-empty, the returned table is not re-sorted.
    limit : `int`
        Maximum number of results; only applied if greater than zero.
    offset : `int`
        Number of results to skip. Only used together with a positive
        ``limit``; values less than or equal to zero mean no offset.

    Returns
    -------
    table : `astropy.table.Table` or `None`
        The matching data IDs, or `None` if there is nothing to show.
    message : `str` or `None`
        Explanation of why there are no results, or `None` if a table is
        returned.
    """
    butler = Butler(repo)

    if datasets and collections and not dimensions:
        # Determine the dimensions relevant to all given dataset types.
        # Since we are going to AND together all dimensions, we can not
        # seed the result with an empty set.
        graph = None
        dataset_types = list(butler.registry.queryDatasetTypes(datasets))
        for dataset_type in dataset_types:
            if graph is None:
                # Seed with dimensions of first dataset type.
                graph = dataset_type.dimensions
            else:
                # Only retain dimensions that are in the current
                # set AND the set from this dataset type.
                graph = graph.intersection(dataset_type.dimensions)
            _LOG.debug("Dimensions now %s from %s", set(graph.names), dataset_type.name)

            # Break out of the loop early. No additional dimensions
            # can be added to an empty set when using AND.
            if not graph:
                break

        # Also covers the case where no dataset types matched at all
        # (graph is still None).
        if not graph:
            names = [d.name for d in dataset_types]
            return None, f"No dimensions in common for specified dataset types ({names})"
        dimensions = set(graph.names)
        _LOG.info("Determined dimensions %s from datasets option %s", dimensions, datasets)

    results = butler.registry.queryDataIds(
        dimensions, datasets=datasets, where=where, collections=collections
    )

    # Rebind the value returned by order_by/limit rather than relying on
    # in-place mutation, so this works whether those methods return the
    # same object or a new one.
    if order_by:
        results = results.order_by(*order_by)
    if limit > 0:
        new_offset = offset if offset > 0 else None
        results = results.limit(limit, new_offset)

    if results.count() > 0 and len(results.graph) > 0:
        table = _Table(results)
        # Only sort the table ourselves if the caller did not ask for a
        # specific ordering.
        return table.getAstropyTable(not order_by), None
    else:
        return None, "\n".join(results.explain_no_results())