Coverage for python/lsst/daf/butler/registry/queries/expressions/categorize.py: 11%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

63 statements  

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ()  # all symbols intentionally private; for internal package use.

import enum
from typing import (
    Optional,
    Tuple,
)

from ....core import (
    DimensionUniverse,
    Dimension,
    DimensionElement,
    DimensionGraph,
)

37 

38 

class ExpressionConstant(enum.Enum):
    """Enumeration for constants recognized in all expressions.

    Each member's value is the lower-case spelling of the identifier it
    stands for, so a lower-cased identifier can be converted to a member
    with an ordinary by-value lookup (``ExpressionConstant("null")``).
    """

    NULL = "null"
    INGEST_DATE = "ingest_date"

44 

45 

def categorizeConstant(name: str) -> Optional[ExpressionConstant]:
    """Categorize an identifier in a parsed expression as one of a few global
    constants.

    Parameters
    ----------
    name : `str`
        Identifier to categorize.  Case-insensitive.

    Returns
    -------
    categorized : `ExpressionConstant` or `None`
        Enumeration value if the string represents a constant, `None`
        otherwise.
    """
    try:
        # Member values are all lower-case, so lower-casing the identifier
        # first is what makes this by-value lookup case-insensitive.
        return ExpressionConstant(name.lower())
    except ValueError:
        # Enum lookup by value raises ValueError when no member matches;
        # callers are told "not a constant" via None instead.
        return None

65 

66 

def categorizeElementId(universe: DimensionUniverse, name: str) -> Tuple[DimensionElement, Optional[str]]:
    """Categorize an identifier in a parsed expression as either a `Dimension`
    name (indicating the primary key for that dimension) or a non-primary-key
    column in a `DimensionElement` table.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    name : `str`
        Identifier to categorize, either ``<element>`` or
        ``<element>.<column>``.  Only the first dot is treated as a
        separator.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    LookupError
        Raised if the identifier refers to a nonexistent `DimensionElement`.
    RuntimeError
        Raised if the expression refers to a primary key in an illegal way.
        This exception includes a suggestion for how to rewrite the
        expression, so at least its message should generally be propagated up
        to a context where the error can be interpreted by a human.

    Notes
    -----
    A column name that is neither the element's primary key nor the name of
    one of the element's dimensions is returned as given; this function does
    not check that such a column actually exists in the element's table.
    """
    table, _, column = name.partition(".")

    if not column:
        # Bare identifier (no dot, or nothing after the dot): the whole name
        # must be an entry of the universe, and refers to its primary key.
        try:
            dimension = universe[table]
        except KeyError as err:
            raise LookupError(f"No dimension with name '{table}'.") from err
        return dimension, None

    try:
        element = universe[table]
    except KeyError as err:
        raise LookupError(f"No dimension element with name '{table}'.") from err

    if isinstance(element, Dimension) and column == element.primaryKey.name:
        # Allow e.g. "visit.id = x" instead of just "visit = x"; this
        # can be clearer.
        return element, None

    if column in element.graph.names:
        # User said something like "patch.tract = x" or
        # "tract.tract = x" instead of just "tract = x" or
        # "tract.id = x", which is at least needlessly confusing and
        # possibly not actually a column name, though we can guess
        # what they were trying to do.
        # Encourage them to clean that up and try again.
        raise RuntimeError(
            f"Invalid reference to '{table}.{column}' "  # type: ignore
            f"in expression; please use '{column}' or "
            f"'{column}.{universe[column].primaryKey.name}' instead."
        )

    return element, column

129 

130 

def categorizeOrderByName(graph: DimensionGraph, name: str) -> Tuple[DimensionElement, Optional[str]]:
    """Categorize an identifier in an ORDER BY clause.

    Parameters
    ----------
    graph : `DimensionGraph`
        Dimensions whose elements are searched for the identifier.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    ValueError
        Raised if the element name is not found in the graph, if the metadata
        name is not recognized, or if more than one element has the
        specified metadata.

    Notes
    -----
    For ORDER BY identifiers we use a slightly different set of rules compared
    to the rules in `categorizeElementId`:

    - Name can be a dimension element name, e.g. ``visit``.
    - Name can be an element name and a metadata name (or key name) separated
      by a dot, e.g. ``detector.full_name``.
    - Name can be a metadata name without an element name prefix, e.g.
      ``day_obs``; in that case the metadata (or key) is searched for in all
      elements present in the graph.  An exception is raised if the name
      appears in more than one element.
    - Two special identifiers ``timespan.begin`` and ``timespan.end`` can be
      used with temporal elements; if the element name is not given then the
      temporal element from the graph is used.
    """
    element: DimensionElement
    field_name: Optional[str] = None
    if name in ("timespan.begin", "timespan.end"):
        # Unqualified timespan bound: only unambiguous when exactly one
        # element of the graph is temporal.
        matches = [elem for elem in graph.elements if elem.temporal]
        if len(matches) == 1:
            element = matches[0]
            field_name = name
        elif len(matches) > 1:
            # NOTE(review): "dimesion" is misspelled in this message; it is
            # left unchanged because callers or tests may match on the exact
            # text -- confirm before correcting.
            raise ValueError(
                f"Timespan exists in more than one dimesion element: {matches},"
                " qualify timespan with specific dimension name.")
        else:
            raise ValueError(
                f"Cannot find any temporal dimension element for '{name}'.")
    elif "." not in name:
        # No dot, can be either a dimension name or a field name (in any of
        # the known elements)
        if name in graph.elements.names:
            # Element name on its own: field_name stays None.
            element = graph.elements[name]
        else:
            # Can be a metadata name or any of unique keys
            matches = [elem for elem in graph.elements if name in elem.metadata.names]
            matches += [dim for dim in graph if name in dim.uniqueKeys.names]
            if len(matches) == 1:
                element = matches[0]
                field_name = name
            elif len(matches) > 1:
                raise ValueError(
                    f"Metadata '{name}' exists in more than one dimension element: {matches},"
                    " qualify metadata name with dimension name.")
            else:
                raise ValueError(
                    f"Metadata '{name}' cannot be found in any dimension.")
    else:
        # qualified name, must be a dimension element and a field
        # Splitting on the first dot only keeps "<element>.timespan.begin"
        # intact as the field "timespan.begin".
        elem_name, _, field_name = name.partition(".")
        if elem_name not in graph.elements.names:
            raise ValueError(f"Unknown dimension element name '{elem_name}'")
        element = graph.elements[elem_name]
        if field_name in ("timespan.begin", "timespan.end"):
            if not element.temporal:
                raise ValueError(f"Cannot use '{field_name}' with non-temporal element '{element}'.")
        elif isinstance(element, Dimension) and field_name == element.primaryKey.name:
            # Primary key is optional
            field_name = None
        else:
            # Anything else must be a metadata field or, for a Dimension,
            # one of its alternate keys.
            if not (field_name in element.metadata.names
                    or (isinstance(element, Dimension) and field_name in element.alternateKeys.names)):
                raise ValueError(f"Field '{field_name}' does not exist in '{element}'.")

    return element, field_name