Coverage for python/lsst/daf/butler/core/configSupport.py : 20%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Support for configuration snippets"""
26__all__ = ("LookupKey", "processLookupConfigs",
27 "processLookupConfigList")
29import logging
30import re
31from collections.abc import Mapping
33from typing import (
34 TYPE_CHECKING,
35 Any,
36 Dict,
37 Iterable,
38 Optional,
39 Set,
40 Union,
41)
43from .dimensions import DimensionGraph
45if TYPE_CHECKING: 45 ↛ 46line 45 didn't jump to line 46, because the condition on line 45 was never true
46 from .dimensions import DimensionUniverse, Dimension
47 from .config import Config
# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Group 1 is the dataId field name (lower-case letters and underscores only),
# group 2 is everything between the first "<" after it and the final ">".
DATAID_RE = re.compile(r"([a-z_]+)<(.*)>$")
"""Regex to find dataIds embedded in configurations."""
class LookupKey:
    """Representation of key that can be used to lookup information based
    on dataset type name, storage class name, dimensions.

    Exactly one of ``name`` or ``dimensions`` must be supplied. Instances
    are hashable and compare equal when name, dimensions and dataId all
    match, so they can be used as `dict` keys or `set` members.

    Parameters
    ----------
    name : `str`, optional
        Primary index string for lookup. If this string looks like it
        represents dimensions (via ``dim1+dim2+dim3`` syntax) the name
        is converted to a `DimensionGraph` and stored in ``dimensions``
        property.
    dimensions : `DimensionGraph` or iterable, optional
        Dimensions that are relevant for lookup. Should not be specified
        if ``name`` is also specified. Anything that is not already a
        `DimensionGraph` is handed to ``universe.extract``.
    dataId : `dict`, optional
        Keys and values from a dataId that should control lookups.
        Stored internally as a `frozenset` of item tuples, so the values
        must be hashable.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate ``name`` or
        ``dimensions``. Required if the key represents dimensions and a
        full `DimensionGraph` is not provided.

    Raises
    ------
    ValueError
        Raised if neither or both of ``name`` and ``dimensions`` are given,
        if ``name`` is not a `str`, or if dimensions have to be expanded
        but no ``universe`` was supplied.
    """

    def __init__(self, name: Optional[str] = None,
                 dimensions: Optional[Iterable[Union[str, Dimension]]] = None,
                 dataId: Optional[Dict[str, Any]] = None, *,
                 universe: Optional[DimensionUniverse] = None):
        if name is None and dimensions is None:
            raise ValueError("At least one of name or dimensions must be given")

        if name is not None and dimensions is not None:
            raise ValueError("Can only accept one of name or dimensions")

        self._dimensions = None
        self._name = None

        if name is not None:

            if not isinstance(name, str):
                raise ValueError(f"Supplied name must be str not: '{name}'")

            if "+" in name:
                # If we are given a single dimension we use the "+" to
                # indicate this but have to filter out the empty value
                dimension_names = [n for n in name.split("+") if n]
                if universe is None:
                    raise ValueError(f"Cannot construct LookupKey for {name} without dimension universe.")
                self._dimensions = universe.extract(dimension_names)
            else:
                self._name = name

        elif dimensions is not None:
            if isinstance(dimensions, DimensionGraph):
                self._dimensions = dimensions
            elif universe is None:
                raise ValueError(f"Cannot construct LookupKey for dimensions={dimensions} "
                                 "without universe.")
            else:
                self._dimensions = universe.extract(dimensions)
        else:
            # mypy cannot work this out on its own
            raise ValueError("Name was None but dimensions is also None")

        # The dataId is converted to a frozenset of key/value
        # tuples so that it is not mutable
        self._dataId = frozenset(dataId.items()) if dataId is not None else None

    def __str__(self) -> str:
        # Compare against None rather than relying on truthiness so that
        # an empty-string name, which the constructor accepts, still
        # yields a string instead of the internal-error exception.
        if self._name is not None:
            name = self._name
        elif self._dimensions is not None:
            name = "+".join(self._dimensions.names)
        else:
            raise RuntimeError("Internal error since name and dimensions are both None")

        # For the simple case return the simple string
        if not self._dataId:
            return name

        return f"{name} ({self.dataId})"

    def __repr__(self) -> str:
        params = ""
        # Explicit None checks so that a set-but-falsy name or dimensions
        # value is still reported.
        if self.name is not None:
            params += f"name={self.name!r},"
        if self.dimensions is not None:
            params += f"dimensions={self.dimensions!r},"
        if self._dataId:
            params += "dataId={" + ",".join(f"'{k}': {v!r}" for k, v in self._dataId) + "}"

        return f"{self.__class__.__name__}({params})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        return (self._name == other._name
                and self._dimensions == other._dimensions
                and self._dataId == other._dataId)

    @property
    def name(self) -> Optional[str]:
        """Primary name string to use as lookup. (`str`)"""
        return self._name

    @property
    def dimensions(self) -> Optional[DimensionGraph]:
        """Dimensions associated with lookup. (`DimensionGraph`)"""
        return self._dimensions

    @property
    def dataId(self) -> Optional[Dict[str, Any]]:
        """Dict of keys/values that are important for dataId lookup.
        (`dict` or `None`)

        A fresh `dict` is built on every access, so modifying the returned
        value does not affect this key.
        """
        if self._dataId is None:
            return None
        return dict(self._dataId)

    def __hash__(self) -> int:
        """Hash the lookup to allow use as a key in a dict."""
        return hash((self._name, self._dimensions, self._dataId))

    def clone(self, name: Optional[str] = None, dimensions: Optional[DimensionGraph] = None,
              dataId: Optional[Dict[str, Any]] = None) -> LookupKey:
        """Clone the object, overriding some options.

        Used to create a new instance of the object whilst updating
        some of it.

        Parameters
        ----------
        name : `str`, optional
            Primary index string for lookup. Will override ``dimensions``
            if ``dimensions`` are set.
        dimensions : `DimensionGraph`, optional
            Dimensions that are relevant for lookup. Will override ``name``
            if ``name`` is already set.
        dataId : `dict`, optional
            Keys and values from a dataId that should control lookups.
            If not given, the dataId of this object is reused.

        Returns
        -------
        clone : `LookupKey`
            Copy with updates.

        Raises
        ------
        ValueError
            Raised if both ``name`` and ``dimensions`` are given.

        Notes
        -----
        The copy is constructed without a dimension universe, so a ``name``
        using the ``dim1+dim2`` syntax, or ``dimensions`` that are not
        already a `DimensionGraph`, will be rejected by the constructor.
        """
        if name is not None and dimensions is not None:
            raise ValueError("Both name and dimensions can not be set")

        # if neither name nor dimensions are specified we copy from current
        # object. Otherwise we'll use the supplied values
        if name is None and dimensions is None:
            name = self._name
            dimensions = self._dimensions

        # Make sure we use the dict form for the constructor
        if dataId is None and self._dataId is not None:
            dataId = self.dataId

        return self.__class__(name=name, dimensions=dimensions, dataId=dataId)
def processLookupConfigs(config: Config, *,
                         allow_hierarchy: bool = False,
                         universe: Optional[DimensionUniverse] = None
                         ) -> Dict[LookupKey, Union[str, Dict[str, Any]]]:
    """Process sections of configuration relating to lookups by dataset type
    name, storage class name, dimensions, or values of dimensions.

    Parameters
    ----------
    config : `Config`
        A `Config` representing a configuration mapping keys to values where
        the keys can be dataset type names, storage class names, dimensions
        or dataId components.
    allow_hierarchy : `bool`, optional
        If `True`, keys that refer to a hierarchy that does not look like
        a DataID specification are allowed and the full hierarchy, as a dict,
        will be returned in the value for the lookup key.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    contents : `dict` of `LookupKey` to `str` or mapping
        A `dict` with keys constructed from the configuration keys. Values
        are stored exactly as found in the configuration; when
        ``allow_hierarchy`` is `True` a value may itself be a mapping. It is
        assumed the caller will convert the values to the required form.

    Raises
    ------
    RuntimeError
        Raised if a key has a mapping as its value but is not of the form
        ``key<value>`` and ``allow_hierarchy`` is `False`.

    Notes
    -----
    The configuration is a mapping where the keys correspond to names
    that can refer to dataset type or storage class names, or can use a
    special syntax to refer to dimensions or dataId values.

    Dimensions are indicated by using dimension names separated by a ``+``.
    If a single dimension is specified this is also supported so long as
    a ``+`` is found. Dimensions are normalized before use such that if
    ``physical_filter+visit`` is defined, then an implicit ``instrument``
    will automatically be added.

    DataID overrides can be specified using the form: ``field<value>`` to
    indicate a subhierarchy. All keys within that new hierarchy will take
    precedence over equivalent values in the root hierarchy.

    Currently only a single dataId field can be specified for a key.
    For example with a config such as:

    .. code-block:: yaml

       something:
         calexp: value1
         instrument<HSC>:
           calexp: value2

    Requesting the match for ``calexp`` would return ``value1`` unless
    a `DatasetRef` is used with a dataId containing the key ``instrument``
    and value ``HSC``.

    A `LookupKey` is constructed for every top-level key, including
    ``field<value>`` ones, so a key the `LookupKey` constructor rejects
    fails before its value is examined.
    """
    contents = {}
    for name, value in config.items():
        lookup = LookupKey(name=name, universe=universe)

        if not isinstance(value, Mapping):
            # Plain entry: the value is stored untouched.
            contents[lookup] = value
            continue

        # indicates a dataId component -- check the format
        kv = DATAID_RE.match(name)
        if kv:
            dataIdKey = kv.group(1)
            dataIdValue = kv.group(2)
            # Every entry of the sub-hierarchy becomes its own key carrying
            # the dataId constraint parsed from the parent key.
            for subKey, subStr in value.items():
                lookup = LookupKey(name=subKey, dataId={dataIdKey: dataIdValue}, universe=universe)
                contents[lookup] = subStr
        elif allow_hierarchy:
            contents[lookup] = value
        else:
            raise RuntimeError(f"Hierarchical key '{name}' not in form 'key<value>'")

    return contents
def processLookupConfigList(config: Iterable[Union[str, Mapping]],
                            *, universe: Optional[DimensionUniverse] = None) -> Set[LookupKey]:
    """Process sections of configuration relating to lookups by dataset type
    name, storage class name, dimensions, or values of dimensions.

    Parameters
    ----------
    config : `list` of `str` or `dict`
        Contents of a configuration listing keys that can be
        dataset type names, storage class names, dimensions
        or dataId components. DataId components are represented as entries
        in the `list` of `dicts` with a single key with a value of a `list`
        of new keys.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    lookups : `set` of `LookupKey`
        All the entries in the input list converted to `LookupKey` and
        returned in a `set`.

    Raises
    ------
    RuntimeError
        Raised if a `dict` entry does not contain exactly one key, or if
        that key is not of the form ``key<value>``.

    Notes
    -----
    Keys are parsed as described in `processLookupConfigs`.
    """
    contents = set()

    for name in config:
        if not isinstance(name, Mapping):
            contents.add(LookupKey(name=name, universe=universe))
            continue

        if len(name) != 1:
            raise RuntimeError(f"Config dict entry {name} must have exactly one key present")

        # Exactly one item is guaranteed by the check above.
        ((dataIdLookUp, subKeys),) = name.items()
        kv = DATAID_RE.match(dataIdLookUp)
        if not kv:
            # Report the offending key itself rather than the whole entry.
            raise RuntimeError(f"Hierarchical key '{dataIdLookUp}' not in form 'key<value>'")

        dataIdKey = kv.group(1)
        dataIdValue = kv.group(2)
        for subKey in subKeys:
            lookup = LookupKey(name=subKey, dataId={dataIdKey: dataIdValue}, universe=universe)
            contents.add(lookup)

    return contents