Coverage for python/lsst/daf/butler/core/configSupport.py: 23%
121 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-12 09:20 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Support for configuration snippets"""
24from __future__ import annotations
26__all__ = ("LookupKey", "processLookupConfigs", "processLookupConfigList")
28import logging
29import re
30from collections.abc import Iterable, Mapping
31from typing import TYPE_CHECKING, Any
33from .dimensions import DimensionGraph
35if TYPE_CHECKING:
36 from .config import Config
37 from .dimensions import Dimension, DimensionUniverse
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Used with ``match`` so the pattern is anchored at the start of the key;
# the trailing ``$`` anchors it at the end.  Group 1 is the dataId field
# name (lower-case letters and underscores only), group 2 is everything
# between the first ``<`` and the final ``>``, e.g. ``instrument<HSC>``
# gives ("instrument", "HSC").
DATAID_RE = re.compile(r"([a-z_]+)<(.*)>$")
"""Regex to find dataIds embedded in configurations."""
class LookupKey:
    """Representation of key that can be used to lookup information.

    Look up is based on dataset type name, storage class name, dimensions.

    Parameters
    ----------
    name : `str`, optional
        Primary index string for lookup.  If this string looks like it
        represents dimensions (via ``dim1+dim2+dim3`` syntax) the name
        is converted to a `DimensionGraph` and stored in ``dimensions``
        property.
    dimensions : `DimensionGraph` or iterable of dimensions, optional
        Dimensions that are relevant for lookup.  Should not be specified
        if ``name`` is also specified.  Anything that is not already a
        `DimensionGraph` is converted using ``universe.extract``.
    dataId : `dict`, optional
        Keys and values from a dataId that should control lookups.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate ``name`` or
        ``dimensions``.  Required if the key represents dimensions and a
        full `DimensionGraph` is not provided.

    Raises
    ------
    ValueError
        Raised if neither or both of ``name`` and ``dimensions`` are given,
        if ``name`` is not a `str`, or if a universe is needed to interpret
        the key but none was supplied.

    Notes
    -----
    Instances are hashable and compare equal when their name, dimensions
    and dataId all match.  The universe is remembered only so that
    `clone` can re-interpret dimension-style names; it takes no part in
    equality or hashing.
    """

    def __init__(
        self,
        name: str | None = None,
        dimensions: Iterable[str | Dimension] | None = None,
        dataId: dict[str, Any] | None = None,
        *,
        universe: DimensionUniverse | None = None,
    ):
        if name is None and dimensions is None:
            raise ValueError("At least one of name or dimensions must be given")

        if name is not None and dimensions is not None:
            raise ValueError("Can only accept one of name or dimensions")

        self._dimensions = None
        self._name = None
        # Retained so that clone() can build keys that need a universe.
        self._universe = universe

        if name is not None:
            if not isinstance(name, str):
                raise ValueError(f"Supplied name must be str not: '{name}'")

            if "+" in name:
                if universe is None:
                    raise ValueError(f"Cannot construct LookupKey for {name} without dimension universe.")

                # If we are given a single dimension we use the "+" to
                # indicate this but have to filter out the empty value
                dimension_names = [n for n in name.split("+") if n]
                try:
                    self._dimensions = universe.extract(dimension_names)
                except KeyError:
                    # One or more of the dimensions is not known to the
                    # universe. This could be a typo or it could be that
                    # a config is being used that is not compatible with
                    # this universe. Use the name directly as a lookup key
                    # but issue a warning. This will be potentially annoying
                    # in the scenario where a lookup key comes from a
                    # default config but the users are using an external
                    # universe.
                    unknown = [n for n in dimension_names if universe.get(n) is None]
                    log.debug(
                        "A LookupKey '%s' uses unknown dimensions: %s. Possible typo?"
                        " Using the name explicitly.",
                        name,
                        unknown,
                    )
                    self._name = name
            else:
                self._name = name

        elif dimensions is not None:
            if not isinstance(dimensions, DimensionGraph):
                if universe is None:
                    raise ValueError(
                        f"Cannot construct LookupKey for dimensions={dimensions} without universe."
                    )
                else:
                    self._dimensions = universe.extract(dimensions)
            else:
                self._dimensions = dimensions
        else:
            # mypy cannot work this out on its own
            raise ValueError("Name was None but dimensions is also None")

        # The dataId is converted to a frozenset of key/value
        # tuples so that it is not mutable
        self._dataId = frozenset(dataId.items()) if dataId is not None else None

    def __str__(self) -> str:
        # Test against None rather than truthiness so that an empty-string
        # name is reported as-is instead of triggering the internal error.
        if self._name is not None:
            name = self._name
        elif self._dimensions is not None:
            name = "+".join(self._dimensions.names)
        else:
            raise RuntimeError("Internal error since name and dimensions are both None")

        # For the simple case return the simple string
        if not self._dataId:
            return name

        return f"{name} ({self.dataId})"

    def __repr__(self) -> str:
        params = ""
        if self.name is not None:
            params += f"name={self.name!r},"
        if self.dimensions is not None:
            params += f"dimensions={self.dimensions!r},"
        if self._dataId:
            params += "dataId={" + ",".join(f"'{k}': {v!r}" for k, v in self._dataId) + "}"

        return f"{self.__class__.__name__}({params})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            # Let Python try the reflected comparison; for unrelated types
            # the overall result of ``==`` is still `False`.
            return NotImplemented
        return (
            self._name == other._name
            and self._dimensions == other._dimensions
            and self._dataId == other._dataId
        )

    @property
    def name(self) -> str | None:
        """Primary name string to use as lookup (`str`)."""
        return self._name

    @property
    def dimensions(self) -> DimensionGraph | None:
        """Dimensions associated with lookup (`DimensionGraph`)."""
        return self._dimensions

    @property
    def dataId(self) -> dict[str, Any] | None:
        """Return dict of keys/values that are important for dataId lookup.

        (`dict` or `None`)
        """
        if self._dataId is not None:
            # A fresh dict each time, so callers cannot mutate the key.
            return dict(self._dataId)
        else:
            return None

    def __hash__(self) -> int:
        """Hash the lookup to allow use as a key in a dict."""
        return hash((self._name, self._dimensions, self._dataId))

    def clone(
        self,
        name: str | None = None,
        dimensions: DimensionGraph | None = None,
        dataId: dict[str, Any] | None = None,
    ) -> LookupKey:
        """Clone the object, overriding some options.

        Used to create a new instance of the object whilst updating
        some of it.

        Parameters
        ----------
        name : `str`, optional
            Primary index string for lookup.  Will override ``dimensions``
            if ``dimensions`` are set.
        dimensions : `DimensionGraph`, optional
            Dimensions that are relevant for lookup.  Will override ``name``
            if ``name`` is already set.
        dataId : `dict`, optional
            Keys and values from a dataId that should control lookups.

        Returns
        -------
        clone : `LookupKey`
            Copy with updates.

        Raises
        ------
        ValueError
            Raised if both ``name`` and ``dimensions`` are supplied.
        """
        if name is not None and dimensions is not None:
            raise ValueError("Both name and dimensions can not be set")

        # if neither name nor dimensions are specified we copy from current
        # object. Otherwise we'll use the supplied values
        if name is None and dimensions is None:
            name = self._name
            dimensions = self._dimensions

        # Make sure we use the dict form for the constructor
        if dataId is None and self._dataId is not None:
            dataId = self.dataId

        # Forward the universe this key was built with: a name containing
        # "+" cannot be interpreted by the constructor without one.
        return self.__class__(name=name, dimensions=dimensions, dataId=dataId, universe=self._universe)
def processLookupConfigs(
    config: Config, *, allow_hierarchy: bool = False, universe: DimensionUniverse | None = None
) -> dict[LookupKey, str | dict[str, Any]]:
    """Process sections of configuration relating to lookups.

    Can be by dataset type name, storage class name, dimensions, or values
    of dimensions.

    Parameters
    ----------
    config : `Config`
        A `Config` representing a configuration mapping keys to values where
        the keys can be dataset type names, storage class names, dimensions
        or dataId components.
    allow_hierarchy : `bool`, optional
        If `True`, keys that refer to a hierarchy that does not look like
        a DataID specification are allowed and the full hierarchy, as a dict,
        will be returned in the value for the lookup key.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    contents : `dict` of `LookupKey` to `str` or `dict`
        A `dict` with keys constructed from the configuration keys.  Values
        are passed through unchanged from the configuration; they are
        mappings only for hierarchical entries accepted via
        ``allow_hierarchy``.  It is assumed the caller will convert the
        values to the required form.

    Raises
    ------
    RuntimeError
        Raised if a key has a mapping as its value, is not of the form
        ``key<value>``, and ``allow_hierarchy`` is `False`.

    Notes
    -----
    The configuration is a mapping where the keys correspond to names
    that can refer to dataset type or storage class names, or can use a
    special syntax to refer to dimensions or dataId values.

    Dimensions are indicated by using dimension names separated by a ``+``.
    If a single dimension is specified this is also supported so long as
    a ``+`` is found.  Dimensions are normalized before use such that if
    ``physical_filter+visit`` is defined, then an implicit ``instrument``
    will automatically be added.

    DataID overrides can be specified using the form: ``field<value>`` to
    indicate a subhierarchy.  All keys within that new hierarchy will take
    precedence over equivalent values in the root hierarchy.

    Currently only a single dataId field can be specified for a key.
    For example with a config such as:

    .. code-block:: yaml

       something:
         calexp: value1
         instrument<HSC>:
           calexp: value2

    Requesting the match for ``calexp`` would return ``value1`` unless
    a `DatasetRef` is used with a dataId containing the key ``instrument``
    and value ``HSC``.

    The values of the mapping are stored as given in the configuration.
    """
    contents: dict[LookupKey, str | dict[str, Any]] = {}
    for name, value in config.items():
        if not isinstance(value, Mapping):
            # Plain entry: the key itself is the lookup.
            contents[LookupKey(name=name, universe=universe)] = value
            continue

        # A mapping value indicates a dataId component -- check the format.
        # The ``field<value>`` key is never itself a lookup key, so no
        # LookupKey is built for it: a "+" inside the dataId value must not
        # be mistaken for dimension syntax.
        kv = DATAID_RE.match(name) if isinstance(name, str) else None
        if kv:
            dataIdKey = kv.group(1)
            dataIdValue = kv.group(2)
            for subKey, subStr in value.items():
                lookup = LookupKey(name=subKey, dataId={dataIdKey: dataIdValue}, universe=universe)
                contents[lookup] = subStr
        elif allow_hierarchy:
            contents[LookupKey(name=name, universe=universe)] = value
        else:
            raise RuntimeError(f"Hierarchical key '{name}' not in form 'key<value>'")

    return contents
def processLookupConfigList(
    config: Iterable[str | Mapping], *, universe: DimensionUniverse | None = None
) -> set[LookupKey]:
    """Process sections of configuration relating to lookups.

    Can be by dataset type name, storage class name, dimensions, or values
    of dimensions.

    Parameters
    ----------
    config : `list` of `str` or `dict`
        Contents of a configuration listing keys that can be
        dataset type names, storage class names, dimensions
        or dataId components.  DataId components are represented as entries
        in the `list` of `dicts` with a single key with a value of a `list`
        of new keys.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    lookups : `set` of `LookupKey`
        All the entries in the input list converted to `LookupKey` and
        returned in a `set`.

    Raises
    ------
    RuntimeError
        Raised if a `dict` entry does not contain exactly one key, or if
        that key is not of the form ``key<value>``.

    Notes
    -----
    Keys are parsed as described in `processLookupConfigs`.
    """
    contents: set[LookupKey] = set()

    for name in config:
        if not isinstance(name, Mapping):
            contents.add(LookupKey(name=name, universe=universe))
            continue

        if len(name) != 1:
            raise RuntimeError(f"Config dict entry {name} must have exactly one key, found {len(name)}")

        # Exactly one item is guaranteed by the check above.
        ((dataIdLookUp, subKeys),) = name.items()
        kv = DATAID_RE.match(dataIdLookUp)
        if not kv:
            # Report the offending key, not the whole mapping.
            raise RuntimeError(f"Hierarchical key '{dataIdLookUp}' not in form 'key<value>'")

        dataIdKey = kv.group(1)
        dataIdValue = kv.group(2)
        for subKey in subKeys:
            contents.add(LookupKey(name=subKey, dataId={dataIdKey: dataIdValue}, universe=universe))

    return contents