Coverage for python / lsst / daf / butler / registry / wildcards.py: 18%
179 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-28 08:36 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-28 08:36 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = (
30 "CategorizedWildcard",
31 "CollectionWildcard",
32 "DatasetTypeWildcard",
33)
35import contextlib
36import dataclasses
37import re
38import warnings
39from collections.abc import Callable, Iterable, Mapping
40from types import EllipsisType
41from typing import Any
43from lsst.utils.iteration import ensure_iterable
45from .._dataset_type import DatasetType
46from ..utils import globToRegex
47from ._exceptions import CollectionExpressionError, DatasetTypeExpressionError
@dataclasses.dataclass
class CategorizedWildcard:
    """The results of preprocessing a wildcard expression to separate match
    patterns from strings.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).
    """

    @classmethod
    def fromExpression(
        cls,
        expression: Any,
        *,
        allowAny: bool = True,
        allowPatterns: bool = True,
        coerceUnrecognized: Callable[[Any], tuple[str, Any] | str] | None = None,
        coerceItemValue: Callable[[Any], Any] | None = None,
        defaultItemValue: Any | None = None,
    ) -> CategorizedWildcard | EllipsisType:
        """Categorize a wildcard expression.

        Parameters
        ----------
        expression : `~typing.Any`
            The expression to categorize.  May be any of:

            - `str` (including glob patterns if ``allowPatterns`` is `True`);
            - `re.Pattern` (only if ``allowPatterns`` is `True`);
            - objects recognized by ``coerceUnrecognized`` (if provided);
            - two-element tuples of (`str`, value) where value is recognized
              by ``coerceItemValue`` (if provided);
            - a non-`str`, non-mapping iterable containing any of the above;
            - the special value ``...`` (only if ``allowAny`` is `True`),
              which matches anything;
            - a mapping from `str` to a value that is recognized by
              ``coerceItemValue`` (if provided);
            - a `CategorizedWildcard` instance (passed through unchanged if
              it meets the requirements specified by keyword arguments).
        allowAny : `bool`, optional
            If `False` (`True` is default) raise `TypeError` if ``...`` is
            encountered.
        allowPatterns : `bool`, optional
            If `False` (`True` is default) raise `TypeError` if a `re.Pattern`
            is encountered, or if ``expression`` is a `CategorizedWildcard`
            with `patterns` not empty.
        coerceUnrecognized : `~collections.abc.Callable`, optional
            A callback that takes a single argument of arbitrary type and
            returns either a `str` - appended to `strings` - or a `tuple` of
            (`str`, `typing.Any`) to be appended to `items`.  This will be
            called on objects of unrecognized type.  Exceptions will be
            reraised as `TypeError` (and chained).
        coerceItemValue : `~collections.abc.Callable`, optional
            If provided, ``expression`` may be a mapping from `str` to any
            type that can be passed to this function; the result of that call
            will be stored instead as the value in ``self.items``.
        defaultItemValue : `typing.Any`, optional
            If provided, combine this value with any string values encountered
            (including any returned by ``coerceUnrecognized``) to form a
            `tuple` and add it to `items`, guaranteeing that `strings` will be
            empty.  Patterns are never added to `items`.

        Returns
        -------
        categorized : `CategorizedWildcard` or ``...``.
            The struct describing the wildcard.  ``...`` is passed through
            unchanged.

        Raises
        ------
        TypeError
            Raised if an unsupported type is found in the expression.
        """
        assert expression is not None
        # See if we were given ...; just return that if we were.
        if expression is ...:
            if not allowAny:
                raise TypeError("This expression may not be unconstrained.")
            return ...
        if isinstance(expression, cls):
            # This is already a CategorizedWildcard.  Make sure it meets the
            # reqs. implied by the kwargs we got.
            if not allowPatterns and expression.patterns:
                raise TypeError(
                    f"Regular expression(s) {expression.patterns} are not allowed in this context."
                )
            if defaultItemValue is not None and expression.strings:
                # Caller wants everything in `items`, but this instance split
                # some values into `strings`.  We can only repair that when
                # `items` is empty, because merging the two would lose the
                # original ordering.
                if expression.items:
                    raise TypeError(
                        "Incompatible preprocessed expression: an ordered sequence of str is "
                        "needed, but the original order was lost in the preprocessing."
                    )
                return cls(
                    strings=[],
                    patterns=expression.patterns,
                    items=[(k, defaultItemValue) for k in expression.strings],
                )
            elif defaultItemValue is None and expression.items:
                # Opposite mismatch: caller wants plain strings, but this
                # instance holds (key, value) items; again only safe to
                # convert when the other list is empty.
                if expression.strings:
                    raise TypeError(
                        "Incompatible preprocessed expression: an ordered sequence of items is "
                        "needed, but the original order was lost in the preprocessing."
                    )
                return cls(strings=[k for k, _ in expression.items], patterns=expression.patterns, items=[])
            else:
                # Original expression was created with keyword arguments that
                # were at least as restrictive as what we just got; pass it
                # through.
                return expression

        # If we get here, we know we'll be creating a new instance.
        # Initialize an empty one now.
        self = cls(strings=[], patterns=[], items=[])

        # If mappings are allowed, see if we were given a single mapping by
        # trying to get items.
        if coerceItemValue is not None:
            rawItems = None
            with contextlib.suppress(AttributeError):
                rawItems = expression.items()
            if rawItems is not None:
                # A mapping is handled exclusively here: every value is
                # coerced and we return immediately without touching
                # `strings` or `patterns`.
                for k, v in rawItems:
                    try:
                        self.items.append((k, coerceItemValue(v)))
                    except Exception as err:
                        raise TypeError(f"Could not coerce mapping value '{v}' for key '{k}'.") from err
                return self

        # Not ..., a CategorizedWildcard instance, or a mapping.  Just
        # process scalars or an iterable.  We put the body of the loop inside
        # a local function so we can recurse after coercion.

        def process(element: Any, alreadyCoerced: bool = False) -> EllipsisType | None:
            # Returns ... when a glob expanded to "match everything";
            # otherwise appends to one of self's lists and returns None.
            was_string = False
            if isinstance(element, str):
                was_string = True
                if defaultItemValue is not None:
                    self.items.append((element, defaultItemValue))
                    return None
                else:
                    # This returns a list but we know we only passed in
                    # single value.
                    converted = globToRegex(element)
                    if converted is ...:
                        return ...
                    element = converted[0]
                    # Let regex and ... go through to the next check
            if isinstance(element, str):
                self.strings.append(element)
                return None
            if allowPatterns and isinstance(element, re.Pattern):
                # Only warn for patterns supplied directly by the caller;
                # patterns produced from glob strings above are fine.
                if not was_string:
                    warnings.warn(
                        "Regular expressions should no longer be used in collection or dataset type searches."
                        " Use globs ('*' wildcards) instead. Will be removed after v28.",
                        FutureWarning,
                    )
                self.patterns.append(element)
                return None
            if alreadyCoerced:
                # Element came from coerceUnrecognized and was not a str, so
                # it must unpack as a (key, value) pair.
                try:
                    k, v = element
                except TypeError:
                    raise TypeError(
                        f"Object '{element!r}' returned by coercion function must be `str` or `tuple`."
                    ) from None
                else:
                    self.items.append((k, v))
                    return None
            if coerceItemValue is not None:
                try:
                    k, v = element
                except TypeError:
                    # Not a pair; fall through to coerceUnrecognized below.
                    pass
                else:
                    if not isinstance(k, str):
                        raise TypeError(f"Item key '{k}' is not a string.")
                    try:
                        v = coerceItemValue(v)
                    except Exception as err:
                        raise TypeError(f"Could not coerce tuple item value '{v}' for key '{k}'.") from err
                    self.items.append((k, v))
                    return None
            if coerceUnrecognized is not None:
                try:
                    # This should be safe but flake8 can't tell that the
                    # function will be re-declared next function call
                    process(coerceUnrecognized(element), alreadyCoerced=True)  # noqa: F821
                except Exception as err:
                    raise TypeError(f"Could not coerce expression element '{element!r}'.") from err
            else:
                extra = "."
                if isinstance(element, re.Pattern):
                    extra = " and patterns are not allowed."
                raise TypeError(f"Unsupported object in wildcard expression: '{element!r}'{extra}")
            return None

        for element in ensure_iterable(expression):
            retval = process(element)
            if retval is ...:
                # One of the globs matched everything
                if not allowAny:
                    raise TypeError("This expression may not be unconstrained.")
                return ...
        # Drop the local closure (it captures `self`) before returning.
        del process
        return self

    strings: list[str]
    """Explicit string values found in the wildcard (`list` [ `str` ]).
    """

    patterns: list[re.Pattern]
    """Regular expression patterns found in the wildcard
    (`list` [ `re.Pattern` ]).
    """

    items: list[tuple[str, Any]]
    """Two-item tuples that relate string values to other objects
    (`list` [ `tuple` [ `str`, `typing.Any` ] ]).
    """
274@dataclasses.dataclass(frozen=True)
275class CollectionWildcard:
276 """A validated wildcard for collection names.
278 The `from_expression` method should almost always be used to construct
279 instances, as the regular constructor performs no checking of inputs (and
280 that can lead to confusing error messages downstream).
282 Notes
283 -----
284 `CollectionWildcard` is expected to be rarely used outside of `Registry`
285 (which uses it to back several of its "query" methods that take general
286 expressions for collections), but it may occasionally be useful outside
287 `Registry` as a way to preprocess expressions that contain single-pass
288 iterators into a form that can be used to call those `Registry` methods
289 multiple times.
290 """
292 strings: tuple[str, ...] = ()
293 """An an ordered list of explicitly-named collections. (`tuple` [ `str` ]).
294 """
296 patterns: tuple[re.Pattern, ...] | EllipsisType = ...
297 """Regular expression patterns to match against collection names, or the
298 special value ``...`` indicating all collections.
300 ``...`` must be accompanied by ``strings=()``.
301 """
303 def __post_init__(self) -> None:
304 if self.patterns is ... and self.strings:
305 raise ValueError(
306 f"Collection wildcard matches any string, but still has explicit strings {self.strings}."
307 )
309 @classmethod
310 def from_expression(cls, expression: Any, require_ordered: bool = False) -> CollectionWildcard:
311 """Process a general expression to construct a `CollectionWildcard`
312 instance.
314 Parameters
315 ----------
316 expression : `~typing.Any`
317 May be:
319 - a `str` collection name;
320 - an `re.Pattern` instance to match (with `re.Pattern.fullmatch`)
321 against collection names;
322 - any iterable containing any of the above;
323 - another `CollectionWildcard` instance (passed through unchanged).
325 Duplicate collection names will be removed (preserving the first
326 appearance of each collection name).
327 require_ordered : `bool`, optional
328 If `True` (`False` is default) require the expression to be
329 ordered, and raise `CollectionExpressionError` if it is not.
331 Returns
332 -------
333 wildcard : `CollectionWildcard`
334 A `CollectionWildcard` instance.
336 Raises
337 ------
338 CollectionExpressionError
339 Raised if the patterns has regular expression, glob patterns, or
340 the ``...`` wildcard, and ``require_ordered=True``.
341 """
342 if isinstance(expression, cls):
343 return expression
344 if expression is ...:
345 return cls()
346 wildcard = CategorizedWildcard.fromExpression(
347 expression,
348 allowAny=True,
349 allowPatterns=True,
350 )
351 if wildcard is ...:
352 return cls()
353 result = cls(
354 strings=tuple(wildcard.strings),
355 patterns=tuple(wildcard.patterns),
356 )
357 if require_ordered:
358 result.require_ordered()
359 return result
361 @classmethod
362 def from_names(cls, names: Iterable[str]) -> CollectionWildcard:
363 """Construct from an iterable of explicit collection names.
365 Parameters
366 ----------
367 names : `~collections.abc.Iterable` [ `str` ]
368 Iterable of collection names.
370 Returns
371 -------
372 wildcard : `CollectionWildcard`
373 A `CollectionWildcard` instance. `require_ordered` is guaranteed
374 to succeed and return the given names in order.
375 """
376 return cls(strings=tuple(names), patterns=())
378 def require_ordered(self) -> tuple[str, ...]:
379 """Require that this wildcard contains no patterns, and return the
380 ordered tuple of names that it does hold.
382 Returns
383 -------
384 names : `tuple` [ `str` ]
385 Ordered tuple of collection names.
387 Raises
388 ------
389 CollectionExpressionError
390 Raised if the patterns has regular expression, glob patterns, or
391 the ``...`` wildcard.
392 """
393 if self.patterns:
394 raise CollectionExpressionError(
395 f"An ordered collection expression is required; got patterns {self.patterns}."
396 )
397 return self.strings
399 def empty(self) -> bool:
400 """Return true if both ``strings`` and ``patterns`` are empty."""
401 # bool(Ellipsis) is True
402 return not self.strings and not self.patterns
404 def __str__(self) -> str:
405 if self.patterns is ...:
406 return "..."
407 else:
408 terms = list(self.strings)
409 terms.extend(str(p) for p in self.patterns)
410 return "[{}]".format(", ".join(terms))
413@dataclasses.dataclass
414class DatasetTypeWildcard:
415 """A validated expression that resolves to one or more dataset types.
417 The `from_expression` method should almost always be used to construct
418 instances, as the regular constructor performs no checking of inputs (and
419 that can lead to confusing error messages downstream).
420 """
422 values: Mapping[str, DatasetType | None] = dataclasses.field(default_factory=dict)
423 """A mapping with `str` dataset type name keys and optional `DatasetType`
424 instances.
425 """
427 patterns: tuple[re.Pattern, ...] | EllipsisType = ...
428 """Regular expressions to be matched against dataset type names, or the
429 special value ``...`` indicating all dataset types.
431 Any pattern matching a dataset type is considered an overall match for
432 the expression.
433 """
435 @classmethod
436 def from_expression(cls, expression: Any) -> DatasetTypeWildcard:
437 """Construct an instance by analyzing the given expression.
439 Parameters
440 ----------
441 expression : `~typing.Any`
442 Expression to analyze. May be any of the following:
444 - a `str` dataset type name;
445 - a `DatasetType` instance;
446 - an iterable whose elements may be any of the above (any dataset
447 type matching any element in the list is an overall match);
448 - an existing `DatasetTypeWildcard` instance;
449 - the special ``...`` ellipsis object, which matches any dataset
450 type.
452 Returns
453 -------
454 query : `DatasetTypeWildcard`
455 An instance of this class (new unless an existing instance was
456 passed in).
458 Raises
459 ------
460 DatasetTypeExpressionError
461 Raised if the given expression does not have one of the allowed
462 types.
463 """
464 if isinstance(expression, cls):
465 return expression
466 # CategorizedWildcard currently allows globs and regex as patterns
467 # but RFC-879 drops support for regex in dataset type specifications.
468 # Therefore check for their presence.
469 for exp in ensure_iterable(expression):
470 if isinstance(exp, re.Pattern):
471 raise DatasetTypeExpressionError("Regular expressions are not supported.")
472 try:
473 wildcard = CategorizedWildcard.fromExpression(
474 expression,
475 coerceUnrecognized=lambda d: (d.name, d),
476 )
477 except TypeError as err:
478 raise DatasetTypeExpressionError(f"Invalid dataset type expression: {expression!r}.") from err
479 if wildcard is ...:
480 return cls()
481 values: dict[str, DatasetType | None] = {}
482 for name in wildcard.strings:
483 values[name] = None
484 for name, item in wildcard.items:
485 if not isinstance(item, DatasetType):
486 raise DatasetTypeExpressionError(
487 f"Invalid value '{item}' of type {type(item)} in dataset type expression; "
488 "expected str, re.Pattern, DatasetType objects, iterables thereof, or '...'."
489 )
490 values[name] = item
491 return cls(values, patterns=tuple(wildcard.patterns))
493 def __str__(self) -> str:
494 if self.patterns is ...:
495 return "..."
496 else:
497 terms = list(self.values.keys())
498 terms.extend(str(p) for p in self.patterns)
499 return "[{}]".format(", ".join(terms))