Coverage for python/lsst/daf/butler/registry/wildcards.py: 25%
212 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-14 19:21 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-14 19:21 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = (
24 "CategorizedWildcard",
25 "CollectionWildcard",
26 "CollectionSearch",
27 "DatasetTypeWildcard",
28)
30import dataclasses
31import re
32from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence
33from types import EllipsisType
34from typing import Any
36from deprecated.sphinx import deprecated
37from lsst.utils.iteration import ensure_iterable
39try:
40 from pydantic.v1 import BaseModel
41except ModuleNotFoundError:
42 from pydantic import BaseModel # type: ignore
44from ..core import DatasetType
45from ..core.utils import globToRegex
46from ._exceptions import CollectionExpressionError, DatasetTypeExpressionError
49@dataclasses.dataclass
50class CategorizedWildcard:
51 """The results of preprocessing a wildcard expression to separate match
52 patterns from strings.
54 The `fromExpression` method should almost always be used to construct
55 instances, as the regular constructor performs no checking of inputs (and
56 that can lead to confusing error messages downstream).
57 """
59 @classmethod
60 def fromExpression(
61 cls,
62 expression: Any,
63 *,
64 allowAny: bool = True,
65 allowPatterns: bool = True,
66 coerceUnrecognized: Callable[[Any], tuple[str, Any] | str] | None = None,
67 coerceItemValue: Callable[[Any], Any] | None = None,
68 defaultItemValue: Any | None = None,
69 ) -> CategorizedWildcard | EllipsisType:
70 """Categorize a wildcard expression.
72 Parameters
73 ----------
74 expression
75 The expression to categorize. May be any of:
76 - `str` (including glob patterns if ``allowPatterns`` is `True`);
77 - `re.Pattern` (only if ``allowPatterns`` is `True`);
78 - objects recognized by ``coerceUnrecognized`` (if provided);
79 - two-element tuples of (`str`, value) where value is recognized
80 by ``coerceItemValue`` (if provided);
81 - a non-`str`, non-mapping iterable containing any of the above;
82 - the special value `...` (only if ``allowAny`` is `True`), which
83 matches anything;
84 - a mapping from `str` to a value are recognized by
85 ``coerceItemValue`` (if provided);
86 - a `CategorizedWildcard` instance (passed through unchanged if
87 it meets the requirements specified by keyword arguments).
88 allowAny: `bool`, optional
89 If `False` (`True` is default) raise `TypeError` if `...` is
90 encountered.
91 allowPatterns: `bool`, optional
92 If `False` (`True` is default) raise `TypeError` if a `re.Pattern`
93 is encountered, or if ``expression`` is a `CategorizedWildcard`
94 with `patterns` not empty.
95 coerceUnrecognized: `~collections.abc.Callable`, optional
96 A callback that takes a single argument of arbitrary type and
97 returns either a `str` - appended to `strings` - or a `tuple` of
98 (`str`, `Any`) to be appended to `items`. This will be called on
99 objects of unrecognized type. Exceptions will be reraised as
100 `TypeError` (and chained).
101 coerceItemValue: `~collections.abc.Callable`, optional
102 If provided, ``expression`` may be a mapping from `str` to any
103 type that can be passed to this function; the result of that call
104 will be stored instead as the value in ``self.items``.
105 defaultItemValue: `Any`, optional
106 If provided, combine this value with any string values encountered
107 (including any returned by ``coerceUnrecognized``) to form a
108 `tuple` and add it to `items`, guaranteeing that `strings` will be
109 empty. Patterns are never added to `items`.
111 Returns
112 -------
113 categorized : `CategorizedWildcard` or ``...``.
114 The struct describing the wildcard. ``...`` is passed through
115 unchanged.
117 Raises
118 ------
119 TypeError
120 Raised if an unsupported type is found in the expression.
121 """
122 assert expression is not None
123 # See if we were given ...; just return that if we were.
124 if expression is ...:
125 if not allowAny:
126 raise TypeError("This expression may not be unconstrained.")
127 return ...
128 if isinstance(expression, cls):
129 # This is already a CategorizedWildcard. Make sure it meets the
130 # reqs. implied by the kwargs we got.
131 if not allowPatterns and expression.patterns:
132 raise TypeError(
133 f"Regular expression(s) {expression.patterns} are not allowed in this context."
134 )
135 if defaultItemValue is not None and expression.strings:
136 if expression.items:
137 raise TypeError(
138 "Incompatible preprocessed expression: an ordered sequence of str is "
139 "needed, but the original order was lost in the preprocessing."
140 )
141 return cls(
142 strings=[],
143 patterns=expression.patterns,
144 items=[(k, defaultItemValue) for k in expression.strings],
145 )
146 elif defaultItemValue is None and expression.items:
147 if expression.strings:
148 raise TypeError(
149 "Incompatible preprocessed expression: an ordered sequence of items is "
150 "needed, but the original order was lost in the preprocessing."
151 )
152 return cls(strings=[k for k, _ in expression.items], patterns=expression.patterns, items=[])
153 else:
154 # Original expression was created with keyword arguments that
155 # were at least as restrictive as what we just got; pass it
156 # through.
157 return expression
159 # If we get here, we know we'll be creating a new instance.
160 # Initialize an empty one now.
161 self = cls(strings=[], patterns=[], items=[])
163 # If mappings are allowed, see if we were given a single mapping by
164 # trying to get items.
165 if coerceItemValue is not None:
166 rawItems = None
167 try:
168 rawItems = expression.items()
169 except AttributeError:
170 pass
171 if rawItems is not None:
172 for k, v in rawItems:
173 try:
174 self.items.append((k, coerceItemValue(v)))
175 except Exception as err:
176 raise TypeError(f"Could not coerce mapping value '{v}' for key '{k}'.") from err
177 return self
179 # Not ..., a CategorizedWildcard instance, or a mapping. Just
180 # process scalars or an iterable. We put the body of the loop inside
181 # a local function so we can recurse after coercion.
183 def process(element: Any, alreadyCoerced: bool = False) -> EllipsisType | None:
184 if isinstance(element, str):
185 if defaultItemValue is not None:
186 self.items.append((element, defaultItemValue))
187 return None
188 else:
189 # This returns a list but we know we only passed in
190 # single value.
191 converted = globToRegex(element)
192 if converted is ...:
193 return ...
194 element = converted[0]
195 # Let regex and ... go through to the next check
196 if isinstance(element, str):
197 self.strings.append(element)
198 return None
199 if allowPatterns and isinstance(element, re.Pattern):
200 self.patterns.append(element)
201 return None
202 if alreadyCoerced:
203 try:
204 k, v = element
205 except TypeError:
206 raise TypeError(
207 f"Object '{element!r}' returned by coercion function must be `str` or `tuple`."
208 ) from None
209 else:
210 self.items.append((k, v))
211 return None
212 if coerceItemValue is not None:
213 try:
214 k, v = element
215 except TypeError:
216 pass
217 else:
218 if not isinstance(k, str):
219 raise TypeError(f"Item key '{k}' is not a string.")
220 try:
221 v = coerceItemValue(v)
222 except Exception as err:
223 raise TypeError(f"Could not coerce tuple item value '{v}' for key '{k}'.") from err
224 self.items.append((k, v))
225 return None
226 if coerceUnrecognized is not None:
227 try:
228 # This should be safe but flake8 cant tell that the
229 # function will be re-declared next function call
230 process(coerceUnrecognized(element), alreadyCoerced=True) # noqa: F821
231 except Exception as err:
232 raise TypeError(f"Could not coerce expression element '{element!r}'.") from err
233 else:
234 extra = "."
235 if isinstance(element, re.Pattern):
236 extra = " and patterns are not allowed."
237 raise TypeError(f"Unsupported object in wildcard expression: '{element!r}'{extra}")
238 return None
240 for element in ensure_iterable(expression):
241 retval = process(element)
242 if retval is ...:
243 # One of the globs matched everything
244 if not allowAny:
245 raise TypeError("This expression may not be unconstrained.")
246 return ...
247 del process
248 return self
250 strings: list[str]
251 """Explicit string values found in the wildcard (`list` [ `str` ]).
252 """
254 patterns: list[re.Pattern]
255 """Regular expression patterns found in the wildcard
256 (`list` [ `re.Pattern` ]).
257 """
259 items: list[tuple[str, Any]]
260 """Two-item tuples that relate string values to other objects
261 (`list` [ `tuple` [ `str`, `Any` ] ]).
262 """
@deprecated(
    reason="Tuples of string collection names are now preferred. Will be removed after v26.",
    version="v25.0",
    category=FutureWarning,
)
class CollectionSearch(BaseModel, Sequence[str]):
    """An ordered, immutable search path of collection names.

    Prefer `fromExpression` over the regular constructor: the constructor
    performs no checking of its inputs, which can lead to confusing error
    messages downstream.

    Parameters
    ----------
    collections : `tuple` [ `str` ]
        Tuple of collection names, ordered from the first searched to the last
        searched.

    Notes
    -----
    A `CollectionSearch` is used to find a single dataset (or set of datasets
    with different dataset types or data IDs) according to its dataset type and
    data ID, giving preference to collections in the order in which they are
    specified.  A `CollectionWildcard` can be constructed from a broader range
    of expressions but does not order the collections to be searched.

    Instances behave as a sequence of `str` collection names.  Two instances
    compare equal exactly when they hold the same names in the same order, so
    instances built by `fromExpression` from equivalent expressions compare
    equal regardless of how different the original expressions appear.
    """

    __root__: tuple[str, ...]

    @classmethod
    def fromExpression(cls, expression: Any) -> CollectionSearch:
        """Build a `CollectionSearch` from a general expression.

        Parameters
        ----------
        expression
            May be:

            - a `str` collection name;
            - an iterable of `str` collection names;
            - another `CollectionSearch` instance (passed through
              unchanged).

            Duplicate entries will be removed (preserving the first appearance
            of each collection name).

        Returns
        -------
        collections : `CollectionSearch`
            A `CollectionSearch` instance.

        Raises
        ------
        CollectionExpressionError
            Raised if `CategorizedWildcard.fromExpression` rejects the
            expression with `TypeError` (patterns and ``...`` are not
            allowed here).
        """
        # An existing instance is already standardized; handing it back lets
        # callers normalize an expression once (turning single-pass iterators
        # into multi-pass iterables) and reuse it with routines that accept
        # arbitrary expressions.
        if isinstance(expression, cls):
            return expression
        try:
            categorized = CategorizedWildcard.fromExpression(
                expression,
                allowAny=False,
                allowPatterns=False,
            )
        except TypeError as err:
            raise CollectionExpressionError(str(err)) from None
        # Guaranteed by the keyword arguments used above.
        assert categorized is not ...
        assert not categorized.patterns
        assert not categorized.items
        # Dict keys keep first-insertion order, so this drops repeats while
        # keeping the first appearance of each name.
        unique_names = tuple(dict.fromkeys(categorized.strings))
        return cls(__root__=unique_names)

    def explicitNames(self) -> Iterator[str]:
        """Iterate over collection names that were specified explicitly."""
        return iter(self.__root__)

    def __iter__(self) -> Iterator[str]:  # type: ignore
        return iter(self.__root__)

    def __len__(self) -> int:
        return len(self.__root__)

    def __getitem__(self, index: Any) -> str:
        return self.__root__[index]

    def __eq__(self, other: Any) -> bool:
        # Only another CollectionSearch with the same ordered names is equal.
        return isinstance(other, CollectionSearch) and self.__root__ == other.__root__

    def __str__(self) -> str:
        joined = ", ".join(self)
        return f"[{joined}]"

    def __repr__(self) -> str:
        return f"CollectionSearch({self.__root__!r})"
372@dataclasses.dataclass(frozen=True)
373class CollectionWildcard:
374 """A validated wildcard for collection names.
376 The `from_expression` method should almost always be used to construct
377 instances, as the regular constructor performs no checking of inputs (and
378 that can lead to confusing error messages downstream).
380 Notes
381 -----
382 `CollectionWildcard` is expected to be rarely used outside of `Registry`
383 (which uses it to back several of its "query" methods that take general
384 expressions for collections), but it may occasionally be useful outside
385 `Registry` as a way to preprocess expressions that contain single-pass
386 iterators into a form that can be used to call those `Registry` methods
387 multiple times.
388 """
390 strings: tuple[str, ...] = ()
391 """An an ordered list of explicitly-named collections. (`tuple` [ `str` ]).
392 """
394 patterns: tuple[re.Pattern, ...] | EllipsisType = ...
395 """Regular expression patterns to match against collection names, or the
396 special value ``...`` indicating all collections.
398 `...` must be accompanied by ``strings=()``.
399 """
401 def __post_init__(self) -> None:
402 if self.patterns is ... and self.strings:
403 raise ValueError(
404 f"Collection wildcard matches any string, but still has explicit strings {self.strings}."
405 )
407 @classmethod
408 def from_expression(cls, expression: Any, require_ordered: bool = False) -> CollectionWildcard:
409 """Process a general expression to construct a `CollectionWildcard`
410 instance.
412 Parameters
413 ----------
414 expression
415 May be:
416 - a `str` collection name;
417 - an `re.Pattern` instance to match (with `re.Pattern.fullmatch`)
418 against collection names;
419 - any iterable containing any of the above;
420 - another `CollectionWildcard` instance (passed through
421 unchanged).
423 Duplicate collection names will be removed (preserving the first
424 appearance of each collection name).
425 require_ordered : `bool`, optional
426 If `True` (`False` is default) require the expression to be
427 ordered, and raise `CollectionExpressionError` if it is not.
429 Returns
430 -------
431 wildcard : `CollectionWildcard`
432 A `CollectionWildcard` instance.
434 Raises
435 ------
436 CollectionExpressionError
437 Raised if the patterns has regular expression, glob patterns, or
438 the ``...`` wildcard, and ``require_ordered=True``.
439 """
440 if isinstance(expression, cls):
441 return expression
442 if expression is ...:
443 return cls()
444 wildcard = CategorizedWildcard.fromExpression(
445 expression,
446 allowAny=True,
447 allowPatterns=True,
448 )
449 if wildcard is ...:
450 return cls()
451 result = cls(
452 strings=tuple(wildcard.strings),
453 patterns=tuple(wildcard.patterns),
454 )
455 if require_ordered:
456 result.require_ordered()
457 return result
459 @classmethod
460 def from_names(cls, names: Iterable[str]) -> CollectionWildcard:
461 """Construct from an iterable of explicit collection names.
463 Parameters
464 ----------
465 names : `~collections.abc.Iterable` [ `str` ]
466 Iterable of collection names.
468 Returns
469 -------
470 wildcard : ~CollectionWildcard`
471 A `CollectionWildcard` instance. `require_ordered` is guaranteed
472 to succeed and return the given names in order.
473 """
474 return cls(strings=tuple(names), patterns=())
476 def require_ordered(self) -> tuple[str, ...]:
477 """Require that this wildcard contains no patterns, and return the
478 ordered tuple of names that it does hold.
480 Returns
481 -------
482 names : `tuple` [ `str` ]
483 Ordered tuple of collection names.
485 Raises
486 ------
487 CollectionExpressionError
488 Raised if the patterns has regular expression, glob patterns, or
489 the ``...`` wildcard.
490 """
491 if self.patterns:
492 raise CollectionExpressionError(
493 f"An ordered collection expression is required; got patterns {self.patterns}."
494 )
495 return self.strings
497 def empty(self) -> bool:
498 """Return true if both ``strings`` and ``patterns`` are empty."""
499 # bool(Ellipsis) is True
500 return not self.strings and not self.patterns
502 def __str__(self) -> str:
503 if self.patterns is ...:
504 return "..."
505 else:
506 terms = list(self.strings)
507 terms.extend(str(p) for p in self.patterns)
508 return "[{}]".format(", ".join(terms))
511@dataclasses.dataclass
512class DatasetTypeWildcard:
513 """A validated expression that resolves to one or more dataset types.
515 The `from_expression` method should almost always be used to construct
516 instances, as the regular constructor performs no checking of inputs (and
517 that can lead to confusing error messages downstream).
518 """
520 values: Mapping[str, DatasetType | None] = dataclasses.field(default_factory=dict)
521 """A mapping with `str` dataset type name keys and optional `DatasetType`
522 instances.
523 """
525 patterns: tuple[re.Pattern, ...] | EllipsisType = ...
526 """Regular expressions to be matched against dataset type names, or the
527 special value ``...`` indicating all dataset types.
529 Any pattern matching a dataset type is considered an overall match for
530 the expression.
531 """
533 @classmethod
534 def from_expression(cls, expression: Any) -> DatasetTypeWildcard:
535 """Construct an instance by analyzing the given expression.
537 Parameters
538 ----------
539 expression
540 Expression to analyze. May be any of the following:
542 - a `str` dataset type name;
543 - a `DatasetType` instance;
544 - a `re.Pattern` to match against dataset type names;
545 - an iterable whose elements may be any of the above (any dataset
546 type matching any element in the list is an overall match);
547 - an existing `DatasetTypeWildcard` instance;
548 - the special ``...`` ellipsis object, which matches any dataset
549 type.
551 Returns
552 -------
553 query : `DatasetTypeWildcard`
554 An instance of this class (new unless an existing instance was
555 passed in).
557 Raises
558 ------
559 DatasetTypeExpressionError
560 Raised if the given expression does not have one of the allowed
561 types.
562 """
563 if isinstance(expression, cls):
564 return expression
565 try:
566 wildcard = CategorizedWildcard.fromExpression(
567 expression, coerceUnrecognized=lambda d: (d.name, d)
568 )
569 except TypeError as err:
570 raise DatasetTypeExpressionError(f"Invalid dataset type expression: {expression!r}.") from err
571 if wildcard is ...:
572 return cls()
573 values: dict[str, DatasetType | None] = {}
574 for name in wildcard.strings:
575 values[name] = None
576 for name, item in wildcard.items:
577 if not isinstance(item, DatasetType):
578 raise DatasetTypeExpressionError(
579 f"Invalid value '{item}' of type {type(item)} in dataset type expression; "
580 "expected str, re.Pattern, DatasetType objects, iterables thereof, or '...'."
581 )
582 values[name] = item
583 return cls(values, patterns=tuple(wildcard.patterns))
585 def __str__(self) -> str:
586 if self.patterns is ...:
587 return "..."
588 else:
589 terms = list(self.values.keys())
590 terms.extend(str(p) for p in self.patterns)
591 return "[{}]".format(", ".join(terms))