Coverage for python / lsst / daf / butler / registry / wildcards.py: 18%

179 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-26 08:49 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ( 

30 "CategorizedWildcard", 

31 "CollectionWildcard", 

32 "DatasetTypeWildcard", 

33) 

34 

35import contextlib 

36import dataclasses 

37import re 

38import warnings 

39from collections.abc import Callable, Iterable, Mapping 

40from types import EllipsisType 

41from typing import Any 

42 

43from lsst.utils.iteration import ensure_iterable 

44 

45from .._dataset_type import DatasetType 

46from ..utils import globToRegex 

47from ._exceptions import CollectionExpressionError, DatasetTypeExpressionError 

48 

49 

50@dataclasses.dataclass 

51class CategorizedWildcard: 

52 """The results of preprocessing a wildcard expression to separate match 

53 patterns from strings. 

54 

55 The `fromExpression` method should almost always be used to construct 

56 instances, as the regular constructor performs no checking of inputs (and 

57 that can lead to confusing error messages downstream). 

58 """ 

59 

60 @classmethod 

61 def fromExpression( 

62 cls, 

63 expression: Any, 

64 *, 

65 allowAny: bool = True, 

66 allowPatterns: bool = True, 

67 coerceUnrecognized: Callable[[Any], tuple[str, Any] | str] | None = None, 

68 coerceItemValue: Callable[[Any], Any] | None = None, 

69 defaultItemValue: Any | None = None, 

70 ) -> CategorizedWildcard | EllipsisType: 

71 """Categorize a wildcard expression. 

72 

73 Parameters 

74 ---------- 

75 expression : `~typing.Any` 

76 The expression to categorize. May be any of: 

77 

78 - `str` (including glob patterns if ``allowPatterns`` is `True`); 

79 - `re.Pattern` (only if ``allowPatterns`` is `True`); 

80 - objects recognized by ``coerceUnrecognized`` (if provided); 

81 - two-element tuples of (`str`, value) where value is recognized 

82 by ``coerceItemValue`` (if provided); 

83 - a non-`str`, non-mapping iterable containing any of the above; 

84 - the special value ``...`` (only if ``allowAny`` is `True`), 

85 which matches anything; 

86 - a mapping from `str` to a value are recognized by 

87 ``coerceItemValue`` (if provided); 

88 - a `CategorizedWildcard` instance (passed through unchanged if 

89 it meets the requirements specified by keyword arguments). 

90 allowAny : `bool`, optional 

91 If `False` (`True` is default) raise `TypeError` if ``...`` is 

92 encountered. 

93 allowPatterns : `bool`, optional 

94 If `False` (`True` is default) raise `TypeError` if a `re.Pattern` 

95 is encountered, or if ``expression`` is a `CategorizedWildcard` 

96 with `patterns` not empty. 

97 coerceUnrecognized : `~collections.abc.Callable`, optional 

98 A callback that takes a single argument of arbitrary type and 

99 returns either a `str` - appended to `strings` - or a `tuple` of 

100 (`str`, `typing.Any`) to be appended to `items`. This will be 

101 called on objects of unrecognized type. Exceptions will be reraised 

102 as `TypeError` (and chained). 

103 coerceItemValue : `~collections.abc.Callable`, optional 

104 If provided, ``expression`` may be a mapping from `str` to any 

105 type that can be passed to this function; the result of that call 

106 will be stored instead as the value in ``self.items``. 

107 defaultItemValue : `typing.Any`, optional 

108 If provided, combine this value with any string values encountered 

109 (including any returned by ``coerceUnrecognized``) to form a 

110 `tuple` and add it to `items`, guaranteeing that `strings` will be 

111 empty. Patterns are never added to `items`. 

112 

113 Returns 

114 ------- 

115 categorized : `CategorizedWildcard` or ``...``. 

116 The struct describing the wildcard. ``...`` is passed through 

117 unchanged. 

118 

119 Raises 

120 ------ 

121 TypeError 

122 Raised if an unsupported type is found in the expression. 

123 """ 

124 assert expression is not None 

125 # See if we were given ...; just return that if we were. 

126 if expression is ...: 

127 if not allowAny: 

128 raise TypeError("This expression may not be unconstrained.") 

129 return ... 

130 if isinstance(expression, cls): 

131 # This is already a CategorizedWildcard. Make sure it meets the 

132 # reqs. implied by the kwargs we got. 

133 if not allowPatterns and expression.patterns: 

134 raise TypeError( 

135 f"Regular expression(s) {expression.patterns} are not allowed in this context." 

136 ) 

137 if defaultItemValue is not None and expression.strings: 

138 if expression.items: 

139 raise TypeError( 

140 "Incompatible preprocessed expression: an ordered sequence of str is " 

141 "needed, but the original order was lost in the preprocessing." 

142 ) 

143 return cls( 

144 strings=[], 

145 patterns=expression.patterns, 

146 items=[(k, defaultItemValue) for k in expression.strings], 

147 ) 

148 elif defaultItemValue is None and expression.items: 

149 if expression.strings: 

150 raise TypeError( 

151 "Incompatible preprocessed expression: an ordered sequence of items is " 

152 "needed, but the original order was lost in the preprocessing." 

153 ) 

154 return cls(strings=[k for k, _ in expression.items], patterns=expression.patterns, items=[]) 

155 else: 

156 # Original expression was created with keyword arguments that 

157 # were at least as restrictive as what we just got; pass it 

158 # through. 

159 return expression 

160 

161 # If we get here, we know we'll be creating a new instance. 

162 # Initialize an empty one now. 

163 self = cls(strings=[], patterns=[], items=[]) 

164 

165 # If mappings are allowed, see if we were given a single mapping by 

166 # trying to get items. 

167 if coerceItemValue is not None: 

168 rawItems = None 

169 with contextlib.suppress(AttributeError): 

170 rawItems = expression.items() 

171 

172 if rawItems is not None: 

173 for k, v in rawItems: 

174 try: 

175 self.items.append((k, coerceItemValue(v))) 

176 except Exception as err: 

177 raise TypeError(f"Could not coerce mapping value '{v}' for key '{k}'.") from err 

178 return self 

179 

180 # Not ..., a CategorizedWildcard instance, or a mapping. Just 

181 # process scalars or an iterable. We put the body of the loop inside 

182 # a local function so we can recurse after coercion. 

183 

184 def process(element: Any, alreadyCoerced: bool = False) -> EllipsisType | None: 

185 was_string = False 

186 if isinstance(element, str): 

187 was_string = True 

188 if defaultItemValue is not None: 

189 self.items.append((element, defaultItemValue)) 

190 return None 

191 else: 

192 # This returns a list but we know we only passed in 

193 # single value. 

194 converted = globToRegex(element) 

195 if converted is ...: 

196 return ... 

197 element = converted[0] 

198 # Let regex and ... go through to the next check 

199 if isinstance(element, str): 

200 self.strings.append(element) 

201 return None 

202 if allowPatterns and isinstance(element, re.Pattern): 

203 if not was_string: 

204 warnings.warn( 

205 "Regular expressions should no longer be used in collection or dataset type searches." 

206 " Use globs ('*' wildcards) instead. Will be removed after v28.", 

207 FutureWarning, 

208 ) 

209 self.patterns.append(element) 

210 return None 

211 if alreadyCoerced: 

212 try: 

213 k, v = element 

214 except TypeError: 

215 raise TypeError( 

216 f"Object '{element!r}' returned by coercion function must be `str` or `tuple`." 

217 ) from None 

218 else: 

219 self.items.append((k, v)) 

220 return None 

221 if coerceItemValue is not None: 

222 try: 

223 k, v = element 

224 except TypeError: 

225 pass 

226 else: 

227 if not isinstance(k, str): 

228 raise TypeError(f"Item key '{k}' is not a string.") 

229 try: 

230 v = coerceItemValue(v) 

231 except Exception as err: 

232 raise TypeError(f"Could not coerce tuple item value '{v}' for key '{k}'.") from err 

233 self.items.append((k, v)) 

234 return None 

235 if coerceUnrecognized is not None: 

236 try: 

237 # This should be safe but flake8 cant tell that the 

238 # function will be re-declared next function call 

239 process(coerceUnrecognized(element), alreadyCoerced=True) # noqa: F821 

240 except Exception as err: 

241 raise TypeError(f"Could not coerce expression element '{element!r}'.") from err 

242 else: 

243 extra = "." 

244 if isinstance(element, re.Pattern): 

245 extra = " and patterns are not allowed." 

246 raise TypeError(f"Unsupported object in wildcard expression: '{element!r}'{extra}") 

247 return None 

248 

249 for element in ensure_iterable(expression): 

250 retval = process(element) 

251 if retval is ...: 

252 # One of the globs matched everything 

253 if not allowAny: 

254 raise TypeError("This expression may not be unconstrained.") 

255 return ... 

256 del process 

257 return self 

258 

259 strings: list[str] 

260 """Explicit string values found in the wildcard (`list` [ `str` ]). 

261 """ 

262 

263 patterns: list[re.Pattern] 

264 """Regular expression patterns found in the wildcard 

265 (`list` [ `re.Pattern` ]). 

266 """ 

267 

268 items: list[tuple[str, Any]] 

269 """Two-item tuples that relate string values to other objects 

270 (`list` [ `tuple` [ `str`, `typing.Any` ] ]). 

271 """ 

272 

273 

274@dataclasses.dataclass(frozen=True) 

275class CollectionWildcard: 

276 """A validated wildcard for collection names. 

277 

278 The `from_expression` method should almost always be used to construct 

279 instances, as the regular constructor performs no checking of inputs (and 

280 that can lead to confusing error messages downstream). 

281 

282 Notes 

283 ----- 

284 `CollectionWildcard` is expected to be rarely used outside of `Registry` 

285 (which uses it to back several of its "query" methods that take general 

286 expressions for collections), but it may occasionally be useful outside 

287 `Registry` as a way to preprocess expressions that contain single-pass 

288 iterators into a form that can be used to call those `Registry` methods 

289 multiple times. 

290 """ 

291 

292 strings: tuple[str, ...] = () 

293 """An an ordered list of explicitly-named collections. (`tuple` [ `str` ]). 

294 """ 

295 

296 patterns: tuple[re.Pattern, ...] | EllipsisType = ... 

297 """Regular expression patterns to match against collection names, or the 

298 special value ``...`` indicating all collections. 

299 

300 ``...`` must be accompanied by ``strings=()``. 

301 """ 

302 

303 def __post_init__(self) -> None: 

304 if self.patterns is ... and self.strings: 

305 raise ValueError( 

306 f"Collection wildcard matches any string, but still has explicit strings {self.strings}." 

307 ) 

308 

309 @classmethod 

310 def from_expression(cls, expression: Any, require_ordered: bool = False) -> CollectionWildcard: 

311 """Process a general expression to construct a `CollectionWildcard` 

312 instance. 

313 

314 Parameters 

315 ---------- 

316 expression : `~typing.Any` 

317 May be: 

318 

319 - a `str` collection name; 

320 - an `re.Pattern` instance to match (with `re.Pattern.fullmatch`) 

321 against collection names; 

322 - any iterable containing any of the above; 

323 - another `CollectionWildcard` instance (passed through unchanged). 

324 

325 Duplicate collection names will be removed (preserving the first 

326 appearance of each collection name). 

327 require_ordered : `bool`, optional 

328 If `True` (`False` is default) require the expression to be 

329 ordered, and raise `CollectionExpressionError` if it is not. 

330 

331 Returns 

332 ------- 

333 wildcard : `CollectionWildcard` 

334 A `CollectionWildcard` instance. 

335 

336 Raises 

337 ------ 

338 CollectionExpressionError 

339 Raised if the patterns has regular expression, glob patterns, or 

340 the ``...`` wildcard, and ``require_ordered=True``. 

341 """ 

342 if isinstance(expression, cls): 

343 return expression 

344 if expression is ...: 

345 return cls() 

346 wildcard = CategorizedWildcard.fromExpression( 

347 expression, 

348 allowAny=True, 

349 allowPatterns=True, 

350 ) 

351 if wildcard is ...: 

352 return cls() 

353 result = cls( 

354 strings=tuple(wildcard.strings), 

355 patterns=tuple(wildcard.patterns), 

356 ) 

357 if require_ordered: 

358 result.require_ordered() 

359 return result 

360 

361 @classmethod 

362 def from_names(cls, names: Iterable[str]) -> CollectionWildcard: 

363 """Construct from an iterable of explicit collection names. 

364 

365 Parameters 

366 ---------- 

367 names : `~collections.abc.Iterable` [ `str` ] 

368 Iterable of collection names. 

369 

370 Returns 

371 ------- 

372 wildcard : `CollectionWildcard` 

373 A `CollectionWildcard` instance. `require_ordered` is guaranteed 

374 to succeed and return the given names in order. 

375 """ 

376 return cls(strings=tuple(names), patterns=()) 

377 

378 def require_ordered(self) -> tuple[str, ...]: 

379 """Require that this wildcard contains no patterns, and return the 

380 ordered tuple of names that it does hold. 

381 

382 Returns 

383 ------- 

384 names : `tuple` [ `str` ] 

385 Ordered tuple of collection names. 

386 

387 Raises 

388 ------ 

389 CollectionExpressionError 

390 Raised if the patterns has regular expression, glob patterns, or 

391 the ``...`` wildcard. 

392 """ 

393 if self.patterns: 

394 raise CollectionExpressionError( 

395 f"An ordered collection expression is required; got patterns {self.patterns}." 

396 ) 

397 return self.strings 

398 

399 def empty(self) -> bool: 

400 """Return true if both ``strings`` and ``patterns`` are empty.""" 

401 # bool(Ellipsis) is True 

402 return not self.strings and not self.patterns 

403 

404 def __str__(self) -> str: 

405 if self.patterns is ...: 

406 return "..." 

407 else: 

408 terms = list(self.strings) 

409 terms.extend(str(p) for p in self.patterns) 

410 return "[{}]".format(", ".join(terms)) 

411 

412 

413@dataclasses.dataclass 

414class DatasetTypeWildcard: 

415 """A validated expression that resolves to one or more dataset types. 

416 

417 The `from_expression` method should almost always be used to construct 

418 instances, as the regular constructor performs no checking of inputs (and 

419 that can lead to confusing error messages downstream). 

420 """ 

421 

422 values: Mapping[str, DatasetType | None] = dataclasses.field(default_factory=dict) 

423 """A mapping with `str` dataset type name keys and optional `DatasetType` 

424 instances. 

425 """ 

426 

427 patterns: tuple[re.Pattern, ...] | EllipsisType = ... 

428 """Regular expressions to be matched against dataset type names, or the 

429 special value ``...`` indicating all dataset types. 

430 

431 Any pattern matching a dataset type is considered an overall match for 

432 the expression. 

433 """ 

434 

435 @classmethod 

436 def from_expression(cls, expression: Any) -> DatasetTypeWildcard: 

437 """Construct an instance by analyzing the given expression. 

438 

439 Parameters 

440 ---------- 

441 expression : `~typing.Any` 

442 Expression to analyze. May be any of the following: 

443 

444 - a `str` dataset type name; 

445 - a `DatasetType` instance; 

446 - an iterable whose elements may be any of the above (any dataset 

447 type matching any element in the list is an overall match); 

448 - an existing `DatasetTypeWildcard` instance; 

449 - the special ``...`` ellipsis object, which matches any dataset 

450 type. 

451 

452 Returns 

453 ------- 

454 query : `DatasetTypeWildcard` 

455 An instance of this class (new unless an existing instance was 

456 passed in). 

457 

458 Raises 

459 ------ 

460 DatasetTypeExpressionError 

461 Raised if the given expression does not have one of the allowed 

462 types. 

463 """ 

464 if isinstance(expression, cls): 

465 return expression 

466 # CategorizedWildcard currently allows globs and regex as patterns 

467 # but RFC-879 drops support for regex in dataset type specifications. 

468 # Therefore check for their presence. 

469 for exp in ensure_iterable(expression): 

470 if isinstance(exp, re.Pattern): 

471 raise DatasetTypeExpressionError("Regular expressions are not supported.") 

472 try: 

473 wildcard = CategorizedWildcard.fromExpression( 

474 expression, 

475 coerceUnrecognized=lambda d: (d.name, d), 

476 ) 

477 except TypeError as err: 

478 raise DatasetTypeExpressionError(f"Invalid dataset type expression: {expression!r}.") from err 

479 if wildcard is ...: 

480 return cls() 

481 values: dict[str, DatasetType | None] = {} 

482 for name in wildcard.strings: 

483 values[name] = None 

484 for name, item in wildcard.items: 

485 if not isinstance(item, DatasetType): 

486 raise DatasetTypeExpressionError( 

487 f"Invalid value '{item}' of type {type(item)} in dataset type expression; " 

488 "expected str, re.Pattern, DatasetType objects, iterables thereof, or '...'." 

489 ) 

490 values[name] = item 

491 return cls(values, patterns=tuple(wildcard.patterns)) 

492 

493 def __str__(self) -> str: 

494 if self.patterns is ...: 

495 return "..." 

496 else: 

497 terms = list(self.values.keys()) 

498 terms.extend(str(p) for p in self.patterns) 

499 return "[{}]".format(", ".join(terms))