Coverage for python/lsst/daf/butler/core/utils.py: 34%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

162 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ( 

24 "allSlots", 

25 "chunk_iterable", 

26 "getClassOf", 

27 "getFullTypeName", 

28 "getInstanceOf", 

29 "immutable", 

30 "isplit", 

31 "iterable", 

32 "safeMakeDir", 

33 "Singleton", 

34 "stripIfNotNone", 

35 "time_this", 

36 "transactional", 

37) 

38 

39import errno 

40import os 

41import builtins 

42import fnmatch 

43import functools 

44import itertools 

45import logging 

46import time 

47import re 

48from contextlib import contextmanager 

49from typing import ( 

50 Any, 

51 Callable, 

52 Dict, 

53 Iterable, 

54 Iterator, 

55 List, 

56 Mapping, 

57 Optional, 

58 Pattern, 

59 Tuple, 

60 Type, 

61 TypeVar, 

62 TYPE_CHECKING, 

63 Union, 

64) 

65 

66from lsst.utils import doImport 

67 

68if TYPE_CHECKING: 68 ↛ 69line 68 didn't jump to line 69, because the condition on line 68 was never true

69 from ..registry.wildcards import Ellipsis, EllipsisType 

70 

71 

72_LOG = logging.getLogger(__name__) 

73 

74 

def safeMakeDir(directory: str) -> None:
    """Create a directory, tolerating concurrent creation by someone else.

    Parameters
    ----------
    directory : `str`
        Path of the directory to create, including any missing parents.
        Nothing is done for an empty string or for a path that already
        exists.

    Raises
    ------
    OSError
        Raised if the directory could not be created for any reason other
        than it already existing.
    """
    if directory == "" or os.path.exists(directory):
        return
    try:
        os.makedirs(directory)
    except OSError as err:
        # The directory may have appeared between the existence check and
        # the makedirs call; losing that race is not an error.
        if err.errno != errno.EEXIST:
            raise

84 

85 

def iterable(a: Any) -> Iterable[Any]:
    """Make input iterable.

    There are four cases, when the input is:

    - iterable, but not a `str` or Mapping -> iterate over elements
      (e.g. ``[i for i in a]``)
    - a `str` -> return single element iterable (e.g. ``[a]``)
    - a Mapping -> return single element iterable
    - not iterable -> return single element iterable (e.g. ``[a]``).

    Parameters
    ----------
    a : iterable or `str` or not iterable
        Argument to be converted to an iterable.

    Returns
    -------
    i : `generator`
        Iterable version of the input value.

    Notes
    -----
    Only the question "can this object be iterated?" is answered by
    catching an exception. Errors raised while the elements are being
    produced propagate to the caller instead of being converted into a
    trailing yield of the input object itself.
    """
    # Strings and mappings are iterable but are treated as single values.
    if isinstance(a, (str, Mapping)):
        yield a
        return
    try:
        elements = iter(a)
    except TypeError:
        # Not iterable at all: hand back the object as the only element.
        yield a
        return
    yield from elements

117 

118 

def allSlots(self: Any) -> Iterator[str]:
    """
    Return combined ``__slots__`` for all classes in objects mro.

    Parameters
    ----------
    self : `object`
        Instance to be inspected.

    Returns
    -------
    slots : `itertools.chain`
        All the slots as an iterable.

    Notes
    -----
    Classes in the mro that do not define ``__slots__`` contribute nothing.
    A class may declare a single slot as a bare string
    (``__slots__ = "name"``); that is reported as one slot name rather than
    being split into individual characters.
    """
    per_class = (getattr(cls, "__slots__", ()) for cls in self.__class__.__mro__)
    return itertools.chain.from_iterable(
        (slots,) if isinstance(slots, str) else slots for slots in per_class
    )

135 

136 

def getFullTypeName(cls: Any) -> str:
    """Return full type name of the supplied entity.

    Parameters
    ----------
    cls : `type` or `object`
        Entity from which to obtain the full name. Can be an instance
        or a `type`.

    Returns
    -------
    name : `str`
        Full name of type.

    Notes
    -----
    Builtins are returned without the ``builtins`` specifier included. This
    allows `str` to be returned as "str" rather than "builtins.str". An
    entity is only treated as a builtin if it *is* the object of that name
    in the ``builtins`` module; a class from another module that merely
    shares its name with a builtin keeps its module prefix. Any
    parts of the path that start with a leading underscore are removed
    on the assumption that they are an implementation detail and the
    entity will be hoisted into the parent namespace.
    """
    # If we have an instance we need to convert to a type
    if not hasattr(cls, "__qualname__"):
        cls = type(cls)

    # Special case builtins such as str and dict. Compare by identity so
    # that e.g. a user-defined ``mymod.list`` is not reported as "list".
    if getattr(builtins, cls.__qualname__, None) is cls:
        return cls.__qualname__

    real_name = cls.__module__ + "." + cls.__qualname__

    # Remove components with leading underscores
    cleaned_name = ".".join(c for c in real_name.split(".") if not c.startswith("_"))

    # Nothing was removed so there is nothing to verify.
    if real_name == cleaned_name:
        return cleaned_name

    # Consistency check: the cleaned name is only usable if importing it
    # gives back the very same object we started with.
    try:
        test = doImport(cleaned_name)
    except Exception:
        # Could not import anything so return the real name
        return real_name

    return cleaned_name if test is cls else real_name

185 

186 

def getClassOf(typeOrName: Union[Type, str]) -> Type:
    """Resolve a Python type from either the type itself or its name.

    Parameters
    ----------
    typeOrName : `str` or Python class
        A string describing the Python class to load or a Python type.

    Returns
    -------
    type_ : `type`
        The argument unchanged if it is not a string, otherwise the result
        of importing the named entity.

    Notes
    -----
    This is a thin wrapper around `~lsst.utils.doImport`.
    """
    return doImport(typeOrName) if isinstance(typeOrName, str) else typeOrName

212 

213 

def getInstanceOf(typeOrName: Union[Type, str], *args: Any, **kwargs: Any) -> Any:
    """Construct an object from a type, or from the name of a type.

    The type is resolved with `getClassOf`, so a name is imported first.

    Parameters
    ----------
    typeOrName : `str` or Python class
        A string describing the Python class to load or a Python type.
    args : `tuple`
        Positional arguments to pass to the object constructor.
    **kwargs
        Keyword arguments to pass to object constructor.

    Returns
    -------
    instance : `object`
        Instance of the requested type, instantiated with the provided
        parameters.
    """
    return getClassOf(typeOrName)(*args, **kwargs)

236 

237 

class Singleton(type):
    """Metaclass that makes every class using it a singleton.

    Classes using this metaclass must have a constructor that takes no
    arguments. Arguments could only ever be honoured on the first
    instantiation, and callers cannot know whether that has already
    happened, so the safe pattern is to construct with no arguments and
    then call a method to adjust the state of the singleton.
    """

    # One shared registry mapping each singleton class to its instance.
    _instances: Dict[Type, Any] = {}

    # Signature is intentionally not substitutable for type.__call__ (no *args,
    # **kwargs) to require classes that use this metaclass to have no
    # constructor arguments.
    def __call__(cls) -> Any:  # type: ignore
        registry = cls._instances
        if cls in registry:
            return registry[cls]
        registry[cls] = super().__call__()
        return registry[cls]

258 

259 

F = TypeVar("F", bound=Callable)


def transactional(func: F) -> F:
    """Decorate a method so that it runs inside a transaction.

    The class owning the method must define a ``transaction`` method that
    takes no arguments and returns a context manager; the decorated method
    body is executed inside that context.
    """
    @functools.wraps(func)
    def wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
        with self.transaction():
            result = func(self, *args, **kwargs)
        return result

    return wrapper  # type: ignore

274 

275 

def stripIfNotNone(s: Optional[str]) -> Optional[str]:
    """Strip surrounding whitespace from a string, passing `None` through.

    Parameters
    ----------
    s : `str`, optional
        Input string.

    Returns
    -------
    r : `str` or `None`
        ``s`` with leading and trailing whitespace removed, or `None` if
        ``s`` is `None`.
    """
    return None if s is None else s.strip()

293 

294 

_T = TypeVar("_T", bound="Type")


def immutable(cls: _T) -> _T:
    """Decorate a class to simulate a simple form of immutability.

    Instances of a decorated class may set each attribute only once; any
    attempt to set an attribute that already exists raises
    `AttributeError`.

    Notes
    -----
    The behavior is installed as methods on the class, so subclasses of a
    class marked with ``@immutable`` inherit it.

    Because set-once attributes interfere with the default ``pickle``
    implementation, ``__getstate__`` and ``__setstate__`` are replaced
    with versions that carry no state. Immutable classes can then
    implement pickling via ``__reduce__`` or ``__getnewargs__``.

    As with Python's built-in immutable types such as `str` and `tuple`,
    ``__copy__`` simply returns ``self``: there is no reason to copy an
    object that none of its shared owners can modify.

    Classes that are recursively immutable (immutable themselves and
    holding only recursively immutable attributes) should additionally
    implement ``__deepcopy__`` to return ``self``. The decorator does not
    do this because it cannot check for recursive immutability.
    """
    def __setattr__(self: _T, name: str, value: Any) -> None:  # noqa: N807
        if not hasattr(self, name):
            object.__setattr__(self, name, value)
            return
        raise AttributeError(f"{cls.__name__} instances are immutable.")

    def __getstate__(self: _T) -> dict:  # noqa: N807
        # Disable default state-setting when unpickled.
        return {}

    def __setstate__(self: _T, state: Any) -> None:  # noqa: N807
        # Disable default state-setting when copied.
        # Sadly what works for pickle doesn't work for copy.
        assert not state

    def __copy__(self: _T) -> _T:  # noqa: N807
        return self

    # Install every hook on the class under the hook's own dunder name.
    for hook in (__setattr__, __getstate__, __setstate__, __copy__):
        setattr(cls, hook.__name__, hook)
    return cls

347 

348 

_S = TypeVar("_S")
_R = TypeVar("_R")


def cached_getter(func: Callable[[_S], _R]) -> Callable[[_S], _R]:
    """Decorate a method so that its result is computed once per instance.

    Only usable on methods whose sole argument is ``self``. The first call
    stores the result on the instance; later calls return the stored
    value.

    Notes
    -----
    This is intended primarily as a stopgap for Python 3.8's more sophisticated
    ``functools.cached_property``, but it is also explicitly compatible with
    the `immutable` decorator, which may not be true of ``cached_property``.

    The cached value is stored in an attribute named
    ``_cached_{name-of-decorated-function}``. Classes using `cached_getter`
    are responsible for making sure that name is not otherwise used, and
    that it is included in ``__slots__`` if that is defined.
    """
    slot = f"_cached_{func.__name__}"

    @functools.wraps(func)
    def inner(self: _S) -> _R:
        if hasattr(self, slot):
            return getattr(self, slot)
        # Write through object.__setattr__ so that a restrictive
        # __setattr__ defined on the class is not consulted.
        object.__setattr__(self, slot, func(self))
        return getattr(self, slot)

    return inner

379 

380 

def findFileResources(values: Iterable[str], regex: Optional[str] = None) -> List[str]:
    """Collect files from a mixture of file and directory locations.

    Each value that is a directory is walked recursively and every file
    below it whose name matches ``regex`` is returned. Any value that is
    not a directory is returned as given.

    Parameters
    ----------
    values : iterable [`str`]
        The files to return and directories in which to look for files to
        return.
    regex : `str`
        The regex to use when searching for files within directories. It is
        applied to the file name only, using a search rather than a full
        match. Optional, by default returns all the found files.

    Returns
    -------
    resources: `list` [`str`]
        The passed-in files and files found in passed-in directories.
    """
    pattern = re.compile(regex) if regex is not None else None
    found: List[str] = []

    for location in values:
        if not os.path.isdir(location):
            # Not a directory: pass it through without any filtering.
            found.append(location)
            continue
        for root, _, files in os.walk(location):
            for name in files:
                candidate = os.path.join(root, name)
                if not os.path.isfile(candidate):
                    continue
                if pattern is None or pattern.search(name):
                    found.append(candidate)
    return found

416 

417 

def globToRegex(expressions: Union[str, EllipsisType, None,
                                   List[str]]) -> Union[List[Union[str, Pattern]], EllipsisType]:
    """Translate glob-style search terms to regex.

    The special value ``...`` is returned, indicating that any string will
    match, when ``expressions`` is ``...``, `None`, empty, or contains a
    stand-alone '``*``'.

    Parameters
    ----------
    expressions : `str` or `list` [`str`]
        A list of glob-style pattern strings to convert.

    Returns
    -------
    expressions : `list` [`str` or `re.Pattern`] or ``...``
        A list of regex Patterns or simple strings. Returns ``...`` if
        the provided expressions would match everything.
    """
    if expressions is None or expressions is Ellipsis:
        return Ellipsis
    terms = list(iterable(expressions))
    if len(terms) == 0 or "*" in terms:
        return Ellipsis

    # Terms made only of ASCII word characters, "/", "." and "-" contain no
    # glob syntax and are passed through as plain strings.
    nomagic = re.compile(r"^[\w/\.\-]+$", re.ASCII)

    results: List[Union[str, Pattern]] = [
        term if nomagic.match(term) else re.compile(fnmatch.translate(term))
        for term in terms
    ]
    return results

455 

456 

T = TypeVar('T', str, bytes)


def isplit(string: T, sep: T) -> Iterator[T]:
    """Split a string or bytes by separator returning a generator.

    Parameters
    ----------
    string : `str` or `bytes`
        The string to split into substrings.
    sep : `str` or `bytes`
        The separator to use to split the string. Must be the same
        type as ``string``. Must always be given and must not be empty.
        May be longer than a single character.

    Yields
    ------
    subset : `str` or `bytes`
        The next subset extracted from the input until the next separator.

    Raises
    ------
    ValueError
        Raised if ``sep`` is empty. Because this is a generator the error
        is raised when the first item is requested.
    """
    if not sep:
        raise ValueError("empty separator")
    # Skip the whole separator after each match, not just one character,
    # so that multi-character separators do not leak into the next item.
    step = len(sep)
    begin = 0
    while True:
        end = string.find(sep, begin)
        if end == -1:
            yield string[begin:]
            return
        yield string[begin:end]
        begin = end + step

484 

485 

@contextmanager
def time_this(log: Optional[logging.Logger] = None, msg: Optional[str] = None,
              level: int = logging.DEBUG, prefix: Optional[str] = "timer",
              args: Iterable[Any] = ()) -> Iterator[None]:
    """Time the enclosed block and issue a log message.

    Parameters
    ----------
    log : `logging.Logger`, optional
        Logger to use to report the timer message. The root logger will
        be used if none is given.
    msg : `str`, optional
        Context to include in log message.
    level : `int`, optional
        Python logging level to use to issue the log message. If the
        code block raises an exception the log message will automatically
        switch to level ERROR.
    prefix : `str`, optional
        Prefix to use to prepend to the supplied logger to
        create a new logger to use instead. No prefix is used if the value
        is set to `None`. Defaults to "timer".
    args : iterable of any
        Additional parameters passed to the log command that should be
        written to ``msg``.

    Notes
    -----
    The message is emitted whether or not the block raises; an exception
    from the block still propagates to the caller after logging. The
    elapsed time is measured with `time.perf_counter`, so it is not
    affected by adjustments to the system clock.
    """
    if log is None:
        log = logging.getLogger()
    if prefix:
        # The root logger has no useful name of its own, so for it the
        # prefix is used as the whole logger name.
        log_name = f"{prefix}.{log.name}" if not isinstance(log, logging.RootLogger) else prefix
        log = logging.getLogger(log_name)

    success = False
    start = time.perf_counter()
    try:
        yield
        success = True
    finally:
        elapsed = time.perf_counter() - start

        # The message is pre-inserted to allow the logger to expand
        # the additional args provided. Make that easier by converting
        # the None message to empty string.
        if msg is None:
            msg = ""

        if not success:
            # Something went wrong so change the log level to indicate
            # this.
            level = logging.ERROR

        # Specify stacklevel to ensure the message is reported from the
        # caller (1 is this file, 2 is contextlib, 3 is user)
        log.log(level, msg + "%sTook %.4f seconds", *args,
                ": " if msg else "", elapsed, stacklevel=3)

540 

541 

def chunk_iterable(data: Iterable[Any], chunk_size: int = 1_000) -> Iterator[Tuple[Any, ...]]:
    """Break an iterable up into tuples of bounded length.

    Parameters
    ----------
    data : iterable of anything
        The iterable to be chunked. Can be a mapping, in which case
        the keys are returned in chunks.
    chunk_size : int, optional
        The largest chunk to return. The final chunk can be smaller,
        depending on the number of elements in the iterator. Defaults to
        1_000.

    Yields
    ------
    chunk : `tuple`
        The contents of a chunk of the iterator as a `tuple`. A tuple is
        preferred over an iterator since it is more convenient to tell it is
        empty and the caller knows it can be sized and indexed.
    """
    source = iter(data)
    # Keep pulling slices until one comes back empty, which signals that
    # the underlying iterator is exhausted.
    yield from iter(lambda: tuple(itertools.islice(source, chunk_size)), ())