Coverage for python/lsst/daf/butler/core/utils.py: 33%
162 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-12-01 19:55 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2022-12-01 19:55 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = (
24 "allSlots",
25 "chunk_iterable",
26 "getClassOf",
27 "getFullTypeName",
28 "getInstanceOf",
29 "immutable",
30 "isplit",
31 "iterable",
32 "safeMakeDir",
33 "Singleton",
34 "stripIfNotNone",
35 "time_this",
36 "transactional",
37)
39import errno
40import os
41import builtins
42import fnmatch
43import functools
44import itertools
45import logging
46import time
47import re
48from contextlib import contextmanager
49from typing import (
50 Any,
51 Callable,
52 Dict,
53 Iterable,
54 Iterator,
55 List,
56 Mapping,
57 Optional,
58 Pattern,
59 Tuple,
60 Type,
61 TypeVar,
62 TYPE_CHECKING,
63 Union,
64)
66from lsst.utils import doImport
68if TYPE_CHECKING: 68 ↛ 69line 68 didn't jump to line 69, because the condition on line 68 was never true
69 from ..registry.wildcards import Ellipsis, EllipsisType
72_LOG = logging.getLogger(__name__)
def safeMakeDir(directory: str) -> None:
    """Create a directory, tolerating concurrent creation by others.

    Parameters
    ----------
    directory : `str`
        Path of the directory to create, including any missing parents.
        Nothing is done if this is an empty string or if the path already
        exists.

    Raises
    ------
    OSError
        Raised if the directory could not be created for any reason other
        than it already existing.
    """
    if directory == "" or os.path.exists(directory):
        return
    try:
        os.makedirs(directory)
    except OSError as err:
        # Something else may have created the directory between the
        # existence check above and the call to makedirs; that is fine.
        if err.errno == errno.EEXIST:
            return
        raise err
def iterable(a: Any) -> Iterable[Any]:
    """Make input iterable.

    There are four cases, when the input is:

    - iterable, but not a `str` or Mapping -> iterate over elements
      (e.g. ``[i for i in a]``)
    - a `str` -> return single element iterable (e.g. ``[a]``)
    - a Mapping -> return single element iterable
    - not iterable -> return single element iterable (e.g. ``[a]``).

    Parameters
    ----------
    a : iterable or `str` or not iterable
        Argument to be converted to an iterable.

    Returns
    -------
    i : `generator`
        Iterable version of the input value.

    Notes
    -----
    Whether the input can be iterated over is decided by attempting to
    obtain its first element. If that fails the input is returned as a
    single element. Once iteration has started, any exception raised by the
    input is propagated to the caller rather than being hidden.
    """
    # Strings and mappings can be iterated over but are treated as scalars.
    if isinstance(a, str) or isinstance(a, Mapping):
        yield a
        return

    try:
        elements = iter(a)
        first = next(elements)
    except StopIteration:
        # Iterable, but empty: there is nothing to return.
        return
    except Exception:
        # The input cannot be iterated over at all, so treat it as a scalar.
        yield a
        return

    # Iteration is known to work. Errors from here on are genuine failures
    # of the input and must not be converted into an extra element.
    yield first
    yield from elements
def allSlots(self: Any) -> Iterator[str]:
    """Return the combined ``__slots__`` of every class in an object's MRO.

    Parameters
    ----------
    self : `object`
        Instance to be inspected.

    Returns
    -------
    slots : `itertools.chain`
        All the slots as an iterable, in method resolution order. Classes
        that do not define ``__slots__`` contribute nothing.
    """
    slots_per_class = (getattr(klass, "__slots__", []) for klass in self.__class__.__mro__)
    return itertools.chain.from_iterable(slots_per_class)
def getFullTypeName(cls: Any) -> str:
    """Return full type name of the supplied entity.

    Parameters
    ----------
    cls : `type` or `object`
        Entity from which to obtain the full name. Can be an instance
        or a `type`.

    Returns
    -------
    name : `str`
        Full name of type.

    Notes
    -----
    Builtins are returned without the ``builtins`` specifier included. This
    allows `str` to be returned as "str" rather than "builtins.str". An
    entity is only treated as a builtin if it is the very object found
    under that name in the `builtins` module; a class defined elsewhere
    that merely shares its name with a builtin keeps its module path.

    Any parts of the path that start with a leading underscore are removed
    on the assumption that they are an implementation detail and the
    entity will be hoisted into the parent namespace. The shortened name is
    only returned if importing it yields the original entity.
    """
    # If we have an instance we need to convert to a type
    if not hasattr(cls, "__qualname__"):
        cls = type(cls)

    qualname = cls.__qualname__
    # Special case builtins such as str and dict. Compare by identity so
    # that an unrelated class that happens to be called e.g. "dict" is not
    # mistaken for the builtin and stripped of its module.
    if getattr(builtins, qualname, None) is cls:
        return qualname

    real_name = cls.__module__ + "." + qualname

    # Remove components with leading underscores
    cleaned_name = ".".join(c for c in real_name.split(".") if not c.startswith("_"))

    # Consistency check
    if real_name != cleaned_name:
        try:
            test = doImport(cleaned_name)
        except Exception:
            # Could not import anything so return the real name
            return real_name

        # The thing we imported should match the class we started with
        # despite the clean up. If it does not we return the real name
        if test is not cls:
            return real_name

    return cleaned_name
def getClassOf(typeOrName: Union[Type, str]) -> Type:
    """Return the Python type corresponding to a type or a type name.

    Parameters
    ----------
    typeOrName : `str` or Python class
        A string describing the Python class to load or a Python type.

    Returns
    -------
    type_ : `type`
        The Python type itself if a type was provided; otherwise the result
        of importing the given string.

    Notes
    -----
    This is a thin wrapper around `~lsst.utils.doImport`, which is only
    called when a string is supplied.
    """
    return doImport(typeOrName) if isinstance(typeOrName, str) else typeOrName
def getInstanceOf(typeOrName: Union[Type, str], *args: Any, **kwargs: Any) -> Any:
    """Instantiate an object from a type or a type name.

    The type is resolved with `getClassOf`, so a name given as a string
    will be imported first.

    Parameters
    ----------
    typeOrName : `str` or Python class
        A string describing the Python class to load or a Python type.
    args : `tuple`
        Positional arguments to pass to the object constructor.
    **kwargs
        Keyword arguments to pass to object constructor.

    Returns
    -------
    instance : `object`
        Instance of the requested type, instantiated with the provided
        parameters.
    """
    return getClassOf(typeOrName)(*args, **kwargs)
class Singleton(type):
    """Metaclass to convert a class to a Singleton.

    A class using this metaclass must have a constructor that takes no
    arguments. Arguments could only ever be honoured the first time the
    class is instantiated, and callers cannot know whether that has already
    happened, so it is safer to always construct with no arguments and then
    call a method to adjust the state of the singleton.
    """

    # Maps each class using this metaclass to its one instance.
    _instances: Dict[Type, Any] = {}

    # Signature is intentionally not substitutable for type.__call__ (no *args,
    # **kwargs) to require classes that use this metaclass to have no
    # constructor arguments.
    def __call__(cls) -> Any:  # type: ignore
        if cls in cls._instances:
            return cls._instances[cls]
        # First request for this class: build the instance and remember it.
        cls._instances[cls] = super().__call__()
        return cls._instances[cls]
F = TypeVar("F", bound=Callable)


def transactional(func: F) -> F:
    """Decorate a method so that it runs inside a transaction.

    The class owning the method must define a ``transaction`` method that
    takes no arguments and returns a context manager. The decorated method
    is executed within that context and its return value is passed through.
    """

    @functools.wraps(func)
    def wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
        with self.transaction():
            result = func(self, *args, **kwargs)
            return result

    return wrapper  # type: ignore
def stripIfNotNone(s: Optional[str]) -> Optional[str]:
    """Strip surrounding whitespace from a string that may be `None`.

    Parameters
    ----------
    s : `str`, optional
        Input string.

    Returns
    -------
    r : `str` or `None`
        `None` if ``s`` is `None`, otherwise ``s`` with leading and trailing
        whitespace removed.
    """
    return None if s is None else s.strip()
_T = TypeVar("_T", bound="Type")


def immutable(cls: _T) -> _T:
    """Decorate a class to simulate a simple form of immutability.

    A class decorated as `immutable` may only set each of its attributes once;
    any attempts to set an already-set attribute will raise `AttributeError`.

    Notes
    -----
    Subclasses of classes marked with ``@immutable`` are also immutable.

    Set-once attributes interfere with the default implementation used by
    the ``pickle`` module, so `immutable` installs ``__getstate__`` and
    ``__setstate__`` methods that bypass it. Immutable classes can then
    implement pickle via ``__reduce__`` or ``__getnewargs__``.

    As with Python's built-in immutable types, such as `str` and `tuple`,
    the installed ``__copy__`` simply returns ``self``: there is no reason
    to copy an object that none of its shared owners can modify.

    Objects that are recursively immutable (i.e. are themselves immutable
    and have only recursively immutable attributes) should also implement
    ``__deepcopy__`` to return ``self``. The decorator does not do this
    because it has no way of checking for recursive immutability.
    """

    def __setattr__(self: _T, name: str, value: Any) -> None:  # noqa: N807
        # Only attributes that do not exist yet may be assigned.
        if not hasattr(self, name):
            object.__setattr__(self, name, value)
            return
        raise AttributeError(f"{cls.__name__} instances are immutable.")

    def __getstate__(self: _T) -> dict:  # noqa: N807
        # Disable default state-setting when unpickled.
        return {}

    def __setstate__(self: _T, state: Any) -> None:  # noqa: N807
        # Disable default state-setting when copied.
        # Sadly what works for pickle doesn't work for copy.
        assert not state

    def __copy__(self: _T) -> _T:  # noqa: N807
        return self

    # mypy says the variable here has signature (str, Any) i.e. no "self";
    # I think it's just confused by descriptor stuff.
    cls.__setattr__ = __setattr__  # type: ignore
    cls.__getstate__ = __getstate__
    cls.__setstate__ = __setstate__
    cls.__copy__ = __copy__
    return cls
_S = TypeVar("_S")
_R = TypeVar("_R")


def cached_getter(func: Callable[[_S], _R]) -> Callable[[_S], _R]:
    """Decorate a method so that its result is cached on the instance.

    Only works on methods that take only ``self`` as an argument. The
    method is evaluated on the first call and the stored result is returned
    on subsequent calls.

    Notes
    -----
    This is intended primarily as a stopgap for Python 3.8's more sophisticated
    ``functools.cached_property``, but it is also explicitly compatible with
    the `immutable` decorator, which may not be true of ``cached_property``.

    `cached_getter` guarantees that the cached value will be stored in
    an attribute named ``_cached_{name-of-decorated-function}``. Classes that
    use `cached_getter` are responsible for guaranteeing that this name is not
    otherwise used, and is included if ``__slots__`` is defined.
    """
    cache_name = f"_cached_{func.__name__}"
    # Unique marker distinguishing "no cached value" from any real value.
    missing = object()

    @functools.wraps(func)
    def wrapper(self: _S) -> _R:
        cached = getattr(self, cache_name, missing)
        if cached is missing:
            # Go through object.__setattr__ so that classes overriding
            # __setattr__ do not interfere with storing the cache.
            object.__setattr__(self, cache_name, func(self))
            cached = getattr(self, cache_name)
        return cached

    return wrapper
def findFileResources(values: Iterable[str], regex: Optional[str] = None) -> List[str]:
    """Collect files named directly or found beneath given directories.

    Each value that is a directory is walked recursively and every file in
    it whose name matches the regex is included. Any other value is
    included unchanged, without checking that it exists.

    Parameters
    ----------
    values : iterable [`str`]
        The files to return and directories in which to look for files to
        return.
    regex : `str`
        The regex to use when searching for files within directories. It is
        applied to the file name only, not the full path. Optional, by
        default returns all the found files.

    Returns
    -------
    resources: `list` [`str`]
        The passed-in files and files found in passed-in directories.
    """
    pattern = re.compile(regex) if regex is not None else None

    def _wanted(filename: str) -> bool:
        # With no regex every file qualifies.
        return pattern is None or bool(pattern.search(filename))

    found: List[str] = []
    for location in values:
        if not os.path.isdir(location):
            found.append(location)
            continue
        for dirpath, _subdirs, filenames in os.walk(location):
            for filename in filenames:
                candidate = os.path.join(dirpath, filename)
                if os.path.isfile(candidate) and _wanted(filename):
                    found.append(candidate)
    return found
def globToRegex(
    expressions: Union[str, EllipsisType, None, List[str]]
) -> Union[List[Union[str, Pattern]], EllipsisType]:
    """Translate glob-style search terms to regex.

    If a stand-alone '``*``' is found in ``expressions``, or expressions is
    empty or `None`, then the special value ``...`` will be returned,
    indicating that any string will match.

    Parameters
    ----------
    expressions : `str` or `list` [`str`]
        A list of glob-style pattern strings to convert.

    Returns
    -------
    expressions : `list` [`str` or `re.Pattern`] or ``...``
        A list of regex Patterns or simple strings. Returns ``...`` if
        the provided expressions would match everything.
    """
    if expressions is None or expressions is Ellipsis:
        return Ellipsis
    terms = list(iterable(expressions))
    if "*" in terms or not terms:
        return Ellipsis

    # Terms made only of word characters, "/", "." and "-" contain no glob
    # syntax, so they are passed through as plain strings.
    plain = re.compile(r"^[\w/\.\-]+$", re.ASCII)
    return [term if plain.match(term) else re.compile(fnmatch.translate(term)) for term in terms]
T = TypeVar("T", str, bytes)


def isplit(string: T, sep: T) -> Iterator[T]:
    """Split a string or bytes by separator returning a generator.

    Parameters
    ----------
    string : `str` or `bytes`
        The string to split into substrings.
    sep : `str` or `bytes`
        The separator to use to split the string. Must be the same
        type as ``string``. Must always be given and must not be empty.
        May be more than one character long.

    Yields
    ------
    subset : `str` or `bytes`
        The next subset extracted from the input until the next separator.

    Raises
    ------
    ValueError
        Raised when iteration starts if ``sep`` is empty, matching the
        behavior of `str.split`.
    """
    sep_length = len(sep)
    if sep_length == 0:
        raise ValueError("empty separator")

    begin = 0
    while True:
        end = string.find(sep, begin)
        if end == -1:
            yield string[begin:]
            return
        yield string[begin:end]
        # Skip the whole separator, not just its first character.
        begin = end + sep_length
@contextmanager
def time_this(log: Optional[logging.Logger] = None, msg: Optional[str] = None,
              level: int = logging.DEBUG, prefix: Optional[str] = "timer",
              args: Iterable[Any] = ()) -> Iterator[None]:
    """Time the enclosed block and issue a log message.

    Parameters
    ----------
    log : `logging.Logger`, optional
        Logger to use to report the timer message. The root logger will
        be used if none is given.
    msg : `str`, optional
        Context to include in log message.
    level : `int`, optional
        Python logging level to use to issue the log message. If the
        code block raises an exception the log message will automatically
        switch to level ERROR.
    prefix : `str`, optional
        Prefix to use to prepend to the supplied logger to
        create a new logger to use instead. No prefix is used if the value
        is set to `None`. Defaults to "timer".
    args : iterable of any
        Additional parameters passed to the log command that should be
        written to ``msg``.

    Notes
    -----
    The elapsed time is measured with `time.perf_counter`, so it is not
    affected by adjustments to the system clock made while the block runs.
    Any exception raised by the block is propagated after the message has
    been logged.
    """
    if log is None:
        log = logging.getLogger()
    if prefix:
        log_name = f"{prefix}.{log.name}" if not isinstance(log, logging.RootLogger) else prefix
        log = logging.getLogger(log_name)

    success = False
    start = time.perf_counter()
    try:
        yield
        success = True
    finally:
        elapsed = time.perf_counter() - start

        # The message is pre-inserted to allow the logger to expand
        # the additional args provided. Make that easier by converting
        # the None message to empty string.
        if msg is None:
            msg = ""

        if not success:
            # Something went wrong so change the log level to indicate
            # this.
            level = logging.ERROR

        # Specify stacklevel to ensure the message is reported from the
        # caller (1 is this file, 2 is contextlib, 3 is user)
        log.log(level, msg + "%sTook %.4f seconds", *args,
                ": " if msg else "", elapsed, stacklevel=3)
def chunk_iterable(data: Iterable[Any], chunk_size: int = 1_000) -> Iterator[Tuple[Any, ...]]:
    """Break an iterable up into a sequence of smaller chunks.

    Parameters
    ----------
    data : iterable of anything
        The iterable to be chunked. Can be a mapping, in which case
        the keys are returned in chunks.
    chunk_size : int, optional
        The largest chunk to return. The final chunk can be smaller,
        depending on the number of elements in the iterator. Defaults to
        1_000.

    Yields
    ------
    chunk : `tuple`
        The contents of a chunk of the iterator as a `tuple`. A tuple is
        preferred over an iterator since it is more convenient to tell it is
        empty and the caller knows it can be sized and indexed.
    """
    source = iter(data)

    def _next_chunk() -> Tuple[Any, ...]:
        return tuple(itertools.islice(source, chunk_size))

    # Keep taking chunks until an empty one signals that the source is
    # exhausted.
    yield from iter(_next_chunk, ())