Coverage for python/lsst/utils/packages.py: 21%
232 statements
# This file is part of utils.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# Use of this source code is governed by a 3-clause BSD-style
# license that can be found in the LICENSE file.
#
"""Determine which packages are being used in the system and their versions.
"""

from __future__ import annotations

import contextlib
import hashlib
import importlib
import io
import json
import logging
import os
import pickle
import re
import subprocess
import sys
import types
from collections.abc import Mapping
from functools import lru_cache
from importlib.metadata import packages_distributions
from typing import Any, ClassVar

import yaml

log = logging.getLogger(__name__)

__all__ = [
    "getVersionFromPythonModule",
    "getPythonPackages",
    "getEnvironmentPackages",
    "getCondaPackages",
    "Packages",
]


# Packages used at build-time (e.g., header-only)
BUILDTIME = {"boost", "eigen", "tmv"}

# Python modules to attempt to load so we can try to get the version.
# We do this because the version only appears to be available from python,
# but we use the library.
PYTHON: set[str] = set()

SPECIAL_NAMESPACES = {"lsst"}

# Packages that don't seem to have a mechanism for reporting the runtime
# version. We need to guess the version from the environment.
ENVIRONMENT = {"astrometry_net", "astrometry_net_data", "minuit2", "xpa"}

try:
    # Python 3.10 includes a list of standard library modules.
    # These will all have the same version number as Python itself.
    _STDLIB = sys.stdlib_module_names
except AttributeError:
    _STDLIB = frozenset()

def getVersionFromPythonModule(module: types.ModuleType) -> str:
    """Determine the version of a python module.

    Parameters
    ----------
    module : `~types.ModuleType`
        Module for which to get version.

    Returns
    -------
    version : `str`
        The version of the python module.

    Raises
    ------
    AttributeError
        Raised if ``__version__`` attribute is not set.

    Notes
    -----
    We supplement the version with information from the
    ``__dependency_versions__`` (a specific variable set by LSST's
    `~lsst.sconsUtils` at build time) only for packages that are typically
    used only at build-time.
    """
    version = module.__version__
    if hasattr(module, "__dependency_versions__"):
        # Add build-time dependencies
        deps = module.__dependency_versions__
        buildtime = BUILDTIME & set(deps.keys())
        if buildtime:
            version += " with " + " ".join(f"{pkg}={deps[pkg]}" for pkg in sorted(buildtime))
    return str(version)

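# Example usage (illustrative sketch only; assumes NumPy is installed and
# already imported, so that it exposes a ``__version__`` attribute):
#
#     import numpy
#     from lsst.utils.packages import getVersionFromPythonModule
#
#     ver = getVersionFromPythonModule(numpy)  # the numpy.__version__ string
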
def getPythonPackages() -> dict[str, str]:
    """Get imported python packages and their versions.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are package names; values (type `str`) are their
        versions.

    Notes
    -----
    We wade through `sys.modules` and attempt to determine the version for
    each module. Note, therefore, that we can only report on modules that
    have *already* been imported.

    We don't include any module for which we cannot determine a version.
    """
    # Attempt to import libraries that only report their version in python.
    for module_name in PYTHON:
        # If it's not available we continue.
        with contextlib.suppress(Exception):
            importlib.import_module(module_name)

    package_dist = packages_distributions()

    packages = {"python": sys.version}

    # Not iterating with sys.modules.iteritems() because it's not atomic and
    # subject to race conditions.
    module_names = sorted(list(sys.modules.keys()))

    # Use knowledge of package hierarchy to find the versions rather than
    # using each name independently. Group all the module names into the
    # hierarchy, splitting on dot, and skipping any component that starts
    # with an underscore.

    # Sorting the module names gives us:
    # lsst
    # lsst.afw
    # lsst.afw.cameraGeom
    # ...
    # lsst.daf
    # lsst.daf.butler
    #
    # and so we can use knowledge of the previous version to inform whether
    # we need to look at the subsequent line.
    n_versions = 0
    n_checked = 0
    previous = ""
    for name in module_names:
        if name.startswith("_") or "._" in name:
            # Refers to a private module so we can ignore it and assume
            # version has been lifted into parent or, if top level, not
            # relevant for versioning. This applies also to standard library
            # packages such as _abc and __future__.
            continue

        if name in _STDLIB:
            # Assign all standard library packages the python version
            # since they almost all lack explicit versions.
            packages[name] = sys.version
            previous = name
            continue

        if name.startswith(previous + ".") and previous in packages:
            # Already have this version. Use the same previous name
            # for the line after this.
            continue

        # Find the top-level namespace; that is the key we need in order to
        # use package_dist.
        namespace = name.split(".")[0]

        # package_dist is a mapping from import namespace to distribution
        # package names. This may be a one-to-many mapping due to namespace
        # packages. Note that package_dist does not know about editable
        # installs or eups installs via path manipulation.
        if namespace in package_dist:
            dist_names = package_dist[namespace]
        else:
            dist_names = [name]

        ver = _get_python_package_version(name, namespace, dist_names, packages)

        n_checked += 1
        if ver is not None:
            n_versions += 1
        previous = name

    log.debug(
        "Given %d modules but checked %d in hierarchy and found versions for %d",
        len(module_names),
        n_checked,
        n_versions,
    )

    for name in list(packages.keys()):
        # Use LSST package names instead of python module names.
        # This matches the names we get from the environment (i.e., EUPS)
        # so we can clobber these build-time versions if the environment
        # reveals that we're not using the packages as-built.
        if name.startswith("lsst."):
            new_name = name.replace("lsst.", "").replace(".", "_")
            packages[new_name] = packages[name]
            del packages[name]

    return packages

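# Example usage (illustrative sketch only; the output depends entirely on
# which modules have been imported in the current process):
#
#     import numpy  # ensure at least one third-party module is loaded
#     from lsst.utils.packages import getPythonPackages
#
#     versions = getPythonPackages()
#     # versions["python"] is sys.version, versions["numpy"] is numpy's
#     # version string, and lsst.* modules appear under EUPS-style names
#     # such as "utils" rather than "lsst.utils".
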
def _get_python_package_version(
    name: str, namespace: str, dist_names: list[str], packages: dict[str, str]
) -> str | None:
    """Given a package or module name, try to determine the version.

    Parameters
    ----------
    name : `str`
        The imported name of the package or module to try.
    namespace : `str`
        The namespace of the package or module.
    dist_names : `list` [ `str` ]
        The distribution names of the package or module.
    packages : `dict` [ `str`, `str` ]
        A dictionary mapping a name to a version. Modified in place.
        The key used might not match exactly the given key.

    Returns
    -------
    ver : `str` or `None`
        The version string stored in ``packages``. Nothing is stored if the
        value here is `None`.
    """
    # We have certain special namespaces that are used via eups that
    # need to be enumerated here.
    if len(dist_names) > 1 or namespace in SPECIAL_NAMESPACES:
        # Split the name into parts.
        name_parts = re.split("[._-]", name)

        found = False
        for dist_name in dist_names:
            dist_name_parts = re.split("[._-]", dist_name)
            # Check if the components start with the namespace; this is
            # needed because (at least) lsst.ts packages do not use
            # ``lsst`` in the package name.
            if dist_name_parts[0] != namespace:
                dist_name_parts.insert(0, namespace)

            if dist_name_parts == name_parts:
                found = True
                break

        if not found:
            # This fallback case occurs when (a) we are testing the overall
            # namespace (e.g. "lsst" or "sphinxcontrib") and the code below
            # will return None; or (b) for eups-installed and other
            # "editable installations" that are not registered as part
            # of importlib.packages_distributions().
            dist_name = name
    else:
        dist_name = dist_names[0]

    try:
        # This is the Python standard way to find a package version.
        # It can be slow.
        ver = importlib.metadata.version(dist_name)
    except Exception:
        # Fall back to using the module itself. There is no guarantee
        # that "a" exists for module "a.b" so if hierarchy has been expanded
        # this might fail. Check first.
        if name not in sys.modules:
            return None
        module = sys.modules[name]
        try:
            ver = getVersionFromPythonModule(module)
        except Exception:
            return None  # Can't get a version from it, don't care

    # Update the package information.
    if ver is not None:
        packages[name] = ver

    return ver

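# Worked example of the matching logic above (illustrative only): for the
# imported name "lsst.daf.butler" the namespace is "lsst", and
# packages_distributions() may list several distributions providing that
# namespace. The name is split into ["lsst", "daf", "butler"]; each candidate
# distribution (e.g. "lsst-daf-butler") is split the same way, with the
# namespace prepended if it is missing, and the distribution whose parts
# match is the one whose importlib metadata version gets reported.
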
_eups: Any | None = None  # Singleton Eups object


@lru_cache(maxsize=1)
def getEnvironmentPackages(include_all: bool = False) -> dict[str, str]:
    """Get products and their versions from the environment.

    Parameters
    ----------
    include_all : `bool`
        If `False` only returns locally-setup packages. If `True` all set
        up packages are returned with a version that includes any associated
        non-current tags.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions.

    Notes
    -----
    We use EUPS to determine the version of certain products (those that don't
    provide a means to determine the version any other way) and to check if
    uninstalled packages are being used. We only report the product/version
    for these packages unless ``include_all`` is `True`.
    """
    try:
        from eups import Eups
        from eups.Product import Product
    except ImportError:
        log.warning("Unable to import eups, so cannot determine package versions from environment")
        return {}

    # Cache eups object since creating it can take a while.
    global _eups
    if not _eups:
        _eups = Eups()
    products = _eups.findProducts(tags=["setup"])

    # Get versions for things we can't determine via runtime mechanisms.
    # XXX Should we just grab everything we can, rather than just a
    # predetermined set?
    packages = {prod.name: prod.version for prod in products if prod.name in ENVIRONMENT}
    # The string 'LOCAL:' (the value of Product.LocalVersionPrefix) in the
    # version name indicates uninstalled code, so the version could be
    # different than what's being reported by the runtime environment (because
    # we don't tend to run "scons" every time we update some python file,
    # and even if we did sconsUtils probably doesn't check to see if the repo
    # is clean).
    for prod in products:
        if not prod.version.startswith(Product.LocalVersionPrefix):
            if include_all:
                tags = {t for t in prod.tags if t != "current"}
                tag_msg = " (" + " ".join(tags) + ")" if tags else ""
                packages[prod.name] = prod.version + tag_msg
            continue
        ver = prod.version

        gitDir = os.path.join(prod.dir, ".git")
        if os.path.exists(gitDir):
            # Get the git revision and an indication if the working copy is
            # clean.
            revCmd = ["git", "--git-dir=" + gitDir, "--work-tree=" + prod.dir, "rev-parse", "HEAD"]
            diffCmd = [
                "git",
                "--no-pager",
                "--git-dir=" + gitDir,
                "--work-tree=" + prod.dir,
                "diff",
                "--patch",
            ]
            try:
                rev = subprocess.check_output(revCmd).decode().strip()
                diff = subprocess.check_output(diffCmd)
            except Exception:
                ver += "@GIT_ERROR"
            else:
                ver += "@" + rev
                if diff:
                    ver += "+" + hashlib.md5(diff).hexdigest()
        else:
            ver += "@NO_GIT"

        packages[prod.name] = ver
    return packages

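# Example usage (illustrative sketch only; requires an EUPS-managed
# environment, which is optional for this module):
#
#     from lsst.utils.packages import getEnvironmentPackages
#
#     env_pkgs = getEnvironmentPackages()
#     # A locally setup product appears with a value resembling
#     # "LOCAL:/path/to/pkg@<git-sha>", plus "+<md5-of-diff>" if the working
#     # copy is dirty; without EUPS an empty dict is returned.
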
@lru_cache(maxsize=1)
def getCondaPackages() -> dict[str, str]:
    """Get products and their versions from the conda environment.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions.

    Notes
    -----
    Returns empty result if a conda environment is not in use or can not
    be queried.
    """
    if "CONDA_PREFIX" not in os.environ:
        return {}

    # conda list is very slow. Ten times faster to scan the directory
    # directly. This will only find conda packages and not pip installed
    # packages.
    meta_path = os.path.join(os.environ["CONDA_PREFIX"], "conda-meta")

    try:
        filenames = os.scandir(path=meta_path)
    except FileNotFoundError:
        return {}

    packages = {}

    for filename in filenames:
        if not filename.name.endswith(".json"):
            continue
        with open(filename) as f:
            try:
                data = json.load(f)
            except ValueError:
                continue
            try:
                packages[data["name"]] = data["version"]
            except KeyError:
                continue

    packages = {n: v for n, v in sorted(packages.items())}

    # Try to work out the conda environment name and include it as a fake
    # package. The "obvious" way of running "conda info --json" does give
    # access to the active_prefix but takes about 2 seconds to run.
    # As a compromise look for the env name in the path to the python
    # executable.
    match = re.search(r"/envs/(.*?)/bin/", sys.executable)
    if match:
        packages["conda_env"] = match.group(1)

    return packages

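# Example usage (illustrative sketch only; requires an active conda
# environment, i.e. CONDA_PREFIX set in the environment):
#
#     from lsst.utils.packages import getCondaPackages
#
#     conda_pkgs = getCondaPackages()
#     # Maps conda package names to versions read from
#     # $CONDA_PREFIX/conda-meta, with a synthetic "conda_env" entry when
#     # the environment name can be inferred from sys.executable.
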
class Packages(dict):
    """A table of packages and their versions.

    There are a few different types of packages, and their versions are
    collected in different ways:

    1. Installed Conda packages are obtained via the Conda API. Conda is
       not required.
    2. Python modules (e.g., afw, numpy; galsim is also in this group even
       though we only use it through the library, because no version
       information is currently provided through the library): we get their
       version from the ``__version__`` module variable. Note that this means
       that we're only aware of modules that have already been imported.
    3. Other packages provide no run-time accessible version information (e.g.,
       astrometry_net): we get their version from interrogating the
       environment. Currently, that means EUPS; if EUPS is replaced or dropped
       then we'll need to consider an alternative means of getting this version
       information.
    4. Local versions of packages (a non-installed EUPS package, selected with
       ``setup -r /path/to/package``): we identify these through the
       environment (EUPS again) and use as a version the path supplemented with
       the ``git`` SHA and, if the git repo isn't clean, an MD5 of the diff.

    These package versions are collected and stored in a Packages object, which
    provides useful comparison and persistence features.

    Example usage:

    .. code-block:: python

        from lsst.utils.packages import Packages
        pkgs = Packages.fromSystem()
        print("Current packages:", pkgs)
        old = Packages.read("/path/to/packages.pickle")
        print("Old packages:", old)
        print("Missing packages compared to before:", pkgs.missing(old))
        print("Extra packages compared to before:", pkgs.extra(old))
        print("Different packages: ", pkgs.difference(old))
        old.update(pkgs)  # Include any new packages in the old
        old.write("/path/to/packages.pickle")

    Notes
    -----
    This is a wrapper around a dict with some convenience methods.
    """
    formats: ClassVar[dict[str, str]] = {
        ".pkl": "pickle",
        ".pickle": "pickle",
        ".yaml": "yaml",
        ".json": "json",
    }

    def __setstate__(self, state: dict[str, Any]) -> None:
        # This only seems to be called for old pickle files where
        # the data was stored in _packages.
        self.update(state["_packages"])

    @classmethod
    def fromSystem(cls) -> Packages:
        """Construct a `Packages` by examining the system.

        Determine packages by examining python's `sys.modules`, conda
        libraries and EUPS. EUPS packages take precedence over conda and
        general python packages.

        Returns
        -------
        packages : `Packages`
            All version package information that could be obtained.
        """
        packages = {}
        packages.update(getPythonPackages())
        packages.update(getCondaPackages())
        packages.update(getEnvironmentPackages())  # Should be last, to override products with LOCAL versions
        return cls(packages)

    @classmethod
    def fromBytes(cls, data: bytes, format: str) -> Packages:
        """Construct the object from a byte representation.

        Parameters
        ----------
        data : `bytes`
            The serialized form of this object in bytes.
        format : `str`
            The format of those bytes. Can be ``yaml``, ``json``, or
            ``pickle``.

        Returns
        -------
        packages : `Packages`
            The package information read from the input data.
        """
        if format == "pickle":
            file = io.BytesIO(data)
            new = _BackwardsCompatibilityUnpickler(file).load()
        elif format == "yaml":
            new = yaml.load(data, Loader=yaml.SafeLoader)
        elif format == "json":
            new = cls(json.loads(data))
        else:
            raise ValueError(f"Unexpected serialization format given: {format}")
        if not isinstance(new, cls):
            raise TypeError(f"Extracted object of class '{type(new)}' but expected '{cls}'")
        return new

    @classmethod
    def read(cls, filename: str) -> Packages:
        """Read packages from filename.

        Parameters
        ----------
        filename : `str`
            Filename from which to read. The format is determined from the
            file extension. Currently supports ``.pickle``, ``.pkl``,
            ``.json``, and ``.yaml``.

        Returns
        -------
        packages : `Packages`
            The package information read from the file.
        """
        _, ext = os.path.splitext(filename)
        if ext not in cls.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "rb") as ff:
            # We assume that these classes are tiny so there is no
            # substantive memory impact by reading the entire file up front.
            data = ff.read()
        return cls.fromBytes(data, cls.formats[ext])

    def toBytes(self, format: str) -> bytes:
        """Convert the object to a serialized bytes form using the
        specified format.

        Parameters
        ----------
        format : `str`
            Format to use when serializing. Can be ``yaml``, ``json``,
            or ``pickle``.

        Returns
        -------
        data : `bytes`
            Byte string representing the serialized object.
        """
        if format == "pickle":
            return pickle.dumps(self)
        elif format == "yaml":
            return yaml.dump(self).encode("utf-8")
        elif format == "json":
            return json.dumps(self).encode("utf-8")
        else:
            raise ValueError(f"Unexpected serialization format requested: {format}")

    def write(self, filename: str) -> None:
        """Write to file.

        Parameters
        ----------
        filename : `str`
            Filename to which to write. The format of the data file
            is determined from the file extension. Currently supports
            ``.pickle``, ``.json``, and ``.yaml``.
        """
        _, ext = os.path.splitext(filename)
        if ext not in self.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "wb") as ff:
            # Assumes that the bytes serialization of this object is
            # relatively small.
            ff.write(self.toBytes(self.formats[ext]))

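    # Example round trip (illustrative only; the path is hypothetical, and
    # the format is chosen from the file extension):
    #
    #     pkgs = Packages.fromSystem()
    #     pkgs.write("/tmp/packages.json")
    #     same = Packages.read("/tmp/packages.json")
    #     assert same == pkgs  # dict contents compare equal
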
    def __str__(self) -> str:
        ss = self.__class__.__name__ + "({\n"
        # Sort alphabetically by module name, for convenience in reading.
        ss += ",\n".join(f"{prod!r}:{self[prod]!r}" for prod in sorted(self))
        ss += ",\n})"
        return ss

    def __repr__(self) -> str:
        # Default repr() does not report the class name.
        return f"{self.__class__.__name__}({super().__repr__()})"

    def extra(self, other: Mapping) -> dict[str, str]:
        """Get packages in self but not in another `Packages` object.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        extra : `dict`
            Extra packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: self[pkg] for pkg in self.keys() - other.keys()}

    def missing(self, other: Mapping) -> dict[str, str]:
        """Get packages in another `Packages` object but missing from self.

        Parameters
        ----------
        other : `Packages`
            Other packages to compare against.

        Returns
        -------
        missing : `dict` [`str`, `str`]
            Missing packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: other[pkg] for pkg in other.keys() - self.keys()}

    def difference(self, other: Mapping) -> dict[str, tuple[str, str]]:
        """Get packages whose versions differ between self and another
        `Packages` object.

        Parameters
        ----------
        other : `Packages`
            Other packages to compare against.

        Returns
        -------
        difference : `dict` [`str`, `tuple` [ `str`, `str` ]]
            Packages present in both but with differing versions. Keys
            (type `str`) are package names; values (type `tuple` [ `str`,
            `str` ]) are the versions in ``self`` and ``other`` respectively.
        """
        return {pkg: (self[pkg], other[pkg]) for pkg in self.keys() & other.keys() if self[pkg] != other[pkg]}

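    # Example comparison (illustrative only; versions shown are made up):
    #
    #     new = Packages({"numpy": "1.26", "scipy": "1.11"})
    #     old = Packages({"numpy": "1.24", "astropy": "6.0"})
    #     new.extra(old)       # {"scipy": "1.11"}
    #     new.missing(old)     # {"astropy": "6.0"}
    #     new.difference(old)  # {"numpy": ("1.26", "1.24")}
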
class _BackwardsCompatibilityUnpickler(pickle.Unpickler):
    """Replacement for the default unpickler.

    It is required so that users of this API can read pickle files
    created when the `~lsst.utils.packages.Packages` class was in a different
    package and known as ``lsst.base.Packages``. If this unpickler is being
    used then we know for sure that we must return a
    `~lsst.utils.packages.Packages` instance.
    """

    def find_class(self, module: str, name: str) -> type:
        """Return the class that should be used for unpickling.

        This is always known to be the class in this package.

        Parameters
        ----------
        module : `str`
            Ignored.
        name : `str`
            Ignored.

        Returns
        -------
        `type` [`Packages`]
            The Python type to use. Always returns `Packages`.
        """
        return Packages

# Register YAML representers


def _pkg_representer(dumper: yaml.Dumper, data: Any) -> yaml.MappingNode:
    """Represent Packages as a simple dict."""
    return dumper.represent_mapping("lsst.utils.packages.Packages", data, flow_style=None)


yaml.add_representer(Packages, _pkg_representer)


def _pkg_constructor(loader: yaml.constructor.SafeConstructor, node: yaml.Node) -> Any:
    """Convert YAML representation back to Python class."""
    yield Packages(loader.construct_mapping(node, deep=True))  # type: ignore


for loader in (yaml.Loader, yaml.CLoader, yaml.UnsafeLoader, yaml.SafeLoader, yaml.FullLoader):
    yaml.add_constructor("lsst.utils.packages.Packages", _pkg_constructor, Loader=loader)

    # Register the old name with YAML.
    yaml.add_constructor("lsst.base.Packages", _pkg_constructor, Loader=loader)
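
# Example YAML round trip (illustrative only; relies on the representer and
# constructors registered above, version string is made up):
#
#     import yaml
#     text = yaml.dump(Packages({"numpy": "1.26"}))
#     restored = yaml.safe_load(text)  # a Packages instance, not a plain dict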