Coverage for python / lsst / utils / packages.py: 20%
249 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-26 08:43 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-26 08:43 +0000
1# This file is part of utils.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
11#
12"""Determine which packages are being used in the system and their versions."""
14from __future__ import annotations
16import contextlib
17import hashlib
18import importlib
19import io
20import json
21import logging
22import os
23import pickle
24import re
25import subprocess
26import sys
27import types
28from collections.abc import Mapping
29from functools import cache, lru_cache
30from importlib.metadata import packages_distributions
31from typing import Any, ClassVar
33import yaml
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Names exported by ``from lsst.utils.packages import *``.
__all__ = [
    "Packages",
    "getAllPythonDistributions",
    "getCondaPackages",
    "getEnvironmentPackages",
    "getPythonPackages",
    "getVersionFromPythonModule",
]
# Packages that are only needed at build time (e.g., header-only libraries).
BUILDTIME = {"boost", "eigen", "tmv"}

# Python modules we try to import purely so that their version becomes
# visible: the version is only reported through python even though the
# library itself is what gets used.
PYTHON: set[str] = set()

SPECIAL_NAMESPACES = {"lsst"}

# Packages with no apparent way of reporting their version at run time.
# Their version has to be guessed from the environment.
ENVIRONMENT = {"astrometry_net", "astrometry_net_data", "minuit2", "xpa"}

# Python 3.10 added a list of standard library module names; all of those
# share the version of Python itself. Fall back to an empty set when the
# attribute is missing.
_STDLIB = getattr(sys, "stdlib_module_names", frozenset())
def getVersionFromPythonModule(module: types.ModuleType) -> str:
    """Determine the version of a python module.

    Parameters
    ----------
    module : `~types.ModuleType`
        Module for which to get version.

    Returns
    -------
    version : `str`
        The version of the python module.

    Raises
    ------
    AttributeError
        Raised if ``__version__`` attribute is not set.

    Notes
    -----
    We supplement the version with information from the
    ``__dependency_versions__`` (a specific variable set by LSST's
    `~lsst.sconsUtils` at build time) only for packages that are typically
    used only at build-time.

    The value of ``__version__`` is converted to `str` before anything is
    appended to it, so a module that stores its version as a non-string
    (e.g. a tuple) is still reported rather than failing with `TypeError`.
    """
    # Coerce first: appending the " with ..." suffix to a non-string
    # ``__version__`` would otherwise raise TypeError.
    version = str(module.__version__)

    # A missing, None or empty ``__dependency_versions__`` adds nothing.
    deps = getattr(module, "__dependency_versions__", None)
    if deps:
        # Only report dependencies that are known to be build-time only.
        buildtime = BUILDTIME & set(deps)
        if buildtime:
            version += " with " + " ".join(f"{pkg}={deps[pkg]}" for pkg in sorted(buildtime))
    return version
@cache
def getAllPythonDistributions() -> dict[str, str]:
    """Get the versions for all Python distributions that are installed.

    Returns
    -------
    packages : `dict` [ `str`, `str` ]
        Keys are distribution names; values are their versions.
        Like `getPythonPackages` this function will not include
        standard library packages defined in `sys.stdlib_module_names` but
        will include a special ``python`` key reporting the Python version.

    Notes
    -----
    If this function is called a second time an identical result will be
    returned even if a new distribution has been installed.

    Distributions that do not report a name are skipped.
    """
    packages = {"python": sys.version}

    for dist in importlib.metadata.distributions():
        dist_name = dist.name
        if dist_name is None:
            # Broken metadata can yield a distribution without a name. A
            # `None` key cannot be processed by the name mangling below
            # (it calls ``str.startswith`` on every key), so skip it.
            continue
        packages[dist_name] = dist.version
    return _mangle_lsst_package_names(packages)
def getPythonPackages() -> dict[str, str]:
    """Get imported python packages and their versions.

    Returns
    -------
    packages : `dict` [ `str`, `str` ]
        Keys are package names; values are their versions.

    Notes
    -----
    `sys.modules` is scanned and a version is looked up for each module
    found there, which means only modules that have *already* been imported
    can be reported.

    Standard library packages are left out of the report, but a ``python``
    key recording the Python version is always present.

    Modules for which no version can be determined are omitted.

    Distribution names are used to look up versions, but the key stored in
    the result is the name that was imported. For example the version of
    the ``PyYAML`` distribution is reported under the key ``yaml``.
    """
    # Some libraries only report their version through python; try to
    # import those so they show up in sys.modules. Failure is not an error.
    for extra_module in PYTHON:
        with contextlib.suppress(Exception):
            importlib.import_module(extra_module)

    namespace_to_dists = packages_distributions()

    versions = {"python": sys.version}

    # Take a sorted snapshot of the names up front rather than iterating
    # over sys.modules while it may be changing.
    imported = sorted(sys.modules.keys())

    # The sorted order places a package directly before its sub-modules:
    #   lsst
    #   lsst.afw
    #   lsst.afw.cameraGeom
    #   ...
    #   lsst.daf
    #   lsst.daf.butler
    # so remembering the previously examined name lets us skip everything
    # below a package whose version is already known.
    checked = 0
    resolved = 0
    last_seen = ""
    for name in imported:
        # Private modules (including standard library ones such as _abc
        # and __future__) are ignored: the version is assumed to live in
        # the parent or, at the top level, to be irrelevant.
        if name.startswith("_") or "._" in name:
            continue

        # Child of a package that already has a version. ``last_seen`` is
        # deliberately left alone so later siblings are skipped as well.
        if last_seen in versions and name.startswith(last_seen + "."):
            continue

        # The top-level namespace is what packages_distributions() knows.
        top_level = name.partition(".")[0]

        # Standard library names only refer to the top-level namespace
        # ("importlib" is listed, "importlib.metadata" is not).
        if top_level in _STDLIB:
            last_seen = name
            continue

        # The mapping from import namespace to distribution names can be
        # one-to-many because of namespace packages. It does not know
        # about editable installs or eups installs via path manipulation,
        # in which case the module name itself is tried.
        candidates = namespace_to_dists.get(top_level, [name])

        found = _get_python_package_version(name, top_level, candidates, versions)

        checked += 1
        if found is not None:
            resolved += 1
        last_seen = name

    log.debug(
        "Given %d modules but checked %d in hierarchy and found versions for %d",
        len(imported),
        checked,
        resolved,
    )

    return _mangle_lsst_package_names(versions)
def _mangle_lsst_package_names(packages: dict[str, str]) -> dict[str, str]:
    """Rename LSST python module/distribution keys to LSST package names.

    Parameters
    ----------
    packages : `dict` [ `str`, `str` ]
        Mapping of name to version. Modified in place.

    Returns
    -------
    packages : `dict` [ `str`, `str` ]
        The same dictionary that was passed in. Keys starting with
        ``lsst.`` or ``lsst-`` have that prefix removed and the remaining
        separators (``.`` or ``-`` respectively) replaced by ``_``. All
        other keys are left untouched.

    Notes
    -----
    Using LSST package names matches the names we get from the environment
    (i.e., EUPS) so we can clobber these build-time versions if the
    environment reveals that we're not using the packages as-built.

    Only the leading prefix is removed: ``lsst.obs.lsst.translators``
    becomes ``obs_lsst_translators``.
    """
    # Iterate over a snapshot of the keys because the dict is modified.
    for name in list(packages):
        for prefix, sep in (("lsst.", "."), ("lsst-", "-")):
            if name.startswith(prefix):
                break
        else:
            # Not an LSST name.
            continue
        # Slice off the leading prefix only; str.replace() would also
        # remove any later occurrence of the prefix text inside the name.
        new_name = name[len(prefix) :].replace(sep, "_")
        packages[new_name] = packages.pop(name)

    return packages
def _get_python_package_version(
    name: str, namespace: str, dist_names: list[str], packages: dict[str, str]
) -> str | None:
    """Try to work out the version of a package or module.

    Parameters
    ----------
    name : `str`
        The imported name of the package or module to try.
    namespace : `str`
        The namespace of the package or module.
    dist_names : `list` [ `str` ]
        The distribution names of the package or module.
    packages : `dict` [ `str`, `str` ]
        A dictionary mapping a name to a version. Modified in place.
        The key used might not match exactly the given key.

    Returns
    -------
    ver : `str` or `None`
        The version string stored in ``packages``. Nothing is stored if the
        value here is `None`.
    """
    # Several candidate distributions, or one of the special namespaces
    # used via eups, means we have to pick the distribution by name.
    if len(dist_names) > 1 or namespace in SPECIAL_NAMESPACES:
        wanted_parts = re.split("[._-]", name)

        # Default used when no candidate matches. That happens (a) when
        # testing the overall namespace (e.g. "lsst" or "sphinxcontrib"),
        # for which no version will be found below; or (b) for
        # eups-installed and other "editable installations" that are not
        # registered with importlib.packages_distributions().
        dist_name = name
        for candidate in dist_names:
            # This should be impossible, but it has been seen in practice.
            if candidate is None:
                continue  # type: ignore
            candidate_parts = re.split("[._-]", candidate)

            # Some distributions (at least lsst.ts packages) do not use the
            # namespace in the distribution name, so add it for comparison.
            if candidate_parts[0] != namespace:
                candidate_parts.insert(0, namespace)

            if candidate_parts == wanted_parts:
                dist_name = candidate
                break
    else:
        dist_name = dist_names[0]

    try:
        # The standard way to find a package version. It can be slow.
        ver = importlib.metadata.version(dist_name)
    except Exception:
        # Fall back to asking the module itself. Module "a" is not
        # guaranteed to exist just because "a.b" does, so look it up
        # defensively.
        module = sys.modules.get(name)
        if module is None:
            return None
        try:
            ver = getVersionFromPythonModule(module)
        except Exception:
            # No version can be obtained from it; not a problem.
            return None

    # Record the version under the imported name.
    if ver is not None:
        packages[name] = ver

    return ver
_eups: Any | None = None  # Singleton Eups object


@lru_cache(maxsize=2)
def getEnvironmentPackages(include_all: bool = False) -> dict[str, str]:
    """Get products and their versions from the environment.

    Parameters
    ----------
    include_all : `bool`
        If `False` only returns locally-setup packages. If `True` all set
        up packages are returned with a version that includes any associated
        non-current tags.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions. Empty if ``eups`` cannot be imported.

    Notes
    -----
    We use EUPS to determine the version of certain products (those that don't
    provide a means to determine the version any other way) and to check if
    uninstalled packages are being used. We only report the product/version
    for these packages unless ``include_all`` is `True`.

    Assumes that no new EUPS packages are set up after this function is
    called the first time.
    """
    try:
        from eups import Eups
        from eups.Product import Product
    except ImportError:
        log.warning("Unable to import eups, so cannot determine package versions from environment")
        return {}

    # Cache eups object since creating it can take a while
    global _eups
    if _eups is None:
        _eups = Eups()
    products = _eups.findProducts(tags=["setup"])

    # Get versions for things we can't determine via runtime mechanisms.
    # ENVIRONMENT holds product *names*, so the membership test must use
    # ``prod.name``; testing the product object itself never selects the
    # intended products.
    # XXX Should we just grab everything we can, rather than just a
    # predetermined set?
    packages = {prod.name: prod.version for prod in products if prod.name in ENVIRONMENT}

    # The string 'LOCAL:' (the value of Product.LocalVersionPrefix) in the
    # version name indicates uninstalled code, so the version could be
    # different than what's being reported by the runtime environment (because
    # we don't tend to run "scons" every time we update some python file,
    # and even if we did sconsUtils probably doesn't check to see if the repo
    # is clean).
    for prod in products:
        if not prod.version.startswith(Product.LocalVersionPrefix):
            if include_all:
                # Report installed products too, annotated with every tag
                # other than "current".
                tags = {t for t in prod.tags if t != "current"}
                tag_msg = " (" + " ".join(sorted(tags)) + ")" if tags else ""
                packages[prod.name] = prod.version + tag_msg
            continue
        ver = prod.version

        gitDir = os.path.join(prod.dir, ".git")
        if os.path.exists(gitDir):
            # get the git revision and an indication if the working copy is
            # clean
            revCmd = ["git", "--git-dir=" + gitDir, "--work-tree=" + prod.dir, "rev-parse", "HEAD"]
            diffCmd = [
                "git",
                "--no-pager",
                "--git-dir=" + gitDir,
                "--work-tree=" + prod.dir,
                "diff",
                "--patch",
            ]
            try:
                rev = subprocess.check_output(revCmd).decode().strip()
                diff = subprocess.check_output(diffCmd)
            except Exception:
                ver += "@GIT_ERROR"
            else:
                ver += "@" + rev
                if diff:
                    # The digest only fingerprints the uncommitted changes;
                    # it is not used for security, so say so explicitly.
                    # This keeps restricted (e.g. FIPS) builds from
                    # refusing MD5 and does not change the digest value.
                    ver += "+" + hashlib.md5(diff, usedforsecurity=False).hexdigest()
        else:
            ver += "@NO_GIT"

        packages[prod.name] = ver
    return packages
@lru_cache(maxsize=1)
def getCondaPackages() -> dict[str, str]:
    """Get products and their versions from the conda environment.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions. Sorted by product name. If the conda environment name
        can be derived from the path of the python executable it is added
        under the ``conda_env`` key.

    Notes
    -----
    Returns empty result if a conda environment is not in use or can not
    be queried.

    The result is cached, so later changes to the environment are not
    picked up.
    """
    prefix = os.environ.get("CONDA_PREFIX")
    if prefix is None:
        return {}

    # conda list is very slow. Ten times faster to scan the directory
    # directly. This will only find conda packages and not pip installed
    # packages.
    meta_path = os.path.join(prefix, "conda-meta")

    try:
        entries = os.scandir(meta_path)
    except OSError:
        # Missing, not a directory, unreadable, ...: cannot be queried.
        return {}

    packages: dict[str, str] = {}

    # Use the iterator as a context manager so the directory handle is
    # released deterministically.
    with entries:
        for entry in entries:
            if not entry.name.endswith(".json"):
                continue
            try:
                # JSON is defined to be UTF-8; do not depend on the locale.
                with open(entry, encoding="utf-8") as f:
                    data = json.load(f)
                packages[data["name"]] = data["version"]
            except (OSError, ValueError, KeyError, TypeError):
                # Unreadable file, invalid JSON, missing key, or a JSON
                # document that is not a mapping. Ignore this file.
                continue

    packages = dict(sorted(packages.items()))

    # Try to work out the conda environment name and include it as a fake
    # package. The "obvious" way of running "conda info --json" does give
    # access to the active_prefix but takes about 2 seconds to run.
    # As a compromise look for the env name in the path to the python
    # executable
    match = re.search(r"/envs/(.*?)/bin/", sys.executable)
    if match:
        packages["conda_env"] = match.group(1)

    return packages
class Packages(dict):
    """A table of packages and their versions.

    There are a few different types of packages, and their versions are
    collected in different ways:

    1. Installed Conda packages are obtained by scanning the ``conda-meta``
       directory of the active Conda environment. Conda is not required.
    2. Python modules (e.g., afw, numpy; galsim is also in this group even
       though we only use it through the library, because no version
       information is currently provided through the library): we get their
       version from the distribution metadata, falling back to the
       ``__version__`` module variable. Note that this means
       that we're only aware of modules that have already been imported.
    3. Other packages provide no run-time accessible version information (e.g.,
       astrometry_net): we get their version from interrogating the
       environment. Currently, that means EUPS; if EUPS is replaced or dropped
       then we'll need to consider an alternative means of getting this version
       information.
    4. Local versions of packages (a non-installed EUPS package, selected with
       ``setup -r /path/to/package``): we identify these through the
       environment (EUPS again) and use as a version the path supplemented with
       the ``git`` SHA and, if the git repo isn't clean, an MD5 of the diff.

    These package versions are collected and stored in a Packages object, which
    provides useful comparison and persistence features.

    Example usage:

    .. code-block:: python

        from lsst.utils.packages import Packages
        pkgs = Packages.fromSystem()
        print("Current packages:", pkgs)
        old = Packages.read("/path/to/packages.pickle")
        print("Old packages:", old)
        print("Missing packages compared to before:", pkgs.missing(old))
        print("Extra packages compared to before:", pkgs.extra(old))
        print("Different packages: ", pkgs.difference(old))
        old.update(pkgs)  # Include any new packages in the old
        old.write("/path/to/packages.pickle")

    Notes
    -----
    This is a wrapper around a dict with some convenience methods.
    """

    # Maps a file extension to the name of the serialization format.
    formats: ClassVar[dict[str, str]] = {
        ".pkl": "pickle",
        ".pickle": "pickle",
        ".yaml": "yaml",
        ".json": "json",
    }

    def __setstate__(self, state: dict[str, Any]) -> None:
        # Old pickle files stored the data in a ``_packages`` attribute;
        # move that content into the dict itself.
        if "_packages" in state:
            self.update(state["_packages"])
        else:
            # Any other state is ordinary instance attributes (an instance
            # that had attributes set on it before being pickled). Restore
            # them the way the default unpickling would.
            self.__dict__.update(state)

    @classmethod
    def fromSystem(cls, include_all: bool = False) -> Packages:
        """Construct a `Packages` by examining the system.

        Determine packages by examining python's installed packages
        (by default filtered by `sys.modules`) or distributions, conda
        libraries and EUPS. EUPS packages take precedence over conda and
        general python packages.

        Parameters
        ----------
        include_all : `bool`, optional
            If `False`, will only include imported Python packages, installed
            Conda packages and locally-setup EUPS packages. If `True` all
            installed Python distributions and conda packages will be reported
            as well as all EUPS packages that are set up.

        Returns
        -------
        packages : `Packages`
            All version package information that could be obtained.

        Notes
        -----
        The names of Python distributions can differ from the names of the
        Python packages installed by those distributions. Since ``include_all``
        set to `True` uses Python distributions and `False` uses Python
        packages do not expect that the answers are directly comparable.
        """
        packages: dict[str, str] = {}
        if include_all:
            packages.update(getAllPythonDistributions())
        else:
            packages.update(getPythonPackages())
        # Conda list always reports all Conda packages.
        packages.update(getCondaPackages())
        # Should be last, to override products with LOCAL versions
        packages.update(getEnvironmentPackages(include_all=include_all))
        return cls(packages)

    @classmethod
    def fromBytes(cls, data: bytes, format: str) -> Packages:
        """Construct the object from a byte representation.

        Parameters
        ----------
        data : `bytes`
            The serialized form of this object in bytes.
        format : `str`
            The format of those bytes. Can be ``yaml``, ``json``, or
            ``pickle``.

        Returns
        -------
        packages : `Packages`
            The package information read from the input data.

        Raises
        ------
        ValueError
            Raised if ``format`` is not one of the supported formats.
        TypeError
            Raised if the deserialized object is not an instance of this
            class.
        """
        if format == "pickle":
            file = io.BytesIO(data)
            # The custom unpickler resolves every class reference in the
            # stream to `Packages`.
            new = _BackwardsCompatibilityUnpickler(file).load()
        elif format == "yaml":
            new = yaml.load(data, Loader=yaml.SafeLoader)
        elif format == "json":
            new = cls(json.loads(data))
        else:
            raise ValueError(f"Unexpected serialization format given: {format}")
        if not isinstance(new, cls):
            raise TypeError(f"Extracted object of class '{type(new)}' but expected '{cls}'")
        return new

    @classmethod
    def read(cls, filename: str) -> Packages:
        """Read packages from filename.

        Parameters
        ----------
        filename : `str`
            Filename from which to read. The format is determined from the
            file extension. Currently support ``.pickle``, ``.pkl``,
            ``.json``, and ``.yaml``.

        Returns
        -------
        packages : `Packages`
            The packages information read from the file.

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        _, ext = os.path.splitext(filename)
        if ext not in cls.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "rb") as ff:
            # We assume that these classes are tiny so there is no
            # substantive memory impact by reading the entire file up front
            data = ff.read()
        return cls.fromBytes(data, cls.formats[ext])

    def toBytes(self, format: str) -> bytes:
        """Convert the object to a serialized bytes form using the
        specified format.

        Parameters
        ----------
        format : `str`
            Format to use when serializing. Can be ``yaml``, ``json``,
            or ``pickle``.

        Returns
        -------
        data : `bytes`
            Byte string representing the serialized object.

        Raises
        ------
        ValueError
            Raised if ``format`` is not one of the supported formats.
        """
        if format == "pickle":
            return pickle.dumps(self)
        elif format == "yaml":
            return yaml.dump(self).encode("utf-8")
        elif format == "json":
            return json.dumps(self).encode("utf-8")
        else:
            raise ValueError(f"Unexpected serialization format requested: {format}")

    def write(self, filename: str) -> None:
        """Write to file.

        Parameters
        ----------
        filename : `str`
            Filename to which to write. The format of the data file
            is determined from the file extension. Currently supports
            ``.pickle``, ``.pkl``, ``.json``, and ``.yaml``.

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        _, ext = os.path.splitext(filename)
        if ext not in self.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "wb") as ff:
            # Assumes that the bytes serialization of this object is
            # relatively small.
            ff.write(self.toBytes(self.formats[ext]))

    def __str__(self) -> str:
        # One ``name:version,`` entry per line, sorted alphabetically by
        # name for convenience in reading. Emitting the trailing comma per
        # entry means an empty table does not produce a stray "," line.
        entries = "".join(f"{prod!r}:{self[prod]!r},\n" for prod in sorted(self))
        return f"{self.__class__.__name__}({{\n{entries}}})"

    def __repr__(self) -> str:
        # Default repr() does not report the class name.
        return f"{self.__class__.__name__}({super().__repr__()})"

    def extra(self, other: Mapping) -> dict[str, str]:
        """Get packages in self but not in another `Packages` object.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        extra : `dict`
            Extra packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: self[pkg] for pkg in self.keys() - other.keys()}

    def missing(self, other: Mapping) -> dict[str, str]:
        """Get packages in another `Packages` object but missing from self.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        missing : `dict` [`str`, `str`]
            Missing packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: other[pkg] for pkg in other.keys() - self.keys()}

    def difference(self, other: Mapping) -> dict[str, tuple[str, str]]:
        """Get packages present in both self and another `Packages` object
        but with different versions.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        difference : `dict` [`str`, `tuple` [ `str`, `str` ]]
            Packages whose versions differ. Keys (type `str`) are package
            names; values (type `tuple` [ `str`, `str` ]) are the version
            in self followed by the version in ``other``.
        """
        return {pkg: (self[pkg], other[pkg]) for pkg in self.keys() & other.keys() if self[pkg] != other[pkg]}
class _BackwardsCompatibilityUnpickler(pickle.Unpickler):
    """Unpickler that maps every class reference to `Packages`.

    Pickle files written when the `~lsst.utils.packages.Packages` class
    lived in a different package, under the name ``lsst.base.Packages``,
    must remain readable by users of this API. Whenever this unpickler is
    in use the result is known to be a `~lsst.utils.packages.Packages`
    instance, so the class named in the pickle stream is not consulted.
    """

    def find_class(self, module: str, name: str) -> type:
        """Return the class that should be used for unpickling.

        The answer does not depend on the arguments: it is always the
        class defined in this package.

        Parameters
        ----------
        module : `str`
            Ignored.
        name : `str`
            Ignored.

        Returns
        -------
        `type` [`Packages`]
            The Python type to use. Always returns `Packages`.
        """
        return Packages
# Register YAML representers


def _pkg_representer(dumper: yaml.Dumper, data: Any) -> yaml.MappingNode:
    """Represent a `Packages` instance as a plain tagged mapping."""
    yaml_tag = "lsst.utils.packages.Packages"
    return dumper.represent_mapping(yaml_tag, data, flow_style=None)


yaml.add_representer(Packages, _pkg_representer)
def _pkg_constructor(loader: yaml.constructor.SafeConstructor, node: yaml.Node) -> Any:
    """Build a `Packages` instance from its YAML mapping node.

    This is a generator: the constructed object is yielded rather than
    returned.
    """
    mapping = loader.construct_mapping(node, deep=True)
    yield Packages(mapping)  # type: ignore
# Attach the constructor to every loader class this yaml installation
# provides; loaders that do not exist (getattr gives None) are skipped.
for loader_str in ("Loader", "CLoader", "UnsafeLoader", "SafeLoader", "FullLoader"):
    loader = getattr(yaml, loader_str, None)
    if loader is not None:
        yaml.add_constructor("lsst.utils.packages.Packages", _pkg_constructor, Loader=loader)

        # Register the old name with YAML.
        yaml.add_constructor("lsst.base.Packages", _pkg_constructor, Loader=loader)