Coverage for python/lsst/utils/packages.py: 24%
197 statements

# This file is part of utils.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# Use of this source code is governed by a 3-clause BSD-style
# license that can be found in the LICENSE file.
#
"""Determine which packages are being used in the system and their versions.
"""

from __future__ import annotations

import contextlib
import hashlib
import importlib
import importlib.metadata  # needed for the importlib.metadata.version() lookups below
import io
import json
import logging
import os
import pickle
import re
import subprocess
import sys
import types
from collections.abc import Mapping
from functools import lru_cache
from typing import Any, ClassVar

import yaml

log = logging.getLogger(__name__)

__all__ = [
    "getVersionFromPythonModule",
    "getPythonPackages",
    "getEnvironmentPackages",
    "getCondaPackages",
    "Packages",
]


# Packages used at build-time (e.g., header-only)
BUILDTIME = {"boost", "eigen", "tmv"}

# Python modules to attempt to load so we can try to get the version.
# We do this because the version only appears to be available from Python,
# but we use the library.
PYTHON = {"galsim"}

# Packages that don't seem to have a mechanism for reporting the runtime
# version. We need to guess the version from the environment.
ENVIRONMENT = {"astrometry_net", "astrometry_net_data", "minuit2", "xpa"}

try:
    # Python 3.10 includes a list of standard library modules.
    # These will all have the same version number as Python itself.
    _STDLIB = sys.stdlib_module_names
except AttributeError:
    _STDLIB = frozenset()


def getVersionFromPythonModule(module: types.ModuleType) -> str:
    """Determine the version of a python module.

    Parameters
    ----------
    module : `~types.ModuleType`
        Module for which to get version.

    Returns
    -------
    version : `str`
        The version of the module.

    Raises
    ------
    AttributeError
        Raised if ``__version__`` attribute is not set.

    Notes
    -----
    We supplement the version with information from the
    ``__dependency_versions__`` (a specific variable set by LSST's
    `~lsst.sconsUtils` at build time) only for packages that are typically
    used only at build-time.
    """
    version = module.__version__
    if hasattr(module, "__dependency_versions__"):
        # Add build-time dependencies.
        deps = module.__dependency_versions__
        buildtime = BUILDTIME & set(deps.keys())
        if buildtime:
            version += " with " + " ".join(f"{pkg}={deps[pkg]}" for pkg in sorted(buildtime))
    return str(version)
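

# Illustrative sketch, not part of the module API: how
# ``getVersionFromPythonModule`` behaves for a module that is already
# imported.  The helper name ``_example_module_version`` is hypothetical;
# ``yaml`` is used only because it is imported above and exposes
# ``__version__``.
def _example_module_version() -> str:
    # Returns e.g. "6.0.1"; a module without ``__version__`` would raise
    # AttributeError instead.
    return getVersionFromPythonModule(yaml)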


def getPythonPackages() -> dict[str, str]:
    """Get imported python packages and their versions.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are package names; values (type `str`) are their
        versions.

    Notes
    -----
    We wade through `sys.modules` and attempt to determine the version for
    each module.  Note, therefore, that we can only report on modules that
    have *already* been imported.

    We don't include any module for which we cannot determine a version.
    """
    # Attempt to import libraries that only report their version in python.
    for module_name in PYTHON:
        # If it's not available we continue.
        with contextlib.suppress(Exception):
            importlib.import_module(module_name)

    packages = {"python": sys.version}

    # Not iterating with sys.modules.items() because it's not atomic and
    # subject to race conditions.
    module_names = list(sys.modules.keys())

    # Use knowledge of package hierarchy to find the versions rather than
    # using each name independently. Group all the module names into the
    # hierarchy, splitting on dot, and skipping any component that starts
    # with an underscore.

    # Sorting the module names gives us:
    # lsst
    # lsst.afw
    # lsst.afw.cameraGeom
    # ...
    # lsst.daf
    # lsst.daf.butler
    #
    # and so we can use knowledge of the previous version to inform whether
    # we need to look at the subsequent line.
    n_versions = 0
    n_checked = 0
    previous = ""
    for name in sorted(module_names):
        if name.startswith("_") or "._" in name:
            # Refers to a private module so we can ignore it and assume
            # version has been lifted into parent or, if top level, not
            # relevant for versioning. This applies also to standard library
            # packages such as _abc and __future__.
            continue

        if name in _STDLIB:
            # Assign all standard library packages the python version
            # since they almost all lack explicit versions.
            packages[name] = sys.version
            previous = name
            continue

        if name.startswith(previous + ".") and previous in packages:
            # Already have this version. Use the same previous name
            # for the line after this.
            continue

        # Look for a version.
        ver = _get_python_package_version(name, packages)
        n_checked += 1
        if ver is not None:
            n_versions += 1
        previous = name

    log.debug(
        "Given %d modules but checked %d in hierarchy and found versions for %d",
        len(module_names),
        n_checked,
        n_versions,
    )

    for name in list(packages.keys()):
        # Use LSST package names instead of python module names.
        # This matches the names we get from the environment (i.e., EUPS)
        # so we can clobber these build-time versions if the environment
        # reveals that we're not using the packages as-built.
        if name.startswith("lsst."):
            new_name = name.replace("lsst.", "").replace(".", "_")
            packages[new_name] = packages[name]
            del packages[name]

    return packages
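

# Illustrative sketch, not part of the module API (hypothetical helper name):
# typical use of ``getPythonPackages``.  Only modules already imported in the
# current process are reported, so the content of the result varies.
def _example_python_packages() -> None:
    pkgs = getPythonPackages()
    # "python" is always present; "yaml" should be too because this module
    # imports it.  Any ``lsst.foo.bar`` key would be reported as ``foo_bar``
    # after the renaming step above.
    print(pkgs["python"])
    print(pkgs.get("yaml", "unknown"))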


def _get_python_package_version(name: str, packages: dict[str, str]) -> str | None:
    """Given a package or module name, try to determine the version.

    Parameters
    ----------
    name : `str`
        The name of the package or module to try.
    packages : `dict` [ `str`, `str` ]
        A dictionary mapping a name to a version. Modified in place.
        The key used might not match exactly the given key.

    Returns
    -------
    ver : `str` or `None`
        The version string stored in ``packages``. Nothing is stored if the
        value here is `None`.
    """
    try:
        # This is the Python standard way to find a package version.
        # It can be slow.
        ver = importlib.metadata.version(name)
    except Exception:
        # Fall back to using the module itself. There is no guarantee
        # that "a" exists for module "a.b" so if hierarchy has been expanded
        # this might fail. Check first.
        if name not in sys.modules:
            return None
        module = sys.modules[name]
        try:
            ver = getVersionFromPythonModule(module)
        except Exception:
            return None  # Can't get a version from it, don't care

    # Update the package information.
    if ver is not None:
        packages[name] = ver

    return ver
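

# Illustrative sketch, not part of the module API (hypothetical helper name):
# ``_get_python_package_version`` consults ``importlib.metadata`` first and
# only falls back to the module's ``__version__`` when that lookup fails; the
# supplied dict is updated in place when a version is found.
def _example_lookup_version() -> None:
    found: dict[str, str] = {}
    # The module is "yaml" but the distribution is named "PyYAML", so the
    # metadata lookup typically fails here and the ``__version__`` fallback
    # is used instead.
    ver = _get_python_package_version("yaml", found)
    print(ver, found)  # e.g. "6.0.1" and {"yaml": "6.0.1"}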


_eups: Any | None = None  # Singleton Eups object


@lru_cache(maxsize=1)
def getEnvironmentPackages(include_all: bool = False) -> dict[str, str]:
    """Get products and their versions from the environment.

    Parameters
    ----------
    include_all : `bool`
        If `False` only returns locally-setup packages. If `True` all set
        up packages are returned with a version that includes any associated
        non-current tags.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions.

    Notes
    -----
    We use EUPS to determine the version of certain products (those that don't
    provide a means to determine the version any other way) and to check if
    uninstalled packages are being used. We only report the product/version
    for these packages unless ``include_all`` is `True`.
    """
    try:
        from eups import Eups
        from eups.Product import Product
    except ImportError:
        log.warning("Unable to import eups, so cannot determine package versions from environment")
        return {}

    # Cache the Eups object since creating it can take a while.
    global _eups
    if not _eups:
        _eups = Eups()
    products = _eups.findProducts(tags=["setup"])

    # Get versions for things we can't determine via runtime mechanisms.
    # XXX Should we just grab everything we can, rather than just a
    # predetermined set?
    packages = {prod.name: prod.version for prod in products if prod.name in ENVIRONMENT}

    # The string 'LOCAL:' (the value of Product.LocalVersionPrefix) in the
    # version name indicates uninstalled code, so the version could be
    # different than what's being reported by the runtime environment (because
    # we don't tend to run "scons" every time we update some python file,
    # and even if we did sconsUtils probably doesn't check to see if the repo
    # is clean).
    for prod in products:
        if not prod.version.startswith(Product.LocalVersionPrefix):
            if include_all:
                tags = {t for t in prod.tags if t != "current"}
                tag_msg = " (" + " ".join(tags) + ")" if tags else ""
                packages[prod.name] = prod.version + tag_msg
            continue
        ver = prod.version

        gitDir = os.path.join(prod.dir, ".git")
        if os.path.exists(gitDir):
            # Get the git revision and an indication of whether the working
            # copy is clean.
            revCmd = ["git", "--git-dir=" + gitDir, "--work-tree=" + prod.dir, "rev-parse", "HEAD"]
            diffCmd = [
                "git",
                "--no-pager",
                "--git-dir=" + gitDir,
                "--work-tree=" + prod.dir,
                "diff",
                "--patch",
            ]
            try:
                rev = subprocess.check_output(revCmd).decode().strip()
                diff = subprocess.check_output(diffCmd)
            except Exception:
                ver += "@GIT_ERROR"
            else:
                ver += "@" + rev
                if diff:
                    ver += "+" + hashlib.md5(diff).hexdigest()
        else:
            ver += "@NO_GIT"

        packages[prod.name] = ver
    return packages
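

# Illustrative sketch, not part of the module API (hypothetical helper name):
# ``getEnvironmentPackages`` degrades gracefully, returning an empty dict when
# EUPS cannot be imported, so callers do not need to guard the call.
def _example_environment_packages() -> None:
    local_only = getEnvironmentPackages()
    everything = getEnvironmentPackages(include_all=True)
    # Both are {} outside an EUPS environment.  Otherwise, locally-setup
    # products carry "@<git sha>" (plus an MD5 of the diff if the working
    # copy is dirty) or "@NO_GIT".
    print(len(local_only), len(everything))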


@lru_cache(maxsize=1)
def getCondaPackages() -> dict[str, str]:
    """Get products and their versions from the conda environment.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions.

    Notes
    -----
    Returns an empty result if a conda environment is not in use or cannot
    be queried.
    """
    try:
        from conda.cli.python_api import Commands, run_command
    except ImportError:
        return {}

    # Get the installed package list.
    versions_json = run_command(Commands.LIST, "--json")
    packages = {pkg["name"]: pkg["version"] for pkg in json.loads(versions_json[0])}

    # Try to work out the conda environment name and include it as a fake
    # package. The "obvious" way of running "conda info --json" does give
    # access to the active_prefix but takes about 2 seconds to run.
    # The equivalent to the code above would be:
    # info_json = run_command(Commands.INFO, "--json")
    # As a compromise, look for the env name in the path to the python
    # executable.
    match = re.search(r"/envs/(.*?)/bin/", sys.executable)
    if match:
        packages["conda_env"] = match.group(1)

    return packages
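

# Illustrative sketch, not part of the module API (hypothetical helper name):
# ``getCondaPackages`` is wrapped in ``lru_cache``, so the (slow) conda query
# runs at most once per process; it returns {} when conda is not importable.
def _example_conda_packages() -> None:
    pkgs = getCondaPackages()
    assert getCondaPackages() is pkgs  # the cached dict is returned again
    # The active environment name, when it can be inferred from
    # sys.executable, appears under the pseudo-package "conda_env".
    print(pkgs.get("conda_env", "no conda environment detected"))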


class Packages(dict):
    """A table of packages and their versions.

    There are a few different types of packages, and their versions are
    collected in different ways:

    1. Installed Conda packages are obtained via the Conda API. Conda is
       not required.
    2. Python modules (e.g., afw, numpy; galsim is also in this group even
       though we only use it through the library, because no version
       information is currently provided through the library): we get their
       version from the ``__version__`` module variable. Note that this means
       that we're only aware of modules that have already been imported.
    3. Other packages provide no run-time accessible version information
       (e.g., astrometry_net): we get their version from interrogating the
       environment. Currently, that means EUPS; if EUPS is replaced or dropped
       then we'll need to consider an alternative means of getting this
       version information.
    4. Local versions of packages (a non-installed EUPS package, selected with
       ``setup -r /path/to/package``): we identify these through the
       environment (EUPS again) and use as a version the path supplemented
       with the ``git`` SHA and, if the git repo isn't clean, an MD5 of the
       diff.

    These package versions are collected and stored in a Packages object,
    which provides useful comparison and persistence features.

    Example usage:

    .. code-block:: python

        from lsst.utils.packages import Packages

        pkgs = Packages.fromSystem()
        print("Current packages:", pkgs)
        old = Packages.read("/path/to/packages.pickle")
        print("Old packages:", old)
        print("Missing packages compared to before:", pkgs.missing(old))
        print("Extra packages compared to before:", pkgs.extra(old))
        print("Different packages: ", pkgs.difference(old))
        old.update(pkgs)  # Include any new packages in the old
        old.write("/path/to/packages.pickle")

    Parameters
    ----------
    packages : `dict`
        A mapping {package: version} where both keys and values are type
        `str`.

    Notes
    -----
    This is a wrapper around a dict with some convenience methods.
    """

    formats: ClassVar[dict[str, str]] = {
        ".pkl": "pickle",
        ".pickle": "pickle",
        ".yaml": "yaml",
        ".json": "json",
    }

    def __setstate__(self, state: dict[str, Any]) -> None:
        # This only seems to be called for old pickle files where
        # the data was stored in _packages.
        self.update(state["_packages"])

    @classmethod
    def fromSystem(cls) -> Packages:
        """Construct a `Packages` by examining the system.

        Determine packages by examining python's `sys.modules`, conda
        libraries and EUPS. EUPS packages take precedence over conda and
        general python packages.

        Returns
        -------
        packages : `Packages`
            All version package information that could be obtained.
        """
        packages = {}
        packages.update(getPythonPackages())
        packages.update(getCondaPackages())
        # Should be last, to override products with LOCAL versions.
        packages.update(getEnvironmentPackages())
        return cls(packages)

    @classmethod
    def fromBytes(cls, data: bytes, format: str) -> Packages:
        """Construct the object from a byte representation.

        Parameters
        ----------
        data : `bytes`
            The serialized form of this object in bytes.
        format : `str`
            The format of those bytes. Can be ``yaml``, ``json``, or
            ``pickle``.

        Returns
        -------
        packages : `Packages`
            The package information read from the input data.
        """
        if format == "pickle":
            file = io.BytesIO(data)
            new = _BackwardsCompatibilityUnpickler(file).load()
        elif format == "yaml":
            new = yaml.load(data, Loader=yaml.SafeLoader)
        elif format == "json":
            new = cls(json.loads(data))
        else:
            raise ValueError(f"Unexpected serialization format given: {format}")
        if not isinstance(new, cls):
            raise TypeError(f"Extracted object of class '{type(new)}' but expected '{cls}'")
        return new

    @classmethod
    def read(cls, filename: str) -> Packages:
        """Read packages from filename.

        Parameters
        ----------
        filename : `str`
            Filename from which to read. The format is determined from the
            file extension. Currently supports ``.pickle``, ``.pkl``,
            ``.json``, and ``.yaml``.

        Returns
        -------
        packages : `Packages`
            The package information read from the file.
        """
        _, ext = os.path.splitext(filename)
        if ext not in cls.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "rb") as ff:
            # We assume that these classes are tiny so there is no
            # substantive memory impact by reading the entire file up front.
            data = ff.read()
        return cls.fromBytes(data, cls.formats[ext])

    def toBytes(self, format: str) -> bytes:
        """Convert the object to a serialized bytes form using the
        specified format.

        Parameters
        ----------
        format : `str`
            Format to use when serializing. Can be ``yaml``, ``json``,
            or ``pickle``.

        Returns
        -------
        data : `bytes`
            Byte string representing the serialized object.
        """
        if format == "pickle":
            return pickle.dumps(self)
        elif format == "yaml":
            return yaml.dump(self).encode("utf-8")
        elif format == "json":
            return json.dumps(self).encode("utf-8")
        else:
            raise ValueError(f"Unexpected serialization format requested: {format}")

    def write(self, filename: str) -> None:
        """Write to file.

        Parameters
        ----------
        filename : `str`
            Filename to which to write. The format of the data file
            is determined from the file extension. Currently supports
            ``.pickle``, ``.json``, and ``.yaml``.
        """
        _, ext = os.path.splitext(filename)
        if ext not in self.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "wb") as ff:
            # Assumes that the bytes serialization of this object is
            # relatively small.
            ff.write(self.toBytes(self.formats[ext]))

    def __str__(self) -> str:
        ss = self.__class__.__name__ + "({\n"
        # Sort alphabetically by module name, for convenience in reading.
        ss += ",\n".join(f"{prod!r}:{self[prod]!r}" for prod in sorted(self))
        ss += ",\n})"
        return ss

    def __repr__(self) -> str:
        # Default repr() does not report the class name.
        return f"{self.__class__.__name__}({super().__repr__()})"

    def extra(self, other: Mapping) -> dict[str, str]:
        """Get packages in self but not in another `Packages` object.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        extra : `dict`
            Extra packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: self[pkg] for pkg in self.keys() - other.keys()}

    def missing(self, other: Mapping) -> dict[str, str]:
        """Get packages in another `Packages` object but missing from self.

        Parameters
        ----------
        other : `Packages`
            Other packages to compare against.

        Returns
        -------
        missing : `dict` [`str`, `str`]
            Missing packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: other[pkg] for pkg in other.keys() - self.keys()}

    def difference(self, other: Mapping) -> dict[str, tuple[str, str]]:
        """Get packages present in both self and another `Packages` object
        but with different versions.

        Parameters
        ----------
        other : `Packages`
            Other packages to compare against.

        Returns
        -------
        difference : `dict` [`str`, `tuple` [ `str`, `str` ]]
            Packages in common with differing versions. Keys (type `str`) are
            package names; values (type `tuple` [ `str`, `str` ]) are the
            versions in self and in ``other``.
        """
        return {
            pkg: (self[pkg], other[pkg]) for pkg in self.keys() & other.keys() if self[pkg] != other[pkg]
        }
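

# Illustrative sketch, not part of the module API (hypothetical helper name
# and made-up package versions): comparing two ``Packages`` instances and
# round-tripping one through the byte-level API used by read()/write().
def _example_packages_comparison() -> None:
    old = Packages({"numpy": "1.24.0", "scipy": "1.10.0"})
    new = Packages({"numpy": "1.26.0", "astropy": "5.3"})
    print(new.extra(old))  # {'astropy': '5.3'}
    print(new.missing(old))  # {'scipy': '1.10.0'}
    print(new.difference(old))  # {'numpy': ('1.26.0', '1.24.0')}

    # JSON round trip: fromBytes() is the inverse of toBytes().
    data = new.toBytes("json")
    assert Packages.fromBytes(data, "json") == new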


class _BackwardsCompatibilityUnpickler(pickle.Unpickler):
    """Replacement for the default unpickler.

    It is required so that users of this API can read pickle files
    created when the `~lsst.utils.packages.Packages` class was in a different
    package and known as ``lsst.base.Packages``. If this unpickler is being
    used then we know for sure that we must return a
    `~lsst.utils.packages.Packages` instance.
    """

    def find_class(self, module: str, name: str) -> type:
        """Return the class that should be used for unpickling.

        This is always known to be the class in this package.
        """
        return Packages


# Register YAML representers.


def _pkg_representer(dumper: yaml.Dumper, data: Any) -> yaml.MappingNode:
    """Represent Packages as a simple dict."""
    return dumper.represent_mapping("lsst.utils.packages.Packages", data, flow_style=None)


yaml.add_representer(Packages, _pkg_representer)


def _pkg_constructor(loader: yaml.constructor.SafeConstructor, node: yaml.Node) -> Any:
    """Convert YAML representation back to Python class."""
    yield Packages(loader.construct_mapping(node, deep=True))  # type: ignore


for loader in (yaml.Loader, yaml.CLoader, yaml.UnsafeLoader, yaml.SafeLoader, yaml.FullLoader):
    yaml.add_constructor("lsst.utils.packages.Packages", _pkg_constructor, Loader=loader)

    # Register the old name with YAML.
    yaml.add_constructor("lsst.base.Packages", _pkg_constructor, Loader=loader)
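

# Illustrative sketch, not part of the module API (hypothetical helper name):
# thanks to the representer and constructors registered above, a ``Packages``
# instance should survive a YAML round trip, and files written with the old
# ``lsst.base.Packages`` tag load as this class.
def _example_yaml_round_trip() -> None:
    pkgs = Packages({"example_pkg": "1.0"})  # made-up package name
    text = yaml.dump(pkgs)
    restored = yaml.safe_load(text)
    assert isinstance(restored, Packages)
    assert restored == pkgs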