Coverage for python/lsst/utils/packages.py: 21%
198 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-01 02:29 -0700
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-01 02:29 -0700
1# This file is part of utils.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
11#
12from __future__ import annotations
14"""
15Determine which packages are being used in the system and their versions
16"""
17import hashlib
18import importlib
19import io
20import json
21import logging
22import os
23import pickle
24import re
25import subprocess
26import sys
27import types
28from collections.abc import Mapping
29from functools import lru_cache
30from typing import Any, Dict, Optional, Tuple, Type
32import yaml
# Logger used by every function in this module.
log = logging.getLogger(__name__)

# Public names of this module.
__all__ = [
    "getVersionFromPythonModule",
    "getPythonPackages",
    "getEnvironmentPackages",
    "getCondaPackages",
    "Packages",
]
# Packages that are only needed while building (e.g., header-only libraries).
BUILDTIME = {"boost", "eigen", "tmv"}

# Python modules we try to import purely so that a version can be read.
# The version only appears to be available from python even though it is
# the library that is actually used.
PYTHON = {"galsim"}

# Packages with no apparent mechanism for reporting their version at runtime;
# their version has to be guessed from the environment.
ENVIRONMENT = {"astrometry_net", "astrometry_net_data", "minuit2", "xpa"}

# Python 3.10 added ``sys.stdlib_module_names``. Every module listed there
# shares the version of Python itself. Older interpreters lack the attribute,
# in which case no module is treated as belonging to the standard library.
_STDLIB = getattr(sys, "stdlib_module_names", frozenset())
def getVersionFromPythonModule(module: types.ModuleType) -> str:
    """Return the version advertised by a python module.

    Parameters
    ----------
    module : `module`
        Module whose version is wanted.

    Returns
    -------
    version : `str`
        The value of ``module.__version__`` converted to a string. When the
        module records versions for build-time packages this is followed by
        ``" with "`` and a space-separated list of ``name=version`` items.

    Raises
    ------
    AttributeError
        Raised if the ``__version__`` attribute is not set.

    Notes
    -----
    The extra information comes from ``__dependency_versions__`` (a variable
    set by LSST's `~lsst.sconsUtils` at build time). Only the packages named
    in ``BUILDTIME``, i.e. those typically used only at build-time, are
    reported.
    """
    version = module.__version__

    try:
        dependency_versions = module.__dependency_versions__
    except AttributeError:
        # No build-time dependency information was recorded.
        return str(version)

    # Keep only the dependencies that are known to be build-time packages,
    # in alphabetical order so the result is reproducible.
    used_at_build = sorted(BUILDTIME & set(dependency_versions.keys()))
    if used_at_build:
        details = [f"{pkg}={dependency_versions[pkg]!s}" for pkg in used_at_build]
        version += " with " + " ".join(details)
    return str(version)
def getPythonPackages() -> Dict[str, str]:
    """Get imported python packages and their versions.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are package names; values (type `str`) are their
        versions.

    Notes
    -----
    We wade through `sys.modules` and attempt to determine the version for each
    module.  Note, therefore, that we can only report on modules that have
    *already* been imported.

    We don't include any module for which we cannot determine a version.

    Modules in the ``lsst`` namespace are reported under a product-style
    name: the leading ``lsst.`` is removed and the remaining dots are replaced
    by underscores (``lsst.daf.butler`` becomes ``daf_butler``).
    """
    # Attempt to import libraries that only report their version in python
    for module_name in PYTHON:
        try:
            importlib.import_module(module_name)
        except Exception:
            # Deliberately best-effort: if it is not available (or fails to
            # import for any reason) there is simply nothing to report.
            pass

    packages = {"python": sys.version}

    # Take a snapshot of the names rather than iterating over sys.modules
    # directly, because that iteration is not atomic and is subject to race
    # conditions.
    module_names = list(sys.modules)

    # Use knowledge of package hierarchy to find the versions rather than
    # using each name independently. Group all the module names into the
    # hierarchy, splitting on dot, and skipping any component that starts
    # with an underscore.

    # Sorting the module names gives us:
    # lsst
    # lsst.afw
    # lsst.afw.cameraGeom
    # ...
    # lsst.daf
    # lsst.daf.butler
    #
    # and so we can use knowledge of the previous version to inform whether
    # we need to look at the subsequent line.
    n_versions = 0
    n_checked = 0
    previous = ""
    for name in sorted(module_names):
        if name.startswith("_") or "._" in name:
            # Refers to a private module so we can ignore it and assume
            # version has been lifted into parent or, if top level, not
            # relevant for versioning. This applies also to standard library
            # packages such as _abc and __future__.
            continue

        if name in _STDLIB:
            # Assign all standard library packages the python version
            # since they almost all lack explicit versions.
            packages[name] = sys.version
            previous = name
            continue

        if name.startswith(previous + ".") and previous in packages:
            # Already have this version. Use the same previous name
            # for the line after this.
            continue

        # Look for a version.
        ver = _get_python_package_version(name, packages)

        n_checked += 1
        if ver is not None:
            n_versions += 1
            previous = name

    log.debug(
        "Given %d modules but checked %d in hierarchy and found versions for %d",
        len(module_names),
        n_checked,
        n_versions,
    )

    lsst_prefix = "lsst."
    for name in list(packages):
        # Use LSST package names instead of python module names
        # This matches the names we get from the environment (i.e., EUPS)
        # so we can clobber these build-time versions if the environment
        # reveals that we're not using the packages as-built.
        if name.startswith(lsst_prefix):
            # Strip only the leading prefix. Removing every "lsst."
            # occurrence would mangle names such as
            # "lsst.obs.lsst.translators" into "obs_translators".
            new_name = name[len(lsst_prefix) :].replace(".", "_")
            packages[new_name] = packages.pop(name)

    return packages
def _get_python_package_version(name: str, packages: Dict[str, str]) -> Optional[str]:
    """Given a package or module name, try to determine the version.

    Parameters
    ----------
    name : `str`
        The name of the package or module to try.
    packages : `dict`[`str`, `str`]
        A dictionary mapping a name to a version. Modified in place.

    Returns
    -------
    ver : `str` or `None`
        The version string stored in ``packages``. Nothing is stored if the
        value here is `None`.

    Notes
    -----
    The installed distribution metadata is consulted first. If that fails
    for any reason the already-imported module of the same name, if there is
    one, is asked for its version instead.
    """
    # ``import importlib`` on its own does not load the ``metadata``
    # submodule, so import it explicitly. Otherwise the lookup below could
    # fail with AttributeError and silently always take the fallback path.
    from importlib import metadata

    try:
        # This is the Python standard way to find a package version.
        # It can be slow.
        ver = metadata.version(name)
    except Exception:
        # Fall back to using the module itself. There is no guarantee
        # that "a" exists for module "a.b" so if hierarchy has been expanded
        # this might fail. Check first.
        module = sys.modules.get(name)
        if module is None:
            return None
        try:
            ver = getVersionFromPythonModule(module)
        except Exception:
            return None  # Can't get a version from it, don't care

    # Update the package information.
    if ver is not None:
        packages[name] = ver

    return ver
_eups: Optional[Any] = None  # Singleton Eups object


@lru_cache(maxsize=1)
def getEnvironmentPackages(include_all: bool = False) -> Dict[str, str]:
    """Get products and their versions from the environment.

    Parameters
    ----------
    include_all : `bool`
        If `False` only returns locally-setup packages. If `True` all set
        up packages are returned with a version that includes any associated
        non-current tags.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions. An empty `dict` is returned if EUPS can not be imported.

    Notes
    -----
    We use EUPS to determine the version of certain products (those that don't
    provide a means to determine the version any other way) and to check if
    uninstalled packages are being used. We only report the product/version
    for these packages unless ``include_all`` is `True`.

    The result is cached by `functools.lru_cache`, so repeated calls with the
    same argument return the same `dict` object; callers should not modify it.
    """
    global _eups

    try:
        from eups import Eups
        from eups.Product import Product
    except ImportError:
        log.warning("Unable to import eups, so cannot determine package versions from environment")
        return {}

    # Cache eups object since creating it can take a while
    if _eups is None:
        _eups = Eups()
    products = _eups.findProducts(tags=["setup"])

    # Get versions for things we can't determine via runtime mechanisms
    # XXX Should we just grab everything we can, rather than just a
    # predetermined set?
    # ENVIRONMENT holds product *names*, so compare against the name and not
    # against the product object itself.
    packages = {prod.name: prod.version for prod in products if prod.name in ENVIRONMENT}

    # The string 'LOCAL:' (the value of Product.LocalVersionPrefix) in the
    # version name indicates uninstalled code, so the version could be
    # different than what's being reported by the runtime environment (because
    # we don't tend to run "scons" every time we update some python file,
    # and even if we did sconsUtils probably doesn't check to see if the repo
    # is clean).
    for prod in products:
        if not prod.version.startswith(Product.LocalVersionPrefix):
            if include_all:
                # Sort the tags so that the version string is reproducible
                # from one run to the next.
                tags = sorted(t for t in prod.tags if t != "current")
                tag_msg = " (" + " ".join(tags) + ")" if tags else ""
                packages[prod.name] = prod.version + tag_msg
            continue
        ver = prod.version

        gitDir = os.path.join(prod.dir, ".git")
        if os.path.exists(gitDir):
            # get the git revision and an indication if the working copy is
            # clean
            revCmd = ["git", "--git-dir=" + gitDir, "--work-tree=" + prod.dir, "rev-parse", "HEAD"]
            diffCmd = [
                "git",
                "--no-pager",
                "--git-dir=" + gitDir,
                "--work-tree=" + prod.dir,
                "diff",
                "--patch",
            ]
            try:
                rev = subprocess.check_output(revCmd).decode().strip()
                diff = subprocess.check_output(diffCmd)
            except Exception:
                ver += "@GIT_ERROR"
            else:
                ver += "@" + rev
                if diff:
                    # The MD5 digest is appended only when the diff output
                    # is non-empty, to mark a modified working copy.
                    ver += "+" + hashlib.md5(diff).hexdigest()
        else:
            ver += "@NO_GIT"

        packages[prod.name] = ver
    return packages
@lru_cache(maxsize=1)
def getCondaPackages() -> Dict[str, str]:
    """Get products and their versions from the conda environment.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions.

    Notes
    -----
    Returns empty result if a conda environment is not in use or can not
    be queried.

    The result is cached by `functools.lru_cache`, so an empty result caused
    by a failed query is also remembered for later calls.
    """
    try:
        from conda.cli.python_api import Commands, run_command
    except ImportError:
        return {}

    # Get the installed package list. Any failure to run the command or to
    # interpret its output means conda can not be queried, which is documented
    # to give an empty result rather than an exception.
    try:
        versions_json = run_command(Commands.LIST, "--json")
        packages = {pkg["name"]: pkg["version"] for pkg in json.loads(versions_json[0])}
    except Exception:
        log.debug("Unable to query conda for the installed package list", exc_info=True)
        return {}

    # Try to work out the conda environment name and include it as a fake
    # package. The "obvious" way of running "conda info --json" does give
    # access to the active_prefix but takes about 2 seconds to run.
    # The equivalent to the code above would be:
    #    info_json = run_command(Commands.INFO, "--json")
    # As a compromise look for the env name in the path to the python
    # executable
    match = re.search(r"/envs/(.*?)/bin/", sys.executable)
    if match:
        packages["conda_env"] = match.group(1)

    return packages
class Packages(dict):
    """A table of packages and their versions.

    There are a few different types of packages, and their versions are
    collected in different ways:

    1. Installed Conda packages are obtained via the Conda API. Conda is
       not required.
    2. Python modules (e.g., afw, numpy; galsim is also in this group even
       though we only use it through the library, because no version
       information is currently provided through the library): we get their
       version from the ``__version__`` module variable. Note that this means
       that we're only aware of modules that have already been imported.
    3. Other packages provide no run-time accessible version information (e.g.,
       astrometry_net): we get their version from interrogating the
       environment.  Currently, that means EUPS; if EUPS is replaced or dropped
       then we'll need to consider an alternative means of getting this version
       information.
    4. Local versions of packages (a non-installed EUPS package, selected with
       ``setup -r /path/to/package``): we identify these through the
       environment (EUPS again) and use as a version the path supplemented with
       the ``git`` SHA and, if the git repo isn't clean, an MD5 of the diff.

    These package versions are collected and stored in a Packages object, which
    provides useful comparison and persistence features.

    Example usage:

    .. code-block:: python

        from lsst.utils.packages import Packages
        pkgs = Packages.fromSystem()
        print("Current packages:", pkgs)
        old = Packages.read("/path/to/packages.pickle")
        print("Old packages:", old)
        print("Missing packages compared to before:", pkgs.missing(old))
        print("Extra packages compared to before:", pkgs.extra(old))
        print("Different packages: ", pkgs.difference(old))
        old.update(pkgs)  # Include any new packages in the old
        old.write("/path/to/packages.pickle")

    Parameters
    ----------
    packages : `dict`
        A mapping {package: version} where both keys and values are type `str`.

    Notes
    -----
    This is a wrapper around a dict with some convenience methods.
    """

    # Maps each recognized file extension to the name of the serialization
    # format accepted by `fromBytes` and `toBytes`.
    formats = {".pkl": "pickle", ".pickle": "pickle", ".yaml": "yaml", ".json": "json"}

    def __setstate__(self, state: Dict[str, Any]) -> None:
        if "_packages" in state:
            # Old pickle files stored the data in a ``_packages`` attribute;
            # copy it into the dict itself.
            self.update(state["_packages"])
        else:
            # Anything else is ordinary instance-attribute state; restore it
            # the way default unpickling would instead of failing with a
            # KeyError.
            self.__dict__.update(state)

    @classmethod
    def fromSystem(cls) -> "Packages":
        """Construct a `Packages` by examining the system.

        Determine packages by examining python's `sys.modules`, conda
        libraries and EUPS. EUPS packages take precedence over conda and
        general python packages.

        Returns
        -------
        packages : `Packages`
            All version package information that could be obtained.
        """
        packages = {}
        packages.update(getPythonPackages())
        packages.update(getCondaPackages())
        packages.update(getEnvironmentPackages())  # Should be last, to override products with LOCAL versions
        return cls(packages)

    @classmethod
    def fromBytes(cls, data: bytes, format: str) -> "Packages":
        """Construct the object from a byte representation.

        Parameters
        ----------
        data : `bytes`
            The serialized form of this object in bytes.
        format : `str`
            The format of those bytes. Can be ``yaml``, ``json``, or
            ``pickle``.

        Returns
        -------
        packages : `Packages`
            The package information read from the input data.

        Raises
        ------
        ValueError
            Raised if ``format`` is not one of the supported formats.
        TypeError
            Raised if the deserialized object is not an instance of this
            class.
        """
        if format == "pickle":
            # NOTE: pickle data should only be read from trusted sources.
            file = io.BytesIO(data)
            new = _BackwardsCompatibilityUnpickler(file).load()
        elif format == "yaml":
            new = yaml.load(data, Loader=yaml.SafeLoader)
        elif format == "json":
            new = cls(json.loads(data))
        else:
            raise ValueError(f"Unexpected serialization format given: {format}")
        if not isinstance(new, cls):
            raise TypeError(f"Extracted object of class '{type(new)}' but expected '{cls}'")
        return new

    @classmethod
    def read(cls, filename: str) -> "Packages":
        """Read packages from filename.

        Parameters
        ----------
        filename : `str`
            Filename from which to read. The format is determined from the
            file extension.  Currently support ``.pickle``, ``.pkl``,
            ``.json``, and ``.yaml``.

        Returns
        -------
        packages : `Packages`
            The packages information read from the file.

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        _, ext = os.path.splitext(filename)
        if ext not in cls.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "rb") as ff:
            # We assume that these classes are tiny so there is no
            # substantive memory impact by reading the entire file up front
            data = ff.read()
        return cls.fromBytes(data, cls.formats[ext])

    def toBytes(self, format: str) -> bytes:
        """Convert the object to a serialized bytes form using the
        specified format.

        Parameters
        ----------
        format : `str`
            Format to use when serializing. Can be ``yaml``, ``json``,
            or ``pickle``.

        Returns
        -------
        data : `bytes`
            Byte string representing the serialized object.

        Raises
        ------
        ValueError
            Raised if ``format`` is not one of the supported formats.
        """
        if format == "pickle":
            return pickle.dumps(self)
        elif format == "yaml":
            return yaml.dump(self).encode("utf-8")
        elif format == "json":
            return json.dumps(self).encode("utf-8")
        else:
            raise ValueError(f"Unexpected serialization format requested: {format}")

    def write(self, filename: str) -> None:
        """Write to file.

        Parameters
        ----------
        filename : `str`
            Filename to which to write. The format of the data file
            is determined from the file extension. Currently supports
            ``.pickle``, ``.pkl``, ``.json``, and ``.yaml``.

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        _, ext = os.path.splitext(filename)
        if ext not in self.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "wb") as ff:
            # Assumes that the bytes serialization of this object is
            # relatively small.
            ff.write(self.toBytes(self.formats[ext]))

    def __str__(self) -> str:
        # One "name:version," line per package, sorted alphabetically by
        # module name for convenience in reading. Terminating every entry
        # (rather than joining and appending a final comma) means an empty
        # table does not print a stray comma line.
        body = "".join(f"{prod!r}:{self[prod]!r},\n" for prod in sorted(self))
        return f"{self.__class__.__name__}({{\n{body}}})"

    def __repr__(self) -> str:
        # Default repr() does not report the class name.
        return f"{self.__class__.__name__}({super().__repr__()})"

    def extra(self, other: Mapping) -> Dict[str, str]:
        """Get packages in self but not in another `Packages` object.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        extra : `dict`
            Extra packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: self[pkg] for pkg in self.keys() - other.keys()}

    def missing(self, other: Mapping) -> Dict[str, str]:
        """Get packages in another `Packages` object but missing from self.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        missing : `dict` [`str`, `str`]
            Missing packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: other[pkg] for pkg in other.keys() - self.keys()}

    def difference(self, other: Mapping) -> Dict[str, Tuple[str, str]]:
        """Get packages present in both self and another `Packages` object
        whose versions differ.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        difference : `dict` [`str`, `tuple` [`str`, `str`]]
            Packages found in both but with different versions. Keys (type
            `str`) are package names; values (type `tuple`[`str`, `str`])
            are the version in self followed by the version in ``other``.
        """
        return {pkg: (self[pkg], other[pkg]) for pkg in self.keys() & other.keys() if self[pkg] != other[pkg]}
class _BackwardsCompatibilityUnpickler(pickle.Unpickler):
    """Unpickler that resolves every class lookup to `Packages`.

    Pickle files written when `~lsst.utils.packages.Packages` lived in a
    different package, under the name ``lsst.base.Packages``, refer to a
    class that can no longer be imported from there. Anyone using this
    unpickler is by definition reading a `~lsst.utils.packages.Packages`,
    so the module and class name recorded in the file are ignored.
    """

    def find_class(self, module: str, name: str) -> Type:
        """Return the class to instantiate while unpickling.

        Parameters
        ----------
        module : `str`
            Module name recorded in the pickle. Ignored.
        name : `str`
            Class name recorded in the pickle. Ignored.

        Returns
        -------
        cls : `type`
            Always the `Packages` class defined in this module.
        """
        return Packages
# Tell YAML how to write out a Packages instance.


def pkg_representer(dumper: yaml.Dumper, data: Any) -> yaml.MappingNode:
    """Represent Packages as a simple dict.

    The mapping node is tagged ``lsst.utils.packages.Packages``.
    """
    tag = "lsst.utils.packages.Packages"
    return dumper.represent_mapping(tag, data, flow_style=None)


yaml.add_representer(Packages, pkg_representer)
def pkg_constructor(loader: yaml.constructor.SafeConstructor, node: yaml.Node) -> Any:
    """Construct a `Packages` from a YAML mapping node.

    Written as a generator: it yields the single `Packages` built from the
    node's mapping (constructed with ``deep=True``).
    """
    yield Packages(loader.construct_mapping(node, deep=True))  # type: ignore


# ``yaml.CLoader`` only exists when PyYAML was built with the LibYAML
# bindings, so look it up defensively instead of failing at import time.
for loader in (yaml.Loader, getattr(yaml, "CLoader", None), yaml.UnsafeLoader, yaml.SafeLoader, yaml.FullLoader):
    if loader is None:
        continue
    yaml.add_constructor("lsst.utils.packages.Packages", pkg_constructor, Loader=loader)

    # Register the old name with YAML.
    yaml.add_constructor("lsst.base.Packages", pkg_constructor, Loader=loader)