Coverage for python/lsst/utils/packages.py: 22%
176 statements
« prev ^ index » next coverage.py v7.2.3, created at 2023-04-19 03:35 -0700
« prev ^ index » next coverage.py v7.2.3, created at 2023-04-19 03:35 -0700
1# This file is part of utils.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
11#
12from __future__ import annotations
14"""
15Determine which packages are being used in the system and their versions
16"""
17import hashlib
18import importlib
19import io
20import json
21import logging
22import os
23import pickle
24import re
25import subprocess
26import sys
27import types
28from collections.abc import Mapping
29from functools import lru_cache
30from typing import Any, Dict, Optional, Tuple, Type
32import yaml
34log = logging.getLogger(__name__)
36__all__ = [
37 "getVersionFromPythonModule",
38 "getPythonPackages",
39 "getEnvironmentPackages",
40 "getCondaPackages",
41 "Packages",
42]
# Packages that are only used at build time (for example header-only
# libraries).
BUILDTIME = {"boost", "eigen", "tmv"}

# Python modules that we try to import ourselves so that a version can be
# obtained.  The version only appears to be available from python even
# though it is the library that we use.
PYTHON = {"galsim"}

# Packages that do not seem to offer any way of reporting their version at
# run time, so the version has to be guessed from the environment.
ENVIRONMENT = {"astrometry_net", "astrometry_net_data", "minuit2", "xpa"}
def getVersionFromPythonModule(module: types.ModuleType) -> str:
    """Return the version advertised by a python module.

    Parameters
    ----------
    module : `module`
        Module to interrogate.

    Returns
    -------
    version : `str`
        The value of the module's ``__version__``, extended with the
        versions of any build-time packages the module reports.

    Raises
    ------
    AttributeError
        Raised if the module does not define ``__version__``.

    Notes
    -----
    ``__dependency_versions__`` (a variable set by LSST's
    `~lsst.sconsUtils` at build time) is consulted only for the packages
    listed in ``BUILDTIME``; other dependencies recorded there are not
    added to the returned string.
    """
    version = module.__version__
    if not hasattr(module, "__dependency_versions__"):
        return str(version)

    # Append the build-time dependencies, in alphabetical order.
    deps = module.__dependency_versions__
    used = sorted(BUILDTIME.intersection(deps.keys()))
    if used:
        suffix = " ".join(f"{pkg!s}={deps[pkg]!s}" for pkg in used)
        version += " with " + suffix
    return str(version)
def getPythonPackages() -> Dict[str, str]:
    """Get imported python packages and their versions.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are package names; values (type `str`) are their
        versions.  The interpreter itself is always reported under the
        ``python`` key.

    Notes
    -----
    We wade through `sys.modules` and attempt to determine the version for
    each module. Note, therefore, that we can only report on modules that
    have *already* been imported.

    We don't include any module for which we cannot determine a version.

    Modules whose name starts with ``lsst.`` are reported without that
    prefix and with the remaining dots replaced by underscores (for example
    ``lsst.afw`` becomes ``afw``).
    """
    # Attempt to import libraries that only report their version in python
    for module_name in PYTHON:
        try:
            importlib.import_module(module_name)
        except Exception:
            pass  # It's not available, so don't care

    packages = {"python": sys.version}
    lsst_prefix = "lsst."

    # Work from a snapshot of the names: sys.modules can change while we
    # are looking at it.
    for name in list(sys.modules):
        # The entry may have gone away since the snapshot was taken.
        module = sys.modules.get(name)
        if module is None:
            continue
        try:
            ver = getVersionFromPythonModule(module)
        except Exception:
            continue  # Can't get a version from it, don't care

        # Remove "foo.bar.version" in favor of "foo.bar"
        # This prevents duplication when the __init__.py includes
        # "from .version import *"
        modified = False
        for ending in (".version", "._version"):
            if name.endswith(ending):
                name = name[: -len(ending)]
                modified = True
                break

        # Use LSST package names instead of python module names
        # This matches the names we get from the environment (i.e., EUPS)
        # so we can clobber these build-time versions if the environment
        # reveals that we're not using the packages as-built.
        # Only the leading prefix is removed; a later "lsst." inside the
        # dotted name is part of the package name.  This is done before the
        # duplicate check below so that the check sees the same key that
        # is stored in ``packages``.
        if name.startswith(lsst_prefix):
            name = name[len(lsst_prefix):].replace(".", "_")

        # Check if this name has already been registered with a different
        # version.  This can happen if x._version is encountered after x.
        # Raising an exception would go against the ethos of this package,
        # so trust the version reported by the primary module and drop the
        # one derived from the stripped name.  A version coming from a
        # primary module always replaces what was stored before.
        if modified and name in packages and ver != packages[name]:
            continue

        packages[name] = ver

    return packages
_eups: Optional[Any] = None  # Singleton Eups object


def _gitVersionSuffix(directory: str) -> str:
    """Describe the state of the git checkout found in a directory.

    Parameters
    ----------
    directory : `str`
        Directory expected to contain a ``.git`` entry.

    Returns
    -------
    suffix : `str`
        ``@NO_GIT`` if ``directory`` has no ``.git`` entry, ``@GIT_ERROR``
        if running ``git`` failed, else ``@`` followed by the output of
        ``git rev-parse HEAD`` and, when ``git diff`` produces any output,
        ``+`` followed by the MD5 hex digest of that output.
    """
    gitDir = os.path.join(directory, ".git")
    if not os.path.exists(gitDir):
        return "@NO_GIT"

    location = ["--git-dir=" + gitDir, "--work-tree=" + directory]
    revCmd = ["git", *location, "rev-parse", "HEAD"]
    diffCmd = ["git", "--no-pager", *location, "diff", "--patch"]
    try:
        rev = subprocess.check_output(revCmd).decode().strip()
        diff = subprocess.check_output(diffCmd)
    except Exception:
        # Deliberately best-effort: record the failure in the version
        # string instead of raising.
        return "@GIT_ERROR"

    suffix = "@" + rev
    if diff:
        # The digest is only a fingerprint of the local modifications.
        suffix += "+" + hashlib.md5(diff).hexdigest()
    return suffix


@lru_cache(maxsize=1)
def getEnvironmentPackages(include_all: bool = False) -> Dict[str, str]:
    """Get products and their versions from the environment.

    Parameters
    ----------
    include_all : `bool`
        If `False` only returns locally-setup packages. If `True` all set
        up packages are returned with a version that includes any associated
        non-current tags.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions.  Empty if EUPS can not be imported.

    Notes
    -----
    We use EUPS to determine the version of certain products (those that don't
    provide a means to determine the version any other way) and to check if
    uninstalled packages are being used. We only report the product/version
    for these packages unless ``include_all`` is `True`.

    The result is cached with `functools.lru_cache`, so repeated calls with
    the same argument return the same `dict` object; callers should treat
    it as read-only.
    """
    try:
        from eups import Eups
        from eups.Product import Product
    except ImportError:
        log.warning("Unable to import eups, so cannot determine package versions from environment")
        return {}

    # Cache eups object since creating it can take a while
    global _eups
    if _eups is None:
        _eups = Eups()
    products = _eups.findProducts(tags=["setup"])

    # Get versions for things we can't determine via runtime mechanisms
    # XXX Should we just grab everything we can, rather than just a
    # predetermined set?
    # ENVIRONMENT holds product *names*, so compare against the name rather
    # than the product object itself.
    packages = {prod.name: prod.version for prod in products if prod.name in ENVIRONMENT}

    # The string 'LOCAL:' (the value of Product.LocalVersionPrefix) in the
    # version name indicates uninstalled code, so the version could be
    # different than what's being reported by the runtime environment (because
    # we don't tend to run "scons" every time we update some python file,
    # and even if we did sconsUtils probably doesn't check to see if the repo
    # is clean).
    for prod in products:
        if prod.version.startswith(Product.LocalVersionPrefix):
            packages[prod.name] = prod.version + _gitVersionSuffix(prod.dir)
        elif include_all:
            # Sort the tags so that the reported version does not depend on
            # set iteration order.
            tags = sorted({t for t in prod.tags if t != "current"})
            tag_msg = " (" + " ".join(tags) + ")" if tags else ""
            packages[prod.name] = prod.version + tag_msg
    return packages
@lru_cache(maxsize=1)
def getCondaPackages() -> Dict[str, str]:
    """Get products and their versions from the conda environment.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions.  If the environment name can be worked out from the path
        to the python executable it is included under the ``conda_env``
        key.

    Notes
    -----
    Returns empty result if a conda environment is not in use or can not
    be queried.

    The result is cached with `functools.lru_cache`, so every call returns
    the same `dict` object; callers should treat it as read-only.
    """
    try:
        from conda.cli.python_api import Commands, run_command
    except ImportError:
        return {}

    # Get the installed package list.  A failure to run the query or to
    # interpret its output is reported and treated as "no information", as
    # promised in the Notes above.
    try:
        versions_json = run_command(Commands.LIST, "--json")
        packages = {pkg["name"]: pkg["version"] for pkg in json.loads(versions_json[0])}
    except Exception as err:
        log.warning("Unable to query conda for the installed packages: %s", err)
        return {}

    # Try to work out the conda environment name and include it as a fake
    # package. The "obvious" way of running "conda info --json" does give
    # access to the active_prefix but takes about 2 seconds to run.
    # The equivalent to the code above would be:
    #    info_json = run_command(Commands.INFO, "--json")
    # As a compromise look for the env name in the path to the python
    # executable
    match = re.search(r"/envs/(.*?)/bin/", sys.executable)
    if match:
        packages["conda_env"] = match.group(1)

    return packages
class Packages(dict):
    """A table of packages and their versions.

    There are a few different types of packages, and their versions are
    collected in different ways:

    1. Installed Conda packages are obtained via the Conda API. Conda is
       not required.
    2. Python modules (e.g., afw, numpy; galsim is also in this group even
       though we only use it through the library, because no version
       information is currently provided through the library): we get their
       version from the ``__version__`` module variable. Note that this means
       that we're only aware of modules that have already been imported.
    3. Other packages provide no run-time accessible version information (e.g.,
       astrometry_net): we get their version from interrogating the
       environment. Currently, that means EUPS; if EUPS is replaced or dropped
       then we'll need to consider an alternative means of getting this version
       information.
    4. Local versions of packages (a non-installed EUPS package, selected with
       ``setup -r /path/to/package``): we identify these through the
       environment (EUPS again) and use as a version the path supplemented with
       the ``git`` SHA and, if the git repo isn't clean, an MD5 of the diff.

    These package versions are collected and stored in a Packages object, which
    provides useful comparison and persistence features.

    Example usage:

    .. code-block:: python

        from lsst.utils.packages import Packages
        pkgs = Packages.fromSystem()
        print("Current packages:", pkgs)
        old = Packages.read("/path/to/packages.pickle")
        print("Old packages:", old)
        print("Missing packages compared to before:", pkgs.missing(old))
        print("Extra packages compared to before:", pkgs.extra(old))
        print("Different packages: ", pkgs.difference(old))
        old.update(pkgs)  # Include any new packages in the old
        old.write("/path/to/packages.pickle")

    Parameters
    ----------
    packages : `dict`
        A mapping {package: version} where both keys and values are type `str`.

    Notes
    -----
    This is a wrapper around a dict with some convenience methods.
    """

    # Maps a file extension to the name of the serialization format.
    formats = {".pkl": "pickle", ".pickle": "pickle", ".yaml": "yaml", ".json": "json"}

    def __setstate__(self, state: Dict[str, Any]) -> None:
        """Restore pickled instance state.

        Parameters
        ----------
        state : `dict`
            Pickled instance state.  Old pickle files stored the package
            table under the ``_packages`` key; that table is merged into
            this mapping.  Any other state is restored as ordinary
            instance attributes.
        """
        if "_packages" in state:
            self.update(state["_packages"])
        else:
            # Not an old-style pickle: fall back to what unpickling does
            # when no __setstate__ is defined.
            self.__dict__.update(state)

    @classmethod
    def fromSystem(cls) -> "Packages":
        """Construct a `Packages` by examining the system.

        Determine packages by examining python's `sys.modules`, conda
        libraries and EUPS. EUPS packages take precedence over conda and
        general python packages.

        Returns
        -------
        packages : `Packages`
            All version package information that could be obtained.
        """
        packages: Dict[str, str] = {}
        packages.update(getPythonPackages())
        packages.update(getCondaPackages())
        # Should be last, to override products with LOCAL versions
        packages.update(getEnvironmentPackages())
        return cls(packages)

    @classmethod
    def fromBytes(cls, data: bytes, format: str) -> "Packages":
        """Construct the object from a byte representation.

        Parameters
        ----------
        data : `bytes`
            The serialized form of this object in bytes.
        format : `str`
            The format of those bytes. Can be ``yaml``, ``json``, or
            ``pickle``.

        Returns
        -------
        packages : `Packages`
            The package information read from the input data.

        Raises
        ------
        ValueError
            Raised if ``format`` is not one of the supported formats.
        TypeError
            Raised if the deserialized object is not an instance of this
            class.
        """
        if format == "pickle":
            # NOTE(review): the custom unpickler maps every class reference
            # to `Packages`, but pickle data should still only be read from
            # sources that are trusted.
            new = _BackwardsCompatibilityUnpickler(io.BytesIO(data)).load()
        elif format == "yaml":
            new = yaml.load(data, Loader=yaml.SafeLoader)
        elif format == "json":
            new = cls(json.loads(data))
        else:
            raise ValueError(f"Unexpected serialization format given: {format}")
        if not isinstance(new, cls):
            raise TypeError(f"Extracted object of class '{type(new)}' but expected '{cls}'")
        return new

    @classmethod
    def read(cls, filename: str) -> "Packages":
        """Read packages from filename.

        Parameters
        ----------
        filename : `str`
            Filename from which to read. The format is determined from the
            file extension. Currently support ``.pickle``, ``.pkl``,
            ``.json``, and ``.yaml``.

        Returns
        -------
        packages : `Packages`
            The packages information read from the file.

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        _, ext = os.path.splitext(filename)
        if ext not in cls.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "rb") as ff:
            # We assume that these classes are tiny so there is no
            # substantive memory impact by reading the entire file up front
            data = ff.read()
        return cls.fromBytes(data, cls.formats[ext])

    def toBytes(self, format: str) -> bytes:
        """Convert the object to a serialized bytes form using the
        specified format.

        Parameters
        ----------
        format : `str`
            Format to use when serializing. Can be ``yaml``, ``json``,
            or ``pickle``.

        Returns
        -------
        data : `bytes`
            Byte string representing the serialized object.

        Raises
        ------
        ValueError
            Raised if ``format`` is not one of the supported formats.
        """
        if format == "pickle":
            return pickle.dumps(self)
        elif format == "yaml":
            return yaml.dump(self).encode("utf-8")
        elif format == "json":
            return json.dumps(self).encode("utf-8")
        else:
            raise ValueError(f"Unexpected serialization format requested: {format}")

    def write(self, filename: str) -> None:
        """Write to file.

        Parameters
        ----------
        filename : `str`
            Filename to which to write. The format of the data file
            is determined from the file extension. Currently supports
            ``.pickle``, ``.pkl``, ``.json``, and ``.yaml``.

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        _, ext = os.path.splitext(filename)
        if ext not in self.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "wb") as ff:
            # Assumes that the bytes serialization of this object is
            # relatively small.
            ff.write(self.toBytes(self.formats[ext]))

    def __str__(self) -> str:
        # One "name:version" entry per line, sorted alphabetically by
        # package name for convenience in reading.
        body = ",\n".join(f"{prod!r}:{self[prod]!r}" for prod in sorted(self))
        return f"{self.__class__.__name__}({{\n{body},\n}})"

    def __repr__(self) -> str:
        # Default repr() does not report the class name.
        return f"{self.__class__.__name__}({super().__repr__()})"

    def extra(self, other: Mapping) -> Dict[str, str]:
        """Get packages in self but not in another `Packages` object.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        extra : `dict`
            Extra packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: self[pkg] for pkg in self.keys() - other.keys()}

    def missing(self, other: Mapping) -> Dict[str, str]:
        """Get packages in another `Packages` object but missing from self.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        missing : `dict` [`str`, `str`]
            Missing packages. Keys (type `str`) are package names; values
            (type `str`) are their versions in ``other``.
        """
        return {pkg: other[pkg] for pkg in other.keys() - self.keys()}

    def difference(self, other: Mapping) -> Dict[str, Tuple[str, str]]:
        """Get packages present in both self and another `Packages` object
        but with different versions.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        difference : `dict` [`str`, `tuple` [`str`, `str`]]
            Packages whose versions differ. Keys (type `str`) are package
            names; values (type `tuple` [`str`, `str`]) are the version in
            self followed by the version in ``other``. Packages that are
            present in only one of the two are not included; see `extra`
            and `missing` for those.
        """
        return {pkg: (self[pkg], other[pkg]) for pkg in self.keys() & other.keys() if self[pkg] != other[pkg]}
class _BackwardsCompatibilityUnpickler(pickle.Unpickler):
    """Unpickler that maps every pickled class onto `Packages`.

    Pickle files written when the `~lsst.utils.packages.Packages` class
    lived in a different package and was known as ``lsst.base.Packages``
    refer to the class by that old name. Whenever this unpickler is in use
    the result must be a `~lsst.utils.packages.Packages` instance, so the
    class named in the pickle is not looked up at all.
    """

    def find_class(self, module: str, name: str) -> Type:
        """Return the class that should be used for unpickling.

        Parameters
        ----------
        module : `str`
            Module name recorded in the pickle. Ignored.
        name : `str`
            Class name recorded in the pickle. Ignored.

        Returns
        -------
        cls : `type`
            Always the `Packages` class from this module.
        """
        return Packages
# Register YAML representers


def pkg_representer(dumper: yaml.Dumper, data: Any) -> yaml.MappingNode:
    """Represent Packages as a simple dict.

    The mapping node is tagged ``lsst.utils.packages.Packages``.
    """
    tag = "lsst.utils.packages.Packages"
    return dumper.represent_mapping(tag, data, flow_style=None)


yaml.add_representer(Packages, pkg_representer)
def pkg_constructor(loader: yaml.constructor.SafeConstructor, node: yaml.Node) -> Any:
    """Construct a `Packages` from a YAML mapping node.

    This is a generator that yields a single `Packages` built from the
    fully constructed (``deep=True``) mapping.
    """
    mapping = loader.construct_mapping(node, deep=True)
    yield Packages(mapping)  # type: ignore
# Register the constructor with every loader class this PyYAML installation
# provides.  The classes are looked up by name because some of them are not
# always present (``CLoader`` needs the LibYAML bindings), and a missing one
# must not make importing this module fail.
for _loader_name in ("Loader", "CLoader", "UnsafeLoader", "SafeLoader", "FullLoader"):
    loader = getattr(yaml, _loader_name, None)
    if loader is None:
        continue
    yaml.add_constructor("lsst.utils.packages.Packages", pkg_constructor, Loader=loader)

    # Register the old name with YAML.
    yaml.add_constructor("lsst.base.Packages", pkg_constructor, Loader=loader)