lsst.base g8cdbaf45f8+8827c0d83c
LSST Data Management Base Package
packages.py
Go to the documentation of this file.
# This file is part of base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""
Determine which packages are being used in the system and their versions
"""
import hashlib
import importlib
import logging
import os
import pickle
import re
import subprocess
import sys
from functools import lru_cache

import yaml

from .versions import getRuntimeVersions
36
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Public names exported by a star-import of this module.
__all__ = ["getVersionFromPythonModule", "getPythonPackages", "getEnvironmentPackages",
           "getCondaPackages", "Packages"]
41
42
# Packages used at build-time (e.g., header-only)
BUILDTIME = {"boost", "eigen", "tmv"}

# Python modules to attempt to load so we can try to get the version
# We do this because the version only appears to be available from python,
# but we use the library
PYTHON = {"galsim"}

# Packages that don't seem to have a mechanism for reporting the runtime
# version.  We need to guess the version from the environment
ENVIRONMENT = {"astrometry_net", "astrometry_net_data", "minuit2", "xpa"}
54
55
def getVersionFromPythonModule(module):
    """Determine the version of a python module.

    Parameters
    ----------
    module : `module`
        Module for which to get version.

    Returns
    -------
    version : `str`
        The module's ``__version__``, converted to `str`, optionally
        followed by ``" with pkg=ver ..."`` for build-time dependencies.

    Raises
    ------
    AttributeError
        Raised if __version__ attribute is not set.

    Notes
    -----
    We supplement the version with information from the
    ``__dependency_versions__`` (a specific variable set by LSST's
    `~lsst.sconsUtils` at build time) only for packages that are typically
    used only at build-time.
    """
    version = module.__version__
    if hasattr(module, "__dependency_versions__"):
        # Add build-time dependencies
        deps = module.__dependency_versions__
        buildtime = BUILDTIME & set(deps.keys())
        if buildtime:
            # Sorted so the resulting string is deterministic.
            version += " with " + " ".join(f"{pkg}={deps[pkg]}" for pkg in sorted(buildtime))
    return str(version)
89
90
def getPythonPackages():
    """Get imported python packages and their versions.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are package names; values (type `str`) are their
        versions.

    Notes
    -----
    We wade through `sys.modules` and attempt to determine the version for each
    module.  Note, therefore, that we can only report on modules that have
    *already* been imported.

    We don't include any module for which we cannot determine a version.
    """
    # Attempt to import libraries that only report their version in python
    for module in PYTHON:
        try:
            importlib.import_module(module)
        except Exception:
            pass  # It's not available, so don't care

    packages = {"python": sys.version}
    # Iterate over a snapshot of the keys: sys.modules can change while we
    # are looking at it, so iterating over it directly is subject to race
    # conditions.
    moduleNames = list(sys.modules.keys())
    for name in moduleNames:
        # Use .get() because the entry may have been removed since the
        # snapshot was taken; a missing (None) module has no __version__ and
        # is skipped by the except clause below.
        module = sys.modules.get(name)
        try:
            ver = getVersionFromPythonModule(module)
        except Exception:
            continue  # Can't get a version from it, don't care

        # Remove "foo.bar.version" in favor of "foo.bar"
        # This prevents duplication when the __init__.py includes
        # "from .version import *"
        for ending in (".version", "._version"):
            if name.endswith(ending):
                name = name[:-len(ending)]
                if name in packages:
                    assert ver == packages[name]
            elif name in packages:
                assert ver == packages[name]

        # Use LSST package names instead of python module names
        # This matches the names we get from the environment (i.e., EUPS)
        # so we can clobber these build-time versions if the environment
        # reveals that we're not using the packages as-built.
        if "lsst" in name:
            name = name.replace("lsst.", "").replace(".", "_")

        packages[name] = ver

    return packages
147
148
_eups = None  # Singleton Eups object


@lru_cache(maxsize=1)
def getEnvironmentPackages():
    """Get products and their versions from the environment.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions.  Empty if ``eups`` cannot be imported.

    Notes
    -----
    We use EUPS to determine the version of certain products (those that don't
    provide a means to determine the version any other way) and to check if
    uninstalled packages are being used. We only report the product/version
    for these packages.

    The result is cached, so the environment is only examined on the first
    call.
    """
    try:
        from eups import Eups
        from eups.Product import Product
    except ImportError:
        log.warning("Unable to import eups, so cannot determine package versions from environment")
        return {}

    # Cache eups object since creating it can take a while
    global _eups
    if _eups is None:
        _eups = Eups()
    products = _eups.findProducts(tags=["setup"])

    # Get versions for things we can't determine via runtime mechanisms
    # XXX Should we just grab everything we can, rather than just a
    # predetermined set?
    # ENVIRONMENT holds product *names*, so the membership test must use
    # prod.name rather than the product object itself.
    packages = {prod.name: prod.version for prod in products if prod.name in ENVIRONMENT}

    # The string 'LOCAL:' (the value of Product.LocalVersionPrefix) in the
    # version name indicates uninstalled code, so the version could be
    # different than what's being reported by the runtime environment (because
    # we don't tend to run "scons" every time we update some python file,
    # and even if we did sconsUtils probably doesn't check to see if the repo
    # is clean).
    for prod in products:
        if not prod.version.startswith(Product.LocalVersionPrefix):
            continue
        ver = prod.version

        gitDir = os.path.join(prod.dir, ".git")
        if os.path.exists(gitDir):
            # get the git revision and an indication if the working copy is
            # clean
            revCmd = ["git", "--git-dir=" + gitDir, "--work-tree=" + prod.dir, "rev-parse", "HEAD"]
            diffCmd = ["git", "--no-pager", "--git-dir=" + gitDir, "--work-tree=" + prod.dir, "diff",
                       "--patch"]
            try:
                rev = subprocess.check_output(revCmd).decode().strip()
                diff = subprocess.check_output(diffCmd)
            except Exception:
                ver += "@GIT_ERROR"
            else:
                ver += "@" + rev
                if diff:
                    # MD5 is used only as a fingerprint of the uncommitted
                    # changes, not for security.
                    ver += "+" + hashlib.md5(diff).hexdigest()
        else:
            ver += "@NO_GIT"

        packages[prod.name] = ver
    return packages
219
220
@lru_cache(maxsize=1)
def getCondaPackages():
    """Get products and their versions from the conda environment.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions.

    Notes
    -----
    Returns empty result if the conda python API cannot be imported.

    The result is cached, so conda is only queried on the first call.
    """

    try:
        import json
        from conda.cli.python_api import Commands, run_command
    except ImportError:
        return {}

    # Get the installed package list; the first element of the returned
    # value is the JSON text that is parsed below.
    versions_json = run_command(Commands.LIST, "--json")
    packages = {pkg["name"]: pkg["version"] for pkg in json.loads(versions_json[0])}

    # Try to work out the conda environment name and include it as a fake
    # package. The "obvious" way of running "conda info --json" does give
    # access to the active_prefix but takes about 2 seconds to run.
    # The equivalent to the code above would be:
    #    info_json = run_command(Commands.INFO, "--json")
    # As a compromise look for the env name in the path to the python
    # executable
    match = re.search(r"/envs/(.*?)/bin/", sys.executable)
    if match:
        packages["conda_env"] = match.group(1)

    return packages
259
260
class Packages(dict):
    """A table of packages and their versions.

    There are a few different types of packages, and their versions are
    collected in different ways:

    1. Run-time libraries (e.g., cfitsio, fftw): we get their version from
       interrogating the dynamic library
    2. Python modules (e.g., afw, numpy; galsim is also in this group even
       though we only use it through the library, because no version
       information is currently provided through the library): we get their
       version from the ``__version__`` module variable. Note that this means
       that we're only aware of modules that have already been imported.
    3. Other packages provide no run-time accessible version information (e.g.,
       astrometry_net): we get their version from interrogating the
       environment.  Currently, that means EUPS; if EUPS is replaced or dropped
       then we'll need to consider an alternative means of getting this version
       information.
    4. Local versions of packages (a non-installed EUPS package, selected with
       ``setup -r /path/to/package``): we identify these through the
       environment (EUPS again) and use as a version the path supplemented with
       the ``git`` SHA and, if the git repo isn't clean, an MD5 of the diff.

    These package versions are collected and stored in a Packages object, which
    provides useful comparison and persistence features.

    Example usage:

    .. code-block:: python

        from lsst.base import Packages
        pkgs = Packages.fromSystem()
        print("Current packages:", pkgs)
        old = Packages.read("/path/to/packages.pickle")
        print("Old packages:", old)
        print("Missing packages compared to before:", pkgs.missing(old))
        print("Extra packages compared to before:", pkgs.extra(old))
        print("Different packages: ", pkgs.difference(old))
        old.update(pkgs)  # Include any new packages in the old
        old.write("/path/to/packages.pickle")

    Parameters
    ----------
    packages : `dict`
        A mapping {package: version} where both keys and values are type `str`.

    Notes
    -----
    This is essentially a wrapper around a dict with some conveniences.
    """

    # Mapping from file extension to serialization format name.
    formats = {".pkl": "pickle",
               ".pickle": "pickle",
               ".yaml": "yaml"}

    def __setstate__(self, state):
        # This only seems to be called for old pickle files where
        # the data was stored in _packages.
        self.update(state["_packages"])

    @classmethod
    def fromSystem(cls):
        """Construct a `Packages` by examining the system.

        Determine packages by examining python's `sys.modules`, runtime
        libraries and EUPS.

        Returns
        -------
        packages : `Packages`
        """
        packages = {}
        packages.update(getPythonPackages())
        packages.update(getCondaPackages())
        packages.update(getRuntimeVersions())
        packages.update(getEnvironmentPackages())  # Should be last, to override products with LOCAL versions
        return cls(packages)

    @classmethod
    def fromBytes(cls, data, format):
        """Construct the object from a byte representation.

        Parameters
        ----------
        data : `bytes`
            The serialized form of this object in bytes.
        format : `str`
            The format of those bytes. Can be ``yaml`` or ``pickle``.

        Returns
        -------
        packages : `Packages`
            The deserialized object.

        Raises
        ------
        ValueError
            Raised if ``format`` is not a supported format.
        TypeError
            Raised if the deserialized object is not an instance of this
            class.
        """
        if format == "pickle":
            # NOTE: unpickling can execute arbitrary code; only use the
            # pickle format with data from a trusted source.
            new = pickle.loads(data)
        elif format == "yaml":
            new = yaml.load(data, Loader=yaml.SafeLoader)
        else:
            raise ValueError(f"Unexpected serialization format given: {format}")
        if not isinstance(new, cls):
            raise TypeError(f"Extracted object of class '{type(new)}' but expected '{cls}'")
        return new

    @classmethod
    def read(cls, filename):
        """Read packages from filename.

        Parameters
        ----------
        filename : `str`
            Filename from which to read. The format is determined from the
            file extension.  Currently support ``.pickle``, ``.pkl``
            and ``.yaml``.

        Returns
        -------
        packages : `Packages`

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        _, ext = os.path.splitext(filename)
        if ext not in cls.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "rb") as ff:
            # We assume that these classes are tiny so there is no
            # substantive memory impact by reading the entire file up front
            data = ff.read()
        return cls.fromBytes(data, cls.formats[ext])

    def toBytes(self, format):
        """Convert the object to a serialized bytes form using the
        specified format.

        Parameters
        ----------
        format : `str`
            Format to use when serializing. Can be ``yaml`` or ``pickle``.

        Returns
        -------
        data : `bytes`
            Byte string representing the serialized object.

        Raises
        ------
        ValueError
            Raised if ``format`` is not a supported format.
        """
        if format == "pickle":
            return pickle.dumps(self)
        elif format == "yaml":
            return yaml.dump(self).encode("utf-8")
        else:
            raise ValueError(f"Unexpected serialization format requested: {format}")

    def write(self, filename):
        """Write to file.

        Parameters
        ----------
        filename : `str`
            Filename to which to write. The format of the data file
            is determined from the file extension. Currently supports
            ``.pickle``, ``.pkl`` and ``.yaml``

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        _, ext = os.path.splitext(filename)
        if ext not in self.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "wb") as ff:
            # Assumes that the bytes serialization of this object is
            # relatively small.
            ff.write(self.toBytes(self.formats[ext]))

    def __str__(self):
        ss = "%s({\n" % self.__class__.__name__
        # Sort alphabetically by module name, for convenience in reading
        ss += ",\n".join(f"{prod!r}:{self[prod]!r}" for prod in sorted(self))
        ss += ",\n})"
        return ss

    def __repr__(self):
        # Default repr() does not report the class name.
        return f"{self.__class__.__name__}({super().__repr__()})"

    def extra(self, other):
        """Get packages in self but not in another `Packages` object.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        extra : `dict`
            Extra packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: self[pkg] for pkg in self.keys() - other.keys()}

    def missing(self, other):
        """Get packages in another `Packages` object but missing from self.

        Parameters
        ----------
        other : `Packages`
            Other packages to compare against.

        Returns
        -------
        missing : `dict`
            Missing packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: other[pkg] for pkg in other.keys() - self.keys()}

    def difference(self, other):
        """Get packages present in both self and another `Packages` object
        whose versions differ.

        Parameters
        ----------
        other : `Packages`
            Other packages to compare against.

        Returns
        -------
        difference : `dict`
            Packages with differing versions. Keys (type `str`) are package
            names; values are ``(self version, other version)`` tuples.
        """
        return {pkg: (self[pkg], other[pkg]) for
                pkg in self.keys() & other.keys() if self[pkg] != other[pkg]}
483
484
# Register YAML representers

def pkg_representer(dumper, data):
    """Represent Packages as a simple dict.

    Parameters
    ----------
    dumper
        Object providing ``represent_mapping``; the YAML dumper in use.
    data : `Packages`
        The mapping to represent.

    Returns
    -------
    node
        Whatever ``dumper.represent_mapping`` returns for ``data`` tagged
        as ``lsst.base.Packages``.
    """
    return dumper.represent_mapping("lsst.base.Packages", data,
                                    flow_style=None)
491
492
yaml.add_representer(Packages, pkg_representer)


def pkg_constructor(loader, node):
    """Construct a `Packages` from a YAML mapping node.

    Written as a generator that yields the constructed object, as in the
    original code.

    Parameters
    ----------
    loader
        The YAML loader; its ``construct_mapping`` builds the plain mapping.
    node
        The YAML node tagged ``lsst.base.Packages``.
    """
    yield Packages(loader.construct_mapping(node, deep=True))


# Register the constructor with every loader class this PyYAML provides.
# Some loaders are not present in every build (e.g. CLoader is only available
# when PyYAML was built with LibYAML), so look them up by name and skip the
# missing ones instead of failing at import time.
for loader in (getattr(yaml, loaderName, None) for loaderName in
               ("Loader", "CLoader", "UnsafeLoader", "SafeLoader", "FullLoader")):
    if loader is not None:
        yaml.add_constructor("lsst.base.Packages", pkg_constructor, Loader=loader)
def fromBytes(cls, data, format)
Definition: packages.py:340
def __setstate__(self, state)
Definition: packages.py:316
def read(cls, filename)
Definition: packages.py:361
def missing(self, other)
Definition: packages.py:450
def write(self, filename)
Definition: packages.py:405
def difference(self, other)
Definition: packages.py:466
def extra(self, other)
Definition: packages.py:434
def toBytes(self, format)
Definition: packages.py:384
def getEnvironmentPackages()
Definition: packages.py:153
def getCondaPackages()
Definition: packages.py:222
def getPythonPackages()
Definition: packages.py:91
def pkg_representer(dumper, data)
Definition: packages.py:487
def getVersionFromPythonModule(module)
Definition: packages.py:56
std::map< std::string, std::string > getRuntimeVersions()
Return version strings for dependencies.
Definition: versions.cc:54