Coverage for python/lsst/utils/packages.py: 21%

232 statements  

« prev     ^ index     » next       coverage.py v7.4.3, created at 2024-03-14 10:19 -0700

1# This file is part of utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11# 

12"""Determine which packages are being used in the system and their versions. 

13""" 

14 

15from __future__ import annotations 

16 

17import contextlib 

18import hashlib 

19import importlib 

20import io 

21import json 

22import logging 

23import os 

24import pickle 

25import re 

26import subprocess 

27import sys 

28import types 

29from collections.abc import Mapping 

30from functools import lru_cache 

31from importlib.metadata import packages_distributions 

32from typing import Any, ClassVar 

33 

34import yaml 

35 

36log = logging.getLogger(__name__) 

37 

38__all__ = [ 

39 "getVersionFromPythonModule", 

40 "getPythonPackages", 

41 "getEnvironmentPackages", 

42 "getCondaPackages", 

43 "Packages", 

44] 

45 

46 

# Products that only matter when building (for example header-only
# libraries).
BUILDTIME = {"boost", "eigen", "tmv"}

# Names of Python modules that are imported up front so that a version can
# be reported for them. Needed when the version is only exposed through
# Python even though it is the library that is actually used.
PYTHON: set[str] = set()

SPECIAL_NAMESPACES = {"lsst"}

# Products that do not appear to offer any way of reporting their version
# at runtime, so the version has to be guessed from the environment.
ENVIRONMENT = {"astrometry_net", "astrometry_net_data", "minuit2", "xpa"}

# Python 3.10 provides the names of the standard library modules; these all
# share the version number of Python itself. An empty set is used when the
# interpreter does not provide the attribute.
_STDLIB = getattr(sys, "stdlib_module_names", frozenset())

67 

68 

def getVersionFromPythonModule(module: types.ModuleType) -> str:
    """Determine the version of a python module.

    Parameters
    ----------
    module : `~types.ModuleType`
        Module for which to get version.

    Returns
    -------
    version : `str`
        The version of the python module.

    Raises
    ------
    AttributeError
        Raised if ``__version__`` attribute is not set.

    Notes
    -----
    We supplement the version with information from the
    ``__dependency_versions__`` (a specific variable set by LSST's
    `~lsst.sconsUtils` at build time) only for packages that are typically
    used only at build-time.

    The ``__version__`` attribute is converted to `str` before anything is
    appended to it, so modules that expose a non-string version (such as a
    tuple) are handled as well.
    """
    # Convert first: appending the build-time suffix to a non-string version
    # would otherwise raise TypeError.
    version = str(module.__version__)

    # A missing, None or empty dependency table contributes nothing.
    deps = getattr(module, "__dependency_versions__", None)
    if deps:
        # Add build-time dependencies
        buildtime = BUILDTIME.intersection(deps.keys())
        if buildtime:
            version += " with " + " ".join(f"{pkg}={deps[pkg]}" for pkg in sorted(buildtime))
    return version

102 

103 

def getPythonPackages() -> dict[str, str]:
    """Get imported python packages and their versions.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are package names; values (type `str`) are their
        versions.

    Notes
    -----
    We wade through `sys.modules` and attempt to determine the version for each
    module.  Note, therefore, that we can only report on modules that have
    *already* been imported.

    We don't include any module for which we cannot determine a version.

    Modules in the ``lsst`` namespace are reported under their LSST package
    name: the leading ``lsst.`` is dropped and remaining dots become
    underscores (``lsst.daf.butler`` is reported as ``daf_butler``).
    """
    # Attempt to import libraries that only report their version in python
    for module_name in PYTHON:
        # If it's not available we continue.
        with contextlib.suppress(Exception):
            importlib.import_module(module_name)

    package_dist = packages_distributions()

    packages = {"python": sys.version}

    # Take a snapshot of the module names rather than iterating over
    # sys.modules directly, since it can change while we are looking at it.
    module_names = sorted(list(sys.modules.keys()))

    # Use knowledge of package hierarchy to find the versions rather than
    # using each name independently. Group all the module names into the
    # hierarchy, splitting on dot, and skipping any component that starts
    # with an underscore.

    # Sorting the module names gives us:
    # lsst
    # lsst.afw
    # lsst.afw.cameraGeom
    # ...
    # lsst.daf
    # lsst.daf.butler
    #
    # and so we can use knowledge of the previous version to inform whether
    # we need to look at the subsequent line.
    n_versions = 0
    n_checked = 0
    previous = ""
    for name in module_names:
        if name.startswith("_") or "._" in name:
            # Refers to a private module so we can ignore it and assume
            # version has been lifted into parent or, if top level, not
            # relevant for versioning. This applies also to standard library
            # packages such as _abc and __future__.
            continue

        if name in _STDLIB:
            # Assign all standard library packages the python version
            # since they almost all lack explicit versions.
            packages[name] = sys.version
            previous = name
            continue

        if name.startswith(previous + ".") and previous in packages:
            # Already have this version. Use the same previous name
            # for the line after this.
            continue

        # Find the namespace which we need to use package_dist.
        namespace = name.split(".")[0]

        # package_dist is a mapping from import namespace to distribution
        # package names. This may be a one-to-many mapping due to namespace
        # packages. Note that package_dist does not know about editable
        # installs or eups installs via path manipulation.
        dist_names = package_dist.get(namespace, [name])

        ver = _get_python_package_version(name, namespace, dist_names, packages)

        n_checked += 1
        if ver is not None:
            n_versions += 1
            previous = name

    log.debug(
        "Given %d modules but checked %d in hierarchy and found versions for %d",
        len(module_names),
        n_checked,
        n_versions,
    )

    for name in list(packages):
        # Use LSST package names instead of python module names
        # This matches the names we get from the environment (i.e., EUPS)
        # so we can clobber these build-time versions if the environment
        # reveals that we're not using the packages as-built.
        if name.startswith("lsst."):
            # Only strip the leading namespace; a plain str.replace would
            # also eat any later component that happens to end in "lsst.".
            new_name = name.removeprefix("lsst.").replace(".", "_")
            packages[new_name] = packages.pop(name)

    return packages

210 

211 

def _get_python_package_version(
    name: str, namespace: str, dist_names: list[str], packages: dict[str, str]
) -> str | None:
    """Try to work out the version of a package or module from its name.

    Parameters
    ----------
    name : `str`
        The imported name of the package or module to try.
    namespace : `str`
        The namespace of the package or module.
    dist_names : `list` [ `str` ]
        The distribution names of the package or module.
    packages : `dict` [ `str`, `str` ]
        A dictionary mapping a name to a version. Modified in place.
        The key used might not match exactly the given key.

    Returns
    -------
    ver : `str` or `None`
        The version string stored in ``packages``. Nothing is stored if the
        value here is `None`.
    """
    separators = "[._-]"

    def _same_hierarchy(candidate: str, wanted: list[str]) -> bool:
        # Compare the components of a distribution name with those of the
        # imported name. The namespace is prepended when the distribution
        # name does not start with it; this is needed because (at least)
        # lsst.ts packages do not use ``lsst`` in the package name.
        pieces = re.split(separators, candidate)
        if pieces[0] != namespace:
            pieces = [namespace, *pieces]
        return pieces == wanted

    # Certain special namespaces are used via eups and have to be
    # enumerated here, as do namespaces shared by several distributions.
    if len(dist_names) > 1 or namespace in SPECIAL_NAMESPACES:
        wanted_parts = re.split(separators, name)
        # The fallback to ``name`` covers (a) testing the overall namespace
        # (e.g. "lsst" or "sphinxcontrib"), where the lookup below will
        # return None; and (b) eups-installed and other "editable
        # installations" that are not registered as part of
        # importlib.packages_distributions().
        dist_name = next(
            (candidate for candidate in dist_names if _same_hierarchy(candidate, wanted_parts)),
            name,
        )
    else:
        dist_name = dist_names[0]

    try:
        # The standard Python way of finding a package version. It can be
        # slow.
        ver = importlib.metadata.version(dist_name)
    except Exception:
        # Fall back to asking the module itself. There is no guarantee that
        # "a" exists for module "a.b", so if the hierarchy has been expanded
        # the module may not be there at all.
        module = sys.modules.get(name)
        if module is None:
            return None
        try:
            ver = getVersionFromPythonModule(module)
        except Exception:
            # Can't get a version from it, don't care
            return None

    # Record the version against the imported name.
    if ver is not None:
        packages[name] = ver

    return ver

285 

286 

287_eups: Any | None = None # Singleton Eups object 

288 

289 

@lru_cache(maxsize=1)
def getEnvironmentPackages(include_all: bool = False) -> dict[str, str]:
    """Get products and their versions from the environment.

    Parameters
    ----------
    include_all : `bool`
        If `False` only returns locally-setup packages. If `True` all set
        up packages are returned with a version that includes any associated
        non-current tags.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions. Empty if EUPS cannot be imported.

    Notes
    -----
    We use EUPS to determine the version of certain products (those that don't
    provide a means to determine the version any other way) and to check if
    uninstalled packages are being used. We only report the product/version
    for these packages unless ``include_all`` is `True`.

    For locally set up products the reported version is the EUPS version
    followed by ``@<git SHA>`` and, if the working copy has uncommitted
    changes, ``+<MD5 of the diff>``. ``@GIT_ERROR`` is appended if git
    could not be run and ``@NO_GIT`` if the product directory has no
    ``.git`` entry.
    """
    try:
        from eups import Eups
        from eups.Product import Product
    except ImportError:
        log.warning("Unable to import eups, so cannot determine package versions from environment")
        return {}

    # Cache eups object since creating it can take a while
    global _eups
    if not _eups:
        _eups = Eups()
    products = _eups.findProducts(tags=["setup"])

    # Get versions for things we can't determine via runtime mechanisms
    # XXX Should we just grab everything we can, rather than just a
    # predetermined set?
    # ENVIRONMENT holds product names, so compare the name and not the
    # product object itself.
    packages = {prod.name: prod.version for prod in products if prod.name in ENVIRONMENT}

    # The string 'LOCAL:' (the value of Product.LocalVersionPrefix) in the
    # version name indicates uninstalled code, so the version could be
    # different than what's being reported by the runtime environment (because
    # we don't tend to run "scons" every time we update some python file,
    # and even if we did sconsUtils probably doesn't check to see if the repo
    # is clean).
    for prod in products:
        if not prod.version.startswith(Product.LocalVersionPrefix):
            if include_all:
                tags = {t for t in prod.tags if t != "current"}
                tag_msg = " (" + " ".join(tags) + ")" if tags else ""
                packages[prod.name] = prod.version + tag_msg
            continue
        ver = prod.version

        gitDir = os.path.join(prod.dir, ".git")
        if os.path.exists(gitDir):
            # get the git revision and an indication if the working copy is
            # clean
            revCmd = ["git", "--git-dir=" + gitDir, "--work-tree=" + prod.dir, "rev-parse", "HEAD"]
            diffCmd = [
                "git",
                "--no-pager",
                "--git-dir=" + gitDir,
                "--work-tree=" + prod.dir,
                "diff",
                "--patch",
            ]
            try:
                rev = subprocess.check_output(revCmd).decode().strip()
                diff = subprocess.check_output(diffCmd)
            except Exception:
                ver += "@GIT_ERROR"
            else:
                ver += "@" + rev
                if diff:
                    # The MD5 only fingerprints the local modifications.
                    ver += "+" + hashlib.md5(diff).hexdigest()
        else:
            ver += "@NO_GIT"

        packages[prod.name] = ver
    return packages

374 

375 

@lru_cache(maxsize=1)
def getCondaPackages() -> dict[str, str]:
    """Get products and their versions from the conda environment.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions. The entries are ordered by product name. If the name of
        the conda environment can be determined it is included under the
        ``conda_env`` key.

    Notes
    -----
    Returns empty result if a conda environment is not in use or can not
    be queried.

    Metadata files that cannot be read, are not valid JSON, or do not
    contain ``name`` and ``version`` entries are skipped.
    """
    prefix = os.environ.get("CONDA_PREFIX")
    if prefix is None:
        return {}

    # conda list is very slow. Ten times faster to scan the directory
    # directly. This will only find conda packages and not pip installed
    # packages.
    meta_path = os.path.join(prefix, "conda-meta")

    try:
        entries = os.scandir(meta_path)
    except OSError:
        # Missing, not a directory, or not accessible: nothing to report.
        return {}

    found: dict[str, str] = {}

    # Use the iterator as a context manager so that the directory handle
    # is released promptly.
    with entries:
        for entry in entries:
            if not entry.name.endswith(".json"):
                continue
            try:
                # JSON is UTF-8 by specification; do not rely on the locale.
                with open(entry.path, encoding="utf-8") as f:
                    data = json.load(f)
            except (OSError, ValueError):
                # Unreadable, undecodable or malformed file.
                continue
            try:
                found[data["name"]] = data["version"]
            except (KeyError, TypeError):
                # Valid JSON that is not a package description.
                continue

    packages = dict(sorted(found.items()))

    # Try to work out the conda environment name and include it as a fake
    # package. The "obvious" way of running "conda info --json" does give
    # access to the active_prefix but takes about 2 seconds to run.
    # As a compromise look for the env name in the path to the python
    # executable
    match = re.search(r"/envs/(.*?)/bin/", sys.executable)
    if match:
        packages["conda_env"] = match.group(1)

    return packages

431 

432 

class Packages(dict):
    """A table of packages and their versions.

    There are a few different types of packages, and their versions are
    collected in different ways:

    1. Installed Conda packages are obtained from the metadata of the
       active Conda environment. Conda is not required.
    2. Python modules (e.g., afw, numpy; galsim is also in this group even
       though we only use it through the library, because no version
       information is currently provided through the library): we get their
       version from the ``__version__`` module variable. Note that this means
       that we're only aware of modules that have already been imported.
    3. Other packages provide no run-time accessible version information (e.g.,
       astrometry_net): we get their version from interrogating the
       environment.  Currently, that means EUPS; if EUPS is replaced or dropped
       then we'll need to consider an alternative means of getting this version
       information.
    4. Local versions of packages (a non-installed EUPS package, selected with
       ``setup -r /path/to/package``): we identify these through the
       environment (EUPS again) and use as a version the path supplemented with
       the ``git`` SHA and, if the git repo isn't clean, an MD5 of the diff.

    These package versions are collected and stored in a Packages object, which
    provides useful comparison and persistence features.

    Example usage:

    .. code-block:: python

        from lsst.utils.packages import Packages
        pkgs = Packages.fromSystem()
        print("Current packages:", pkgs)
        old = Packages.read("/path/to/packages.pickle")
        print("Old packages:", old)
        print("Missing packages compared to before:", pkgs.missing(old))
        print("Extra packages compared to before:", pkgs.extra(old))
        print("Different packages: ", pkgs.difference(old))
        old.update(pkgs)  # Include any new packages in the old
        old.write("/path/to/packages.pickle")

    Notes
    -----
    This is a wrapper around a dict with some convenience methods.
    """

    # Mapping from supported file extension to serialization format name.
    formats: ClassVar[dict[str, str]] = {
        ".pkl": "pickle",
        ".pickle": "pickle",
        ".yaml": "yaml",
        ".json": "json",
    }

    def __setstate__(self, state: dict[str, Any]) -> None:
        # This only seems to be called for old pickle files where
        # the data was stored in _packages.
        self.update(state["_packages"])

    @classmethod
    def fromSystem(cls) -> Packages:
        """Construct a `Packages` by examining the system.

        Determine packages by examining python's `sys.modules`, conda
        libraries and EUPS. EUPS packages take precedence over conda and
        general python packages.

        Returns
        -------
        packages : `Packages`
            All version package information that could be obtained.
        """
        packages = {}
        packages.update(getPythonPackages())
        packages.update(getCondaPackages())
        packages.update(getEnvironmentPackages())  # Should be last, to override products with LOCAL versions
        return cls(packages)

    @classmethod
    def fromBytes(cls, data: bytes, format: str) -> Packages:
        """Construct the object from a byte representation.

        Parameters
        ----------
        data : `bytes`
            The serialized form of this object in bytes.
        format : `str`
            The format of those bytes. Can be ``yaml``, ``json``, or
            ``pickle``.

        Returns
        -------
        packages : `Packages`
            The package information read from the input data.

        Raises
        ------
        ValueError
            Raised if the format is not recognized.
        TypeError
            Raised if the deserialized object is not an instance of this
            class.
        """
        if format == "pickle":
            # NOTE: pickle data should only be read from trusted sources.
            file = io.BytesIO(data)
            new = _BackwardsCompatibilityUnpickler(file).load()
        elif format == "yaml":
            new = yaml.load(data, Loader=yaml.SafeLoader)
        elif format == "json":
            new = cls(json.loads(data))
        else:
            raise ValueError(f"Unexpected serialization format given: {format}")
        if not isinstance(new, cls):
            raise TypeError(f"Extracted object of class '{type(new)}' but expected '{cls}'")
        return new

    @classmethod
    def read(cls, filename: str) -> Packages:
        """Read packages from filename.

        Parameters
        ----------
        filename : `str`
            Filename from which to read. The format is determined from the
            file extension.  Currently support ``.pickle``, ``.pkl``,
            ``.json``, and ``.yaml``.

        Returns
        -------
        packages : `Packages`
            The packages information read from the file.

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        _, ext = os.path.splitext(filename)
        if ext not in cls.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "rb") as ff:
            # We assume that these classes are tiny so there is no
            # substantive memory impact by reading the entire file up front
            data = ff.read()
        return cls.fromBytes(data, cls.formats[ext])

    def toBytes(self, format: str) -> bytes:
        """Convert the object to a serialized bytes form using the
        specified format.

        Parameters
        ----------
        format : `str`
            Format to use when serializing. Can be ``yaml``, ``json``,
            or ``pickle``.

        Returns
        -------
        data : `bytes`
            Byte string representing the serialized object.

        Raises
        ------
        ValueError
            Raised if the format is not recognized.
        """
        if format == "pickle":
            return pickle.dumps(self)
        elif format == "yaml":
            return yaml.dump(self).encode("utf-8")
        elif format == "json":
            return json.dumps(self).encode("utf-8")
        else:
            raise ValueError(f"Unexpected serialization format requested: {format}")

    def write(self, filename: str) -> None:
        """Write to file.

        Parameters
        ----------
        filename : `str`
            Filename to which to write. The format of the data file
            is determined from the file extension. Currently supports
            ``.pickle``, ``.pkl``, ``.json``, and ``.yaml``.

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        _, ext = os.path.splitext(filename)
        if ext not in self.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "wb") as ff:
            # Assumes that the bytes serialization of this object is
            # relatively small.
            ff.write(self.toBytes(self.formats[ext]))

    def __str__(self) -> str:
        # One "name:version," line per package, sorted alphabetically by
        # name for convenience in reading. Terminating every entry (rather
        # than joining them) avoids a stray comma line for an empty table.
        body = "".join(f"{prod!r}:{self[prod]!r},\n" for prod in sorted(self))
        return f"{self.__class__.__name__}({{\n{body}}})"

    def __repr__(self) -> str:
        # Default repr() does not report the class name.
        return f"{self.__class__.__name__}({super().__repr__()})"

    def extra(self, other: Mapping) -> dict[str, str]:
        """Get packages in self but not in another `Packages` object.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        extra : `dict`
            Extra packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: self[pkg] for pkg in self.keys() - other.keys()}

    def missing(self, other: Mapping) -> dict[str, str]:
        """Get packages in another `Packages` object but missing from self.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        missing : `dict` [`str`, `str`]
            Missing packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: other[pkg] for pkg in other.keys() - self.keys()}

    def difference(self, other: Mapping) -> dict[str, tuple[str, str]]:
        """Get packages present in both self and another `Packages` object
        whose versions differ.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        difference : `dict` [`str`, `tuple` [ `str`, `str` ]]
            Packages common to both with different versions. Keys
            (type `str`) are package names; values
            (type `tuple` [ `str`, `str` ]) are the version in self followed
            by the version in ``other``.
        """
        return {pkg: (self[pkg], other[pkg]) for pkg in self.keys() & other.keys() if self[pkg] != other[pkg]}

666 

667 

class _BackwardsCompatibilityUnpickler(pickle.Unpickler):
    """Unpickler that maps every pickled class onto `Packages`.

    Pickle files written when `~lsst.utils.packages.Packages` lived in a
    different package, under the name ``lsst.base.Packages``, refer to a
    class that can no longer be looked up by name. Whenever this unpickler
    is in use the result is known to be a
    `~lsst.utils.packages.Packages` instance, so the class recorded in the
    pickle is not consulted.
    """

    def find_class(self, module: str, name: str) -> type:
        """Return the class to instantiate while unpickling.

        The answer does not depend on what the pickle asks for; it is
        always the class defined in this package.

        Parameters
        ----------
        module : `str`
            Ignored.
        name : `str`
            Ignored.

        Returns
        -------
        `type` [`Packages`]
            The Python type to use. Always returns `Packages`.
        """
        return Packages

696 

697 

698# Register YAML representers 

699 

700 

def _pkg_representer(dumper: yaml.Dumper, data: Any) -> yaml.MappingNode:
    """Emit a `Packages` as a mapping node tagged with its class name."""
    tag = "lsst.utils.packages.Packages"
    return dumper.represent_mapping(tag, data, flow_style=None)


yaml.add_representer(Packages, _pkg_representer)

707 

708 

def _pkg_constructor(loader: yaml.constructor.SafeConstructor, node: yaml.Node) -> Any:
    """Convert YAML representation back to Python class.

    Written as a generator: it yields a `Packages` built from the mapping
    stored in ``node``.
    """
    yield Packages(loader.construct_mapping(node, deep=True))  # type: ignore


# Look the loaders up by name because not every PyYAML build provides all of
# them; in particular ``CLoader`` is only present when PyYAML has been built
# with the LibYAML bindings. Referring to a missing loader directly would
# prevent this module from being imported at all.
for _loader_name in ("Loader", "CLoader", "UnsafeLoader", "SafeLoader", "FullLoader"):
    loader = getattr(yaml, _loader_name, None)
    if loader is None:
        continue
    yaml.add_constructor("lsst.utils.packages.Packages", _pkg_constructor, Loader=loader)

    # Register the old name with YAML.
    yaml.add_constructor("lsst.base.Packages", _pkg_constructor, Loader=loader)

716 

717 # Register the old name with YAML. 

718 yaml.add_constructor("lsst.base.Packages", _pkg_constructor, Loader=loader)