Coverage for python/lsst/utils/packages.py: 24%

197 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-25 09:27 +0000

1# This file is part of utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11# 

12"""Determine which packages are being used in the system and their versions. 

13""" 

14 

15from __future__ import annotations 

16 

17import contextlib 

18import hashlib 

19import importlib 

20import io 

21import json 

22import logging 

23import os 

24import pickle 

25import re 

26import subprocess 

27import sys 

28import types 

29from collections.abc import Mapping 

30from functools import lru_cache 

31from typing import Any, ClassVar 

32 

33import yaml 

34 

35log = logging.getLogger(__name__) 

36 

37__all__ = [ 

38 "getVersionFromPythonModule", 

39 "getPythonPackages", 

40 "getEnvironmentPackages", 

41 "getCondaPackages", 

42 "Packages", 

43] 

44 

45 

# Products that only matter while building (for example header-only
# libraries).
BUILDTIME = {"boost", "eigen", "tmv"}

# Modules we try to import purely so that a version can be read: the
# library is what actually gets used, but the version only appears to be
# exposed through python.
PYTHON = {"galsim"}

# Products that offer no runtime mechanism for reporting a version, so the
# version has to be guessed from the environment.
ENVIRONMENT = {"astrometry_net", "astrometry_net_data", "minuit2", "xpa"}

# Python 3.10 and newer publish the names of the standard library modules,
# all of which share the version of Python itself. Fall back to an empty
# set when the attribute is not available.
_STDLIB = getattr(sys, "stdlib_module_names", frozenset())

64 

65 

def getVersionFromPythonModule(module: types.ModuleType) -> str:
    """Determine the version of a python module.

    Parameters
    ----------
    module : `~types.ModuleType`
        Module for which to get version.

    Returns
    -------
    version : `str`
        The module's ``__version__`` converted to a string. If the module
        records build-time dependencies, this is followed by ``" with "``
        and a space-separated, name-sorted list of ``package=version``
        entries.

    Raises
    ------
    AttributeError
        Raised if ``__version__`` attribute is not set.

    Notes
    -----
    We supplement the version with information from the
    ``__dependency_versions__`` (a specific variable set by LSST's
    `~lsst.sconsUtils` at build time) only for packages that are typically
    used only at build-time.
    """
    # Convert up front: ``__version__`` is not guaranteed to be a string and
    # the concatenation below would otherwise raise TypeError.
    version = str(module.__version__)

    # A missing, empty or None ``__dependency_versions__`` adds nothing.
    deps = getattr(module, "__dependency_versions__", None)
    if deps:
        # Only report dependencies known to be build-time only.
        buildtime = BUILDTIME & set(deps)
        if buildtime:
            version += " with " + " ".join(f"{pkg}={deps[pkg]}" for pkg in sorted(buildtime))
    return version

98 

99 

def getPythonPackages() -> dict[str, str]:
    """Get imported python packages and their versions.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are package names; values (type `str`) are their
        versions.

    Notes
    -----
    We wade through `sys.modules` and attempt to determine the version for each
    module.  Note, therefore, that we can only report on modules that have
    *already* been imported.

    We don't include any module for which we cannot determine a version.

    Modules in the ``lsst`` namespace are reported under their LSST product
    name: the leading ``lsst.`` is dropped and the remaining dots become
    underscores, so ``lsst.daf.butler`` is reported as ``daf_butler``.
    """
    # Attempt to import libraries that only report their version in python
    for module_name in PYTHON:
        # If it's not available we continue.
        with contextlib.suppress(Exception):
            importlib.import_module(module_name)

    packages = {"python": sys.version}

    # Work from a snapshot of the names: iterating over sys.modules itself
    # is not atomic and is subject to race conditions.
    module_names = list(sys.modules)

    # Use knowledge of package hierarchy to find the versions rather than
    # using each name independently. Sorting the module names gives us:
    #    lsst
    #    lsst.afw
    #    lsst.afw.cameraGeom
    #    ...
    #    lsst.daf
    #    lsst.daf.butler
    #
    # and so we can use knowledge of the previous version to inform whether
    # we need to look at the subsequent line.
    n_versions = 0
    n_checked = 0
    previous = ""
    for name in sorted(module_names):
        if name.startswith("_") or "._" in name:
            # Refers to a private module so we can ignore it and assume
            # version has been lifted into parent or, if top level, not
            # relevant for versioning. This applies also to standard library
            # packages such as _abc and __future__.
            continue

        if name in _STDLIB:
            # Assign all standard library packages the python version
            # since they almost all lack explicit versions.
            packages[name] = sys.version
            previous = name
            continue

        if name.startswith(previous + ".") and previous in packages:
            # Already have this version. Use the same previous name
            # for the line after this.
            continue

        # Look for a version. The helper stores it in ``packages``.
        ver = _get_python_package_version(name, packages)

        n_checked += 1
        if ver is not None:
            n_versions += 1
            previous = name

    log.debug(
        "Given %d modules but checked %d in hierarchy and found versions for %d",
        len(module_names),
        n_checked,
        n_versions,
    )

    # Use LSST package names instead of python module names.
    # This matches the names we get from the environment (i.e., EUPS)
    # so we can clobber these build-time versions if the environment
    # reveals that we're not using the packages as-built.
    prefix = "lsst."
    for name in list(packages):
        if name.startswith(prefix):
            # Strip only the leading namespace. ``str.replace`` would also
            # remove any later "lsst." inside the name, turning
            # ``lsst.obs.lsst.translators`` into ``obs_translators``.
            product = name[len(prefix):].replace(".", "_")
            packages[product] = packages.pop(name)

    return packages

193 

194 

def _get_python_package_version(name: str, packages: dict[str, str]) -> str | None:
    """Given a package or module name, try to determine the version.

    Parameters
    ----------
    name : `str`
        The name of the package or module to try.
    packages : `dict` [ `str`, `str` ]
        A dictionary mapping a name to a version. Modified in place: when a
        version is found it is stored under ``name``.

    Returns
    -------
    ver : `str` or `None`
        The version string stored in ``packages``. Nothing is stored if the
        value here is `None`.
    """
    # ``import importlib`` on its own does not guarantee that the
    # ``metadata`` submodule has been loaded. Import it explicitly so the
    # lookup below cannot be skipped by a silently swallowed AttributeError.
    import importlib.metadata

    try:
        # This is the Python standard way to find a package version.
        # It can be slow.
        ver = importlib.metadata.version(name)
    except Exception:
        # Fall back to using the module itself. There is no guarantee
        # that "a" exists for module "a.b" so if hierarchy has been expanded
        # this might fail. Check first.
        module = sys.modules.get(name)
        if module is None:
            return None
        try:
            ver = getVersionFromPythonModule(module)
        except Exception:
            return None  # Can't get a version from it, don't care

    # Update the package information.
    if ver is not None:
        packages[name] = ver

    return ver

233 

234 

_eups: Any | None = None  # Singleton Eups object


@lru_cache(maxsize=1)
def getEnvironmentPackages(include_all: bool = False) -> dict[str, str]:
    """Get products and their versions from the environment.

    Parameters
    ----------
    include_all : `bool`
        If `False` only returns locally-setup packages. If `True` all set
        up packages are returned with a version that includes any associated
        non-current tags.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions. Empty if EUPS cannot be imported.

    Notes
    -----
    We use EUPS to determine the version of certain products (those that don't
    provide a means to determine the version any other way) and to check if
    uninstalled packages are being used. We only report the product/version
    for these packages unless ``include_all`` is `True`.

    The result is cached, so repeated calls with the same argument return
    the same `dict` object; callers should not modify it.
    """
    try:
        from eups import Eups
        from eups.Product import Product
    except ImportError:
        log.warning("Unable to import eups, so cannot determine package versions from environment")
        return {}

    # Cache eups object since creating it can take a while
    global _eups
    if _eups is None:
        _eups = Eups()
    products = _eups.findProducts(tags=["setup"])

    # Get versions for things we can't determine via runtime mechanisms.
    # ENVIRONMENT holds product names, so compare against the name rather
    # than the product object itself.
    # XXX Should we just grab everything we can, rather than just a
    # predetermined set?
    packages = {prod.name: prod.version for prod in products if prod.name in ENVIRONMENT}

    # The string 'LOCAL:' (the value of Product.LocalVersionPrefix) in the
    # version name indicates uninstalled code, so the version could be
    # different than what's being reported by the runtime environment (because
    # we don't tend to run "scons" every time we update some python file,
    # and even if we did sconsUtils probably doesn't check to see if the repo
    # is clean).
    for prod in products:
        if not prod.version.startswith(Product.LocalVersionPrefix):
            if include_all:
                # Sort the tags so the version string is reproducible from
                # one process to the next.
                tags = sorted({t for t in prod.tags if t != "current"})
                tag_msg = " (" + " ".join(tags) + ")" if tags else ""
                packages[prod.name] = prod.version + tag_msg
            continue
        ver = prod.version

        gitDir = os.path.join(prod.dir, ".git")
        if os.path.exists(gitDir):
            # get the git revision and an indication if the working copy is
            # clean
            revCmd = ["git", "--git-dir=" + gitDir, "--work-tree=" + prod.dir, "rev-parse", "HEAD"]
            diffCmd = [
                "git",
                "--no-pager",
                "--git-dir=" + gitDir,
                "--work-tree=" + prod.dir,
                "diff",
                "--patch",
            ]
            try:
                rev = subprocess.check_output(revCmd).decode().strip()
                diff = subprocess.check_output(diffCmd)
            except Exception:
                # Best effort: record that git could not be queried.
                ver += "@GIT_ERROR"
            else:
                ver += "@" + rev
                if diff:
                    # The MD5 is only a fingerprint of the uncommitted
                    # changes.
                    ver += "+" + hashlib.md5(diff).hexdigest()
        else:
            ver += "@NO_GIT"

        packages[prod.name] = ver
    return packages

322 

323 

@lru_cache(maxsize=1)
def getCondaPackages() -> dict[str, str]:
    """Get products and their versions from the conda environment.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions. If the environment name can be worked out from the path
        to the python executable it is included under the ``conda_env`` key.

    Notes
    -----
    Returns empty result if the conda python API can not be imported.
    """
    try:
        from conda.cli.python_api import Commands, run_command
    except ImportError:
        # No conda API available, so there is nothing to report.
        return {}

    # Ask conda for the installed package list; the first element of the
    # result holds the JSON listing.
    listing = json.loads(run_command(Commands.LIST, "--json")[0])
    packages = {}
    for entry in listing:
        packages[entry["name"]] = entry["version"]

    # Try to work out the conda environment name and include it as a fake
    # package. The "obvious" way of running "conda info --json" does give
    # access to the active_prefix but takes about 2 seconds to run.
    # As a compromise look for the env name in the path to the python
    # executable.
    env_match = re.search(r"/envs/(.*?)/bin/", sys.executable)
    if env_match is not None:
        packages["conda_env"] = env_match.group(1)

    return packages

360 

361 

class Packages(dict):
    """A table of packages and their versions.

    Versions are gathered from several kinds of source:

    1. Installed Conda packages are obtained via the Conda API. Conda is
       not required.
    2. Python modules (e.g., afw, numpy; galsim is also in this group even
       though we only use it through the library, because no version
       information is currently provided through the library): the version
       comes from the ``__version__`` module variable. This means that only
       modules that have already been imported are known.
    3. Other packages provide no run-time accessible version information
       (e.g., astrometry_net): the version comes from interrogating the
       environment. Currently, that means EUPS; if EUPS is replaced or
       dropped an alternative means of getting this version information
       will be needed.
    4. Local versions of packages (a non-installed EUPS package, selected
       with ``setup -r /path/to/package``): these are identified through
       the environment (EUPS again) and the version is the path
       supplemented with the ``git`` SHA and, if the git repo isn't clean,
       an MD5 of the diff.

    The collected versions are stored in a Packages object, which provides
    comparison and persistence features.

    Example usage:

    .. code-block:: python

        from lsst.utils.packages import Packages
        pkgs = Packages.fromSystem()
        print("Current packages:", pkgs)
        old = Packages.read("/path/to/packages.pickle")
        print("Old packages:", old)
        print("Missing packages compared to before:", pkgs.missing(old))
        print("Extra packages compared to before:", pkgs.extra(old))
        print("Different packages: ", pkgs.difference(old))
        old.update(pkgs)  # Include any new packages in the old
        old.write("/path/to/packages.pickle")

    Parameters
    ----------
    packages : `dict`
        A mapping {package: version} where both keys and values are type `str`.

    Notes
    -----
    This is a wrapper around a dict with some convenience methods.
    """

    # Maps a file extension to the name of the serialization format.
    formats: ClassVar[dict[str, str]] = {
        ".pkl": "pickle",
        ".pickle": "pickle",
        ".yaml": "yaml",
        ".json": "json",
    }

    def __setstate__(self, state: dict[str, Any]) -> None:
        # This only seems to be called for old pickle files where
        # the data was stored in _packages.
        legacy = state["_packages"]
        self.update(legacy)

    @classmethod
    def fromSystem(cls) -> Packages:
        """Construct a `Packages` by examining the system.

        Determine packages by examining python's `sys.modules`, conda
        libraries and EUPS. EUPS packages take precedence over conda and
        general python packages.

        Returns
        -------
        packages : `Packages`
            All version package information that could be obtained.
        """
        # Later sources win, so the environment must come last in order to
        # override products with LOCAL versions.
        merged = {
            **getPythonPackages(),
            **getCondaPackages(),
            **getEnvironmentPackages(),
        }
        return cls(merged)

    @classmethod
    def fromBytes(cls, data: bytes, format: str) -> Packages:
        """Construct the object from a byte representation.

        Parameters
        ----------
        data : `bytes`
            The serialized form of this object in bytes.
        format : `str`
            The format of those bytes. Can be ``yaml``, ``json``, or
            ``pickle``.

        Returns
        -------
        packages : `Packages`
            The package information read from the input data.

        Raises
        ------
        ValueError
            Raised if ``format`` is not one of the supported formats.
        TypeError
            Raised if the deserialized object is not an instance of this
            class.
        """
        if format not in ("pickle", "yaml", "json"):
            raise ValueError(f"Unexpected serialization format given: {format}")

        if format == "pickle":
            new = _BackwardsCompatibilityUnpickler(io.BytesIO(data)).load()
        elif format == "yaml":
            new = yaml.load(data, Loader=yaml.SafeLoader)
        else:
            new = cls(json.loads(data))

        if isinstance(new, cls):
            return new
        raise TypeError(f"Extracted object of class '{type(new)}' but expected '{cls}'")

    @classmethod
    def read(cls, filename: str) -> Packages:
        """Read packages from filename.

        Parameters
        ----------
        filename : `str`
            Filename from which to read. The format is determined from the
            file extension. Currently support ``.pickle``, ``.pkl``,
            ``.json``, and ``.yaml``.

        Returns
        -------
        packages : `Packages`
            The packages information read from the file.

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        ext = os.path.splitext(filename)[1]
        if ext not in cls.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        # These objects are assumed to be tiny, so reading the entire file
        # up front has no substantive memory impact.
        with open(filename, "rb") as stream:
            payload = stream.read()
        return cls.fromBytes(payload, cls.formats[ext])

    def toBytes(self, format: str) -> bytes:
        """Convert the object to a serialized bytes form using the
        specified format.

        Parameters
        ----------
        format : `str`
            Format to use when serializing. Can be ``yaml``, ``json``,
            or ``pickle``.

        Returns
        -------
        data : `bytes`
            Byte string representing the serialized object.

        Raises
        ------
        ValueError
            Raised if ``format`` is not one of the supported formats.
        """
        if format == "pickle":
            return pickle.dumps(self)

        # The text formats are encoded as UTF-8.
        if format == "yaml":
            text = yaml.dump(self)
        elif format == "json":
            text = json.dumps(self)
        else:
            raise ValueError(f"Unexpected serialization format requested: {format}")
        return text.encode("utf-8")

    def write(self, filename: str) -> None:
        """Write to file.

        Parameters
        ----------
        filename : `str`
            Filename to which to write. The format of the data file
            is determined from the file extension. Currently supports
            ``.pickle``, ``.pkl``, ``.json``, and ``.yaml``.

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        ext = os.path.splitext(filename)[1]
        if ext not in self.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "wb") as stream:
            # Assumes that the bytes serialization of this object is
            # relatively small.
            stream.write(self.toBytes(self.formats[ext]))

    def __str__(self) -> str:
        # One "name:version" entry per line, sorted alphabetically by name
        # for convenience in reading.
        rows = [f"{prod!r}:{self[prod]!r}" for prod in sorted(self)]
        return type(self).__name__ + "({\n" + ",\n".join(rows) + ",\n})"

    def __repr__(self) -> str:
        # Default repr() does not report the class name.
        inner = super().__repr__()
        return type(self).__name__ + "(" + inner + ")"

    def extra(self, other: Mapping) -> dict[str, str]:
        """Get packages in self but not in another `Packages` object.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        extra : `dict`
            Extra packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        only_here = self.keys() - other.keys()
        return {pkg: self[pkg] for pkg in only_here}

    def missing(self, other: Mapping) -> dict[str, str]:
        """Get packages in another `Packages` object but missing from self.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        missing : `dict` [`str`, `str`]
            Missing packages. Keys (type `str`) are package names; values
            (type `str`) are their versions in ``other``.
        """
        only_there = other.keys() - self.keys()
        return {pkg: other[pkg] for pkg in only_there}

    def difference(self, other: Mapping) -> dict[str, tuple[str, str]]:
        """Get packages present in both self and another `Packages` object
        whose versions differ.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        difference : `dict` [`str`, `tuple` [ `str`, `str` ]]
            Packages with differing versions. Keys (type `str`) are package
            names; values (type `tuple` [ `str`, `str` ]) are the version
            in self followed by the version in ``other``.
        """
        changed = {}
        for pkg in self.keys() & other.keys():
            mine, theirs = self[pkg], other[pkg]
            if mine != theirs:
                changed[pkg] = (mine, theirs)
        return changed

600 

601 

class _BackwardsCompatibilityUnpickler(pickle.Unpickler):
    """Unpickler that resolves every class reference to `Packages`.

    It exists so that users of this API can read pickle files created when
    the `~lsst.utils.packages.Packages` class was in a different package
    and known as ``lsst.base.Packages``. Whenever this unpickler is used
    the result is known to be a `~lsst.utils.packages.Packages` instance.
    """

    def find_class(self, module: str, name: str) -> type:
        """Return the class that should be used for unpickling.

        The requested ``module`` and ``name`` are ignored; the answer is
        always the class in this package.
        """
        replacement = Packages
        return replacement

618 

619 

# Register YAML representers


def _pkg_representer(dumper: yaml.Dumper, data: Any) -> yaml.MappingNode:
    """Represent Packages as a simple dict"""
    # The mapping is tagged with the fully qualified class name.
    tag = "lsst.utils.packages.Packages"
    return dumper.represent_mapping(tag, data, flow_style=None)


yaml.add_representer(Packages, _pkg_representer)

629 

630 

def _pkg_constructor(loader: yaml.constructor.SafeConstructor, node: yaml.Node) -> Any:
    """Convert YAML representation back to Python class.

    Parameters
    ----------
    loader : `yaml.constructor.SafeConstructor`
        The YAML loader doing the construction.
    node : `yaml.Node`
        The mapping node holding the package names and versions.

    Yields
    ------
    packages : `Packages`
        The reconstructed object. This is written as a generator; PyYAML
        accepts generator constructors and uses the first yielded value.
    """
    yield Packages(loader.construct_mapping(node, deep=True))  # type: ignore


# Look the loaders up by name: ``CLoader`` only exists when PyYAML has its
# LibYAML bindings, and referring to it directly would make this module
# fail to import on installations without them.
for _loader_name in ("Loader", "CLoader", "UnsafeLoader", "SafeLoader", "FullLoader"):
    loader = getattr(yaml, _loader_name, None)
    if loader is None:
        continue
    yaml.add_constructor("lsst.utils.packages.Packages", _pkg_constructor, Loader=loader)

    # Register the old name with YAML.
    yaml.add_constructor("lsst.base.Packages", _pkg_constructor, Loader=loader)

638 

639 # Register the old name with YAML. 

640 yaml.add_constructor("lsst.base.Packages", _pkg_constructor, Loader=loader)