Coverage for python/lsst/utils/packages.py: 21%

198 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-01 02:29 -0700

1# This file is part of utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11# 

"""Determine which packages are being used in the system and their versions.
"""

# The docstring has to be the first statement in the module to be picked up
# as ``__doc__``; a ``from __future__`` import is allowed to follow it.
from __future__ import annotations

17import hashlib 

18import importlib 

19import io 

20import json 

21import logging 

22import os 

23import pickle 

24import re 

25import subprocess 

26import sys 

27import types 

28from collections.abc import Mapping 

29from functools import lru_cache 

30from typing import Any, Dict, Optional, Tuple, Type 

31 

32import yaml 

33 

34log = logging.getLogger(__name__) 

35 

36__all__ = [ 

37 "getVersionFromPythonModule", 

38 "getPythonPackages", 

39 "getEnvironmentPackages", 

40 "getCondaPackages", 

41 "Packages", 

42] 

43 

44 

# Header-only and similar products that only matter while building.
BUILDTIME = {"boost", "eigen", "tmv"}

# Modules we try to import purely so that their version can be read: the
# version only appears to be available from python, even though it is the
# library that we use.
PYTHON = {"galsim"}

# Products without any mechanism for reporting their version at run time.
# For these the version has to be guessed from the environment.
ENVIRONMENT = {"astrometry_net", "astrometry_net_data", "minuit2", "xpa"}

# Python 3.10 and newer publish the names of the standard library modules,
# all of which share the version number of Python itself. When the attribute
# is missing no module is treated as belonging to the standard library.
_STDLIB = getattr(sys, "stdlib_module_names", frozenset())

63 

64 

def getVersionFromPythonModule(module: types.ModuleType) -> str:
    """Determine the version of a python module.

    Parameters
    ----------
    module : `module`
        Module for which to get version.

    Returns
    -------
    version : `str`
        The module's ``__version__`` converted to a string, followed by
        `` with pkg=version ...`` for every build-time package listed in the
        module's ``__dependency_versions__``.

    Raises
    ------
    AttributeError
        Raised if __version__ attribute is not set.

    Notes
    -----
    We supplement the version with information from the
    ``__dependency_versions__`` (a specific variable set by LSST's
    `~lsst.sconsUtils` at build time) only for packages that are typically
    used only at build-time.
    """
    # Convert up front: ``__version__`` is not guaranteed to be a string and
    # the build-time suffix below is appended by string concatenation.
    version = str(module.__version__)

    # A missing, empty or `None` mapping contributes nothing.
    deps = getattr(module, "__dependency_versions__", None)
    if deps:
        # Add build-time dependencies, sorted so the result is reproducible.
        buildtime = BUILDTIME.intersection(deps)
        if buildtime:
            version += " with " + " ".join(f"{pkg}={deps[pkg]}" for pkg in sorted(buildtime))
    return version

97 

98 

def getPythonPackages() -> Dict[str, str]:
    """Get imported python packages and their versions.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are package names; values (type `str`) are their
        versions.

    Notes
    -----
    We wade through `sys.modules` and attempt to determine the version for each
    module.  Note, therefore, that we can only report on modules that have
    *already* been imported.

    We don't include any module for which we cannot determine a version.

    Modules in the ``lsst`` namespace are reported under their LSST package
    name: the leading ``lsst.`` is removed and the remaining dots are
    replaced by underscores (``lsst.daf.butler`` becomes ``daf_butler``).
    """
    # Attempt to import libraries that only report their version in python
    for module_name in PYTHON:
        try:
            importlib.import_module(module_name)
        except Exception:
            pass  # It's not available, so don't care

    packages = {"python": sys.version}

    # Take a snapshot of the names rather than iterating over sys.modules
    # directly because that is not atomic and is subject to race conditions.
    module_names = list(sys.modules.keys())

    # Use knowledge of package hierarchy to find the versions rather than
    # using each name independently. Group all the module names into the
    # hierarchy, splitting on dot, and skipping any component that starts
    # with an underscore.

    # Sorting the module names gives us:
    # lsst
    # lsst.afw
    # lsst.afw.cameraGeom
    # ...
    # lsst.daf
    # lsst.daf.butler
    #
    # and so we can use knowledge of the previous version to inform whether
    # we need to look at the subsequent line.
    n_versions = 0
    n_checked = 0
    previous = ""
    for name in sorted(module_names):
        if name.startswith("_") or "._" in name:
            # Refers to a private module so we can ignore it and assume
            # version has been lifted into parent or, if top level, not
            # relevant for versioning. This applies also to standard library
            # packages such as _abc and __future__.
            continue

        if name in _STDLIB:
            # Assign all standard library packages the python version
            # since they almost all lack explicit versions.
            packages[name] = sys.version
            previous = name
            continue

        if name.startswith(previous + ".") and previous in packages:
            # Already have this version. Use the same previous name
            # for the line after this.
            continue

        # Look for a version. The helper stores it in ``packages`` if found.
        ver = _get_python_package_version(name, packages)

        n_checked += 1
        if ver is not None:
            n_versions += 1
            previous = name

    log.debug(
        "Given %d modules but checked %d in hierarchy and found versions for %d",
        len(module_names),
        n_checked,
        n_versions,
    )

    # Use LSST package names instead of python module names.
    # This matches the names we get from the environment (i.e., EUPS)
    # so we can clobber these build-time versions if the environment
    # reveals that we're not using the packages as-built.
    prefix = "lsst."
    for name in list(packages):
        if name.startswith(prefix):
            # Strip only the leading namespace: a plain str.replace would also
            # remove a later "lsst." (e.g. inside "lsst.obs.lsst.translators").
            new_name = name[len(prefix):].replace(".", "_")
            packages[new_name] = packages.pop(name)

    return packages

193 

194 

195def _get_python_package_version(name: str, packages: dict[str, str]) -> str | None: 

196 """Given a package or module name, try to determine the version. 

197 

198 Parameters 

199 ---------- 

200 name : `str` 

201 The name of the package or module to try. 

202 packages : `dict`[`str`, `str`] 

203 A dictionary mapping a name to a version. Modified in place. 

204 The key used might not match exactly the given key. 

205 

206 Returns 

207 ------- 

208 ver : `str` or `None` 

209 The version string stored in ``packages``. Nothing is stored if the 

210 value here is `None`. 

211 """ 

212 try: 

213 # This is the Python standard way to find a package version. 

214 # It can be slow. 

215 ver = importlib.metadata.version(name) 

216 except Exception: 

217 # Fall back to using the module itself. There is no guarantee 

218 # that "a" exists for module "a.b" so if hierarchy has been expanded 

219 # this might fail. Check first. 

220 if name not in sys.modules: 

221 return None 

222 module = sys.modules[name] 

223 try: 

224 ver = getVersionFromPythonModule(module) 

225 except Exception: 

226 return None # Can't get a version from it, don't care 

227 

228 # Update the package information. 

229 if ver is not None: 

230 packages[name] = ver 

231 

232 return ver 

233 

234 

_eups: Optional[Any] = None  # Singleton Eups object


@lru_cache(maxsize=1)
def getEnvironmentPackages(include_all: bool = False) -> Dict[str, str]:
    """Get products and their versions from the environment.

    Parameters
    ----------
    include_all : `bool`
        If `False` only returns locally-setup packages. If `True` all set
        up packages are returned with a version that includes any associated
        non-current tags.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions. Empty if EUPS cannot be imported.

    Notes
    -----
    We use EUPS to determine the version of certain products (those that don't
    provide a means to determine the version any other way) and to check if
    uninstalled packages are being used. We only report the product/version
    for these packages unless ``include_all`` is `True`.

    The result is cached by `functools.lru_cache`, so the returned
    dictionary is shared between callers and should not be modified.
    """
    try:
        from eups import Eups
        from eups.Product import Product
    except ImportError:
        log.warning("Unable to import eups, so cannot determine package versions from environment")
        return {}

    # Cache eups object since creating it can take a while
    global _eups
    if _eups is None:
        _eups = Eups()
    products = _eups.findProducts(tags=["setup"])

    # Get versions for things we can't determine via runtime mechanisms
    # XXX Should we just grab everything we can, rather than just a
    # predetermined set?
    # ENVIRONMENT holds product names, so compare the name and not the
    # product object itself.
    packages = {prod.name: prod.version for prod in products if prod.name in ENVIRONMENT}

    # The string 'LOCAL:' (the value of Product.LocalVersionPrefix) in the
    # version name indicates uninstalled code, so the version could be
    # different than what's being reported by the runtime environment (because
    # we don't tend to run "scons" every time we update some python file,
    # and even if we did sconsUtils probably doesn't check to see if the repo
    # is clean).
    for prod in products:
        if not prod.version.startswith(Product.LocalVersionPrefix):
            if include_all:
                # Sort the tags so that the version string is reproducible
                # and can be compared between runs.
                tags = sorted({t for t in prod.tags if t != "current"})
                tag_msg = " (" + " ".join(tags) + ")" if tags else ""
                packages[prod.name] = prod.version + tag_msg
            continue
        ver = prod.version

        gitDir = os.path.join(prod.dir, ".git")
        if os.path.exists(gitDir):
            # get the git revision and an indication if the working copy is
            # clean
            revCmd = ["git", "--git-dir=" + gitDir, "--work-tree=" + prod.dir, "rev-parse", "HEAD"]
            diffCmd = [
                "git",
                "--no-pager",
                "--git-dir=" + gitDir,
                "--work-tree=" + prod.dir,
                "diff",
                "--patch",
            ]
            try:
                rev = subprocess.check_output(revCmd).decode().strip()
                diff = subprocess.check_output(diffCmd)
            except Exception:
                ver += "@GIT_ERROR"
            else:
                ver += "@" + rev
                if diff:
                    # The MD5 only fingerprints the uncommitted changes; it
                    # is not used for security.
                    ver += "+" + hashlib.md5(diff).hexdigest()
        else:
            ver += "@NO_GIT"

        packages[prod.name] = ver
    return packages

322 

323 

@lru_cache(maxsize=1)
def getCondaPackages() -> Dict[str, str]:
    """Get products and their versions from the conda environment.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions.

    Notes
    -----
    Returns empty result if a conda environment is not in use or can not
    be queried.

    The result is cached by `functools.lru_cache`, so the returned
    dictionary is shared between callers and should not be modified.
    """
    try:
        from conda.cli.python_api import Commands, run_command
    except ImportError:
        return {}

    # Get the installed package list. A failure to run conda or to parse
    # its output is reported as "no conda information", as documented.
    try:
        versions_json = run_command(Commands.LIST, "--json")
        packages = {pkg["name"]: pkg["version"] for pkg in json.loads(versions_json[0])}
    except Exception as exc:
        log.debug("Unable to query conda for the installed packages: %s", exc)
        return {}

    # Try to work out the conda environment name and include it as a fake
    # package. The "obvious" way of running "conda info --json" does give
    # access to the active_prefix but takes about 2 seconds to run.
    # The equivalent to the code above would be:
    #    info_json = run_command(Commands.INFO, "--json")
    # As a compromise look for the env name in the path to the python
    # executable
    match = re.search(r"/envs/(.*?)/bin/", sys.executable)
    if match:
        packages["conda_env"] = match.group(1)

    return packages

360 

361 

class Packages(dict):
    """A table of packages and their versions.

    There are a few different types of packages, and their versions are
    collected in different ways:

    1. Installed Conda packages are obtained via the Conda API. Conda is
       not required.
    2. Python modules (e.g., afw, numpy; galsim is also in this group even
       though we only use it through the library, because no version
       information is currently provided through the library): we get their
       version from the ``__version__`` module variable. Note that this means
       that we're only aware of modules that have already been imported.
    3. Other packages provide no run-time accessible version information (e.g.,
       astrometry_net): we get their version from interrogating the
       environment.  Currently, that means EUPS; if EUPS is replaced or dropped
       then we'll need to consider an alternative means of getting this version
       information.
    4. Local versions of packages (a non-installed EUPS package, selected with
       ``setup -r /path/to/package``): we identify these through the
       environment (EUPS again) and use as a version the path supplemented with
       the ``git`` SHA and, if the git repo isn't clean, an MD5 of the diff.

    These package versions are collected and stored in a Packages object, which
    provides useful comparison and persistence features.

    Example usage:

    .. code-block:: python

        from lsst.utils.packages import Packages
        pkgs = Packages.fromSystem()
        print("Current packages:", pkgs)
        old = Packages.read("/path/to/packages.pickle")
        print("Old packages:", old)
        print("Missing packages compared to before:", pkgs.missing(old))
        print("Extra packages compared to before:", pkgs.extra(old))
        print("Different packages: ", pkgs.difference(old))
        old.update(pkgs)  # Include any new packages in the old
        old.write("/path/to/packages.pickle")

    Parameters
    ----------
    packages : `dict`
        A mapping {package: version} where both keys and values are type `str`.

    Notes
    -----
    This is a wrapper around a dict with some convenience methods.
    """

    # Maps a supported file extension to the name of the serialization
    # format understood by `fromBytes` and `toBytes`.
    formats = {".pkl": "pickle", ".pickle": "pickle", ".yaml": "yaml", ".json": "json"}

    def __setstate__(self, state: Dict[str, Any]) -> None:
        # This only seems to be called for old pickle files where
        # the data was stored in _packages.
        self.update(state["_packages"])

    @classmethod
    def fromSystem(cls) -> Packages:
        """Construct a `Packages` by examining the system.

        Determine packages by examining python's `sys.modules`, conda
        libraries and EUPS. EUPS packages take precedence over conda and
        general python packages.

        Returns
        -------
        packages : `Packages`
            All version package information that could be obtained.
        """
        packages = {}
        packages.update(getPythonPackages())
        packages.update(getCondaPackages())
        packages.update(getEnvironmentPackages())  # Should be last, to override products with LOCAL versions
        return cls(packages)

    @classmethod
    def fromBytes(cls, data: bytes, format: str) -> Packages:
        """Construct the object from a byte representation.

        Parameters
        ----------
        data : `bytes`
            The serialized form of this object in bytes.
        format : `str`
            The format of those bytes. Can be ``yaml``, ``json``, or
            ``pickle``.

        Returns
        -------
        packages : `Packages`
            The package information read from the input data.

        Raises
        ------
        ValueError
            Raised if ``format`` is not one of the supported formats.
        TypeError
            Raised if the deserialized object is not an instance of this
            class.
        """
        if format == "pickle":
            # The dedicated unpickler resolves every class lookup to
            # `Packages`, which also lets old ``lsst.base.Packages`` pickles
            # be read.
            file = io.BytesIO(data)
            new = _BackwardsCompatibilityUnpickler(file).load()
        elif format == "yaml":
            new = yaml.load(data, Loader=yaml.SafeLoader)
        elif format == "json":
            new = cls(json.loads(data))
        else:
            raise ValueError(f"Unexpected serialization format given: {format}")
        if not isinstance(new, cls):
            raise TypeError(f"Extracted object of class '{type(new)}' but expected '{cls}'")
        return new

    @classmethod
    def read(cls, filename: str) -> Packages:
        """Read packages from filename.

        Parameters
        ----------
        filename : `str`
            Filename from which to read. The format is determined from the
            file extension.  Currently support ``.pickle``, ``.pkl``,
            ``.json``, and ``.yaml``.

        Returns
        -------
        packages : `Packages`
            The packages information read from the file.

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        _, ext = os.path.splitext(filename)
        if ext not in cls.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "rb") as ff:
            # We assume that these classes are tiny so there is no
            # substantive memory impact by reading the entire file up front
            data = ff.read()
        return cls.fromBytes(data, cls.formats[ext])

    def toBytes(self, format: str) -> bytes:
        """Convert the object to a serialized bytes form using the
        specified format.

        Parameters
        ----------
        format : `str`
            Format to use when serializing. Can be ``yaml``, ``json``,
            or ``pickle``.

        Returns
        -------
        data : `bytes`
            Byte string representing the serialized object.

        Raises
        ------
        ValueError
            Raised if ``format`` is not one of the supported formats.
        """
        if format == "pickle":
            return pickle.dumps(self)
        elif format == "yaml":
            return yaml.dump(self).encode("utf-8")
        elif format == "json":
            return json.dumps(self).encode("utf-8")
        else:
            raise ValueError(f"Unexpected serialization format requested: {format}")

    def write(self, filename: str) -> None:
        """Write to file.

        Parameters
        ----------
        filename : `str`
            Filename to which to write. The format of the data file
            is determined from the file extension. Currently supports
            ``.pickle``, ``.pkl``, ``.json``, and ``.yaml``.

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        _, ext = os.path.splitext(filename)
        if ext not in self.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "wb") as ff:
            # Assumes that the bytes serialization of this object is
            # relatively small.
            ff.write(self.toBytes(self.formats[ext]))

    def __str__(self) -> str:
        # One ``name:version,`` entry per line, sorted alphabetically by
        # module name for convenience in reading. Terminating every entry
        # (rather than joining and appending a final comma) keeps an empty
        # table free of a stray comma line.
        entries = "".join(f"{prod!r}:{self[prod]!r},\n" for prod in sorted(self))
        return f"{self.__class__.__name__}({{\n{entries}}})"

    def __repr__(self) -> str:
        # Default repr() does not report the class name.
        return f"{self.__class__.__name__}({super().__repr__()})"

    def extra(self, other: Mapping) -> Dict[str, str]:
        """Get packages in self but not in another `Packages` object.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        extra : `dict`
            Extra packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: self[pkg] for pkg in self.keys() - other.keys()}

    def missing(self, other: Mapping) -> Dict[str, str]:
        """Get packages in another `Packages` object but missing from self.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        missing : `dict` [`str`, `str`]
            Missing packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: other[pkg] for pkg in other.keys() - self.keys()}

    def difference(self, other: Mapping) -> Dict[str, Tuple[str, str]]:
        """Get packages present in both self and another `Packages` object
        but with different versions.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        difference : `dict` [`str`, `tuple` [`str`, `str`]]
            Packages whose versions differ. Keys (type `str`) are package
            names; values (type `tuple`[`str`, `str`]) are the version in
            self followed by the version in ``other``. Packages found in only
            one of the two are not included (see `extra` and `missing`).
        """
        return {pkg: (self[pkg], other[pkg]) for pkg in self.keys() & other.keys() if self[pkg] != other[pkg]}

595 

596 

597class _BackwardsCompatibilityUnpickler(pickle.Unpickler): 

598 """Replacement for the default unpickler. 

599 

600 It is required so that users of this API can read pickle files 

601 created when the `~lsst.utils.packages.Packages` class was in a different 

602 package and known as ``lsst.base.Packages``. If this unpickler is being 

603 used then we know for sure that we must return a 

604 `~lsst.utils.packages.Packages` instance. 

605 """ 

606 

607 def find_class(self, module: str, name: str) -> Type: 

608 """Return the class that should be used for unpickling. 

609 

610 This is always known to be the class in this package. 

611 """ 

612 return Packages 

613 

614 

615# Register YAML representers 

616 

617 

def pkg_representer(dumper: yaml.Dumper, data: Any) -> yaml.MappingNode:
    """Emit a `Packages` object as a mapping tagged with its class name."""
    tag = "lsst.utils.packages.Packages"
    node = dumper.represent_mapping(tag, data, flow_style=None)
    return node

621 

622 

yaml.add_representer(Packages, pkg_representer)


def pkg_constructor(loader: yaml.constructor.SafeConstructor, node: yaml.Node) -> Any:
    """Build a `Packages` object from a tagged YAML mapping node.

    Written as a generator, so the `Packages` object is yielded rather than
    returned.
    """
    yield Packages(loader.construct_mapping(node, deep=True))  # type: ignore


# Look the loaders up by name: not every PyYAML installation provides all of
# them (``CLoader`` only exists when PyYAML was built with LibYAML) and a
# direct attribute access would make importing this module fail.
for loader_name in ("Loader", "CLoader", "UnsafeLoader", "SafeLoader", "FullLoader"):
    loader = getattr(yaml, loader_name, None)
    if loader is None:
        continue
    yaml.add_constructor("lsst.utils.packages.Packages", pkg_constructor, Loader=loader)

    # Register the old name with YAML.
    yaml.add_constructor("lsst.base.Packages", pkg_constructor, Loader=loader)