Coverage for python/lsst/utils/packages.py: 22%

179 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-02 09:42 +0000

1# This file is part of utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11# 

12from __future__ import annotations 

13 

14""" 

15Determine which packages are being used in the system and their versions 

16""" 

17import hashlib 

18import importlib 

19import io 

20import json 

21import logging 

22import os 

23import pickle 

24import re 

25import subprocess 

26import sys 

27import types 

28from collections.abc import Mapping 

29from functools import lru_cache 

30from typing import Any, Dict, Optional, Tuple, Type 

31 

32import yaml 

33 

34log = logging.getLogger(__name__) 

35 

36__all__ = [ 

37 "getVersionFromPythonModule", 

38 "getPythonPackages", 

39 "getEnvironmentPackages", 

40 "getCondaPackages", 

41 "Packages", 

42] 

43 

44 

# Products that are only needed while building (e.g., header-only libraries).
# Their versions are appended to the version string of the module that
# recorded them in ``__dependency_versions__``.
BUILDTIME = {"boost", "eigen", "tmv"}

# Python modules that we try to import ourselves before scanning
# ``sys.modules``. The version only appears to be available from python,
# even though we use the library.
PYTHON = {"galsim"}

# Products that don't seem to have a mechanism for reporting the runtime
# version. We need to guess the version from the environment.
ENVIRONMENT = {"astrometry_net", "astrometry_net_data", "minuit2", "xpa"}

56 

57 

def getVersionFromPythonModule(module: types.ModuleType) -> str:
    """Determine the version of a python module.

    Parameters
    ----------
    module : `module`
        Module for which to get version.

    Returns
    -------
    version : `str`
        The ``__version__`` of the module converted to a string, optionally
        followed by ``" with name=version ..."`` for build-time dependencies.

    Raises
    ------
    AttributeError
        Raised if __version__ attribute is not set.

    Notes
    -----
    We supplement the version with information from the
    ``__dependency_versions__`` (a specific variable set by LSST's
    `~lsst.sconsUtils` at build time) only for packages that are typically
    used only at build-time.
    """
    # Convert up front: ``__version__`` is not guaranteed to be a string and
    # the dependency suffix below is built by string concatenation.
    version = str(module.__version__)

    deps = getattr(module, "__dependency_versions__", None)
    if deps:
        # Add build-time dependencies, sorted so the result is reproducible.
        buildtime = BUILDTIME.intersection(deps)
        if buildtime:
            version += " with " + " ".join(f"{pkg}={deps[pkg]}" for pkg in sorted(buildtime))
    return version

90 

91 

def getPythonPackages() -> Dict[str, str]:
    """Get imported python packages and their versions.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are package names; values (type `str`) are their
        versions. The interpreter itself is reported under the key
        ``"python"``.

    Notes
    -----
    We wade through `sys.modules` and attempt to determine the version for each
    module.  Note, therefore, that we can only report on modules that have
    *already* been imported.

    We don't include any module for which we cannot determine a version.
    """
    # ``import importlib`` on its own does not guarantee that the
    # ``importlib.metadata`` submodule has been loaded. Import it explicitly
    # so that the lookup below does not silently degrade to the fallback
    # path depending on what else happens to have been imported.
    from importlib import metadata

    # Attempt to import libraries that only report their version in python
    for module_name in PYTHON:
        try:
            importlib.import_module(module_name)
        except Exception:
            pass  # It's not available, so don't care

    packages = {"python": sys.version}
    # Iterate over a snapshot of the names because sys.modules can be
    # modified (e.g. by imports in other threads) while we are looping.
    for name in list(sys.modules):
        try:
            # This is the Python standard way to find a package version.
            # It can be slow.
            ver = metadata.version(name)
        except Exception:
            # Fall back to using the module itself. The lookup is inside
            # the guarded block in case the module has since been removed.
            try:
                ver = getVersionFromPythonModule(sys.modules[name])
            except Exception:
                continue  # Can't get a version from it, don't care

        # Remove "foo.bar.version" in favor of "foo.bar"
        # This prevents duplication when the __init__.py includes
        # "from .version import *"
        modified = False
        for ending in (".version", "._version"):
            if name.endswith(ending):
                name = name[: -len(ending)]
                modified = True
                break

        # Use LSST package names instead of python module names
        # This matches the names we get from the environment (i.e., EUPS)
        # so we can clobber these build-time versions if the environment
        # reveals that we're not using the packages as-built.
        # Only the leading "lsst." is removed. The renaming has to happen
        # before the duplicate check so that the check compares against the
        # key that is actually stored in ``packages``.
        if name.startswith("lsst."):
            name = name[len("lsst.") :].replace(".", "_")

        # Check if this name has already been registered.
        # This can happen if x._version is encountered before or after x.
        # Raising an exception on an inconsistency would go against the
        # ethos of this package. If this is the stripped package name we
        # drop it and trust the primary version; otherwise this is the
        # primary version and we use it in preference.
        if modified and name in packages and ver != packages[name]:
            continue

        packages[name] = ver

    return packages

166 

167 

_eups: Optional[Any] = None  # Singleton Eups object


@lru_cache(maxsize=1)
def getEnvironmentPackages(include_all: bool = False) -> Dict[str, str]:
    """Get products and their versions from the environment.

    Parameters
    ----------
    include_all : `bool`
        If `False` only returns locally-setup packages. If `True` all set
        up packages are returned with a version that includes any associated
        non-current tags.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions. Empty if EUPS can not be imported.

    Notes
    -----
    We use EUPS to determine the version of certain products (those that don't
    provide a means to determine the version any other way) and to check if
    uninstalled packages are being used. We only report the product/version
    for these packages unless ``include_all`` is `True`.

    The result is memoized with `functools.lru_cache`, so repeated calls with
    the same argument return the same `dict` object; callers should not
    modify it in place.
    """
    try:
        from eups import Eups
        from eups.Product import Product
    except ImportError:
        log.warning("Unable to import eups, so cannot determine package versions from environment")
        return {}

    # Cache eups object since creating it can take a while
    global _eups
    if _eups is None:
        _eups = Eups()
    products = _eups.findProducts(tags=["setup"])

    # Get versions for things we can't determine via runtime mechanisms
    # XXX Should we just grab everything we can, rather than just a
    # predetermined set?
    # ENVIRONMENT holds product *names*, so compare against the name and
    # not against the product object itself.
    packages = {prod.name: prod.version for prod in products if prod.name in ENVIRONMENT}

    # The string 'LOCAL:' (the value of Product.LocalVersionPrefix) in the
    # version name indicates uninstalled code, so the version could be
    # different than what's being reported by the runtime environment (because
    # we don't tend to run "scons" every time we update some python file,
    # and even if we did sconsUtils probably doesn't check to see if the repo
    # is clean).
    for prod in products:
        if not prod.version.startswith(Product.LocalVersionPrefix):
            if include_all:
                # Sort the tags so that the reported version string is
                # reproducible from one process to the next.
                tags = sorted({t for t in prod.tags if t != "current"})
                tag_msg = " (" + " ".join(tags) + ")" if tags else ""
                packages[prod.name] = prod.version + tag_msg
            continue
        ver = prod.version

        gitDir = os.path.join(prod.dir, ".git")
        if os.path.exists(gitDir):
            # get the git revision and an indication if the working copy is
            # clean
            revCmd = ["git", "--git-dir=" + gitDir, "--work-tree=" + prod.dir, "rev-parse", "HEAD"]
            diffCmd = [
                "git",
                "--no-pager",
                "--git-dir=" + gitDir,
                "--work-tree=" + prod.dir,
                "diff",
                "--patch",
            ]
            try:
                rev = subprocess.check_output(revCmd).decode().strip()
                diff = subprocess.check_output(diffCmd)
            except Exception:
                # Best effort: record that git could not be queried rather
                # than failing the whole version lookup.
                ver += "@GIT_ERROR"
            else:
                ver += "@" + rev
                if diff:
                    # The MD5 is only a fingerprint of the uncommitted
                    # changes; it is not used for security purposes.
                    ver += "+" + hashlib.md5(diff).hexdigest()
        else:
            ver += "@NO_GIT"

        packages[prod.name] = ver
    return packages

255 

256 

@lru_cache(maxsize=1)
def getCondaPackages() -> Dict[str, str]:
    """Get products and their versions from the conda environment.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions. If the environment name can be found in the path of the
        python executable it is added under the key ``"conda_env"``.

    Notes
    -----
    Returns empty result if the conda python API can not be imported.
    The result is memoized, so the same `dict` object is returned on every
    call.
    """
    try:
        from conda.cli.python_api import Commands, run_command
    except ImportError:
        return {}

    # Ask conda for the installed package list; the JSON document is the
    # first element of the value returned by run_command.
    listing = run_command(Commands.LIST, "--json")
    packages = {}
    for entry in json.loads(listing[0]):
        packages[entry["name"]] = entry["version"]

    # Try to work out the conda environment name and include it as a fake
    # package. The "obvious" way of running "conda info --json" does give
    # access to the active_prefix but takes about 2 seconds to run.
    # The equivalent to the code above would be:
    #    info_json = run_command(Commands.INFO, "--json")
    # As a compromise look for the env name in the path to the python
    # executable
    env_match = re.search(r"/envs/(.*?)/bin/", sys.executable)
    if env_match is not None:
        packages["conda_env"] = env_match.group(1)

    return packages

293 

294 

class Packages(dict):
    """A table of packages and their versions.

    There are a few different types of packages, and their versions are
    collected in different ways:

    1. Installed Conda packages are obtained via the Conda API. Conda is
       not required.
    2. Python modules (e.g., afw, numpy; galsim is also in this group even
       though we only use it through the library, because no version
       information is currently provided through the library): we get their
       version from the ``__version__`` module variable. Note that this means
       that we're only aware of modules that have already been imported.
    3. Other packages provide no run-time accessible version information (e.g.,
       astrometry_net): we get their version from interrogating the
       environment.  Currently, that means EUPS; if EUPS is replaced or dropped
       then we'll need to consider an alternative means of getting this version
       information.
    4. Local versions of packages (a non-installed EUPS package, selected with
       ``setup -r /path/to/package``): we identify these through the
       environment (EUPS again) and use as a version the path supplemented with
       the ``git`` SHA and, if the git repo isn't clean, an MD5 of the diff.

    These package versions are collected and stored in a Packages object, which
    provides useful comparison and persistence features.

    Example usage:

    .. code-block:: python

        from lsst.utils.packages import Packages
        pkgs = Packages.fromSystem()
        print("Current packages:", pkgs)
        old = Packages.read("/path/to/packages.pickle")
        print("Old packages:", old)
        print("Missing packages compared to before:", pkgs.missing(old))
        print("Extra packages compared to before:", pkgs.extra(old))
        print("Different packages: ", pkgs.difference(old))
        old.update(pkgs)  # Include any new packages in the old
        old.write("/path/to/packages.pickle")

    Parameters
    ----------
    packages : `dict`
        A mapping {package: version} where both keys and values are type `str`.

    Notes
    -----
    This is a wrapper around a dict with some convenience methods.
    """

    # Maps a recognized file extension to the name of the serialization
    # format understood by `fromBytes` and `toBytes`.
    formats = {".pkl": "pickle", ".pickle": "pickle", ".yaml": "yaml", ".json": "json"}

    def __setstate__(self, state: Dict[str, Any]) -> None:
        # This only seems to be called for old pickle files where
        # the data was stored in _packages.
        self.update(state["_packages"])

    @classmethod
    def fromSystem(cls) -> Packages:
        """Construct a `Packages` by examining the system.

        Determine packages by examining python's `sys.modules`, conda
        libraries and EUPS. EUPS packages take precedence over conda and
        general python packages.

        Returns
        -------
        packages : `Packages`
            All version package information that could be obtained.
        """
        packages = {}
        packages.update(getPythonPackages())
        packages.update(getCondaPackages())
        packages.update(getEnvironmentPackages())  # Should be last, to override products with LOCAL versions
        return cls(packages)

    @classmethod
    def fromBytes(cls, data: bytes, format: str) -> Packages:
        """Construct the object from a byte representation.

        Parameters
        ----------
        data : `bytes`
            The serialized form of this object in bytes.
        format : `str`
            The format of those bytes. Can be ``yaml``, ``json``, or
            ``pickle``.

        Returns
        -------
        packages : `Packages`
            The package information read from the input data.

        Raises
        ------
        ValueError
            Raised if ``format`` is not one of the supported formats.
        TypeError
            Raised if the deserialized object is not an instance of this
            class.
        """
        if format == "pickle":
            # The custom unpickler resolves every class lookup to Packages,
            # which also lets us read files written under the old class name.
            file = io.BytesIO(data)
            new = _BackwardsCompatibilityUnpickler(file).load()
        elif format == "yaml":
            new = yaml.load(data, Loader=yaml.SafeLoader)
        elif format == "json":
            new = cls(json.loads(data))
        else:
            raise ValueError(f"Unexpected serialization format given: {format}")
        if not isinstance(new, cls):
            raise TypeError(f"Extracted object of class '{type(new)}' but expected '{cls}'")
        return new

    @classmethod
    def read(cls, filename: str) -> Packages:
        """Read packages from filename.

        Parameters
        ----------
        filename : `str`
            Filename from which to read. The format is determined from the
            file extension.  Currently support ``.pickle``, ``.pkl``,
            ``.json``, and ``.yaml``.

        Returns
        -------
        packages : `Packages`
            The packages information read from the file.

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        _, ext = os.path.splitext(filename)
        if ext not in cls.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "rb") as ff:
            # We assume that these classes are tiny so there is no
            # substantive memory impact by reading the entire file up front
            data = ff.read()
        return cls.fromBytes(data, cls.formats[ext])

    def toBytes(self, format: str) -> bytes:
        """Convert the object to a serialized bytes form using the
        specified format.

        Parameters
        ----------
        format : `str`
            Format to use when serializing. Can be ``yaml``, ``json``,
            or ``pickle``.

        Returns
        -------
        data : `bytes`
            Byte string representing the serialized object.

        Raises
        ------
        ValueError
            Raised if ``format`` is not one of the supported formats.
        """
        if format == "pickle":
            return pickle.dumps(self)
        elif format == "yaml":
            return yaml.dump(self).encode("utf-8")
        elif format == "json":
            return json.dumps(self).encode("utf-8")
        else:
            raise ValueError(f"Unexpected serialization format requested: {format}")

    def write(self, filename: str) -> None:
        """Write to file.

        Parameters
        ----------
        filename : `str`
            Filename to which to write. The format of the data file
            is determined from the file extension. Currently supports
            ``.pickle``, ``.pkl``, ``.json``, and ``.yaml``

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        _, ext = os.path.splitext(filename)
        if ext not in self.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "wb") as ff:
            # Assumes that the bytes serialization of this object is
            # relatively small.
            ff.write(self.toBytes(self.formats[ext]))

    def __str__(self) -> str:
        # Sort alphabetically by module name, for convenience in reading.
        # Every entry carries its own trailing comma and newline so that an
        # empty table does not end up with a lone comma between the braces.
        entries = "".join(f"{prod!r}:{self[prod]!r},\n" for prod in sorted(self))
        return f"{self.__class__.__name__}({{\n{entries}}})"

    def __repr__(self) -> str:
        # Default repr() does not report the class name.
        return f"{self.__class__.__name__}({super().__repr__()})"

    def extra(self, other: Mapping) -> Dict[str, str]:
        """Get packages in self but not in another `Packages` object.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        extra : `dict`
            Extra packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: self[pkg] for pkg in self.keys() - other.keys()}

    def missing(self, other: Mapping) -> Dict[str, str]:
        """Get packages in another `Packages` object but missing from self.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        missing : `dict` [`str`, `str`]
            Missing packages. Keys (type `str`) are package names; values
            (type `str`) are their versions as recorded in ``other``.
        """
        return {pkg: other[pkg] for pkg in other.keys() - self.keys()}

    def difference(self, other: Mapping) -> Dict[str, Tuple[str, str]]:
        """Get packages present in both self and another `Packages` object
        whose versions differ.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        difference : `dict` [`str`, `tuple` [`str`, `str`]]
            Packages known to both but with different versions. Keys
            (type `str`) are package names; values
            (type `tuple` [`str`, `str`]) are the version in self followed
            by the version in ``other``. Packages found in only one of the
            two are not reported here; see `extra` and `missing`.
        """
        return {pkg: (self[pkg], other[pkg]) for pkg in self.keys() & other.keys() if self[pkg] != other[pkg]}

528 

529 

class _BackwardsCompatibilityUnpickler(pickle.Unpickler):
    """Unpickler that maps every pickled class onto `Packages`.

    Pickle files record the module and name of the class they contain.
    Older files were written when `~lsst.utils.packages.Packages` lived in
    a different package and was known as ``lsst.base.Packages``. Anyone
    using this unpickler is by definition expecting a
    `~lsst.utils.packages.Packages` instance, so the recorded location is
    ignored.
    """

    def find_class(self, module: str, name: str) -> Type:
        """Return the class that should be used for unpickling.

        Parameters
        ----------
        module : `str`
            Module name recorded in the pickle. Ignored.
        name : `str`
            Class name recorded in the pickle. Ignored.

        Returns
        -------
        cls : `type`
            Always the `Packages` class from this module.
        """
        return Packages

546 

547 

548# Register YAML representers 

549 

550 

def pkg_representer(dumper: yaml.Dumper, data: Any) -> yaml.MappingNode:
    """Represent Packages as a simple dict.

    Parameters
    ----------
    dumper : `yaml.Dumper`
        Dumper asked to build the node.
    data : `Packages`
        Mapping to be represented.

    Returns
    -------
    node : `yaml.MappingNode`
        Whatever ``dumper.represent_mapping`` returns for ``data`` under
        the ``lsst.utils.packages.Packages`` tag.
    """
    tag = "lsst.utils.packages.Packages"
    return dumper.represent_mapping(tag, data, flow_style=None)

554 

555 

yaml.add_representer(Packages, pkg_representer)


def pkg_constructor(loader: yaml.constructor.SafeConstructor, node: yaml.Node) -> Any:
    """Construct a `Packages` from a YAML mapping node.

    Parameters
    ----------
    loader : `yaml.constructor.SafeConstructor`
        Loader that is processing the document.
    node : `yaml.Node`
        Mapping node holding the package names and versions.

    Yields
    ------
    packages : `Packages`
        The package table built from the (deeply constructed) mapping.
    """
    yield Packages(loader.construct_mapping(node, deep=True))  # type: ignore


# Register the constructor with every loader that this PyYAML provides.
# ``CLoader`` is only defined when PyYAML was built with the LibYAML
# bindings, so look the loaders up by name and skip any that are absent
# instead of failing at import time.
for _loader_name in ("Loader", "CLoader", "UnsafeLoader", "SafeLoader", "FullLoader"):
    loader = getattr(yaml, _loader_name, None)
    if loader is None:
        continue
    yaml.add_constructor("lsst.utils.packages.Packages", pkg_constructor, Loader=loader)

    # Register the old name with YAML.
    yaml.add_constructor("lsst.base.Packages", pkg_constructor, Loader=loader)