Coverage for python/lsst/utils/packages.py: 22%

176 statements  

« prev     ^ index     » next       coverage.py v7.2.3, created at 2023-04-19 10:38 +0000

1# This file is part of utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11# 

12from __future__ import annotations 

13 

14""" 

15Determine which packages are being used in the system and their versions 

16""" 

17import hashlib 

18import importlib 

19import io 

20import json 

21import logging 

22import os 

23import pickle 

24import re 

25import subprocess 

26import sys 

27import types 

28from collections.abc import Mapping 

29from functools import lru_cache 

30from typing import Any, Dict, Optional, Tuple, Type 

31 

32import yaml 

33 

# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)

# Public names exported by a star-import of this module.
__all__ = [
    "getVersionFromPythonModule",
    "getPythonPackages",
    "getEnvironmentPackages",
    "getCondaPackages",
    "Packages",
]

43 

44 

# Packages used at build-time (e.g., header-only)
BUILDTIME = {"boost", "eigen", "tmv"}

# Python modules to attempt to load so we can try to get the version.
# We do this because the version only appears to be available from python,
# but we use the library.
PYTHON = {"galsim"}

# Packages that don't seem to have a mechanism for reporting the runtime
# version. We need to guess the version from the environment.
ENVIRONMENT = {"astrometry_net", "astrometry_net_data", "minuit2", "xpa"}

56 

57 

def getVersionFromPythonModule(module: types.ModuleType) -> str:
    """Determine the version of a python module.

    Parameters
    ----------
    module : `module`
        Module for which to get version.

    Returns
    -------
    version : `str`
        String form of the module's ``__version__``. If the module also
        defines ``__dependency_versions__`` and that table names any of
        the packages in ``BUILDTIME``, the result is suffixed with
        ``" with pkg=version ..."`` for those packages, sorted by name.

    Raises
    ------
    AttributeError
        Raised if __version__ attribute is not set.

    Notes
    -----
    We supplement the version with information from the
    ``__dependency_versions__`` (a specific variable set by LSST's
    `~lsst.sconsUtils` at build time) only for packages that are typically
    used only at build-time.
    """
    # Convert to str before appending anything: ``__version__`` is not
    # guaranteed to be a string. Appending to a tuple or number would raise
    # TypeError, and ``+=`` on a list would modify the module's own
    # attribute in place.
    version = str(module.__version__)

    # A missing, None or empty dependency table contributes nothing.
    deps = getattr(module, "__dependency_versions__", None)
    if deps:
        # Add build-time dependencies
        buildtime = BUILDTIME.intersection(deps.keys())
        if buildtime:
            version += " with " + " ".join(f"{pkg}={deps[pkg]}" for pkg in sorted(buildtime))
    return version

90 

91 

def getPythonPackages() -> Dict[str, str]:
    """Get imported python packages and their versions.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are package names; values (type `str`) are their
        versions. The interpreter itself is reported under the key
        ``"python"`` with the value of `sys.version`.

    Notes
    -----
    We wade through `sys.modules` and attempt to determine the version for each
    module.  Note, therefore, that we can only report on modules that have
    *already* been imported.

    We don't include any module for which we cannot determine a version.
    """
    # Attempt to import libraries that only report their version in python
    for module_name in PYTHON:
        try:
            importlib.import_module(module_name)
        except Exception:
            pass  # It's not available, so don't care

    packages = {"python": sys.version}

    # Take a snapshot of names *and* modules in one step. Iterating
    # sys.modules directly is subject to race conditions, and looking a name
    # up again after taking a key snapshot could fail if the module has been
    # removed in the meantime.
    for name, module in list(sys.modules.items()):
        try:
            ver = getVersionFromPythonModule(module)
        except Exception:
            continue  # Can't get a version from it, don't care

        # Remove "foo.bar.version" in favor of "foo.bar"
        # This prevents duplication when the __init__.py includes
        # "from .version import *"
        modified = False
        for ending in (".version", "._version"):
            if name.endswith(ending):
                name = name[: -len(ending)]
                modified = True
                break

        # Use LSST package names instead of python module names
        # This matches the names we get from the environment (i.e., EUPS)
        # so we can clobber these build-time versions if the environment
        # reveals that we're not using the packages as-built.
        # This has to happen before the duplicate check below because the
        # keys stored in ``packages`` are the converted names.
        if name.startswith("lsst."):
            name = name.replace("lsst.", "").replace(".", "_")

        # Check if this name has already been registered with a different
        # version. Raising an exception would go against the ethos of this
        # package. If the name came from a stripped ".version"/"._version"
        # module we drop it and trust the version already recorded;
        # otherwise the package's own version is used in preference.
        if modified and name in packages and ver != packages[name]:
            continue

        packages[name] = ver

    return packages

160 

161 

_eups: Optional[Any] = None  # Singleton Eups object


@lru_cache(maxsize=1)
def getEnvironmentPackages(include_all: bool = False) -> Dict[str, str]:
    """Get products and their versions from the environment.

    Parameters
    ----------
    include_all : `bool`
        If `False` only returns locally-setup packages (plus the products
        listed in ``ENVIRONMENT``). If `True` all set up packages are
        returned with a version that includes any associated non-current
        tags.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions. Empty if EUPS can not be imported.

    Notes
    -----
    We use EUPS to determine the version of certain products (those that don't
    provide a means to determine the version any other way) and to check if
    uninstalled packages are being used. We only report the product/version
    for these packages unless ``include_all`` is `True`.

    The result is memoized with `functools.lru_cache`, so repeated calls with
    the same argument return the same `dict` object; callers should not
    modify it.
    """
    try:
        from eups import Eups
        from eups.Product import Product
    except ImportError:
        log.warning("Unable to import eups, so cannot determine package versions from environment")
        return {}

    # Cache eups object since creating it can take a while
    global _eups
    if _eups is None:
        _eups = Eups()
    products = _eups.findProducts(tags=["setup"])

    # Get versions for things we can't determine via runtime mechanisms
    # XXX Should we just grab everything we can, rather than just a
    # predetermined set?
    # ENVIRONMENT holds product *names*, so compare the name rather than the
    # product object itself.
    packages = {prod.name: prod.version for prod in products if prod.name in ENVIRONMENT}

    # The string 'LOCAL:' (the value of Product.LocalVersionPrefix) in the
    # version name indicates uninstalled code, so the version could be
    # different than what's being reported by the runtime environment (because
    # we don't tend to run "scons" every time we update some python file,
    # and even if we did sconsUtils probably doesn't check to see if the repo
    # is clean).
    for prod in products:
        if not prod.version.startswith(Product.LocalVersionPrefix):
            if include_all:
                # Sort the tags so that the version string is reproducible;
                # version strings are compared between runs.
                tags = sorted(t for t in prod.tags if t != "current")
                tag_msg = " (" + " ".join(tags) + ")" if tags else ""
                packages[prod.name] = prod.version + tag_msg
            continue
        ver = prod.version

        gitDir = os.path.join(prod.dir, ".git")
        if not os.path.exists(gitDir):
            packages[prod.name] = ver + "@NO_GIT"
            continue

        # get the git revision and an indication if the working copy is
        # clean
        revCmd = ["git", "--git-dir=" + gitDir, "--work-tree=" + prod.dir, "rev-parse", "HEAD"]
        diffCmd = [
            "git",
            "--no-pager",
            "--git-dir=" + gitDir,
            "--work-tree=" + prod.dir,
            "diff",
            "--patch",
        ]
        try:
            rev = subprocess.check_output(revCmd).decode().strip()
            diff = subprocess.check_output(diffCmd)
        except Exception:
            ver += "@GIT_ERROR"
        else:
            ver += "@" + rev
            if diff:
                # The digest only fingerprints uncommitted changes.
                ver += "+" + hashlib.md5(diff).hexdigest()

        packages[prod.name] = ver
    return packages

249 

250 

@lru_cache(maxsize=1)
def getCondaPackages() -> Dict[str, str]:
    """Get products and their versions from the conda environment.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions. If the environment name can be worked out from the path
        to the python executable it is included under the key
        ``conda_env``.

    Notes
    -----
    Returns empty result if a conda environment is not in use or can not
    be queried.

    The result is memoized with `functools.lru_cache`, so repeated calls
    return the same `dict` object; callers should not modify it.
    """
    try:
        from conda.cli.python_api import Commands, run_command
    except ImportError:
        return {}

    # Get the installed package list. A failure to run or parse the listing
    # is reported as "no information" rather than propagated, in line with
    # the documented behavior above.
    try:
        versions_json = run_command(Commands.LIST, "--json")
        packages = {pkg["name"]: pkg["version"] for pkg in json.loads(versions_json[0])}
    except Exception as err:
        log.debug("Unable to query conda for installed packages: %s", err)
        return {}

    # Try to work out the conda environment name and include it as a fake
    # package. The "obvious" way of running "conda info --json" does give
    # access to the active_prefix but takes about 2 seconds to run.
    # The equivalent to the code above would be:
    #    info_json = run_command(Commands.INFO, "--json")
    # As a compromise look for the env name in the path to the python
    # executable
    match = re.search(r"/envs/(.*?)/bin/", sys.executable)
    if match:
        packages["conda_env"] = match.group(1)

    return packages

287 

288 

class Packages(dict):
    """A table of packages and their versions.

    There are a few different types of packages, and their versions are
    collected in different ways:

    1. Installed Conda packages are obtained via the Conda API. Conda is
       not required.
    2. Python modules (e.g., afw, numpy; galsim is also in this group even
       though we only use it through the library, because no version
       information is currently provided through the library): we get their
       version from the ``__version__`` module variable. Note that this means
       that we're only aware of modules that have already been imported.
    3. Other packages provide no run-time accessible version information (e.g.,
       astrometry_net): we get their version from interrogating the
       environment.  Currently, that means EUPS; if EUPS is replaced or dropped
       then we'll need to consider an alternative means of getting this version
       information.
    4. Local versions of packages (a non-installed EUPS package, selected with
       ``setup -r /path/to/package``): we identify these through the
       environment (EUPS again) and use as a version the path supplemented with
       the ``git`` SHA and, if the git repo isn't clean, an MD5 of the diff.

    These package versions are collected and stored in a Packages object, which
    provides useful comparison and persistence features.

    Example usage:

    .. code-block:: python

        from lsst.utils.packages import Packages
        pkgs = Packages.fromSystem()
        print("Current packages:", pkgs)
        old = Packages.read("/path/to/packages.pickle")
        print("Old packages:", old)
        print("Missing packages compared to before:", pkgs.missing(old))
        print("Extra packages compared to before:", pkgs.extra(old))
        print("Different packages: ", pkgs.difference(old))
        old.update(pkgs)  # Include any new packages in the old
        old.write("/path/to/packages.pickle")

    Parameters
    ----------
    packages : `dict`
        A mapping {package: version} where both keys and values are type `str`.

    Notes
    -----
    This is a wrapper around a dict with some convenience methods.
    """

    # Maps a file extension to the name of the serialization format used by
    # `read` and `write`.
    formats = {".pkl": "pickle", ".pickle": "pickle", ".yaml": "yaml", ".json": "json"}

    def __setstate__(self, state: Dict[str, Any]) -> None:
        # Old pickle files stored the table in a ``_packages`` attribute;
        # fold that into the dict itself. Any other entries are ordinary
        # instance attributes and are restored as such, so that a state
        # without ``_packages`` does not raise KeyError.
        attributes = dict(state)
        legacy = attributes.pop("_packages", None)
        if legacy is not None:
            self.update(legacy)
        if attributes:
            self.__dict__.update(attributes)

    @classmethod
    def fromSystem(cls) -> "Packages":
        """Construct a `Packages` by examining the system.

        Determine packages by examining python's `sys.modules`, conda
        libraries and EUPS. EUPS packages take precedence over conda and
        general python packages.

        Returns
        -------
        packages : `Packages`
            All version package information that could be obtained.
        """
        packages: Dict[str, str] = {}
        # Later updates override earlier ones, so the order sets precedence.
        packages.update(getPythonPackages())
        packages.update(getCondaPackages())
        packages.update(getEnvironmentPackages())  # Should be last, to override products with LOCAL versions
        return cls(packages)

    @classmethod
    def fromBytes(cls, data: bytes, format: str) -> "Packages":
        """Construct the object from a byte representation.

        Parameters
        ----------
        data : `bytes`
            The serialized form of this object in bytes.
        format : `str`
            The format of those bytes. Can be ``yaml``, ``json``, or
            ``pickle``.

        Returns
        -------
        packages : `Packages`
            The package information read from the input data.

        Raises
        ------
        ValueError
            Raised if ``format`` is not one of the supported formats.
        TypeError
            Raised if the deserialized object is not an instance of this
            class.
        """
        if format == "pickle":
            # NOTE: pickle data should only come from a trusted source.
            file = io.BytesIO(data)
            new = _BackwardsCompatibilityUnpickler(file).load()
        elif format == "yaml":
            new = yaml.load(data, Loader=yaml.SafeLoader)
        elif format == "json":
            new = cls(json.loads(data))
        else:
            raise ValueError(f"Unexpected serialization format given: {format}")
        if not isinstance(new, cls):
            raise TypeError(f"Extracted object of class '{type(new)}' but expected '{cls}'")
        return new

    @classmethod
    def read(cls, filename: str) -> "Packages":
        """Read packages from filename.

        Parameters
        ----------
        filename : `str`
            Filename from which to read. The format is determined from the
            file extension.  Currently support ``.pickle``, ``.pkl``,
            ``.json``, and ``.yaml``.

        Returns
        -------
        packages : `Packages`
            The packages information read from the file.

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        _, ext = os.path.splitext(filename)
        if ext not in cls.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "rb") as ff:
            # We assume that these classes are tiny so there is no
            # substantive memory impact by reading the entire file up front
            data = ff.read()
        return cls.fromBytes(data, cls.formats[ext])

    def toBytes(self, format: str) -> bytes:
        """Convert the object to a serialized bytes form using the
        specified format.

        Parameters
        ----------
        format : `str`
            Format to use when serializing. Can be ``yaml``, ``json``,
            or ``pickle``.

        Returns
        -------
        data : `bytes`
            Byte string representing the serialized object.

        Raises
        ------
        ValueError
            Raised if ``format`` is not one of the supported formats.
        """
        if format == "pickle":
            return pickle.dumps(self)
        elif format == "yaml":
            return yaml.dump(self).encode("utf-8")
        elif format == "json":
            return json.dumps(self).encode("utf-8")
        else:
            raise ValueError(f"Unexpected serialization format requested: {format}")

    def write(self, filename: str) -> None:
        """Write to file.

        Parameters
        ----------
        filename : `str`
            Filename to which to write. The format of the data file
            is determined from the file extension. Currently supports
            ``.pickle``, ``.pkl``, ``.json``, and ``.yaml``.

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        _, ext = os.path.splitext(filename)
        if ext not in self.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "wb") as ff:
            # Assumes that the bytes serialization of this object is
            # relatively small.
            ff.write(self.toBytes(self.formats[ext]))

    def __str__(self) -> str:
        # One ``name:version,`` entry per line, sorted alphabetically by
        # package name for convenience in reading. Ending every entry with
        # its own comma keeps an empty table free of a stray comma.
        entries = "".join(f"{prod!r}:{self[prod]!r},\n" for prod in sorted(self))
        return f"{self.__class__.__name__}({{\n{entries}}})"

    def __repr__(self) -> str:
        # Default repr() does not report the class name.
        return f"{self.__class__.__name__}({super().__repr__()})"

    def extra(self, other: Mapping) -> Dict[str, str]:
        """Get packages in self but not in another `Packages` object.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        extra : `dict`
            Extra packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: self[pkg] for pkg in self.keys() - other.keys()}

    def missing(self, other: Mapping) -> Dict[str, str]:
        """Get packages in another `Packages` object but missing from self.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        missing : `dict` [`str`, `str`]
            Missing packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: other[pkg] for pkg in other.keys() - self.keys()}

    def difference(self, other: Mapping) -> Dict[str, Tuple[str, str]]:
        """Get packages present in both self and another `Packages` object
        whose versions differ.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        difference : `dict` [`str`, `tuple` [`str`, `str`]]
            Packages found in both tables with different versions. Keys
            (type `str`) are package names; values (type `tuple` [`str`,
            `str`]) are the version in self followed by the version in
            ``other``.
        """
        return {pkg: (self[pkg], other[pkg]) for pkg in self.keys() & other.keys() if self[pkg] != other[pkg]}

522 

523 

class _BackwardsCompatibilityUnpickler(pickle.Unpickler):
    """Unpickler that resolves every pickled class to `Packages`.

    Pickle files written when the `~lsst.utils.packages.Packages` class
    lived in a different package, under the name ``lsst.base.Packages``,
    still refer to that old location. Anyone going through this unpickler
    is, by construction, expecting a `~lsst.utils.packages.Packages`
    instance, so the class recorded in the pickle is ignored.
    """

    def find_class(self, module: str, name: str) -> Type:
        """Return the class that should be used for unpickling.

        Parameters
        ----------
        module : `str`
            Module name recorded in the pickle. Ignored.
        name : `str`
            Class name recorded in the pickle. Ignored.

        Returns
        -------
        cls : `type`
            Always the `Packages` class defined in this module.
        """
        return Packages

540 

541 

# Register YAML representers


def pkg_representer(dumper: yaml.Dumper, data: Any) -> yaml.MappingNode:
    """Represent Packages as a simple dict.

    Parameters
    ----------
    dumper : `yaml.Dumper`
        Dumper asked to emit the object.
    data : `Packages`
        The table to represent.

    Returns
    -------
    node : `yaml.MappingNode`
        Mapping node tagged ``lsst.utils.packages.Packages``.
    """
    tag = "lsst.utils.packages.Packages"
    return dumper.represent_mapping(tag, data, flow_style=None)


yaml.add_representer(Packages, pkg_representer)

551 

552 

def pkg_constructor(loader: yaml.constructor.SafeConstructor, node: yaml.Node) -> Any:
    """Construct a `Packages` from a YAML mapping node.

    Parameters
    ----------
    loader : `yaml.constructor.SafeConstructor`
        Loader that is processing the document.
    node : `yaml.Node`
        Mapping node holding the package names and versions.

    Yields
    ------
    packages : `Packages`
        The table built from the node's mapping.
    """
    yield Packages(loader.construct_mapping(node, deep=True))  # type: ignore


# yaml.CLoader is only defined when PyYAML has been built with the LibYAML
# bindings, so look it up defensively rather than failing at import time on
# a pure-python PyYAML installation.
for loader in (yaml.Loader, getattr(yaml, "CLoader", None), yaml.UnsafeLoader, yaml.SafeLoader, yaml.FullLoader):
    if loader is None:
        continue
    yaml.add_constructor("lsst.utils.packages.Packages", pkg_constructor, Loader=loader)

    # Register the old name with YAML.
    yaml.add_constructor("lsst.base.Packages", pkg_constructor, Loader=loader)