Coverage for python/lsst/utils/packages.py: 22%

173 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-01-06 01:34 -0800

1# This file is part of utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11# 

12from __future__ import annotations 

13 

14""" 

15Determine which packages are being used in the system and their versions 

16""" 

17import hashlib 

18import importlib 

19import io 

20import json 

21import logging 

22import os 

23import pickle 

24import re 

25import subprocess 

26import sys 

27import types 

28from collections.abc import Mapping 

29from functools import lru_cache 

30from typing import Any, Dict, Optional, Tuple, Type 

31 

32import yaml 

33 

34log = logging.getLogger(__name__) 

35 

36__all__ = [ 

37 "getVersionFromPythonModule", 

38 "getPythonPackages", 

39 "getEnvironmentPackages", 

40 "getCondaPackages", 

41 "Packages", 

42] 

43 

44 

# Products that only matter while building (e.g. header-only libraries).
BUILDTIME = {"boost", "eigen", "tmv"}

# Python modules we try to import purely to learn their version: the
# version is only exposed through python even though we use the library.
PYTHON = {"galsim"}

# Products with no apparent way of reporting their version at runtime;
# their version has to be guessed from the environment.
ENVIRONMENT = {"astrometry_net", "astrometry_net_data", "minuit2", "xpa"}

56 

57 

def getVersionFromPythonModule(module: types.ModuleType) -> str:
    """Return the version string advertised by a python module.

    Parameters
    ----------
    module : `module`
        Module whose version is wanted.

    Returns
    -------
    version : `str`
        The value of ``module.__version__`` converted to `str`, optionally
        followed by the versions of build-time dependencies.

    Raises
    ------
    AttributeError
        Raised if the module has no ``__version__`` attribute.

    Notes
    -----
    If the module carries a ``__dependency_versions__`` mapping (a variable
    set by LSST's `~lsst.sconsUtils` at build time), the entries whose names
    appear in ``BUILDTIME`` are appended as ``" with name=version ..."``,
    sorted by name. All other dependencies are ignored here.
    """
    version = module.__version__
    if not hasattr(module, "__dependency_versions__"):
        return str(version)

    dependencies = module.__dependency_versions__
    # Only packages typically used at build-time supplement the version.
    header_only = sorted(BUILDTIME & set(dependencies.keys()))
    if header_only:
        suffix = " ".join(f"{pkg}={dependencies[pkg]}" for pkg in header_only)
        version += " with " + suffix
    return str(version)

90 

91 

def getPythonPackages() -> Dict[str, str]:
    """Collect the versions of the python packages imported so far.

    Returns
    -------
    packages : `dict`
        Mapping of package name (`str`) to version (`str`). Always contains
        a ``"python"`` entry holding `sys.version`.

    Notes
    -----
    The modules named in ``PYTHON`` are imported first on a best-effort
    basis, then every entry of `sys.modules` is inspected. Consequently only
    modules that have *already* been imported can be reported, and any
    module for which no version can be determined is silently left out.
    """
    # Best-effort import of libraries that only report a version in python.
    for wanted in PYTHON:
        try:
            importlib.import_module(wanted)
        except Exception:
            pass  # Not available, which is fine

    packages = {"python": sys.version}

    # Walk a snapshot of the names: sys.modules may change underneath us.
    for name in list(sys.modules.keys()):
        module = sys.modules[name]
        try:
            ver = getVersionFromPythonModule(module)
        except Exception:
            continue  # No usable version; skip this module

        # Report "foo.bar" rather than "foo.bar.version"; this avoids
        # duplicates when __init__.py does "from .version import *".
        # Whether or not a suffix was stripped, a name that is already
        # recorded must agree with the version found here.
        for suffix in (".version", "._version"):
            if name.endswith(suffix):
                name = name[: -len(suffix)]
            if name in packages:
                assert ver == packages[name]

        # Switch to LSST product names (as reported by the environment,
        # i.e. EUPS) so that environment versions can later override these
        # build-time versions when packages are not used as-built.
        if "lsst" in name:
            name = name.replace("lsst.", "").replace(".", "_")

        packages[name] = ver

    return packages

148 

149 

_eups: Optional[Any] = None  # Singleton Eups object


@lru_cache(maxsize=1)
def getEnvironmentPackages(include_all: bool = False) -> Dict[str, str]:
    """Get products and their versions from the environment.

    Parameters
    ----------
    include_all : `bool`
        If `False` only returns locally-setup packages (plus the products
        listed in ``ENVIRONMENT``). If `True` all set up packages are
        returned with a version that includes any associated non-current
        tags.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions. Empty if ``eups`` cannot be imported.

    Notes
    -----
    We use EUPS to determine the version of certain products (those that don't
    provide a means to determine the version any other way) and to check if
    uninstalled packages are being used. We only report the product/version
    for these packages unless ``include_all`` is `True`.

    For a locally set up product the reported version is the EUPS version
    followed by ``@<git SHA>`` and, when ``git diff`` output is non-empty,
    ``+<MD5 of the diff>``. ``@GIT_ERROR`` is appended if the git commands
    fail and ``@NO_GIT`` if the product directory has no ``.git`` entry.

    The result is memoized with ``lru_cache(maxsize=1)``, so the same `dict`
    object is handed back on repeated calls with the same argument.
    """
    try:
        from eups import Eups
        from eups.Product import Product
    except ImportError:
        log.warning("Unable to import eups, so cannot determine package versions from environment")
        return {}

    # Cache eups object since creating it can take a while
    global _eups
    if not _eups:
        _eups = Eups()
    products = _eups.findProducts(tags=["setup"])

    # Get versions for things we can't determine via runtime mechanisms.
    # ENVIRONMENT holds product *names*, so the membership test must use
    # ``prod.name``; testing the Product object itself never matches.
    # XXX Should we just grab everything we can, rather than just a
    # predetermined set?
    packages = {prod.name: prod.version for prod in products if prod.name in ENVIRONMENT}

    # The string 'LOCAL:' (the value of Product.LocalVersionPrefix) in the
    # version name indicates uninstalled code, so the version could be
    # different than what's being reported by the runtime environment (because
    # we don't tend to run "scons" every time we update some python file,
    # and even if we did sconsUtils probably doesn't check to see if the repo
    # is clean).
    for prod in products:
        if not prod.version.startswith(Product.LocalVersionPrefix):
            if include_all:
                tags = {t for t in prod.tags if t != "current"}
                tag_msg = " (" + " ".join(tags) + ")" if tags else ""
                packages[prod.name] = prod.version + tag_msg
            continue
        ver = prod.version

        gitDir = os.path.join(prod.dir, ".git")
        if os.path.exists(gitDir):
            # get the git revision and an indication if the working copy is
            # clean
            revCmd = ["git", "--git-dir=" + gitDir, "--work-tree=" + prod.dir, "rev-parse", "HEAD"]
            diffCmd = [
                "git",
                "--no-pager",
                "--git-dir=" + gitDir,
                "--work-tree=" + prod.dir,
                "diff",
                "--patch",
            ]
            try:
                rev = subprocess.check_output(revCmd).decode().strip()
                diff = subprocess.check_output(diffCmd)
            except Exception:
                # Any failure to query git is recorded in the version
                # rather than propagated to the caller.
                ver += "@GIT_ERROR"
            else:
                ver += "@" + rev
                if diff:
                    # The MD5 only fingerprints the uncommitted changes.
                    ver += "+" + hashlib.md5(diff).hexdigest()
        else:
            ver += "@NO_GIT"

        packages[prod.name] = ver
    return packages

237 

238 

@lru_cache(maxsize=1)
def getCondaPackages() -> Dict[str, str]:
    """Report the packages installed in the conda environment.

    Returns
    -------
    packages : `dict`
        Mapping of package name (`str`) to version (`str`). When the conda
        environment name can be worked out it is included under the fake
        package name ``"conda_env"``.

    Notes
    -----
    An empty `dict` is returned if the conda python API cannot be imported.
    The result is memoized, so the same `dict` object is handed back on
    every call.
    """
    try:
        from conda.cli.python_api import Commands, run_command
    except ImportError:
        return {}

    # Ask conda for the installed package list as JSON; the JSON text is the
    # first element of what run_command returns.
    listing = run_command(Commands.LIST, "--json")
    packages = {}
    for entry in json.loads(listing[0]):
        packages[entry["name"]] = entry["version"]

    # Try to work out the conda environment name and include it as a fake
    # package. The "obvious" way of running "conda info --json" does give
    # access to the active_prefix but takes about 2 seconds to run:
    #     info_json = run_command(Commands.INFO, "--json")
    # As a compromise look for the env name in the path to the python
    # executable.
    env_match = re.search(r"/envs/(.*?)/bin/", sys.executable)
    if env_match is not None:
        packages["conda_env"] = env_match.group(1)

    return packages

275 

276 

class Packages(dict):
    """A table of packages and their versions.

    Package versions come from several sources, each collected differently:

    1. Installed Conda packages are obtained via the Conda API. Conda is
       not required.
    2. Python modules (e.g., afw, numpy; galsim is also in this group even
       though we only use it through the library, because no version
       information is currently provided through the library): the version
       is read from the ``__version__`` module variable, so only modules
       that have already been imported are known.
    3. Packages with no run-time accessible version information (e.g.,
       astrometry_net): the version comes from interrogating the
       environment. Currently that means EUPS; if EUPS is replaced or
       dropped an alternative source will be needed.
    4. Local versions of packages (a non-installed EUPS package, selected
       with ``setup -r /path/to/package``): identified through the
       environment (EUPS again); the version is the path supplemented with
       the ``git`` SHA and, if the git repo isn't clean, an MD5 of the diff.

    The collected versions are held in a `Packages` object, which adds
    comparison and persistence helpers.

    Example usage:

    .. code-block:: python

        from lsst.utils.packages import Packages
        pkgs = Packages.fromSystem()
        print("Current packages:", pkgs)
        old = Packages.read("/path/to/packages.pickle")
        print("Old packages:", old)
        print("Missing packages compared to before:", pkgs.missing(old))
        print("Extra packages compared to before:", pkgs.extra(old))
        print("Different packages: ", pkgs.difference(old))
        old.update(pkgs)  # Include any new packages in the old
        old.write("/path/to/packages.pickle")

    Parameters
    ----------
    packages : `dict`
        A mapping {package: version} where both keys and values are type
        `str`.

    Notes
    -----
    This is a `dict` subclass with some convenience methods.
    """

    # File extension -> serialization format name understood by
    # ``fromBytes`` and ``toBytes``.
    formats = {".pkl": "pickle", ".pickle": "pickle", ".yaml": "yaml", ".json": "json"}

    def __setstate__(self, state: Dict[str, Any]) -> None:
        # This only seems to be reached for old pickle files, in which the
        # mapping was stored under a ``_packages`` attribute.
        legacy = state["_packages"]
        self.update(legacy)

    @classmethod
    def fromSystem(cls) -> Packages:
        """Construct a `Packages` by examining the system.

        Packages are gathered from python's `sys.modules`, conda libraries
        and EUPS. EUPS packages take precedence over conda and general
        python packages.

        Returns
        -------
        packages : `Packages`
            All version package information that could be obtained.
        """
        collected: Dict[str, str] = {}
        # Later sources override earlier ones, so the environment goes last
        # to let products with LOCAL versions win.
        for gather in (getPythonPackages, getCondaPackages, getEnvironmentPackages):
            collected.update(gather())
        return cls(collected)

    @classmethod
    def fromBytes(cls, data: bytes, format: str) -> Packages:
        """Construct the object from a byte representation.

        Parameters
        ----------
        data : `bytes`
            The serialized form of this object in bytes.
        format : `str`
            The format of those bytes. Can be ``yaml``, ``json``, or
            ``pickle``.

        Returns
        -------
        packages : `Packages`
            The package information read from the input data.

        Raises
        ------
        ValueError
            Raised if ``format`` is not one of the supported formats.
        TypeError
            Raised if the deserialized object is not an instance of this
            class.
        """
        if format == "pickle":
            new = _BackwardsCompatibilityUnpickler(io.BytesIO(data)).load()
        elif format == "yaml":
            new = yaml.load(data, Loader=yaml.SafeLoader)
        elif format == "json":
            new = cls(json.loads(data))
        else:
            raise ValueError(f"Unexpected serialization format given: {format}")
        if isinstance(new, cls):
            return new
        raise TypeError(f"Extracted object of class '{type(new)}' but expected '{cls}'")

    @classmethod
    def read(cls, filename: str) -> Packages:
        """Read packages from filename.

        Parameters
        ----------
        filename : `str`
            Filename from which to read. The format is determined from the
            file extension. Currently support ``.pickle``, ``.pkl``,
            ``.json``, and ``.yaml``.

        Returns
        -------
        packages : `Packages`
            The packages information read from the file.

        Raises
        ------
        ValueError
            Raised if the file extension is not a recognized format.
        """
        extension = os.path.splitext(filename)[1]
        if extension not in cls.formats:
            raise ValueError(f"Format from {extension} extension in file {filename} not recognized")
        with open(filename, "rb") as stream:
            # These objects are assumed to be tiny, so slurping the whole
            # file has no substantive memory impact.
            payload = stream.read()
        return cls.fromBytes(payload, cls.formats[extension])

    def toBytes(self, format: str) -> bytes:
        """Convert the object to a serialized bytes form using the
        specified format.

        Parameters
        ----------
        format : `str`
            Format to use when serializing. Can be ``yaml``, ``json``,
            or ``pickle``.

        Returns
        -------
        data : `bytes`
            Byte string representing the serialized object.

        Raises
        ------
        ValueError
            Raised if ``format`` is not one of the supported formats.
        """
        if format == "pickle":
            return pickle.dumps(self)
        if format == "yaml":
            return yaml.dump(self).encode("utf-8")
        if format == "json":
            return json.dumps(self).encode("utf-8")
        raise ValueError(f"Unexpected serialization format requested: {format}")

    def write(self, filename: str) -> None:
        """Write to file.

        Parameters
        ----------
        filename : `str`
            Filename to which to write. The format of the data file
            is determined from the file extension. Currently supports
            ``.pickle``, ``.pkl``, ``.json``, and ``.yaml``.

        Raises
        ------
        ValueError
            Raised if the file extension is not a recognized format.
        """
        extension = os.path.splitext(filename)[1]
        if extension not in self.formats:
            raise ValueError(f"Format from {extension} extension in file {filename} not recognized")
        with open(filename, "wb") as stream:
            # The serialized form is assumed to be relatively small.
            stream.write(self.toBytes(self.formats[extension]))

    def __str__(self) -> str:
        # One entry per line, sorted by name for ease of reading.
        entries = [f"{prod!r}:{self[prod]!r}" for prod in sorted(self)]
        return ("%s({\n" % self.__class__.__name__) + ",\n".join(entries) + ",\n})"

    def __repr__(self) -> str:
        # The default dict repr() does not report the class name.
        mapping_text = super().__repr__()
        return f"{self.__class__.__name__}({mapping_text})"

    def extra(self, other: Mapping) -> Dict[str, str]:
        """Get packages in self but not in another `Packages` object.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        extra : `dict`
            Extra packages. Keys (type `str`) are package names; values
            (type `str`) are their versions in ``self``.
        """
        only_here = self.keys() - other.keys()
        return {pkg: self[pkg] for pkg in only_here}

    def missing(self, other: Mapping) -> Dict[str, str]:
        """Get packages in another `Packages` object but missing from self.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        missing : `dict` [`str`, `str`]
            Missing packages. Keys (type `str`) are package names; values
            (type `str`) are their versions in ``other``.
        """
        only_there = other.keys() - self.keys()
        return {pkg: other[pkg] for pkg in only_there}

    def difference(self, other: Mapping) -> Dict[str, Tuple[str, str]]:
        """Get packages present in both self and another `Packages` object
        whose versions differ.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        difference : `dict` [`str`, `tuple` [`str`, `str`]]
            Packages with differing versions. Keys (type `str`) are package
            names; values (type `tuple` [`str`, `str`]) are the version in
            ``self`` followed by the version in ``other``.
        """
        changed = {}
        for pkg in self.keys() & other.keys():
            mine, theirs = self[pkg], other[pkg]
            if mine != theirs:
                changed[pkg] = (mine, theirs)
        return changed

510 

511 

class _BackwardsCompatibilityUnpickler(pickle.Unpickler):
    """Unpickler that maps every pickled class onto `Packages`.

    This lets users of the API read pickle files written when the
    `~lsst.utils.packages.Packages` class lived in a different package and
    was known as ``lsst.base.Packages``. Anything going through this
    unpickler is known to be package information, so the result must be a
    `~lsst.utils.packages.Packages` instance.
    """

    def find_class(self, module: str, name: str) -> Type:
        """Return the class that should be used for unpickling.

        The requested ``module`` and ``name`` are ignored; the answer is
        always the `Packages` class defined in this package.
        """
        return Packages

528 

529 

530# Register YAML representers 

531 

532 

def pkg_representer(dumper: yaml.Dumper, data: Any) -> yaml.MappingNode:
    """Represent Packages as a simple dict.

    The mapping node is tagged ``lsst.utils.packages.Packages``.
    """
    tag = "lsst.utils.packages.Packages"
    return dumper.represent_mapping(tag, data, flow_style=None)


# Dump every Packages instance through the representer above.
yaml.add_representer(Packages, pkg_representer)

539 

540 

def pkg_constructor(loader: yaml.constructor.SafeConstructor, node: yaml.Node) -> Any:
    """Build a `Packages` from a YAML mapping node.

    Written as a generator: the constructed `Packages` is yielded rather
    than returned.
    """
    yield Packages(loader.construct_mapping(node, deep=True))  # type: ignore


# Register the constructor with every loader this PyYAML build provides.
# Loaders are looked up by name because some are optional: ``CLoader`` only
# exists when PyYAML was built with the LibYAML bindings, and referencing it
# directly would raise AttributeError at import time on other builds.
for loader_name in ("Loader", "CLoader", "UnsafeLoader", "SafeLoader", "FullLoader"):
    loader = getattr(yaml, loader_name, None)
    if loader is None:
        continue
    yaml.add_constructor("lsst.utils.packages.Packages", pkg_constructor, Loader=loader)

    # Register the old name with YAML.
    yaml.add_constructor("lsst.base.Packages", pkg_constructor, Loader=loader)