Coverage for python/lsst/utils/packages.py: 25%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

168 statements  

1# This file is part of utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11# 

12from __future__ import annotations 

13 

14""" 

15Determine which packages are being used in the system and their versions 

16""" 

17import hashlib 

18import importlib 

19import io 

20import json 

21import logging 

22import os 

23import pickle 

24import re 

25import subprocess 

26import sys 

27import types 

28from collections.abc import Mapping 

29from functools import lru_cache 

30from typing import Any, Dict, Tuple, Type 

31 

32import yaml 

33 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Public names exported by a star-import of this module.
__all__ = [
    "getVersionFromPythonModule",
    "getPythonPackages",
    "getEnvironmentPackages",
    "getCondaPackages",
    "Packages",
]

43 

44 

# Packages that only matter when building (e.g., header-only libraries)
BUILDTIME = {"boost", "eigen", "tmv"}

# Python modules we try to import purely so that their version can be read.
# The version only appears to be available from python, even though it is
# the library that we actually use.
PYTHON = {"galsim"}

# Packages with no apparent mechanism for reporting their runtime version;
# for these the version has to be guessed from the environment
ENVIRONMENT = {"astrometry_net", "astrometry_net_data", "minuit2", "xpa"}

56 

57 

def getVersionFromPythonModule(module: types.ModuleType) -> str:
    """Return the version reported by a python module.

    Parameters
    ----------
    module : `module`
        Module to interrogate.

    Returns
    -------
    version : `str`
        The module's ``__version__`` converted to `str`. When the module
        lists build-time dependencies, `` with pkg=version ...`` is
        appended for each of them, in sorted package order.

    Raises
    ------
    AttributeError
        Raised if the module has no ``__version__`` attribute.

    Notes
    -----
    The ``__dependency_versions__`` attribute (a specific variable set by
    LSST's `~lsst.sconsUtils` at build time) is consulted only for the
    packages named in ``BUILDTIME``, i.e. those typically used only at
    build-time.
    """
    version = module.__version__
    if not hasattr(module, "__dependency_versions__"):
        return str(version)

    # Supplement the version with the build-time dependencies
    dependencies = module.__dependency_versions__
    buildtime_deps = sorted(BUILDTIME.intersection(dependencies.keys()))
    if buildtime_deps:
        suffix = " ".join(f"{pkg!s}={dependencies[pkg]!s}" for pkg in buildtime_deps)
        version += " with " + suffix
    return str(version)

90 

91 

def getPythonPackages() -> Dict[str, str]:
    """Collect the versions of the python packages imported so far.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are package names; values (type `str`) are their
        versions. Always contains a ``python`` entry holding `sys.version`.

    Notes
    -----
    Every entry of `sys.modules` is inspected, so only modules that have
    *already* been imported can be reported. Modules for which no version
    can be determined are silently left out.
    """
    # Some libraries only report their version through python; try to pull
    # those in first. Failure just means the library is not available.
    for candidate in PYTHON:
        try:
            importlib.import_module(candidate)
        except Exception:
            pass

    found = {"python": sys.version}

    # Work from a snapshot of the module names: iterating over sys.modules
    # directly is not atomic and is subject to race conditions.
    for module_name in list(sys.modules.keys()):
        try:
            version = getVersionFromPythonModule(sys.modules[module_name])
        except Exception:
            # No usable version for this module, so skip it
            continue

        # Report "foo.bar" rather than "foo.bar.version"; this prevents
        # duplication when the __init__.py includes "from .version import *".
        # Whatever name we end up with must agree with any version already
        # recorded under that name.
        key = module_name
        for suffix in (".version", "._version"):
            if key.endswith(suffix):
                key = key[: -len(suffix)]
            if key in found:
                assert version == found[key]

        # Use LSST package names instead of python module names. This
        # matches the names obtained from the environment (i.e., EUPS) so
        # that these build-time versions can be clobbered if the environment
        # reveals that the packages are not being used as-built.
        if "lsst" in key:
            key = key.replace("lsst.", "").replace(".", "_")

        found[key] = version

    return found

148 

149 

_eups = None  # Singleton Eups object


@lru_cache(maxsize=1)
def getEnvironmentPackages() -> Dict[str, str]:
    """Get products and their versions from the environment.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions. Empty (after logging a warning) if ``eups`` cannot be
        imported.

    Notes
    -----
    We use EUPS to determine the version of certain products (those that don't
    provide a means to determine the version any other way) and to check if
    uninstalled packages are being used. We only report the product/version
    for these packages.

    For an uninstalled ("LOCAL:") product the reported version is the EUPS
    version followed by ``@<git revision>`` and, when the working copy has
    uncommitted changes, ``+<MD5 of the diff>``. ``@GIT_ERROR`` is appended
    instead if git could not be run, and ``@NO_GIT`` if the product directory
    has no ``.git`` entry.

    The result is cached, so the environment is only examined on the first
    call.
    """
    global _eups

    try:
        from eups import Eups
        from eups.Product import Product
    except ImportError:
        log.warning("Unable to import eups, so cannot determine package versions from environment")
        return {}

    # Cache eups object since creating it can take a while
    if _eups is None:
        _eups = Eups()
    products = _eups.findProducts(tags=["setup"])

    # Get versions for things we can't determine via runtime mechanisms
    # XXX Should we just grab everything we can, rather than just a
    # predetermined set?
    # ENVIRONMENT holds product *names*, so membership has to be tested on
    # the name rather than on the product object itself.
    packages = {prod.name: prod.version for prod in products if prod.name in ENVIRONMENT}

    # The string 'LOCAL:' (the value of Product.LocalVersionPrefix) in the
    # version name indicates uninstalled code, so the version could be
    # different than what's being reported by the runtime environment (because
    # we don't tend to run "scons" every time we update some python file,
    # and even if we did sconsUtils probably doesn't check to see if the repo
    # is clean).
    for prod in products:
        if not prod.version.startswith(Product.LocalVersionPrefix):
            continue
        ver = prod.version

        gitDir = os.path.join(prod.dir, ".git")
        if os.path.exists(gitDir):
            # get the git revision and an indication if the working copy is
            # clean
            revCmd = ["git", "--git-dir=" + gitDir, "--work-tree=" + prod.dir, "rev-parse", "HEAD"]
            diffCmd = [
                "git",
                "--no-pager",
                "--git-dir=" + gitDir,
                "--work-tree=" + prod.dir,
                "diff",
                "--patch",
            ]
            try:
                rev = subprocess.check_output(revCmd).decode().strip()
                diff = subprocess.check_output(diffCmd)
            except Exception:
                # Best effort: record that git failed rather than aborting
                ver += "@GIT_ERROR"
            else:
                ver += "@" + rev
                if diff:
                    # The MD5 is only a fingerprint of the uncommitted changes
                    ver += "+" + hashlib.md5(diff).hexdigest()
        else:
            ver += "@NO_GIT"

        packages[prod.name] = ver
    return packages

226 

227 

@lru_cache(maxsize=1)
def getCondaPackages() -> Dict[str, str]:
    """Collect package versions from the conda environment.

    Returns
    -------
    packages : `dict`
        Keys (type `str`) are product names; values (type `str`) are their
        versions. A ``conda_env`` entry holding the environment name is
        added when that name can be found in the path of the python
        executable.

    Notes
    -----
    An empty result is returned if the conda python API cannot be imported.
    The result is cached, so conda is only queried on the first call.
    """
    try:
        from conda.cli.python_api import Commands, run_command
    except ImportError:
        return {}

    # Ask conda for the installed package list; the first element of the
    # returned sequence is the JSON text.
    listing = run_command(Commands.LIST, "--json")[0]
    packages = {}
    for entry in json.loads(listing):
        packages[entry["name"]] = entry["version"]

    # Try to work out the conda environment name and include it as a fake
    # package. The "obvious" way of running "conda info --json" does give
    # access to the active_prefix but takes about 2 seconds to run.
    # The equivalent to the code above would be:
    #    info_json = run_command(Commands.INFO, "--json")
    # As a compromise look for the env name in the path to the python
    # executable
    env_match = re.search(r"/envs/(.*?)/bin/", sys.executable)
    if env_match is not None:
        packages["conda_env"] = env_match.group(1)

    return packages

264 

265 

class Packages(dict):
    """A table of packages and their versions.

    There are a few different types of packages, and their versions are
    collected in different ways:

    1. Installed Conda packages are obtained via the Conda API. Conda is
       not required.
    2. Python modules (e.g., afw, numpy; galsim is also in this group even
       though we only use it through the library, because no version
       information is currently provided through the library): we get their
       version from the ``__version__`` module variable. Note that this means
       that we're only aware of modules that have already been imported.
    3. Other packages provide no run-time accessible version information (e.g.,
       astrometry_net): we get their version from interrogating the
       environment. Currently, that means EUPS; if EUPS is replaced or dropped
       then we'll need to consider an alternative means of getting this version
       information.
    4. Local versions of packages (a non-installed EUPS package, selected with
       ``setup -r /path/to/package``): we identify these through the
       environment (EUPS again) and use as a version the path supplemented with
       the ``git`` SHA and, if the git repo isn't clean, an MD5 of the diff.

    These package versions are collected and stored in a Packages object, which
    provides useful comparison and persistence features.

    Example usage:

    .. code-block:: python

        from lsst.utils.packages import Packages
        pkgs = Packages.fromSystem()
        print("Current packages:", pkgs)
        old = Packages.read("/path/to/packages.pickle")
        print("Old packages:", old)
        print("Missing packages compared to before:", pkgs.missing(old))
        print("Extra packages compared to before:", pkgs.extra(old))
        print("Different packages: ", pkgs.difference(old))
        old.update(pkgs)  # Include any new packages in the old
        old.write("/path/to/packages.pickle")

    Parameters
    ----------
    packages : `dict`
        A mapping {package: version} where both keys and values are type `str`.

    Notes
    -----
    This is a wrapper around a dict with some convenience methods.
    """

    # Maps a recognized file extension to the name of a serialization format
    formats = {".pkl": "pickle", ".pickle": "pickle", ".yaml": "yaml", ".json": "json"}

    def __setstate__(self, state: Dict[str, Any]) -> None:
        # This only seems to be called for old pickle files where
        # the data was stored in _packages.
        self.update(state["_packages"])

    @classmethod
    def fromSystem(cls) -> Packages:
        """Construct a `Packages` by examining the system.

        Determine packages by examining python's `sys.modules`, conda
        libraries and EUPS. EUPS packages take precedence over conda and
        general python packages.

        Returns
        -------
        packages : `Packages`
            All version package information that could be obtained.
        """
        packages = {}
        packages.update(getPythonPackages())
        packages.update(getCondaPackages())
        packages.update(getEnvironmentPackages())  # Should be last, to override products with LOCAL versions
        return cls(packages)

    @classmethod
    def fromBytes(cls, data: bytes, format: str) -> Packages:
        """Construct the object from a byte representation.

        Parameters
        ----------
        data : `bytes`
            The serialized form of this object in bytes.
        format : `str`
            The format of those bytes. Can be ``yaml``, ``json``, or
            ``pickle``.

        Returns
        -------
        packages : `Packages`
            The package information read from the input data.

        Raises
        ------
        ValueError
            Raised if ``format`` is not one of the supported formats.
        TypeError
            Raised if the deserialized object is not an instance of this
            class.
        """
        if format == "pickle":
            # NOTE(review): unpickling runs on whatever bytes the caller
            # supplies; the dedicated unpickler resolves every class lookup
            # to `Packages`, but only trusted data should be passed here.
            file = io.BytesIO(data)
            new = _BackwardsCompatibilityUnpickler(file).load()
        elif format == "yaml":
            new = yaml.load(data, Loader=yaml.SafeLoader)
        elif format == "json":
            new = cls(json.loads(data))
        else:
            raise ValueError(f"Unexpected serialization format given: {format}")
        if not isinstance(new, cls):
            raise TypeError(f"Extracted object of class '{type(new)}' but expected '{cls}'")
        return new

    @classmethod
    def read(cls, filename: str) -> Packages:
        """Read packages from filename.

        Parameters
        ----------
        filename : `str`
            Filename from which to read. The format is determined from the
            file extension. Currently support ``.pickle``, ``.pkl``,
            ``.json``, and ``.yaml``.

        Returns
        -------
        packages : `Packages`
            The packages information read from the file.

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        _, ext = os.path.splitext(filename)
        if ext not in cls.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "rb") as ff:
            # We assume that these classes are tiny so there is no
            # substantive memory impact by reading the entire file up front
            data = ff.read()
        return cls.fromBytes(data, cls.formats[ext])

    def toBytes(self, format: str) -> bytes:
        """Convert the object to a serialized bytes form using the
        specified format.

        Parameters
        ----------
        format : `str`
            Format to use when serializing. Can be ``yaml``, ``json``,
            or ``pickle``.

        Returns
        -------
        data : `bytes`
            Byte string representing the serialized object.

        Raises
        ------
        ValueError
            Raised if ``format`` is not one of the supported formats.
        """
        if format == "pickle":
            return pickle.dumps(self)
        elif format == "yaml":
            return yaml.dump(self).encode("utf-8")
        elif format == "json":
            return json.dumps(self).encode("utf-8")
        else:
            raise ValueError(f"Unexpected serialization format requested: {format}")

    def write(self, filename: str) -> None:
        """Write to file.

        Parameters
        ----------
        filename : `str`
            Filename to which to write. The format of the data file
            is determined from the file extension. Currently supports
            ``.pickle``, ``.pkl``, ``.json``, and ``.yaml``.

        Raises
        ------
        ValueError
            Raised if the file extension is not recognized.
        """
        _, ext = os.path.splitext(filename)
        if ext not in self.formats:
            raise ValueError(f"Format from {ext} extension in file {filename} not recognized")
        with open(filename, "wb") as ff:
            # Assumes that the bytes serialization of this object is
            # relatively small.
            ff.write(self.toBytes(self.formats[ext]))

    def __str__(self) -> str:
        # One "name:version," line per package, sorted alphabetically by
        # package name for convenience in reading. Building the lines
        # separately keeps an empty table from producing a lone "," line.
        lines = [f"{self.__class__.__name__}({{"]
        lines.extend(f"{prod!r}:{self[prod]!r}," for prod in sorted(self))
        lines.append("})")
        return "\n".join(lines)

    def __repr__(self) -> str:
        # Default repr() does not report the class name.
        return f"{self.__class__.__name__}({super().__repr__()})"

    def extra(self, other: Mapping) -> Dict[str, str]:
        """Get packages in self but not in another `Packages` object.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        extra : `dict`
            Extra packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: self[pkg] for pkg in self.keys() - other.keys()}

    def missing(self, other: Mapping) -> Dict[str, str]:
        """Get packages in another `Packages` object but missing from self.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        missing : `dict` [`str`, `str`]
            Missing packages. Keys (type `str`) are package names; values
            (type `str`) are their versions.
        """
        return {pkg: other[pkg] for pkg in other.keys() - self.keys()}

    def difference(self, other: Mapping) -> Dict[str, Tuple[str, str]]:
        """Get packages present in both self and another `Packages` object
        whose versions differ.

        Parameters
        ----------
        other : `Packages` or `Mapping`
            Other packages to compare against.

        Returns
        -------
        difference : `dict` [`str`, `tuple` [`str`, `str`]]
            Packages common to both but with different versions. Keys
            (type `str`) are package names; values
            (type `tuple` [`str`, `str`]) are the version in self followed
            by the version in ``other``.
        """
        return {pkg: (self[pkg], other[pkg]) for pkg in self.keys() & other.keys() if self[pkg] != other[pkg]}

499 

500 

class _BackwardsCompatibilityUnpickler(pickle.Unpickler):
    """Unpickler that resolves every class lookup to `Packages`.

    Pickle files written when the `~lsst.utils.packages.Packages` class
    lived in a different package, under the name ``lsst.base.Packages``,
    refer to that old location. Whenever this unpickler is in use we know
    that a `~lsst.utils.packages.Packages` instance is what must come back,
    so the location recorded in the pickle is not consulted.
    """

    def find_class(self, module: str, name: str) -> Type:
        """Return the class that should be used for unpickling.

        Parameters
        ----------
        module : `str`
            Module name recorded in the pickle. Ignored.
        name : `str`
            Class name recorded in the pickle. Ignored.

        Returns
        -------
        cls : `type`
            Always the `Packages` class defined in this module.
        """
        return Packages

517 

518 

# Register YAML representers


def pkg_representer(dumper: yaml.Dumper, data: Any) -> yaml.MappingNode:
    """Represent Packages as a simple dict.

    Parameters
    ----------
    dumper : `yaml.Dumper`
        Dumper asked to build the node.
    data : `Packages`
        The mapping to represent.

    Returns
    -------
    node : `yaml.MappingNode`
        Mapping node tagged ``lsst.utils.packages.Packages``.
    """
    tag = "lsst.utils.packages.Packages"
    return dumper.represent_mapping(tag, data, flow_style=None)


yaml.add_representer(Packages, pkg_representer)

528 

529 

def pkg_constructor(loader: yaml.constructor.SafeConstructor, node: yaml.Node) -> Any:
    """Construct a `Packages` from a YAML mapping node.

    Parameters
    ----------
    loader : `yaml.constructor.SafeConstructor`
        Loader that is processing the document.
    node : `yaml.Node`
        Mapping node holding the package names and versions.

    Yields
    ------
    packages : `Packages`
        The packages built from the fully-constructed mapping.

    Notes
    -----
    NOTE(review): this is deliberately a generator function; PyYAML accepts
    generator-style constructors. Confirm against the PyYAML constructor
    documentation before converting it to a plain ``return``.
    """
    yield Packages(loader.construct_mapping(node, deep=True))


# Not every loader exists in every PyYAML installation (``CLoader`` needs
# the LibYAML bindings), so look each one up by name and skip those that
# are missing rather than failing when this module is imported.
for _loader_name in ("Loader", "CLoader", "UnsafeLoader", "SafeLoader", "FullLoader"):
    loader = getattr(yaml, _loader_name, None)
    if loader is None:
        continue
    yaml.add_constructor("lsst.utils.packages.Packages", pkg_constructor, Loader=loader)

    # Register the old name with YAML.
    yaml.add_constructor("lsst.base.Packages", pkg_constructor, Loader=loader)