Coverage for python/lsst/meas/base/_id_generator.py: 59%

124 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-02 10:59 -0700

1# This file is part of meas_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

from __future__ import annotations

# Public API of this module; the config classes are the usual entry points.
__all__ = (
    "IdGenerator",
    "FullIdGenerator",
    "BaseIdGeneratorConfig",
    "DetectorExposureIdGeneratorConfig",
    "DetectorVisitIdGeneratorConfig",
    "SkyMapIdGeneratorConfig",
)

import dataclasses
from typing import Any, Callable

import numpy as np
from lsst.afw.table import IdFactory, Schema, SourceCatalog, SourceTable
from lsst.daf.butler import DataCoordinate, DimensionPacker
from lsst.obs.base import ExposureIdInfo
from lsst.pex.config import Config, ConfigField, Field
from lsst.pipe.base import Instrument
from lsst.skymap.packers import SkyMapDimensionPacker

DEFAULT_RELEASE_ID = 0
"""Default release ID to embed in catalog IDs.

This can be changed globally to avoid having to override individual task
configs to set the release ID.
"""

DEFAULT_N_RELEASES = 1  # 1 means don't reserve space for releases.
"""Default number of releases to reserve space for in catalog IDs."""

53 

54 

class BaseIdGeneratorConfig(Config):
    """Base class for configuration of `IdGenerator` instances.

    This class is abstract (it cannot use `abc.ABCMeta` due to a metaclass
    conflict), and it should mostly be considered an implementation detail
    of how the attributes it defines are included in its concrete derived
    classes.  Derived classes must implement `_make_dimension_packer`.

    See `IdGenerator` for usage.
    """

    release_id = Field(
        doc=(
            "Identifier for a data release or other version to embed in generated IDs. "
            "Zero is reserved for IDs with no embedded release identifier."
        ),
        dtype=int,
        # Module-level default so deployments can change it globally.
        default=DEFAULT_RELEASE_ID,
        check=lambda x: x >= 0,
    )

    n_releases = Field(
        doc=(
            "Number of (contiguous, starting from zero) `release_id` values to reserve space for. "
            "One (not zero) is used to reserve no space."
        ),
        dtype=int,
        default=DEFAULT_N_RELEASES,
        # Must be strictly positive; zero would leave no room for any release.
        check=lambda x: x > 0,
    )

    @classmethod
    def make_field(
        cls, doc="Configuration for how to generate catalog IDs from data IDs."
    ):
        """Return a config field that holds an instance of this class.

        Parameters
        ----------
        doc : `str`, optional
            Documentation for the config field.  As this configuration almost
            always plays the same role in any parent config, the default is
            usually fine.

        Returns
        -------
        field : `lsst.pex.config.ConfigField`
            New config field for instances of this class.

        Notes
        -----
        This method is provided as a convenience to reduce boilerplate
        downstream: it typically saves an import or two, and it allows the same
        usually-appropriate docstring to be reused instead of rewritten each
        time.  It does not need to be used in order to use this config class.
        """
        return ConfigField(doc, dtype=cls)

    def apply(self, data_id: DataCoordinate, **kwargs: Any) -> IdGenerator:
        """Construct an `IdGenerator` instance from this configuration.

        Parameters
        ----------
        data_id : `DataCoordinate`
            The data ID the `IdGenerator` will embed into all IDs.  This
            generally must be a fully-expanded data ID (i.e. have dimension
            records attached), that identifies the "instrument" or "skymap"
            dimension, though this requirement may be relaxed for certain
            dimension packer types.
        **kwargs
            Additional keyword arguments are interpreted as dimension value
            pairs to include in the data ID.  This may be used to provide
            constraints on dimensions for which records are not available.

        Returns
        -------
        id_generator : `IdGenerator`
            Object that generates integer IDs for catalogs and their rows by
            embedding the given data ID and a configurably-optional release ID.

        Notes
        -----
        This method is called `apply` for consistency with the pattern of using
        `lsst.pex.config.ConfigurableField` and `lsst.pex.config.RegistryField`
        to construct the objects whose configuration they hold.  It doesn't
        actually use those mechanisms because we have many config classes for
        the one `IdGenerator` class, instead of the other way around, and as a
        result a "config as factory" approach works better.
        """
        # Delegate to the subclass hook for the packer, then standardize the
        # data ID against the packer's dimensions, folding in any extra
        # dimension values supplied via kwargs.
        packer = self._make_dimension_packer(data_id)
        return FullIdGenerator(
            packer,
            DataCoordinate.standardize(data_id, **kwargs, graph=packer.dimensions),
            release_id=self.release_id,
            n_releases=self.n_releases,
        )

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        """Abstract hook for building a dimension packer from configuration.

        Parameters
        ----------
        data_id : `DataCoordinate`
            The data ID the `IdGenerator` will embed into all IDs.  This
            generally must be a fully-expanded data ID (i.e. have dimension
            records attached), that identifies the "instrument" or "skymap"
            dimension, though this requirement may be relaxed for certain
            dimension packer types.

        Returns
        -------
        packer : `lsst.daf.butler.DimensionPacker`
            Object that packs data IDs into integers.
        """
        raise NotImplementedError("Method is abstract.")

170 

171 

class DetectorExposureIdGeneratorConfig(BaseIdGeneratorConfig):
    """Configuration for generating integer IDs from
    ``{exposure, detector}`` data IDs.

    See `IdGenerator` for usage.
    """

    packer = Instrument.make_dimension_packer_config_field()

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        # Docstring inherited.
        # Instrument packers distinguish exposure- from visit-keyed IDs; this
        # config is the exposure flavor.
        packer_config = self.packer
        return packer_config.apply(data_id, is_exposure=True)

184 

185 

class DetectorVisitIdGeneratorConfig(BaseIdGeneratorConfig):
    """Configuration for generating integer IDs from
    ``{visit, detector}`` data IDs.

    See `IdGenerator` for usage.
    """

    packer = Instrument.make_dimension_packer_config_field()

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        # Docstring inherited.
        # Instrument packers distinguish exposure- from visit-keyed IDs; this
        # config is the visit flavor.
        packer_config = self.packer
        return packer_config.apply(data_id, is_exposure=False)

198 

199 

class SkyMapIdGeneratorConfig(BaseIdGeneratorConfig):
    """Configuration for generating integer IDs from
    ``{tract, patch, [band]}`` data IDs.

    See `IdGenerator` for usage.
    """

    packer = SkyMapDimensionPacker.make_config_field()

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        # Docstring inherited.
        # The skymap packer needs no extra arguments beyond the data ID.
        return self.packer.apply(data_id)

212 

213 

214class IdGenerator: 

215 """A helper class for packing some combination of a data ID, a per-data-ID 

216 counter, and a release ID into a single 64-bit integer. 

217 

218 As an object frequently passed into code that otherwise has no knowledge of 

219 its own data ID, `IdGenerator` also implements ``__str__`` to provide a 

220 human-readable representation of the data ID for use in logs and exception 

221 messages, with a suitable fallback when no data ID was provided to it. 

222 

223 Notes 

224 ----- 

225 Instances of this class are expected to usually be created via 

226 configuration, which will return a derived instance. This pattern starts 

227 with one of `DetectorExposureIdGeneratorConfig`, 

228 `DetectorVisitIdGeneratorConfig`, and `SkyMapIdGeneratorConfig` (which have 

229 the same interface), and looks something this: 

230 

231 from lsst.meas.base import DetectorVisitIdGeneratorConfig 

232 from lsst.pex.config import Config 

233 from lsst.pipe.base import PipelineTask 

234 

235 class SomeTaskConfig(PipelineTaskConfig, ...): 

236 id_generator = DetectorVisitIdGeneratorConfig.make_field() 

237 

238 class SomeTask(PipelineTaskTask): 

239 

240 ConfigClass = SomeTaskConfig 

241 

242 ... 

243 

244 def runQuantum(self, ..., data_id: DataCoordinate): 

245 id_generator = self.config.apply(data_id) 

246 catalog = id_generator.make_source_catalog(self.schema) ... 

247 

248 There is no requirement that `IdGenerator` instances be constructed in 

249 `PipelineTask.runQuantum` methods and passed to the ``run`` method, but 

250 this is the most common approach. 

251 

252 Code that wishes to instead unpack these record IDs to obtain the release 

253 ID, data ID and counter value should use the same config (often loaded from 

254 the ``Butler``) and pass a fully-expanded data ID identifying only a 

255 particular ``skymap`` or ``instrument`` to `unpacker_from_config`:: 

256 

257 config = butler.get("some_task_config") 

258 catalog = butler.get("some_output_catalog", given_data_id) 

259 unpacker = IdGenerator.unpacker_from_config( 

260 config.id_generator, butler.registry.expandDataId(skymap="HSC"), 

261 ) 

262 release_id, embedded_data_id, counter = unpacker(catalog[0]["id"]) 

263 assert embedded_data_id == given_data_id 

264 

265 This example is a bit contrived, as the ability to reconstruct the data ID 

266 is really only useful when you don't have it already, such as when the 

267 record ID is obtained from some further-processed version of the original 

268 table (such as a SQL database), and in that context the right config to 

269 load will not be obvious unless it has been carefully documented. 

270 

271 Simple instances of the base class that do not include a data ID may also 

272 be constructed by calling the constructor directly:: 

273 

274 id_generator = IdGenerator() 

275 

276 These IDs may not be unpacked, but they also don't need to be, because 

277 they're just the per-catalog "counter" integer already. 

278 

279 See Also 

280 -------- 

281 :ref:`lsst.meas.base-generating-source-and-object-ids` 

282 """ 

283 

284 # TODO: remove this method on DM-38687. 

285 # No deprecation decorator here because the type this method accepts is 

286 # itself deprecated, so it's only going to be called by code paths that 

287 # will go away when the deprecation turns into a removal, and which already 

288 # warn. 

289 @staticmethod 

290 def _from_exposure_id_info(exposure_id_info: ExposureIdInfo) -> IdGenerator: 

291 """Construct a new ID generator from the object this class supersedes. 

292 

293 This method is deprecated along with the type it accepts; it's provided 

294 only as a temporary helper to aid in the transition from 

295 `lsst.obs.base.ExposureIdInfo` to `IdGenerator`. 

296 """ 

297 return _ExposureIdInfoIdGenerator(exposure_id_info) 

298 

299 @property 

300 def catalog_id(self) -> int: 

301 """The integer identifier for the full catalog with this data ID, not 

302 just one of its rows (`int`). 

303 

304 This combines the packed data ID and release ID, but not the 

305 counter. 

306 """ 

307 return 0 

308 

309 def __str__(self) -> str: 

310 """Return a human-readable representation of the data ID (or a note 

311 about its absence) for use in log and error messages. 

312 """ 

313 return "[no data ID]" 

314 

315 def make_table_id_factory(self) -> IdFactory: 

316 """Construct a new `lsst.afw.table.IdFactory` for this catalog.""" 

317 return IdFactory.makeSimple() 

318 

319 def make_source_catalog(self, schema: Schema) -> SourceCatalog: 

320 """Construct a empty catalog object with an ID factory. 

321 

322 This is a convenience function for the common pattern of calling 

323 `make_table_id_factory`, constructing a `~lsst.afw.table.SourceTable` 

324 from that, and then constructing an (empty) 

325 `~lsst.afw.table.SourceCatalog` from that. 

326 """ 

327 table = SourceTable.make(schema, self.make_table_id_factory()) 

328 return SourceCatalog(table) 

329 

330 def arange(self, *args, **kwargs) -> np.ndarray: 

331 """Generate an array of integer IDs for this catalog. 

332 

333 All parameters are forwarded to `numpy.arange` to generate an array of 

334 per-catalog counter integers. These are then combined with the 

335 `catalog_id`` to form the returned array. 

336 

337 The IDs generated by `arange` will be equivalent to those generated by 

338 `make_table_id_factory` (and by extension, `make_source_catalog`) only 

339 if the counter integers start with ``1``, not ``0``, because that's 

340 what `~lsst.afw.table.IdFactory` does. 

341 """ 

342 return np.arange(*args, **kwargs) 

343 

344 @classmethod 

345 def unpacker_from_config( 

346 cls, 

347 config: BaseIdGeneratorConfig, 

348 fixed: DataCoordinate, 

349 ) -> Callable[[int], tuple[DataCoordinate, int]]: 

350 """Return a callable that unpacks the IDs generated by this class, 

351 from a config field. 

352 

353 Parameters 

354 ---------- 

355 config : `BaseIdGeneratorConfig` 

356 Configuration for an ID generator. 

357 fixed : `DataCoordinate` 

358 Data ID identifying the dimensions that are considered fixed by the 

359 `IdGenerator` that produced the IDs: usually just ``instrument`` or 

360 ``skymap``, depending on the configuration. For most configurations 

361 this will need to be a fully-expanded data ID. 

362 

363 Returns 

364 ------- 

365 unpacker 

366 Callable that takes a single `int` argument (an ID generated by an 

367 identically-configured `IdGenerator`) and returns a tuple of: 

368 

369 - release_id: the integer that identifies a data release or 

370 similar (`int`); 

371 - data_id : the data ID used to initialize the original ID 

372 generator (`DataCoordinate`); 

373 - counter : the counter part of the original ID (`int`). 

374 

375 Notes 

376 ----- 

377 This method cannot be used on IDs generated without a data ID. 

378 """ 

379 packer = config._make_dimension_packer(fixed) 

380 return cls.unpacker_from_dimension_packer(packer, config.n_releases) 

381 

382 @classmethod 

383 def unpacker_from_dimension_packer( 

384 cls, 

385 dimension_packer: DimensionPacker, 

386 n_releases: int = DEFAULT_N_RELEASES, 

387 ) -> Callable[[int], tuple[int, DataCoordinate, int]]: 

388 """Return a callable that unpacks the IDs generated by this class, 

389 from a `lsst.daf.butler.DimensionPacker` instance. 

390 

391 Parameters 

392 ---------- 

393 dimension_packer : `lsst.daf.butler.DimensionPacker` 

394 Dimension packer used to construct the original 

395 `DimensionPackerIdGenerator`. 

396 n_releases : `int`, optional 

397 Number of (contiguous, starting from zero) ``release_id`` values to 

398 reserve space for. One (not zero) is used to reserve no space. 

399 

400 Returns 

401 ------- 

402 unpacker 

403 Callable that takes a single `int` argument (an ID generated by an 

404 identically-constructed `DimensionPackerIdGenerator`) and returns a 

405 tuple of: 

406 

407 - release_id: the integer that identifies a data release or 

408 similar (`int`); 

409 - data_id : the data ID used to initialize the original ID 

410 generator (`DataCoordinate`); 

411 - counter : the counter part of the original ID (`int`). 

412 

413 Notes 

414 ----- 

415 This method cannot be used on IDs generated with no data ID. 

416 """ 

417 bits = _IdGeneratorBits(dimension_packer, n_releases) 

418 

419 def unpack(record_id: int) -> tuple[int, DataCoordinate, int]: 

420 rest, counter = divmod(record_id, bits.n_counters) 

421 rest, packed_data_id = divmod(rest, bits.n_data_ids) 

422 rest, release_id = divmod(rest, bits.n_data_ids) 

423 if rest: 

424 raise ValueError( 

425 f"Unexpected overall factor {rest} in record_id {record_id}, " 

426 f"after extracting packed_data_id={packed_data_id}, counter={counter}, and " 

427 f"release_id={release_id}." 

428 ) 

429 data_id = bits.packer.unpack(packed_data_id) 

430 return release_id, data_id, counter 

431 

432 return unpack 

433 

434 

class FullIdGenerator(IdGenerator):
    """The subclass of `IdGenerator` that actually includes packed data IDs
    and release IDs in its generated IDs.

    Parameters
    ----------
    dimension_packer : `lsst.daf.butler.DimensionPacker`
        Object that packs data IDs into integers.
    data_id : `lsst.daf.butler.DataCoordinate`
        Data ID to embed in all generated IDs and random seeds.
    release_id : `int`, optional
        Release identifier to embed in generated IDs.
    n_releases : `int`, optional
        Number of (contiguous, starting from zero) `release_id` values to
        reserve space for.  One (not zero) is used to reserve no space.

    Notes
    -----
    Instances of this class should usually be constructed via configuration
    instead of by calling the constructor directly; see `IdGenerator` for
    details.
    """

    def __init__(
        self,
        dimension_packer: DimensionPacker,
        data_id: DataCoordinate,
        release_id: int = DEFAULT_RELEASE_ID,
        n_releases: int = DEFAULT_N_RELEASES,
    ):
        # Work out the bit allocation first; everything else derives from it.
        bits = _IdGeneratorBits(dimension_packer, n_releases)
        self._bits = bits
        self._release_id = release_id
        # Keep only the dimensions the packer uses, and pack the data ID once
        # up front since the result never changes.
        self._data_id = data_id.subset(bits.packer.dimensions)
        self._packed_data_id = bits.packer.pack(self._data_id)

    @property
    def data_id(self) -> DataCoordinate:
        """The data ID that will be embedded in all generated IDs
        (`DataCoordinate`)."""
        return self._data_id

    @property
    def release_id(self) -> int:
        """The release ID that will embedded in all generated IDs (`int`)."""
        return self._release_id

    @property
    def catalog_id(self) -> int:
        # Docstring inherited.
        # Release ID occupies the bits above the packed data ID.
        return self._release_id * self._bits.n_data_ids + self._packed_data_id

    def __str__(self) -> str:
        # Docstring inherited.
        return str(self.data_id)

    def make_table_id_factory(self) -> IdFactory:
        # Docstring inherited.
        return IdFactory.makeSource(self.catalog_id, self._bits.counter_bits)

    def arange(self, *args, **kwargs) -> np.ndarray:
        # Docstring inherited.
        counters = super().arange(*args, **kwargs)
        overflow = counters >= self._bits.n_counters
        if overflow.any():
            arg_terms = [repr(arg) for arg in args] + [f"{k}={v!r}" for k, v in kwargs.items()]
            raise ValueError(
                f"Integer range from numpy.arange({arg_terms}) has "
                f"{overflow.sum()} values that are not "
                f"below the upper bound of {self._bits.n_counters}."
            )
        return self.catalog_id * self._bits.n_counters + counters

505 

506 

@dataclasses.dataclass
class _IdGeneratorBits:
    """A private helper struct that manages the allocation of bits between the
    packed data ID, the release ID, and a per-catalog counter.
    """

    packer: DimensionPacker
    """Object that maps data IDs to integers
    (`lsst.daf.butler.DimensionPacker`).
    """

    n_releases: int = dataclasses.field(default=1)
    """Number of releases to reserve space for, starting from zero (`int`).

    One (not zero) reserves no space, consistent with ``DEFAULT_N_RELEASES``
    and the ``n_releases`` config field (which requires a positive value); a
    zero here would break divmod-based unpacking of record IDs.
    """

    n_data_ids: int = dataclasses.field(init=False)
    """Number of contiguous packed data IDs to reserve space for, starting
    from zero (`int`).
    """

    counter_bits: int = dataclasses.field(init=False)
    """Number of bits allocated to the per-catalog counter (`int`)."""

    n_counters: int = dataclasses.field(init=False)
    """Number of contiguous counter values to reserve space for, starting from
    zero (`int`)."""

    def __post_init__(self) -> None:
        # Packed data IDs occupy the packer's full bit range.
        self.n_data_ids = 1 << self.packer.maxBits
        # Bits needed above the counter: release ID bits plus data ID bits.
        upper_bits = (self.n_releases - 1).bit_length() + self.packer.maxBits
        # Let afw.table decide how many of the remaining low bits the
        # per-catalog counter may use.
        self.counter_bits = IdFactory.computeReservedFromMaxBits(upper_bits)
        self.n_counters = 1 << self.counter_bits

538 

539 

# TODO: remove this class on DM-38687.
# No deprecation decorator here because the type this class holds is itself
# deprecated, so it's only going to be called by code paths that will go away
# when the deprecation turns into a removal, and which already warn.
class _ExposureIdInfoIdGenerator(IdGenerator):
    """An `IdGenerator` shim that wraps the deprecated
    `lsst.obs.base.ExposureIdInfo` during the transition to `IdGenerator`.
    """

    def __init__(self, exposure_id_info: ExposureIdInfo):
        self._info = exposure_id_info

    @property
    def catalog_id(self) -> int:
        # Docstring inherited.
        return self._info.expId

    def __str__(self) -> str:
        # The packed exposure ID is the closest thing to a data ID we hold.
        return str(self.catalog_id)

    def make_table_id_factory(self) -> IdFactory:
        # Docstring inherited.
        return self._info.makeSourceIdFactory()

    def arange(self, *args, **kwargs) -> np.ndarray:
        # Docstring inherited.
        raise NotImplementedError(
            "This IdGenerator implementation does not support arange; "
            "please update to IdGenerator.from_config for a full-featured implementation."
        )