Coverage for python/lsst/meas/base/_id_generator.py: 63%

108 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-11 02:37 -0700

1# This file is part of meas_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "IdGenerator", 

26 "FullIdGenerator", 

27 "BaseIdGeneratorConfig", 

28 "DetectorExposureIdGeneratorConfig", 

29 "DetectorVisitIdGeneratorConfig", 

30 "SkyMapIdGeneratorConfig", 

31) 

32 

33import dataclasses 

34from typing import Any, Callable 

35 

36import numpy as np 

37from lsst.afw.table import IdFactory, Schema, SourceCatalog, SourceTable 

38from lsst.daf.butler import DataCoordinate, DimensionPacker 

39from lsst.pex.config import Config, ConfigField, Field 

40from lsst.pipe.base import Instrument 

41from lsst.skymap.packers import SkyMapDimensionPacker 

42 

43DEFAULT_RELEASE_ID = 0 

44"""Default release ID to embed in catalog IDs. 

45 

46This can be changed globally to avoid having to override individual task 

47configs to set the release ID. 

48""" 

49 

50DEFAULT_N_RELEASES = 1 # 1 means don't reserve space for releases. 

51"""Default number of releases to reserve space for in catalog IDs.""" 

52 

53 

class BaseIdGeneratorConfig(Config):
    """Base class for configuration of `IdGenerator` instances.

    This class is abstract (a metaclass conflict prevents it from using
    `abc.ABCMeta`); it mostly exists so the attributes it defines are shared
    by its concrete derived classes, each of which must implement
    `_make_dimension_packer`.

    See `IdGenerator` for usage.
    """

    release_id = Field(
        doc=(
            "Identifier for a data release or other version to embed in generated IDs. "
            "Zero is reserved for IDs with no embedded release identifier."
        ),
        dtype=int,
        default=DEFAULT_RELEASE_ID,
        check=lambda value: value >= 0,
    )

    n_releases = Field(
        doc=(
            "Number of (contiguous, starting from zero) `release_id` values to reserve space for. "
            "One (not zero) is used to reserve no space."
        ),
        dtype=int,
        default=DEFAULT_N_RELEASES,
        check=lambda value: value > 0,
    )

    @classmethod
    def make_field(
        cls, doc="Configuration for how to generate catalog IDs from data IDs."
    ):
        """Return a config field holding an instance of this class.

        Parameters
        ----------
        doc : `str`, optional
            Documentation for the config field.  This configuration almost
            always plays the same role wherever it appears, so the default
            text is usually appropriate.

        Returns
        -------
        field : `lsst.pex.config.ConfigField`
            New config field whose value is an instance of this class.

        Notes
        -----
        This is purely a convenience to cut downstream boilerplate: it saves
        an import or two and reuses a suitable docstring rather than having
        it rewritten each time.  Using it is optional; the config class works
        without it.
        """
        field = ConfigField(doc, dtype=cls)
        return field

    def apply(self, data_id: DataCoordinate, **kwargs: Any) -> IdGenerator:
        """Construct an `IdGenerator` instance from this configuration.

        Parameters
        ----------
        data_id : `DataCoordinate`
            The data ID the `IdGenerator` will embed into all IDs.  This
            generally must be a fully-expanded data ID (i.e. have dimension
            records attached) that identifies the "instrument" or "skymap"
            dimension, though certain dimension packer types may relax this
            requirement.
        **kwargs
            Additional keyword arguments interpreted as dimension key-value
            pairs to include in the data ID; useful to constrain dimensions
            for which records are unavailable.

        Returns
        -------
        id_generator : `IdGenerator`
            Object that generates integer IDs for catalogs and their rows by
            embedding the given data ID and a configurably-optional release
            ID.

        Notes
        -----
        The name ``apply`` follows the pattern established by
        `lsst.pex.config.ConfigurableField` and
        `lsst.pex.config.RegistryField` for constructing the objects whose
        configuration they hold.  Those mechanisms are not actually used here,
        because there are many config classes for the one `IdGenerator`
        class (rather than the reverse), which makes a "config as factory"
        approach the better fit.
        """
        packer = self._make_dimension_packer(data_id)
        # Standardize so the embedded data ID carries exactly the dimensions
        # the packer works with, folding in any extra keyword constraints.
        full_data_id = DataCoordinate.standardize(
            data_id, **kwargs, dimensions=packer.dimensions
        )
        return FullIdGenerator(
            packer,
            full_data_id,
            release_id=self.release_id,
            n_releases=self.n_releases,
        )

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        """Abstract hook for building a dimension packer from configuration.

        Parameters
        ----------
        data_id : `DataCoordinate`
            The data ID the `IdGenerator` will embed into all IDs.  This
            generally must be a fully-expanded data ID (i.e. have dimension
            records attached) that identifies the "instrument" or "skymap"
            dimension, though certain dimension packer types may relax this
            requirement.

        Returns
        -------
        packer : `lsst.daf.butler.DimensionPacker`
            Object that packs data IDs into integers.
        """
        raise NotImplementedError("Method is abstract.")

169 

170 

class DetectorExposureIdGeneratorConfig(BaseIdGeneratorConfig):
    """Configuration that generates integer IDs by packing
    ``{exposure, detector}`` data IDs.

    See `IdGenerator` for usage.
    """

    packer = Instrument.make_dimension_packer_config_field()

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        # Docstring inherited.
        # ``is_exposure=True`` selects exposure-based rather than visit-based
        # packing from the instrument's dimension packer.
        return self.packer.apply(data_id, is_exposure=True)

183 

184 

class DetectorVisitIdGeneratorConfig(BaseIdGeneratorConfig):
    """Configuration that generates integer IDs by packing
    ``{visit, detector}`` data IDs.

    See `IdGenerator` for usage.
    """

    packer = Instrument.make_dimension_packer_config_field()

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        # Docstring inherited.
        # ``is_exposure=False`` selects visit-based rather than exposure-based
        # packing from the instrument's dimension packer.
        return self.packer.apply(data_id, is_exposure=False)

197 

198 

class SkyMapIdGeneratorConfig(BaseIdGeneratorConfig):
    """Configuration that generates integer IDs by packing
    ``{tract, patch, [band]}`` data IDs.

    See `IdGenerator` for usage.
    """

    packer = SkyMapDimensionPacker.make_config_field()

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        # Docstring inherited.
        skymap_packer_config = self.packer
        return skymap_packer_config.apply(data_id)

211 

212 

213class IdGenerator: 

214 """A helper class for packing some combination of a data ID, a per-data-ID 

215 counter, and a release ID into a single 64-bit integer. 

216 

217 As an object frequently passed into code that otherwise has no knowledge of 

218 its own data ID, `IdGenerator` also implements ``__str__`` to provide a 

219 human-readable representation of the data ID for use in logs and exception 

220 messages, with a suitable fallback when no data ID was provided to it. 

221 

222 Notes 

223 ----- 

224 Instances of this class are expected to usually be created via 

225 configuration, which will return a derived instance. This pattern starts 

226 with one of `DetectorExposureIdGeneratorConfig`, 

227 `DetectorVisitIdGeneratorConfig`, and `SkyMapIdGeneratorConfig` (which have 

228 the same interface), and looks something this: 

229 

230 from lsst.meas.base import DetectorVisitIdGeneratorConfig 

231 from lsst.pex.config import Config 

232 from lsst.pipe.base import PipelineTask 

233 

234 class SomeTaskConfig(PipelineTaskConfig, ...): 

235 id_generator = DetectorVisitIdGeneratorConfig.make_field() 

236 

237 class SomeTask(PipelineTaskTask): 

238 

239 ConfigClass = SomeTaskConfig 

240 

241 ... 

242 

243 def runQuantum(self, ..., data_id: DataCoordinate): 

244 id_generator = self.config.apply(data_id) 

245 catalog = id_generator.make_source_catalog(self.schema) ... 

246 

247 There is no requirement that `IdGenerator` instances be constructed in 

248 `PipelineTask.runQuantum` methods and passed to the ``run`` method, but 

249 this is the most common approach. 

250 

251 Code that wishes to instead unpack these record IDs to obtain the release 

252 ID, data ID and counter value should use the same config (often loaded from 

253 the ``Butler``) and pass a fully-expanded data ID identifying only a 

254 particular ``skymap`` or ``instrument`` to `unpacker_from_config`:: 

255 

256 config = butler.get("some_task_config") 

257 catalog = butler.get("some_output_catalog", given_data_id) 

258 unpacker = IdGenerator.unpacker_from_config( 

259 config.id_generator, butler.registry.expandDataId(skymap="HSC"), 

260 ) 

261 release_id, embedded_data_id, counter = unpacker(catalog[0]["id"]) 

262 assert embedded_data_id == given_data_id 

263 

264 This example is a bit contrived, as the ability to reconstruct the data ID 

265 is really only useful when you don't have it already, such as when the 

266 record ID is obtained from some further-processed version of the original 

267 table (such as a SQL database), and in that context the right config to 

268 load will not be obvious unless it has been carefully documented. 

269 

270 Simple instances of the base class that do not include a data ID may also 

271 be constructed by calling the constructor directly:: 

272 

273 id_generator = IdGenerator() 

274 

275 These IDs may not be unpacked, but they also don't need to be, because 

276 they're just the per-catalog "counter" integer already. 

277 

278 See Also 

279 -------- 

280 :ref:`lsst.meas.base-generating-source-and-object-ids` 

281 """ 

282 

283 @property 

284 def catalog_id(self) -> int: 

285 """The integer identifier for the full catalog with this data ID, not 

286 just one of its rows (`int`). 

287 

288 This combines the packed data ID and release ID, but not the 

289 counter. 

290 """ 

291 return 0 

292 

293 def __str__(self) -> str: 

294 """Return a human-readable representation of the data ID (or a note 

295 about its absence) for use in log and error messages. 

296 """ 

297 return "[no data ID]" 

298 

299 def make_table_id_factory(self) -> IdFactory: 

300 """Construct a new `lsst.afw.table.IdFactory` for this catalog.""" 

301 return IdFactory.makeSimple() 

302 

303 def make_source_catalog(self, schema: Schema) -> SourceCatalog: 

304 """Construct a empty catalog object with an ID factory. 

305 

306 This is a convenience function for the common pattern of calling 

307 `make_table_id_factory`, constructing a `~lsst.afw.table.SourceTable` 

308 from that, and then constructing an (empty) 

309 `~lsst.afw.table.SourceCatalog` from that. 

310 """ 

311 table = SourceTable.make(schema, self.make_table_id_factory()) 

312 return SourceCatalog(table) 

313 

314 def arange(self, *args, **kwargs) -> np.ndarray: 

315 """Generate an array of integer IDs for this catalog. 

316 

317 All parameters are forwarded to `numpy.arange` to generate an array of 

318 per-catalog counter integers. These are then combined with the 

319 `catalog_id`` to form the returned array. 

320 

321 The IDs generated by `arange` will be equivalent to those generated by 

322 `make_table_id_factory` (and by extension, `make_source_catalog`) only 

323 if the counter integers start with ``1``, not ``0``, because that's 

324 what `~lsst.afw.table.IdFactory` does. 

325 """ 

326 return np.arange(*args, **kwargs) 

327 

328 @classmethod 

329 def unpacker_from_config( 

330 cls, 

331 config: BaseIdGeneratorConfig, 

332 fixed: DataCoordinate, 

333 ) -> Callable[[int], tuple[DataCoordinate, int]]: 

334 """Return a callable that unpacks the IDs generated by this class, 

335 from a config field. 

336 

337 Parameters 

338 ---------- 

339 config : `BaseIdGeneratorConfig` 

340 Configuration for an ID generator. 

341 fixed : `DataCoordinate` 

342 Data ID identifying the dimensions that are considered fixed by the 

343 `IdGenerator` that produced the IDs: usually just ``instrument`` or 

344 ``skymap``, depending on the configuration. For most configurations 

345 this will need to be a fully-expanded data ID. 

346 

347 Returns 

348 ------- 

349 unpacker 

350 Callable that takes a single `int` argument (an ID generated by an 

351 identically-configured `IdGenerator`) and returns a tuple of: 

352 

353 - release_id: the integer that identifies a data release or 

354 similar (`int`); 

355 - data_id : the data ID used to initialize the original ID 

356 generator (`DataCoordinate`); 

357 - counter : the counter part of the original ID (`int`). 

358 

359 Notes 

360 ----- 

361 This method cannot be used on IDs generated without a data ID. 

362 """ 

363 packer = config._make_dimension_packer(fixed) 

364 return cls.unpacker_from_dimension_packer(packer, config.n_releases) 

365 

366 @classmethod 

367 def unpacker_from_dimension_packer( 

368 cls, 

369 dimension_packer: DimensionPacker, 

370 n_releases: int = DEFAULT_N_RELEASES, 

371 ) -> Callable[[int], tuple[int, DataCoordinate, int]]: 

372 """Return a callable that unpacks the IDs generated by this class, 

373 from a `lsst.daf.butler.DimensionPacker` instance. 

374 

375 Parameters 

376 ---------- 

377 dimension_packer : `lsst.daf.butler.DimensionPacker` 

378 Dimension packer used to construct the original 

379 `DimensionPackerIdGenerator`. 

380 n_releases : `int`, optional 

381 Number of (contiguous, starting from zero) ``release_id`` values to 

382 reserve space for. One (not zero) is used to reserve no space. 

383 

384 Returns 

385 ------- 

386 unpacker 

387 Callable that takes a single `int` argument (an ID generated by an 

388 identically-constructed `DimensionPackerIdGenerator`) and returns a 

389 tuple of: 

390 

391 - release_id: the integer that identifies a data release or 

392 similar (`int`); 

393 - data_id : the data ID used to initialize the original ID 

394 generator (`DataCoordinate`); 

395 - counter : the counter part of the original ID (`int`). 

396 

397 Notes 

398 ----- 

399 This method cannot be used on IDs generated with no data ID. 

400 """ 

401 bits = _IdGeneratorBits(dimension_packer, n_releases) 

402 

403 def unpack(record_id: int) -> tuple[int, DataCoordinate, int]: 

404 rest, counter = divmod(record_id, bits.n_counters) 

405 rest, packed_data_id = divmod(rest, bits.n_data_ids) 

406 rest, release_id = divmod(rest, bits.n_data_ids) 

407 if rest: 

408 raise ValueError( 

409 f"Unexpected overall factor {rest} in record_id {record_id}, " 

410 f"after extracting packed_data_id={packed_data_id}, counter={counter}, and " 

411 f"release_id={release_id}." 

412 ) 

413 data_id = bits.packer.unpack(packed_data_id) 

414 return release_id, data_id, counter 

415 

416 return unpack 

417 

418 

class FullIdGenerator(IdGenerator):
    """The subclass of `IdGenerator` that actually includes packed data IDs
    and release IDs in its generated IDs.

    Parameters
    ----------
    dimension_packer : `lsst.daf.butler.DimensionPacker`
        Object that packs data IDs into integers.
    data_id : `lsst.daf.butler.DataCoordinate`
        Data ID to embed in all generated IDs and random seeds.
    release_id : `int`, optional
        Release identifier to embed in generated IDs.
    n_releases : `int`, optional
        Number of (contiguous, starting from zero) `release_id` values to
        reserve space for.  One (not zero) is used to reserve no space.

    Notes
    -----
    Instances of this class should usually be constructed via configuration
    instead of by calling the constructor directly; see `IdGenerator` for
    details.
    """

    def __init__(
        self,
        dimension_packer: DimensionPacker,
        data_id: DataCoordinate,
        release_id: int = DEFAULT_RELEASE_ID,
        n_releases: int = DEFAULT_N_RELEASES,
    ):
        self._bits = _IdGeneratorBits(dimension_packer, n_releases)
        self._release_id = release_id
        # Restrict to the packer's dimensions so extraneous dimensions in the
        # caller's data ID don't affect the embedded ID.
        self._data_id = data_id.subset(self._bits.packer.dimensions)
        self._packed_data_id = self._bits.packer.pack(self._data_id)

    @property
    def data_id(self) -> DataCoordinate:
        """The data ID that will be embedded in all generated IDs
        (`DataCoordinate`)."""
        return self._data_id

    @property
    def release_id(self) -> int:
        """The release ID that will be embedded in all generated IDs
        (`int`)."""
        return self._release_id

    @property
    def catalog_id(self) -> int:
        # Docstring inherited.
        # The release ID occupies the factor above the packed data ID.
        return self._packed_data_id + self._bits.n_data_ids * self._release_id

    def __str__(self) -> str:
        # Docstring inherited.
        return str(self.data_id)

    def make_table_id_factory(self) -> IdFactory:
        # Docstring inherited.
        return IdFactory.makeSource(self.catalog_id, self._bits.counter_bits)

    def arange(self, *args, **kwargs) -> np.ndarray:
        # Docstring inherited.
        lower = super().arange(*args, **kwargs)
        if np.any(lower >= self._bits.n_counters):
            arg_terms = [repr(arg) for arg in args] + [f"{k}={v!r}" for k, v in kwargs.items()]
            # BUGFIX: join the argument terms so the message shows the actual
            # call (e.g. ``numpy.arange(0, 10)``) rather than interpolating
            # the repr of a Python list.
            raise ValueError(
                f"Integer range from numpy.arange({', '.join(arg_terms)}) has "
                f"{(lower >= self._bits.n_counters).sum()} values that are not "
                f"below the upper bound of {self._bits.n_counters}."
            )
        return lower + self.catalog_id * self._bits.n_counters

489 

490 

@dataclasses.dataclass
class _IdGeneratorBits:
    """A private helper struct that manages the allocation of bits between the
    packed data ID, the release ID, and a per-catalog counter.
    """

    packer: DimensionPacker
    """Object that maps data IDs to integers
    (`lsst.daf.butler.DimensionPacker`).
    """

    n_releases: int = dataclasses.field(default=0)
    """Number of releases to reserve space for, starting from zero (`int`)."""

    n_data_ids: int = dataclasses.field(init=False)
    """Number of contiguous packed data IDs to reserve space for, starting
    from zero (`int`).
    """

    counter_bits: int = dataclasses.field(init=False)
    """Number of bits allocated to the per-catalog counter (`int`)."""

    n_counters: int = dataclasses.field(init=False)
    """Number of contiguous counter values to reserve space for, starting from
    zero (`int`)."""

    def __post_init__(self) -> None:
        # Every packed data ID fits below 2**maxBits.
        self.n_data_ids = 2 ** self.packer.maxBits
        # High bits hold the release ID and the packed data ID; whatever
        # IdFactory leaves over below them is available for the counter.
        high_bits = (self.n_releases - 1).bit_length() + self.packer.maxBits
        self.counter_bits = IdFactory.computeReservedFromMaxBits(high_bits)
        self.n_counters = 2 ** self.counter_bits