Coverage for python/lsst/meas/base/_id_generator.py: 64%
124 statements

# This file is part of meas_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "IdGenerator",
    "FullIdGenerator",
    "BaseIdGeneratorConfig",
    "DetectorExposureIdGeneratorConfig",
    "DetectorVisitIdGeneratorConfig",
    "SkyMapIdGeneratorConfig",
)

import dataclasses
from typing import Any, Callable

import numpy as np
from lsst.afw.table import IdFactory, Schema, SourceCatalog, SourceTable
from lsst.daf.butler import DataCoordinate, DimensionPacker
from lsst.obs.base import ExposureIdInfo
from lsst.pex.config import Config, ConfigField, Field
from lsst.pipe.base import Instrument
from lsst.skymap.packers import SkyMapDimensionPacker

DEFAULT_RELEASE_ID = 0
"""Default release ID to embed in catalog IDs.

This can be changed globally to avoid having to override individual task
configs to set the release ID.
"""

DEFAULT_N_RELEASES = 1  # 1 means don't reserve space for releases.
"""Default number of releases to reserve space for in catalog IDs."""


class BaseIdGeneratorConfig(Config):
    """Base class for configuration of `IdGenerator` instances.

    This class is abstract (it cannot use `abc.ABCMeta` due to a metaclass
    conflict), and it should mostly be considered an implementation detail
    of how the attributes it defines are included in its concrete derived
    classes. Derived classes must implement `_make_dimension_packer`.

    See `IdGenerator` for usage.
    """

    release_id = Field(
        doc=(
            "Identifier for a data release or other version to embed in generated IDs. "
            "Zero is reserved for IDs with no embedded release identifier."
        ),
        dtype=int,
        default=DEFAULT_RELEASE_ID,
        check=lambda x: x >= 0,
    )

    n_releases = Field(
        doc=(
            "Number of (contiguous, starting from zero) `release_id` values to reserve space for. "
            "One (not zero) is used to reserve no space."
        ),
        dtype=int,
        default=DEFAULT_N_RELEASES,
        check=lambda x: x > 0,
    )

    @classmethod
    def make_field(
        cls, doc="Configuration for how to generate catalog IDs from data IDs."
    ):
        """Return a config field that holds an instance of this class.

        Parameters
        ----------
        doc : `str`, optional
            Documentation for the config field. As this configuration almost
            always plays the same role in any parent config, the default is
            usually fine.

        Returns
        -------
        field : `lsst.pex.config.ConfigField`
            New config field for instances of this class.

        Notes
        -----
        This method is provided as a convenience to reduce boilerplate
        downstream: it typically saves an import or two, and it allows the
        same usually-appropriate docstring to be reused instead of rewritten
        each time. It does not need to be used in order to use this config
        class.
        """
        return ConfigField(doc, dtype=cls)

    def apply(self, data_id: DataCoordinate, **kwargs: Any) -> IdGenerator:
        """Construct an `IdGenerator` instance from this configuration.

        Parameters
        ----------
        data_id : `DataCoordinate`
            The data ID the `IdGenerator` will embed into all IDs. This
            generally must be a fully-expanded data ID (i.e. one with
            dimension records attached) that identifies the "instrument" or
            "skymap" dimension, though this requirement may be relaxed for
            certain dimension packer types.
        **kwargs
            Additional keyword arguments are interpreted as dimension value
            pairs to include in the data ID. This may be used to provide
            constraints on dimensions for which records are not available.

        Returns
        -------
        id_generator : `IdGenerator`
            Object that generates integer IDs for catalogs and their rows by
            embedding the given data ID and a configurably-optional release
            ID.

        Notes
        -----
        This method is called `apply` for consistency with the pattern of
        using `lsst.pex.config.ConfigurableField` and
        `lsst.pex.config.RegistryField` to construct the objects whose
        configuration they hold. It doesn't actually use those mechanisms
        because we have many config classes for the one `IdGenerator` class,
        instead of the other way around, and as a result a "config as
        factory" approach works better.
        """
        packer = self._make_dimension_packer(data_id)
        return FullIdGenerator(
            packer,
            DataCoordinate.standardize(data_id, **kwargs, graph=packer.dimensions),
            release_id=self.release_id,
            n_releases=self.n_releases,
        )

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        """Abstract hook for building a dimension packer from configuration.

        Parameters
        ----------
        data_id : `DataCoordinate`
            The data ID the `IdGenerator` will embed into all IDs. This
            generally must be a fully-expanded data ID (i.e. one with
            dimension records attached) that identifies the "instrument" or
            "skymap" dimension, though this requirement may be relaxed for
            certain dimension packer types.

        Returns
        -------
        packer : `lsst.daf.butler.DimensionPacker`
            Object that packs data IDs into integers.
        """
        raise NotImplementedError("Method is abstract.")
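

# The sketch below is illustrative only and not part of the module's API: it
# shows the "config as factory" pattern described in `apply`, assuming the
# caller already has a concrete config instance (e.g. a
# `SkyMapIdGeneratorConfig`) and a fully-expanded data ID from the butler.
# The function name and signature are hypothetical.
def _example_apply_usage(
    config: BaseIdGeneratorConfig, data_id: DataCoordinate, schema: Schema
) -> SourceCatalog:
    # Build the generator from configuration; the data ID (plus any extra
    # dimension constraints passed as keyword arguments) is embedded in every
    # ID the generator produces.
    id_generator = config.apply(data_id)
    # Seed an empty catalog whose row IDs encode the data ID, the release ID,
    # and a per-catalog counter.
    return id_generator.make_source_catalog(schema)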


class DetectorExposureIdGeneratorConfig(BaseIdGeneratorConfig):
    """Configuration class for generating integer IDs from
    ``{exposure, detector}`` data IDs.

    See `IdGenerator` for usage.
    """

    packer = Instrument.make_dimension_packer_config_field()

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        # Docstring inherited.
        return self.packer.apply(data_id, is_exposure=True)


class DetectorVisitIdGeneratorConfig(BaseIdGeneratorConfig):
    """Configuration class for generating integer IDs from
    ``{visit, detector}`` data IDs.

    See `IdGenerator` for usage.
    """

    packer = Instrument.make_dimension_packer_config_field()

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        # Docstring inherited.
        return self.packer.apply(data_id, is_exposure=False)


class SkyMapIdGeneratorConfig(BaseIdGeneratorConfig):
    """Configuration class for generating integer IDs from
    ``{tract, patch, [band]}`` data IDs.

    See `IdGenerator` for usage.
    """

    packer = SkyMapDimensionPacker.make_config_field()

    def _make_dimension_packer(self, data_id: DataCoordinate) -> DimensionPacker:
        # Docstring inherited.
        return self.packer.apply(data_id)
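

# Illustrative sketch only (the class and field names here are hypothetical):
# the three concrete configs above differ only in which dimension packer they
# build, so a task picks the one matching the data IDs it processes.
# Exposure-level tasks use DetectorExposureIdGeneratorConfig, visit-level
# tasks use DetectorVisitIdGeneratorConfig, and tract/patch-level tasks use
# SkyMapIdGeneratorConfig.
class _ExampleCoaddMeasurementConfig(Config):
    """Hypothetical config for a tract/patch-level measurement task."""

    id_generator = SkyMapIdGeneratorConfig.make_field()


class _ExampleCalexpMeasurementConfig(Config):
    """Hypothetical config for a per-detector, per-visit measurement task."""

    id_generator = DetectorVisitIdGeneratorConfig.make_field()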


class IdGenerator:
    """A helper class for packing some combination of a data ID, a
    per-data-ID counter, and a release ID into a single 64-bit integer.

    As an object frequently passed into code that otherwise has no knowledge
    of its own data ID, `IdGenerator` also implements ``__str__`` to provide
    a human-readable representation of the data ID for use in logs and
    exception messages, with a suitable fallback when no data ID was provided
    to it.

    Notes
    -----
    Instances of this class are expected to usually be created via
    configuration, which will return a derived instance. This pattern starts
    with one of `DetectorExposureIdGeneratorConfig`,
    `DetectorVisitIdGeneratorConfig`, and `SkyMapIdGeneratorConfig` (which
    have the same interface), and looks something like this::

        from lsst.meas.base import DetectorVisitIdGeneratorConfig
        from lsst.pipe.base import PipelineTask, PipelineTaskConfig

        class SomeTaskConfig(PipelineTaskConfig, ...):
            id_generator = DetectorVisitIdGeneratorConfig.make_field()

        class SomeTask(PipelineTask):

            ConfigClass = SomeTaskConfig

            ...

            def runQuantum(self, ..., data_id: DataCoordinate):
                id_generator = self.config.id_generator.apply(data_id)
                catalog = id_generator.make_source_catalog(self.schema)
                ...

    There is no requirement that `IdGenerator` instances be constructed in
    `PipelineTask.runQuantum` methods and passed to the ``run`` method, but
    this is the most common approach.

    Code that wishes to instead unpack these record IDs to obtain the release
    ID, data ID, and counter value should use the same config (often loaded
    from the ``Butler``) and pass a fully-expanded data ID identifying only a
    particular ``skymap`` or ``instrument`` to `unpacker_from_config`::

        config = butler.get("some_task_config")
        catalog = butler.get("some_output_catalog", given_data_id)
        unpacker = IdGenerator.unpacker_from_config(
            config.id_generator, butler.registry.expandDataId(skymap="HSC"),
        )
        release_id, embedded_data_id, counter = unpacker(catalog[0]["id"])
        assert embedded_data_id == given_data_id

    This example is a bit contrived, as the ability to reconstruct the data
    ID is really only useful when you don't have it already, such as when the
    record ID is obtained from some further-processed version of the original
    table (such as a SQL database), and in that context the right config to
    load will not be obvious unless it has been carefully documented.

    Simple instances of the base class that do not include a data ID may also
    be constructed by calling the constructor directly::

        id_generator = IdGenerator()

    These IDs may not be unpacked, but they also don't need to be, because
    they're just the per-catalog "counter" integer already.

    See Also
    --------
    :ref:`lsst.meas.base-generating-source-and-object-ids`
    """

    # TODO: remove this method on DM-38687.
    # No deprecation decorator here because the type this method accepts is
    # itself deprecated, so it's only going to be called by code paths that
    # will go away when the deprecation turns into a removal, and which
    # already warn.
    @staticmethod
    def _from_exposure_id_info(exposure_id_info: ExposureIdInfo) -> IdGenerator:
        """Construct a new ID generator from the object this class supersedes.

        This method is deprecated along with the type it accepts; it's
        provided only as a temporary helper to aid in the transition from
        `lsst.obs.base.ExposureIdInfo` to `IdGenerator`.
        """
        return _ExposureIdInfoIdGenerator(exposure_id_info)

    @property
    def catalog_id(self) -> int:
        """The integer identifier for the full catalog with this data ID, not
        just one of its rows (`int`).

        This combines the packed data ID and release ID, but not the
        counter.
        """
        return 0

    def __str__(self) -> str:
        """Return a human-readable representation of the data ID (or a note
        about its absence) for use in log and error messages.
        """
        return "[no data ID]"

    def make_table_id_factory(self) -> IdFactory:
        """Construct a new `lsst.afw.table.IdFactory` for this catalog."""
        return IdFactory.makeSimple()

    def make_source_catalog(self, schema: Schema) -> SourceCatalog:
        """Construct an empty catalog object with an ID factory.

        This is a convenience function for the common pattern of calling
        `make_table_id_factory`, constructing a `~lsst.afw.table.SourceTable`
        from that, and then constructing an (empty)
        `~lsst.afw.table.SourceCatalog` from that.
        """
        table = SourceTable.make(schema, self.make_table_id_factory())
        return SourceCatalog(table)

    def arange(self, *args, **kwargs) -> np.ndarray:
        """Generate an array of integer IDs for this catalog.

        All parameters are forwarded to `numpy.arange` to generate an array
        of per-catalog counter integers. These are then combined with the
        `catalog_id` to form the returned array.

        The IDs generated by `arange` will be equivalent to those generated
        by `make_table_id_factory` (and by extension, `make_source_catalog`)
        only if the counter integers start with ``1``, not ``0``, because
        that's what `~lsst.afw.table.IdFactory` does.
        """
        return np.arange(*args, **kwargs)

    @classmethod
    def unpacker_from_config(
        cls,
        config: BaseIdGeneratorConfig,
        fixed: DataCoordinate,
    ) -> Callable[[int], tuple[int, DataCoordinate, int]]:
        """Return a callable that unpacks the IDs generated by this class,
        from a config field.

        Parameters
        ----------
        config : `BaseIdGeneratorConfig`
            Configuration for an ID generator.
        fixed : `DataCoordinate`
            Data ID identifying the dimensions that are considered fixed by
            the `IdGenerator` that produced the IDs: usually just
            ``instrument`` or ``skymap``, depending on the configuration.
            For most configurations this will need to be a fully-expanded
            data ID.

        Returns
        -------
        unpacker
            Callable that takes a single `int` argument (an ID generated by
            an identically-configured `IdGenerator`) and returns a tuple of:

            - release_id : the integer that identifies a data release or
              similar (`int`);
            - data_id : the data ID used to initialize the original ID
              generator (`DataCoordinate`);
            - counter : the counter part of the original ID (`int`).

        Notes
        -----
        This method cannot be used on IDs generated without a data ID.
        """
        packer = config._make_dimension_packer(fixed)
        return cls.unpacker_from_dimension_packer(packer, config.n_releases)

    @classmethod
    def unpacker_from_dimension_packer(
        cls,
        dimension_packer: DimensionPacker,
        n_releases: int = DEFAULT_N_RELEASES,
    ) -> Callable[[int], tuple[int, DataCoordinate, int]]:
        """Return a callable that unpacks the IDs generated by this class,
        from a `lsst.daf.butler.DimensionPacker` instance.

        Parameters
        ----------
        dimension_packer : `lsst.daf.butler.DimensionPacker`
            Dimension packer used to construct the original
            `FullIdGenerator`.
        n_releases : `int`, optional
            Number of (contiguous, starting from zero) ``release_id`` values
            to reserve space for. One (not zero) is used to reserve no space.

        Returns
        -------
        unpacker
            Callable that takes a single `int` argument (an ID generated by
            an identically-constructed `FullIdGenerator`) and returns a tuple
            of:

            - release_id : the integer that identifies a data release or
              similar (`int`);
            - data_id : the data ID used to initialize the original ID
              generator (`DataCoordinate`);
            - counter : the counter part of the original ID (`int`).

        Notes
        -----
        This method cannot be used on IDs generated with no data ID.
        """
        bits = _IdGeneratorBits(dimension_packer, n_releases)

        def unpack(record_id: int) -> tuple[int, DataCoordinate, int]:
            rest, counter = divmod(record_id, bits.n_counters)
            rest, packed_data_id = divmod(rest, bits.n_data_ids)
            rest, release_id = divmod(rest, bits.n_releases)
            if rest:
                raise ValueError(
                    f"Unexpected overall factor {rest} in record_id {record_id}, "
                    f"after extracting packed_data_id={packed_data_id}, counter={counter}, and "
                    f"release_id={release_id}."
                )
            data_id = bits.packer.unpack(packed_data_id)
            return release_id, data_id, counter

        return unpack
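

# Purely arithmetic sketch (not part of the API; all numbers are
# hypothetical) of how the unpacker returned above takes a record ID apart.
# A generated ID is
# (release_id * n_data_ids + packed_data_id) * n_counters + counter, so
# successive divmod calls peel off the counter, the packed data ID, and the
# release ID in turn.
def _example_unpack_arithmetic() -> None:
    n_counters = 1 << 20  # hypothetical: 20 bits reserved for the counter
    n_data_ids = 1 << 30  # hypothetical: the packer needs 30 bits
    n_releases = 4  # hypothetical: space reserved for 4 release IDs
    release_id, packed_data_id, counter = 2, 123456, 789
    record_id = (release_id * n_data_ids + packed_data_id) * n_counters + counter
    # Reverse the packing with divmod, innermost field first.
    rest, got_counter = divmod(record_id, n_counters)
    rest, got_packed_data_id = divmod(rest, n_data_ids)
    rest, got_release_id = divmod(rest, n_releases)
    assert (got_counter, got_packed_data_id) == (counter, packed_data_id)
    assert (got_release_id, rest) == (release_id, 0)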


class FullIdGenerator(IdGenerator):
    """The subclass of `IdGenerator` that actually includes packed data IDs
    and release IDs in its generated IDs.

    Parameters
    ----------
    dimension_packer : `lsst.daf.butler.DimensionPacker`
        Object that packs data IDs into integers.
    data_id : `lsst.daf.butler.DataCoordinate`
        Data ID to embed in all generated IDs and random seeds.
    release_id : `int`, optional
        Release identifier to embed in generated IDs.
    n_releases : `int`, optional
        Number of (contiguous, starting from zero) `release_id` values to
        reserve space for. One (not zero) is used to reserve no space.

    Notes
    -----
    Instances of this class should usually be constructed via configuration
    instead of by calling the constructor directly; see `IdGenerator` for
    details.
    """

    def __init__(
        self,
        dimension_packer: DimensionPacker,
        data_id: DataCoordinate,
        release_id: int = DEFAULT_RELEASE_ID,
        n_releases: int = DEFAULT_N_RELEASES,
    ):
        self._bits = _IdGeneratorBits(dimension_packer, n_releases)
        self._release_id = release_id
        self._data_id = data_id.subset(self._bits.packer.dimensions)
        self._packed_data_id = self._bits.packer.pack(self._data_id)

    @property
    def data_id(self) -> DataCoordinate:
        """The data ID that will be embedded in all generated IDs
        (`DataCoordinate`)."""
        return self._data_id

    @property
    def release_id(self) -> int:
        """The release ID that will be embedded in all generated IDs
        (`int`)."""
        return self._release_id

    @property
    def catalog_id(self) -> int:
        # Docstring inherited.
        return self._packed_data_id + self._bits.n_data_ids * self._release_id

    def __str__(self) -> str:
        # Docstring inherited.
        return str(self.data_id)

    def make_table_id_factory(self) -> IdFactory:
        # Docstring inherited.
        return IdFactory.makeSource(self.catalog_id, self._bits.counter_bits)

    def arange(self, *args, **kwargs) -> np.ndarray:
        # Docstring inherited.
        lower = super().arange(*args, **kwargs)
        if np.any(lower >= self._bits.n_counters):
            arg_terms = [repr(arg) for arg in args] + [
                f"{k}={v!r}" for k, v in kwargs.items()
            ]
            raise ValueError(
                f"Integer range from numpy.arange({', '.join(arg_terms)}) has "
                f"{(lower >= self._bits.n_counters).sum()} values that are not "
                f"below the upper bound of {self._bits.n_counters}."
            )
        return lower + self.catalog_id * self._bits.n_counters
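

# Illustrative sketch only (the function name is hypothetical, and the
# generator is assumed to come from a concrete config's ``apply`` as shown
# above): assigning IDs to ``n`` records with `FullIdGenerator.arange` so
# that they match what `make_table_id_factory` would have produced. The
# counters must start at 1 because `lsst.afw.table.IdFactory` starts
# counting at 1.
def _example_arange_usage(id_generator: FullIdGenerator, n: int) -> np.ndarray:
    # Counters 1..n are combined with the generator's catalog_id (packed data
    # ID plus release ID) to form the full record IDs.
    return id_generator.arange(1, n + 1)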


@dataclasses.dataclass
class _IdGeneratorBits:
    """A private helper struct that manages the allocation of bits between
    the packed data ID, the release ID, and a per-catalog counter.
    """

    packer: DimensionPacker
    """Object that maps data IDs to integers
    (`lsst.daf.butler.DimensionPacker`).
    """

    n_releases: int = dataclasses.field(default=0)
    """Number of releases to reserve space for, starting from zero (`int`)."""

    n_data_ids: int = dataclasses.field(init=False)
    """Number of contiguous packed data IDs to reserve space for, starting
    from zero (`int`).
    """

    counter_bits: int = dataclasses.field(init=False)
    """Number of bits allocated to the per-catalog counter (`int`)."""

    n_counters: int = dataclasses.field(init=False)
    """Number of contiguous counter values to reserve space for, starting
    from zero (`int`)."""

    def __post_init__(self) -> None:
        self.n_data_ids = 1 << self.packer.maxBits
        upper_bits = (self.n_releases - 1).bit_length() + self.packer.maxBits
        self.counter_bits = IdFactory.computeReservedFromMaxBits(upper_bits)
        self.n_counters = 1 << self.counter_bits
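

# Illustrative sketch only, with a hypothetical bit count standing in for a
# real `DimensionPacker`: how `_IdGeneratorBits` splits a 64-bit ID between
# the release ID, the packed data ID, and the per-catalog counter. The
# release and data-ID fields claim the high bits, and
# `IdFactory.computeReservedFromMaxBits` hands whatever remains to the
# counter.
def _example_bit_budget() -> None:
    packer_max_bits = 38  # hypothetical packer size
    n_releases = 4  # reserve space for release IDs 0-3
    n_data_ids = 1 << packer_max_bits
    upper_bits = (n_releases - 1).bit_length() + packer_max_bits  # 2 + 38 = 40
    counter_bits = IdFactory.computeReservedFromMaxBits(upper_bits)
    n_counters = 1 << counter_bits
    print(f"{n_data_ids=} {counter_bits=} {n_counters=}")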


# TODO: remove this class on DM-38687.
# No deprecation decorator here because the type this class holds is itself
# deprecated, so it's only going to be called by code paths that will go away
# when the deprecation turns into a removal, and which already warn.
class _ExposureIdInfoIdGenerator(IdGenerator):
    """An `IdGenerator` implementation to aid in the transition from
    `lsst.obs.base.ExposureIdInfo`.
    """

    def __init__(self, exposure_id_info: ExposureIdInfo):
        self._exposure_id_info = exposure_id_info

    @property
    def catalog_id(self) -> int:
        # Docstring inherited.
        return self._exposure_id_info.expId

    def __str__(self) -> str:
        return str(self.catalog_id)

    def make_table_id_factory(self) -> IdFactory:
        # Docstring inherited.
        return self._exposure_id_info.makeSourceIdFactory()

    def arange(self, *args, **kwargs) -> np.ndarray:
        # Docstring inherited.
        raise NotImplementedError(
            "This IdGenerator implementation does not support arange; "
            "please update to IdGenerator.from_config for a full-featured implementation."
        )