from __future__ import annotations
27 "BaseIdGeneratorConfig",
28 "DetectorExposureIdGeneratorConfig",
29 "DetectorVisitIdGeneratorConfig",
30 "SkyMapIdGeneratorConfig",
import dataclasses
from typing import Any, Callable

import numpy as np

from lsst.afw.table import IdFactory, Schema, SourceCatalog, SourceTable
from lsst.daf.butler import DataCoordinate, DimensionPacker
from lsst.obs.base import ExposureIdInfo
from lsst.pex.config import Config, ConfigField, Field
from lsst.pipe.base import Instrument
from lsst.skymap.packers import SkyMapDimensionPacker
45"""Default release ID to embed in catalog IDs.
47This can be changed globally to avoid having to override individual task
48configs to set the release ID.
52"""Default number of releases to reserve space for in catalog IDs."""
56 """Base class for configuration of `IdGenerator` instances.
58 This class is abstract (it cannot use `abc.ABCMeta` due to a metaclass
59 conflict), and it should mostly be considered an implementation detail
60 of how the attributes it defines are included in its concrete derived
61 classes. Derived classes must implemented `_make_dimension_packer`.
63 See `IdGenerator` for usage.
68 "Identifier for a data release or other version to embed in generated IDs. "
69 "Zero is reserved for IDs with no embedded release identifier."
72 default=DEFAULT_RELEASE_ID,
73 check=
lambda x: x >= 0,
78 "Number of (contiguous, starting from zero) `release_id` values to reserve space for. "
79 "One (not zero) is used to reserve no space."
82 default=DEFAULT_N_RELEASES,
83 check=
lambda x: x > 0,

    @classmethod
    def make_field(
        cls, doc="Configuration for how to generate catalog IDs from data IDs."
    ):
        """Return a config field that holds an instance of this class.

        Parameters
        ----------
        doc : `str`, optional
            Documentation for the config field.  As this configuration almost
            always plays the same role in any parent config, the default is
            usually fine.

        Returns
        -------
        field : `lsst.pex.config.ConfigField`
            New config field for instances of this class.

        Notes
        -----
        This method is provided as a convenience to reduce boilerplate
        downstream: it typically saves an import or two, and it allows the same
        usually-appropriate docstring to be reused instead of rewritten each
        time.  It does not need to be used in order to use this config class.
        """
        return ConfigField(doc, dtype=cls)
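
    # For illustration only (hypothetical ``SomeTaskConfig``): the two
    # declarations below are equivalent; ``make_field`` just saves the
    # `~lsst.pex.config.ConfigField` import and the repeated docstring.
    #
    #     class SomeTaskConfig(PipelineTaskConfig, ...):
    #         id_generator = DetectorVisitIdGeneratorConfig.make_field()
    #
    #     class SomeTaskConfig(PipelineTaskConfig, ...):
    #         id_generator = ConfigField(
    #             "Configuration for how to generate catalog IDs from data IDs.",
    #             dtype=DetectorVisitIdGeneratorConfig,
    #         )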

    def apply(self, data_id: DataCoordinate, **kwargs: Any) -> IdGenerator:
        """Construct an `IdGenerator` instance from this configuration.

        Parameters
        ----------
        data_id : `DataCoordinate`
            The data ID the `IdGenerator` will embed into all IDs.  This
            generally must be a fully-expanded data ID (i.e. have dimension
            records attached) that identifies the "instrument" or "skymap"
            dimension, though this requirement may be relaxed for certain
            dimension packer types.
        **kwargs
            Additional keyword arguments are interpreted as dimension value
            pairs to include in the data ID.  This may be used to provide
            constraints on dimensions for which records are not available.

        Returns
        -------
        id_generator : `IdGenerator`
            Object that generates integer IDs for catalogs and their rows by
            embedding the given data ID and a configurably-optional release ID.

        Notes
        -----
        This method is called `apply` for consistency with the pattern of using
        `lsst.pex.config.ConfigurableField` and `lsst.pex.config.RegistryField`
        to construct the objects whose configuration they hold.  It doesn't
        actually use those mechanisms because we have many config classes for
        the one `IdGenerator` class, instead of the other way around, and as a
        result a "config as factory" approach works better.
        """
        packer = self._make_dimension_packer(data_id)
        return FullIdGenerator(
            packer,
            DataCoordinate.standardize(data_id, **kwargs, dimensions=packer.dimensions),
            release_id=self.release_id,
            n_releases=self.n_releases,
        )
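
    # Illustrative sketch (hypothetical variable and value): the keyword
    # arguments let a caller supply a dimension value that is missing from
    # ``data_id`` itself, e.g. adding the band when only tract and patch
    # records are attached:
    #
    #     id_generator = config.apply(tract_patch_data_id, band="i")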
153 """Abstract hook for building a dimension packer from configuration.
157 data_id : `DataCoordinate`
158 The data ID the `IdGenerator` will embed into all IDs. This
159 generally must be a fully-expanded data ID (i.e. have dimension
160 records attached), that identifies the "instrument" or "skymap"
161 dimension, though this requirement may be relaxed for certain
162 dimension packer types.
166 packer : `lsst.daf.butler.DimensionPacker`
167 Object that packs data IDs into integers.
173 """Configuration class for generating integer IDs from
174 ``{exposure, detector}`` data IDs.
176 See `IdGenerator` for usage.
179 packer = Instrument.make_dimension_packer_config_field()
183 return self.
packer.
apply(data_id, is_exposure=
True)
187 """Configuration class for generating integer IDs from
188 ``{visit, detector}`` data IDs.
190 See `IdGenerator` for usage.
193 packer = Instrument.make_dimension_packer_config_field()
197 return self.
packer.
apply(data_id, is_exposure=
False)
201 """Configuration class for generating integer IDs from
202 ``{tract, patch, [band]}`` data IDs.
204 See `IdGenerator` for usage.
207 packer = SkyMapDimensionPacker.make_config_field()
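
# Illustrative sketch (hypothetical repository and values): each config class
# above expects a data ID carrying the dimensions named in its docstring, so a
# coadd-level task would apply `SkyMapIdGeneratorConfig` to an expanded
# tract/patch/band data ID, e.g.:
#
#     config = SkyMapIdGeneratorConfig()
#     data_id = butler.registry.expandDataId(
#         skymap="example_skymap", tract=9813, patch=42, band="i"
#     )
#     id_generator = config.apply(data_id)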
215 """A helper class for packing some combination of a data ID, a per-data-ID
216 counter, and a release ID into a single 64-bit integer.
218 As an object frequently passed into code that otherwise has no knowledge of
219 its own data ID, `IdGenerator` also implements ``__str__`` to provide a
220 human-readable representation of the data ID for use in logs and exception
221 messages, with a suitable fallback when no data ID was provided to it.
225 Instances of this class are expected to usually be created via
226 configuration, which will return a derived instance. This pattern starts
227 with one of `DetectorExposureIdGeneratorConfig`,
228 `DetectorVisitIdGeneratorConfig`, and `SkyMapIdGeneratorConfig` (which have
229 the same interface), and looks something this:
231 from lsst.meas.base import DetectorVisitIdGeneratorConfig
232 from lsst.pex.config import Config
233 from lsst.pipe.base import PipelineTask
235 class SomeTaskConfig(PipelineTaskConfig, ...):
236 id_generator = DetectorVisitIdGeneratorConfig.make_field()
238 class SomeTask(PipelineTaskTask):
240 ConfigClass = SomeTaskConfig
244 def runQuantum(self, ..., data_id: DataCoordinate):
245 id_generator = self.config.apply(data_id)
246 catalog = id_generator.make_source_catalog(self.schema) ...
248 There is no requirement that `IdGenerator` instances be constructed in
249 `PipelineTask.runQuantum` methods and passed to the ``run`` method, but
250 this is the most common approach.
252 Code that wishes to instead unpack these record IDs to obtain the release
253 ID, data ID and counter value should use the same config (often loaded from
254 the ``Butler``) and pass a fully-expanded data ID identifying only a
255 particular ``skymap`` or ``instrument`` to `unpacker_from_config`::
257 config = butler.get("some_task_config")
258 catalog = butler.get("some_output_catalog", given_data_id)
259 unpacker = IdGenerator.unpacker_from_config(
260 config.id_generator, butler.registry.expandDataId(skymap="HSC"),
262 release_id, embedded_data_id, counter = unpacker(catalog[0]["id"])
263 assert embedded_data_id == given_data_id
265 This example is a bit contrived, as the ability to reconstruct the data ID
266 is really only useful when you don't have it already, such as when the
267 record ID is obtained from some further-processed version of the original
268 table (such as a SQL database), and in that context the right config to
269 load will not be obvious unless it has been carefully documented.
271 Simple instances of the base class that do not include a data ID may also
272 be constructed by calling the constructor directly::
274 id_generator = IdGenerator()
276 These IDs may not be unpacked, but they also don't need to be, because
277 they're just the per-catalog "counter" integer already.
281 :ref:`lsst.meas.base-generating-source-and-object-ids`
291 """Construct a new ID generator from the object this class supersedes.
293 This method is deprecated along with the type it accepts; it's provided
294 only as a temporary helper to aid in the transition from
295 `lsst.obs.base.ExposureIdInfo` to `IdGenerator`.
301 """The integer identifier for the full catalog with this data ID, not
302 just one of its rows (`int`).
304 This combines the packed data ID and release ID, but not the
310 """Return a human-readable representation of the data ID (or a note
311 about its absence) for use in log and error messages.
313 return "[no data ID]"
316 """Construct a new `lsst.afw.table.IdFactory` for this catalog."""
317 return IdFactory.makeSimple()
320 """Construct a empty catalog object with an ID factory.
322 This is a convenience function for the common pattern of calling
323 `make_table_id_factory`, constructing a `~lsst.afw.table.SourceTable`
324 from that, and then constructing an (empty)
325 `~lsst.afw.table.SourceCatalog` from that.
328 return SourceCatalog(table)

    def arange(self, *args, **kwargs) -> np.ndarray:
        """Generate an array of integer IDs for this catalog.

        All parameters are forwarded to `numpy.arange` to generate an array of
        per-catalog counter integers.  These are then combined with the
        ``catalog_id`` to form the returned array.

        The IDs generated by `arange` will be equivalent to those generated by
        `make_table_id_factory` (and by extension, `make_source_catalog`) only
        if the counter integers start with ``1``, not ``0``, because that's
        what `~lsst.afw.table.IdFactory` does.
        """
        return np.arange(*args, **kwargs)
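
    # Illustrative sketch: to match the row IDs that `make_table_id_factory`
    # would assign, start the counters at one rather than zero (``n_rows`` is a
    # hypothetical stand-in for the catalog length):
    #
    #     ids = id_generator.arange(1, n_rows + 1)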

    @classmethod
    def unpacker_from_config(
        cls,
        config: BaseIdGeneratorConfig,
        fixed: DataCoordinate,
    ) -> Callable[[int], tuple[int, DataCoordinate, int]]:
        """Return a callable that unpacks the IDs generated by this class,
        from a config instance.

        Parameters
        ----------
        config : `BaseIdGeneratorConfig`
            Configuration for an ID generator.
        fixed : `DataCoordinate`
            Data ID identifying the dimensions that are considered fixed by the
            `IdGenerator` that produced the IDs: usually just ``instrument`` or
            ``skymap``, depending on the configuration.  For most configurations
            this will need to be a fully-expanded data ID.

        Returns
        -------
        unpacker
            Callable that takes a single `int` argument (an ID generated by an
            identically-configured `IdGenerator`) and returns a tuple of:

            - release_id : the integer that identifies a data release or
              other version (`int`);
            - data_id : the data ID used to initialize the original ID
              generator (`DataCoordinate`);
            - counter : the counter part of the original ID (`int`).

        Notes
        -----
        This method cannot be used on IDs generated without a data ID.
        """
        packer = config._make_dimension_packer(fixed)
        return cls.unpacker_from_dimension_packer(packer, config.n_releases)

    @classmethod
    def unpacker_from_dimension_packer(
        cls,
        dimension_packer: DimensionPacker,
        n_releases: int = DEFAULT_N_RELEASES,
    ) -> Callable[[int], tuple[int, DataCoordinate, int]]:
        """Return a callable that unpacks the IDs generated by this class,
        from a `lsst.daf.butler.DimensionPacker` instance.

        Parameters
        ----------
        dimension_packer : `lsst.daf.butler.DimensionPacker`
            Dimension packer used to construct the original
            `FullIdGenerator`.
        n_releases : `int`, optional
            Number of (contiguous, starting from zero) ``release_id`` values to
            reserve space for.  One (not zero) is used to reserve no space.

        Returns
        -------
        unpacker
            Callable that takes a single `int` argument (an ID generated by an
            identically-constructed `FullIdGenerator`) and returns a tuple of:

            - release_id : the integer that identifies a data release or
              other version (`int`);
            - data_id : the data ID used to initialize the original ID
              generator (`DataCoordinate`);
            - counter : the counter part of the original ID (`int`).

        Notes
        -----
        This method cannot be used on IDs generated with no data ID.
        """
        bits = _IdGeneratorBits(dimension_packer, n_releases)

        def unpack(record_id: int) -> tuple[int, DataCoordinate, int]:
            rest, counter = divmod(record_id, bits.n_counters)
            rest, packed_data_id = divmod(rest, bits.n_data_ids)
            rest, release_id = divmod(rest, bits.n_releases)
            if rest:
                raise ValueError(
                    f"Unexpected overall factor {rest} in record_id {record_id}, "
                    f"after extracting packed_data_id={packed_data_id}, counter={counter}, and "
                    f"release_id={release_id}."
                )
            data_id = bits.packer.unpack(packed_data_id)
            return release_id, data_id, counter

        return unpack
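
    # Worked example (made-up factor sizes; the real ones are powers of two
    # derived from the packer): the record IDs unpacked above are built,
    # conceptually, as
    #
    #     record_id = (release_id * n_data_ids + packed_data_id) * n_counters + counter
    #
    # so with n_counters=1000, n_data_ids=100, n_releases=4, release_id=2,
    # packed_data_id=37, and counter=5, record_id = 237005, and the divmod
    # chain in ``unpack`` recovers (2, 37, 5) before the packer turns 37 back
    # into a data ID.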
436 """The subclass of `IdGenerator` that actually includes packed data IDs
437 and release IDs in its generated IDs.
441 dimension_packer : `lsst.daf.butler.DimensionPacker`
442 Object that packs data IDs into integers.
443 data_id : `lsst.daf.butler.DataCoordinate`
444 Data ID to embed in all generated IDs and random seeds.
445 release_id : `int`, optional
446 Release identifier to embed in generated IDs.
447 n_releases : `int`, optional
448 Number of (contiguous, starting from zero) `release_id` values to
449 reserve space for. One (not zero) is used to reserve no space.
453 Instances of this class should usually be constructed via configuration
454 instead of by calling the constructor directly; see `IdGenerator` for
460 dimension_packer: DimensionPacker,
461 data_id: DataCoordinate,
462 release_id: int = DEFAULT_RELEASE_ID,
463 n_releases: int = DEFAULT_N_RELEASES,
472 """The data ID that will be embedded in all generated IDs
473 (`DataCoordinate`)."""
478 """The release ID that will embedded in all generated IDs (`int`)."""

    def arange(self, *args, **kwargs) -> np.ndarray:
        # Docstring inherited.
        lower = super().arange(*args, **kwargs)
        if np.any(lower >= self._bits.n_counters):
            arg_terms = [repr(arg) for arg in args] + [f"{k}={v!r}" for k, v in kwargs.items()]
            raise ValueError(
                f"Integer range from numpy.arange({arg_terms}) has "
                f"{(lower >= self._bits.n_counters).sum()} values that are not "
                f"below the upper bound of {self._bits.n_counters}."
            )
        return lower + self.catalog_id


@dataclasses.dataclass
class _IdGeneratorBits:
    """A private helper struct that manages the allocation of bits between the
    packed data ID, the release ID, and a per-catalog counter.
    """

    packer: DimensionPacker
    """Object that maps data IDs to integers
    (`lsst.daf.butler.DimensionPacker`).
    """

    n_releases: int = dataclasses.field(default=0)
    """Number of releases to reserve space for, starting from zero (`int`)."""

    n_data_ids: int = dataclasses.field(init=False)
    """Number of contiguous packed data IDs to reserve space for, starting
    from zero (`int`).
    """

    counter_bits: int = dataclasses.field(init=False)
    """Number of bits allocated to the per-catalog counter (`int`)."""

    n_counters: int = dataclasses.field(init=False)
    """Number of contiguous counter values to reserve space for, starting from
    zero (`int`).
    """
545 """A `IdGenerator` implementation to aid in the transition from
546 `lsst.obs.base.ExposureIdInfo`.
549 def __init__(self, exposure_id_info: ExposureIdInfo):
564 def arange(self, *args, **kwargs) -> np.ndarray:
567 "This IdGenerator implementation does not support arange; "
568 "please update to IdGenerator.from_config for a full-featured implementation."