22from __future__
import annotations
27 "BaseIdGeneratorConfig",
28 "DetectorExposureIdGeneratorConfig",
29 "DetectorVisitIdGeneratorConfig",
30 "SkyMapIdGeneratorConfig",
34from typing
import Any, Callable
37from lsst.afw.table import IdFactory, Schema, SourceCatalog, SourceTable
38from lsst.daf.butler
import DataCoordinate, DimensionPacker
41from lsst.skymap.packers
import SkyMapDimensionPacker
44"""Default release ID to embed in catalog IDs.
46This can be changed globally to avoid having to override individual task
47configs to set the release ID.
51"""Default number of releases to reserve space for in catalog IDs."""
55 """Base class for configuration of `IdGenerator` instances.
57 This class is abstract (it cannot use `abc.ABCMeta` due to a metaclass
58 conflict), and it should mostly be considered an implementation detail
59 of how the attributes it defines are included in its concrete derived
60 classes. Derived classes must implement `_make_dimension_packer`.
62 See `IdGenerator` for usage.
67 "Identifier for a data release or other version to embed in generated IDs. "
68 "Zero is reserved for IDs with no embedded release identifier."
71 default=DEFAULT_RELEASE_ID,
72 check=
lambda x: x >= 0,
77 "Number of (contiguous, starting from zero) `release_id` values to reserve space for. "
78 "One (not zero) is used to reserve no space."
81 default=DEFAULT_N_RELEASES,
82 check=
lambda x: x > 0,
87 cls, doc="Configuration for how to generate catalog IDs from data IDs."
89 """Return a config field that holds an instance of this class.
94 Documentation for the config field. As this configuration almost
95 always plays the same role in any parent config, the default is
100 field : `lsst.pex.config.ConfigField`
101 New config field for instances of this class.
105 This method is provided as a convenience to reduce boilerplate
106 downstream: it typically saves an import or two, and it allows the same
107 usually-appropriate docstring to be reused instead of rewritten each
108 time. It does not need to be used in order to use this config class.
110 return ConfigField(doc, dtype=cls)
112 def apply(self, data_id: DataCoordinate, **kwargs: Any) -> IdGenerator:
113 """Construct an `IdGenerator` instance from this configuration.
117 data_id : `DataCoordinate`
118 The data ID the `IdGenerator` will embed into all IDs. This
119 generally must be a fully-expanded data ID (i.e. have dimension
120 records attached), that identifies the "instrument" or "skymap"
121 dimension, though this requirement may be relaxed for certain
122 dimension packer types.
124 Additional keyword arguments are interpreted as dimension value
125 pairs to include in the data ID. This may be used to provide
126 constraints on dimensions for which records are not available.
130 id_generator : `IdGenerator`
131 Object that generates integer IDs for catalogs and their rows by
132 embedding the given data ID and a configurably-optional release ID.
136 This method is called `apply` for consistency with the pattern of using
137 `lsst.pex.config.ConfigurableField` and `lsst.pex.config.RegistryField`
138 to construct the objects whose configuration they hold. It doesn't
139 actually use those mechanisms because we have many config classes for
140 the one `IdGenerator` class, instead of the other way around, and as a
141 result a "config as factory" approach works better.
146 DataCoordinate.standardize(data_id, **kwargs, dimensions=packer.dimensions),
152 """Abstract hook for building a dimension packer from configuration.
156 data_id : `DataCoordinate`
157 The data ID the `IdGenerator` will embed into all IDs. This
158 generally must be a fully-expanded data ID (i.e. have dimension
159 records attached), that identifies the "instrument" or "skymap"
160 dimension, though this requirement may be relaxed for certain
161 dimension packer types.
165 packer : `lsst.daf.butler.DimensionPacker`
166 Object that packs data IDs into integers.
172 """Configuration class for generating integer IDs from
173 ``{exposure, detector}`` data IDs.
175 See `IdGenerator` for usage.
178 packer = Instrument.make_dimension_packer_config_field()
182 return self.
packer.
apply(data_id, is_exposure=
True)
186 """Configuration class for generating integer IDs from
187 ``{visit, detector}`` data IDs.
189 See `IdGenerator` for usage.
192 packer = Instrument.make_dimension_packer_config_field()
196 return self.
packer.
apply(data_id, is_exposure=
False)
200 """Configuration class for generating integer IDs from
201 ``{tract, patch, [band]}`` data IDs.
203 See `IdGenerator` for usage.
206 packer = SkyMapDimensionPacker.make_config_field()
214 """A helper class for packing some combination of a data ID, a per-data-ID
215 counter, and a release ID into a single 64-bit integer.
217 As an object frequently passed into code that otherwise has no knowledge of
218 its own data ID, `IdGenerator` also implements ``__str__`` to provide a
219 human-readable representation of the data ID for use in logs and exception
220 messages, with a suitable fallback when no data ID was provided to it.
224 Instances of this class are expected to usually be created via
225 configuration, which will return a derived instance. This pattern starts
226 with one of `DetectorExposureIdGeneratorConfig`,
227 `DetectorVisitIdGeneratorConfig`, and `SkyMapIdGeneratorConfig` (which have
228 the same interface), and looks something like this::
230 from lsst.meas.base import DetectorVisitIdGeneratorConfig
231 from lsst.pex.config import Config
232 from lsst.pipe.base import PipelineTask, PipelineTaskConfig
234 class SomeTaskConfig(PipelineTaskConfig, ...):
235 id_generator = DetectorVisitIdGeneratorConfig.make_field()
237 class SomeTask(PipelineTask):
239 ConfigClass = SomeTaskConfig
243 def runQuantum(self, ..., data_id: DataCoordinate):
244 id_generator = self.config.id_generator.apply(data_id)
245 catalog = id_generator.make_source_catalog(self.schema) ...
247 There is no requirement that `IdGenerator` instances be constructed in
248 `PipelineTask.runQuantum` methods and passed to the ``run`` method, but
249 this is the most common approach.
251 Code that wishes to instead unpack these record IDs to obtain the release
252 ID, data ID and counter value should use the same config (often loaded from
253 the ``Butler``) and pass a fully-expanded data ID identifying only a
254 particular ``skymap`` or ``instrument`` to `unpacker_from_config`::
256 config = butler.get("some_task_config")
257 catalog = butler.get("some_output_catalog", given_data_id)
258 unpacker = IdGenerator.unpacker_from_config(
259 config.id_generator, butler.registry.expandDataId(instrument="HSC"),
261 release_id, embedded_data_id, counter = unpacker(catalog[0]["id"])
262 assert embedded_data_id == given_data_id
264 This example is a bit contrived, as the ability to reconstruct the data ID
265 is really only useful when you don't have it already, such as when the
266 record ID is obtained from some further-processed version of the original
267 table (such as a SQL database), and in that context the right config to
268 load will not be obvious unless it has been carefully documented.
270 Simple instances of the base class that do not include a data ID may also
271 be constructed by calling the constructor directly::
273 id_generator = IdGenerator()
275 These IDs may not be unpacked, but they also don't need to be, because
276 they're just the per-catalog "counter" integer already.
280 :ref:`lsst.meas.base-generating-source-and-object-ids`
285 """The integer identifier for the full catalog with this data ID, not
286 just one of its rows (`int`).
288 This combines the packed data ID and release ID, but not the
294 """Return a human-readable representation of the data ID (or a note
295 about its absence) for use in log and error messages.
297 return "[no data ID]"
300 """Construct a new `lsst.afw.table.IdFactory` for this catalog."""
301 return IdFactory.makeSimple()
304 """Construct an empty catalog object with an ID factory.
306 This is a convenience function for the common pattern of calling
307 `make_table_id_factory`, constructing a `~lsst.afw.table.SourceTable`
308 from that, and then constructing an (empty)
309 `~lsst.afw.table.SourceCatalog` from that.
312 return SourceCatalog(table)
314 def arange(self, *args, **kwargs) -> np.ndarray:
315 """Generate an array of integer IDs for this catalog.
317 All parameters are forwarded to `numpy.arange` to generate an array of
318 per-catalog counter integers. These are then combined with the
319 `catalog_id` to form the returned array.
321 The IDs generated by `arange` will be equivalent to those generated by
322 `make_table_id_factory` (and by extension, `make_source_catalog`) only
323 if the counter integers start with ``1``, not ``0``, because that's
324 what `~lsst.afw.table.IdFactory` does.
326 return np.arange(*args, **kwargs)
331 config: BaseIdGeneratorConfig,
332 fixed: DataCoordinate,
333 ) -> Callable[[int], tuple[DataCoordinate, int]]:
334 """Return a callable that unpacks the IDs generated by this class,
339 config : `BaseIdGeneratorConfig`
340 Configuration for an ID generator.
341 fixed : `DataCoordinate`
342 Data ID identifying the dimensions that are considered fixed by the
343 `IdGenerator` that produced the IDs: usually just ``instrument`` or
344 ``skymap``, depending on the configuration. For most configurations
345 this will need to be a fully-expanded data ID.
350 Callable that takes a single `int` argument (an ID generated by an
351 identically-configured `IdGenerator`) and returns a tuple of:
353 - release_id: the integer that identifies a data release or
355 - data_id : the data ID used to initialize the original ID
356 generator (`DataCoordinate`);
357 - counter : the counter part of the original ID (`int`).
361 This method cannot be used on IDs generated without a data ID.
363 packer = config._make_dimension_packer(fixed)
369 dimension_packer: DimensionPacker,
370 n_releases: int = DEFAULT_N_RELEASES,
371 ) -> Callable[[int], tuple[int, DataCoordinate, int]]:
372 """Return a callable that unpacks the IDs generated by this class,
373 from a `lsst.daf.butler.DimensionPacker` instance.
377 dimension_packer : `lsst.daf.butler.DimensionPacker`
378 Dimension packer used to construct the original
379 `DimensionPackerIdGenerator`.
380 n_releases : `int`, optional
381 Number of (contiguous, starting from zero) ``release_id`` values to
382 reserve space for. One (not zero) is used to reserve no space.
387 Callable that takes a single `int` argument (an ID generated by an
388 identically-constructed `DimensionPackerIdGenerator`) and returns a
391 - release_id: the integer that identifies a data release or
393 - data_id : the data ID used to initialize the original ID
394 generator (`DataCoordinate`);
395 - counter : the counter part of the original ID (`int`).
399 This method cannot be used on IDs generated with no data ID.
def unpack(record_id: int) -> tuple[int, DataCoordinate, int]:
    """Split a generated record ID into release ID, data ID, and counter.

    Parameters
    ----------
    record_id : `int`
        Integer ID to decompose.

    Returns
    -------
    release_id : `int`
        Release field extracted from ``record_id``.
    data_id : `DataCoordinate`
        Result of ``bits.packer.unpack`` applied to the packed data ID
        field.
    counter : `int`
        Counter field extracted from ``record_id``.

    Raises
    ------
    ValueError
        Raised if anything is left over after all three fields have been
        extracted, i.e. ``record_id`` does not fit the configured layout.
    """
    # Fields are peeled off from least- to most-significant: counter,
    # then packed data ID, then release ID.
    rest, counter = divmod(record_id, bits.n_counters)
    rest, packed_data_id = divmod(rest, bits.n_data_ids)
    # The release field spans ``n_releases`` values, not ``n_data_ids``;
    # dividing by the latter let out-of-range release IDs slip past the
    # leftover check below.  The ``max`` guard avoids a division by zero
    # when no release space was configured (n_releases == 0).
    rest, release_id = divmod(rest, max(bits.n_releases, 1))
    if rest:
        raise ValueError(
            f"Unexpected overall factor {rest} in record_id {record_id}, "
            f"after extracting packed_data_id={packed_data_id}, counter={counter}, and "
            f"release_id={release_id}."
        )
    data_id = bits.packer.unpack(packed_data_id)
    return release_id, data_id, counter
420 """The subclass of `IdGenerator` that actually includes packed data IDs
421 and release IDs in its generated IDs.
425 dimension_packer : `lsst.daf.butler.DimensionPacker`
426 Object that packs data IDs into integers.
427 data_id : `lsst.daf.butler.DataCoordinate`
428 Data ID to embed in all generated IDs and random seeds.
429 release_id : `int`, optional
430 Release identifier to embed in generated IDs.
431 n_releases : `int`, optional
432 Number of (contiguous, starting from zero) `release_id` values to
433 reserve space for. One (not zero) is used to reserve no space.
437 Instances of this class should usually be constructed via configuration
438 instead of by calling the constructor directly; see `IdGenerator` for
444 dimension_packer: DimensionPacker,
445 data_id: DataCoordinate,
446 release_id: int = DEFAULT_RELEASE_ID,
447 n_releases: int = DEFAULT_N_RELEASES,
456 """The data ID that will be embedded in all generated IDs
457 (`DataCoordinate`)."""
462 """The release ID that will be embedded in all generated IDs (`int`)."""
478 def arange(self, *args, **kwargs) -> np.ndarray:
481 if np.any(lower >= self.
_bits.n_counters):
482 arg_terms = [
repr(arg)
for arg
in args] + [f
"{k}={v!r}" for k, v
in kwargs.items()]
484 f
"Integer range from numpy.arange({arg_terms}) has "
485 f
"{(lower >= self._bits.n_counters).sum()} values that are not "
486 f
"below the upper bound of {self._bits.n_counters}."
491@dataclasses.dataclass
493 """A private helper struct that manages the allocation of bits between the
494 packed data ID, the release ID, and a per-catalog counter.
497 packer: DimensionPacker
498 """Object that maps data IDs to integers
499 (`lsst.daf.butler.DimensionPacker`).
502 n_releases: int = dataclasses.field(default=0)
503 """Number of releases to reserve space for, starting from zero (`int`)."""
505 n_data_ids: int = dataclasses.field(init=
False)
506 """Number of contiguous packed data IDs to reserve space for, starting
510 counter_bits: int = dataclasses.field(init=
False)
511 """Number of bits allocated to the per-catalog counter (`int`)."""
513 n_counters: int = dataclasses.field(init=
False)
514 """Number of contiguous counter values to reserve space for, starting from
IdGenerator apply(self, DataCoordinate data_id, **Any kwargs)
DimensionPacker _make_dimension_packer(self, DataCoordinate data_id)
make_field(cls, doc="Configuration for how to generate catalog IDs from data IDs.")
DimensionPacker _make_dimension_packer(self, DataCoordinate data_id)
DimensionPacker _make_dimension_packer(self, DataCoordinate data_id)
__init__(self, DimensionPacker dimension_packer, DataCoordinate data_id, int release_id=DEFAULT_RELEASE_ID, int n_releases=DEFAULT_N_RELEASES)
np.ndarray arange(self, *args, **kwargs)
IdFactory make_table_id_factory(self)
DataCoordinate data_id(self)
SourceCatalog make_source_catalog(self, Schema schema)
np.ndarray arange(self, *args, **kwargs)
Callable[[int], tuple[int, DataCoordinate, int]] unpacker_from_dimension_packer(cls, DimensionPacker dimension_packer, int n_releases=DEFAULT_N_RELEASES)
IdFactory make_table_id_factory(self)
Callable[[int], tuple[DataCoordinate, int]] unpacker_from_config(cls, BaseIdGeneratorConfig config, DataCoordinate fixed)
DimensionPacker _make_dimension_packer(self, DataCoordinate data_id)