Coverage for python/lsst/daf/butler/core/datastoreRecordData.py: 28%
75 statements
« prev ^ index » next — coverage.py v7.2.7, created at 2023-06-15 09:13 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Support for generic data stores."""
24from __future__ import annotations
26__all__ = ("DatastoreRecordData", "SerializedDatastoreRecordData")
28import dataclasses
29import uuid
30from collections.abc import Mapping
31from typing import TYPE_CHECKING, Any
33from lsst.utils import doImportType
34from lsst.utils.introspection import get_full_type_name
35from pydantic import BaseModel
37from .datasets import DatasetId
38from .dimensions import DimensionUniverse
39from .storedFileInfo import StoredDatastoreItemInfo
41if TYPE_CHECKING:
42 from ..registry import Registry
44_Record = dict[str, Any]
class SerializedDatastoreRecordData(BaseModel):
    """Representation of a `DatastoreRecordData` suitable for serialization."""

    dataset_ids: list[uuid.UUID]
    """List of dataset IDs"""

    records: Mapping[str, Mapping[str, list[_Record]]]
    """List of records indexed by record class name and table name."""

    @classmethod
    def direct(
        cls,
        *,
        dataset_ids: list[str | uuid.UUID],
        records: dict[str, dict[str, list[_Record]]],
    ) -> SerializedDatastoreRecordData:
        """Construct a `SerializedDatastoreRecordData` directly without
        validators.

        Parameters
        ----------
        dataset_ids : `list` [ `str` or `uuid.UUID` ]
            Dataset IDs; string values (as produced by JSON serialization)
            are converted back to `uuid.UUID`.
        records : `dict`
            Record data indexed by record class name and table name.
            String-valued ``dataset_id`` entries are converted to
            `uuid.UUID` *in place*, i.e. the caller's dictionaries are
            mutated.

        Returns
        -------
        serialized : `SerializedDatastoreRecordData`
            New instance built without validation.

        Notes
        -----
        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        This method should only be called when the inputs are trusted.
        """
        data = SerializedDatastoreRecordData.__new__(cls)
        setter = object.__setattr__
        # JSON makes strings out of UUIDs, need to convert them back.
        # (Named ``dsid`` to avoid shadowing the ``id`` builtin.)
        setter(
            data,
            "dataset_ids",
            [uuid.UUID(dsid) if isinstance(dsid, str) else dsid for dsid in dataset_ids],
        )
        # See also comments in record_ids_to_uuid()
        for table_data in records.values():
            for table_records in table_data.values():
                for record in table_records:
                    # This only checks the dataset_id value; if there are any
                    # other columns that are UUIDs we'd need a more generic
                    # approach.
                    if (dataset_id := record.get("dataset_id")) is not None:
                        record["dataset_id"] = (
                            uuid.UUID(dataset_id) if isinstance(dataset_id, str) else dataset_id
                        )
        setter(data, "records", records)
        # NOTE(review): ``__fields_set__`` is not populated here; pydantic v1
        # model methods such as ``dict()``/``copy()`` consult it — confirm
        # downstream code never calls those on instances built via ``direct``.
        return data
@dataclasses.dataclass
class DatastoreRecordData:
    """A struct that represents a tabular data export from a single
    datastore.
    """

    records: dict[DatasetId, dict[str, list[StoredDatastoreItemInfo]]] = dataclasses.field(
        default_factory=dict
    )
    """Opaque table data, indexed by dataset ID and grouped by opaque table
    name."""

    def update(self, other: DatastoreRecordData) -> None:
        """Update contents of this instance with data from another instance.

        Parameters
        ----------
        other : `DatastoreRecordData`
            Records to merge into this instance.

        Notes
        -----
        Merged instances can not have identical records; no de-duplication
        is performed here, records from ``other`` are simply appended.
        """
        for dataset_id, table_records in other.records.items():
            this_table_records = self.records.setdefault(dataset_id, {})
            for table_name, records in table_records.items():
                this_table_records.setdefault(table_name, []).extend(records)

    def subset(self, dataset_ids: set[DatasetId]) -> DatastoreRecordData | None:
        """Extract a subset of the records that match given dataset IDs.

        Parameters
        ----------
        dataset_ids : `set` [ `DatasetId` ]
            Dataset IDs to match.

        Returns
        -------
        record_data : `DatastoreRecordData` or `None`
            `None` is returned if there are no matching refs.

        Notes
        -----
        Records in the returned instance are shared with this instance,
        clients should not update or extend records in the returned instance.
        """
        matching_records: dict[DatasetId, dict[str, list[StoredDatastoreItemInfo]]] = {}
        for dataset_id in dataset_ids:
            if (id_records := self.records.get(dataset_id)) is not None:
                matching_records[dataset_id] = id_records
        if matching_records:
            return DatastoreRecordData(records=matching_records)
        else:
            return None

    def to_simple(self, minimal: bool = False) -> SerializedDatastoreRecordData:
        """Make representation of the object for serialization.

        Implements `~lsst.daf.butler.core.json.SupportsSimple` protocol.

        Parameters
        ----------
        minimal : `bool`, optional
            If True produce minimal representation, not used by this method.

        Returns
        -------
        simple : `SerializedDatastoreRecordData`
            Representation of this instance as a simple object.
        """

        def _class_name(records: list[StoredDatastoreItemInfo]) -> str:
            """Get fully qualified class name for the records. Empty string
            returned if list is empty. Exception is raised if records are of
            different classes.
            """
            if not records:
                return ""
            classes = {record.__class__ for record in records}
            assert len(classes) == 1, f"Records have to be of the same class: {classes}"
            return get_full_type_name(classes.pop())

        # Regroup by record class name, then table name, so that the class
        # can be re-imported on deserialization.
        records: dict[str, dict[str, list[_Record]]] = {}
        for table_data in self.records.values():
            for table_name, table_records in table_data.items():
                class_name = _class_name(table_records)
                class_records = records.setdefault(class_name, {})
                class_records.setdefault(table_name, []).extend(
                    [record.to_record() for record in table_records]
                )
        return SerializedDatastoreRecordData(dataset_ids=list(self.records.keys()), records=records)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatastoreRecordData,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
    ) -> DatastoreRecordData:
        """Make an instance of this class from serialized data.

        Implements `~lsst.daf.butler.core.json.SupportsSimple` protocol.

        Parameters
        ----------
        simple : `SerializedDatastoreRecordData`
            Serialized representation returned from `to_simple` method.
        universe : `DimensionUniverse`, optional
            Dimension universe, not used by this method.
        registry : `Registry`, optional
            Registry instance, not used by this method.

        Returns
        -------
        record_data : `DatastoreRecordData`
            De-serialized instance of `DatastoreRecordData`.
        """
        # Make sure that all dataset IDs appear in the dict even if they
        # don't have records.
        records: dict[DatasetId, dict[str, list[StoredDatastoreItemInfo]]] = {
            dataset_id: {} for dataset_id in simple.dataset_ids
        }
        for class_name, table_data in simple.records.items():
            klass = doImportType(class_name)
            for table_name, table_records in table_data.items():
                for record in table_records:
                    info = klass.from_record(record)
                    dataset_type_records = records.setdefault(info.dataset_id, {})
                    dataset_type_records.setdefault(table_name, []).append(info)
        return cls(records=records)