Coverage for python/lsst/daf/butler/core/datastoreRecordData.py: 28%
75 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-10 02:33 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-10 02:33 -0800
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Support for generic data stores."""
24from __future__ import annotations
26__all__ = ("DatastoreRecordData", "SerializedDatastoreRecordData")
28import dataclasses
29import uuid
30from collections import defaultdict
31from typing import TYPE_CHECKING, AbstractSet, Any, Dict, List, Optional, Union
33from lsst.utils import doImportType
34from lsst.utils.introspection import get_full_type_name
35from pydantic import BaseModel
37from .datasets import DatasetId
38from .dimensions import DimensionUniverse
39from .storedFileInfo import StoredDatastoreItemInfo
41if TYPE_CHECKING: 41 ↛ 43line 41 didn't jump to line 43, because the condition on line 41 was never true
43 from ..registry import Registry
45_Record = Dict[str, Any]
class SerializedDatastoreRecordData(BaseModel):
    """Representation of a `DatastoreRecordData` suitable for serialization."""

    dataset_ids: List[uuid.UUID]
    """List of dataset IDs"""

    records: Dict[str, Dict[str, List[_Record]]]
    """List of records indexed by record class name and table name."""

    @classmethod
    def direct(
        cls,
        *,
        dataset_ids: List[Union[str, uuid.UUID]],
        records: Dict[str, Dict[str, List[_Record]]],
    ) -> SerializedDatastoreRecordData:
        """Construct a `SerializedDatastoreRecordData` directly without
        validators.

        Parameters
        ----------
        dataset_ids : `list` [ `str` or `uuid.UUID` ]
            Dataset IDs; string values are converted back to `uuid.UUID`
            (JSON round-tripping turns UUIDs into strings).
        records : `dict`
            Records indexed by record class name and table name.  This
            mapping is stored as-is and modified in place: string
            ``dataset_id`` column values are replaced with `uuid.UUID`
            instances.

        Notes
        -----
        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        This method should only be called when the inputs are trusted.
        """
        data = SerializedDatastoreRecordData.__new__(cls)
        setter = object.__setattr__
        # JSON makes strings out of UUIDs, need to convert them back.
        # (Use ``item``/``dataset_id`` rather than shadowing builtin ``id``.)
        setter(
            data,
            "dataset_ids",
            [uuid.UUID(item) if isinstance(item, str) else item for item in dataset_ids],
        )
        # See also comments in record_ids_to_uuid()
        for table_data in records.values():
            for table_records in table_data.values():
                for record in table_records:
                    # This only checks dataset_id value, if there are any
                    # other columns that are UUIDs we'd need more generic
                    # approach.
                    dataset_id = record.get("dataset_id")
                    if isinstance(dataset_id, str):
                        record["dataset_id"] = uuid.UUID(dataset_id)
        setter(data, "records", records)
        return data
@dataclasses.dataclass
class DatastoreRecordData:
    """A struct that represents a tabular data export from a single
    datastore.
    """

    records: defaultdict[DatasetId, defaultdict[str, List[StoredDatastoreItemInfo]]] = dataclasses.field(
        default_factory=lambda: defaultdict(lambda: defaultdict(list))
    )
    """Opaque table data, indexed by dataset ID and grouped by opaque table
    name."""

    def update(self, other: DatastoreRecordData) -> None:
        """Update contents of this instance with data from another instance.

        Parameters
        ----------
        other : `DatastoreRecordData`
            Records to merge into this instance.

        Notes
        -----
        Merged instances can not have identical records; this method does
        not check for duplicates, it simply extends the per-table lists.
        """
        for dataset_id, table_records in other.records.items():
            this_table_records = self.records[dataset_id]
            for table_name, records in table_records.items():
                this_table_records[table_name].extend(records)

    def subset(self, dataset_ids: AbstractSet[DatasetId]) -> Optional[DatastoreRecordData]:
        """Extract a subset of the records that match given dataset IDs.

        Parameters
        ----------
        dataset_ids : `set` [ `DatasetId` ]
            Dataset IDs to match.

        Returns
        -------
        record_data : `DatastoreRecordData` or `None`
            `None` is returned if there are no matching refs.

        Notes
        -----
        Records in the returned instance are shared with this instance, clients
        should not update or extend records in the returned instance.
        """
        matching_records: defaultdict[
            DatasetId, defaultdict[str, List[StoredDatastoreItemInfo]]
        ] = defaultdict(lambda: defaultdict(list))
        for dataset_id in dataset_ids:
            # Share the nested mapping rather than copying it (see Notes).
            if (id_records := self.records.get(dataset_id)) is not None:
                matching_records[dataset_id] = id_records
        if matching_records:
            return DatastoreRecordData(records=matching_records)
        else:
            return None

    def to_simple(self, minimal: bool = False) -> SerializedDatastoreRecordData:
        """Make representation of the object for serialization.

        Implements `~lsst.daf.butler.core.json.SupportsSimple` protocol.

        Parameters
        ----------
        minimal : `bool`, optional
            If True produce minimal representation, not used by this method.

        Returns
        -------
        simple : `SerializedDatastoreRecordData`
            Representation of this instance as a simple serializable model.
        """

        def _class_name(records: list[StoredDatastoreItemInfo]) -> str:
            """Get fully qualified class name for the records. Empty string
            returned if list is empty. Exception is raised if records are of
            different classes.
            """
            if not records:
                return ""
            classes = {type(record) for record in records}
            assert len(classes) == 1, f"Records have to be of the same class: {classes}"
            return get_full_type_name(classes.pop())

        records: defaultdict[str, defaultdict[str, List[_Record]]] = defaultdict(lambda: defaultdict(list))
        for table_data in self.records.values():
            for table_name, table_records in table_data.items():
                class_name = _class_name(table_records)
                records[class_name][table_name].extend([record.to_record() for record in table_records])
        return SerializedDatastoreRecordData(dataset_ids=list(self.records.keys()), records=records)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatastoreRecordData,
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
    ) -> DatastoreRecordData:
        """Make an instance of this class from serialized data.

        Implements `~lsst.daf.butler.core.json.SupportsSimple` protocol.

        Parameters
        ----------
        simple : `SerializedDatastoreRecordData`
            Serialized representation returned from `to_simple` method.
        universe : `DimensionUniverse`, optional
            Dimension universe, not used by this method.
        registry : `Registry`, optional
            Registry instance, not used by this method.

        Returns
        -------
        record_data : `DatastoreRecordData`
            De-serialized instance of `DatastoreRecordData`.
        """
        records: defaultdict[DatasetId, defaultdict[str, List[StoredDatastoreItemInfo]]] = defaultdict(
            lambda: defaultdict(list)
        )
        # make sure that all dataset IDs appear in the dict even if they don't
        # have records.
        for dataset_id in simple.dataset_ids:
            records[dataset_id] = defaultdict(list)
        for class_name, table_data in simple.records.items():
            klass = doImportType(class_name)
            for table_name, table_records in table_data.items():
                for record in table_records:
                    info = klass.from_record(record)
                    records[info.dataset_id][table_name].append(info)
        return cls(records=records)