Coverage for python/lsst/daf/butler/_file_dataset.py: 49%
49 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 08:17 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 08:17 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("FileDataset", "SerializedFileDataset")
32import uuid
33from collections.abc import Callable
34from dataclasses import dataclass
35from typing import Any, TypeAlias
37import pydantic
39from lsst.resources import ResourcePath, ResourcePathExpression
41from ._dataset_ref import DatasetRef, MinimalistSerializableDatasetRef
42from ._dataset_type import DatasetType
43from ._formatter import FormatterParameter
44from .dimensions import DimensionUniverse
@dataclass
class FileDataset:
    """Description of a single exported file and the datasets stored in it.

    Parameters
    ----------
    path : `lsst.resources.ResourcePath` or `str`
        Location of the file holding the dataset.

        For an in-place export (``transfer=None``) the path is relative to
        the datastore root; only datastores with a well-defined root in the
        local filesystem can be expected to support that mode. For any other
        transfer mode the path is relative to the directory passed to
        `Datastore.export`.
    refs : `DatasetRef` or `list` [ `DatasetRef` ]
        Registry information about the dataset. A single `DatasetRef` is
        wrapped in a one-element list.
    formatter : `Formatter` or `str` or `None`, optional
        A `Formatter` class or fully-qualified name.

    Raises
    ------
    ValueError
        Raised if the supplied refs do not resolve to exactly one run.
    """

    __slots__ = ("refs", "path", "formatter")

    # Field declarations for ``@dataclass``. The explicit ``__init__`` below
    # is used in place of a dataclass-generated one.
    refs: list[DatasetRef]
    path: str | ResourcePath
    formatter: FormatterParameter | None

    def __init__(
        self,
        path: ResourcePathExpression,
        refs: DatasetRef | list[DatasetRef],
        *,
        formatter: FormatterParameter | None = None,
    ):
        # Strings are stored untouched; every other accepted path expression
        # is normalized to a ResourcePath so that only two representations
        # ever need to be handled downstream.
        if isinstance(path, str):
            self.path = path
        else:
            self.path = ResourcePath(path, forceAbsolute=False)

        ref_list = [refs] if isinstance(refs, DatasetRef) else refs

        # Exactly one distinct run is required.
        runs = {ref.run for ref in ref_list}
        if len(runs) != 1:
            raise ValueError(f"Supplied refs must all share the same run. Got: {runs}")

        self.formatter = formatter
        self.refs = ref_list

    def __lt__(self, other: Any) -> bool:
        # Ordering considers only the string form of the path.
        if isinstance(other, type(self)):
            return str(self.path) < str(other.path)
        return NotImplemented

    def to_simple(self) -> SerializedFileDataset:
        """
        Build a simplified, JSON-serializable copy of this instance.

        Returns
        -------
        serialized : `SerializedFileDataset`
            Serializable representation of this `FileDataset` instance.
        """
        # The serialized form carries the formatter as a string (or None):
        # strings pass through and anything else is asked for its name.
        configured = self.formatter
        if configured is None or isinstance(configured, str):
            formatter_name = configured
        else:
            formatter_name = configured.name()

        serialized_refs = {}
        for ref in self.refs:
            serialized_refs[ref.id] = MinimalistSerializableDatasetRef.from_dataset_ref(ref)

        return SerializedFileDataset(
            refs=serialized_refs,
            path=str(self.path),
            formatter=formatter_name,
        )

    @staticmethod
    def from_simple(
        dataset: SerializedFileDataset, *, dataset_type_loader: DatasetTypeLoader, universe: DimensionUniverse
    ) -> FileDataset:
        """
        Rebuild a `FileDataset` from its `SerializedFileDataset` form.

        Parameters
        ----------
        dataset : `SerializedFileDataset`
            Object to deserialize.
        dataset_type_loader : `~collections.abc.Callable` [[ `str` ], `DatasetType` ]
            Function that takes a string dataset type name as its only
            parameter, and returns an instance of `DatasetType`. It is called
            once per serialized ref to deserialize the `DatasetRef` instances
            contained in the serialized `FileDataset`.
        universe : `DimensionUniverse`
            Dimension universe associated with the `Butler` instance that
            created the serialized `FileDataset` instance.

        Returns
        -------
        file_dataset : `FileDataset`
            Deserialized equivalent of the input dataset.
        """
        restored_refs = []
        for dataset_id, serialized_ref in dataset.refs.items():
            dataset_type = dataset_type_loader(serialized_ref.dataset_type_name)
            restored_refs.append(
                serialized_ref.to_dataset_ref(dataset_id, universe=universe, dataset_type=dataset_type)
            )
        return FileDataset(path=dataset.path, refs=restored_refs, formatter=dataset.formatter)
# Defined after `FileDataset` even though `FileDataset.from_simple` names it in
# its signature; that works because ``from __future__ import annotations``
# keeps annotations from being evaluated at definition time.
DatasetTypeLoader: TypeAlias = Callable[[str], DatasetType]
"""Type signature for a function that takes a string dataset type name as its
only parameter, and returns an instance of `DatasetType`.
"""
class SerializedFileDataset(pydantic.BaseModel):
    """Serializable format of `FileDataset` object."""

    # Minimal serialized form of each `DatasetRef`, keyed by the ref's ID
    # (populated by `FileDataset.to_simple`).
    refs: dict[uuid.UUID, MinimalistSerializableDatasetRef]
    # String form of `FileDataset.path`.
    path: str
    # Formatter as a string: either the string stored on the `FileDataset` or
    # the result of calling ``name()`` on its formatter. `None` when the
    # `FileDataset` had no formatter.
    formatter: str | None = None