# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


"""Models used for client/server communication."""

from __future__ import annotations

__all__ = [
    "CLIENT_REQUEST_ID_HEADER_NAME",
    "DatasetTypeName",
    "FindDatasetRequestModel",
    "FindDatasetResponseModel",
    "GetCollectionInfoResponseModel",
    "GetCollectionSummaryResponseModel",
    "GetFileResponseModel",
]


from collections.abc import Iterable
from typing import Annotated, Any, ClassVar, Literal, NewType, Self, TypeAlias
from uuid import UUID

import pydantic

from lsst.daf.butler import (
    CollectionInfo,
    CollectionType,
    DataIdValue,
    DatasetRef,
    SerializedDataCoordinate,
    SerializedDataId,
    SerializedDatasetRef,
    SerializedDatasetType,
    SerializedDimensionGroup,
    Timespan,
)
from lsst.daf.butler.registry import SerializedCollectionSummary

from ..datastore.stored_file_info import SerializedStoredFileInfo
from ..dimensions import SerializedDimensionConfig, SerializedDimensionRecord
from ..queries.result_specs import SerializedResultSpec
from ..queries.tree import ColumnLiteral, SerializedQueryTree

CLIENT_REQUEST_ID_HEADER_NAME = "X-Butler-Client-Request-Id"
ERROR_STATUS_CODE = 422
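# Note: the header carries a client-generated ID so that server-side log
# entries can be correlated with client activity; ERROR_STATUS_CODE is the
# HTTP status under which ErrorResponseModel (defined below) is returned.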

CollectionList = NewType("CollectionList", list[str])
"""A list of search patterns for collection names. May use glob
syntax to specify wildcards."""
DatasetTypeName = NewType("DatasetTypeName", str)
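# Illustrative note on CollectionList: a search may mix literal names with
# glob patterns, e.g. CollectionList(["HSC/runs/*", "refcats"]) would match
# the collection named "refcats" plus everything under "HSC/runs/". (The
# collection names shown are hypothetical.)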

class FindDatasetRequestModel(pydantic.BaseModel):
    """Request model for find_dataset."""

    dataset_type: DatasetTypeName
    data_id: SerializedDataId
    default_data_id: SerializedDataId = pydantic.Field(default_factory=dict)
    """Data ID values used as a fallback if required values are not specified
    in ``data_id``.
    """
    collections: CollectionList
    timespan: Timespan | None
    dimension_records: bool = False
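# A minimal sketch of how a client might populate the request model above
# (the data ID values and collection names are hypothetical):
#
#     request = FindDatasetRequestModel(
#         dataset_type=DatasetTypeName("calexp"),
#         data_id={"instrument": "HSC", "visit": 903334, "detector": 16},
#         collections=CollectionList(["HSC/runs/*"]),
#         timespan=None,
#     )
#     body = request.model_dump_json()  # JSON payload for the HTTP request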

class FindDatasetResponseModel(pydantic.BaseModel):
    """Response model for ``find_dataset`` and ``get_dataset``."""

    dataset_ref: SerializedDatasetRef | None


class GetDatasetTypeResponseModel(pydantic.BaseModel):
    """Response model for ``dataset_type``."""

    dataset_type: SerializedDatasetType


class GetUniverseResponseModel(pydantic.BaseModel):
    """Response model for ``universe``."""

    universe: SerializedDimensionConfig


class GetFileByDataIdRequestModel(pydantic.BaseModel):
    """Request model for ``get_file_by_data_id``."""

    dataset_type: DatasetTypeName
    data_id: SerializedDataId
    default_data_id: SerializedDataId = pydantic.Field(default_factory=dict)
    """Data ID values used as a fallback if required values are not specified
    in ``data_id``.
    """
    collections: CollectionList
    timespan: Timespan | None = None


class GetFileResponseModel(pydantic.BaseModel):
    """Response model for get_file and get_file_by_data_id."""

    dataset_ref: SerializedDatasetRef
    artifact: FileInfoPayload | None
    """The data needed to retrieve and use an artifact. If this is `None`,
    the dataset is known to the Butler but the associated files are no
    longer available ("known to registry but not known to datastore").

    An example of a situation where this would be `None` is a per-visit image
    that is an intermediate file in the processing pipelines. It is deleted to
    save space, but the fact that it was once available must be recorded for
    provenance tracking.
    """


class ErrorResponseModel(pydantic.BaseModel):
    """Error response sent with a 422 status code, to propagate server
    exceptions with user-facing error messages to the client.
    """

    error_type: str
    """The ``error_type`` string from one of the subclasses of
    `ButlerUserError`.
    """
    detail: str
    """Detailed explanation of the error that will be sent to the client."""

# TODO DM-46204: This can be removed once the RSP recommended image has been
# upgraded to a version that contains DM-46129.
class GetCollectionInfoResponseModel(pydantic.BaseModel):
    """Response model for get_collection_info."""

    name: str
    type: CollectionType
    children: list[str]
    doc: str | None = None
    """Will be `None` unless requested with the ``include_doc=True`` query
    parameter."""
    parents: set[str] | None = None
    """Chained collections that directly contain this collection. Will be
    `None` unless requested with the ``include_parents=True`` query
    parameter."""


class GetCollectionSummaryResponseModel(pydantic.BaseModel):
    """Response model for get_collection_summary."""

    summary: SerializedCollectionSummary


class ExpandDataIdRequestModel(pydantic.BaseModel):
    """Request model for expand_data_id."""

    data_id: SerializedDataId


class ExpandDataIdResponseModel(pydantic.BaseModel):
    """Response model for expand_data_id."""

    data_coordinate: SerializedDataCoordinate


# TODO DM-46204: This can be removed once the RSP recommended image has been
# upgraded to a version that contains DM-46129.
class QueryCollectionsRequestModel(pydantic.BaseModel):
    """Request model for query_collections."""

    search: CollectionList
    collection_types: list[CollectionType]
    flatten_chains: bool
    include_chains: bool


# TODO DM-46204: This can be removed once the RSP recommended image has been
# upgraded to a version that contains DM-46129.
class QueryCollectionsResponseModel(pydantic.BaseModel):
    """Response model for query_collections."""

    collections: list[str]
    """Collection names that match the search."""


class QueryCollectionInfoRequestModel(pydantic.BaseModel):
    """Request model for query_collection_info."""

    expression: CollectionList
    collection_types: list[CollectionType]
    flatten_chains: bool
    include_chains: bool
    include_parents: bool
    include_summary: bool
    include_doc: bool
    summary_datasets: list[DatasetTypeName] | None


class QueryCollectionInfoResponseModel(pydantic.BaseModel):
    """Response model for query_collection_info."""

    collections: list[CollectionInfo]


class QueryDatasetTypesRequestModel(pydantic.BaseModel):
    """Request model for query_dataset_types."""

    search: list[str]
    """List of glob patterns to match against the names of the dataset
    types."""


class QueryDatasetTypesResponseModel(pydantic.BaseModel):
    """Response model for query_dataset_types."""

    dataset_types: list[SerializedDatasetType]
    """Dataset types that match the search."""
    missing: list[str]
    """Non-wildcard dataset type names included in the search that are not
    known to the server.
    """


class MaterializedQuery(pydantic.BaseModel):
    """Captures the parameters from a call to ``QueryDriver.materialize``."""

    type: Literal["materialized"] = "materialized"
    key: UUID
    tree: SerializedQueryTree
    dimensions: SerializedDimensionGroup
    datasets: list[str]
    allow_duplicate_overlaps: bool = False


class DataCoordinateUpload(pydantic.BaseModel):
    """Captures the parameters from a call to
    ``QueryDriver.upload_data_coordinates``.
    """

    type: Literal["upload"] = "upload"
    key: UUID
    dimensions: SerializedDimensionGroup
    rows: list[list[DataIdValue]]


AdditionalQueryInput: TypeAlias = Annotated[
    MaterializedQuery | DataCoordinateUpload, pydantic.Discriminator("type")
]
"""Information about additional data tables that may be used by a query."""

class QueryInputs(pydantic.BaseModel):
    """Serialized Butler query with additional context needed to execute
    it.
    """

    tree: SerializedQueryTree
    default_data_id: SerializedDataCoordinate
    additional_query_inputs: list[AdditionalQueryInput]


class QueryExecuteRequestModel(pydantic.BaseModel):
    """Request model for /query/execute/."""

    query: QueryInputs
    result_spec: SerializedResultSpec


class DataCoordinateResultModel(pydantic.BaseModel):
    """Result model for /query/execute/ when the user requested
    DataCoordinate results.
    """

    type: Literal["data_coordinate"] = "data_coordinate"
    rows: list[SerializedDataCoordinate]


class DimensionRecordsResultModel(pydantic.BaseModel):
    """Result model for /query/execute/ when the user requested
    DimensionRecord results.
    """

    type: Literal["dimension_record"] = "dimension_record"
    rows: list[SerializedDimensionRecord]


class DatasetRefResultModel(pydantic.BaseModel):
    """Result model for /query/execute/ when the user requested DatasetRef
    results.
    """

    type: Literal["dataset_ref"] = "dataset_ref"
    rows: list[SerializedDatasetRef]

    @classmethod
    def from_refs(cls, refs: Iterable[DatasetRef]) -> Self:
        return cls(rows=[ref.to_simple() for ref in refs])

class GeneralResultModel(pydantic.BaseModel):
    """Result model for /query/execute/ when the user requested general
    results.
    """

    type: Literal["general"] = "general"
    rows: list[tuple[Any, ...]]
    # Dimension records indexed by element name; only cached and skypix
    # elements are included. The default is used for compatibility with older
    # servers that do not set this field.
    dimension_records: dict[str, list[SerializedDimensionRecord]] | None = None


class QueryErrorResultModel(pydantic.BaseModel):
    """Result model for /query/execute when an error occurs part-way through
    returning rows.

    Because we are streaming results, the HTTP status code has already been
    sent before the error occurs, so this model provides a way to signal an
    error in-band with the results.
    """

    # (One example of this type of error is a CalibrationLookupError returned
    # by query row postprocessing.)

    type: Literal["error"] = "error"
    error: ErrorResponseModel


class QueryKeepAliveModel(pydantic.BaseModel):
    """Result model for /query/execute used to keep the connection alive.

    Some queries require a significant start-up time before they can start
    returning results, or a long processing time for each chunk of rows. This
    message signals that the server is still fetching the data.
    """

    type: Literal["keep-alive"] = "keep-alive"


QueryExecuteResultData: TypeAlias = Annotated[
    DataCoordinateResultModel
    | DimensionRecordsResultModel
    | DatasetRefResultModel
    | GeneralResultModel
    | QueryErrorResultModel
    | QueryKeepAliveModel,
    pydantic.Field(discriminator="type"),
]
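# A minimal sketch of how a client might consume a stream of these messages
# (the transport yielding each JSON chunk and the error-raising helper are
# assumed, not part of this module):
#
#     adapter = pydantic.TypeAdapter(QueryExecuteResultData)
#     for chunk in json_chunks:
#         message = adapter.validate_json(chunk)
#         if message.type == "keep-alive":
#             continue  # server is still working; wait for more data
#         elif message.type == "error":
#             raise_butler_user_error(message.error)
#         else:
#             handle_rows(message.rows)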

class QueryCountRequestModel(pydantic.BaseModel):
    """Request model for /query/count/."""

    query: QueryInputs
    result_spec: SerializedResultSpec
    exact: bool
    discard: bool


class QueryCountResponseModel(pydantic.BaseModel):
    """Response model for /query/count/."""

    count: int


class QueryAnyRequestModel(pydantic.BaseModel):
    """Request model for /query/any/."""

    query: QueryInputs
    execute: bool
    exact: bool


class QueryAnyResponseModel(pydantic.BaseModel):
    """Response model for /query/any/."""

    found_rows: bool


class QueryExplainRequestModel(pydantic.BaseModel):
    """Request model for /query/explain/."""

    query: QueryInputs
    execute: bool


class QueryExplainResponseModel(pydantic.BaseModel):
    """Response model for /query/explain/."""

    messages: list[str]


class QueryAllDatasetsRequestModel(pydantic.BaseModel):
    """Request model for /query/all_datasets/."""

    collections: CollectionList
    name: list[DatasetTypeName]
    find_first: bool
    data_id: SerializedDataId
    default_data_id: SerializedDataId = pydantic.Field(default_factory=dict)
    """Data ID values used as a fallback if required values are not specified
    in ``data_id``.
    """
    where: str
    bind: dict[str, ColumnLiteral]
    limit: int | None
    with_dimension_records: bool


class GetFileTransferInfoRequestModel(pydantic.BaseModel):
    """Request model for looking up file transfer information for a batch of
    datasets.
    """

    MAX_ITEMS_PER_REQUEST: ClassVar[int] = 10_000
    dataset_ids: Annotated[list[UUID], pydantic.Field(max_length=MAX_ITEMS_PER_REQUEST)]
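# Requests larger than MAX_ITEMS_PER_REQUEST are rejected by validation, so
# a caller with more IDs would need to batch, e.g. (illustrative):
#
#     n = GetFileTransferInfoRequestModel.MAX_ITEMS_PER_REQUEST
#     batches = [ids[i : i + n] for i in range(0, len(ids), n)]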

FileAuthenticationMode: TypeAlias = Literal["none", "gafaelfawr", "datastore"]


class FileTransferRecordModel(pydantic.BaseModel):
    """Information needed to transfer a single file artifact."""

    url: pydantic.AnyHttpUrl
    auth: FileAuthenticationMode
    file_info: SerializedStoredFileInfo


class GetFileTransferInfoResponseModel(pydantic.BaseModel):
    """Response model mapping each dataset ID to the transfer records for
    its files.
    """

    files: dict[UUID, list[FileTransferRecordModel]]


class FileInfoRecord(pydantic.BaseModel):
    """Information required to read a single file stored in `FileDatastore`."""

    # This is intentionally restricted to HTTP for security reasons. Allowing
    # arbitrary URLs here would let the server trick the client into fetching
    # data from any file on its local filesystem, or from remote storage
    # using credentials lying around in the environment.
    url: pydantic.AnyHttpUrl
    """An HTTP URL that can be used to read the file."""

    datastoreRecords: SerializedStoredFileInfo
    """`FileDatastore` metadata records for this file."""

    auth: FileAuthenticationMode = "none"


class FileInfoPayload(pydantic.BaseModel):
    """A serializable representation of the data needed for retrieving an
    artifact and converting it to a Python object.
    """

    datastore_type: Literal["file"]

    file_info: list[FileInfoRecord]
    """List of retrieval information for each file associated with this
    artifact.
    """

class GetManyDatasetsRequestModel(pydantic.BaseModel):
    """Request model for looking up multiple datasets by ID."""

    MAX_ITEMS_PER_REQUEST: ClassVar[int] = 10_000
    dataset_ids: Annotated[list[UUID], pydantic.Field(max_length=MAX_ITEMS_PER_REQUEST)]


GetManyDatasetsResponseModel: TypeAlias = DatasetRefResultModel
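# GetManyDatasetsResponseModel reuses DatasetRefResultModel, so the same
# client code can deserialize both streamed query results and batch dataset
# lookups.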