Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("Formatter", "FormatterFactory", "FormatterParameter") 

25 

26from abc import ABCMeta, abstractmethod 

27import logging 

28import copy 

29from typing import ClassVar, Set, FrozenSet, Union, Optional, Dict, Any, Tuple, Type, TYPE_CHECKING 

30 

31from .configSupport import processLookupConfigs, LookupKey 

32from .mappingFactory import MappingFactory 

33from .utils import getFullTypeName 

34from .fileDescriptor import FileDescriptor 

35from .location import Location 

36from .config import Config 

37from .dimensions import DimensionUniverse 

38from .storageClass import StorageClass 

39from .datasets import DatasetType, DatasetRef 

40 

41log = logging.getLogger(__name__) 

42 

43# Define a new special type for functions that take "entity" 

44Entity = Union[DatasetType, DatasetRef, StorageClass, str] 

45 

46 

47if TYPE_CHECKING: 47 ↛ 48line 47 didn't jump to line 48, because the condition on line 47 was never true

48 from .dimensions import DataCoordinate 

49 

50 

class Formatter(metaclass=ABCMeta):
    """Interface for reading and writing Datasets with a particular
    `StorageClass`.

    Parameters
    ----------
    fileDescriptor : `FileDescriptor`
        Identifies the file to read or write, and the associated storage
        classes and parameter information.
    dataId : `DataCoordinate`, optional
        Data ID to associate with this formatter. Defaults to `None`.

    Raises
    ------
    TypeError
        Raised if ``fileDescriptor`` is not a `FileDescriptor` instance.
    """

    unsupportedParameters: ClassVar[Optional[Union[FrozenSet[str], Set[str]]]] = frozenset()
    """Set of parameters not understood by this `Formatter` (`frozenset` or
    `set` of `str`, or `None`). An empty set means all parameters are
    supported. `None` indicates that no parameters are supported.
    """

    def __init__(self, fileDescriptor: FileDescriptor, dataId: Optional[DataCoordinate] = None):
        if not isinstance(fileDescriptor, FileDescriptor):
            raise TypeError("File descriptor must be a FileDescriptor")
        self._fileDescriptor = fileDescriptor
        self._dataId = dataId

    def __str__(self) -> str:
        return f"{self.name()}@{self.fileDescriptor.location.path}"

    def __repr__(self) -> str:
        return f"{self.name()}({self.fileDescriptor!r})"

    @property
    def fileDescriptor(self) -> FileDescriptor:
        """FileDescriptor associated with this formatter
        (`FileDescriptor`, read-only)"""
        return self._fileDescriptor

    @property
    def dataId(self) -> Optional[DataCoordinate]:
        """DataId associated with this formatter (`DataCoordinate` or
        `None`, read-only)"""
        return self._dataId

    @classmethod
    def name(cls) -> str:
        """Returns the fully qualified name of the formatter.

        Returns
        -------
        name : `str`
            Fully-qualified name of formatter class.
        """
        return getFullTypeName(cls)

    @abstractmethod
    def read(self, component: Optional[str] = None) -> object:
        """Read a Dataset.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested Dataset.
        """
        raise NotImplementedError("Type does not support reading")

    @abstractmethod
    def write(self, inMemoryDataset: Any) -> str:
        """Write a Dataset.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.

        Returns
        -------
        path : `str`
            The path to where the Dataset was stored within the datastore.
        """
        raise NotImplementedError("Type does not support writing")

    def fromBytes(self, serializedDataset: bytes,
                  component: Optional[str] = None) -> object:
        """Reads serialized data into a Dataset or its component.

        The base implementation always raises; subclasses that support
        deserialization from bytes override it.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        NotImplementedError
            Raised by this base implementation.
        """
        raise NotImplementedError("Type does not support reading from bytes.")

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        The base implementation always raises; subclasses that support
        serialization to bytes override it.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        NotImplementedError
            Raised by this base implementation.
        """
        raise NotImplementedError("Type does not support writing to bytes.")

    @classmethod
    def makeUpdatedLocation(cls, location: Location) -> Location:
        """Return a new `Location` instance updated with this formatter's
        extension.

        The supplied location is not modified; a deep copy is updated and
        returned.

        Parameters
        ----------
        location : `Location`
            The location to update.

        Returns
        -------
        updated : `Location`
            The updated location with a new file extension applied.

        Raises
        ------
        NotImplementedError
            Raised if there is no ``extension`` attribute associated with
            this formatter.
        """
        # Only the attribute lookup is guarded: an AttributeError raised
        # from inside ``updateExtension`` is a genuine error and must not be
        # reported as a missing extension.
        try:
            extension = cls.extension
        except AttributeError:
            raise NotImplementedError("No file extension registered with this formatter") from None

        updated = copy.deepcopy(location)
        updated.updateExtension(extension)
        return updated

    @classmethod
    def predictPathFromLocation(cls, location: Location) -> str:
        """Return the path that would be returned by write, without actually
        writing.

        Parameters
        ----------
        location : `Location`
            Location of file for which path prediction is required.

        Returns
        -------
        path : `str`
            Path within datastore that would be associated with this location.

        Raises
        ------
        NotImplementedError
            Raised if there is no ``extension`` attribute associated with
            this formatter.
        """
        return cls.makeUpdatedLocation(location).pathInStore

    def predictPath(self) -> str:
        """Return the path that would be returned by write, without actually
        writing.

        Uses the `FileDescriptor` associated with the instance.

        Returns
        -------
        path : `str`
            Path within datastore that would be associated with the location
            stored in this `Formatter`.
        """
        return self.predictPathFromLocation(self.fileDescriptor.location)

    def segregateParameters(self, parameters: Optional[Dict[str, Any]] = None) -> Tuple[Dict, Dict]:
        """Segregate the supplied parameters into those understood by the
        formatter and those not understood by the formatter.

        Any unsupported parameters are assumed to be usable by associated
        assemblers.

        Parameters
        ----------
        parameters : `dict`, optional
            Parameters with values that have been supplied by the caller
            and which might be relevant for the formatter. If `None`
            parameters will be read from the registered `FileDescriptor`.

        Returns
        -------
        supported : `dict`
            Those parameters supported by this formatter.
        unsupported : `dict`
            Those parameters not supported by this formatter.

        Notes
        -----
        The supplied mapping is never modified; both returned dictionaries
        are new objects whose keys follow the order of ``parameters``.
        """
        if parameters is None:
            parameters = self.fileDescriptor.parameters

        # Nothing supplied by the caller nor by the file descriptor.
        if parameters is None:
            return {}, {}

        rejected = self.unsupportedParameters
        if rejected is None:
            # Support none of the parameters
            return {}, dict(parameters)

        # Partition in the caller's key order so the result does not depend
        # on set iteration order.
        supported = {key: value for key, value in parameters.items() if key not in rejected}
        unsupported = {key: value for key, value in parameters.items() if key in rejected}
        return supported, unsupported

274 

275 

class FormatterFactory:
    """Factory for `Formatter` instances.

    Formatters are stored in an internal `MappingFactory` restricted to
    `Formatter`, and are looked up by `LookupKey`.
    """

    def __init__(self) -> None:
        self._mappingFactory = MappingFactory(Formatter)

    def __contains__(self, key: Any) -> bool:
        """Indicates whether the supplied key is present in the factory.

        Parameters
        ----------
        key : `LookupKey`, `str` or objects with ``name`` attribute
            Key to use to lookup in the factory whether a corresponding
            formatter is present.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the factory.
        """
        return key in self._mappingFactory

    @staticmethod
    def _getLookupNames(entity: Entity) -> Any:
        """Return the candidate lookup names for an entity.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity for which a formatter is being requested.

        Returns
        -------
        names : iterable
            A one-element `tuple` holding the string itself if ``entity``
            is a `str`, otherwise whatever ``entity._lookupNames()``
            returns.
        """
        if isinstance(entity, str):
            return (entity,)
        return entity._lookupNames()

    def registerFormatters(self, config: Config, *, universe: DimensionUniverse) -> None:
        """Bulk register formatters from a config.

        Parameters
        ----------
        config : `Config`
            ``formatters`` section of a configuration.
        universe : `DimensionUniverse`
            Set of all known dimensions, used to expand and validate any used
            in lookup keys. Must be given by keyword.

        Notes
        -----
        The configuration can include one level of hierarchy where an
        instrument-specific section can be defined to override more general
        formatter specifications. This is represented in YAML using a
        key of form ``instrument<name>`` which can then define formatters
        that will be returned if a `DatasetRef` contains a matching instrument
        name in the data ID.

        The config is parsed using the function ``processLookupConfigs``
        from the ``configSupport`` module. Each resulting entry is passed to
        `registerFormatter` without ``overwrite``.
        """
        contents = processLookupConfigs(config, universe=universe)
        for key, f in contents.items():
            self.registerFormatter(key, f)

    def getLookupKeys(self) -> Set[LookupKey]:
        """Retrieve the look up keys for all the registry entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching in the registry.
        """
        return self._mappingFactory.getLookupKeys()

    def getFormatterClassWithMatch(self, entity: Entity) -> Tuple[LookupKey, Type[Formatter]]:
        """Get the matching formatter class along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `type`
            The class of the registered formatter.
        """
        names = self._getLookupNames(entity)
        matchKey, formatter = self._mappingFactory.getClassFromRegistryWithMatch(names)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter

    def getFormatterClass(self, entity: Entity) -> Type[Formatter]:
        """Get the matching formatter class.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.

        Returns
        -------
        formatter : `type`
            The class of the registered formatter.
        """
        _, formatter = self.getFormatterClassWithMatch(entity)
        return formatter

    def getFormatterWithMatch(self, entity: Entity, *args, **kwargs) -> Tuple[LookupKey, Formatter]:
        """Get a new formatter instance along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.
        args : `tuple`
            Positional arguments to pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to object constructor.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        names = self._getLookupNames(entity)
        matchKey, formatter = self._mappingFactory.getFromRegistryWithMatch(names, *args, **kwargs)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter

    def getFormatter(self, entity: Entity, *args, **kwargs) -> Formatter:
        """Get a new formatter instance.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.
        args : `tuple`
            Positional arguments to pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to object constructor.

        Returns
        -------
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        _, formatter = self.getFormatterWithMatch(entity, *args, **kwargs)
        return formatter

    def registerFormatter(self, type_: Union[LookupKey, str, StorageClass, DatasetType],
                          formatter: Union[str, Type[Formatter]], overwrite: bool = False) -> None:
        """Register a `Formatter`.

        Parameters
        ----------
        type_ : `LookupKey`, `str`, `StorageClass` or `DatasetType`
            Type for which this formatter is to be used. If a `LookupKey`
            is not provided, one will be constructed from the supplied string
            or by using the ``name`` property of the supplied entity.
        formatter : `str` or class of type `Formatter`
            Identifies a `Formatter` subclass to use for reading and writing
            Datasets of this type. Can be a `Formatter` class.
        overwrite : `bool`, optional
            If `True` an existing entry will be replaced by the new value.
            Default is `False`.

        Raises
        ------
        ValueError
            Raised if the formatter does not name a valid formatter type and
            ``overwrite`` is `False`.
        """
        self._mappingFactory.placeInRegistry(type_, formatter, overwrite=overwrite)

468 

469 

470# Type to use when allowing a Formatter or its class name 

471FormatterParameter = Union[None, str, Type[Formatter]]