Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("Formatter", "FormatterFactory", "FormatterParameter") 

25 

26from abc import ABCMeta, abstractmethod 

27import logging 

28import copy 

29from typing import ClassVar, Set, FrozenSet, Union, Optional, Dict, Any, Tuple, Type, TYPE_CHECKING 

30 

31from .configSupport import processLookupConfigs, LookupKey 

32from .mappingFactory import MappingFactory 

33from .utils import getFullTypeName 

34from .fileDescriptor import FileDescriptor 

35from .location import Location 

36from .config import Config 

37from .dimensions import DimensionUniverse 

38from .storageClass import StorageClass 

39from .datasets import DatasetType, DatasetRef 

40 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Type alias for the "entity" argument accepted by the formatter lookup
# methods: a dataset type, a dataset ref, a storage class, or a plain
# string name.
Entity = Union[DatasetType, DatasetRef, StorageClass, str]

45 

46 

47if TYPE_CHECKING: 47 ↛ 48line 47 didn't jump to line 48, because the condition on line 47 was never true

48 from .dimensions import DataCoordinate 

49 

50 

class Formatter(metaclass=ABCMeta):
    """Interface for reading and writing Datasets with a particular
    `StorageClass`.

    Parameters
    ----------
    fileDescriptor : `FileDescriptor`
        Identifies the file to read or write, and the associated storage
        classes and parameter information. Must be a `FileDescriptor`
        instance; any other value (including `None`) is rejected.
    dataId : `DataCoordinate`, optional
        Data ID to associate with this formatter. Defaults to `None`.

    Raises
    ------
    TypeError
        Raised if ``fileDescriptor`` is not a `FileDescriptor` instance.
    """

    unsupportedParameters: ClassVar[Optional[Union[FrozenSet[str], Set[str]]]] = frozenset()
    """Set of parameters not understood by this `Formatter`. An empty set
    (the default, an empty `frozenset`) means all parameters are supported.
    `None` indicates that no parameters are supported.
    """

    extension: Optional[str] = None
    """File extension default provided by this formatter."""

    def __init__(self, fileDescriptor: FileDescriptor, dataId: Optional[DataCoordinate] = None):
        if not isinstance(fileDescriptor, FileDescriptor):
            raise TypeError("File descriptor must be a FileDescriptor")
        self._fileDescriptor = fileDescriptor
        self._dataId = dataId

    def __str__(self) -> str:
        return f"{self.name()}@{self.fileDescriptor.location.path}"

    def __repr__(self) -> str:
        return f"{self.name()}({self.fileDescriptor!r})"

    @property
    def fileDescriptor(self) -> FileDescriptor:
        """FileDescriptor associated with this formatter
        (`FileDescriptor`, read-only)"""
        return self._fileDescriptor

    @property
    def dataId(self) -> Optional[DataCoordinate]:
        """DataId associated with this formatter (`DataCoordinate` or `None`,
        read-only)"""
        return self._dataId

    @classmethod
    def name(cls) -> str:
        """Returns the fully qualified name of the formatter.

        Returns
        -------
        name : `str`
            Fully-qualified name of formatter class.
        """
        return getFullTypeName(cls)

    @abstractmethod
    def read(self, component: Optional[str] = None) -> object:
        """Read a Dataset.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested Dataset.

        Raises
        ------
        NotImplementedError
            Raised by this base implementation; concrete formatters must
            override this method.
        """
        raise NotImplementedError("Type does not support reading")

    @abstractmethod
    def write(self, inMemoryDataset: Any) -> str:
        """Write a Dataset.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.

        Returns
        -------
        path : `str`
            The path to where the Dataset was stored within the datastore.

        Raises
        ------
        NotImplementedError
            Raised by this base implementation; concrete formatters must
            override this method.
        """
        raise NotImplementedError("Type does not support writing")

    def fromBytes(self, serializedDataset: bytes,
                  component: Optional[str] = None) -> object:
        """Reads serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        NotImplementedError
            Raised by this base implementation, which does not support
            reading from bytes.
        """
        raise NotImplementedError("Type does not support reading from bytes.")

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        NotImplementedError
            Raised by this base implementation, which does not support
            writing to bytes.
        """
        raise NotImplementedError("Type does not support writing to bytes.")

    @classmethod
    def makeUpdatedLocation(cls, location: Location) -> Location:
        """Return a new `Location` instance updated with this formatter's
        extension.

        Parameters
        ----------
        location : `Location`
            The location to update. It is deep-copied first, so the supplied
            object is not the one that gets updated.

        Returns
        -------
        updated : `Location`
            The updated location with a new file extension applied.

        Raises
        ------
        NotImplementedError
            Raised if an `AttributeError` occurs while applying the
            ``extension`` attribute associated with this formatter.
        """
        # Work on a copy so the caller's Location is left alone.
        location = copy.deepcopy(location)
        try:
            # NOTE(review): ``extension`` is always defined on this base class
            # (defaulting to `None`), so the AttributeError handled below is
            # not expected from the attribute lookup itself -- confirm how
            # ``Location.updateExtension`` treats `None`.
            location.updateExtension(cls.extension)
        except AttributeError:
            raise NotImplementedError("No file extension registered with this formatter") from None
        return location

    @classmethod
    def predictPathFromLocation(cls, location: Location) -> str:
        """Return the path that would be returned by write, without actually
        writing.

        Parameters
        ----------
        location : `Location`
            Location of file for which path prediction is required.

        Returns
        -------
        path : `str`
            Path within datastore that would be associated with this location.
        """
        return cls.makeUpdatedLocation(location).pathInStore

    def predictPath(self) -> str:
        """Return the path that would be returned by write, without actually
        writing.

        Uses the `FileDescriptor` associated with the instance.

        Returns
        -------
        path : `str`
            Path within datastore that would be associated with the location
            stored in this `Formatter`.
        """
        return self.predictPathFromLocation(self.fileDescriptor.location)

    def segregateParameters(self, parameters: Optional[Dict[str, Any]] = None) -> Tuple[Dict, Dict]:
        """Segregate the supplied parameters into those understood by the
        formatter and those not understood by the formatter.

        Any unsupported parameters are assumed to be usable by associated
        assemblers.

        Parameters
        ----------
        parameters : `dict`, optional
            Parameters with values that have been supplied by the caller
            and which might be relevant for the formatter. If `None`
            parameters will be read from the registered `FileDescriptor`.
            The supplied mapping is never modified.

        Returns
        -------
        supported : `dict`
            Those parameters supported by this formatter.
        unsupported : `dict`
            Those parameters not supported by this formatter.

        Notes
        -----
        Both returned dictionaries are new objects and keep the key order of
        the input parameters.
        """
        if parameters is None:
            parameters = self.fileDescriptor.parameters

        # Neither the caller nor the file descriptor supplied anything.
        if parameters is None:
            return {}, {}

        rejected = self.unsupportedParameters
        if rejected is None:
            # Support none of the parameters
            return {}, dict(parameters)

        # Split in a single pass per output so that neither the input nor an
        # intermediate copy is mutated while being iterated over.
        supported = {key: value for key, value in parameters.items() if key not in rejected}
        unsupported = {key: value for key, value in parameters.items() if key in rejected}

        return supported, unsupported

277 

278 

class FormatterFactory:
    """Factory for `Formatter` instances.

    Formatters are registered against lookup keys and later retrieved,
    either as classes or as new instances, by supplying an entity (or a
    plain name) to match against those keys.
    """

    def __init__(self) -> None:
        self._mappingFactory = MappingFactory(Formatter)

    def __contains__(self, key: Union[LookupKey, str]) -> bool:
        """Indicates whether the supplied key is present in the factory.

        Parameters
        ----------
        key : `LookupKey`, `str` or objects with ``name`` attribute
            Key to use to lookup in the factory whether a corresponding
            formatter is present.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the factory.
        """
        return key in self._mappingFactory

    @staticmethod
    def _lookupNamesFor(entity: Entity) -> Any:
        """Return the candidate registry keys to try for ``entity``.

        A plain string is wrapped in a single `LookupKey`; any other entity
        is asked for its own lookup names.
        """
        if isinstance(entity, str):
            return (LookupKey(name=entity),)
        return entity._lookupNames()

    def registerFormatters(self, config: Config, *, universe: DimensionUniverse) -> None:
        """Bulk register formatters from a config.

        Parameters
        ----------
        config : `Config`
            ``formatters`` section of a configuration.
        universe : `DimensionUniverse`
            Set of all known dimensions, used to expand and validate any used
            in lookup keys. Must be passed by keyword.

        Notes
        -----
        The configuration can include one level of hierarchy where an
        instrument-specific section can be defined to override more general
        formatter specifications. This is represented in YAML using a
        key of form ``instrument<name>`` which can then define formatters
        that will be returned if a `DatasetRef` contains a matching instrument
        name in the data ID.

        The config is parsed using the function `processLookupConfigs`
        from the sibling ``configSupport`` module.
        """
        contents = processLookupConfigs(config, universe=universe)
        for key, f in contents.items():
            self.registerFormatter(key, f)

    def getLookupKeys(self) -> Set[LookupKey]:
        """Retrieve the look up keys for all the registry entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching in the registry.
        """
        return self._mappingFactory.getLookupKeys()

    def getFormatterClassWithMatch(self, entity: Entity) -> Tuple[LookupKey, Type[Formatter]]:
        """Get the matching formatter class along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `type`
            The class of the registered formatter.
        """
        names = self._lookupNamesFor(entity)
        matchKey, formatter = self._mappingFactory.getClassFromRegistryWithMatch(names)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter

    def getFormatterClass(self, entity: Entity) -> Type[Formatter]:
        """Get the matching formatter class.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.

        Returns
        -------
        formatter : `type`
            The class of the registered formatter.
        """
        _, formatter = self.getFormatterClassWithMatch(entity)
        return formatter

    def getFormatterWithMatch(self, entity: Entity, *args: Any, **kwargs: Any) -> Tuple[LookupKey, Formatter]:
        """Get a new formatter instance along with the matching registry
        key.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.
        args : `tuple`
            Positional arguments to pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to object constructor.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        names = self._lookupNamesFor(entity)
        matchKey, formatter = self._mappingFactory.getFromRegistryWithMatch(names, *args, **kwargs)
        log.debug("Retrieved formatter %s from key '%s' for entity '%s'", getFullTypeName(formatter),
                  matchKey, entity)

        return matchKey, formatter

    def getFormatter(self, entity: Entity, *args: Any, **kwargs: Any) -> Formatter:
        """Get a new formatter instance.

        Parameters
        ----------
        entity : `DatasetRef`, `DatasetType`, `StorageClass`, or `str`
            Entity to use to determine the formatter to return.
            `StorageClass` will be used as a last resort if `DatasetRef`
            or `DatasetType` instance is provided. Supports instrument
            override if a `DatasetRef` is provided configured with an
            ``instrument`` value for the data ID.
        args : `tuple`
            Positional arguments to pass to the object constructor.
        kwargs : `dict`
            Keyword arguments to pass to object constructor.

        Returns
        -------
        formatter : `Formatter`
            An instance of the registered formatter.
        """
        _, formatter = self.getFormatterWithMatch(entity, *args, **kwargs)
        return formatter

    def registerFormatter(self, type_: Union[LookupKey, str, StorageClass, DatasetType],
                          formatter: Union[str, Type[Formatter]], overwrite: bool = False) -> None:
        """Register a `Formatter`.

        Parameters
        ----------
        type_ : `LookupKey`, `str`, `StorageClass` or `DatasetType`
            Type for which this formatter is to be used. If a `LookupKey`
            is not provided, one will be constructed from the supplied string
            or by using the ``name`` property of the supplied entity.
        formatter : `str` or class of type `Formatter`
            Identifies a `Formatter` subclass to use for reading and writing
            Datasets of this type. Can be a `Formatter` class.
        overwrite : `bool`, optional
            If `True` an existing entry will be replaced by the new value.
            Default is `False`.

        Raises
        ------
        ValueError
            Raised if the formatter does not name a valid formatter type and
            ``overwrite`` is `False`.
        """
        self._mappingFactory.placeInRegistry(type_, formatter, overwrite=overwrite)

465 

466 

# Type alias for arguments that may be a Formatter instance, a Formatter
# subclass, or the class name given as a string.
FormatterParameter = Union[str, Type[Formatter], Formatter]