Coverage for python/lsst/pipe/base/_instrument.py: 34%

106 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-04-14 02:16 -0700

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("Instrument",) 

25 

26import datetime 

27import os.path 

28from abc import ABCMeta, abstractmethod 

29from typing import TYPE_CHECKING, Optional, Sequence, Type, Union 

30 

31from lsst.daf.butler import DataId, Formatter 

32from lsst.daf.butler.registry import DataIdError 

33from lsst.utils import doImportType 

34 

35if TYPE_CHECKING: 35 ↛ 36line 35 didn't jump to line 36, because the condition on line 35 was never true

36 from lsst.daf.butler import Registry 

37 from lsst.pex.config import Config 

38 

39 

class Instrument(metaclass=ABCMeta):
    """Base class for instrument-specific logic for the Gen3 Butler.

    Parameters
    ----------
    collection_prefix : `str`, optional
        Prefix for collection names to use instead of the instrument's own
        name.  This is primarily for use in simulated-data repositories,
        where the instrument name may not be necessary and/or sufficient to
        distinguish between collections.

    Notes
    -----
    Concrete instrument subclasses must have the same construction signature as
    the base class.
    """

    configPaths: Sequence[str] = ()
    """Paths to config files to read for specific Tasks.

    The paths in this list should contain files of the form `task.py`, for
    each of the Tasks that requires special configuration.
    """

    policyName: Optional[str] = None
    """Instrument specific name to use when locating a policy or configuration
    file in the file system."""

    raw_definition: tuple[str, tuple[str, ...], str] | None = None
    """Dataset type definition to use for "raw" datasets. This is a tuple
    of the dataset type name, a tuple of dimension names, and the storage class
    name. If `None` the ingest system will use its default definition."""

    def __init__(self, collection_prefix: Optional[str] = None):
        # Fall back to the instrument's short name when no explicit prefix
        # is supplied.
        if collection_prefix is None:
            collection_prefix = self.getName()
        self.collection_prefix = collection_prefix

    @classmethod
    @abstractmethod
    def getName(cls) -> str:
        """Return the short (dimension) name for this instrument.

        This is not (in general) the same as the class name - it's what is used
        as the value of the "instrument" field in data IDs, and is usually an
        abbreviation of the full name.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, registry: Registry, *, update: bool = False) -> None:
        """Insert instrument, and other relevant records into `Registry`.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            Registry client for the data repository to modify.
        update : `bool`, optional
            If `True` (`False` is default), update existing records if they
            differ from the new ones.

        Raises
        ------
        lsst.daf.butler.registry.ConflictingDefinitionError
            Raised if any existing record has the same key but a different
            definition as one being registered.

        Notes
        -----
        New records can always be added by calling this method multiple times,
        as long as no existing records have changed (if existing records have
        changed, ``update=True`` must be used).  Old records can never be
        removed by this method.

        Implementations should guarantee that registration is atomic (the
        registry should not be modified if any error occurs) and idempotent at
        the level of individual dimension entries; new detectors and filters
        should be added, but changes to any existing record should not be.
        This can generally be achieved via a block like

        .. code-block:: python

            with registry.transaction():
                registry.syncDimensionData("instrument", ...)
                registry.syncDimensionData("detector", ...)
                self.registerFilters(registry)

        """
        raise NotImplementedError()

    @staticmethod
    def fromName(name: str, registry: Registry, collection_prefix: Optional[str] = None) -> Instrument:
        """Given an instrument name and a butler registry, retrieve a
        corresponding instantiated instrument object.

        Parameters
        ----------
        name : `str`
            Name of the instrument (must match the return value of `getName`).
        registry : `lsst.daf.butler.Registry`
            Butler registry to query to find the information.
        collection_prefix : `str`, optional
            Prefix for collection names to use instead of the instrument's own
            name.  This is primarily for use in simulated-data repositories,
            where the instrument name may not be necessary and/or sufficient
            to distinguish between collections.

        Returns
        -------
        instrument : `Instrument`
            An instance of the relevant `Instrument`.

        Raises
        ------
        LookupError
            Raised if the instrument is not known to the supplied registry.
        ModuleNotFoundError
            Raised if the class could not be imported.  This could mean
            that the relevant obs package has not been setup.
        TypeError
            Raised if the class name retrieved is not a string or the imported
            symbol is not an `Instrument` subclass.

        Notes
        -----
        The instrument must be registered in the corresponding butler.
        """
        try:
            records = list(registry.queryDimensionRecords("instrument", instrument=name))
        except DataIdError:
            # Treated the same as "no matching record": reported as
            # LookupError below.
            records = None
        if not records:
            raise LookupError(f"No registered instrument with name '{name}'.")
        cls_name = records[0].class_name
        if not isinstance(cls_name, str):
            raise TypeError(
                f"Unexpected class name retrieved from {name} instrument dimension (got {cls_name})"
            )
        instrument_cls: type = doImportType(cls_name)
        if not issubclass(instrument_cls, Instrument):
            raise TypeError(
                f"{instrument_cls!r}, obtained from importing {cls_name}, is not an Instrument subclass."
            )
        return instrument_cls(collection_prefix=collection_prefix)

    @staticmethod
    def from_string(
        name: str, registry: Optional[Registry] = None, collection_prefix: Optional[str] = None
    ) -> Instrument:
        """Return an instance from the short name or class name.

        If the instrument name is not qualified (does not contain a '.') and a
        butler registry is provided, this will attempt to load the instrument
        using `Instrument.fromName()`. Otherwise the instrument will be
        imported and instantiated.

        Parameters
        ----------
        name : `str`
            The name or fully-qualified class name of an instrument.
        registry : `lsst.daf.butler.Registry`, optional
            Butler registry to query to find information about the instrument,
            by default `None`.
        collection_prefix : `str`, optional
            Prefix for collection names to use instead of the instrument's own
            name.  This is primarily for use in simulated-data repositories,
            where the instrument name may not be necessary and/or sufficient
            to distinguish between collections.

        Returns
        -------
        instrument : `Instrument`
            The instantiated instrument.

        Raises
        ------
        RuntimeError
            Raised if the instrument can not be imported, or can not be
            obtained (or instantiated) from the registry.
        TypeError
            Raised if the instrument is not a subclass of
            `~lsst.pipe.base.Instrument`.

        See Also
        --------
        Instrument.fromName
        """
        if "." not in name and registry is not None:
            # Unqualified name: resolve it through the registry.
            try:
                return Instrument.fromName(name, registry, collection_prefix=collection_prefix)
            except Exception as err:
                raise RuntimeError(
                    f"Could not get instrument from name: {name}. Failed with exception: {err}"
                ) from err

        try:
            instr_class = doImportType(name)
        except Exception as err:
            raise RuntimeError(
                f"Could not import instrument: {name}. Failed with exception: {err}"
            ) from err
        # Validate the type *before* instantiating, so that an unrelated class
        # is never constructed and the documented TypeError is what callers
        # see (this mirrors the check in fromName).
        if not (isinstance(instr_class, type) and issubclass(instr_class, Instrument)):
            raise TypeError(f"{name} is not an Instrument subclass.")
        return instr_class(collection_prefix=collection_prefix)

    @staticmethod
    def importAll(registry: Registry) -> None:
        """Import all the instruments known to this registry.

        This will ensure that all metadata translators have been registered.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            Butler registry to query to find the information.

        Notes
        -----
        It is allowed for a particular instrument class to fail on import.
        This might simply indicate that a particular obs package has
        not been setup.
        """
        records = list(registry.queryDimensionRecords("instrument"))
        for record in records:
            class_name = record.class_name
            try:
                doImportType(class_name)
            except Exception:
                # Deliberately best-effort: a failed import is allowed (see
                # Notes) and must not prevent the remaining instruments from
                # being imported.
                pass

    @abstractmethod
    def getRawFormatter(self, dataId: DataId) -> Type[Formatter]:
        """Return the Formatter class that should be used to read a particular
        raw file.

        Parameters
        ----------
        dataId : `DataId`
            Dimension-based ID for the raw file or files being ingested.

        Returns
        -------
        formatter : `lsst.daf.butler.Formatter` class
            Class to be used that reads the file into the correct
            Python object for the raw data.
        """
        raise NotImplementedError()

    def applyConfigOverrides(self, name: str, config: Config) -> None:
        """Apply instrument-specific overrides for a task config.

        Parameters
        ----------
        name : `str`
            Name of the object being configured; typically the _DefaultName
            of a Task.
        config : `lsst.pex.config.Config`
            Config instance to which overrides should be applied.
        """
        # Every existing ``<root>/<name>.py`` is loaded, in ``configPaths``
        # order.
        for root in self.configPaths:
            path = os.path.join(root, f"{name}.py")
            if os.path.exists(path):
                config.load(path)

    @staticmethod
    def formatCollectionTimestamp(timestamp: Union[str, datetime.datetime]) -> str:
        """Format a timestamp for use in a collection name.

        Parameters
        ----------
        timestamp : `str` or `datetime.datetime`
            Timestamp to format.  May be a date or datetime string in extended
            ISO format (assumed UTC), with or without a timezone specifier, a
            datetime string in basic ISO format with a timezone specifier, a
            naive `datetime.datetime` instance (assumed UTC) or a
            timezone-aware `datetime.datetime` instance (converted to UTC).
            This is intended to cover all forms that string ``CALIBDATE``
            metadata values have taken in the past, as well as the format this
            method itself writes out (to enable round-tripping).

        Returns
        -------
        formatted : `str`
            Standardized string form for the timestamp.

        Raises
        ------
        ValueError
            Raised if a string timestamp can not be parsed.
        TypeError
            Raised if ``timestamp`` is neither a `str` nor a
            `datetime.datetime`.
        """
        basic_format = "%Y%m%dT%H%M%S%z"
        if isinstance(timestamp, str):
            if "-" in timestamp:
                # Normally extended ISO format, with - and : delimiters.
                try:
                    timestamp = datetime.datetime.fromisoformat(timestamp)
                except ValueError as err:
                    # The only "-" may belong to a negative UTC offset on a
                    # basic-format string (e.g. 20200101T000000-0700), which
                    # fromisoformat rejects before Python 3.11.
                    try:
                        timestamp = datetime.datetime.strptime(timestamp, basic_format)
                    except ValueError:
                        # Neither form matched; report the original failure.
                        raise err from None
            else:
                # basic ISO format, with no delimiters (what this method
                # returns)
                timestamp = datetime.datetime.strptime(timestamp, basic_format)
        if not isinstance(timestamp, datetime.datetime):
            raise TypeError(f"Unexpected date/time object: {timestamp!r}.")
        # Naive datetimes are taken to be UTC already; aware ones are
        # converted.
        if timestamp.tzinfo is not None:
            timestamp = timestamp.astimezone(datetime.timezone.utc)
        return f"{timestamp:%Y%m%dT%H%M%S}Z"

    @staticmethod
    def makeCollectionTimestamp() -> str:
        """Create a timestamp string for use in a collection name from the
        current time.

        Returns
        -------
        formatted : `str`
            Standardized string form of the current time.
        """
        return Instrument.formatCollectionTimestamp(datetime.datetime.now(tz=datetime.timezone.utc))

    def makeDefaultRawIngestRunName(self) -> str:
        """Make the default instrument-specific run collection string for raw
        data ingest.

        Returns
        -------
        coll : `str`
            Run collection name to be used as the default for ingestion of
            raws.
        """
        return self.makeCollectionName("raw", "all")

    def makeUnboundedCalibrationRunName(self, *labels: str) -> str:
        """Make a RUN collection name appropriate for inserting calibration
        datasets whose validity ranges are unbounded.

        Parameters
        ----------
        *labels : `str`
            Extra strings to be included in the base name, using the default
            delimiter for collection names.  Usually this is the name of the
            ticket on which the calibration collection is being created.

        Returns
        -------
        name : `str`
            Run collection name.
        """
        return self.makeCollectionName("calib", *labels, "unbounded")

    def makeCuratedCalibrationRunName(self, calibDate: str, *labels: str) -> str:
        """Make a RUN collection name appropriate for inserting curated
        calibration datasets with the given ``CALIBDATE`` metadata value.

        Parameters
        ----------
        calibDate : `str`
            The ``CALIBDATE`` metadata value.
        *labels : `str`
            Strings to be included in the collection name (before
            ``calibDate``, but after all other terms), using the default
            delimiter for collection names.  Usually this is the name of the
            ticket on which the calibration collection is being created.

        Returns
        -------
        name : `str`
            Run collection name.
        """
        return self.makeCollectionName("calib", *labels, "curated", self.formatCollectionTimestamp(calibDate))

    def makeCalibrationCollectionName(self, *labels: str) -> str:
        """Make a CALIBRATION collection name appropriate for associating
        calibration datasets with validity ranges.

        Parameters
        ----------
        *labels : `str`
            Strings to be appended to the base name, using the default
            delimiter for collection names.  Usually this is the name of the
            ticket on which the calibration collection is being created.

        Returns
        -------
        name : `str`
            Calibration collection name.
        """
        return self.makeCollectionName("calib", *labels)

    @staticmethod
    def makeRefCatCollectionName(*labels: str) -> str:
        """Return a global (not instrument-specific) name for a collection that
        holds reference catalogs.

        With no arguments, this returns the name of the collection that holds
        all reference catalogs (usually a ``CHAINED`` collection, at least in
        long-lived repos that may contain more than one reference catalog).

        Parameters
        ----------
        *labels : `str`
            Strings to be added to the global collection name, in order to
            define a collection name for one or more reference catalogs being
            ingested at the same time.

        Returns
        -------
        name : `str`
            Collection name.

        Notes
        -----
        This is a ``staticmethod``, not a ``classmethod``, because it should
        be the same for all instruments.
        """
        return "/".join(("refcats",) + labels)

    def makeUmbrellaCollectionName(self) -> str:
        """Return the name of the umbrella ``CHAINED`` collection for this
        instrument that combines all standard recommended input collections.

        This method should almost never be overridden by derived classes.

        Returns
        -------
        name : `str`
            Name for the umbrella collection.
        """
        return self.makeCollectionName("defaults")

    def makeCollectionName(self, *labels: str) -> str:
        """Get the instrument-specific collection string to use as derived
        from the supplied labels.

        Parameters
        ----------
        *labels : `str`
            Strings to be combined with the instrument name to form a
            collection name.

        Returns
        -------
        name : `str`
            Collection name to use that includes the instrument's recommended
            prefix.
        """
        return "/".join((self.collection_prefix,) + labels)