Coverage for python/lsst/obs/base/mapping.py: 12%


204 statements  

# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import os
import re
from collections import OrderedDict

from lsst.afw.image import DecoratedImage, Exposure, Image, MaskedImage
from lsst.daf.base import PropertySet
from lsst.daf.persistence import ButlerLocation, NoResults
from lsst.utils import doImport

__all__ = ["Mapping", "ImageMapping", "ExposureMapping", "CalibrationMapping", "DatasetMapping"]


class Mapping(object):
    """Mapping is a base class for all mappings. Mappings are used by
    the Mapper to map (determine a path to some data given some
    identifiers) and standardize (convert data into some standard
    format or type) data, and to query the associated registry to see
    what data is available.

    Subclasses must specify self.storage or else override self.map().

    Public methods: lookup, have, need, keys, map

    Mappings are specified mainly by policy. A Mapping policy should
    consist of:

    template (string): a Python string providing the filename for that
    particular dataset type based on some data identifiers. In the
    case of redundancy in the path (e.g., a file uniquely specified by
    the exposure number, but with the filter also in the path), the
    redundant/dependent identifiers can be looked up in the registry.

    python (string): the Python type for the retrieved data (e.g.
    lsst.afw.image.ExposureF)

    persistable (string): the Persistable registration for the on-disk data
    (e.g. ImageU)

    storage (string, optional): Storage type for this dataset type (e.g.
    "FitsStorage")

    level (string, optional): the level in the camera hierarchy at which the
    data is stored (Amp, Ccd or skyTile), if relevant

    tables (string, optional): a whitespace-delimited list of tables in the
    registry that can be NATURAL JOIN-ed to look up additional
    information.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    rootStorage : Storage subclass instance
        Interface to persisted repository data.
    provided : `list` of `str`
        Keys provided by the mapper.
    """
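
    # A minimal policy sketch for this base class (hypothetical dataset type
    # and values; only the field names are taken from the docstring above):
    #
    #     template: "raw/v%(visit)d_f%(filter)s.fits"
    #     python: "lsst.afw.image.ExposureU"
    #     persistable: "ExposureU"
    #     storage: "FitsStorage"
    #     level: "Ccd"
    #     tables: "raw"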

    def __init__(self, datasetType, policy, registry, rootStorage, provided=None):
        if policy is None:
            raise RuntimeError("No policy provided for mapping")

        self.datasetType = datasetType
        self.registry = registry
        self.rootStorage = rootStorage

        self._template = policy["template"]  # Template path
        # In most cases the template cannot be used if it is empty, and it is
        # accessed via a property that will raise if it is used while
        # `not self._template`. In this case we *do* allow it to be empty, for
        # the purpose of fetching the key dict so that the mapping can be
        # constructed, so that it can raise if it's invalid. I know it's a
        # little odd, but it allows this template check to be introduced
        # without a major refactor.
        if self._template:
            self.keyDict = dict(
                [
                    (k, _formatMap(v, k, datasetType))
                    for k, v in re.findall(r"\%\((\w+)\).*?([diouxXeEfFgGcrs])", self.template)
                ]
            )
        else:
            self.keyDict = {}
        if provided is not None:
            for p in provided:
                if p in self.keyDict:
                    del self.keyDict[p]
        self.python = policy["python"]  # Python type
        self.persistable = policy["persistable"]  # Persistable type
        self.storage = policy["storage"]
        if "level" in policy:
            self.level = policy["level"]  # Level in camera hierarchy
        if "tables" in policy:
            self.tables = policy.asArray("tables")
        else:
            self.tables = None
        self.range = None
        self.columns = None
        self.obsTimeName = policy["obsTimeName"] if "obsTimeName" in policy else None
        self.recipe = policy["recipe"] if "recipe" in policy else "default"
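
    # For a hypothetical template such as "raw/v%(visit)d_f%(filter)s.fits",
    # the findall() above yields [("visit", "d"), ("filter", "s")], so
    # keyDict == {"visit": int, "filter": str}; each format character is
    # converted to a Python type by _formatMap() near the end of this module.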

    @property
    def template(self):
        if self._template:  # template must not be an empty string or None
            return self._template
        else:
            raise RuntimeError(
                f"Template is not defined for the {self.datasetType} dataset type; "
                "it must be set before it can be used."
            )

    def keys(self):
        """Return the dict of keys and value types required by this mapping."""
        return self.keyDict

    def map(self, mapper, dataId, write=False):
        """Standard implementation of the map function.

        Parameters
        ----------
        mapper : `lsst.daf.persistence.Mapper`
            Object to be mapped.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `lsst.daf.persistence.ButlerLocation`
            Location of the object that was mapped.
        """
        actualId = self.need(iter(self.keyDict.keys()), dataId)
        usedDataId = {key: actualId[key] for key in self.keyDict.keys()}
        path = mapper._mapActualToPath(self.template, actualId)
        if os.path.isabs(path):
            raise RuntimeError("Mapped path should not be absolute.")
        if not write:
            # This allows mapped files to be compressed, ending in .gz or
            # .fz, without any indication from the policy that the file
            # should be compressed, easily allowing repositories to contain
            # a combination of compressed and uncompressed files.
            # If needed we can add a policy flag to allow compressed files
            # or not, and perhaps a list of allowed extensions that may
            # exist at the end of the template.
            for ext in (None, ".gz", ".fz"):
                if ext and path.endswith(ext):
                    continue  # if the path already ends with the extension
                extPath = path + ext if ext else path
                newPath = self.rootStorage.instanceSearch(extPath)
                if newPath:
                    path = newPath
                    break
        assert path, "Fully-qualified filename is empty."

        addFunc = "add_" + self.datasetType  # Name of method for additionalData
        if hasattr(mapper, addFunc):
            addFunc = getattr(mapper, addFunc)
            additionalData = addFunc(self.datasetType, actualId)
            assert isinstance(additionalData, PropertySet), "Bad type for returned data: %s" % (
                type(additionalData),
            )
        else:
            additionalData = None

        return ButlerLocation(
            pythonType=self.python,
            cppType=self.persistable,
            storageName=self.storage,
            locationList=path,
            dataId=actualId.copy(),
            mapper=mapper,
            storage=self.rootStorage,
            usedDataId=usedDataId,
            datasetType=self.datasetType,
            additionalData=additionalData,
        )
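
    # A usage sketch (hypothetical mapper and data ID; assumes the template
    # shown earlier and a registry that can fill in any missing keys):
    #
    #     loc = mapping.map(mapper, {"visit": 123})
    #     loc.locationList  # relative path(s) rendered from the template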

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.

        Parameters
        ----------
        properties : `list` of `str`
            What to look up.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `list` of `tuple`
            Values of properties.
        """
        if self.registry is None:
            raise RuntimeError("No registry for lookup")

        skyMapKeys = ("tract", "patch")

        where = []
        values = []

        # Prepare to remove skymap entries from the properties list. These
        # must be in the data ID, so we store which ones we're removing and
        # create an OrderedDict that tells us where to re-insert them. That
        # maps the name of the property to either its index in the properties
        # list *after* the skymap ones have been removed (for entries that
        # aren't skymap ones) or the value from the data ID (for those that
        # are).
        removed = set()
        substitutions = OrderedDict()
        index = 0
        properties = list(properties)  # don't modify the original list
        for p in properties:
            if p in skyMapKeys:
                try:
                    substitutions[p] = dataId[p]
                    removed.add(p)
                except KeyError:
                    raise RuntimeError(
                        "Cannot look up skymap key '%s'; it must be explicitly included in the data ID" % p
                    )
            else:
                substitutions[p] = index
                index += 1
        # Can't actually remove while iterating above, so we do it here.
        for p in removed:
            properties.remove(p)

        fastPath = True
        for p in properties:
            if p not in ("filter", "expTime", "taiObs"):
                fastPath = False
                break
        if fastPath and "visit" in dataId and "raw" in self.tables:
            lookupDataId = {"visit": dataId["visit"]}
            result = self.registry.lookup(properties, "raw_visit", lookupDataId, template=self.template)
        else:
            if dataId is not None:
                for k, v in dataId.items():
                    if self.columns and k not in self.columns:
                        continue
                    if k == self.obsTimeName:
                        continue
                    if k in skyMapKeys:
                        continue
                    where.append((k, "?"))
                    values.append(v)
            lookupDataId = {k[0]: v for k, v in zip(where, values)}
            if self.range:
                # The format of self.range is
                # ('?', isBetween-lowKey, isBetween-highKey);
                # here we transform that to {(lowKey, highKey): value}.
                lookupDataId[(self.range[1], self.range[2])] = dataId[self.obsTimeName]
            result = self.registry.lookup(properties, self.tables, lookupDataId, template=self.template)
        if not removed:
            return result
        # Iterate over the query results, re-inserting the skymap entries.
        result = [tuple(v if k in removed else item[v] for k, v in substitutions.items()) for item in result]
        return result
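
    # For example (hypothetical call): lookup(["filter", "tract"], ...) with
    # dataId == {"visit": 1, "tract": 9} removes "tract" from the query,
    # asks the registry for "filter" alone, and then splices the data-ID
    # value back in, so each result row comes back as (<filter>, 9).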

    def have(self, properties, dataId):
        """Return whether the provided data identifier has all
        the properties in the provided list.

        Parameters
        ----------
        properties : `list` of `str`
            Properties required.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `bool`
            True if all properties are present.
        """
        for prop in properties:
            if prop not in dataId:
                return False
        return True

    def need(self, properties, dataId):
        """Ensure all properties in the provided list are present in
        the data identifier, looking them up as needed. This is only
        possible for the case where the data identifies a single
        exposure.

        Parameters
        ----------
        properties : `list` of `str`
            Properties required.
        dataId : `dict`
            Partial dataset identifier.

        Returns
        -------
        `dict`
            Copy of the dataset identifier with enhanced values.
        """
        newId = dataId.copy()
        newProps = []  # Properties we don't already have
        for prop in properties:
            if prop not in newId:
                newProps.append(prop)
        if len(newProps) == 0:
            return newId

        lookups = self.lookup(newProps, newId)
        if len(lookups) != 1:
            raise NoResults(
                "No unique lookup for %s from %s: %d matches" % (newProps, newId, len(lookups)),
                self.datasetType,
                dataId,
            )
        for i, prop in enumerate(newProps):
            newId[prop] = lookups[0][i]
        return newId
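
    # A sketch of how need() backs map() (hypothetical values): given the
    # partial ID {"visit": 123} and a template that also requires "filter",
    # need() calls lookup(["filter"], {"visit": 123}) and, if exactly one row
    # comes back, returns {"visit": 123, "filter": <looked-up value>}; zero
    # or multiple matches raise NoResults.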


def _formatMap(ch, k, datasetType):
    """Convert a format character into a Python type."""
    if ch in "diouxX":
        return int
    elif ch in "eEfFgG":
        return float
    elif ch in "crs":
        return str
    else:
        raise RuntimeError(
            "Unexpected format specifier %s for field %s in template for dataset %s" % (ch, k, datasetType)
        )
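
# For example, _formatMap("d", "visit", "raw") returns int, while an unknown
# specifier such as "q" raises RuntimeError.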


class ImageMapping(Mapping):
    """ImageMapping is a Mapping subclass for non-camera images.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of the root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray("columns") if "columns" in policy else None


class ExposureMapping(Mapping):
    """ExposureMapping is a Mapping subclass for normal exposures.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of the root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray("columns") if "columns" in policy else None

    def standardize(self, mapper, item, dataId):
        return mapper._standardizeExposure(self, item, dataId)


class CalibrationMapping(Mapping):
    """CalibrationMapping is a Mapping subclass for calibration-type products.

    The difference is that data properties in the query or template
    can be looked up using a reference Mapping in addition to this one.

    CalibrationMapping Policies can contain the following:

    reference (string, optional)
        a list of tables for finding missing dataset
        identifier components (including the observation time, if a validity
        range is required) in the exposure registry; note that the "tables"
        entry refers to the calibration registry

    refCols (string, optional)
        a list of dataset properties required from the
        reference tables for lookups in the calibration registry

    validRange (bool)
        true if the calibration dataset has a validity range
        specified by a column in the tables of the reference dataset in the
        exposure registry and by two columns in the tables of this calibration
        dataset in the calibration registry

    obsTimeName (string, optional)
        the name of the column in the reference
        dataset tables containing the observation time (default "taiObs")

    validStartName (string, optional)
        the name of the column in the
        calibration dataset tables containing the start of the validity range
        (default "validStart")

    validEndName (string, optional)
        the name of the column in the
        calibration dataset tables containing the end of the validity range
        (default "validEnd")

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    calibRegistry : `lsst.obs.base.Registry`
        Registry for calibration metadata lookups.
    calibRoot : `str`
        Path of the calibration root directory.
    dataRoot : `str`
        Path of the data root directory; used for outputs only.
    """
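
    # A policy sketch for a calibration dataset with a validity range
    # (hypothetical dataset type and values):
    #
    #     template: "flat/v%(visit)d_f%(filter)s.fits"
    #     python: "lsst.afw.image.ExposureF"
    #     persistable: "ExposureF"
    #     storage: "FitsStorage"
    #     tables: "flat"
    #     reference: "raw_visit"
    #     validRange: true
    #     obsTimeName: "taiObs"
    #     validStartName: "validStart"
    #     validEndName: "validEnd"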

    def __init__(self, datasetType, policy, registry, calibRegistry, calibRoot, dataRoot=None, **kwargs):
        Mapping.__init__(self, datasetType, policy, calibRegistry, calibRoot, **kwargs)
        self.reference = policy.asArray("reference") if "reference" in policy else None
        self.refCols = policy.asArray("refCols") if "refCols" in policy else None
        self.refRegistry = registry
        self.dataRoot = dataRoot
        if "validRange" in policy and policy["validRange"]:
            self.range = ("?", policy["validStartName"], policy["validEndName"])
        if "columns" in policy:
            self.columns = policy.asArray("columns")
        if "filter" in policy:
            self.setFilter = policy["filter"]
        self.metadataKeys = None
        if "metadataKey" in policy:
            self.metadataKeys = policy.asArray("metadataKey")

    def map(self, mapper, dataId, write=False):
        location = Mapping.map(self, mapper, dataId, write=write)
        # Want outputs to be in the output directory.
        if write and self.dataRoot:
            location.storage = self.dataRoot
        return location

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.

        Parameters
        ----------
        properties : `list` of `str`
            Properties to look up.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `list` of `tuple`
            Values of properties.
        """

        # Either look up taiObs in the reference registry and then everything
        # in the calibration registry, or look up everything in the
        # calibration registry directly.
        newId = dataId.copy()
        if self.reference is not None:
            where = []
            values = []
            for k, v in dataId.items():
                if self.refCols and k not in self.refCols:
                    continue
                where.append(k)
                values.append(v)

            # Columns we need from the regular registry.
            if self.columns is not None:
                columns = set(self.columns)
                for k in dataId.keys():
                    columns.discard(k)
            else:
                columns = set(properties)

            if not columns:
                # Nothing to look up in the reference registry; continue
                # with the calibration registry.
                return Mapping.lookup(self, properties, newId)

            lookupDataId = dict(zip(where, values))
            lookups = self.refRegistry.lookup(columns, self.reference, lookupDataId)
            if len(lookups) != 1:
                raise RuntimeError(
                    "No unique lookup for %s from %s: %d matches" % (columns, dataId, len(lookups))
                )
            if columns == set(properties):
                # We have everything we need.
                return lookups
            for i, prop in enumerate(columns):
                newId[prop] = lookups[0][i]
        return Mapping.lookup(self, properties, newId)
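
    # For example (hypothetical flat-field lookup): with reference tables
    # configured and dataId == {"visit": 123}, any columns still missing
    # (e.g. the observation time) are first fetched from the reference
    # registry, and Mapping.lookup() then queries the calibration registry
    # with the validity-range constraint built in __init__.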

    def standardize(self, mapper, item, dataId):
        """Default standardization function for calibration datasets.

        If the item is of a type that should be standardized, the mapper's
        ``_standardizeExposure`` method is called; otherwise the item is
        returned unmodified.

        Parameters
        ----------
        mapper : `lsst.daf.persistence.Mapper`
            Mapper to use for the standardization.
        item : object
            Will be standardized if of type lsst.afw.image.Exposure,
            lsst.afw.image.DecoratedImage, lsst.afw.image.Image
            or lsst.afw.image.MaskedImage.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `lsst.afw.image.Exposure` or item
            The standardized object.
        """
        if issubclass(doImport(self.python), (Exposure, MaskedImage, Image, DecoratedImage)):
            return mapper._standardizeExposure(self, item, dataId, filter=self.setFilter)
        return item


class DatasetMapping(Mapping):
    """DatasetMapping is a Mapping subclass for non-Exposure datasets that can
    be retrieved by the standard daf_persistence mechanism.

    The differences are that the Storage type must be specified and no
    Exposure standardization is performed.

    The "storage" entry in the Policy is mandatory; the "tables" entry is
    optional; no "level" entry is allowed.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of the root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.storage = policy["storage"]  # Storage type
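
    # A policy sketch for a DatasetMapping (hypothetical dataset persisting
    # a config object; note that "storage" is mandatory here):
    #
    #     template: "config/%(visit)d.py"
    #     python: "lsst.pex.config.Config"
    #     persistable: "Config"
    #     storage: "ConfigStorage"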