Coverage for python/lsst/obs/base/mapping.py: 12%

204 statements  

# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from collections import OrderedDict
import os
import re

from lsst.daf.base import PropertySet
from lsst.daf.persistence import ButlerLocation, NoResults
from lsst.utils import doImport
from lsst.afw.image import Exposure, MaskedImage, Image, DecoratedImage

__all__ = ["Mapping", "ImageMapping", "ExposureMapping", "CalibrationMapping", "DatasetMapping"]


class Mapping(object):

    """Mapping is a base class for all mappings. Mappings are used by
    the Mapper to map (determine a path to some data given some
    identifiers) and standardize (convert data into some standard
    format or type) data, and to query the associated registry to see
    what data is available.

    Subclasses must specify self.storage or else override self.map().

    Public methods: lookup, have, need, keys, map

    Mappings are specified mainly by policy. A Mapping policy should
    consist of:

    template (string): a Python string providing the filename for that
    particular dataset type based on some data identifiers. In the
    case of redundancy in the path (e.g., a file uniquely specified by
    the exposure number, but with the filter in the path), the
    redundant/dependent identifiers can be looked up in the registry.

    python (string): the Python type for the retrieved data (e.g.
    lsst.afw.image.ExposureF)

    persistable (string): the Persistable registration for the on-disk data
    (e.g. ImageU)

    storage (string, optional): Storage type for this dataset type (e.g.
    "FitsStorage")

    level (string, optional): the level in the camera hierarchy at which the
    data is stored (Amp, Ccd or skyTile), if relevant

    tables (string, optional): a whitespace-delimited list of tables in the
    registry that can be NATURAL JOIN-ed to look up additional
    information.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    rootStorage : Storage subclass instance
        Interface to persisted repository data.
    provided : `list` of `str`
        Keys provided by the mapper.
    """

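    # For illustration only (added commentary, not in the original source;
    # the dataset type and values below are invented): a hypothetical
    # Mapping policy fragment might look like
    #
    #     calexp:
    #         template: "calexp/v%(visit)d-f%(filter)s.fits"
    #         python: "lsst.afw.image.ExposureF"
    #         persistable: "ExposureF"
    #         storage: "FitsStorage"
    #         level: "Ccd"
    #         tables: "raw"
    #
    # Real policies are shipped with each obs_* camera package.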
    def __init__(self, datasetType, policy, registry, rootStorage, provided=None):

        if policy is None:
            raise RuntimeError("No policy provided for mapping")

        self.datasetType = datasetType
        self.registry = registry
        self.rootStorage = rootStorage

        self._template = policy['template']  # Template path
        # In most cases the template cannot be used if it is empty, and it is
        # accessed via a property that will raise if it is used while
        # `not self._template`. Here we *do* allow it to be empty, so that
        # the key dict can be fetched and the mapping constructed, which in
        # turn lets the mapping raise later if the template is invalid. This
        # is a little odd, but it allows the template check to be introduced
        # without a major refactor.
        if self._template:
            self.keyDict = dict([
                (k, _formatMap(v, k, datasetType))
                for k, v in
                re.findall(r'\%\((\w+)\).*?([diouxXeEfFgGcrs])', self.template)
            ])
        else:
            self.keyDict = {}
        if provided is not None:
            for p in provided:
                if p in self.keyDict:
                    del self.keyDict[p]
        self.python = policy['python']  # Python type
        self.persistable = policy['persistable']  # Persistable type
        self.storage = policy['storage']
        if 'level' in policy:
            self.level = policy['level']  # Level in camera hierarchy
        if 'tables' in policy:
            self.tables = policy.asArray('tables')
        else:
            self.tables = None
        self.range = None
        self.columns = None
        self.obsTimeName = policy['obsTimeName'] if 'obsTimeName' in policy else None
        self.recipe = policy['recipe'] if 'recipe' in policy else 'default'

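    # Worked example of the keyDict construction above (added commentary):
    # for a hypothetical template "raw/v%(visit)d/f%(filter)s.fits", the
    # re.findall() call in __init__ matches ("visit", "d") and
    # ("filter", "s"), so keyDict == {"visit": int, "filter": str} after
    # _formatMap() converts each conversion character to a Python type.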
    @property
    def template(self):
        if self._template:  # template must not be an empty string or None
            return self._template
        else:
            raise RuntimeError(f"Template is not defined for the {self.datasetType} dataset type, "
                               "it must be set before it can be used.")

    def keys(self):
        """Return the dict of keys and value types required for this mapping.
        """
        return self.keyDict

    def map(self, mapper, dataId, write=False):
        """Standard implementation of map function.

        Parameters
        ----------
        mapper : `lsst.daf.persistence.Mapper`
            Mapper object to use.
        dataId : `dict`
            Dataset identifier.
        write : `bool`, optional
            True if this mapping is for an output (write) operation rather
            than an input lookup.

        Returns
        -------
        lsst.daf.persistence.ButlerLocation
            Location of object that was mapped.
        """
        actualId = self.need(iter(self.keyDict.keys()), dataId)
        usedDataId = {key: actualId[key] for key in self.keyDict.keys()}
        path = mapper._mapActualToPath(self.template, actualId)
        if os.path.isabs(path):
            raise RuntimeError("Mapped path should not be absolute.")
        if not write:
            # This allows mapped files to be compressed, ending in .gz or
            # .fz, without any indication from the policy that the file
            # should be compressed, easily allowing repositories to contain
            # a combination of compressed and uncompressed files.
            # If needed we can add a policy flag to allow compressed files
            # or not, and perhaps a list of allowed extensions that may
            # exist at the end of the template.
            for ext in (None, '.gz', '.fz'):
                if ext and path.endswith(ext):
                    continue  # if the path already ends with the extension
                extPath = path + ext if ext else path
                newPath = self.rootStorage.instanceSearch(extPath)
                if newPath:
                    path = newPath
                    break
        assert path, "Fully-qualified filename is empty."

        addFunc = "add_" + self.datasetType  # Name of method for additionalData
        if hasattr(mapper, addFunc):
            addFunc = getattr(mapper, addFunc)
            additionalData = addFunc(self.datasetType, actualId)
            assert isinstance(additionalData, PropertySet), \
                "Bad type for returned data: %s" % (type(additionalData),)
        else:
            additionalData = None

        return ButlerLocation(pythonType=self.python, cppType=self.persistable, storageName=self.storage,
                              locationList=path, dataId=actualId.copy(), mapper=mapper,
                              storage=self.rootStorage, usedDataId=usedDataId, datasetType=self.datasetType,
                              additionalData=additionalData)

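    # Illustrative sketch of the search above (added commentary; the template
    # and data ID are hypothetical): with template "raw/v%(visit)d.fits" and
    # dataId {"visit": 42}, a read-mode map() asks the root storage for
    # "raw/v42.fits", then "raw/v42.fits.gz", then "raw/v42.fits.fz", and
    # keeps the first variant that instanceSearch() finds.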
    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.

        Parameters
        ----------
        properties : `list` of `str`
            What to look up.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `list` of `tuple`
            Values of properties.
        """
        if self.registry is None:
            raise RuntimeError("No registry for lookup")

        skyMapKeys = ("tract", "patch")

        where = []
        values = []

        # Prepare to remove skymap entries from properties list. These must
        # be in the data ID, so we store which ones we're removing and create
        # an OrderedDict that tells us where to re-insert them. That maps the
        # name of the property to either its index in the properties list
        # *after* the skymap ones have been removed (for entries that aren't
        # skymap ones) or the value from the data ID (for those that are).
        removed = set()
        substitutions = OrderedDict()
        index = 0
        properties = list(properties)  # don't modify the original list
        for p in properties:
            if p in skyMapKeys:
                try:
                    substitutions[p] = dataId[p]
                    removed.add(p)
                except KeyError:
                    raise RuntimeError(
                        "Cannot look up skymap key '%s'; it must be explicitly included in the data ID" % p
                    )
            else:
                substitutions[p] = index
                index += 1
        # Can't actually remove while iterating above, so we do it here.
        for p in removed:
            properties.remove(p)

        fastPath = True
        for p in properties:
            if p not in ('filter', 'expTime', 'taiObs'):
                fastPath = False
                break
        if fastPath and 'visit' in dataId and "raw" in self.tables:
            lookupDataId = {'visit': dataId['visit']}
            result = self.registry.lookup(properties, 'raw_visit', lookupDataId, template=self.template)
        else:
            if dataId is not None:
                for k, v in dataId.items():
                    if self.columns and k not in self.columns:
                        continue
                    if k == self.obsTimeName:
                        continue
                    if k in skyMapKeys:
                        continue
                    where.append((k, '?'))
                    values.append(v)
            lookupDataId = {k[0]: v for k, v in zip(where, values)}
            if self.range:
                # format of self.range is
                # ('?', isBetween-lowKey, isBetween-highKey)
                # here we transform that to {(lowKey, highKey): value}
                lookupDataId[(self.range[1], self.range[2])] = dataId[self.obsTimeName]
            result = self.registry.lookup(properties, self.tables, lookupDataId, template=self.template)
        if not removed:
            return result
        # Iterate over the query results, re-inserting the skymap entries.
        result = [tuple(v if k in removed else item[v] for k, v in substitutions.items())
                  for item in result]
        return result

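    # Worked example of the skymap handling above (added commentary): with
    # properties == ["filter", "tract"] and dataId == {"tract": 9}, "tract"
    # is removed before the registry query and substitutions becomes
    # OrderedDict([("filter", 0), ("tract", 9)]); a returned registry row
    # ("r",) is then expanded back to ("r", 9).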
    def have(self, properties, dataId):
        """Returns whether the provided data identifier has all
        the properties in the provided list.

        Parameters
        ----------
        properties : `list` of `str`
            Properties required.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        bool
            True if all properties are present.
        """
        for prop in properties:
            if prop not in dataId:
                return False
        return True

    def need(self, properties, dataId):
        """Ensures all properties in the provided list are present in
        the data identifier, looking them up as needed. This is only
        possible for the case where the data identifies a single
        exposure.

        Parameters
        ----------
        properties : `list` of `str`
            Properties required.
        dataId : `dict`
            Partial dataset identifier.

        Returns
        -------
        `dict`
            Copy of dataset identifier with enhanced values.
        """
        newId = dataId.copy()
        newProps = []  # Properties we don't already have
        for prop in properties:
            if prop not in newId:
                newProps.append(prop)
        if len(newProps) == 0:
            return newId

        lookups = self.lookup(newProps, newId)
        if len(lookups) != 1:
            raise NoResults("No unique lookup for %s from %s: %d matches" %
                            (newProps, newId, len(lookups)),
                            self.datasetType, dataId)
        for i, prop in enumerate(newProps):
            newId[prop] = lookups[0][i]
        return newId


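# Illustrative sketch of the need()/lookup() interplay (added commentary;
# the data ID values are hypothetical): Mapping.need(["filter"], {"visit": 42})
# calls self.lookup(["filter"], {"visit": 42}); if the registry returns
# exactly one row, e.g. [("r",)], the enhanced data ID is
# {"visit": 42, "filter": "r"}; zero or multiple matches raise NoResults.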
def _formatMap(ch, k, datasetType):
    """Convert a format character into a Python type."""
    if ch in "diouxX":
        return int
    elif ch in "eEfFgG":
        return float
    elif ch in "crs":
        return str
    else:
        raise RuntimeError("Unexpected format specifier %s"
                           " for field %s in template for dataset %s" %
                           (ch, k, datasetType))


class ImageMapping(Mapping):
    """ImageMapping is a Mapping subclass for non-camera images.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None


class ExposureMapping(Mapping):
    """ExposureMapping is a Mapping subclass for normal exposures.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None

    def standardize(self, mapper, item, dataId):
        return mapper._standardizeExposure(self, item, dataId)


class CalibrationMapping(Mapping):
    """CalibrationMapping is a Mapping subclass for calibration-type products.

    The difference is that data properties in the query or template
    can be looked up using a reference Mapping in addition to this one.

    CalibrationMapping Policies can contain the following:

    reference (string, optional)
        a list of tables for finding missing dataset
        identifier components (including the observation time, if a validity
        range is required) in the exposure registry; note that the "tables"
        entry refers to the calibration registry

    refCols (string, optional)
        a list of dataset properties required from the
        reference tables for lookups in the calibration registry

    validRange (bool)
        true if the calibration dataset has a validity range
        specified by a column in the tables of the reference dataset
        (in the exposure registry) and two columns in the tables of this
        calibration dataset (in the calibration registry)

    obsTimeName (string, optional)
        the name of the column in the reference
        dataset tables containing the observation time (default "taiObs")

    validStartName (string, optional)
        the name of the column in the
        calibration dataset tables containing the start of the validity range
        (default "validStart")

    validEndName (string, optional)
        the name of the column in the
        calibration dataset tables containing the end of the validity range
        (default "validEnd")

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    calibRegistry : `lsst.obs.base.Registry`
        Registry for calibration metadata lookups.
    calibRoot : `str`
        Path of calibration root directory.
    dataRoot : `str`
        Path of data root directory; used for outputs only.
    """

    def __init__(self, datasetType, policy, registry, calibRegistry, calibRoot, dataRoot=None, **kwargs):
        Mapping.__init__(self, datasetType, policy, calibRegistry, calibRoot, **kwargs)
        self.reference = policy.asArray("reference") if "reference" in policy else None
        self.refCols = policy.asArray("refCols") if "refCols" in policy else None
        self.refRegistry = registry
        self.dataRoot = dataRoot
        if "validRange" in policy and policy["validRange"]:
            self.range = ("?", policy["validStartName"], policy["validEndName"])
        if "columns" in policy:
            self.columns = policy.asArray("columns")
        if "filter" in policy:
            self.setFilter = policy["filter"]
        self.metadataKeys = None
        if "metadataKey" in policy:
            self.metadataKeys = policy.asArray("metadataKey")

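    # For illustration only (added commentary; the dataset type and values
    # are invented): a calibration policy with a validity range might contain
    #
    #     flat:
    #         template: "flat/f%(filter)s/flat.fits"
    #         python: "lsst.afw.image.ExposureF"
    #         persistable: "ExposureF"
    #         storage: "FitsStorage"
    #         tables: "flat"
    #         reference: "raw_visit"
    #         validRange: true
    #         validStartName: "validStart"
    #         validEndName: "validEnd"
    #
    # With validRange set, __init__ above stores
    # self.range = ("?", "validStart", "validEnd"), which Mapping.lookup()
    # uses to constrain the query to rows whose validity interval contains
    # the observation time taken from obsTimeName.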
    def map(self, mapper, dataId, write=False):
        location = Mapping.map(self, mapper, dataId, write=write)
        # Want outputs to be in the output directory
        if write and self.dataRoot:
            location.storage = self.dataRoot
        return location

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.

        Parameters
        ----------
        properties : `list` of `str`
            Properties to look up.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `list` of `tuple`
            Values of properties.
        """
        # Either look up taiObs in the reference tables and then everything
        # in the calibration registry, or look up everything in the registry
        # directly.
        newId = dataId.copy()
        if self.reference is not None:
            where = []
            values = []
            for k, v in dataId.items():
                if self.refCols and k not in self.refCols:
                    continue
                where.append(k)
                values.append(v)

            # Columns we need from the regular registry
            if self.columns is not None:
                columns = set(self.columns)
                for k in dataId.keys():
                    columns.discard(k)
            else:
                columns = set(properties)

            if not columns:
                # Nothing to look up in the reference registry; continue with
                # the calibration registry.
                return Mapping.lookup(self, properties, newId)

            lookupDataId = dict(zip(where, values))
            lookups = self.refRegistry.lookup(columns, self.reference, lookupDataId)
            if len(lookups) != 1:
                raise RuntimeError("No unique lookup for %s from %s: %d matches" %
                                   (columns, dataId, len(lookups)))
            if columns == set(properties):
                # Have everything we need
                return lookups
            for i, prop in enumerate(columns):
                newId[prop] = lookups[0][i]
        return Mapping.lookup(self, properties, newId)

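    # Worked example of the two-stage lookup above (added commentary; values
    # are invented): with reference == ["raw_visit"], columns == {"taiObs"}
    # and dataId == {"visit": 42, "filter": "g"}, taiObs is first fetched
    # from raw_visit for visit 42 and merged into newId, and the merged ID
    # is then resolved against the calibration registry tables.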
    def standardize(self, mapper, item, dataId):
        """Default standardization function for calibration datasets.

        If the item is of a type that should be standardized, the base class
        ``standardizeExposure`` method is called, otherwise the item is
        returned unmodified.

        Parameters
        ----------
        mapper : `lsst.daf.persistence.Mapper`
            Mapper object to use.
        item : object
            Will be standardized if of type lsst.afw.image.Exposure,
            lsst.afw.image.DecoratedImage, lsst.afw.image.Image
            or lsst.afw.image.MaskedImage.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `lsst.afw.image.Exposure` or item
            The standardized object.
        """
        if issubclass(doImport(self.python), (Exposure, MaskedImage, Image, DecoratedImage)):
            return mapper._standardizeExposure(self, item, dataId, filter=self.setFilter)
        return item


class DatasetMapping(Mapping):
    """DatasetMapping is a Mapping subclass for non-Exposure datasets that can
    be retrieved by the standard daf_persistence mechanism.

    The differences are that the Storage type must be specified and no
    Exposure standardization is performed.

    The "storage" entry in the Policy is mandatory; the "tables" entry is
    optional; no "level" entry is allowed.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.storage = policy["storage"]  # Storage type
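
# Illustrative usage sketch (added commentary, not part of the module; the
# policy content and data ID are hypothetical, and Mappings are normally
# constructed by a CameraMapper subclass rather than directly):
#
#     from lsst.daf.persistence import Policy
#
#     policy = Policy({"template": "raw/v%(visit)d-f%(filter)s.fits",
#                      "python": "lsst.afw.image.DecoratedImageU",
#                      "persistable": "DecoratedImageU",
#                      "storage": "FitsStorage",
#                      "tables": "raw"})
#     mapping = Mapping("raw", policy, registry, rootStorage)
#     mapping.keys()  # -> {"visit": int, "filter": str}
#     location = mapping.map(mapper, {"visit": 42, "filter": "r"})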