Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22import click 

23 

24from ..opt import ( 

25 collection_type_option, 

26 collection_argument, 

27 collections_argument, 

28 collections_option, 

29 components_option, 

30 dataset_type_option, 

31 datasets_option, 

32 destination_argument, 

33 dimensions_argument, 

34 directory_argument, 

35 element_argument, 

36 glob_argument, 

37 options_file_option, 

38 query_datasets_options, 

39 repo_argument, 

40 transfer_option, 

41 verbose_option, 

42 where_option, 

43) 

44 

45from ..utils import ( 

46 ButlerCommand, 

47 MWOptionDecorator, 

48 option_section, 

49 printAstropyTables, 

50 split_commas, 

51 to_upper, 

52 typeStrAcceptsMultiple, 

53 unwrap, 

54 where_help, 

55) 

56 

57from ... import script 

58 

59 

# Shared help text for the REPO argument, reused by the command definitions
# below so the wording stays consistent across commands.
willCreateRepoHelp = "REPO is the URI or path to the new repository. Will be created if it does not exist."
existingRepoHelp = "REPO is the URI or path to an existing data repository root or configuration file."

62 

63 

@click.command(cls=ButlerCommand, short_help="Add existing datasets to a tagged collection.")
@repo_argument(required=True)
@collection_argument(help="COLLECTION is the collection the datasets should be associated with.")
# repo=False because the REPO argument is added explicitly above; the datasets
# to associate are selected via query options rather than arguments.
@query_datasets_options(repo=False, showUri=False, useArguments=False)
@options_file_option()
def associate(**kwargs):
    """Add existing datasets to a tagged collection; searches for datasets with
    the options and adds them to the named COLLECTION.
    """
    # All option values are forwarded unchanged to the script-layer
    # implementation.
    script.associate(**kwargs)

74 

75 

# The conversion from the import command name to the butler_import function
# name for subcommand lookup is implemented in the cli/butler.py, in
# funcNameToCmdName and cmdNameToFuncName. If name changes are made here they
# must be reflected in that location. If this becomes a common pattern a better
# mechanism should be implemented.
@click.command("import", cls=ButlerCommand)
@repo_argument(required=True, help=willCreateRepoHelp)
@directory_argument(required=True)
@transfer_option()
@click.option("--export-file",
              help="Name for the file that contains database information associated with the exported "
                   "datasets. If this is not an absolute path, does not exist in the current working "
                   "directory, and --dir is provided, it is assumed to be in that directory. Defaults "
                   "to \"export.yaml\".",
              type=click.File("r"))
@click.option("--skip-dimensions", "-s", type=str, multiple=True, callback=split_commas,
              metavar=typeStrAcceptsMultiple,
              help="Dimensions that should be skipped during import")
@click.option("--reuse-ids", is_flag=True, help="Force re-use of imported dataset IDs for integer IDs.")
@options_file_option()
def butler_import(*args, **kwargs):
    """Import data into a butler repository."""
    # The function is named butler_import (not "import", which is a reserved
    # word); the user-facing command name is set explicitly above.
    script.butlerImport(*args, **kwargs)

99 

100 

@click.command(cls=ButlerCommand)
@repo_argument(required=True, help=willCreateRepoHelp)
@click.option("--seed-config", help="Path to an existing YAML config file to apply (on top of defaults).")
@click.option("--dimension-config", help="Path to an existing YAML config file with dimension configuration.")
@click.option("--standalone", is_flag=True, help="Include all defaults in the config file in the repo, "
              "insulating the repo from changes in package defaults.")
@click.option("--override", is_flag=True, help="Allow values in the supplied config to override all "
              "repo settings.")
@click.option("--outfile", "-f", default=None, type=str, help="Name of output file to receive repository "
              "configuration. Default is to write butler.yaml into the specified repo.")
@options_file_option()
def create(*args, **kwargs):
    """Create an empty Gen3 Butler repository."""
    # All option values are forwarded unchanged to the script-layer
    # implementation.
    script.createRepo(*args, **kwargs)

115 

116 

@click.command(short_help="Dump butler config to stdout.", cls=ButlerCommand)
@repo_argument(required=True, help=existingRepoHelp)
@click.option("--subset", "-s", type=str,
              # Fixed help-text grammar: "specified" -> "specifies".
              help="Subset of a configuration to report. This can be any key in the hierarchy such as "
                   "'.datastore.root' where the leading '.' specifies the delimiter for the hierarchy.")
@click.option("--searchpath", "-p", type=str, multiple=True, callback=split_commas,
              metavar=typeStrAcceptsMultiple,
              help="Additional search paths to use for configuration overrides")
@click.option("--file", "outfile", type=click.File("w"), default="-",
              help="Print the (possibly-expanded) configuration for a repository to a file, or to stdout "
                   "by default.")
@options_file_option()
def config_dump(*args, **kwargs):
    """Dump either a subset or full Butler configuration to standard output."""
    # All option values are forwarded unchanged to the script-layer
    # implementation, which performs the dump.
    script.configDump(*args, **kwargs)

132 

133 

@click.command(short_help="Validate the configuration files.", cls=ButlerCommand)
@repo_argument(required=True, help=existingRepoHelp)
@click.option("--quiet", "-q", is_flag=True, help="Do not report individual failures.")
@dataset_type_option(help="Specific DatasetType(s) to validate.", multiple=True)
@click.option("--ignore", "-i", type=str, multiple=True, callback=split_commas,
              metavar=typeStrAcceptsMultiple,
              help="DatasetType(s) to ignore for validation.")
@options_file_option()
def config_validate(*args, **kwargs):
    """Validate the configuration files for a Gen3 Butler repository."""
    # Exit with a nonzero status when the script layer reports a problem.
    if not script.configValidate(*args, **kwargs):
        raise click.exceptions.Exit(1)

147 

148 

@click.command(cls=ButlerCommand)
@repo_argument(required=True)
# Fixed help text: "the Name of" -> "the name of".
@collection_argument(help=unwrap("""COLLECTION is the name of the collection to remove. If this is a tagged or
                                 chained collection, datasets within the collection are not modified unless --unstore
                                 is passed. If this is a run collection, --purge and --unstore must be passed, and
                                 all datasets in it are fully removed from the data repository."""))
@click.option("--purge",
              help=unwrap("""Permit RUN collections to be removed, fully removing datasets within them.
                          Requires --unstore as an added precaution against accidental deletion. Must not be
                          passed if the collection is not a RUN."""),
              is_flag=True)
@click.option("--unstore",
              help=("""Remove all datasets in the collection from all datastores in which they appear."""),
              is_flag=True)
@click.option("--unlink",
              # Fixed duplicated word: "from from" -> "from".
              help="Before removing the given `collection` unlink it from this parent collection.",
              multiple=True,
              callback=split_commas)
@options_file_option()
def prune_collection(**kwargs):
    """Remove a collection and possibly prune datasets within it."""
    # All option values are forwarded unchanged to the script-layer
    # implementation.
    script.pruneCollection(**kwargs)

171 

172 

# User-visible informational messages and error strings used by the
# prune_datasets command below.
pruneDatasets_wouldRemoveMsg = unwrap("""The following datasets will be removed from any datastores in which
                                      they are present:""")
pruneDatasets_wouldDisassociateMsg = unwrap("""The following datasets will be disassociated from {collections}
                                            if they are currently present in it (which is not checked):""")
pruneDatasets_wouldDisassociateAndRemoveMsg = unwrap("""The following datasets will be disassociated from
                                                     {collections} if they are currently present in it (which is
                                                     not checked), and removed from any datastores in which they
                                                     are present.""")
pruneDatasets_willRemoveMsg = "The following datasets will be removed:"
pruneDatasets_askContinueMsg = "Continue?"
pruneDatasets_didRemoveAforementioned = "The datasets were removed."
pruneDatasets_didNotRemoveAforementioned = "Did not remove the datasets."
pruneDatasets_didRemoveMsg = "Removed the following datasets:"
pruneDatasets_noDatasetsFound = "Did not find any datasets."
# Fixed: the original string opened with four quote characters, so the
# message began with a stray literal '"'; also removed the trailing space.
pruneDatasets_errPurgeAndDisassociate = unwrap(
    """--disassociate and --purge may not be used together: --disassociate purges from just the passed TAGged
    collections, but --purge forces disassociation from all of them."""
)
pruneDatasets_errQuietWithDryRun = "Can not use --quiet and --dry-run together."
# Fixed typo: "COLLETION" -> "COLLECTIONS" (matches the argument name).
pruneDatasets_errNoCollectionRestriction = unwrap(
    """Must indicate collections from which to prune datasets by passing COLLECTIONS arguments (select all
    collections by passing '*', or consider using 'butler prune-collections'), by using --purge to pass a run
    collection, or by using --disassociate to select a tagged collection.""")
pruneDatasets_errPruneOnNotRun = "Can not prune a collection that is not a RUN collection: {collection}"
pruneDatasets_errNoOp = "No operation: one of --purge, --unstore, or --disassociate must be provided."

198 

# Reusable option decorators for the prune_datasets command, defined at module
# scope so the help text, callbacks, and flag names live in one place.
disassociate_option = MWOptionDecorator(
    "--disassociate", "disassociate_tags",
    help=unwrap("""Disassociate pruned datasets from the given tagged collections. May not be used with
                --purge."""),
    multiple=True,
    callback=split_commas,
    metavar="TAG"
)


purge_option = MWOptionDecorator(
    "--purge", "purge_run",
    help=unwrap("""Completely remove the dataset from the given RUN in the Registry. May not be used with
                --disassociate. Note, this may remove provenance information from datasets other than those
                provided, and should be used with extreme care."""),
    metavar="RUN"
)


find_all_option = MWOptionDecorator(
    "--find-all", is_flag=True,
    help=unwrap("""Purge the dataset results from all of the collections in which a dataset of that dataset
                type + data id combination appear. (By default only the first found dataset type + data id is
                purged, according to the order of COLLECTIONS passed in).""")
)


unstore_option = MWOptionDecorator(
    "--unstore",
    is_flag=True,
    help=unwrap("""Remove these datasets from all datastores configured with this data repository. If
                --disassociate and --purge are not used then --unstore will be used by default. Note that
                --unstore will make it impossible to retrieve these datasets even via other collections.
                Datasets that are already not stored are ignored by this option.""")
)


dry_run_option = MWOptionDecorator(
    "--dry-run",
    is_flag=True,
    help=unwrap("""Display the datasets that would be removed but do not remove them.

                Note that a dataset can be in collections other than its RUN-type collection, and removing it
                will remove it from all of them, even though the only one this will show is its RUN
                collection.""")
)


confirm_option = MWOptionDecorator(
    "--confirm/--no-confirm",
    default=True,
    help="Print expected action and a confirmation prompt before executing. Default is --confirm."
)


quiet_option = MWOptionDecorator(
    "--quiet",
    is_flag=True,
    help=unwrap("""Makes output quiet. Implies --no-confirm. Requires --dry-run not be passed.""")
)

259 

260 

@click.command(cls=ButlerCommand, short_help="Remove datasets.")
@repo_argument(required=True)
# Fixed help text: "is or more expressions" -> "is one or more expressions".
@collections_argument(help=unwrap("""COLLECTIONS is one or more expressions that identify the collections to
                                  search for datasets. Glob-style expressions may be used but only if the
                                  --find-all flag is also passed."""))
@option_section("Query Datasets Options:")
@datasets_option(help="One or more glob-style expressions that identify the dataset types to be pruned.",
                 multiple=True,
                 callback=split_commas)
@find_all_option()
@where_option(help=where_help)
@option_section("Prune Options:")
@disassociate_option()
@purge_option()
@unstore_option()
@option_section("Execution Options:")
@dry_run_option()
@confirm_option()
@quiet_option()
@option_section("Other Options:")
@options_file_option()
def prune_datasets(**kwargs):
    """Query for and remove one or more datasets from a collection and/or
    storage.
    """
    # --quiet implies --no-confirm and may not be combined with --dry-run.
    quiet = kwargs.pop("quiet", False)
    if quiet:
        if kwargs["dry_run"]:
            raise click.ClickException(pruneDatasets_errQuietWithDryRun)
        kwargs["confirm"] = False

    result = script.pruneDatasets(**kwargs)

    # Translate error flags reported by the script layer into CLI errors.
    # (Removed an unreachable `return` that followed the first raise.)
    if result.errPurgeAndDisassociate:
        raise click.ClickException(pruneDatasets_errPurgeAndDisassociate)
    if result.errNoCollectionRestriction:
        raise click.ClickException(pruneDatasets_errNoCollectionRestriction)
    if result.errPruneOnNotRun:
        raise click.ClickException(pruneDatasets_errPruneOnNotRun.format(**result.errDict))
    if result.errNoOp:
        raise click.ClickException(pruneDatasets_errNoOp)
    if result.dryRun:
        # Describe what would happen without doing it; the message depends on
        # which combination of disassociate/unstore actions was requested.
        if result.action["disassociate"] and result.action["unstore"]:
            msg = pruneDatasets_wouldDisassociateAndRemoveMsg
        elif result.action["disassociate"]:
            msg = pruneDatasets_wouldDisassociateMsg
        else:
            msg = pruneDatasets_wouldRemoveMsg
        print(msg.format(**result.action))
        printAstropyTables(result.tables)
        return
    if result.confirm:
        # Show what will be removed and ask before proceeding; default answer
        # is "no" as a precaution.
        if not result.tables:
            print(pruneDatasets_noDatasetsFound)
            return
        print(pruneDatasets_willRemoveMsg)
        printAstropyTables(result.tables)
        doContinue = click.confirm(pruneDatasets_askContinueMsg, default=False)
        if doContinue:
            result.onConfirmation()
            print(pruneDatasets_didRemoveAforementioned)
        else:
            print(pruneDatasets_didNotRemoveAforementioned)
        return
    if result.finished:
        # The removal already happened in the script layer; report it unless
        # --quiet was passed.
        if not quiet:
            print(pruneDatasets_didRemoveMsg)
            printAstropyTables(result.tables)
        return

331 

332 

@click.command(short_help="Search for collections.", cls=ButlerCommand)
@repo_argument(required=True)
@glob_argument(help="GLOB is one or more glob-style expressions that fully or partially identify the "
               "collections to return.")
@collection_type_option()
@click.option("--chains",
              default="table",
              # Fixed missing space in help text: "list.Defaults" ->
              # "list. Defaults"; dropped the stray trailing space.
              help=unwrap("""Affects how results are presented. TABLE lists each dataset in a row with
                          chained datasets' children listed in a Definition column. TREE lists children below
                          their parent in tree form. FLATTEN lists all datasets, including child datasets in
                          one list. Defaults to TABLE."""),
              # to_upper normalizes the value so the default "table" matches
              # the upper-case choices.
              callback=to_upper,
              type=click.Choice(("TABLE", "TREE", "FLATTEN"), case_sensitive=False))
@options_file_option()
def query_collections(*args, **kwargs):
    """Get the collections whose names match an expression."""
    table = script.queryCollections(*args, **kwargs)
    # The unit test that mocks script.queryCollections does not return a table
    # so we need the following `if`.
    if table:
        # When chains==TREE, the children of chained datasets are indented
        # relative to their parents. For this to work properly the table must
        # be left-aligned.
        table.pprint_all(align="<")

357 

358 

@click.command(cls=ButlerCommand)
@repo_argument(required=True)
@glob_argument(help="GLOB is one or more glob-style expressions that fully or partially identify the "
               "dataset types to return.")
@verbose_option(help="Include dataset type name, dimensions, and storage class in output.")
@components_option()
@options_file_option()
def query_dataset_types(*args, **kwargs):
    """Get the dataset types in a repository."""
    table = script.queryDatasetTypes(*args, **kwargs)
    # Guard clause: an empty result gets a hint instead of an empty table.
    if not table:
        print("No results. Try --help for more information.")
        return
    table.pprint_all()

373 

374 

@click.command(cls=ButlerCommand)
@repo_argument(required=True)
# click maps the dashed argument name to the "dataset_type_name" kwarg.
@click.argument('dataset-type-name', nargs=1)
def remove_dataset_type(*args, **kwargs):
    """Remove a dataset type definition from a repository."""
    script.removeDatasetType(*args, **kwargs)

381 

382 

@click.command(cls=ButlerCommand)
@query_datasets_options()
@options_file_option()
def query_datasets(**kwargs):
    """List the datasets in a repository."""
    # Print each result table surrounded by blank lines for readability.
    tables = script.QueryDatasets(**kwargs).getTables()
    for result_table in tables:
        print("")
        result_table.pprint_all()
        print("")

392 

393 

@click.command(cls=ButlerCommand)
@repo_argument(required=True)
# click maps the dashed argument names to input_collection, output_collection,
# and dataset_type_name kwargs.
@click.argument('input-collection')
@click.argument('output-collection')
@click.argument('dataset-type-name')
@click.option("--begin-date", type=str, default=None,
              help=unwrap("""ISO-8601 datetime (TAI) of the beginning of the validity range for the
                          certified calibrations."""))
@click.option("--end-date", type=str, default=None,
              help=unwrap("""ISO-8601 datetime (TAI) of the end of the validity range for the
                          certified calibrations."""))
@click.option("--search-all-inputs", is_flag=True, default=False,
              help=unwrap("""Search all children of the inputCollection if it is a CHAINED collection,
                          instead of just the most recent one."""))
@options_file_option()
def certify_calibrations(*args, **kwargs):
    """Certify calibrations in a repository.
    """
    # All option values are forwarded unchanged to the script-layer
    # implementation.
    script.certifyCalibrations(*args, **kwargs)

413 

414 

@click.command(cls=ButlerCommand)
@repo_argument(required=True)
@dimensions_argument(help=unwrap("""DIMENSIONS are the keys of the data IDs to yield, such as exposure,
                                 instrument, or tract. Will be expanded to include any dependencies."""))
@collections_option()
@datasets_option(help=unwrap("""An expression that fully or partially identifies dataset types that should
                             constrain the yielded data IDs. For example, including "raw" here would
                             constrain the yielded "instrument", "exposure", "detector", and
                             "physical_filter" values to only those for which at least one "raw" dataset
                             exists in "collections"."""))
@where_option(help=where_help)
@options_file_option()
def query_data_ids(**kwargs):
    """List the data IDs in a repository.
    """
    table = script.queryDataIds(**kwargs)
    if table:
        table.pprint_all()
        return
    # No rows: tailor the hint to whether any constraints were supplied.
    if kwargs.get("dimensions") or kwargs.get("datasets"):
        print("No results. Try --help for more information.")
    else:
        print("No results. Try requesting some dimensions or datasets, see --help for more information.")

438 

439 

@click.command(cls=ButlerCommand)
@repo_argument(required=True)
@element_argument(required=True)
@datasets_option(help=unwrap("""An expression that fully or partially identifies dataset types that should
                             constrain the yielded records. Only affects results when used with
                             --collections."""))
@collections_option(help=collections_option.help + " Only affects results when used with --datasets.")
@where_option(help=where_help)
@click.option("--no-check", is_flag=True,
              # Fixed help-text grammar: "before it executed" -> "before it is
              # executed".
              help=unwrap("""Don't check the query before execution. By default the query is checked before it
                          is executed, this may reject some valid queries that resemble common mistakes."""))
@options_file_option()
def query_dimension_records(**kwargs):
    """Query for dimension information."""
    table = script.queryDimensionRecords(**kwargs)
    # An empty result gets a hint instead of an empty table.
    if not table:
        print("No results. Try --help for more information.")
        return
    table.pprint_all()

459 

460 

@click.command(cls=ButlerCommand)
@repo_argument(required=True)
@query_datasets_options(showUri=False, useArguments=False, repo=False)
@destination_argument(help="Destination URI of folder to receive file artifacts.")
@transfer_option()
@verbose_option(help="Report destination location of all transferred artifacts.")
@click.option("--preserve-path/--no-preserve-path", is_flag=True, default=True,
              help="Preserve the datastore path to the artifact at the destination.")
@click.option("--clobber/--no-clobber", is_flag=True, default=False,
              help="If clobber, overwrite files if they exist locally.")
@options_file_option()
def retrieve_artifacts(**kwargs):
    """Retrieve file artifacts associated with datasets in a repository."""
    # --verbose is consumed here; every remaining option is forwarded to the
    # script layer.
    verbose = kwargs.pop("verbose")
    transferred = script.retrieveArtifacts(**kwargs)
    if verbose and transferred:
        print(f"Transferred the following to {kwargs['destination']}:")
        for artifact_uri in transferred:
            print(artifact_uri)
        print()
    print(f"Number of artifacts retrieved into destination {kwargs['destination']}: {len(transferred)}")

482 

483 

@click.command(cls=ButlerCommand)
@click.argument("source", required=True)
@click.argument("dest", required=True)
@query_datasets_options(showUri=False, useArguments=False, repo=False)
@transfer_option()
@options_file_option()
def transfer_datasets(**kwargs):
    """Transfer datasets from a source butler to a destination butler.

    SOURCE is a URI to the Butler repository containing the RUN dataset.

    DEST is a URI to the Butler repository that will receive copies of the
    datasets.
    """
    # The script layer returns the count of transferred datasets for the
    # summary line.
    number = script.transferDatasets(**kwargs)
    print(f"Number of datasets transferred: {number}")