Coverage for python/felis/cli.py: 50% (243 statements)

coverage.py v7.4.4, created at 2024-04-18 09:50 +0000

# This file is part of felis.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from __future__ import annotations

import io
import json
import logging
import sys
from collections.abc import Iterable, Mapping, MutableMapping
from typing import IO, Any

import click
import yaml
from pydantic import ValidationError
from pyld import jsonld
from sqlalchemy.engine import Engine, create_engine, create_mock_engine, make_url
from sqlalchemy.engine.mock import MockConnection

from . import DEFAULT_CONTEXT, DEFAULT_FRAME, __version__
from .check import CheckingVisitor
from .datamodel import Schema
from .metadata import DatabaseContext, InsertDump, MetaDataBuilder
from .tap import Tap11Base, TapLoadingVisitor, init_tables
from .utils import ReorderingVisitor
from .validation import get_schema

logger = logging.getLogger("felis")

loglevel_choices = ["CRITICAL", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG"]

@click.group()
@click.version_option(__version__)
@click.option(
    "--log-level",
    type=click.Choice(loglevel_choices),
    envvar="FELIS_LOGLEVEL",
    help="Felis log level",
    default=logging.getLevelName(logging.INFO),
)
@click.option(
    "--log-file",
    type=click.Path(),
    envvar="FELIS_LOGFILE",
    help="Felis log file path",
)
def cli(log_level: str, log_file: str | None) -> None:
    """Felis Command Line Tools."""
    if log_file:
        logging.basicConfig(filename=log_file, level=log_level)
    else:
        logging.basicConfig(level=log_level)
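
# The group options can be given on the command line or via the environment.
# A minimal sketch, assuming the console script is installed as ``felis`` and
# that ``schema.yaml`` is a placeholder schema file:
#
#     FELIS_LOGLEVEL=DEBUG felis --log-file felis.log validate schema.yaml
#
# or, programmatically (e.g. in tests), with click's test runner:
#
#     from click.testing import CliRunner
#     result = CliRunner().invoke(cli, ["--log-level", "INFO", "validate", "schema.yaml"])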

@cli.command("create")
@click.option("--engine-url", envvar="ENGINE_URL", help="SQLAlchemy Engine URL", default="sqlite://")
@click.option("--schema-name", help="Alternate schema name to override Felis file")
@click.option(
    "--create-if-not-exists", is_flag=True, help="Create the schema in the database if it does not exist"
)
@click.option("--drop-if-exists", is_flag=True, help="Drop schema if it already exists in the database")
@click.option("--echo", is_flag=True, help="Echo database commands as they are executed")
@click.option("--dry-run", is_flag=True, help="Dry run only to print out commands instead of executing")
@click.option(
    "--output-file", "-o", type=click.File(mode="w"), help="Write SQL commands to a file instead of executing"
)
@click.argument("file", type=click.File())
def create(
    engine_url: str,
    schema_name: str | None,
    create_if_not_exists: bool,
    drop_if_exists: bool,
    echo: bool,
    dry_run: bool,
    output_file: IO[str] | None,
    file: IO,
) -> None:
    """Create database objects from the Felis file."""
    yaml_data = yaml.safe_load(file)
    schema = Schema.model_validate(yaml_data)
    url_obj = make_url(engine_url)
    if schema_name:
        logger.info(f"Overriding schema name with: {schema_name}")
        schema.name = schema_name
    elif url_obj.drivername == "sqlite":
        logger.info("Overriding schema name for sqlite with: main")
        schema.name = "main"
    if not url_obj.host and not url_obj.drivername == "sqlite":
        dry_run = True
        logger.info("Forcing dry run for non-sqlite engine URL with no host")

    builder = MetaDataBuilder(schema)
    builder.build()
    metadata = builder.metadata
    logger.debug(f"Created metadata with schema name: {metadata.schema}")

    engine: Engine | MockConnection
    if not dry_run and not output_file:
        engine = create_engine(engine_url, echo=echo)
    else:
        if dry_run:
            logger.info("Dry run will be executed")
        engine = DatabaseContext.create_mock_engine(url_obj, output_file)
        if output_file:
            logger.info("Writing SQL output to: " + output_file.name)

    context = DatabaseContext(metadata, engine)

    if drop_if_exists:
        logger.debug("Dropping schema if it exists")
        context.drop_if_exists()
        create_if_not_exists = True  # If schema is dropped, it needs to be recreated.

    if create_if_not_exists:
        logger.debug("Creating schema if not exists")
        context.create_if_not_exists()

    context.create_all()
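
# Example usage, a sketch only; it assumes the console script is installed as
# ``felis`` and that ``schema.yaml`` and the engine URLs are placeholders:
#
#     felis create --engine-url sqlite:///my.db schema.yaml
#     felis create --dry-run -o create.sql --engine-url postgresql://user@host/db schema.yaml
#
# The second form never connects to the database: a mock engine is used and the
# generated DDL is written to create.sql.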

@cli.command("init-tap")
@click.option("--tap-schema-name", help="Alt Schema Name for TAP_SCHEMA")
@click.option("--tap-schemas-table", help="Alt Table Name for TAP_SCHEMA.schemas")
@click.option("--tap-tables-table", help="Alt Table Name for TAP_SCHEMA.tables")
@click.option("--tap-columns-table", help="Alt Table Name for TAP_SCHEMA.columns")
@click.option("--tap-keys-table", help="Alt Table Name for TAP_SCHEMA.keys")
@click.option("--tap-key-columns-table", help="Alt Table Name for TAP_SCHEMA.key_columns")
@click.argument("engine-url")
def init_tap(
    engine_url: str,
    tap_schema_name: str,
    tap_schemas_table: str,
    tap_tables_table: str,
    tap_columns_table: str,
    tap_keys_table: str,
    tap_key_columns_table: str,
) -> None:
    """Initialize TAP 1.1 TAP_SCHEMA objects.

    Please verify the schema/catalog you are executing this in via your
    engine URL.
    """
    engine = create_engine(engine_url, echo=True)
    init_tables(
        tap_schema_name,
        tap_schemas_table,
        tap_tables_table,
        tap_columns_table,
        tap_keys_table,
        tap_key_columns_table,
    )
    Tap11Base.metadata.create_all(engine)
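
# Example, a sketch only; the engine URL is a placeholder and ``felis`` is the
# assumed console script name:
#
#     felis init-tap sqlite:///tap.db
#
# This creates empty TAP_SCHEMA tables (schemas, tables, columns, keys and
# key_columns) in the target database, echoing the DDL as it runs.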

@cli.command("load-tap")
@click.option("--engine-url", envvar="ENGINE_URL", help="SQLAlchemy Engine URL to catalog")
@click.option("--schema-name", help="Alternate Schema Name for Felis file")
@click.option("--catalog-name", help="Catalog Name for Schema")
@click.option("--dry-run", is_flag=True, help="Dry Run Only. Prints out the DDL that would be executed")
@click.option("--tap-schema-name", help="Alt Schema Name for TAP_SCHEMA")
@click.option("--tap-tables-postfix", help="Postfix for TAP table names")
@click.option("--tap-schemas-table", help="Alt Table Name for TAP_SCHEMA.schemas")
@click.option("--tap-tables-table", help="Alt Table Name for TAP_SCHEMA.tables")
@click.option("--tap-columns-table", help="Alt Table Name for TAP_SCHEMA.columns")
@click.option("--tap-keys-table", help="Alt Table Name for TAP_SCHEMA.keys")
@click.option("--tap-key-columns-table", help="Alt Table Name for TAP_SCHEMA.key_columns")
@click.argument("file", type=click.File())
def load_tap(
    engine_url: str,
    schema_name: str,
    catalog_name: str,
    dry_run: bool,
    tap_schema_name: str,
    tap_tables_postfix: str,
    tap_schemas_table: str,
    tap_tables_table: str,
    tap_columns_table: str,
    tap_keys_table: str,
    tap_key_columns_table: str,
    file: io.TextIOBase,
) -> None:
    """Load TAP metadata from a Felis FILE.

    This command loads the associated TAP metadata from a Felis FILE
    to the TAP_SCHEMA tables.
    """
    top_level_object = yaml.load(file, Loader=yaml.SafeLoader)
    schema_obj: dict
    if isinstance(top_level_object, dict):
        schema_obj = top_level_object
        if "@graph" not in schema_obj:
            schema_obj["@type"] = "felis:Schema"
        schema_obj["@context"] = DEFAULT_CONTEXT
    elif isinstance(top_level_object, list):
        schema_obj = {"@context": DEFAULT_CONTEXT, "@graph": top_level_object}
    else:
        logger.error("Schema object not of recognizable type")
        raise click.exceptions.Exit(1)

    normalized = _normalize(schema_obj, embed="@always")
    if len(normalized["@graph"]) > 1 and (schema_name or catalog_name):
        logger.error("--schema-name and --catalog-name incompatible with multiple schemas")
        raise click.exceptions.Exit(1)

    # Force normalized["@graph"] to a list, which is what happens when there
    # are multiple schemas
    if isinstance(normalized["@graph"], dict):
        normalized["@graph"] = [normalized["@graph"]]

    tap_tables = init_tables(
        tap_schema_name,
        tap_tables_postfix,
        tap_schemas_table,
        tap_tables_table,
        tap_columns_table,
        tap_keys_table,
        tap_key_columns_table,
    )

    if not dry_run:
        engine = create_engine(engine_url)

        if engine_url == "sqlite://" and not dry_run:
            # In-memory SQLite, mostly used for testing
            Tap11Base.metadata.create_all(engine)

        for schema in normalized["@graph"]:
            tap_visitor = TapLoadingVisitor(
                engine,
                catalog_name=catalog_name,
                schema_name=schema_name,
                tap_tables=tap_tables,
            )
            tap_visitor.visit_schema(schema)
    else:
        _insert_dump = InsertDump()
        conn = create_mock_engine(make_url(engine_url), executor=_insert_dump.dump, paramstyle="pyformat")
        # After the engine is created, update the executor with the dialect
        _insert_dump.dialect = conn.dialect

        for schema in normalized["@graph"]:
            tap_visitor = TapLoadingVisitor.from_mock_connection(
                conn,
                catalog_name=catalog_name,
                schema_name=schema_name,
                tap_tables=tap_tables,
            )
            tap_visitor.visit_schema(schema)
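
# Example, a sketch only; URLs and file names are placeholders and ``felis`` is
# the assumed console script name:
#
#     felis load-tap --engine-url postgresql://user@host/tap schema.yaml
#     felis load-tap --engine-url sqlite:// --dry-run schema.yaml
#
# The second form builds a mock engine whose InsertDump executor prints the
# generated statements instead of executing them.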

@cli.command("modify-tap")
@click.option("--start-schema-at", type=int, help="Rewrite index for tap:schema_index", default=0)
@click.argument("files", nargs=-1, type=click.File())
def modify_tap(start_schema_at: int, files: Iterable[io.TextIOBase]) -> None:
    """Modify TAP information in Felis schema FILES.

    This command has some utilities to aid in rewriting felis FILES
    in specific ways. It will write out a merged version of these files.
    """
    count = 0
    graph = []
    for file in files:
        schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
        if "@graph" not in schema_obj:
            schema_obj["@type"] = "felis:Schema"
        schema_obj["@context"] = DEFAULT_CONTEXT
        schema_index = schema_obj.get("tap:schema_index")
        if not schema_index or (schema_index and schema_index > start_schema_at):
            schema_index = start_schema_at + count
            count += 1
        schema_obj["tap:schema_index"] = schema_index
        graph.extend(jsonld.flatten(schema_obj))
    merged = {"@context": DEFAULT_CONTEXT, "@graph": graph}
    normalized = _normalize(merged, embed="@always")
    _dump(normalized)
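
# Example, a sketch only; file names are placeholders and ``felis`` is the
# assumed console script name:
#
#     felis modify-tap --start-schema-at 1 schema1.yaml schema2.yaml > merged.yaml
#
# Schemas without a tap:schema_index (or with one greater than the starting
# value) are assigned sequential indexes, and the merged, normalized result is
# printed as YAML.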

@cli.command("basic-check")
@click.argument("file", type=click.File())
def basic_check(file: io.TextIOBase) -> None:
    """Perform a basic check on a felis FILE.

    This performs a very basic check to ensure required fields are
    populated and basic semantics are okay. It does not ensure semantics
    are valid for other commands like create or load-tap.
    """
    schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
    schema_obj["@type"] = "felis:Schema"
    # Force Context and Schema Type
    schema_obj["@context"] = DEFAULT_CONTEXT
    check_visitor = CheckingVisitor()
    check_visitor.visit_schema(schema_obj)
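
# Example, a sketch only; ``schema.yaml`` is a placeholder file name:
#
#     felis basic-check schema.yaml
#
# The CheckingVisitor only verifies required fields and basic semantics; use
# the ``validate`` command for validation against the Pydantic data model.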

@cli.command("normalize")
@click.argument("file", type=click.File())
def normalize(file: io.TextIOBase) -> None:
    """Normalize a Felis FILE.

    Takes a felis schema FILE, expands it (resolving the full URLs),
    then compacts it, and finally produces output in the canonical
    format.

    (This is most useful in some debugging scenarios.)

    See Also:

    https://json-ld.org/spec/latest/json-ld/#expanded-document-form
    https://json-ld.org/spec/latest/json-ld/#compacted-document-form
    """
    schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
    schema_obj["@type"] = "felis:Schema"
    # Force Context and Schema Type
    schema_obj["@context"] = DEFAULT_CONTEXT
    expanded = jsonld.expand(schema_obj)
    normalized = _normalize(expanded, embed="@always")
    _dump(normalized)
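
# Example, a sketch only; the file name is a placeholder:
#
#     felis normalize schema.yaml > normalized.yaml
#
# The schema is expanded with jsonld.expand, then framed and compacted by
# _normalize, and the result is printed as YAML in canonical key order.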

@cli.command("merge")
@click.argument("files", nargs=-1, type=click.File())
def merge(files: Iterable[io.TextIOBase]) -> None:
    """Merge a set of Felis FILES.

    This will expand out the felis FILES so that it is easy to
    override values (using @id), then normalize to a single
    output.
    """
    graph = []
    for file in files:
        schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
        if "@graph" not in schema_obj:
            schema_obj["@type"] = "felis:Schema"
        schema_obj["@context"] = DEFAULT_CONTEXT
        graph.extend(jsonld.flatten(schema_obj))
    updated_map: MutableMapping[str, Any] = {}
    for item in graph:
        _id = item["@id"]
        item_to_update = updated_map.get(_id, item)
        if item_to_update and item_to_update != item:
            logger.debug(f"Overwriting {_id}")
            item_to_update.update(item)
        updated_map[_id] = item_to_update
    merged = {"@context": DEFAULT_CONTEXT, "@graph": list(updated_map.values())}
    normalized = _normalize(merged, embed="@always")
    _dump(normalized)
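
# Example, a sketch only; file names are placeholders and ``felis`` is the
# assumed console script name:
#
#     felis merge base.yaml overrides.yaml > merged.yaml
#
# Flattened nodes sharing an @id are merged, with values from later files
# overwriting those from earlier ones.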

@cli.command("validate")
@click.option(
    "-s",
    "--schema-name",
    help="Schema name for validation",
    type=click.Choice(["RSP", "default"]),
    default="default",
)
@click.option(
    "-d", "--require-description", is_flag=True, help="Require description for all objects", default=False
)
@click.option(
    "-t", "--check-redundant-datatypes", is_flag=True, help="Check for redundant datatypes", default=False
)
@click.argument("files", nargs=-1, type=click.File())
def validate(
    schema_name: str,
    require_description: bool,
    check_redundant_datatypes: bool,
    files: Iterable[io.TextIOBase],
) -> None:
    """Validate one or more felis YAML files."""
    schema_class = get_schema(schema_name)
    if schema_name != "default":
        logger.info(f"Using schema '{schema_class.__name__}'")

    rc = 0
    for file in files:
        file_name = getattr(file, "name", None)
        logger.info(f"Validating {file_name}")
        try:
            data = yaml.load(file, Loader=yaml.SafeLoader)
            schema_class.model_validate(
                data,
                context={
                    "check_redundant_datatypes": check_redundant_datatypes,
                    "require_description": require_description,
                },
            )
        except ValidationError as e:
            logger.error(e)
            rc = 1
    if rc:
        raise click.exceptions.Exit(rc)
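
# Example, a sketch only; file names are placeholders and ``felis`` is the
# assumed console script name:
#
#     felis validate -s RSP --require-description schema1.yaml schema2.yaml
#
# Each file is validated independently; errors are logged and the command exits
# with a non-zero code if any file fails.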

@cli.command("dump-json")
@click.option("-x", "--expanded", is_flag=True, help="Expand schema before dumping.")
@click.option("-f", "--framed", is_flag=True, help="Frame schema before dumping.")
@click.option("-c", "--compacted", is_flag=True, help="Compact schema before dumping.")
@click.option("-g", "--graph", is_flag=True, help="Pass graph option to compact.")
@click.argument("file", type=click.File())
def dump_json(
    file: io.TextIOBase,
    expanded: bool = False,
    compacted: bool = False,
    framed: bool = False,
    graph: bool = False,
) -> None:
    """Dump JSON representation using various JSON-LD options."""
    schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
    schema_obj["@type"] = "felis:Schema"
    # Force Context and Schema Type
    schema_obj["@context"] = DEFAULT_CONTEXT

    if expanded:
        schema_obj = jsonld.expand(schema_obj)
    if framed:
        schema_obj = jsonld.frame(schema_obj, DEFAULT_FRAME)
    if compacted:
        options = {}
        if graph:
            options["graph"] = True
        schema_obj = jsonld.compact(schema_obj, DEFAULT_CONTEXT, options=options)
    json.dump(schema_obj, sys.stdout, indent=4)
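
# Example, a sketch only; the file name is a placeholder:
#
#     felis dump-json --expanded --compacted --graph schema.yaml
#
# The options are applied in a fixed order (expand, then frame, then compact)
# and the resulting JSON-LD document is written to stdout.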

def _dump(obj: Mapping[str, Any]) -> None:
    """Dump a mapping to stdout as YAML, preserving key insertion order."""

    class OrderedDumper(yaml.Dumper):
        pass

    def _dict_representer(dumper: yaml.Dumper, data: Any) -> Any:
        return dumper.represent_mapping(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, data.items())

    OrderedDumper.add_representer(dict, _dict_representer)
    print(yaml.dump(obj, Dumper=OrderedDumper, default_flow_style=False))

def _normalize(schema_obj: Mapping[str, Any], embed: str = "@last") -> MutableMapping[str, Any]:
    """Frame and compact a JSON-LD schema object into its canonical form."""
    framed = jsonld.frame(schema_obj, DEFAULT_FRAME, options=dict(embed=embed))
    compacted = jsonld.compact(framed, DEFAULT_CONTEXT, options=dict(graph=True))
    graph = compacted["@graph"]
    graph = [ReorderingVisitor(add_type=True).visit_schema(schema_obj) for schema_obj in graph]
    compacted["@graph"] = graph if len(graph) > 1 else graph[0]
    return compacted
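
# A sketch of the shapes _normalize deals with (hypothetical data): given
#
#     {"@context": DEFAULT_CONTEXT, "@graph": [{"@id": "#schema1"}, {"@id": "#schema2"}]}
#
# it returns a compacted document whose "@graph" is a list when more than one
# schema is present and a single mapping when there is exactly one, which is
# why load_tap re-wraps the value in a list before iterating over it.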

if __name__ == "__main__":  # coverage: the cli() call below was never reached in this report
    cli()