Coverage for python/felis/cli.py: 53%

219 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-26 02:43 -0700

1# This file is part of felis. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import io 

25import json 

26import logging 

27import sys 

28from collections.abc import Iterable, Mapping, MutableMapping 

29from typing import IO, Any 

30 

31import click 

32import yaml 

33from pydantic import ValidationError 

34from pyld import jsonld 

35from sqlalchemy.engine import Engine, create_engine, create_mock_engine, make_url 

36from sqlalchemy.engine.mock import MockConnection 

37 

38from . import DEFAULT_CONTEXT, DEFAULT_FRAME, __version__ 

39from .datamodel import Schema 

40from .metadata import DatabaseContext, InsertDump, MetaDataBuilder 

41from .tap import Tap11Base, TapLoadingVisitor, init_tables 

42from .utils import ReorderingVisitor 

43from .validation import get_schema 

44 

# Module-level logger shared by all felis CLI commands.
logger = logging.getLogger("felis")

# Accepted values for the --log-level option; standard logging level names.
loglevel_choices = ["CRITICAL", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG"]

48 

49 

@click.group()
@click.version_option(__version__)
@click.option(
    "--log-level",
    type=click.Choice(loglevel_choices),
    envvar="FELIS_LOGLEVEL",
    help="Felis log level",
    default=logging.getLevelName(logging.INFO),
)
@click.option(
    "--log-file",
    type=click.Path(),
    envvar="FELIS_LOGFILE",
    help="Felis log file path",
)
def cli(log_level: str, log_file: str | None) -> None:
    """Felis Command Line Tools."""
    # Configure root logging once for the whole CLI run; a log file is
    # used only when one was requested via option or environment.
    log_config: dict[str, Any] = {"level": log_level}
    if log_file:
        log_config["filename"] = log_file
    logging.basicConfig(**log_config)

71 

72 

@cli.command("create")
@click.option("--engine-url", envvar="ENGINE_URL", help="SQLAlchemy Engine URL", default="sqlite://")
@click.option("--schema-name", help="Alternate schema name to override Felis file")
@click.option(
    "--create-if-not-exists", is_flag=True, help="Create the schema in the database if it does not exist"
)
@click.option("--drop-if-exists", is_flag=True, help="Drop schema if it already exists in the database")
@click.option("--echo", is_flag=True, help="Echo database commands as they are executed")
@click.option("--dry-run", is_flag=True, help="Dry run only to print out commands instead of executing")
@click.option(
    "--output-file", "-o", type=click.File(mode="w"), help="Write SQL commands to a file instead of executing"
)
@click.argument("file", type=click.File())
def create(
    engine_url: str,
    schema_name: str | None,
    create_if_not_exists: bool,
    drop_if_exists: bool,
    echo: bool,
    dry_run: bool,
    output_file: IO[str] | None,
    file: IO,
) -> None:
    """Create database objects from the Felis file."""
    yaml_data = yaml.safe_load(file)
    schema = Schema.model_validate(yaml_data)
    url_obj = make_url(engine_url)
    if schema_name:
        logger.info("Overriding schema name with: %s", schema_name)
        schema.name = schema_name
    elif url_obj.drivername == "sqlite":
        # SQLite has a single, unnamed schema which SQLAlchemy addresses
        # as "main".
        logger.info("Overriding schema name for sqlite with: main")
        schema.name = "main"
    # With no host there is nothing to connect to (except sqlite, which
    # is file- or memory-based), so force a dry run.
    if not url_obj.host and url_obj.drivername != "sqlite":
        dry_run = True
        logger.info("Forcing dry run for non-sqlite engine URL with no host")

    builder = MetaDataBuilder(schema)
    builder.build()
    metadata = builder.metadata
    logger.debug("Created metadata with schema name: %s", metadata.schema)

    engine: Engine | MockConnection
    if not dry_run and not output_file:
        engine = create_engine(engine_url, echo=echo)
    else:
        if dry_run:
            logger.info("Dry run will be executed")
        # A mock engine renders DDL as text (to stdout or the output
        # file) instead of executing it against a database.
        engine = DatabaseContext.create_mock_engine(url_obj, output_file)
        if output_file:
            logger.info("Writing SQL output to: %s", output_file.name)

    context = DatabaseContext(metadata, engine)

    if drop_if_exists:
        logger.debug("Dropping schema if it exists")
        context.drop_if_exists()
        create_if_not_exists = True  # If schema is dropped, it needs to be recreated.

    if create_if_not_exists:
        logger.debug("Creating schema if not exists")
        context.create_if_not_exists()

    context.create_all()

137 

138 

@cli.command("init-tap")
@click.option("--tap-schema-name", help="Alt Schema Name for TAP_SCHEMA")
@click.option("--tap-schemas-table", help="Alt Table Name for TAP_SCHEMA.schemas")
@click.option("--tap-tables-table", help="Alt Table Name for TAP_SCHEMA.tables")
@click.option("--tap-columns-table", help="Alt Table Name for TAP_SCHEMA.columns")
@click.option("--tap-keys-table", help="Alt Table Name for TAP_SCHEMA.keys")
@click.option("--tap-key-columns-table", help="Alt Table Name for TAP_SCHEMA.key_columns")
@click.argument("engine-url")
def init_tap(
    engine_url: str,
    tap_schema_name: str,
    tap_schemas_table: str,
    tap_tables_table: str,
    tap_columns_table: str,
    tap_keys_table: str,
    tap_key_columns_table: str,
) -> None:
    """Initialize TAP 1.1 TAP_SCHEMA objects.

    Please verify the schema/catalog you are executing this in in your
    engine URL.
    """
    engine = create_engine(engine_url, echo=True)
    # Register the (possibly renamed) TAP_SCHEMA tables on Tap11Base's
    # metadata, then emit the DDL for all of them.
    table_names = (
        tap_schema_name,
        tap_schemas_table,
        tap_tables_table,
        tap_columns_table,
        tap_keys_table,
        tap_key_columns_table,
    )
    init_tables(*table_names)
    Tap11Base.metadata.create_all(engine)

171 

172 

@cli.command("load-tap")
@click.option("--engine-url", envvar="ENGINE_URL", help="SQLAlchemy Engine URL to catalog")
@click.option("--schema-name", help="Alternate Schema Name for Felis file")
@click.option("--catalog-name", help="Catalog Name for Schema")
@click.option("--dry-run", is_flag=True, help="Dry Run Only. Prints out the DDL that would be executed")
@click.option("--tap-schema-name", help="Alt Schema Name for TAP_SCHEMA")
@click.option("--tap-tables-postfix", help="Postfix for TAP table names")
@click.option("--tap-schemas-table", help="Alt Table Name for TAP_SCHEMA.schemas")
@click.option("--tap-tables-table", help="Alt Table Name for TAP_SCHEMA.tables")
@click.option("--tap-columns-table", help="Alt Table Name for TAP_SCHEMA.columns")
@click.option("--tap-keys-table", help="Alt Table Name for TAP_SCHEMA.keys")
@click.option("--tap-key-columns-table", help="Alt Table Name for TAP_SCHEMA.key_columns")
@click.option("--tap-schema-index", type=int, help="TAP_SCHEMA index of the schema")
@click.argument("file", type=click.File())
def load_tap(
    engine_url: str,
    schema_name: str,
    catalog_name: str,
    dry_run: bool,
    tap_schema_name: str,
    tap_tables_postfix: str,
    tap_schemas_table: str,
    tap_tables_table: str,
    tap_columns_table: str,
    tap_keys_table: str,
    tap_key_columns_table: str,
    tap_schema_index: int,
    file: io.TextIOBase,
) -> None:
    """Load TAP metadata from a Felis FILE.

    This command loads the associated TAP metadata from a Felis FILE
    to the TAP_SCHEMA tables.
    """
    yaml_data = yaml.safe_load(file)
    schema = Schema.model_validate(yaml_data)

    tap_tables = init_tables(
        tap_schema_name,
        tap_tables_postfix,
        tap_schemas_table,
        tap_tables_table,
        tap_columns_table,
        tap_keys_table,
        tap_key_columns_table,
    )

    if not dry_run:
        engine = create_engine(engine_url)

        # Removed a redundant `and not dry_run` check here; this branch
        # is already guarded by `if not dry_run`.
        if engine_url == "sqlite://":
            # In Memory SQLite - Mostly used to test
            Tap11Base.metadata.create_all(engine)

        tap_visitor = TapLoadingVisitor(
            engine,
            catalog_name=catalog_name,
            schema_name=schema_name,
            tap_tables=tap_tables,
            tap_schema_index=tap_schema_index,
        )
        tap_visitor.visit_schema(schema)
    else:
        # Dry run: route all statements through a mock engine that dumps
        # the generated SQL instead of executing it.
        _insert_dump = InsertDump()
        conn = create_mock_engine(make_url(engine_url), executor=_insert_dump.dump, paramstyle="pyformat")
        # After the engine is created, update the executor with the dialect
        _insert_dump.dialect = conn.dialect

        tap_visitor = TapLoadingVisitor.from_mock_connection(
            conn,
            catalog_name=catalog_name,
            schema_name=schema_name,
            tap_tables=tap_tables,
            tap_schema_index=tap_schema_index,
        )
        tap_visitor.visit_schema(schema)

249 

250 

@cli.command("modify-tap")
@click.option("--start-schema-at", type=int, help="Rewrite index for tap:schema_index", default=0)
@click.argument("files", nargs=-1, type=click.File())
def modify_tap(start_schema_at: int, files: Iterable[io.TextIOBase]) -> None:
    """Modify TAP information in Felis schema FILES.

    This command has some utilities to aid in rewriting felis FILES
    in specific ways. It will write out a merged version of these files.
    """
    count = 0
    graph = []
    for file in files:
        schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
        if "@graph" not in schema_obj:
            # Bare schema document: add the felis type and context so it
            # can be processed as JSON-LD.
            schema_obj["@type"] = "felis:Schema"
            schema_obj["@context"] = DEFAULT_CONTEXT
        schema_index = schema_obj.get("tap:schema_index")
        # Reassign the index when it is missing/zero or above the start
        # value; indices are handed out sequentially from start_schema_at.
        # (Simplified: `schema_index and` was redundant in the original
        # `not schema_index or (schema_index and ...)` condition.)
        if not schema_index or schema_index > start_schema_at:
            schema_index = start_schema_at + count
            count += 1
        schema_obj["tap:schema_index"] = schema_index
        graph.extend(jsonld.flatten(schema_obj))
    merged = {"@context": DEFAULT_CONTEXT, "@graph": graph}
    normalized = _normalize(merged, embed="@always")
    _dump(normalized)

276 

277 

@cli.command("normalize")
@click.argument("file", type=click.File())
def normalize(file: io.TextIOBase) -> None:
    """Normalize a Felis FILE.

    Takes a felis schema FILE, expands it (resolving the full URLs),
    then compacts it, and finally produces output in the canonical
    format.

    (This is most useful in some debugging scenarios)

    See Also :

    https://json-ld.org/spec/latest/json-ld/#expanded-document-form
    https://json-ld.org/spec/latest/json-ld/#compacted-document-form
    """
    schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
    # Force Context and Schema Type
    schema_obj["@type"] = "felis:Schema"
    schema_obj["@context"] = DEFAULT_CONTEXT
    # Expand to full IRIs, then re-frame/compact into canonical form.
    normalized = _normalize(jsonld.expand(schema_obj), embed="@always")
    _dump(normalized)

301 

302 

@cli.command("merge")
@click.argument("files", nargs=-1, type=click.File())
def merge(files: Iterable[io.TextIOBase]) -> None:
    """Merge a set of Felis FILES.

    This will expand out the felis FILES so that it is easy to
    override values (using @Id), then normalize to a single
    output.
    """
    # Flatten every input file into one combined node list.
    graph = []
    for file in files:
        schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
        if "@graph" not in schema_obj:
            schema_obj["@type"] = "felis:Schema"
            schema_obj["@context"] = DEFAULT_CONTEXT
        graph.extend(jsonld.flatten(schema_obj))
    # De-duplicate by @id: later nodes overwrite the fields of earlier
    # ones with the same identifier.
    updated_map: MutableMapping[str, Any] = {}
    for item in graph:
        _id = item["@id"]
        if _id not in updated_map:
            updated_map[_id] = item
            continue
        target = updated_map[_id]
        if target and target != item:
            logger.debug(f"Overwriting {_id}")
            target.update(item)
    merged = {"@context": DEFAULT_CONTEXT, "@graph": list(updated_map.values())}
    normalized = _normalize(merged, embed="@always")
    _dump(normalized)

330 

331 

@cli.command("validate")
@click.option(
    "-s",
    "--schema-name",
    help="Schema name for validation",
    type=click.Choice(["RSP", "default"]),
    default="default",
)
@click.option(
    "-d", "--require-description", is_flag=True, help="Require description for all objects", default=False
)
@click.option(
    "-t", "--check-redundant-datatypes", is_flag=True, help="Check for redundant datatypes", default=False
)
@click.argument("files", nargs=-1, type=click.File())
def validate(
    schema_name: str,
    require_description: bool,
    check_redundant_datatypes: bool,
    files: Iterable[io.TextIOBase],
) -> None:
    """Validate one or more felis YAML files."""
    schema_class = get_schema(schema_name)
    if schema_name != "default":
        logger.info(f"Using schema '{schema_class.__name__}'")

    # Validation flags are passed through to the model via the pydantic
    # validation context.
    validation_context = {
        "check_redundant_datatypes": check_redundant_datatypes,
        "require_description": require_description,
    }
    any_failed = False
    for file in files:
        file_name = getattr(file, "name", None)
        logger.info(f"Validating {file_name}")
        try:
            data = yaml.load(file, Loader=yaml.SafeLoader)
            schema_class.model_validate(data, context=validation_context)
        except ValidationError as e:
            # Log and keep going so every file gets checked.
            logger.error(e)
            any_failed = True
    if any_failed:
        raise click.exceptions.Exit(1)

376 

377 

@cli.command("dump-json")
@click.option("-x", "--expanded", is_flag=True, help="Expand schema before dumping.")
@click.option("-f", "--framed", is_flag=True, help="Frame schema before dumping.")
@click.option("-c", "--compacted", is_flag=True, help="Compact schema before dumping.")
@click.option("-g", "--graph", is_flag=True, help="Pass graph option to compact.")
@click.argument("file", type=click.File())
def dump_json(
    file: io.TextIOBase,
    expanded: bool = False,
    compacted: bool = False,
    framed: bool = False,
    graph: bool = False,
) -> None:
    """Dump JSON representation using various JSON-LD options."""
    schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
    schema_obj["@type"] = "felis:Schema"
    # Force Context and Schema Type
    schema_obj["@context"] = DEFAULT_CONTEXT

    # Transforms apply in a fixed order — expand, frame, compact — with
    # each flag enabling one stage independently.
    if expanded:
        schema_obj = jsonld.expand(schema_obj)
    if framed:
        schema_obj = jsonld.frame(schema_obj, DEFAULT_FRAME)
    if compacted:
        options = {}
        if graph:
            options["graph"] = True
        schema_obj = jsonld.compact(schema_obj, DEFAULT_CONTEXT, options=options)
    json.dump(schema_obj, sys.stdout, indent=4)

407 

408 

def _dump(obj: Mapping[str, Any]) -> None:
    """Print *obj* as YAML, preserving the mapping's insertion order."""

    class _OrderedDumper(yaml.Dumper):
        """Dumper that serializes dicts without sorting their keys."""

    def _represent_dict(dumper: yaml.Dumper, data: Any) -> Any:
        # Emitting items() directly keeps insertion order instead of the
        # default key-sorted output.
        return dumper.represent_mapping(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, data.items())

    _OrderedDumper.add_representer(dict, _represent_dict)
    print(yaml.dump(obj, Dumper=_OrderedDumper, default_flow_style=False))

418 

419 

def _normalize(schema_obj: Mapping[str, Any], embed: str = "@last") -> MutableMapping[str, Any]:
    """Frame and compact a JSON-LD document into canonical felis form.

    The document is framed with the default felis frame (``embed``
    controls node embedding), compacted against the default context,
    and every node of the resulting graph is reordered canonically.
    """
    framed = jsonld.frame(schema_obj, DEFAULT_FRAME, options={"embed": embed})
    compacted = jsonld.compact(framed, DEFAULT_CONTEXT, options={"graph": True})
    reordered = [ReorderingVisitor(add_type=True).visit_schema(node) for node in compacted["@graph"]]
    # A single-element graph is unwrapped to the bare node mapping.
    compacted["@graph"] = reordered if len(reordered) > 1 else reordered[0]
    return compacted

427 

428 

if __name__ == "__main__":
    # Allow running the module directly (python -m felis.cli).
    cli()