Coverage for python/felis/cli.py: 49%

229 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-17 10:27 +0000

1# This file is part of felis. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import io 

25import json 

26import logging 

27import sys 

28from collections.abc import Iterable, Mapping, MutableMapping 

29from typing import Any 

30 

31import click 

32import yaml 

33from pydantic import ValidationError 

34from pyld import jsonld 

35from sqlalchemy.engine import Engine, create_engine, create_mock_engine, make_url 

36from sqlalchemy.engine.mock import MockConnection 

37 

38from . import DEFAULT_CONTEXT, DEFAULT_FRAME, __version__ 

39from .check import CheckingVisitor 

40from .datamodel import Schema 

41from .sql import SQLVisitor 

42from .tap import Tap11Base, TapLoadingVisitor, init_tables 

43from .utils import ReorderingVisitor 

44from .validation import get_schema 

45 

46logger = logging.getLogger("felis") 

47 

48 

# Root command group; every sub-command in this module is registered on it
# through ``@cli.command(...)``.
@click.group()
@click.version_option(__version__)
def cli() -> None:
    """Felis Command Line Tools."""
    # Set up root logging at INFO level before any sub-command body runs.
    logging.basicConfig(level=logging.INFO)

54 

55 

@cli.command("create-all")
@click.option("--engine-url", envvar="ENGINE_URL", help="SQLAlchemy Engine URL")
@click.option("--schema-name", help="Alternate Schema Name for Felis File")
@click.option("--dry-run", is_flag=True, help="Dry Run Only. Prints out the DDL that would be executed")
@click.argument("file", type=click.File())
def create_all(engine_url: str, schema_name: str, dry_run: bool, file: io.TextIOBase) -> None:
    """Create schema objects from the Felis FILE."""
    # Parse the YAML document and let the SQL visitor turn it into a schema
    # object; its ``metadata`` attribute is what gets created below.
    parsed = yaml.load(file, Loader=yaml.SafeLoader)
    sql_visitor = SQLVisitor(schema_name=schema_name)
    metadata = sql_visitor.visit_schema(parsed).metadata

    engine: Engine | MockConnection
    if dry_run:
        # Mock engine: statements are handed to InsertDump.dump, which prints
        # them, instead of being sent to a database.
        dumper = InsertDump()
        engine = create_mock_engine(make_url(engine_url), executor=dumper.dump)
        # The dialect is only known once the mock engine exists.
        dumper.dialect = engine.dialect
    else:
        engine = create_engine(engine_url)
    metadata.create_all(engine)

77 

78 

@cli.command("init-tap")
@click.option("--tap-schema-name", help="Alt Schema Name for TAP_SCHEMA")
@click.option("--tap-schemas-table", help="Alt Table Name for TAP_SCHEMA.schemas")
@click.option("--tap-tables-table", help="Alt Table Name for TAP_SCHEMA.tables")
@click.option("--tap-columns-table", help="Alt Table Name for TAP_SCHEMA.columns")
@click.option("--tap-keys-table", help="Alt Table Name for TAP_SCHEMA.keys")
@click.option("--tap-key-columns-table", help="Alt Table Name for TAP_SCHEMA.key_columns")
@click.argument("engine-url")
def init_tap(
    engine_url: str,
    tap_schema_name: str,
    tap_schemas_table: str,
    tap_tables_table: str,
    tap_columns_table: str,
    tap_keys_table: str,
    tap_key_columns_table: str,
) -> None:
    """Initialize TAP 1.1 TAP_SCHEMA objects.

    Please verify the schema/catalog you are executing this in in your
    engine URL.
    """
    # echo=True is passed so the engine logs the statements it issues.
    engine = create_engine(engine_url, echo=True)
    # The ``load-tap`` command in this module calls init_tables with the
    # tables postfix as the SECOND positional argument. This command has no
    # postfix option, so pass None in that slot; omitting it shifted every
    # table-name override one position to the left.
    # NOTE(review): positional order taken from the load-tap call site --
    # confirm against the init_tables signature in felis.tap.
    init_tables(
        tap_schema_name,
        None,
        tap_schemas_table,
        tap_tables_table,
        tap_columns_table,
        tap_keys_table,
        tap_key_columns_table,
    )
    Tap11Base.metadata.create_all(engine)

111 

112 

@cli.command("load-tap")
@click.option("--engine-url", envvar="ENGINE_URL", help="SQLAlchemy Engine URL to catalog")
@click.option("--schema-name", help="Alternate Schema Name for Felis file")
@click.option("--catalog-name", help="Catalog Name for Schema")
@click.option("--dry-run", is_flag=True, help="Dry Run Only. Prints out the DDL that would be executed")
@click.option("--tap-schema-name", help="Alt Schema Name for TAP_SCHEMA")
@click.option("--tap-tables-postfix", help="Postfix for TAP table names")
@click.option("--tap-schemas-table", help="Alt Table Name for TAP_SCHEMA.schemas")
@click.option("--tap-tables-table", help="Alt Table Name for TAP_SCHEMA.tables")
@click.option("--tap-columns-table", help="Alt Table Name for TAP_SCHEMA.columns")
@click.option("--tap-keys-table", help="Alt Table Name for TAP_SCHEMA.keys")
@click.option("--tap-key-columns-table", help="Alt Table Name for TAP_SCHEMA.key_columns")
@click.argument("file", type=click.File())
def load_tap(
    engine_url: str,
    schema_name: str,
    catalog_name: str,
    dry_run: bool,
    tap_schema_name: str,
    tap_tables_postfix: str,
    tap_schemas_table: str,
    tap_tables_table: str,
    tap_columns_table: str,
    tap_keys_table: str,
    tap_key_columns_table: str,
    file: io.TextIOBase,
) -> None:
    """Load TAP metadata from a Felis FILE.

    This command loads the associated TAP metadata from a Felis FILE
    to the TAP_SCHEMA tables.
    """
    top_level_object = yaml.load(file, Loader=yaml.SafeLoader)
    schema_obj: dict
    if isinstance(top_level_object, dict):
        schema_obj = top_level_object
        # A mapping without "@graph" is treated as a single schema.
        if "@graph" not in schema_obj:
            schema_obj["@type"] = "felis:Schema"
        schema_obj["@context"] = DEFAULT_CONTEXT
    elif isinstance(top_level_object, list):
        # A bare list is treated as the graph itself.
        schema_obj = {"@context": DEFAULT_CONTEXT, "@graph": top_level_object}
    else:
        logger.error("Schema object not of recognizable type")
        raise click.exceptions.Exit(1)

    normalized = _normalize(schema_obj, embed="@always")

    # _normalize stores a bare mapping (not a list) under "@graph" when there
    # is exactly one schema. Force it to a list BEFORE counting schemas:
    # len() of the bare mapping counts its keys, which made the check below
    # reject --schema-name / --catalog-name even for a single schema.
    if isinstance(normalized["@graph"], dict):
        normalized["@graph"] = [normalized["@graph"]]
    schemas = normalized["@graph"]

    if len(schemas) > 1 and (schema_name or catalog_name):
        logger.error("--schema-name and --catalog-name incompatible with multiple schemas")
        raise click.exceptions.Exit(1)

    tap_tables = init_tables(
        tap_schema_name,
        tap_tables_postfix,
        tap_schemas_table,
        tap_tables_table,
        tap_columns_table,
        tap_keys_table,
        tap_key_columns_table,
    )

    if not dry_run:
        engine = create_engine(engine_url)

        if engine_url == "sqlite://":
            # In Memory SQLite - Mostly used to test
            Tap11Base.metadata.create_all(engine)

        for schema in schemas:
            tap_visitor = TapLoadingVisitor(
                engine,
                catalog_name=catalog_name,
                schema_name=schema_name,
                tap_tables=tap_tables,
            )
            tap_visitor.visit_schema(schema)
    else:
        _insert_dump = InsertDump()
        conn = create_mock_engine(make_url(engine_url), executor=_insert_dump.dump, paramstyle="pyformat")
        # After the engine is created, update the executor with the dialect
        _insert_dump.dialect = conn.dialect

        for schema in schemas:
            tap_visitor = TapLoadingVisitor.from_mock_connection(
                conn,
                catalog_name=catalog_name,
                schema_name=schema_name,
                tap_tables=tap_tables,
            )
            tap_visitor.visit_schema(schema)

207 

208 

@cli.command("modify-tap")
@click.option("--start-schema-at", type=int, help="Rewrite index for tap:schema_index", default=0)
@click.argument("files", nargs=-1, type=click.File())
def modify_tap(start_schema_at: int, files: Iterable[io.TextIOBase]) -> None:
    """Modify TAP information in Felis schema FILES.

    This command has some utilities to aid in rewriting felis FILES
    in specific ways. It will write out a merged version of these files.
    """
    reassigned = 0  # number of schemas given a fresh index so far
    nodes = []
    for stream in files:
        document = yaml.load(stream, Loader=yaml.SafeLoader)
        if "@graph" not in document:
            document["@type"] = "felis:Schema"
        document["@context"] = DEFAULT_CONTEXT

        index = document.get("tap:schema_index")
        # Re-number when the index is missing/falsy or lies above the
        # requested starting point; otherwise keep the file's own value.
        if not index or index > start_schema_at:
            index = start_schema_at + reassigned
            reassigned += 1
        document["tap:schema_index"] = index

        nodes.extend(jsonld.flatten(document))

    # Re-frame the combined node list and write it out as YAML.
    _dump(_normalize({"@context": DEFAULT_CONTEXT, "@graph": nodes}, embed="@always"))

234 

235 

@cli.command("basic-check")
@click.argument("file", type=click.File())
def basic_check(file: io.TextIOBase) -> None:
    """Perform a basic check on a felis FILE.

    This performs a very basic check to ensure required fields are
    populated and basic semantics are okay. It does not ensure semantics
    are valid for other commands like create-all or load-tap.
    """
    schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
    # Force Context and Schema Type: whatever the file declares for these two
    # keys is overwritten before the document is checked.
    schema_obj["@type"] = "felis:Schema"
    schema_obj["@context"] = DEFAULT_CONTEXT
    CheckingVisitor().visit_schema(schema_obj)

251 

252 

@cli.command("normalize")
@click.argument("file", type=click.File())
def normalize(file: io.TextIOBase) -> None:
    """Normalize a Felis FILE.

    Takes a felis schema FILE, expands it (resolving the full URLs),
    then compacts it, and finally produces output in the canonical
    format.

    (This is most useful in some debugging scenarios)

    See Also :

    https://json-ld.org/spec/latest/json-ld/#expanded-document-form
    https://json-ld.org/spec/latest/json-ld/#compacted-document-form
    """
    document = yaml.load(file, Loader=yaml.SafeLoader)
    # Force Context and Schema Type
    document["@type"] = "felis:Schema"
    document["@context"] = DEFAULT_CONTEXT
    # Expand first, then frame/compact via _normalize, then print as YAML.
    _dump(_normalize(jsonld.expand(document), embed="@always"))

276 

277 

@cli.command("merge")
@click.argument("files", nargs=-1, type=click.File())
def merge(files: Iterable[io.TextIOBase]) -> None:
    """Merge a set of Felis FILES.

    This will expand out the felis FILES so that it is easy to
    override values (using @Id), then normalize to a single
    output.
    """
    # Flatten every input file into one list of nodes.
    nodes = []
    for stream in files:
        document = yaml.load(stream, Loader=yaml.SafeLoader)
        if "@graph" not in document:
            document["@type"] = "felis:Schema"
        document["@context"] = DEFAULT_CONTEXT
        nodes.extend(jsonld.flatten(document))

    # Fold nodes sharing an "@id" together; keys from later nodes win.
    by_id: MutableMapping[str, Any] = {}
    for node in nodes:
        node_id = node["@id"]
        # First occurrence registers the node itself; later occurrences get
        # back the node that is already registered.
        target = by_id.setdefault(node_id, node)
        if target != node:
            logger.debug(f"Overwriting {node_id}")
        target.update(node)

    combined = {"@context": DEFAULT_CONTEXT, "@graph": list(by_id.values())}
    _dump(_normalize(combined, embed="@always"))

305 

306 

@cli.command("validate")
@click.option(
    "-s",
    "--schema-name",
    help="Schema name for validation",
    type=click.Choice(["RSP", "default"]),
    default="default",
)
@click.option("-d", "--require-description", is_flag=True, help="Require description for all objects")
@click.argument("files", nargs=-1, type=click.File())
def validate(schema_name: str, require_description: bool, files: Iterable[io.TextIOBase]) -> None:
    """Validate one or more felis YAML files."""
    model = get_schema(schema_name)
    logger.info(f"Using schema '{model.__name__}'")

    if require_description:
        Schema.require_description(True)

    # Keep going after a failure so every file gets reported; the non-zero
    # exit is raised only once all files have been processed.
    any_invalid = False
    for stream in files:
        stream_name = getattr(stream, "name", None)
        logger.info(f"Validating {stream_name}")
        try:
            model.model_validate(yaml.load(stream, Loader=yaml.SafeLoader))
        except ValidationError as err:
            logger.error(err)
            any_invalid = True

    if any_invalid:
        raise click.exceptions.Exit(1)

336 

337 

@cli.command("dump-json")
@click.option("-x", "--expanded", is_flag=True, help="Expand schema before dumping.")
@click.option("-f", "--framed", is_flag=True, help="Frame schema before dumping.")
@click.option("-c", "--compacted", is_flag=True, help="Compact schema before dumping.")
@click.option("-g", "--graph", is_flag=True, help="Pass graph option to compact.")
@click.argument("file", type=click.File())
def dump_json(
    file: io.TextIOBase,
    expanded: bool = False,
    compacted: bool = False,
    framed: bool = False,
    graph: bool = False,
) -> None:
    """Dump JSON representation using various JSON-LD options."""
    schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
    # Force Context and Schema Type
    schema_obj["@type"] = "felis:Schema"
    schema_obj["@context"] = DEFAULT_CONTEXT

    # The requested transformations are applied in a fixed order:
    # expand, then frame, then compact.
    if expanded:
        schema_obj = jsonld.expand(schema_obj)
    if framed:
        schema_obj = jsonld.frame(schema_obj, DEFAULT_FRAME)
    if compacted:
        # The "graph" option is only passed to compact when -g was given.
        compact_options = {"graph": True} if graph else {}
        schema_obj = jsonld.compact(schema_obj, DEFAULT_CONTEXT, options=compact_options)
    json.dump(schema_obj, sys.stdout, indent=4)

367 

368 

def _dump(obj: Mapping[str, Any]) -> None:
    """Print ``obj`` to standard output as block-style YAML.

    A local ``yaml.Dumper`` subclass is used, and the custom ``dict``
    representer is registered on that subclass.
    """

    class OrderedDumper(yaml.Dumper):
        """Local dumper that receives the custom ``dict`` representer."""

    def _represent_dict(dumper: yaml.Dumper, data: Any) -> Any:
        # The mapping is passed as its items() view rather than the dict.
        # NOTE(review): presumably this keeps the dict's own key order in the
        # output (hence "OrderedDumper") -- confirm against PyYAML.
        mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
        return dumper.represent_mapping(mapping_tag, data.items())

    OrderedDumper.add_representer(dict, _represent_dict)
    rendered = yaml.dump(obj, Dumper=OrderedDumper, default_flow_style=False)
    print(rendered)

378 

379 

def _normalize(schema_obj: Mapping[str, Any], embed: str = "@last") -> MutableMapping[str, Any]:
    """Frame, compact and reorder a JSON-LD document.

    The document is framed with ``DEFAULT_FRAME`` (using the given ``embed``
    option), compacted against ``DEFAULT_CONTEXT`` with ``graph=True``, and
    each entry of the resulting ``"@graph"`` is passed through
    ``ReorderingVisitor(add_type=True).visit_schema``.

    In the returned mapping, ``"@graph"`` is a list when there is more than
    one entry and the single entry itself otherwise.
    """
    framed = jsonld.frame(schema_obj, DEFAULT_FRAME, options={"embed": embed})
    compacted = jsonld.compact(framed, DEFAULT_CONTEXT, options={"graph": True})
    reordered = [ReorderingVisitor(add_type=True).visit_schema(node) for node in compacted["@graph"]]
    # Callers must handle both shapes: list (many) or bare entry (one).
    compacted["@graph"] = reordered[0] if len(reordered) <= 1 else reordered
    return compacted

387 

388 

class InsertDump:
    """An Insert Dumper for SQL statements."""

    # Dialect handed to ``sql.compile``; stays None until a caller assigns it.
    dialect: Any = None

    def dump(self, sql: Any, *multiparams: Any, **params: Any) -> None:
        """Compile ``sql`` and print it with its bound parameters inlined.

        Parameters
        ----------
        sql
            Object exposing ``compile(dialect=...)``; the compiled result
            must render as text via ``str()`` and expose a ``params``
            mapping.
        *multiparams, **params
            Accepted for call compatibility and not used; the values that
            get printed come from the compiled statement's own ``params``.

        Notes
        -----
        String values are wrapped in single quotes with embedded single
        quotes doubled, ``None`` is printed as ``null``, and any other value
        is substituted unchanged. The statement text is expected to use
        ``%(name)s`` placeholders whenever it has parameters.
        """
        compiled = sql.compile(dialect=self.dialect)
        sql_str = str(compiled) + ";"
        bound = compiled.params
        if not bound:
            # Nothing to substitute (e.g. DDL): print the statement as is.
            print(sql_str)
            return
        rendered = {key: self._render_literal(value) for key, value in bound.items()}
        print(sql_str % rendered)

    @staticmethod
    def _render_literal(value: Any) -> Any:
        """Return ``value`` in the form substituted into the printed SQL."""
        if isinstance(value, str):
            # Double embedded quotes so the printed literal remains valid SQL.
            escaped = value.replace("'", "''")
            return f"'{escaped}'"
        if value is None:
            return "null"
        return value

412 

413 

# Script entry point: invoke the click command group when this module is run
# directly rather than imported.
if __name__ == "__main__":
    cli()