Coverage for python/felis/cli.py: 49%

221 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-23 10:44 +0000

1# This file is part of felis. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import io 

23import json 

24import logging 

25import sys 

26from collections.abc import Iterable, Mapping, MutableMapping 

27from typing import Any 

28 

29import click 

30import yaml 

31from pydantic import ValidationError 

32from pyld import jsonld 

33from sqlalchemy.engine import Engine, create_engine, create_mock_engine, make_url 

34from sqlalchemy.engine.mock import MockConnection 

35 

36from . import DEFAULT_CONTEXT, DEFAULT_FRAME, __version__ 

37from .check import CheckingVisitor 

38from .datamodel import Schema 

39from .sql import SQLVisitor 

40from .tap import Tap11Base, TapLoadingVisitor, init_tables 

41from .utils import ReorderingVisitor 

42 

43logger = logging.getLogger("felis") 

44 

45 

@click.group()
@click.version_option(__version__)
def cli() -> None:
    """Felis Command Line Tools."""
    # Configure the root logger at INFO level; this is the only logging
    # setup visible in this module.
    logging.basicConfig(level=logging.INFO)

51 

52 

@cli.command("create-all")
@click.option("--engine-url", envvar="ENGINE_URL", help="SQLAlchemy Engine URL")
@click.option("--schema-name", help="Alternate Schema Name for Felis File")
@click.option("--dry-run", is_flag=True, help="Dry Run Only. Prints out the DDL that would be executed")
@click.argument("file", type=click.File())
def create_all(engine_url: str, schema_name: str, dry_run: bool, file: io.TextIOBase) -> None:
    """Create schema objects from the Felis FILE."""
    # NOTE: the docstring above is rendered by click as the command help, so
    # it is kept verbatim.
    parsed_yaml = yaml.load(file, Loader=yaml.SafeLoader)
    sql_schema = SQLVisitor(schema_name=schema_name).visit_schema(parsed_yaml)
    metadata = sql_schema.metadata

    target: Engine | MockConnection
    if dry_run:
        # Mock engine: every statement is handed to InsertDump.dump, which
        # prints it instead of executing it.
        dumper = InsertDump()
        target = create_mock_engine(make_url(engine_url), executor=dumper.dump)
        # The dialect is only known once the mock engine exists.
        dumper.dialect = target.dialect
    else:
        target = create_engine(engine_url)

    metadata.create_all(target)

74 

75 

@cli.command("init-tap")
@click.option("--tap-schema-name", help="Alt Schema Name for TAP_SCHEMA")
@click.option("--tap-schemas-table", help="Alt Table Name for TAP_SCHEMA.schemas")
@click.option("--tap-tables-table", help="Alt Table Name for TAP_SCHEMA.tables")
@click.option("--tap-columns-table", help="Alt Table Name for TAP_SCHEMA.columns")
@click.option("--tap-keys-table", help="Alt Table Name for TAP_SCHEMA.keys")
@click.option("--tap-key-columns-table", help="Alt Table Name for TAP_SCHEMA.key_columns")
@click.argument("engine-url")
def init_tap(
    engine_url: str,
    tap_schema_name: str,
    tap_schemas_table: str,
    tap_tables_table: str,
    tap_columns_table: str,
    tap_keys_table: str,
    tap_key_columns_table: str,
) -> None:
    """Initialize TAP 1.1 TAP_SCHEMA objects.

    Please verify the schema/catalog you are executing this in in your
    engine URL.
    """
    # echo=True makes SQLAlchemy log the statements it emits.
    engine = create_engine(engine_url, echo=True)
    # The load-tap command in this file calls init_tables positionally with
    # the table-name postfix as the SECOND argument.  This command has no
    # postfix option, so pass None in that slot; omitting it would shift
    # every table name one position to the left (tap_schemas_table would be
    # taken as the postfix, and so on).
    # NOTE(review): assumes init_tables accepts None for the postfix —
    # confirm against felis.tap.init_tables.
    init_tables(
        tap_schema_name,
        None,
        tap_schemas_table,
        tap_tables_table,
        tap_columns_table,
        tap_keys_table,
        tap_key_columns_table,
    )
    # Create every table registered on the Tap11Base metadata.
    Tap11Base.metadata.create_all(engine)

108 

109 

@cli.command("load-tap")
@click.option("--engine-url", envvar="ENGINE_URL", help="SQLAlchemy Engine URL to catalog")
@click.option("--schema-name", help="Alternate Schema Name for Felis file")
@click.option("--catalog-name", help="Catalog Name for Schema")
@click.option("--dry-run", is_flag=True, help="Dry Run Only. Prints out the DDL that would be executed")
@click.option("--tap-schema-name", help="Alt Schema Name for TAP_SCHEMA")
@click.option("--tap-tables-postfix", help="Postfix for TAP table names")
@click.option("--tap-schemas-table", help="Alt Table Name for TAP_SCHEMA.schemas")
@click.option("--tap-tables-table", help="Alt Table Name for TAP_SCHEMA.tables")
@click.option("--tap-columns-table", help="Alt Table Name for TAP_SCHEMA.columns")
@click.option("--tap-keys-table", help="Alt Table Name for TAP_SCHEMA.keys")
@click.option("--tap-key-columns-table", help="Alt Table Name for TAP_SCHEMA.key_columns")
@click.argument("file", type=click.File())
def load_tap(
    engine_url: str,
    schema_name: str,
    catalog_name: str,
    dry_run: bool,
    tap_schema_name: str,
    tap_tables_postfix: str,
    tap_schemas_table: str,
    tap_tables_table: str,
    tap_columns_table: str,
    tap_keys_table: str,
    tap_key_columns_table: str,
    file: io.TextIOBase,
) -> None:
    """Load TAP metadata from a Felis FILE.

    This command loads the associated TAP metadata from a Felis FILE
    to the TAP_SCHEMA tables.
    """
    top_level_object = yaml.load(file, Loader=yaml.SafeLoader)
    schema_obj: dict
    if isinstance(top_level_object, dict):
        # A single schema document: tag it as a schema unless it already
        # carries its own "@graph", and force the default context.
        schema_obj = top_level_object
        if "@graph" not in schema_obj:
            schema_obj["@type"] = "felis:Schema"
        schema_obj["@context"] = DEFAULT_CONTEXT
    elif isinstance(top_level_object, list):
        # A bare list of schemas: wrap it in a graph document.
        schema_obj = {"@context": DEFAULT_CONTEXT, "@graph": top_level_object}
    else:
        logger.error("Schema object not of recognizable type")
        raise click.exceptions.Exit(1)

    normalized = _normalize(schema_obj, embed="@always")

    # _normalize collapses a one-schema graph to a bare mapping.  Coerce it
    # to a list BEFORE counting: len() of the mapping would count its keys,
    # not schemas, and wrongly reject --schema-name/--catalog-name for a
    # single-schema file.
    schemas = normalized["@graph"]
    if isinstance(schemas, dict):
        schemas = [schemas]

    if len(schemas) > 1 and (schema_name or catalog_name):
        logger.error("--schema-name and --catalog-name incompatible with multiple schemas")
        raise click.exceptions.Exit(1)

    tap_tables = init_tables(
        tap_schema_name,
        tap_tables_postfix,
        tap_schemas_table,
        tap_tables_table,
        tap_columns_table,
        tap_keys_table,
        tap_key_columns_table,
    )

    if not dry_run:
        engine = create_engine(engine_url)

        if engine_url == "sqlite://":
            # In Memory SQLite - Mostly used to test
            Tap11Base.metadata.create_all(engine)

        for schema in schemas:
            tap_visitor = TapLoadingVisitor(
                engine,
                catalog_name=catalog_name,
                schema_name=schema_name,
                tap_tables=tap_tables,
            )
            tap_visitor.visit_schema(schema)
    else:
        # Dry run: statements are routed to InsertDump.dump and printed
        # instead of being executed.
        _insert_dump = InsertDump()
        conn = create_mock_engine(make_url(engine_url), executor=_insert_dump.dump, paramstyle="pyformat")
        # After the engine is created, update the executor with the dialect
        _insert_dump.dialect = conn.dialect

        for schema in schemas:
            tap_visitor = TapLoadingVisitor.from_mock_connection(
                conn,
                catalog_name=catalog_name,
                schema_name=schema_name,
                tap_tables=tap_tables,
            )
            tap_visitor.visit_schema(schema)

204 

205 

@cli.command("modify-tap")
@click.option("--start-schema-at", type=int, help="Rewrite index for tap:schema_index")
@click.argument("files", nargs=-1, type=click.File())
def modify_tap(start_schema_at: int, files: Iterable[io.TextIOBase]) -> None:
    """Modify TAP information in Felis schema FILES.

    This command has some utilities to aid in rewriting felis FILES
    in specific ways. It will write out a merged version of these files.
    """
    # NOTE: the docstring above is rendered by click as the command help, so
    # it is kept verbatim.
    reassigned = 0  # how many schemas have been given a fresh index so far
    flattened_nodes = []
    for stream in files:
        document = yaml.load(stream, Loader=yaml.SafeLoader)
        if "@graph" not in document:
            document["@type"] = "felis:Schema"
        document["@context"] = DEFAULT_CONTEXT

        index = document.get("tap:schema_index")
        # Re-index when the schema has no (or a falsy) index, or when its
        # index lies beyond the requested starting point.
        if not index or index > start_schema_at:
            index = start_schema_at + reassigned
            reassigned += 1
        document["tap:schema_index"] = index

        flattened_nodes.extend(jsonld.flatten(document))

    combined = {"@context": DEFAULT_CONTEXT, "@graph": flattened_nodes}
    _dump(_normalize(combined, embed="@always"))

231 

232 

@cli.command("basic-check")
@click.argument("file", type=click.File())
def basic_check(file: io.TextIOBase) -> None:
    """Perform a basic check on a felis FILE.

    This performs a very basic check to ensure required fields are
    populated and basic semantics are okay. It does not ensure semantics
    are valid for other commands like create-all or load-tap.
    """
    schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
    # Force Context and Schema Type
    schema_obj["@type"] = "felis:Schema"
    schema_obj["@context"] = DEFAULT_CONTEXT
    # Walk the schema with the checking visitor.
    check_visitor = CheckingVisitor()
    check_visitor.visit_schema(schema_obj)

248 

249 

@cli.command("normalize")
@click.argument("file", type=click.File())
def normalize(file: io.TextIOBase) -> None:
    """Normalize a Felis FILE.

    Takes a felis schema FILE, expands it (resolving the full URLs),
    then compacts it, and finally produces output in the canonical
    format.

    (This is most useful in some debugging scenarios)

    See Also :

    https://json-ld.org/spec/latest/json-ld/#expanded-document-form
    https://json-ld.org/spec/latest/json-ld/#compacted-document-form
    """
    # NOTE: the docstring above is rendered by click as the command help, so
    # it is kept verbatim.
    document = yaml.load(file, Loader=yaml.SafeLoader)
    # Always overwrite the type and context, whatever the file declared.
    document["@type"] = "felis:Schema"
    document["@context"] = DEFAULT_CONTEXT
    # Expand, then frame/compact/reorder via _normalize, then print as YAML.
    _dump(_normalize(jsonld.expand(document), embed="@always"))

273 

274 

@cli.command("merge")
@click.argument("files", nargs=-1, type=click.File())
def merge(files: Iterable[io.TextIOBase]) -> None:
    """Merge a set of Felis FILES.

    This will expand out the felis FILES so that it is easy to
    override values (using @Id), then normalize to a single
    output.
    """
    # NOTE: the docstring above is rendered by click as the command help, so
    # it is kept verbatim.
    flattened_nodes = []
    for stream in files:
        document = yaml.load(stream, Loader=yaml.SafeLoader)
        if "@graph" not in document:
            document["@type"] = "felis:Schema"
        document["@context"] = DEFAULT_CONTEXT
        flattened_nodes.extend(jsonld.flatten(document))

    # Fold nodes that share an "@id": the first occurrence is stored, and
    # later occurrences are merged into it key by key (later values win).
    nodes_by_id: MutableMapping[str, Any] = {}
    for node in flattened_nodes:
        node_id = node["@id"]
        stored = nodes_by_id.setdefault(node_id, node)
        if stored and stored != node:
            logger.debug(f"Overwriting {node_id}")
            stored.update(node)

    combined = {"@context": DEFAULT_CONTEXT, "@graph": list(nodes_by_id.values())}
    _dump(_normalize(combined, embed="@always"))

302 

303 

@cli.command("validate")
@click.argument("files", nargs=-1, type=click.File())
def validate(files: Iterable[io.TextIOBase]) -> None:
    """Validate one or more felis YAML files."""
    # NOTE: the docstring above is rendered by click as the command help, so
    # it is kept verbatim.
    any_invalid = False
    for stream in files:
        file_name = getattr(stream, "name", None)
        logger.info(f"Validating {file_name}")
        try:
            Schema.model_validate(yaml.load(stream, Loader=yaml.SafeLoader))
        except ValidationError as exc:
            # Keep going so every file gets reported before exiting.
            logger.error(exc)
            any_invalid = True

    if any_invalid:
        # Exit status 1 when at least one file failed validation.
        raise click.exceptions.Exit(1)

319 

320 

@cli.command("dump-json")
@click.option("-x", "--expanded", is_flag=True, help="Expand schema before dumping.")
@click.option("-f", "--framed", is_flag=True, help="Frame schema before dumping.")
@click.option("-c", "--compacted", is_flag=True, help="Compact schema before dumping.")
@click.option("-g", "--graph", is_flag=True, help="Pass graph option to compact.")
@click.argument("file", type=click.File())
def dump_json(
    file: io.TextIOBase,
    expanded: bool = False,
    compacted: bool = False,
    framed: bool = False,
    graph: bool = False,
) -> None:
    """Dump JSON representation using various JSON-LD options."""
    schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
    # Force Context and Schema Type
    schema_obj["@type"] = "felis:Schema"
    schema_obj["@context"] = DEFAULT_CONTEXT

    # The transformations are applied in a fixed order (expand, frame,
    # compact) regardless of the order of the flags on the command line.
    if expanded:
        schema_obj = jsonld.expand(schema_obj)
    if framed:
        schema_obj = jsonld.frame(schema_obj, DEFAULT_FRAME)
    if compacted:
        # --graph only has an effect together with --compacted.
        options = {"graph": True} if graph else {}
        schema_obj = jsonld.compact(schema_obj, DEFAULT_CONTEXT, options=options)
    json.dump(schema_obj, sys.stdout, indent=4)

350 

351 

def _dump(obj: Mapping[str, Any]) -> None:
    """Print *obj* to standard output as block-style YAML.

    A private Dumper subclass is used so the custom ``dict`` representer is
    registered on it rather than on ``yaml.Dumper`` itself.
    """

    class _MappingDumper(yaml.Dumper):
        pass

    def _represent_dict(dumper: yaml.Dumper, data: Any) -> Any:
        # Hand the items view (not the dict) to represent_mapping;
        # presumably this is what keeps keys in insertion order — confirm
        # against PyYAML's represent_mapping.
        tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
        return dumper.represent_mapping(tag, data.items())

    _MappingDumper.add_representer(dict, _represent_dict)
    rendered = yaml.dump(obj, Dumper=_MappingDumper, default_flow_style=False)
    print(rendered)

361 

362 

def _normalize(schema_obj: Mapping[str, Any], embed: str = "@last") -> MutableMapping[str, Any]:
    """Frame, compact and reorder a JSON-LD schema document.

    The document is framed with ``DEFAULT_FRAME`` (using *embed* as the
    framing ``embed`` option), compacted against ``DEFAULT_CONTEXT`` with the
    ``graph`` option enabled, and each entry of the resulting ``"@graph"`` is
    passed through ``ReorderingVisitor(add_type=True)``.

    The returned mapping's ``"@graph"`` is a list when it holds more than one
    schema and the bare schema object otherwise.
    """
    framed_doc = jsonld.frame(schema_obj, DEFAULT_FRAME, options={"embed": embed})
    result = jsonld.compact(framed_doc, DEFAULT_CONTEXT, options={"graph": True})
    reordered = [ReorderingVisitor(add_type=True).visit_schema(node) for node in result["@graph"]]
    if len(reordered) > 1:
        result["@graph"] = reordered
    else:
        # Single schema: unwrap it from the list.
        result["@graph"] = reordered[0]
    return result

370 

371 

class InsertDump:
    """An Insert Dumper for SQL statements.

    ``dump`` is used in this module as the ``executor`` of a SQLAlchemy mock
    engine: instead of executing a statement it compiles it and prints the
    resulting SQL, with bound parameters substituted inline.
    """

    # Dialect used to compile statements.  It is assigned by the caller after
    # the mock engine has been created; None until then.
    dialect: Any = None

    @staticmethod
    def _render_value(value: Any) -> Any:
        """Return *value* in the form it should take inside the printed SQL."""
        if isinstance(value, str):
            # Double embedded single quotes so the printed literal stays a
            # single valid SQL string (e.g. O'Brien -> 'O''Brien').
            escaped = value.replace("'", "''")
            return f"'{escaped}'"
        if value is None:
            return "null"
        return value

    def dump(self, sql: Any, *multiparams: Any, **params: Any) -> None:
        """Compile *sql* for ``self.dialect`` and print it.

        Parameters
        ----------
        sql
            Object exposing ``compile(dialect=...)``; the compiled result
            must support ``str()`` and expose a ``params`` mapping.
        *multiparams, **params
            Accepted for compatibility with the executor call signature;
            not used.
        """
        compiled = sql.compile(dialect=self.dialect)
        sql_str = str(compiled) + ";"
        bound = compiled.params
        if not bound:
            # Nothing to substitute: print the statement as compiled.
            print(sql_str)
            return
        rendered = {key: self._render_value(value) for key, value in bound.items()}
        # The compiled text is expected to use %(name)s placeholders.
        print(sql_str % rendered)

395 

396 

# Run the click command group when this module is executed as a script.
if __name__ == "__main__":
    cli()