Coverage for python/lsst/sims/catalogs/db/utils.py : 13%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import with_statement
2from __future__ import print_function
3from future import standard_library
4standard_library.install_aliases()
5from builtins import str
6from builtins import range
7import numpy as np
8from io import BytesIO
9from sqlalchemy import (types as satypes, Column, Table, Index,
10 create_engine, MetaData)
11import string
12import random
def np_to_sql_type(input_type):
    """
    Map a numpy dtype onto the sqlalchemy column type used to store it.

    input_type is an element of a numpy.dtype array

    Floats become Float(precision=16), 'int64' becomes BIGINT, 'int32'
    becomes Integer, and string types become a String whose length is the
    dtype's itemsize.  Any other dtype raises a RuntimeError.
    """
    dtype_name = input_type.name
    n_bytes = input_type.itemsize
    dtype_text = str(input_type)

    if dtype_name.startswith('float'):
        sql_type = satypes.Float(precision=16)
    elif dtype_name == 'int64':
        sql_type = satypes.BIGINT()
    elif dtype_name == 'int32':
        sql_type = satypes.Integer()
    elif dtype_name.startswith('str') or dtype_text.startswith(('S', '|S')):
        sql_type = satypes.String(length=n_bytes)
    else:
        raise RuntimeError("Do not know how to map %s to SQL" % dtype_text)

    return sql_type
# from http://stackoverflow.com/questions/2257441/python-random-string-generation-with-upper-case-letters-and-digits
def id_generator(size=8, chars=string.ascii_lowercase):
    """Return a string of `size` characters, each picked at random from `chars`."""
    picks = [random.choice(chars) for _ in range(size)]
    return ''.join(picks)
def make_engine(dbAddress):
    """
    Build a sqlalchemy engine for dbAddress (statement echoing disabled)
    together with a MetaData object bound to that engine.

    Returns the pair (engine, metadata).
    """
    dbEngine = create_engine(dbAddress, echo=False)
    return dbEngine, MetaData(bind=dbEngine)
def guessDtype(dataPath, numGuess, delimiter, **kwargs):
    """
    Infer the numpy dtype of a delimited text file from its leading lines.

    Parameters
    ----------
    dataPath is the path of the text file to inspect
    numGuess is the number of lines (counted from the top of the file,
    header included) handed to numpy.genfromtxt
    delimiter is the column separator passed to numpy.genfromtxt
    kwargs are forwarded unchanged to numpy.genfromtxt

    Returns
    -------
    The dtype of the array genfromtxt builds from those lines, with
    dtype=None and names=True (column names are read from the data).
    """
    with open(dataPath) as fh:
        headLines = [fh.readline() for _ in range(numGuess)]

    sample = BytesIO(''.join(headLines).encode())
    parsed = np.genfromtxt(sample, dtype=None, names=True,
                           delimiter=delimiter, **kwargs)
    return parsed.dtype
def createSQLTable(dtype, tableid, idCol, metadata):
    """
    Build a sqlalchemy Table whose columns mirror the fields of a numpy
    dtype, then issue metadata.create_all().

    Parameters
    ----------
    dtype is a numpy dtype describing the columns in the table
    tableid is the name of the table to be created; when it is None a
    name is produced by id_generator()
    idCol is the name of the column to flag as the Table's primary key
    metadata is the sqlalchemy MetaData object associated with the database connection

    Returns
    -------
    A sqlalchemy Table object with the columns specified by dtype
    """
    # dtype.names is None for a dtype without fields; treat that as "no columns".
    sqlColumns = []
    for colName in (dtype.names or ()):
        colType = np_to_sql_type(dtype[colName])
        sqlColumns.append(Column(colName, colType, primary_key=(idCol == colName)))

    tableName = id_generator() if tableid is None else tableid
    datatable = Table(tableName, metadata, *sqlColumns)
    metadata.create_all()
    return datatable
def _insertChunk(text, datatable, delimiter, dtype, engine, genfromtxtKwargs):
    """Parse one block of delimited text and insert its rows into datatable."""
    dataArr = np.genfromtxt(BytesIO(text.encode()), dtype=dtype,
                            delimiter=delimiter, **genfromtxtKwargs)
    # A block holding a single line parses to a 0-d array, which cannot be
    # iterated; promote it so every block goes through the same code path.
    dataArr = np.atleast_1d(dataArr)
    names = dataArr.dtype.names
    # .item() converts each numpy scalar to a native Python value
    # (np.asscalar, used previously, is no longer available in current NumPy).
    rows = [{name: row[name].item() for name in names} for row in dataArr]
    # Do not issue an INSERT with an empty parameter list.
    if rows:
        engine.execute(datatable.insert(), rows)


def loadTable(datapath, datatable, delimiter, dtype, engine,
              indexCols=(), skipLines=1, chunkSize=100000, **kwargs):
    """
    Load a delimited text file into an existing database table.

    Parameters
    ----------
    datapath is the path of the text file to read
    datatable is the sqlalchemy Table receiving the rows
    delimiter is the column separator passed to numpy.genfromtxt
    dtype is the numpy dtype describing the columns; its field names are
    used as the column names of the inserted rows
    engine is the sqlalchemy engine used to execute the inserts and to
    create the indexes
    indexCols is an iterable of index specifications; each entry is either
    a single column name or an iterable of column names (composite index)
    skipLines is the number of leading lines of the file to discard
    chunkSize is the number of lines parsed and inserted per batch
    kwargs are forwarded unchanged to numpy.genfromtxt

    Returns
    -------
    None
    """
    with open(datapath) as fh:
        for _ in range(skipLines):
            fh.readline()

        pending = []
        for cnt, line in enumerate(fh, 1):
            pending.append(line)
            if cnt % chunkSize == 0:
                print("Loading chunk #%i" % (cnt // chunkSize))
                _insertChunk(''.join(pending), datatable, delimiter, dtype,
                             engine, kwargs)
                pending = []

        # Clean up the last (partial) chunk
        if pending:
            _insertChunk(''.join(pending), datatable, delimiter, dtype,
                         engine, kwargs)

    for col in indexCols:
        # A plain string is iterable on Python 3 but names a single column,
        # so it must not be treated as a collection of column names.
        if hasattr(col, "__iter__") and not isinstance(col, (str, bytes)):
            colNames = list(col)
            print("Creating index on %s" % (",".join(colNames)))
            index = Index('%sidx' % ''.join(colNames),
                          *[datatable.c[c] for c in colNames])
        else:
            print("Creating index on %s" % (col))
            index = Index('%sidx' % col, datatable.c[col])

        index.create(engine)
def loadData(dataPath, dtype, delimiter, tableId, idCol, engine, metaData, numGuess, append=False, **kwargs):
    """
    Load a delimited text file into a database table, creating the table
    when it does not exist yet.

    Parameters
    ----------
    dataPath is the path of the text file to load
    dtype is the numpy dtype of the columns; when None it is guessed from
    the first numGuess lines of the file via guessDtype
    delimiter is the column separator of the text file
    tableId is the name of the target table; None lets createSQLTable
    choose a name
    idCol is the name of the column used as primary key when the table is
    created
    engine is the sqlalchemy engine connected to the database
    metaData is the sqlalchemy MetaData object associated with engine
    numGuess is the number of lines inspected when dtype is None
    append must be True to add rows to a table that already exists
    kwargs are forwarded to loadTable (they are not used when guessing
    the dtype)

    Returns
    -------
    The name of the table the data was loaded into

    Raises
    ------
    ValueError if append is True but tableId is None, or if the table
    already exists and append is False
    """
    if dtype is None:
        dtype = guessDtype(dataPath, numGuess, delimiter)

    tableExists = False
    if tableId is not None:
        # Scope the probing connection so it is returned to the pool instead
        # of being left open after the existence check.
        with engine.connect() as conn:
            tableExists = engine.dialect.has_table(conn, tableId)

    if append and tableId is None:
        raise ValueError("Cannot append if the table name is missing")
    if tableExists and not append:
        raise ValueError("Append is False but table exists")

    if tableExists:
        # Appending: reflect the existing table definition from the database.
        dataTable = Table(tableId, metaData, autoload=True)
    else:
        dataTable = createSQLTable(dtype, tableId, idCol, metaData)

    loadTable(dataPath, dataTable, delimiter, dtype, engine, **kwargs)
    return dataTable.name