Coverage for python/lsst/obs/sdss/yanny.py : 6%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# Licensed under a 3-clause BSD style license - see yanny-LICENSE.rst
# -*- coding: utf-8 -*-
"""Python library for reading & writing yanny files.

yanny is an object-oriented interface to FTCL/yanny data files following
these specifications_.

The format of the returned object is similar to that returned by
``read_yanny()`` in the efftickle perl package (in the yannytools product).

Currently multidimensional arrays are only supported for type ``char``, and a
close reading of the specifications indicates that multidimensional arrays
were only ever intended to be supported for type ``char``. So no
multidimensional arrays, sorry.

.. _specifications: http://www.sdss3.org/dr8/software/par.php

LSST NOTE:
This file was copied from the version in pydl 0.4.1:

https://github.com/weaverba137/pydl/blob/0.4.1/pydl/pydlutils/yanny/yanny.py

It is used for the makeCcd step of makeCamera, to read the opConfig and opECalib
files in etc/.
"""
27#
28# Modules
29#
30import re
31import os
32import os.path
33import datetime
34import numpy
class yanny(dict):  # noqa N801
    """An object interface to a yanny file.

    Create a yanny object using a yanny file, `filename`. If the file exists,
    it is read, & the dict structure of the object will be basically the
    same as that returned by ``read_yanny()`` in the efftickle package.

    If the file does not exist, or if no filename is given, a blank
    structure is returned. Other methods allow for subsequent writing
    to the file.

    Parameters
    ----------
    filename : str or file-like, optional
        The name of a yanny file or a file-like object representing a yanny file.
    np : bool, optional
        If ``True``, data in a yanny file will be converted into a NumPy record
        array. Default is ``False``.
    debug : bool, optional
        If ``True``, some simple debugging statements will be turned on. Default
        is ``False``.

    Attributes
    ----------
    np : bool
        If True, data in a yanny file will be converted into a NumPy record
        array.
    debug : bool
        If True, some simple debugging statements will be turned on.
    filename : str
        The name of a yanny parameter file. If a file-like object was used
        to initialize the object, this will have the value 'in_memory.par'.
    _contents : str
        The complete contents of a yanny parameter file.
    _struct_type_caches : dict
        A dictionary of dictionaries, one dictionary for every structure
        definition in a yanny parameter file. Contains the types of
        each column.
    _struct_isarray_caches : dict
        A dictionary of dictionaries, one dictionary for every structure
        definition in a yanny parameter file. Contains a boolean value
        for every column.
    _enum_cache : dict
        Initially ``None``, this attribute is initialized the first time
        the ``isenum()`` method is called. The keyword is the name of the
        enum type, the value is a list of the possible values of that type.
    """

    # Patterns shared by the parsing helpers, compiled once. The bracket
    # characters inside character classes are escaped so that modern Python
    # does not emit a "possible nested set" FutureWarning.
    _STRUCT_DEF_RE = re.compile(r'typedef\s+struct\s*\{[^}]+\}\s*\w+\s*;')
    _ENUM_DEF_RE = re.compile(r'typedef\s+enum\s*\{[^}]+\}\s*\w+\s*;')
    _STRUCT_PARTS_RE = re.compile(r'typedef\s+struct\s*\{([^}]+)\}\s*(\w*)\s*;')
    _ENUM_PARTS_RE = re.compile(r'typedef\s+enum\s*\{([^}]+)\}\s*(\w+)\s*;')
    _ARRAY_SUFFIX_RE = re.compile(r'[\[<].*[\]>]$')
    _CHAR_ARRAY_RE = re.compile(r'char[\[<]\d*[\]>][\[<]\d*[\]>]')
    _COMMENT_LINE_RE = re.compile(r'^\s*#')
    _BLANK_RE = re.compile(r'^\s*$')
    _DOUBLE_BRACES_RE = re.compile(r'\{\s*\{\s*\}\s*\}')

    @staticmethod
    def get_token(string):
        """Removes the first 'word' from string.

        If the 'word' is enclosed in double quotes, it returns the
        contents of the double quotes. If the 'word' is enclosed in
        braces, it returns the contents of the braces, but does not
        attempt to split the array. If the 'word' is the last word of the
        string, remainder is set equal to the empty string. This is
        basically a wrapper on some convenient regular expressions.

        Parameters
        ----------
        string : str
            A string containing words.

        Returns
        -------
        get_token : tuple
            A tuple containing the first word and the remainder of the string.
            An empty input yields ``('', '')``.

        Examples
        --------
        >>> from pydl.pydlutils.yanny import yanny
        >>> yanny.get_token("The quick brown fox")
        ('The', 'quick brown fox')
        """
        if not string:
            # Nothing to tokenize; avoid indexing into an empty string.
            return ('', '')
        if string[0] == '"':
            (word, remainder) = re.search(r'^"([^"]*)"\s*(.*)', string).groups()
        elif string[0] == '{':
            (word, remainder) = re.search(r'^\{\s*([^}]*)\s*\}\s*(.*)', string).groups()
        else:
            pieces = re.split(r'\s+', string, maxsplit=1)
            word = pieces[0]
            remainder = pieces[1] if len(pieces) > 1 else ''
        if remainder is None:
            remainder = ''
        return (word, remainder)

    @staticmethod
    def protect(x):
        """Used to appropriately quote string that might contain whitespace.

        This method is mostly for internal use by the yanny object.

        Parameters
        ----------
        x : str
            The data to protect. Non-string values are converted with
            ``str()``; ``numpy.bytes_`` values are decoded.

        Returns
        -------
        protect : str
            The data with white space protected by quotes. Empty strings and
            strings containing ``#`` are quoted as well.

        Examples
        --------
        >>> from pydl.pydlutils.yanny import yanny
        >>> yanny.protect('This string contains whitespace.')
        '"This string contains whitespace."'
        >>> yanny.protect('This string contains a #hashtag.')
        '"This string contains a #hashtag."'
        """
        s = x.decode() if isinstance(x, numpy.bytes_) else str(x)
        needs_quotes = len(s) == 0 or '#' in s or re.search(r'\s+', s) is not None
        if needs_quotes:
            return '"' + s + '"'
        return s

    @staticmethod
    def trailing_comment(line):
        """Identify a trailing comment and strip it.

        This routine works on the theory that a properly quoted comment mark
        will be surrounded by an odd number of double quotes, & we can
        skip to searching for the last one in the line.

        Parameters
        ----------
        line : str
            A line from a yanny file potentially containing trailing comments.

        Returns
        -------
        trailing_comment : str
            The line with any trailing comment and any residual white space
            trimmed off.

        Notes
        -----
        This may fail in certain pathological cases, for example if a
        real trailing comment contains a single double-quote::

            # a 'pathological" trailing comment

        or if someone is over-enthusiastically commenting::

            # # # # # I like # characters.

        Examples
        --------
        >>> from pydl.pydlutils.yanny import yanny
        >>> yanny.trailing_comment('mystruct 1234 "#hashtag" # a comment.')
        'mystruct 1234 "#hashtag"'
        >>> yanny.trailing_comment('mystruct 1234 "#hashtag" # a "comment".')
        'mystruct 1234 "#hashtag"'
        """
        lastmark = line.rfind('#')
        if lastmark < 0:
            return line
        # An even number of double quotes after the last '#' means the mark
        # is not inside a quoted string, so it starts a real comment.
        if line[lastmark:].count('"') % 2 == 0:
            return line[0:lastmark].rstrip()
        return line

    @staticmethod
    def dtype_to_struct(dt, structname='mystruct', enums=None):
        """Convert a NumPy dtype object describing a record array to
        a typedef struct statement.

        The second argument is the name of the structure.
        If any of the columns are enum types, enums must
        be a dictionary with the keys the column names, and the values
        are a tuple containing the name of the enum type as the first item
        and a tuple or list of possible values as the second item.

        Parameters
        ----------
        dt : numpy.dtype
            The dtype of a NumPy record array.
        structname : str, optional
            The name to give the structure in the yanny file. Defaults to
            'mystruct'; the name is upper-cased in the output.
        enums : dict, optional
            A dictionary containing enum information. See details above.
            ``None`` (the default) means that no column is an enum.

        Returns
        -------
        dtype_to_struct : dict
            A dictionary suitable for setting the 'symbols' dictionary of a new
            yanny object.
        """
        dtmap = {'i2': 'short', 'i4': 'int', 'i8': 'long', 'f4': 'float',
                 'f8': 'double'}
        if enums is None:
            # Treat "no enums" as an empty mapping so the membership tests
            # below work for string columns.
            enums = dict()
        returnenums = list()
        for e in enums:
            lines = ['typedef enum {']
            for n in enums[e][1]:
                lines.append(" {0},".format(n))
            # The last enum value must not be followed by a comma.
            lines[-1] = lines[-1].strip(',')
            lines.append('}} {0};'.format(enums[e][0].upper()))
            returnenums.append("\n".join(lines))
        lines = ['typedef struct {']
        for c in dt.names:
            if dt[c].kind == 'V':
                # Sub-array column: take the element type and first dimension.
                t = dt[c].subdtype[0].str[1:]
                length = dt[c].subdtype[1][0]
                s = dt[c].subdtype[0].itemsize
            else:
                t = dt[c].str[1:]
                length = 0
                s = dt[c].itemsize
            is_string = t[0] == 'S'
            is_enum = is_string and c in enums
            line = ' '
            if is_enum:
                line += enums[c][0].upper()
            elif is_string:
                line += 'char'
            else:
                line += dtmap[t]
            line += ' {0}'.format(c)
            if length > 0:
                line += "[{0:d}]".format(length)
            if is_string and not is_enum:
                line += "[{0:d}]".format(s)
            line += ';'
            lines.append(line)
        lines.append('}} {0};'.format(structname.upper()))
        return {structname.upper(): list(dt.names), 'enum': returnenums, 'struct': ["\n".join(lines)]}

    def __init__(self, filename=None, np=False, debug=False):
        """Create a yanny object using a yanny file.
        """
        #
        # The symbol hash is inherited from the old read_yanny
        #
        self['symbols'] = dict()
        #
        # Create special attributes that contain the internal status of the object
        # this should prevent overlap with keywords in the data files
        #
        self.filename = ''
        self._contents = ''
        #
        # Since the re is expensive, cache the structure types keyed by the field.
        # Create a dictionary for each structure found.
        #
        self._struct_type_caches = dict()
        self._struct_isarray_caches = dict()
        self._enum_cache = None
        #
        # Optionally convert numeric data into NumPy arrays
        #
        self.np = np
        #
        # Turn on simple debugging
        #
        self.debug = debug
        if filename is None:
            return
        if isinstance(filename, str):
            # An unreadable or missing file leaves the object blank.
            if os.access(filename, os.R_OK):
                self.filename = filename
                with open(filename, 'r') as f:
                    self._contents = f.read()
        else:
            #
            # Assume file-like
            #
            self.filename = 'in_memory.par'
            self._contents = filename.read()
        self._parse()

    def __str__(self):
        """Implement the ``str()`` function for yanny objects.

        Simply returns the current contents of the yanny file.
        """
        return self._contents

    def __eq__(self, other):
        """Test two yanny objects for equality.

        Two yanny objects are assumed to be equal if their contents are equal.
        """
        if not isinstance(other, yanny):
            return NotImplemented
        return str(other) == str(self)

    def __ne__(self, other):
        """Test two yanny objects for inequality.

        Two yanny objects are assumed to be unequal if their contents are unequal.
        """
        if not isinstance(other, yanny):
            return NotImplemented
        return str(other) != str(self)

    def __bool__(self):
        """Give a yanny object a definite truth value.

        A yanny object is considered ``True`` if its contents are non-empty.
        """
        return len(self._contents) > 0

    # `__nonzero__` is needed for Python 2.
    # Python 3 uses `__bool__`.
    # http://stackoverflow.com/a/2233850/498873
    __nonzero__ = __bool__

    def type(self, structure, variable):
        """Returns the type of a variable defined in a structure.

        Returns ``None`` if the structure or the variable is undefined, or
        if the declaration of the variable cannot be located.

        Parameters
        ----------
        structure : str
            The name of the structure that contains `variable`.
        variable : str
            The name of the column whose type you want.

        Returns
        -------
        type : str
            The type of the variable.
        """
        if structure not in self:
            return None
        if variable not in self.columns(structure):
            return None
        #
        # Added code to cache values to speed up parsing large files.
        # 2009.05.11 / Demitri Muna, NYU
        # Find (or create) the cache for this structure.
        #
        cache = self._struct_type_caches.setdefault(structure, dict())
        if variable in cache:
            return cache[variable]
        if self.debug:
            print(variable)
        definitions = self['symbols'].get('struct', [])
        #
        # Prefer the definition whose typedef name matches exactly (ignoring
        # case): a substring search fails for mixed-case names and is
        # ambiguous when one structure name is contained in another.
        #
        matches = []
        for candidate in definitions:
            parts = self._STRUCT_PARTS_RE.search(candidate)
            if parts is not None and parts.group(2).upper() == structure.upper():
                matches.append(candidate)
        if not matches:
            # Legacy fallback: substring search on the lower-, then the
            # upper-case name, as earlier versions of this method did.
            for needle in (structure.lower(), structure.upper()):
                found = [x for x in definitions if x.find(needle) > 0]
                if len(found) == 1:
                    matches = found
                    break
        if len(matches) != 1:
            return None
        typere = re.compile(r'(\S+)\s+{0}([\[<][^;]*[\]>]|);'.format(re.escape(variable)))
        declaration = typere.search(matches[0])
        if declaration is None:
            return None
        (typ, array) = declaration.groups()
        # Normalize the alternative ``<n>`` array notation to ``[n]``.
        var_type = typ + array.replace('<', '[').replace('>', ']')
        cache[variable] = var_type
        return var_type

    def basetype(self, structure, variable):
        """Returns the bare type of a variable, stripping off any array information.

        Parameters
        ----------
        structure : str
            The name of the structure that contains `variable`.
        variable : str
            The name of the column whose type you want.

        Returns
        -------
        basetype : str
            The type of the variable, stripped of array information, or
            ``None`` if the type is unknown.
        """
        typ = self.type(structure, variable)
        if self.debug:
            print(variable, typ)
        if typ is None:
            return None
        return typ.split('[', 1)[0]

    def isarray(self, structure, variable):
        """Returns ``True`` if the variable is an array type.

        For character types, this means a two-dimensional array,
        *e.g.*: ``char[5][20]``.

        Parameters
        ----------
        structure : str
            The name of the structure that contains `variable`.
        variable : str
            The name of the column to check for array type.

        Returns
        -------
        isarray : bool
            ``True`` if the variable is an array. Unknown variables are
            reported as ``False``.
        """
        cache = self._struct_isarray_caches.setdefault(structure, dict())
        if variable in cache:
            return cache[variable]
        typ = self.type(structure, variable)
        if typ is None:
            # Unknown column: not an array, and not worth caching.
            return False
        if self._CHAR_ARRAY_RE.search(typ) is not None:
            result = True
        else:
            result = 'char' not in typ and ('[' in typ or '<' in typ)
        cache[variable] = result
        return result

    def isenum(self, structure, variable):
        """Returns true if a variable is an enum type.

        Parameters
        ----------
        structure : str
            The name of the structure that contains `variable`.
        variable : str
            The name of the column to check for enum type.

        Returns
        -------
        isenum : bool
            ``True`` if the variable is enum type.
        """
        if self._enum_cache is None:
            # Build the enum name -> list of values mapping on first use.
            self._enum_cache = dict()
            for e in self['symbols'].get('enum', []):
                (values, name) = self._ENUM_PARTS_RE.search(e).groups()
                self._enum_cache[name] = re.split(r',\s*', values.strip())
        return self.basetype(structure, variable) in self._enum_cache

    def array_length(self, structure, variable):
        """Returns the length of an array type or 1 if the variable is not an array.

        For character types, this is the length of a two-dimensional
        array, *e.g.*, ``char[5][20]`` has length 5.

        Parameters
        ----------
        structure : str
            The name of the structure that contains `variable`.
        variable : str
            The name of the column to check for array length.

        Returns
        -------
        array_length : int
            The length of the array variable
        """
        if not self.isarray(structure, variable):
            return 1
        typ = self.type(structure, variable)
        return int(typ[typ.index('[') + 1:typ.index(']')])

    def char_length(self, structure, variable):
        """Returns the length of a character field.

        *e.g.* ``char[5][20]`` is an array of 5 strings of length 20.
        Returns ``None`` if the variable is not a character type. If the
        length is not specified, *i.e.* ``char[]``, it returns the length of
        the largest string.

        Parameters
        ----------
        structure : str
            The name of the structure that contains `variable`.
        variable : str
            The name of the column to check for char length.

        Returns
        -------
        char_length : int or None
            The length of the char variable.
        """
        typ = self.type(structure, variable)
        if typ is None or 'char' not in typ:
            return None
        try:
            return int(typ[typ.rfind('[') + 1:typ.rfind(']')])
        except ValueError:
            # No explicit length: fall back to the longest stored string.
            column = self[structure][variable]
            if self.isarray(structure, variable):
                return max([max([len(x) for x in r]) for r in column])
            return max([len(x) for x in column])

    def dtype(self, structure):
        """Returns a NumPy dtype object suitable for describing a table as a record array.

        Treats enums as string, which is what the IDL reader does.

        Parameters
        ----------
        structure : str
            The name of the structure.

        Returns
        -------
        dtype : numpy.dtype
            A dtype object suitable for describing the yanny structure as a record array.
        """
        dtmap = {'short': 'i2', 'int': 'i4', 'long': 'i8', 'float': 'f',
                 'double': 'd'}
        fields = list()
        for c in self.columns(structure):
            typ = self.basetype(structure, c)
            if typ == 'char':
                d = "S{0:d}".format(self.char_length(structure, c))
            elif self.isenum(structure, c):
                # Enums are stored as strings wide enough for the longest value.
                d = "S{0:d}".format(max([len(x) for x in self._enum_cache[typ]]))
            else:
                d = dtmap[typ]
            if self.isarray(structure, c):
                fields.append((c, d, (self.array_length(structure, c),)))
            else:
                fields.append((c, d))
        return numpy.dtype(fields)

    def convert(self, structure, variable, value):
        """Converts value into the appropriate (Python) type.

        * ``short``, ``int`` & ``long`` are converted to Python ``int``.
        * ``float`` & ``double`` are converted to Python ``float``.
        * Other types are not altered.

        There may be further conversions into NumPy types, but this is the
        first stage.

        Parameters
        ----------
        structure : str
            The name of the structure that contains `variable`.
        variable : str
            The name of the column undergoing conversion.
        value : str
            The value contained in a particular row of `variable`. For array
            columns this is a list of strings.

        Returns
        -------
        convert : int, float, str or list
            `value` converted to a Python numerical type.
        """
        converters = {'short': int, 'int': int, 'long': int,
                      'float': float, 'double': float}
        converter = converters.get(self.basetype(structure, variable))
        if converter is None:
            return value
        if self.isarray(structure, variable):
            return [converter(v) for v in value]
        return converter(value)

    def tables(self):
        """Returns a list of all the defined structures.

        This is just the list of keys of the 'symbols' dictionary with the
        'internal' keys removed.
        """
        return [k for k in self['symbols'] if k not in ('struct', 'enum')]

    def columns(self, table):
        """Returns an ordered list of column names associated with a particular table.

        The order is the same order as they are defined in the yanny file.

        Parameters
        ----------
        table : str
            The table whose columns are desired.

        Returns
        -------
        columns : list
            The list of column names; empty if the table is unknown.
        """
        return self['symbols'].get(table, list())

    def size(self, table):
        """Returns the number of rows in a table.

        Parameters
        ----------
        table : str
            The table whose size desired.

        Returns
        -------
        size : int
            The number of rows in `table`; 0 if the table has no columns
            (for example because it is not defined).
        """
        names = self.columns(table)
        if not names:
            return 0
        return len(self[table][names[0]])

    def pairs(self):
        """Returns a list of keys to keyword/value pairs.

        Equivalent to doing ``self.keys()``, but with all the data tables &
        other control structures stripped out.
        """
        tables = self.tables()
        return [k for k in list(self.keys()) if k != 'symbols' and k not in tables]

    def row(self, table, index):
        """Returns a list containing a single row from a specified table in column order

        If index is out of range, it returns an empty list.

        If the yanny object instance is set up for NumPy record arrays, then
        a single row can be obtained with::

            row0 = par['TABLE'][0]

        Parameters
        ----------
        table : str
            The table whose row is desired.
        index : int
            The number of the row to return.

        Returns
        -------
        row : list
            A row from `table`.
        """
        if table in self and 0 <= index < self.size(table):
            return [self[table][c][index] for c in self.columns(table)]
        return list()

    def list_of_dicts(self, table):
        """Construct a list of dictionaries.

        Takes a table from the yanny object and constructs a list object
        containing one row per entry. Each item in the list is a dictionary
        keyed by the struct value names.

        If the yanny object instance is set up for NumPy record arrays, then
        the same functionality can be obtained with::

            foo = par['TABLE'][0]['column']

        Parameters
        ----------
        table : str
            The table to convert

        Returns
        -------
        list_of_dicts : list
            A list containing the rows of `table` converted to ``dict``.
        """
        struct_fields = self.columns(table)
        return [dict(zip(struct_fields, self.row(table, i)))
                for i in range(self.size(table))]

    def new_dict_from_pairs(self):
        """Returns a new dictionary of keyword/value pairs.

        The new dictionary (*i.e.*, not a yanny object) contains the keys
        that ``self.pairs()`` returns. There are two reasons this is convenient:

        * the key 'symbols' that is part of the yanny object will not be present
        * a simple yanny file can be read with no further processing

        Examples
        --------

        Read a yanny file and return only the pairs::

            >>> from os.path import dirname
            >>> from pydl.pydlutils.yanny import yanny
            >>> new_dict = yanny(dirname(__file__)+'/tests/t/test.par').new_dict_from_pairs()
            >>> new_dict['mjd']
            '54579'
            >>> new_dict['alpha']
            'beta gamma delta'

        added: Demitri Muna, NYU 2009-04-28
        """
        return {key: self[key] for key in self.pairs()}

    def _format_row(self, sym, columns, table, k):
        """Format row `k` of `table` as one data line for structure `sym`.

        `table` maps column names to sequences of values; array columns are
        written inside braces and every value is passed through ``protect()``.
        """
        fields = [sym]
        for col in columns:
            if self.isarray(sym, col):
                fields.append('{' + ' '.join([self.protect(x) for x in table[col][k]]) + '}')
            else:
                fields.append(self.protect(table[col][k]))
        return "{0}\n".format(' '.join(fields))

    def write(self, newfile=None, comments=None):
        """Write a yanny object to a file.

        This assumes that the filename used to create the object was not that
        of a pre-existing file. If a file of the same name is detected,
        this method will *not* attempt to overwrite it, but will print a warning.
        This also assumes that the special 'symbols' key has been properly
        created. This will not necessarily make the file very human-readable,
        especially if the data lines are long. If the name of a new file is
        given, it will write to the new file (assuming it doesn't exist).
        If the writing is successful, the data in the object will be updated.

        Parameters
        ----------
        newfile : str, optional
            The name of the file to write.
        comments : str or list of str, optional
            Comments that will be placed at the head of the file. If a
            single string is passed, it will be written out verbatim, so it
            had better contain '#' characters. If a list of strings is
            passed, comment characters will be added and the strings
            will be joined together.

        Raises
        ------
        ValueError
            If no file name is given and the object has no file name set.
        """
        if newfile is None:
            if len(self.filename) > 0:
                newfile = self.filename
            else:
                raise ValueError("No filename specified!")
        if comments is None:
            basefile = os.path.basename(newfile)
            timestamp = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
            comments = "#\n# {0}\n#\n# Created by pydl.pydlutils.yanny.yanny\n#\n# {1}\n#\n".format(
                basefile, timestamp)
        elif not isinstance(comments, str):
            comments = "\n".join(["# {0}".format(c) for c in comments]) + "\n"
        parts = [comments]
        #
        # Print any key/value pairs
        #
        for key in self.pairs():
            parts.append("{0} {1}\n".format(key, self[key]))
        #
        # Print out enum definitions (absent on a blank object)
        #
        enum_defs = self['symbols'].get('enum', [])
        if len(enum_defs) > 0:
            parts.append("\n" + "\n\n".join(enum_defs) + "\n")
        #
        # Print out structure definitions
        #
        struct_defs = self['symbols'].get('struct', [])
        if len(struct_defs) > 0:
            parts.append("\n" + "\n\n".join(struct_defs) + "\n")
        parts.append("\n")
        #
        # Print out the data tables
        #
        for sym in self.tables():
            columns = self.columns(sym)
            for k in range(self.size(sym)):
                parts.append(self._format_row(sym, columns, self[sym], k))
        contents = "".join(parts)
        #
        # Actually write the data to file
        #
        if os.access(newfile, os.F_OK):
            print("{0} exists, aborting write!".format(newfile))
            print("For reference, here's what would have been written:")
            print(contents)
        else:
            with open(newfile, 'w') as f:
                f.write(contents)
            self._contents = contents
            self.filename = newfile
            self._parse()
        return

    def append(self, datatable):
        """Appends data to an existing FTCL/yanny file.

        Tries as much as possible to preserve the ordering & format of the
        original file. The datatable should adhere to the format of the
        yanny object, but it is not necessary to reproduce the 'symbols'
        dictionary. It will not try to append data to a file that does not
        exist. If the append is successful, the data in the object will be updated.

        Parameters
        ----------
        datatable : dict
            The data to append.

        Raises
        ------
        ValueError
            If the object has no file name or `datatable` is not a ``dict``.
        """
        if len(self.filename) == 0:
            raise ValueError(
                "No filename is set for this object. Use the filename attribute to set the filename!")
        if not isinstance(datatable, dict):
            raise ValueError("Data to append is not of the correct type. Use a dict!")
        timestamp = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
        tables = self.tables()
        parts = list()
        #
        # Print any key/value pairs
        #
        for key in datatable:
            if key.upper() in tables or key == 'symbols':
                continue
            parts.append("{0} {1}\n".format(key, datatable[key]))
        #
        # Print out the data tables; the table may be keyed by its lower-case
        # or by its stored name in datatable.
        #
        for sym in tables:
            datasym = sym.lower() if sym.lower() in datatable else sym
            if datasym not in datatable:
                continue
            columns = self.columns(sym)
            for k in range(len(datatable[datasym][columns[0]])):
                parts.append(self._format_row(sym, columns, datatable[datasym], k))
        contents = "".join(parts)
        #
        # Actually write the data to file
        #
        if len(contents) > 0:
            contents = ("# Appended by yanny.py at {0}.\n".format(timestamp)) + contents
            if os.access(self.filename, os.W_OK):
                with open(self.filename, 'a') as f:
                    f.write(contents)
                self._contents += contents
                self._parse()
            else:
                print("{0} does not exist, aborting append!".format(self.filename))
                print("For reference, here's what would have been written:")
                print(contents)
        else:
            print("Nothing to be appended!")
        return

    def _parse(self):
        r"""Converts text into tables that users can use.

        This method is for use internally by the yanny object. It is not
        meant to be called by users.

        Parsing proceeds in this order:

        #. Lines that end with a backslash character ``\`` are reattached
           to following lines.
        #. Structure & enum definitions are identified, saved into the
           'symbols' dictionary & stripped from the contents.
        #. Structure definitions are interpreted.
        #. At this point, the remaining lines of the original file can only
           contain these things:

           * 'blank' lines, including lines that only contain comments
           * keyword/value pairs
           * structure rows

        #. The remaining lines are scanned sequentially.

           #. 'Blank' lines are identified & ignored.
           #. Whitespace & comments are stripped from non-blank lines.
           #. Empty double braces ``{{}}`` are converted into empty double
              quotes ``""``.
           #. If the first word on a line matches the name of a structure,
              the line is broken up into tokens & each token or set of tokens
              (for arrays) is converted to the appropriate Python type.
           #. If the first word on a line does not match the name of a
              structure, it must be a keyword, so this line is interpreted
              as a keyword/value pair. No further processing is done to
              the value.

        #. At the conclusion of parsing, if ``self.np`` is ``True``, the
           structures are converted into NumPy record arrays.
        """
        symbols = self['symbols']
        #
        # Reattach lines ending with \
        #
        lines = re.sub(r'\\\s*\n', ' ', self._contents)
        #
        # Find structure & enumeration definitions & strip them out
        #
        symbols['struct'] = self._STRUCT_DEF_RE.findall(lines)
        symbols['enum'] = self._ENUM_DEF_RE.findall(lines)
        lines = self._STRUCT_DEF_RE.sub('', lines)
        lines = self._ENUM_DEF_RE.sub('', lines)
        #
        # Interpret the structure definitions
        #
        for typedef in symbols['struct']:
            (definition, name) = self._STRUCT_PARTS_RE.search(typedef).groups()
            table = name.upper()
            self[table] = dict()
            symbols[table] = list()
            for d in re.findall(r'\S+\s+\S+;', definition):
                (_, column) = re.split(r'\s+', d.replace(';', ''))
                # Drop any array suffix so only the bare column name is kept.
                column = self._ARRAY_SUFFIX_RE.sub('', column)
                symbols[table].append(column)
                self[table][column] = list()
        #
        # Scan the remaining lines
        #
        for line in lines.split('\n'):
            if self.debug:
                print(line)
            if len(line) == 0:
                continue
            if self._COMMENT_LINE_RE.search(line) is not None:
                continue
            if self._BLANK_RE.search(line) is not None:
                continue
            #
            # Remove leading & trailing blanks & comments
            #
            line = self.trailing_comment(line.strip())
            line = self._DOUBLE_BRACES_RE.sub('""', line)
            #
            # Now if the first word on the line does not match a
            # structure definition it is a keyword/value pair
            #
            (key, value) = self.get_token(line)
            uckey = key.upper()
            if uckey not in symbols:
                self[key] = value
                continue
            #
            # Structure data
            #
            for column in symbols[uckey]:
                if len(value) == 0 or self._BLANK_RE.search(value) is not None:
                    # The row ran out of values before the columns did.
                    break
                (data, value) = self.get_token(value)
                if self.isarray(uckey, column):
                    #
                    # An array value
                    # if it's character data, it won't be
                    # delimited by {} unless it is a multidimensional
                    # string array. It may or may not be delimited
                    # by double quotes
                    #
                    # Note, we're assuming here that the only
                    # multidimensional arrays are string arrays
                    #
                    arraydata = list()
                    while len(data) > 0:
                        (token, data) = self.get_token(data)
                        arraydata.append(token)
                    self[uckey][column].append(self.convert(uckey, column, arraydata))
                else:
                    #
                    # A single value
                    #
                    self[uckey][column].append(self.convert(uckey, column, data))
        #
        # If self.np is True, convert tables into NumPy record arrays
        #
        if self.np:
            for t in self.tables():
                record = numpy.zeros((self.size(t),), dtype=self.dtype(t))
                for c in self.columns(t):
                    record[c] = self[t][c]
                self[t] = record
        return