Source code for desidatamodel.stub

# Licensed under a 3-clause BSD style license - see LICENSE.rst
# -*- coding: utf-8 -*-
"""
==================
desidatamodel.stub
==================

Generate data model files from FITS files.
"""
import os
import re
from html import escape
from pathlib import Path
import importlib.resources as ir
# from pkg_resources import resource_filename
from astropy.io import fits
from astropy.io.fits.card import Undefined
from astropy.table import Table

from desiutil.log import log, DEBUG

from . import DataModelError
from .unit import validate_unit
#
# This is the reStructuredText template for a data model stub.  It is filled
# in with str.format() by Stub.__str__(), which supplies the fields
# titlehighlight, title, filename, filetype, filesize, contents_table and
# hdu_sections.  Doubled braces ({{ }}) are str.format() escapes that come
# out as literal braces in the generated text.
#
rst = """{titlehighlight}
{title}
{titlehighlight}

:Summary: *This section should be filled in with a high-level description of
    this file. In general, you should remove or replace the emphasized text
    (\\*this text is emphasized\\*) in this document.*
:Naming Convention: ``{filename}``, where ... *Give a human readable
    description of the filename, e.g. ``blat-{{EXPID}}`` where ``{{EXPID}}``
    is the 8-digit exposure ID.*
:Regex: ``{filename}`` *Give a regular expression for this filename.
    For example, a six-digit number would correspond to ``[0-9]{{6}}``.*
:File Type: {filetype}, {filesize}  *This section gives the type of the file
    and its approximate size.*

Contents
========

{contents_table}

FITS Header Units
=================

{hdu_sections}

Notes and Examples
==================

*Add notes and examples here.  You can also create links to example files.*
"""


class Stub(object):
    """This object contains metadata about a file and methods to print that
    metadata.

    Parameters
    ----------
    filename : file path, file-like object or :class:`~astropy.io.fits.HDUList`
        Data file to convert to a data model file.  A plain :class:`list` of
        objects that each have a ``header`` attribute is also accepted.
    description_file : :class:`str`, optional
        Name of a CSV file that is passed to :func:`read_column_descriptions`
        to obtain units and descriptions for BINTABLE columns.  Values from
        this file override the values found in the FITS headers.
    error : :class:`bool`, optional
        If ``True``, failure to extract certain required metadata raises an
        exception.

    Attributes
    ----------
    columns_header : :func:`tuple`
        The header of a table summarizing the columns of a BINTABLE HDU.
    contents_header : :func:`tuple`
        The header of a table summarizing the HDUs.
    description_file : :class:`str`
        The column description file, or ``None``.
    error : :class:`bool`
        Whether metadata problems should raise exceptions.
    filename : :class:`str`
        Name of the file.  This stays ``None`` unless `filename` was given as
        a :class:`str` or a :class:`~pathlib.Path`.
    headers : :class:`list`
        The HDUs read from the file.
    keywords_header : :func:`tuple`
        The header of a table listing interesting FITS keywords.
    nhdr : :class:`int`
        Number of HDUs.
    """
    contents_header = ('Number', 'EXTNAME', 'Type', 'Contents')
    keywords_header = ('KEY', 'Example Value', 'Type', 'Comment')
    columns_header = ('Name', 'Type', 'Units', 'Description')

    def __init__(self, filename, description_file=None, error=False):
        self.filename = None
        self.description_file = description_file
        self.error = error
        if isinstance(filename, (list, fits.HDUList)):
            # Already-open HDUs: only the headers are needed.
            self.nhdr = len(filename)
            self.headers = [filename[k].header for k in range(self.nhdr)]
        else:
            # Compressed images are deliberately left as BINTABLEs here;
            # hdumeta and image_format() inspect the Z* keywords themselves.
            with fits.open(filename, disable_image_compression=True) as fx:
                self.nhdr = len(fx)
                self.headers = [fx[k].header for k in range(self.nhdr)]
            if isinstance(filename, str):
                self.filename = filename
            elif isinstance(filename, Path):
                self.filename = str(filename)
        #
        # Lazily-computed values backing the properties below.
        #
        self._basef = None
        self._modelname = None
        self._filesize = None
        self._filetype = None
        self._hdumeta = None
        self._hduname = None
        self._contents = None
        return

    @property
    def basef(self):
        """Base name of the file.
        """
        if self._basef is None:
            self._basef = os.path.basename(self.filename)
        return self._basef

    @property
    def modelname(self):
        """Name to use for the data model file.

        This is the part of the base name before the first ``-``; if there
        is no ``-``, the part before the first ``.``; and if there is neither,
        the whole base name.
        """
        if self._modelname is None:
            name = self.basef
            for separator in ('-', '.'):
                if separator in name:
                    name = name[0:name.index(separator)]
                    break
            self._modelname = name
        return self._modelname

    @property
    def filetype(self):
        """Type of file.  Assumes FITS (for now) unless overridden in a
        subclass.
        """
        if self._filetype is None:
            self._filetype = 'FITS'
        return self._filetype

    @property
    def filesize(self):
        """Size of the file in human-readable format.
        """
        if self._filesize is None:
            self._filesize = file_size(self.filename)
        return self._filesize

    @property
    def hdumeta(self):
        """Metadata associated with each HDU.

        A list with one dictionary per HDU, with keys ``title``, ``extname``,
        ``keywords``, ``extension`` and ``format``.
        """
        if self._hdumeta is None:
            # Build the complete list before caching it, so an exception
            # part-way through cannot leave a truncated cache behind.
            self._hdumeta = [self._describe_hdu(k) for k in range(self.nhdr)]
        return self._hdumeta

    def _describe_hdu(self, k):
        """Assemble the metadata dictionary for HDU number `k` (zero-indexed).
        """
        hdr = self.headers[k]
        meta = dict()
        meta['title'] = self.hduname.format(k)
        # Row 0 of self.contents is the table header, hence the offset.
        meta['extname'] = self.contents[k+1][1]
        meta['keywords'] = extract_keywords(hdr)
        if 'XTENSION' not in hdr:
            # No XTENSION keyword: treat the HDU as an image.
            meta['extension'] = 'IMAGE'
            meta['format'] = self.image_format(hdr)
            return meta
        meta['extension'] = hdr['XTENSION'].strip()
        if meta['extension'] == 'IMAGE':
            meta['format'] = self.image_format(hdr)
        elif meta['extension'] == 'BINTABLE':
            try:
                meta['format'] = self.columns(k, self.error)
            except DataModelError:
                # columns() raises this when the BINTABLE is really a
                # compressed image; describe it as an image instead.
                meta['format'] = self.image_format(hdr)
                try:
                    meta['extension'] = hdr['ZTENSION'].strip()
                except KeyError:
                    try:
                        zimage = hdr['ZIMAGE']
                    except KeyError:
                        log.warning("Possible malformed compressed data in HDU %d of %s.",
                                    k, self.filename)
                    else:
                        if zimage:
                            meta['extension'] = 'IMAGE'
        else:
            w = ("Unknown extension type: " +
                 "{extension}.").format(**meta)
            meta['format'] = w
            log.warning(w)
        return meta

    @property
    def hduname(self):
        """Format of HDU names.

        The number of digits is chosen so that every HDU number in the file
        has the same width.
        """
        if self._hduname is None:
            if self.nhdr > 99:
                self._hduname = 'HDU{0:03d}'
            elif self.nhdr > 9:
                self._hduname = 'HDU{0:02d}'
            else:
                self._hduname = 'HDU{0:1d}'
        return self._hduname

    @property
    def contents(self):
        """A table summarizing the HDUs.

        The first row is :attr:`contents_header`; each following row
        describes one HDU.
        """
        if self._contents is None:
            # Build the table locally and cache it only once it is complete.
            rows = [self.contents_header]
            for k in range(self.nhdr):
                hdr = self.headers[k]
                if 'EXTNAME' in hdr:
                    extname = hdr['EXTNAME'].strip()
                else:
                    extname = ''
                    #
                    # Don't complain about missing EXTNAME on primary, empty HDUs.
                    # See https://github.com/desihub/desidatamodel/issues/69
                    #
                    if k > 0:
                        log.warning("HDU%d has no EXTNAME set!", k)
                if k > 0:
                    # ZTENSION, when present, takes precedence over XTENSION.
                    if 'ZTENSION' in hdr:
                        exttype = hdr['ZTENSION'].strip()
                    else:
                        exttype = hdr['XTENSION'].strip()
                else:
                    exttype = 'IMAGE'
                rows.append((self.hduname.format(k)+'_', extname, exttype,
                             '*Brief Description*'))
            self._contents = rows
        return self._contents

    def columns(self, hdu, error=False):
        """Describe the columns of a BINTABLE HDU.

        Parameters
        ----------
        hdu : :class:`int`
            The HDU number (zero-indexed).
        error : :class:`bool`, optional
            If ``True``, failure to extract certain required metadata raises an
            exception.

        Returns
        -------
        :class:`list`
            The rows of the table.

        Raises
        ------
        :exc:`~desidatamodel.DataModelError`
            If the BINTABLE is actually a compressed image.
        :exc:`ValueError`
            If `error` and a ``TUNIT`` value does not have FITS-standard units.
        """
        hdr = self.headers[hdu]
        if 'ZBITPIX' in hdr:
            raise DataModelError("HDU{0:d} is actually a compressed image!".format(hdu))
        ncol = hdr['TFIELDS']
        c = [self.columns_header]
        if self.description_file is not None:
            log.info('Using column description and units from %s', self.description_file)
            desc_data = read_column_descriptions(self.description_file)
        else:
            desc_data = None
        for j in range(ncol):
            # FITS column keywords are numbered from 1.
            jj = '{0:d}'.format(j+1)
            name = hdr['TTYPE'+jj].strip()
            ttype = fits_column_format(hdr['TFORM'+jj].strip())
            # Get units from header if possible
            tunit = 'TUNIT'+jj
            if tunit in hdr:
                units = hdr[tunit].strip()
                bad_unit = validate_unit(units, error=error)
                if bad_unit:
                    log.debug("Non-standard (but acceptable) unit %s detected for column %s in HDU %d of %s.",
                              bad_unit, j, hdu, self.filename)
            else:
                units = ''
            # Check TCOMMnn keyword, otherwise use TTYPE comment
            # for description.
            commkey = 'TCOMM'+jj
            if commkey in hdr:
                description = escape(hdr[commkey].strip())
            else:
                description = escape(hdr.comments['TTYPE'+jj])
            # For both UNITS and DESCRIPTION, column description file trumps
            # fits header (in case fits header was wrong and we need to
            # override it), but at least print warning if they don't match
            if desc_data is not None and name in desc_data:
                desc_units = desc_data[name]['Units']
                if units != '' and units != desc_units:
                    log.warning('Overriding header units "%s" with units "%s" from %s',
                                units, desc_units, self.description_file)
                bad_unit = validate_unit(desc_units, error=error)
                if bad_unit:
                    log.debug('Non-standard (but acceptable) units "%s" detected for column %s in %s',
                              bad_unit, name, self.description_file)
                units = desc_units
                descfile_description = escape(desc_data[name]['Description'])
                if description != '' and description != descfile_description:
                    log.warning('Overriding header description "%s" with description "%s" from %s',
                                description, descfile_description, self.description_file)
                description = descfile_description
            c.append((name, ttype, units, description))
        return c

    def keywords(self, hdu):
        """A table summarizing the interesting keywords in a particular HDU.

        Parameters
        ----------
        hdu : :class:`int`
            The HDU number (zero-indexed).

        Returns
        -------
        :class:`list`
            The rows of the table.
        """
        return [self.keywords_header] + self.hdumeta[hdu]['keywords']

    def colsizes(self, table):
        """Compute the size (number of characters) of each column in a table.

        Parameters
        ----------
        table : :class:`list`
            A list representing a table.

        Returns
        -------
        :class:`list`
            The size of each column in the table.
        """
        # zip(*table) transposes the rows into columns.
        return [max(len(cell) for cell in col) for col in zip(*table)]

    def highlight(self, sizes):
        """Return reStructuredText-compatible table highlights.

        Parameters
        ----------
        sizes : :class:`list`
            The width of each column.

        Returns
        -------
        :class:`str`
            A highlight string.
        """
        return ' '.join('='*k for k in sizes)

    def colformat(self, sizes):
        """Return a string ready to be formatted.

        Parameters
        ----------
        sizes : :class:`list`
            The width of each column.

        Returns
        -------
        :class:`str`
            A string with format characters.
        """
        # Each field looks like '{0:12}': positional index and minimum width.
        return ' '.join('{{{0:d}:{1:d}}}'.format(i, s)
                        for i, s in enumerate(sizes))

    def format_table(self, table, indent=False):
        """Convert tabular data into reStructuredText-compatible string.

        This function assumes that `table` already has a header as the
        first row.

        Parameters
        ----------
        table : :class:`list`
            A data table.
        indent : :class:`bool`
            If ``True``, indent the table for compatibility with
            collapsible tables.

        Returns
        -------
        :class:`list`
            A list of strings that can be joined.
        """
        spaces = ' ' if indent else ''
        sizes = self.colsizes(table)
        highlight = spaces + self.highlight(sizes)
        colformat = spaces + self.colformat(sizes)
        t = [highlight]
        for k, row in enumerate(table):
            # Strip the padding that the last column adds.
            t.append(colformat.format(*row).rstrip())
            if k == 0:
                # A second rule separates the header row from the body.
                t.append(highlight)
        t.append(highlight)
        return t

    def section(self, hdu):
        """A string describing an HDU.

        Parameters
        ----------
        hdu : :class:`int`
            The HDU number (zero-indexed).

        Returns
        -------
        :class:`list`
            A list of strings that can be joined.
        """
        meta = self.hdumeta[hdu]
        s = list()
        #
        # Section title & summary.
        #
        s.append(meta['title'])
        s.append('-'*len(meta['title']))
        s.append('')
        if meta['extname']:
            s.append('EXTNAME = {extname}'.format(**meta))
            s.append('')
        s.append('*Summarize the contents of this HDU.*')
        s.append('')
        #
        # Interesting keywords.
        #
        if len(meta['keywords']) > 0:
            s.append('Required Header Keywords')
            s.append('~~~~~~~~~~~~~~~~~~~~~~~~')
            s.append('')
            s.append('.. collapse:: Required Header Keywords Table')
            s.append('')
            s.append(' .. rst-class:: keywords')
            s.append('')
            s += self.format_table(self.keywords(hdu), indent=True)
        else:
            s.append('This HDU has no non-standard required keywords.')
        s.append('')
        #
        # Data payload
        #
        if meta['extension'] == 'BINTABLE':
            s.append('Required Data Table Columns')
            s.append('~~~~~~~~~~~~~~~~~~~~~~~~~~~')
            s.append('')
            s.append('.. rst-class:: columns')
            s.append('')
            s += self.format_table(meta['format'])
        else:
            # For everything else 'format' is a single descriptive string.
            s.append(meta['format'])
        s.append('')
        return s

    def __str__(self):
        kw = dict()
        kw['title'] = self.modelname
        kw['titlehighlight'] = '='*len(kw['title'])
        kw['filename'] = self.basef
        kw['filetype'] = self.filetype
        kw['filesize'] = self.filesize
        kw['contents_table'] = ("\n".join(self.format_table(self.contents)) +
                                "\n")
        hdu_sections = list()
        for k in range(self.nhdr):
            hdu_sections += self.section(k)
        kw['hdu_sections'] = "\n".join(hdu_sections)
        return rst.format(**kw)

    def image_format(self, hdr):
        """Obtain format of an image HDU.

        Parameters
        ----------
        hdr : :class:`~astropy.io.fits.Header`
            The header to parse.

        Returns
        -------
        :class:`str`
            A string describing the image format, or ``'Empty HDU.'`` when
            ``NAXIS`` is zero.

        Notes
        -----
        If a ``BUNIT`` keyword is present it is passed to ``validate_unit``
        with ``error=self.error``, so a non-standard unit may raise an
        exception when ``self.error`` is set.  NOTE(review): the exception
        type is presumably :exc:`ValueError`, as documented for
        :meth:`columns`; an earlier version of this docstring named
        ``DataModelError`` -- confirm against ``validate_unit``.
        """
        n = hdr['NAXIS']
        if n == 0:
            return 'Empty HDU.'
        bitmap = {8: 'char', 16: 'int16', 32: 'int32', 64: 'int64',
                  -32: 'float32', -64: 'float64'}
        if 'ZBITPIX' in hdr:
            # Compressed image: the Z* keywords describe the image shape
            # and pixel type.
            n = hdr['ZNAXIS']
            dims = [str(hdr['ZNAXIS{0:d}'.format(k+1)]) for k in range(n)]
            try:
                datatype = bitmap[hdr['ZBITPIX']] + ' (compressed)'
            except KeyError:
                datatype = 'BITPIX={0} (compressed)'.format(hdr['ZBITPIX'])
        else:
            dims = [str(hdr['NAXIS{0:d}'.format(k+1)]) for k in range(n)]
            try:
                datatype = bitmap[hdr['BITPIX']]
            except KeyError:
                datatype = 'BITPIX={}'.format(hdr['BITPIX'])
        if 'BUNIT' in hdr:
            log.debug("BUNIT = '%s'", hdr['BUNIT'])
            bad_unit = validate_unit(hdr['BUNIT'], error=self.error)
            if bad_unit:
                log.debug("Non-standard (but acceptable) unit %s detected in %s.",
                          bad_unit, self.filename)
        return 'Data: FITS image [{0}, {1}]'.format(datatype, 'x'.join(dims))
def extrakey(key):
    """Return True if key is not a boring standard FITS keyword.

    To make the data model more human readable, we don't overwhelm the
    output with required keywords which are required by the FITS
    standard anyway, or cases where the number of headers might change
    over time.

    This list isn't exhaustive.

    Parameters
    ----------
    key : :class:`str`
        A FITS keyword.

    Returns
    -------
    :class:`bool`
        ``True`` if the keyword is not boring.

    Examples
    --------
    >>> extrakey('SIMPLE')
    False
    >>> extrakey('DEPNAM01')
    False
    >>> extrakey('BZERO')
    True
    """
    # NAXIS1 and NAXIS2 are deliberately absent from this set: we want to
    # document which axis is which.
    standard = frozenset(('BITPIX', 'NAXIS', 'PCOUNT', 'GCOUNT', 'TFIELDS',
                          'XTENSION', 'SIMPLE', 'EXTEND', 'COMMENT', 'HISTORY',
                          'EXTNAME', ''))
    if key in standard:
        return False
    # Each pattern is only anchored at the start of the keyword.
    boring_patterns = (
        # Table-specific keywords
        r'T(TYPE|FORM|UNIT|COMM|DIM|NULL)\d+',
        # Compression-specific keywords
        r'Z(SIMPLE|IMAGE|TENSION|BITPIX|NAXIS|NAXIS1|NAXIS2|PCOUNT|GCOUNT|TILE1|TILE2|CMPTYPE|NAME1|VAL1|NAME2|VAL2|HECKSUM|DATASUM)',
        # Dependency list
        r'DEP(NAM|VER)\d+',
    )
    return not any(re.match(pattern, key) is not None
                   for pattern in boring_patterns)
def file_size(filename):
    """Determine file size and return string with human readable size format.

    Adapted from stackoverflow answers for human readable size formatting.

    Parameters
    ----------
    filename : :class:`str`
        A string containing a filename.

    Returns
    -------
    :class:`str`
        A human-readable file size.

    Examples
    --------
    >>> file_size('one-gb-file.dat')
    '1 GB'
    """
    size = os.path.getsize(filename)
    for label in ('bytes', 'KB', 'MB', 'GB'):
        if size >= 1024:
            # Too big for this unit; scale down and try the next one.
            size /= 1024.0
            continue
        # Sizes below the TB range are truncated to a whole number.
        return "{0:d} {1}".format(int(size), label)
    return "{0:3.1f} {1}".format(size, 'TB')
def fits_column_format(format):
    """Convert a FITS column format to a human-readable form.

    Parameters
    ----------
    format : :class:`str`
        A FITS-style format string (a ``TFORMn`` value).

    Returns
    -------
    :class:`str`
        A human-readable version of the format string.

    Raises
    ------
    :exc:`KeyError`
        If the type code in `format` is not a recognized type code.

    Examples
    --------
    >>> fits_column_format('A')
    'char[1]'
    >>> fits_column_format('J')
    'int32'
    >>> fits_column_format('12E')
    'float32[12]'
    >>> fits_column_format('1PB(2000)')
    '8-bit stream'
    """
    # Variable-length array descriptors: [0|1]P<type>(<max>) or the 64-bit
    # variant with Q.  The element type follows the P or Q.
    m = re.match(r'(0|1|)[PQ]([A-Z])\([0-9]+\)', format)
    if m is not None:
        cmap = {'A': 'char stream',
                'B': '8-bit stream',
                'I': '16-bit stream',
                'J': '32-bit stream',
                'K': '64-bit stream',
                'E': 'float32 stream',
                'D': 'float64 stream',
                'L': 'logical stream',
                'X': 'bit stream',
                'C': 'complex64 stream',
                'M': 'complex128 stream'}
        return cmap[m.group(2)]
    # Fixed-length columns: an optional repeat count followed by one letter.
    fitstype = format[-1]
    if fitstype == 'A' and len(format) == 1:
        return 'char[1]'
    fmap = {'A': 'char',
            'I': 'int16',
            'J': 'int32',
            'K': 'int64',
            'E': 'float32',
            'D': 'float64',
            'B': 'binary',
            'L': 'logical',
            'X': 'bit',
            'C': 'complex64',
            'M': 'complex128'}
    if len(format) > 1:
        # Everything before the type letter is the repeat count.
        return fmap[fitstype] + '[' + format[:-1] + ']'
    return fmap[fitstype]
def extract_keywords(hdr):
    """Extract interesting keywords from a FITS header.

    Parameters
    ----------
    hdr : :class:`~astropy.io.fits.Header`
        The header to parse.

    Returns
    -------
    :class:`list`
        A list of tuples containing the metadata of interesting keywords.
        Each tuple is ``(keyword, example value, type, comment)``.
    """
    keywords = list()
    for key in hdr:
        if not extrakey(key):
            continue
        # Look up the comment with the keyword exactly as it appears in the
        # header, before any escaping is applied to the name that is shown.
        comment = escape(hdr.comments[key])
        # Escape &, <, >, in strings, but don't choke on int/float
        value = hdr[key]
        if isinstance(value, bool):
            # Must be tested before int, because bool is a subclass of int.
            ktype = 'bool'
            value = ('F', 'T')[int(value)]
        if isinstance(value, str):
            value = escape(value)
            if value == 'T' or value == 'F':
                ktype = 'bool'
            else:
                ktype = 'str'
        if isinstance(value, int):
            value = str(value)
            ktype = 'int'
        if isinstance(value, float):
            value = str(value)
            ktype = 'float'
        if isinstance(value, (Undefined, type(None))):
            log.error("Empty header keyword %s detected! This violates the FITS standard!", key)
            value = 'None'
            ktype = 'Unknown'
        # A trailing underscore is backslash-escaped in the output, in both
        # the keyword name and the value.
        display_key = key
        if display_key.endswith('_'):
            display_key = display_key[:-1] + '\\_'
        try:
            if value.endswith('_'):
                value = value[:-1] + '\\_'
        except AttributeError:
            # The value is of a type that none of the branches above
            # converted to a string.
            ktype = 'Unknown'
            log.warning("Raised AttributeError on %s = %s.", display_key, value)
        keywords.append((display_key, value, ktype, comment))
    return keywords
def read_column_descriptions(filename):
    """Read column descriptions csv file and return dictionary

    Args:
        filename (str): csv filename whose first line is exactly
            ``Name,Type,Units,Description``

    Returns:
        coldesc_dict[NAME] = dict with keys ``Type``, ``Units``,
        ``Description``

    Raises:
        ValueError: if the header line is not the expected one, or if a
            non-blank row does not have exactly four columns.

    Blank lines in the file are ignored.  If a name appears more than
    once, the last row wins.
    """
    # Use python csv library instead of astropy Table to avoid
    # unwanted masking of blank strings
    import csv
    correct_header = 'Name,Type,Units,Description'
    coldesc = dict()
    # newline='' lets the csv module do its own newline handling, which it
    # needs in order to parse quoted fields containing line breaks.
    with open(filename, newline='') as fp:
        header = fp.readline().strip()
        if header != correct_header:
            raise ValueError(f'{filename} header {header} should be {correct_header}')
        reader = csv.reader(fp)
        for row in reader:
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            if len(row) != 4:
                # The header line was consumed outside the reader, so the
                # reader's line count is one less than the file's.
                raise ValueError(f'{filename} line {reader.line_num + 1} has '
                                 f'{len(row)} columns instead of 4')
            name, dtype, units, desc = row
            coldesc[name] = dict(Type=dtype, Units=units, Description=desc)
    return coldesc
def main():
    """Entry point for the generate_model script.

    Every FITS file named on the command line is converted to a
    :class:`Stub`, and the resulting text is written to
    ``<modelname>.rst`` in the current directory.

    Returns
    -------
    :class:`int`
        An integer suitable for passing to :func:`sys.exit`.
    """
    import sys
    from argparse import ArgumentParser
    try:
        from astropy.io import fits
    except ImportError:
        log.critical("This script requires astropy.io.fits, "
                     "available in your "
                     "favourite Python distribution.")
        return 1
    #
    # Command-line interface.
    #
    desc = """Generate an DESI data model stub for a given FITS file. You will still need to hand edit the file to add descriptions, etc., but it gives you a good starting point in the correct format. """
    default_descriptions = (ir.files('desidatamodel') / 'data' /
                            'column_descriptions.csv')
    parser = ArgumentParser(description=desc,
                            prog=os.path.basename(sys.argv[0]))
    parser.add_argument('-v', '--verbose', dest='verbose',
                        action='store_true',
                        help='Set log level to DEBUG.')
    parser.add_argument('filename', help='A FITS file.', metavar='FILE',
                        nargs='+')
    parser.add_argument("--column_descriptions",
                        help="CSV file with column info Name,Type,Units,Description; "
                             "default=%(default)s",
                        default=default_descriptions)
    options = parser.parse_args()
    if options.verbose:
        log.setLevel(DEBUG)
    for fits_file in options.filename:
        stub = Stub(fits_file, description_file=options.column_descriptions)
        # Render everything first, so nothing is written if rendering fails.
        text = str(stub)
        #
        # Write the file
        #
        output_name = "{0}.rst".format(stub.modelname)
        with open(output_name, 'w') as m:
            m.write(text)
    return 0