Source code for vdat.libvdat.symlink

# Virus Data Analysis Tool: a data reduction GUI for HETDEX/VIRUS data
# Copyright (C) 2015, 2016, 2017, 2018  "The HETDEX collaboration"
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""Symlink raw files into a redux directory

Multiprocessing is disabled for the following reasons:

* Calibration and science frames need locking to correctly deal with grouping
  and renaming if multiple shots have the same name from same objects;
* a lot of ``peewee.OperationalError: database is locked`` errors are raised
* symlinking is going to run only now and then and is always going to be much
  faster than any of the reduction steps
* if the symlink is run from the gui, there is no risk that it interferes with
  reduction steps running
"""
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

import datetime as dt
import errno
import logging
import os
import re
import sys

from astropy.io import fits
import colorama
import peewee
import six

from pyhetdex.doc import docstring
import pyhetdex.tools.files.file_tools as ft
import pyhetdex.tools.processes as proc
from pyhetdex.tools import six_ext
from pyhetdex.tools.db_helpers import SQLITE_MAX_VARIABLE_NUMBER

import vdat.database as db
import vdat.utilities as vutil
import vdat.config as confp

# initialise colorama with ``autoreset`` so that the colours/styles used when
# printing the error messages in this module do not leak to later output
colorama.init(autoreset=True)

# date formats
FMT_DATE_DIR = "%Y%m%d_%H%M%S"
"""Format for converting a :class:`~datetime.datetime` instance into as string
used as directory name"""

# name used to get the worker from, and later remove it with,
# :mod:`pyhetdex.tools.processes` in :func:`do_symlink`
WORKER_NAME = 'vdat_symlink'


def symlink(log_message):
    """Entry point of the symlinking: prepare the redux directory and the
    database, then symlink the raw shots into the redux directory.

    The format of the redux directory follows
    `issue 820 <https://luna.mpe.mpg.de/redmine/issues/820>`

    The raw and redux directories are read from the ``general`` section of the
    ``main`` configuration and written back there as absolute paths.

    Parameters
    ----------
    log_message : string
        Message to log as info before starting

    Raises
    ------
    :class:`~vdat.utilities.VDATDirError`
        if neither the redux directory nor any of the raw directories exist
    """
    log = logging.getLogger('logger')
    conf = confp.get_config('main')
    section = 'general'
    log.info(log_message)

    # normalise the directories to absolute paths and store them back into
    # the configuration
    raw_dir = [os.path.abspath(name)
               for name in conf.get_list(section, 'rawdir')]
    redux_dir = os.path.abspath(conf[section]['redux_dir'])
    conf[section]['rawdir'] = ','.join(raw_dir)
    conf[section]['redux_dir'] = redux_dir

    # it is enough that one of the raw directories exists
    rawdir_exists = any(os.path.exists(name) for name in raw_dir)

    if not (os.path.exists(redux_dir) or rawdir_exists):
        log.critical("Neither the raw directory '%s' nor the redux directory"
                     " '%s' exist. This is a problem. Aborting.",
                     raw_dir, redux_dir)
        raise vutil.VDATDirError("The raw and redux directories cannot be"
                                 " found: VDAT cannot run.")

    # make sure that the redux directory is there and start the database
    _mkdir(redux_dir, log)
    db.init(redux_dir)

    # load into the database what is already in the redux directory
    n_vdat_dirs, n_vdat_exps = _scan_dirs(redux_dir)
    log.info('Found and loaded %d already symlinked directories, for a total'
             ' of %d exposures', n_vdat_dirs, n_vdat_exps)

    if not rawdir_exists:
        # nothing to symlink from: reset the option and go on with the
        # content of the redux directory only
        conf[section]['rawdir'] = ''
        log.warning("None of the raw/night directories '%s' exist, but I have"
                    " found the redux '%s'. Skip the symlinking.", raw_dir,
                    redux_dir)
    else:
        do_symlink(raw_dir, redux_dir)

    # create the zro and calibration references in the database
    log.info("Create the references in the database")
    db_create_references()


@docstring.format_docstring(tmp=vutil.SHOT_FILE, exp=vutil.EXPS_FILE)
def _scan_dirs(redux_dir):
    """Scan the redux directories and fill the database with entries from
    existing '{tmp}' and '{exp}' files. It also updates the '{tmp}' file if the
    redux directory has changed.

    If the database rejects the entries (repeated or missing values, extra
    keys), an explanation is printed and the program exits with status 1.

    Parameters
    ----------
    redux_dir : string
        name of the redux directory

    Returns
    -------
    int, int
        Number of VDATDir and VDATExposures entries added to the database
    """
    # collect the entries to insert in the database
    shots, exps = [], []
    for shot, exp in vutil.collect_metadata(redux_dir, skip_empty=True,
                                            repair_redux=True,
                                            merge_shot=True):
        shots.extend(shot)
        exps.extend(exp)
    if not shots:
        # reduction directory empty
        return 0, 0

    with db.connect():
        try:
            _insert_in_chunks(db.VDATDir, shots)
            _insert_in_chunks(db.VDATExposures, exps)
        except peewee.IntegrityError as e:
            if 'UNIQUE' in str(e):
                msg = ('Some of the database entries are repeated and this'
                       ' should not happen. This can happen when copying by'
                       ' hand a redux directory. Please use the ``clone``'
                       ' functionality to do this. To identify the offending'
                       ' directory you can run ``vdat_db check -v``.'
                       ' The directory should be removed or the metadata'
                       ' updated.')
            elif 'NOT NULL' in str(e):
                # the last word of the error message is used as the name of
                # the missing value
                msg = ('The value "{}" is missing, run'
                       ' ``vdat_db update`` to try repairing the meta data.'
                       ' If the reparation fails, the meta data files are'
                       ' moved to a backup and vdat should not fail the next'
                       ' time you run it.'.format(str(e).split()[-1]))
            else:  # pragma: no cover
                raise

            print(colorama.Fore.RED + colorama.Style.BRIGHT + msg)
            sys.exit(1)
        except (KeyError, AttributeError) as e:
            print(colorama.Fore.RED + colorama.Style.BRIGHT +
                  'Some of the meta data files contain extra key(s).\n'
                  'The error message says "{e}".\n'
                  'You can run ``vdat_db check`` to inspect the meta'
                  ' data and, if there are not other issues, should be'
                  ' possible repair them with the'
                  ' ``vdat_db update`` command'.format(e=e))
            sys.exit(1)

    return len(shots), len(exps)


def _insert_in_chunks(model, rows, max_args=SQLITE_MAX_VARIABLE_NUMBER):
    """Insert ``rows`` into ``model`` with as many ``insert_many`` queries as
    needed to keep the number of values per query below ``max_args``.

    Parameters
    ----------
    model : database model class
        model providing the ``insert_many`` method
    rows : list of dictionaries
        entries to insert; nothing is done if the list is empty
    max_args : int, optional
        maximum number of values allowed in one query
    """
    if not rows:
        return
    # number of rows per query, estimated from the size of the first row; use
    # at least one row per query to avoid a null or negative step
    n_fields = max(len(rows[0]), 1)
    size = max((max_args // n_fields) - 1, 1)
    for i in range(0, len(rows), size):
        model.insert_many(rows[i:i + size]).execute()


def do_symlink(raw_dir, redux_dir):
    """Run the symlinking from the raw to the redux directory

    The nights are either the ``raw_dir`` themselves, if the option
    ``is_rawdir_night`` of the ``general`` section is true, or the directories
    found in ``raw_dir`` matching the ``night`` option of the ``symlink``
    section. For each night the shots found in the instrument sub-directory
    are symlinked one by one; a failure in one shot is logged and does not
    stop the symlinking of the others.

    Parameters
    ----------
    raw_dir : list of strings
        name of the raw or the night directories; the list is not modified
    redux_dir : string
        name of the redux directory
    """
    log = logging.getLogger('logger')
    conf = confp.get_config('main', section='symlink')
    # Night and shot names
    wildcard_night = conf['night']
    is_night_regex = conf.getboolean('is_night_regex', False)
    virus_instrument = conf.get('virus_instrument', 'virus')
    wildcard_shot = conf['virus_shot']
    is_virus_shot_regex = conf.getboolean('is_virus_shot_regex', False)

    log.info("Symlinking fits files from %s to %s", raw_dir, redux_dir)

    # get the night directories
    gen_conf = confp.get_config('main', section='general')
    is_rawdir_night = gen_conf.getboolean('is_rawdir_night', False)

    # ``sorted`` always creates a new list, so the input is left untouched
    if is_rawdir_night:
        nights = sorted(raw_dir)
    else:
        nights = sorted(night
                        for r in raw_dir
                        for night in ft.scan_dirs(
                            r, matches=wildcard_night, recursive=False,
                            is_matches_regex=is_night_regex))

    worker = proc.get_worker(name=WORKER_NAME,
                             result_class=proc.DeferredResult,
                             multiprocessing=False)

    with worker:
        for night in nights:
            log.info("Symlinking night %s...", night)

            # the night goes directly under the redux directory. Build the
            # path from the last component of the night: replacing the parent
            # directory as a string would replace every occurrence of it
            redux_night = os.path.join(redux_dir, os.path.basename(night))

            # join the night with the virus subdirectory, before searching for
            # shots
            night = os.path.join(night, virus_instrument)

            # loop through the directories in the night
            shots = sorted(ft.scan_dirs(night, matches=wildcard_shot,
                                        recursive=False,
                                        is_matches_regex=is_virus_shot_regex))
            if not shots:
                msg = ("No shot found in night %s. Is the instrument name '%s'"
                       " correct?")
                log.warning(msg, night, virus_instrument)
                continue

            # create the redux night directory if needed
            _mkdir(redux_night, log)

            # pairs of job and shot directory to symlink
            jobs = [(worker(_symlink_shot, shot, redux_night), shot)
                    for shot in shots]

            for job, shot in jobs:
                try:
                    job.get()
                except (vutil.VDATSymlinkError, vutil.VDATDatabaseError) as e:
                    log.error("The symlinking of shot '%s' has been skipped"
                              " due to the known error: %s", shot, str(e))
                except Exception:
                    log.critical("The symlinking of shot '%s' has been skipped"
                                 " due to an unknown error", shot,
                                 exc_info=True)

        n_complete, n_error, _ = worker.jobs_stat()

        log.info("Symlinking of %d shots is complete", n_complete)
        if n_error > 0:
            log.warning("There where %d errors.", n_error)
        # clean up the job list
        worker.clear_jobs()

    proc.remove_worker(name=WORKER_NAME)


def _symlink_shot(shot_dir, redux_dir):
    """Symlink all the fits files of one shot into the redux directory and
    register the shot and its exposures in the database.

    The image type and the time stamp are extracted from the file names; the
    image type selects which of :func:`_symlink_sci`, :func:`_symlink_zro` or
    :func:`_symlink_cal` does the actual work. If no file is found in the shot
    a warning is logged and nothing is done.

    Parameters
    ----------
    shot_dir : string
        name of the shot directory
    redux_dir : string
        name of the directory where the new directory and symlink must go

    Raises
    ------
    :class:`~vdat.utilities.VDATFitsTypeError`
        if the image type or object are not consistent or unknown
    """
    log = logging.getLogger('logger')
    conf = confp.get_config('main', section='symlink')

    shot = os.path.basename(shot_dir)
    log.debug("Symlinking shot %s", shot)

    # get all the virus fits files
    # TODO: for now it uses only the virus files, add also the guide probe and
    # wavefront sensor data
    fits_files = list(ft.scan_files(
        shot_dir, matches=conf['virus_fits_files'],
        is_matches_regex=conf.getboolean('is_virus_fits_files_regex', False)))
    if not fits_files:
        log.warning("No files found in shot '%s'. Skipping", shot)
        return

    image_type, avg_date, avg_date_string = _get_imagetype_datetime(
        confp.get_config('main'), log, shot_dir, fits_files)

    # all the calibration types are collected under the 'cal' type
    type_ = 'cal' if image_type in conf['cal_types'] else image_type

    # Database entry for the shot. Some of the fields are placeholders that
    # the type specific functions must or might change:
    # * path: not set here; it is created with VDATDir.make_path before the
    #   symlinking
    # * redux_dir, night: the two parts of the input ``redux_dir``
    # * name: average time stamp as a string; used as directory name for zro
    #   and cal frames, replaced for the science frames
    # * type_: type of the images, used to make the full directory path
    # * original_type_, object_: the image type; the latter is replaced by
    #   the object name in _symlink_sci and, if a header keyword is
    #   configured, by its value in _symlink_cal
    # * shot: original shot name; for cal if necessary merge two names
    # * timestamp: average time stamp; for cal if necessary average
    # * is_clone: False, should *not* be changed
    parent_dir, night = os.path.split(redux_dir)
    vdat_dir = db.VDATDir(redux_dir=parent_dir, night=night,
                          name=avg_date_string,
                          type_=type_, original_type_=image_type,
                          object_=image_type,
                          is_clone=False, shot=shot,
                          timestamp=avg_date, version_f=db.VDATDir.version)

    # a type can be configured to be symlinked as if it were another one
    symlink_type = conf.get(type_ + "_symlink_as", fallback=type_)

    if symlink_type == 'cal' and type_ != 'cal':
        msg = ("It is not possible to treat type '{}' as it it where a 'cal'"
               " type. Aborting the symlinking for shot '{}'".format(type_,
                                                                     shot_dir))
        raise vutil.VDATFitsTypeError(msg)

    handlers = {'sci': _symlink_sci,
                'zro': _symlink_zro,
                'cal': _symlink_cal}
    handler = handlers.get(symlink_type)
    if handler is None:
        msg = ("Object type '{0}' for files in shot '{1}' is unknown."
               " Aborting the symlinking of shot '{1}'".format(image_type,
                                                               shot_dir))
        raise vutil.VDATFitsTypeError(msg)

    vdat_dir = handler(fits_files, vdat_dir)

    _save_exposures(shot_dir, fits_files, vdat_dir)

    log.debug("Symlinking shot %s done", shot)


def _save_exposures(shot_dir, fits_files, vdat_dir):
    """Create, if missing, one :class:`~vdat.database.VDATExposures` entry per
    exposure of the shot and record each newly created entry with
    :func:`vdat.utilities.write_to_exps_file`.

    The exposure name is the first component of the path of each file relative
    to ``shot_dir``; the basename is the part of the file name before the
    first underscore. If an exposure has more files, the last one wins.

    Parameters
    ----------
    shot_dir : string
        name of the shot directory
    fits_files : list of strings
        symlinked files
    vdat_dir : :class:`vdat.database.VDATDir` instance
        database entry related to the input files
    """
    # map each exposure to the basename of its files
    exposures = {}
    for fits_file in fits_files:
        parts = os.path.relpath(fits_file, start=shot_dir).split(os.path.sep)
        exposures[parts[0]] = parts[-1].split('_')[0]

    # values shared by all the exposures of the shot
    shared_fields = {'name': vdat_dir.name,
                     'path': vdat_dir.path,
                     'exptype': vdat_dir.type_,
                     'original_type': vdat_dir.original_type_,
                     'version_f': db.VDATExposures.version,
                     'object_': vdat_dir.object_}

    with db.connect():
        for expname, basename in six.iteritems(exposures):
            fields = shared_fields.copy()
            fields['expname'] = expname
            fields['basename'] = basename
            _, is_new = db.VDATExposures.get_or_create(**fields)

            # write to file only the exposures that were not yet known
            if is_new:
                vutil.write_to_exps_file(vdat_dir.path, **fields)


def _get_imagetype_datetime(conf, log, shot_dir, fnames):
    """Get the image type and the average time stamp of a shot from the names
    of its files.

    The ``image_type_pattern`` and ``datetime_pattern`` options of the
    ``symlink`` section provide, each, a regular expression and its
    replacement string; they are applied to every file name and each must
    match exactly once.

    Parameters
    ----------
    conf: configuration object
        can be either a :class:`~pyhetdex.tools.configuration.ConfigParser` or
        a dictionary(-like) object
    log : logging object
    shot_dir : string
        name of the shot directory
    fnames : list
        file names to scan

    Returns
    -------
    image_type : string
        type of the images
    avg_date : :class:`datetime`
        average timestamp
    avg_date_string : string
        string representation of the average time stamp

    Raises
    ------
    VDATFitsParseError
        if it fails parsing the file names
    VDATFitsTypeError
        if there are more than one image types
    """
    # pattern and replacement used to extract the image type
    type_option = conf.get_list('symlink', 'image_type_pattern')
    type_regex = re.compile(type_option[0])
    type_repl = type_option[1]
    # pattern and replacement used to extract the time stamp
    date_option = conf.get_list('symlink', 'datetime_pattern')
    date_regex = re.compile(date_option[0])
    date_repl = date_option[1]

    found_types, found_dates = set(), set()
    for fname in fnames:
        this_type, n_type = type_regex.subn(type_repl, fname)
        this_date, n_date = date_regex.subn(date_repl, fname)
        if not (n_type == 1 and n_date == 1):
            msg = ('Failed extracting the image type and the date time from'
                   ' the file "{}"; obtained image type: {}, datetime: {}')
            raise vutil.VDATFitsParseError(msg.format(fname, this_type,
                                                      this_date))

        found_types.add(this_type)
        found_dates.add(this_date)

    # a shot must contain one and only one type of images
    if len(found_types) != 1:
        msg = ("Not all the fits files in the shot '{}' have the same image"
               " type; instead found: '{}'")
        raise vutil.VDATFitsTypeError(msg.format(shot_dir, found_types))
    image_type = list(found_types)[0]

    avg_date, avg_date_string = _average_timestamps(
        found_dates, conf['symlink']['datetime_fmt'])

    return image_type, avg_date, avg_date_string


def _save_and_symlink(log, fits_files, vdat_dir, write_shot_file,
                      append_to_shot_file, vdat_dir_shot_file=None):
    """Fail safe symlinking. Within one database transaction:

    * create ``vdat_dir.path`` and symlink the ``fits_files`` into it; if it
      fails, abort
    * if requested, add the entry to the shot file; if it fails remove the
      new symlinks and, if the shot file already existed, rewrite its old
      content
    * save ``vdat_dir`` in the database; if it fails because of an integrity
      error, undo the previous steps and roll back the transaction

    Parameters
    ----------
    log : logging object
    fits_files : list of strings
        as in the input
    vdat_dir : :class:`vdat.database.VDATDir` instance
        database entry to save
    write_shot_file, append_to_shot_file : bool
        whether the entry needs to be written into the shot file and, in such
        case, whether it must be appended
    vdat_dir_shot_file : :class:`vdat.database.VDATDir` instance
        if given this entry, not ``vdat_dir``, is added to the shot file

    Raises
    ------
    :class:`~vdat.utilities.VDATSymlinkError`
        if the symlinking or the update of the shot file fails
    :class:`~vdat.utilities.VDATDatabaseUniquenessError`
        if saving ``vdat_dir`` raises a :class:`peewee.IntegrityError`
    """
    target_dir = vdat_dir.path
    with db.connect(), db.database.atomic() as txn:
        symlinked_list = []
        bkp_shot_file = None

        # step 1: create the directory and the symlinks
        try:
            _mkdir(target_dir, log)
            symlinked_list = _symlink_file(fits_files, target_dir, log)
        except Exception as e:
            msg = ("The symlinking to path '{}' failed because of '{}'."
                   " Aborting".format(target_dir, e))
            six.raise_from(vutil.VDATSymlinkError(msg), e)

        # step 2: update the shot file, keeping a copy of the old content to
        # be able to go back to it
        if write_shot_file:
            try:
                bkp_shot_file = vutil.read_shot_file(target_dir)
            except six_ext.FileOpenError:
                # no shot file to back up
                pass
            try:
                vdat_dir_shot_file = vdat_dir_shot_file or vdat_dir
                vutil.write_to_shot_file(target_dir,
                                         append=append_to_shot_file,
                                         **vdat_dir_shot_file.data_clean)
            except Exception as e:
                _undo_symlink(target_dir, symlinked_list, bkp_shot_file)
                msg = ('The update of the shot file in "{}" failed because of'
                       ' "{}". Rolling back and Aborting')
                six.raise_from(
                    vutil.VDATSymlinkError(msg.format(target_dir, e)), e)

        # step 3: save the entry in the database
        try:
            vdat_dir.save()
        except peewee.IntegrityError as e:
            restored = _undo_symlink(target_dir, symlinked_list,
                                     bkp_shot_file)
            if write_shot_file and not restored:
                # the shot file has been created in step 2: remove it
                os.remove(os.path.join(target_dir, vutil.SHOT_FILE))

            txn.rollback()
            msg = ("The directory to which symlink '{}' is already used for"
                   " another shot. Aborting the symlink to that path")
            six.raise_from(
                vutil.VDATDatabaseUniquenessError(msg.format(target_dir)), e)


def _undo_symlink(target_dir, symlinked_list, bkp_shot_file):
    """Remove the files in ``symlinked_list`` and, if ``bkp_shot_file`` is not
    empty, rewrite the shot file in ``target_dir`` from its entries.

    Returns
    -------
    bool
        whether the shot file has been rewritten
    """
    for link_name in symlinked_list:
        os.unlink(link_name)
    if not bkp_shot_file:
        return False
    # the first entry overwrites the file, the others are appended
    for i_entry, entry in enumerate(bkp_shot_file):
        vutil.write_to_shot_file(target_dir, append=i_entry > 0, **entry)
    return True


def _get_unique_keyword(fits_files, keyword, shot):
    """Read the header ``keyword`` from every fits file and return its value,
    after making sure that it is the same in all the files.

    The values are stripped of leading and trailing white spaces before being
    compared.

    Parameters
    ----------
    fits_files : list of strings
        names of the fits files
    keyword : string
        name of the keyword to extract
    shot : string
        name of the shot

    Returns
    -------
    string
        value of the header keyword

    Raises
    ------
    VDATFitsParseError
        if the header keyword does not exist
    VDATFitsTypeError
        if there are more than one values for the header keywords
    """
    found = set()
    try:
        for fits_file in fits_files:
            found.add(fits.getval(fits_file, keyword, memmap=False).strip())
    except KeyError:
        msg = ("At least one of the files in the shot '{}' doesn't have the"
               " required '{}' header keyword")
        raise vutil.VDATFitsParseError(msg.format(shot, keyword))

    # exactly one value is allowed
    if len(found) != 1:
        msg = ("Not all the fits files in the shot '{}' represent the same"
               " object type. Found '{}'. Aborting shot symlinking")
        raise vutil.VDATFitsTypeError(msg.format(shot, found))

    (value,) = found
    return value


def _symlink_sci(fits_files, vdat_dir):
    """Symlink the science shots

    The name of the target directory is the object name, read from the header
    keyword given by the ``object_key`` option (default ``OBJECT``) with the
    spaces replaced by dashes, or ``no_object`` if the keyword is empty.

    1. If the shot is already in the database with this name or with a
       ``<name>_sym*`` name, reuse that entry
    2. If the shot is in the database under an unrelated name, fail
    3. Otherwise create a new entry; if the name is already used by an other
       shot, ``_sym`` and a counter are appended to it

    Parameters
    ----------
    fits_files : list of strings
        names of the fits files
    vdat_dir : :class:`vdat.database.VDATDir` instance
        contains all the relevant information for the symlinking

    Returns
    -------
    out_vdat_dir : :class:`vdat.database.VDATDir` instance
        database entry to representing the current data

    Raises
    ------
    VDATSymlinkError
        if there is a mismatch between objects and shots (e.g. the same shot
        in the same night has a different object name)
    """
    log = logging.getLogger('logger')
    conf = confp.get_config('main', section='symlink')
    log.debug("Symlinking '%s' frames", vdat_dir.original_type_)
    object_key = conf.get('object_key', fallback='OBJECT')

    object_ = _get_unique_keyword(fits_files, object_key, vdat_dir.shot)
    object_ = object_.replace(" ", "-")
    vdat_dir.object_ = object_
    name = object_ or 'no_object'
    if not object_:
        log.warning("Shot '%s': the OBJECT is empty; using '%s' instead",
                    vdat_dir.shot, name)

    # search if the current shot has already been symlinked
    with db.connect():
        # ``== False`` is needed to build the query expression
        q_type = (db.VDATDir.select()
                            .where((db.VDATDir.night == vdat_dir.night) &
                                   (db.VDATDir.type_ == vdat_dir.type_) &
                                   (db.VDATDir.is_clone == False)))  # noqa
        # match shot
        q_shot = q_type.where(db.VDATDir.shot == vdat_dir.shot)
        # or match the object
        q_obj = q_type.where(db.VDATDir.name == name)
        # or both
        q_shot_obj = q_type.where((db.VDATDir.name == name) &
                                  (db.VDATDir.shot == vdat_dir.shot))

        if q_shot_obj.exists():
            # get the entry from the same query that has been tested: the
            # query on the name alone does not guarantee that the entry
            # belongs to this shot
            out_vdat_dir = q_shot_obj.get()
            write_shot_file = False
        elif q_shot.exists():
            new_name = name + "_sym"
            q_new_obj = q_shot.where(db.VDATDir.name.startswith(new_name))
            if q_new_obj.exists():
                out_vdat_dir = q_new_obj.get()
                write_shot_file = False
            else:
                # query the database once for both values in the message
                db_entry = q_shot.get()
                msg = ("The shot '{}' contains files with name '{}' and object"
                       " '{}', but in"
                       " the database the same shot has a different name"
                       " '{}' and/or object '{}'")
                raise vutil.VDATSymlinkError(msg.format(vdat_dir.shot,
                                                        name, object_,
                                                        db_entry.name,
                                                        db_entry.object_))
        else:
            if q_obj.exists():
                new_name = name + "_sym"
                log.warning("Shot '%s': the OBJECT '%s' is a duplicate, adding"
                            " a counter", vdat_dir.shot, object_)
                # the counter is the number of entries already using the
                # ``_sym`` name plus one
                q_objs = q_type.where(db.VDATDir.name.startswith(new_name))
                name = new_name + '{:03d}'.format(len(q_objs) + 1)

            vdat_dir.name = name
            vdat_dir.make_path()
            out_vdat_dir = vdat_dir
            write_shot_file = True

    _save_and_symlink(log, fits_files, out_vdat_dir, write_shot_file, False)

    return out_vdat_dir


def _symlink_zro(fits_files, vdat_dir):
    """Symlink the bias shots.

    If a non-clone entry for the same night, type and shot is already in the
    database it is reused, otherwise the path of ``vdat_dir`` is created and
    the shot file is written.

    Parameters
    ----------
    fits_files : list of strings
        names of the fits files
    vdat_dir : :class:`vdat.database.VDATDir` instance
        contains all the relevant information for the symlinking

    Returns
    -------
    out_vdat_dir : :class:`vdat.database.VDATDir` instance
        database entry to representing the current data
    """
    log = logging.getLogger('logger')
    log.debug("Symlinking '%s' frames", vdat_dir.original_type_)

    with db.connect():
        # ``== False`` is needed to build the query expression
        is_same_shot = ((db.VDATDir.night == vdat_dir.night) &
                        (db.VDATDir.type_ == vdat_dir.type_) &
                        (db.VDATDir.shot == vdat_dir.shot) &
                        (db.VDATDir.is_clone == False))  # noqa
        known_shot = db.VDATDir.select().where(is_same_shot)

        already_symlinked = known_shot.exists()
        if already_symlinked:
            out_vdat_dir = known_shot.get()
        else:
            vdat_dir.make_path()
            out_vdat_dir = vdat_dir

    # the shot file is written only for new entries
    _save_and_symlink(log, fits_files, out_vdat_dir, not already_symlinked,
                      False)

    return out_vdat_dir


def _symlink_cal(fits_files, vdat_dir):
    """Symlink the calibration, flat and arc, shots.

    Flats and arcs taken together goes into the same directory.

    1. If the shot is already symlinked, reuse the directory
    2. If not look for directories with different original type or object,
        order them at increasing time distance and take the nearest one: if
        it's within a maximum time distance, symlink into that directory
    3. Otherwise create a new directory and symlink into it

    If the option ``cal_type_header_<object>`` is set, the object of
    ``vdat_dir`` is replaced by the value of that header keyword.

    Parameters
    ----------
    fits_files : list of strings
        names of the fits files
    vdat_dir : :class:`vdat.database.VDATDir` instance
        contains all the relevant information for the symlinking

    Returns
    -------
    vdat_dir : :class:`vdat.database.VDATDir` instance
        the input entry, with name and path set to those of the directory
        into which the files are symlinked
    """
    conf = confp.get_config('main', section='symlink')
    log = logging.getLogger('logger')
    log.debug("Symlinking '%s' frames", vdat_dir.original_type_)

    header_key = conf.get('cal_type_header_' + vdat_dir.object_,
                          fallback=None)
    if header_key:
        vdat_dir.object_ = _get_unique_keyword(fits_files, header_key,
                                               vdat_dir.shot)

    with db.connect():
        # all the calibration entries for the night; ``== False`` is needed
        # to build the query expression
        night_cals = db.VDATDir.select().where(
            (db.VDATDir.night == vdat_dir.night) &
            (db.VDATDir.type_ == vdat_dir.type_) &
            (db.VDATDir.is_clone == False))  # noqa

        # entries whose shot name contains the current shot
        same_shot = night_cals.where(
            db.VDATDir.shot % ("*" + vdat_dir.shot + "*"))
        if same_shot.exists():
            out_vdat_dir = same_shot.get()
            vdat_dir.name = out_vdat_dir.name
        else:
            # entries whose object does not contain the current one
            other_objects = night_cals.where(
                ~(db.VDATDir.object_ % ("*" + vdat_dir.object_ + "*")))
            # NOTE(review): the first positional argument of ``timedelta`` is
            # in *days*; confirm that ``max_delta_cal`` is expressed in days
            max_timedelta = dt.timedelta(conf.getint("max_delta_cal"))
            nearest = _find_nearest(other_objects, vdat_dir.timestamp,
                                    n_nearest=1, nearest_then=max_timedelta)
            if nearest:
                # merge the current shot into the nearest entry
                out_vdat_dir = nearest[0]
                vdat_dir.name = out_vdat_dir.name
                vdat_dir.make_path()
                out_vdat_dir.make_path()
                out_vdat_dir.merge_entries(vdat_dir)
            else:
                out_vdat_dir = vdat_dir
        vdat_dir.make_path()
        out_vdat_dir.make_path()

        # the shot file is written only if the shot is not yet known
        write_shot_file = not same_shot.exists()

    # the entry of the current shot, not the possibly merged one, is appended
    # to the shot file
    _save_and_symlink(log, fits_files, out_vdat_dir, write_shot_file,
                      True, vdat_dir_shot_file=vdat_dir)

    return vdat_dir


def _mkdir(dirname, log, failsafe=True):
    """Create the directory, with all the needed parent directories.

    If the directory already exists and ``failsafe`` is true, the failure is
    logged at debug level and ignored. A path that exists but is not a
    directory is never ignored.

    Parameters
    ----------
    dirname : string
        name of the directory to create
    log : :class:`logging.Logging` instance
        log messages to this logger
    failsafe : bool, optional
        if true silently ignores :class:`OSError` due to existing directories

    Raises
    ------
    :class:`~vdat.utilities.VDATDirError`
        if the creation fails with a :class:`OSError` that is not ignored
    """
    try:
        os.makedirs(dirname)
    except OSError as e:
        # rely on the error code only: the text of the error message is not
        # guaranteed to be the same on every system. Also check that what
        # exists is really a directory, otherwise the failure would only be
        # postponed to the first attempt to write into it
        already_there = (e.errno == errno.EEXIST and os.path.isdir(dirname))
        if failsafe and already_there:
            log.debug("Cannot create output directory '%s'. Error: %s",
                      dirname, str(e))
        else:
            six.raise_from(vutil.VDATDirError(e), e)
    else:
        log.debug("Directory '%s' created", dirname)


def _symlink_file(file_list, target_dir,  log, failsafe=True):
    """Symlink the files into the target directory.

    The links have the same base name as the files. If the option
    ``relative_symlink`` of the ``symlink`` section is true, the links point
    to the files through paths relative to ``target_dir``. If
    ``replace_symlink`` is true, existing links are removed and created
    again.

    If a link cannot be created because the name already exists and
    ``failsafe`` is true, the failure is logged at debug level and ignored.

    Parameters
    ----------
    file_list : list of strings
        names of the fits files to symlink
    target_dir : string
        name of the directory where to do the symlink
    log : :class:`logging.Logging` instance
        log messages to this logger
    failsafe : bool, optional
        if true ignores :class:`OSError` due to existing files

    Returns
    -------
    symlinked_list : list of strings
        list of files in the target directory successfully symlinked

    Raises
    ------
    :class:`~vdat.utilities.VDATSymlinkError`
        if the symlink creation fails with a :class:`OSError` that is not
        ignored
    """
    conf = confp.get_config('main')

    if conf.getboolean('symlink', 'relative_symlink'):
        file_list = [os.path.relpath(fn, start=target_dir) for fn in file_list]

    replace_symlink = conf.getboolean('symlink', 'replace_symlink')

    symlinked_list = []
    for fn in file_list:
        name = os.path.join(target_dir, os.path.basename(fn))
        if replace_symlink and os.path.islink(name):
            os.remove(name)

        try:
            os.symlink(fn, name)
        except OSError as e:
            # rely on the error code only: the text of the error message is
            # not guaranteed to be the same on every system
            if failsafe and e.errno == errno.EEXIST:
                log.debug("Cannot create symlink to '%s' in '%s'. Error: %s",
                          fn, target_dir, str(e))
            else:
                six.raise_from(vutil.VDATSymlinkError(e), e)
        else:
            symlinked_list.append(name)
            log.debug("Symlink to '%s' created", fn)

    return symlinked_list


def _average_timestamps(dates, infmt, outfmt=FMT_DATE_DIR):
    """Average the list of timestamps.

    Parameters
    ----------
    dates : list of strings
        strings containing timestamps
    infmt : string
        format of ``dates``
    outfmt : string, optional
        format of the output time stamp

    Returns
    -------
    avg_timestamp : :class:`datetime.datetime` instance
        average time
    string
        ``avg_timestamp`` formatted according to ``outfmt``

    Raises
    ------
    :class:`~vdat.utilities.VDATDateError`
        if it fails to parse the dates or if ``dates`` is empty
    """
    try:
        timestamps = [dt.datetime.strptime(d, infmt) for d in dates]
    except ValueError as e:
        six.raise_from(vutil.VDATDateError(e), e)

    if not timestamps:
        # without this guard the division below fails with a bare
        # ZeroDivisionError, which callers handling date errors do not expect
        raise vutil.VDATDateError("Cannot average an empty list of"
                                  " timestamps")

    # datetimes cannot be summed directly: average the offsets from the first
    # timestamp and add the result back to it
    reference = timestamps[0]
    total_delta = sum((t - reference for t in timestamps), dt.timedelta())
    avg_timestamp = reference + total_delta // len(timestamps)
    return avg_timestamp, avg_timestamp.strftime(outfmt)


def _find_nearest(q, timestamp, n_nearest=1, nearest_then=None):
    """Go through the list of query results, order them according to the absolute
    distance from ``timestamp`` and return the ``n_nearest``.

    Parameters
    ----------
    q : :class:`peewee.SelectQuery`
        query to use; any iterable of objects with a ``timestamp`` attribute
        works
    timestamp : :class:`~datetime.datetime` instance
        timestamp to use as reference
    n_nearest : int, optional
        maximum number of directories returned; set it to zero or negative to
        return all
    nearest_then : :class:`~datetime.timedelta` instance
        if not None, don't consider any directory whose delta time is equal to
        or larger than ``nearest_then``; applied after n_nearest

    Returns
    -------
    sorted_q : list of query results
        ordered with respect to the timestamp
    """
    def _distance(element):
        """Absolute time difference between ``element`` and ``timestamp``"""
        return abs(element.timestamp - timestamp)

    sorted_q = sorted(q, key=_distance)

    if n_nearest > 0:
        sorted_q = sorted_q[:n_nearest]
    # explicit comparison with None: a zero ``timedelta`` is falsy, but it is
    # a valid limit that must discard every element
    if nearest_then is not None:
        sorted_q = [element for element in sorted_q
                    if _distance(element) < nearest_then]

    return sorted_q


def db_create_references():
    """Search the reference zero and calibration directories and store them
    in the database.

    For every :class:`~vdat.database.VDATDir` entry the nearest in time,
    non-cloned directory of type ``zro`` is saved as ``zero_dir`` (except for
    ``zro`` entries themselves), and the nearest non-cloned directory of type
    ``cal`` is saved as ``cal_dir`` (except for ``zro`` and ``cal`` entries).
    Every entry is saved, whether or not a reference has been found.
    """
    def _originals(type_):
        """Query selecting the non-cloned directories of the given type"""
        # ``== False`` is required here: it builds the SQL expression
        return db.VDATDir.select().where(
            (db.VDATDir.type_ == type_) &
            (db.VDATDir.is_clone == False))  # noqa: E712

    with db.connect():
        qzro = _originals('zro')
        qcal = _originals('cal')
        for vdir in db.VDATDir.select():
            # the nearest zero directory is looked up for every entry, but
            # attached only to the ones that are not zero directories
            nearest_zro = _find_nearest(qzro, vdir.timestamp)
            if nearest_zro and vdir.type_ != 'zro':
                vdir.zero_dir = nearest_zro[0]

            # zero and calibration directories get no reference calibration
            if vdir.type_ != 'zro' and vdir.type_ != 'cal':
                nearest_cal = _find_nearest(qcal, vdir.timestamp)
                if nearest_cal:
                    vdir.cal_dir = nearest_cal[0]

            vdir.save()