"""
Module CSEPFile
"""

__version__ = "$Revision: 4627 $"
__revision__ = "$Id: CSEPFile.py 4627 2014-03-31 20:22:54Z liukis $"


import sys, shutil, os, string, re, logging, tarfile
import numpy as np

import Environment
from CSEPLogging import CSEPLogging


COMMENT = '#'

#--------------------------------------------------------------------------------
#
# CSEPFile
#
# This module is designed to read in and compare files generated by the CSEP.
#

# Modes for opening the files
class Mode:
    """Single-character mode flags used when opening files."""

    # Static data of the class: read, write and append access
    READ, WRITE, APPEND = "r", "w", "a"

    # Flag for binary access
    BINARY = "b"


#--------------------------------------------------------------------------------   
#
# This class represents file formats supported by the CSEP.
#
class Format:
    """Names of the file formats supported by the CSEP."""

    # Static data of the class: data formats
    ASCII, MATLAB, XML, HDF5 = "ASCII", "Matlab", "XML", "HDF5"

    # Image formats
    SVG, PNG = "SVG", "PNG"

    # Archive format
    TARGZ = "TARGZ"

    KML = "KML"
    DIR = "directory"


#--------------------------------------------------------------------------------   
#
# This class represents file extensions used by the CSEP code.
#
class Extension:
    """File extensions used by the CSEP code and their relation to formats."""

    # Static data of the class

    MATLAB = ".mat"
    ASCII = ".dat"
    XML = ".xml"
    TEXT = ".txt"
    ORIGINAL = '.original'
    KML = '.kml'
    HDF5 = '.h5'

    # Image files extensions
    SVG = ".svg"
    PNG = ".png"

    # Archive file extensions
    TARGZ = '.targz'

    # Association between file extension and corresponding format
    __toFormat = {MATLAB : Format.MATLAB,
                  ASCII : Format.ASCII,
                  ASCII + TARGZ : Format.ASCII + Format.TARGZ,
                  XML : Format.XML,
                  TEXT : Format.ASCII,
                  SVG : Format.SVG,
                  PNG : Format.PNG,
                  KML: Format.KML,
                  HDF5: Format.HDF5,
                  HDF5 + TARGZ : Format.HDF5 + Format.TARGZ,
                  TARGZ: Format.TARGZ}

    # Dictionary of extensions that can be replaced: the key is the new
    # extension, the value lists the extensions it is allowed to replace
    __allowedReplacement = {XML    : [MATLAB,
                                      ASCII,
                                      HDF5,
                                      HDF5+TARGZ,
                                      ASCII+TARGZ],
                            ASCII  : [MATLAB,
                                      XML,
                                      ASCII+TARGZ],
                            MATLAB : [ASCII,
                                      XML],
                            SVG    : [MATLAB,
                                      ASCII,
                                      XML,
                                      HDF5]}

    #-----------------------------------------------------------------------------
    #
    # __fullExtension
    #
    # This static method returns the extension of the filename. If the filename
    # ends with the archive extension, the extension that precedes it is
    # included as well (for example, '.dat.targz').
    #
    @staticmethod
    def __fullExtension (filename):
        """Return filename extension, including the one that precedes the
           archive extension."""

        file_path, file_ext = os.path.splitext(filename)

        # If compression is enabled, then double extension will be used by the
        # filename
        if file_ext == Extension.TARGZ:
            # Preserve extension order
            file_ext = os.path.splitext(file_path)[1] + file_ext

        return file_ext


    #-----------------------------------------------------------------------------
    #
    # replace
    #
    # This static method returns an extension to replace for provided filename,
    # or None if such extension was not found.
    #
    # Input:
    #          ext - New extension that is about to replace the current one.
    #          filename - Filename to examine.
    #
    # Output:
    #          Current extension of the filename if it is allowed to be replaced
    #          by 'ext', None otherwise (including the case when no replacements
    #          are registered for 'ext').
    #
    @staticmethod
    def replace (ext, filename):
        """Return extension to be replaced for a filename given allowed conversions."""

        file_ext = Extension.__fullExtension(filename)

        # Use an empty collection for extensions without registered
        # replacements, so None is returned instead of raising KeyError
        if file_ext in Extension.__allowedReplacement.get(ext, ()):
            return file_ext

        return None


    #-----------------------------------------------------------------------------
    #
    # toFormat
    #
    # This static method returns a string representation of the format for the
    # filename, or None if file extension is unknown.
    #
    @staticmethod
    def toFormat (filename):
        """Returns format for the filename or None for unknown file extension."""

        # Missing (empty) extension is not registered, hence results in None
        return Extension.__toFormat.get(Extension.__fullExtension(filename))

   
#-------------------------------------------------------------------------------- 
#
# This class is designed to generate filenames for various formats.
#
class Name:
    """Generators of filenames for various file formats."""

    #-----------------------------------------------------------------------------
    #
    # __convert
    #
    # This method is shared by the format specific methods of the class: it
    # removes optional token from the filename and swaps file extension for the
    # new one if such conversion is allowed by Extension.replace().
    #
    @staticmethod
    def __convert (filename, new_extension, remove_token = None):
        """ Generate filename with new extension if conversion is allowed."""

        new_filename = filename

        if remove_token is not None:
            # Token is treated as a regular expression pattern
            new_filename = re.sub(remove_token,
                                  '',
                                  filename)

        old_extension = Extension.replace(new_extension,
                                          filename)
        if old_extension is None:
            return new_filename

        # Swap only the last occurrence of the extension: str.replace() would
        # also rewrite matching text of directory names or of the file stem
        # (for example, 'run.mat_files/forecast.mat')
        head, found, tail = new_filename.rpartition(old_extension)
        if found:
            new_filename = head + new_extension + tail

        return new_filename


    #-----------------------------------------------------------------------------
    #
    # ascii
    #
    # This method generates a filename for ascii formatted data.
    # It replaces other than ASCII file extension for provided filename with
    # '.dat' extension.
    #
    # Input:
    #          filename - Filename for Matlab or XML data.
    #          remove_token - Optional token (regular expression pattern) to be
    #                         removed from filename.
    #
    # Output:
    #          String representing filename for ascii data.
    #
    @staticmethod
    def ascii (filename, remove_token = None):
        """ Generate filename for ASCII data based on provided filename
            for Matlab or XML data."""

        # Filename is returned as is (except for removed token) if its
        # extension is not allowed to be replaced by the ASCII one
        return Name.__convert(filename,
                              Extension.ASCII,
                              remove_token)


    #-----------------------------------------------------------------------------
    #
    # matlab
    #
    # This method generates a filename for Matlab formatted data.
    # It replaces other than Matlab file extension for provided filename with
    # '.mat' extension.
    #
    # Input:
    #          filename - Filename for ASCII or XML data.
    #
    # Output:
    #          String representing filename for Matlab data.
    #
    @staticmethod
    def matlab (filename):
        """ Generate filename for Matlab data based on provided filename
            for ASCII or XML data."""

        # Filename is returned as is if its extension is not allowed to be
        # replaced by the Matlab one
        return Name.__convert(filename,
                              Extension.MATLAB)


    #--------------------------------------------------------------------
    #
    # xml
    #
    # This method generates a filename for XML formatted data.
    # It replaces other than XML file extension for provided filename with
    # '.xml' extension.
    #
    # Input:
    #          filename - Filename for ASCII or Matlab data.
    #          remove_token - Optional token (regular expression pattern) to be
    #                         removed from filename.
    #
    # Output:
    #          String representing filename for XML data.
    #
    @staticmethod
    def xml (filename, remove_token = None):
        """ Generate filename for XML data based on provided filename
            for ASCII or Matlab data."""

        # Filename is returned as is (except for removed token) if its
        # extension is not allowed to be replaced by the XML one
        return Name.__convert(filename,
                              Extension.XML,
                              remove_token)


    #--------------------------------------------------------------------
    #
    # extension
    #
    # This method generates a filename with provided extension for a given
    # filename. It replaces current extension with given one.
    #
    # Input:
    #          filename - Filename with any kind of extension.
    #          new_extension - New extension for the filename. Default is an empty
    #                          string - will return filename without an extension.
    #
    # Output:
    #          Filename with new extension.
    #
    @staticmethod
    def extension (filename,
                   new_extension=''):
        """ Generate filename with new extension."""

        file_path, file_ext = os.path.splitext(filename)

        if file_ext == Extension.TARGZ:
            # Archive extension follows the data extension: strip both
            file_path = os.path.splitext(file_path)[0]

        # Add '.' separator if new extension does not start with one
        if new_extension and not new_extension.startswith('.'):
            file_path += '.'

        return file_path + new_extension
       
   
#--------------------------------------------------------------------
#
# Compare two ASCII format files.
#
# Input: 
#        filename1 - File name of the first file to compare.
#        filename2 - File name of the second file to compare.
#        precision - Given precision for value comparison. Default is 1e-12.
#        skip_num_lines - Number of lines to skip at the beginning. Default is 0.
#        skip_column_index - List of columns indices to omit from comparison.
#                            Default is an empty list.  
#        use_percent_diff - Flag if percent difference should be used for value
#                           comparison. Default is False.
# 
# Output: 
#        True - files compared OK, False otherwise (the reason is logged).
#        RuntimeError is raised if either of the files can't be opened.
#
def compare (filename1, filename2, precision = 1E-12, skip_num_lines = 0,
             skip_column_index = (), use_percent_diff = False, delimiter = None):
    """ Compare two files with given precision.

    Files are read one line at a time and corresponding lines are compared
    value by value. Pairs of lines that both begin with an alphabetic
    character are ignored. Comparison stops at the first pair of lines that
    are both blank, which includes the end of both files.

    Input:
           filename1 - File name of the first file to compare.
           filename2 - File name of the second file to compare.
           precision - Accepted tolerance for value difference.
           skip_num_lines - Number of lines to skip at the beginning.
           skip_column_index - Indices of columns to omit from comparison.
           use_percent_diff - Flag if percent difference should be used for
                              value comparison.
           delimiter - Column delimiter as passed to str.split(); None stands
                       for any whitespace.

    Output:
           True if files compared OK, False otherwise (the reason is logged).
           RuntimeError is raised by openFile() if a file can't be opened.
    """

    diff_char = ' '
    if use_percent_diff is True:
        diff_char = '% '

    fhandle1 = openFile(filename1)
    try:
        fhandle2 = openFile(filename2)
    except Exception:
        # Don't leak the first handle if the second file can't be opened
        fhandle1.close()
        raise

    try:
        skipped = 0

        while True:
            line1 = fhandle1.readline().strip()
            line2 = fhandle2.readline().strip()

            # '<' (rather than '!=') guarantees that loop terminates for
            # any value of skip_num_lines
            if skipped < skip_num_lines:
                skipped += 1
                continue

            # Ignore lines that begin with strings
            if line1 and line2 and \
               line1[0].isalpha() and line2[0].isalpha():
                continue

            # A blank line holds no values whatever the delimiter is:
            # ''.split(delimiter) returns [''] for an explicit delimiter,
            # which used to break detection of the end of file
            line1_tokens = line1.split(delimiter) if line1 else []
            line2_tokens = line2.split(delimiter) if line2 else []

            if len(line1_tokens) != len(line2_tokens):
                error_msg = "Inconsistent number of elements in lines '%s' vs. '%s'\n" \
                            %(line1, line2)
                CSEPLogging.getLogger(__name__).error(error_msg)
                return False

            if len(line1_tokens) == 0:
                # Both lines are blank or both files are exhausted
                break

            line1_values = [float(token) for (i, token) in enumerate(line1_tokens)
                            if i not in skip_column_index]
            line2_values = [float(token) for (i, token) in enumerate(line2_tokens)
                            if i not in skip_column_index]

            for value1, value2 in zip(line1_values, line2_values):
                diff = value1 - value2

                if use_percent_diff is True and (value1 + value2) != 0.0:
                    # percent_diff = 100*diff/((value1+value2)/2.0)
                    diff *= 200.0/(value1 + value2)

                if abs(diff) > precision:
                    # Value difference exceeds accepted tolerance, report the error
                    error_msg = "Difference %s%s(value %s vs. value %s) exceeds " \
                                "allowed tolerance %s. Line (%s) vs. line (%s)" \
                                %(diff, diff_char, value1, value2,
                                  precision, line1, line2)
                    CSEPLogging.getLogger(__name__).error(error_msg)

                    return False

    except Exception as e:
        # Read or value conversion failures are reported as a failed comparison
        error_msg = "Error comparing files '%s' and '%s': (%s)" \
                    %(filename1, filename2, e)
        CSEPLogging.getLogger(__name__).error(error_msg)

        return False

    finally:
        fhandle1.close()
        fhandle2.close()

    # Return True if files compared OK
    return True

     
#--------------------------------------------------------------------
#
# Compare two numerical lists for equality with given precision.
#
# Input: 
#        line1 - Line of first list of values to compare: "x1 x2...xn"
#        line2 - Line of second list of values to compare: "y1 y2 y3...yn"
#        precision - Given precision for value comparison. Default is 1e-13.
#        use_percent_diff - Flag if percent difference should be used for value
#                           comparison. Default is False.
# 
# Output: 
#        True - lists are equal, False otherwise
#
def compareLines (line1,
                  line2,
                  precision = 1E-13,
                  use_percent_diff = False,
                  delimiter = None):
    """ Compare two lists of values with given precision given their text
        representation.

    Input:
           line1 - Line of first list of values to compare: "x1 x2...xn"
           line2 - Line of second list of values to compare: "y1 y2...yn"
           precision - Accepted tolerance for value difference.
           use_percent_diff - Flag if percent difference should be used for
                              value comparison.
           delimiter - Value delimiter as passed to str.split(); None stands
                       for any whitespace.

    Output:
           True if lists are equal within tolerance, False otherwise (the
           reason is logged). ValueError is raised by float() if a token is
           not a number.
    """

    diff_char = ' '
    if use_percent_diff is True:
        diff_char = '% '

    # A blank line holds no values whatever the delimiter is:
    # ''.split(delimiter) returns [''] for an explicit delimiter, which
    # can't be converted to a number
    line1_tokens = line1.split(delimiter) if line1.strip() else []
    line2_tokens = line2.split(delimiter) if line2.strip() else []

    if len(line1_tokens) != len(line2_tokens):
        error_msg = "Inconsistent number of elements in lines '%s' vs. '%s'\n" \
                    %(line1, line2)
        CSEPLogging.getLogger(__name__).error(error_msg)
        return False

    if len(line1_tokens) == 0:
        # Nothing to compare
        return True

    line1_values = [float(token) for token in line1_tokens]
    line2_values = [float(token) for token in line2_tokens]

    for value1, value2 in zip(line1_values, line2_values):
        diff = value1 - value2

        if use_percent_diff is True and (value1 + value2) != 0.0:
            # percent_diff = 100*diff/((value1+value2)/2.0)
            diff *= 200.0/(value1 + value2)

        if abs(diff) > precision:
            # Value difference exceeds accepted tolerance, report the error
            error_msg = "Difference %s%s(value %s vs. value %s) exceeds " \
                        "allowed tolerance %s. Line (%s) vs. line (%s)" \
                        %(diff, diff_char, value1, value2,
                          precision, line1, line2)
            CSEPLogging.getLogger(__name__).error(error_msg)

            return False

    return True
       

#--------------------------------------------------------------------
#
# Open file.
#
# Input: 
#        filename - Name of the file to open.
#        mode - Mode for opening the file: "r" - reading or "w" - writing.
# 
# Output:
#        fhandle - Handle to the open file.
#
def openFile(filename, mode = Mode.READ):
    """ Open file in specified mode. Default is a 'read' mode.

    Input:
           filename - Name of the file to open.
           mode - Mode string as accepted by the built-in open().

    Output:
           Handle to the open file.

    Raises RuntimeError (after logging the reason) if file can't be opened.
    """

    try:
        return open(filename, mode)

    except Exception as e:
        error_msg = "Error opening the file '%s' in '%s' mode: (%s)" \
                    %(filename, mode, e)
        CSEPLogging.getLogger(__name__).error(error_msg)
        raise RuntimeError(error_msg)
    
        
#===============================================================================
# This method is added as an optimization attempt to read large forecasts files
# by T and W evaluation tests - see Trac ticket #270: Optimize read of large 
# forecasts files for T and W evaluation tests
#===============================================================================
def read (filename, data_type = float, separator = None):
    """ Read file into numpy.array object (please see Trac ticket #270:
        Optimize read of large forecasts files for T and W evaluation tests)

    Input:
           filename - Name of the file to read.
           data_type - Type of array elements. Default is the built-in float,
                       which is what the removed 'numpy.float' alias stood for.
           separator - Column separator as passed to str.split(); None stands
                       for any whitespace.

    Output:
           Two-dimensional numpy.array as returned by readArray().
    """

    # Good practice: use 'open' with keyword "with" to guarantee file being
    # closed after exiting the scope
    with openFile(filename) as fhandle:
        return readArray(fhandle,
                         data_type,
                         separator)


#===============================================================================
# This method is added as an optimization attempt to read large forecasts files
# by T and W evaluation tests - see Trac ticket #270: Optimize read of large 
# forecasts files for T and W evaluation tests
#===============================================================================
def readArray (fhandle,
               data_type = float,
               separator = None):
    """ Read file into numpy.array object (please see Trac ticket #270:
        Optimize read of large forecasts files for T and W evaluation tests)

    Input:
           fhandle - Iterable that yields text lines, such as an open file.
           data_type - Type of array elements. Default is the built-in float,
                       which is what the removed 'numpy.float' alias stood for.
           separator - Column separator as passed to str.split(); None stands
                       for any whitespace.

    Output:
           numpy.array with one row per data line; it always has at least
           two dimensions.
    """

    data = []

    for each_row in fhandle:

        # Ignore empty lines and comments (lines that start with COMMENT)
        if len(each_row.strip()) == 0 or each_row.startswith(COMMENT):
            continue

        data.append([each.strip() for each in each_row.split(separator)])

    file_data = np.asarray(data,
                           dtype = data_type)

    # Guarantee 2-dim result: array is 1-dim if, for example, there are no
    # data rows at all
    if file_data.ndim == 1:
        file_data.shape = (1, file_data.size)

    return file_data


#-------------------------------------------------------------------------------
# create GZIP archive of given file 
#-------------------------------------------------------------------------------
class GZIPArchive (object):
    """ Thin wrapper around gzip-compressed tarfile.TarFile object. The
        archive is closed when the object is destroyed."""

    def __init__ (self,
                  filename,
                  mode = Mode.READ):
        """ Open archive.

        Input:
               filename - Path to the archive file.
               mode - Access mode; ':gz' is appended to it to form the mode
                      passed to tarfile.open().
        """

        self.__file = filename

        # Define the attribute before opening the archive: __del__ is invoked
        # even if tarfile.open() raises an exception
        self.__obj = None
        self.__obj = tarfile.open(filename,
                                  mode + ':gz')


    def __del__ (self):
        """ Close the archive if it has been opened."""

        if self.__obj is not None:
            self.__obj.close()
            self.__obj = None


    def add (self,
             original_file):
        """ Add file to the archive"""

        # Entry is stored under its base name, without directory path
        self.__obj.add(original_file,
                       arcname = os.path.basename(original_file))


    def extractall (self):
        """ Extract files from archive"""

        # Extract files to the same directory where archive is located
        # NOTE(review): TarFile.extractall() should be used for trusted
        # archives only, since members may carry absolute or '..' paths
        self.__obj.extractall(os.path.dirname(self.__file))


    def extractfile (self,
                     filename):
        """ Return result of TarFile.extractfile() for the archive member
            that is named as the base name of provided filename"""

        # Nothing is written to disk: entries are stored by add() under
        # their base names, hence directory path is stripped for lookup
        return self.__obj.extractfile(os.path.basename(filename))
        
        
#--------------------------------------------------------------------
#
# Create a copy of the file or directory.
#
# Input:
#          original_file - Name of original file or directory to copy.
#          copy_file - Destination file or directory.
#          create_archive - Optional argument to specify if archive of the entry
#                           should be created. Default is False.
#
def _md5sum (filename, chunk_size = 1024 * 1024):
    """ Return hexadecimal MD5 digest of the file content."""

    # Local import: module is required by this helper only
    import hashlib

    digest = hashlib.md5()

    # Read file by chunks to keep memory usage bounded for large files
    with open(filename, 'rb') as fhandle:
        for chunk in iter(lambda: fhandle.read(chunk_size), b''):
            digest.update(chunk)

    return digest.hexdigest()


def copy (original_file,
          copy_file,
          create_archive = False):
    """ Create a copy of the file.

    Input:
           original_file - Name of original file or directory to copy.
           copy_file - Destination file or directory.
           create_archive - Flag if gzip-compressed archive of the entry
                            should be created instead of a plain copy.
                            Default is False.

    Raises RuntimeError (after logging the reason) if original entry does not
    exist, or if MD5 checksum of a copied file differs from the original one.
    """

    # Check file existence
    if not os.path.exists(original_file):
        error_msg = "Failed to create %s copy of %s file. Original file doesn't exist." \
                    %(copy_file, original_file)
        CSEPLogging.getLogger(__name__).error(error_msg)

        raise RuntimeError(error_msg)

    CSEPLogging.getLogger(__name__).info("Copying file %s to the %s..." \
                                         %(original_file, copy_file))

    ### Fix for Trac ticket #223: Store random seed values within archive
    if create_archive is True:
        archive = GZIPArchive(copy_file,
                              Mode.WRITE)
        archive.add(original_file)

        # Archive is closed on destruction of the object
        del archive
        return

    # If entry is a directory, copy the whole tree
    if os.path.isdir(original_file):
        shutil.copytree(original_file,
                        copy_file)
        return

    # Entry is a file
    shutil.copyfile(original_file, copy_file)

    # Compare checksums of both files to make sure copy is not corrupted.
    # Checksums are computed in-process: external 'md5sum' command line breaks
    # for paths with spaces or shell metacharacters
    md5sum_original_file = _md5sum(original_file)
    md5sum_copy_file = _md5sum(copy_file)

    if md5sum_original_file != md5sum_copy_file:
        error_msg = "md5sum failed for the copy of the file: %s (file %s) vs. %s (file %s)" \
                    %(md5sum_original_file, original_file, md5sum_copy_file, copy_file)
        CSEPLogging.getLogger(__name__).error(error_msg)

        raise RuntimeError(error_msg)

