"""
Module GeoNetNZDataSource
"""

__version__ = "$Revision$"
__revision__ = "$Id$"

import os, datetime, urllib, math

import numpy as np

import Environment, CSEPLogging, CSEPGeneric, CSEPFile, CSEPUtils, \
       MatlabLogical, CSEP
from CatalogDataSource import CatalogDataSource
from CSEPInputParams import CSEPInputParams
from QPCatalog import QPCatalog

FOCAL_MECHANISM_PATH_ENV = "FOCAL_MECHANISM_PATH"


#--------------------------------------------------------------------------------
#
# GeoNetNZDataSource
#
# This class provides an interface to extract GeoNet catalog data.
#
class GeoNetNZDataSource (CatalogDataSource):

    # Static data of the class
    Type = "GeoNetNZ"
    
    # URL to use for download
    __downloadURL = 'http://magma.geonet.org.nz/services/quake/quakeml/1.0.1/query?'
    __contentType = 'Content-Type'
    __XMLPageType = 'application/xml'
    __CMTPageType = 'application/vnd.ms-excel'
    
    # Focal mechanism file
    __downloadFocalMechanismURL = 'http://info.geonet.org.nz/download/attachments/8585256'

    RawFocalMechanismFile = 'GeoNet_CMT_solutions.csv'
    
    ProcessedFocalMechanismFile = 'GeoNet_CMT_solutions.dat'

    # Post-2012/01/01 catalog download info:    
    Post2012CatalogFile = 'post2012_import_raw.dat'
    __newCatalogDate = datetime.datetime(2012, 1, 1)
    __downloadURLPost2012 = 'http://wfs.geonet.org.nz/geonet/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=geonet:quake_search_v1&outputFormat=csv&&propertyName=origintime,latitude,longitude,magnitude,depth&sortBy=origintime'

    # Format for catalog data from WFS geonet.org.nz
    class WFSFormat (object):
        """ Zero-based column indices used when reading rows of the 
            post-2012 catalog file."""

        (FID,
         Origintime,
         Latitude,
         Longitude,
         Depth,
         Magnitude) = range(6)
         
    
    # Names of the data source options: depth and magnitude bounds for the
    # data download, and the flag to retrieve focal mechanism data
    __depthLowerOption = "depthLower"
    __depthUpperOption = "depthUpper"
    __magLowerOption = "magnitudeLower"
    __magUpperOption = "magnitudeUpper"
    __focalMechanismOption = "focalMechanism"
    
    __logger = None
    
    # Dictionary of data source parameters and their default values
    __defaultOptions = {__depthLowerOption : 0,
                        __depthUpperOption : 40,
                        __magLowerOption : 3.0,
                        __magUpperOption : 10,
                        __focalMechanismOption : False}
    
    
    #----------------------------------------------------------------------------
    #
    # Initialization.
    #
    
    # Input:
    #        start_date - Optional start date for the catalog data. Default is a
    #                     datetime.datetime() object for 1/1/1863.
    #        download_data - Flag if raw data should be downloaded. Default is
    #                        True.
    #        pre_process_data - Flag if raw data should be pre-processed.
    #                           Default is True.
    #        args - Optional list of arguments that is specific to the
    #               data source. For example, depth bounds for downloaded data.
    #               Default is None.
    # 
    def __init__ (self, 
                  start_date = datetime.datetime(1863, 1, 1), 
                  download_data = True,
                  pre_process_data = True,
                  args = None):
        """ Set up the GeoNet data source.

            The class-wide logger is created on first instantiation, the 
            base CatalogDataSource is initialized with the start date and
            the download/pre-process flags, and 'args' is parsed against the
            default option values of the class."""

        # Logger is shared by all instances: create it only once
        if GeoNetNZDataSource.__logger is None:
            logger_name = GeoNetNZDataSource.__name__
            GeoNetNZDataSource.__logger = CSEPLogging.CSEPLogging.getLogger(logger_name)

        CatalogDataSource.__init__(self,
                                   start_date,
                                   download_data,
                                   pre_process_data)

        # Data source specific options: defaults overwritten by 'args'
        default_options = GeoNetNZDataSource.__defaultOptions
        self.__args = CSEPInputParams.parse(default_options, args)
        
        
    #--------------------------------------------------------------------
    #
    # Return source type as defined by the class.
    #
    # Input: None.
    #
    # Output: string representing the type of the source.
    #
    def type (self):
        """ Return string representation of the source."""

        return GeoNetNZDataSource.Type


    #---------------------------------------------------------------------------
    #
    # Return file format of pre-processed catalog data.
    #
    # Input: None.
    #
    # Output: String representing the file format of pre-processed catalog data.
    #
    def fileFormat (self):
        """ String representing the file format of pre-processed catalog data."""

        return 'ZMAP (ASCII)'
    
    
    class QuakeSearchCSVFormat (object):
        """ Zero-based column indices of the CSV catalog that is converted
            by quakeSearchCSVExportZMAP()."""

        (CUSP_ID,
         LAT,
         LONG,
         NZMGE,
         NZMGN,
         ORI_YEAR,
         ORI_MONTH,
         ORI_DAY,
         ORI_HOUR,
         ORI_MINUTE,
         ORI_SECOND,
         MAG,
         DEPTH,
         ERLAT,
         ERLON,
         ERT,
         ERMAG,
         ERZ) = range(18)

    
    @staticmethod
    def quakeSearchCSVExportZMAP(csv_file,
                                 output_file):
        
        """ This method converts CSV format of catalog as returned by 'quakesearch' 
            into ZMAP format, and saves newly formatted catalog to 'output_file'.
            
            Input:
                   csv_file - Path to the CSV catalog file. Columns are 
                              expected in QuakeSearchCSVFormat order.
                   output_file - Path of the file to write the converted 
                                 catalog to.
                                 
            Output: None.
            
            Latitude and longitude errors are combined into a single 
            horizontal error that is stored in the ERLAT column; events with 
            either error missing get 'NaN' instead. Missing magnitude and 
            depth errors are also replaced by 'NaN'. The year column is 
            replaced by the decimal year of the event."""
        
        # Nested class is not visible by its bare name from within a method:
        # it has to be accessed through the enclosing class
        csv_format = GeoNetNZDataSource.QuakeSearchCSVFormat
        
        # Builtin 'object' is used instead of 'np.object' alias, which is not
        # available in recent numpy releases
        csv_np = CSEPFile.read(csv_file,
                               object,
                               ',')
        
        lat_lon_errors_flags = np.char.strip(csv_np[:, [csv_format.ERLAT,
                                                        csv_format.ERLON]].astype(str))
                                          
        # Locate events for which either long or lat error is missing.
        # Each comparison must be parenthesized: '|' binds tighter than '=='
        selection = (lat_lon_errors_flags[:, 0] == '') | \
                    (lat_lon_errors_flags[:, 1] == '')
        
        # If lat or long errors are not provided, put NaN values for ERLAT field
        # which will be replaced by horizontalError
        csv_np[selection, csv_format.ERLAT] = 'NaN'
                                    
        lat = csv_np[:, csv_format.LAT].astype(float)
        
        # Only compute horizontalError for events where both long and lat errors
        # are provided (use conversion found in QPCatalog.exportZMAP()).
        # Errors are converted to float for these events only: empty strings 
        # of events with missing errors can't be converted
        horiz_sel = ~selection 
        lat_errors = csv_np[horiz_sel, csv_format.ERLAT].astype(float)
        lon_errors = csv_np[horiz_sel, csv_format.ERLON].astype(float)
        
        csv_np[horiz_sel, csv_format.ERLAT] = np.sqrt(np.power(lat_errors * 111.0, 2) +
                                                      np.power(lon_errors * np.cos(lat[horiz_sel] * np.pi/180.0) * 111.0, 2))
        
        # Replace not provided errors with NaN strings
        for error_column in (csv_format.ERMAG, csv_format.ERZ):
            selection = csv_np[:, error_column].astype(str) == ''
            csv_np[selection, error_column] = 'NaN'

        def _event_datetime (event):
            """ Build datetime object out of date and time columns of the event."""
            
            # Seconds may come with or without fractional part
            seconds, _, fraction = str(event[csv_format.ORI_SECOND]).partition('.')
            
            # datetime accepts microseconds only: truncate extra digits, pad
            # missing ones
            microseconds = int(fraction[:6].ljust(6, '0')) if fraction else 0
            
            return datetime.datetime(int(event[csv_format.ORI_YEAR]),
                                     int(event[csv_format.ORI_MONTH]),
                                     int(event[csv_format.ORI_DAY]),
                                     int(event[csv_format.ORI_HOUR]),
                                     int(event[csv_format.ORI_MINUTE]),
                                     int(seconds),
                                     microseconds)

        # Compute decimal year based on date and time of all catalog events
        catalog_date = [_event_datetime(event) for event in csv_np]

        # Replace year entry in catalog with decimal year
        csv_np[:, csv_format.ORI_YEAR] = np.array([CSEPUtils.decimalYear(each_date) for each_date in catalog_date])
        
        # Save catalog in ZMAP format to the file. Catalog is an array of
        # objects (mostly strings), which default float format of savetxt 
        # can't handle: write string representation of each entry
        np.savetxt(output_file,
                   csv_np[:, [csv_format.LONG,
                              csv_format.LAT,
                              csv_format.ORI_YEAR,
                              csv_format.ORI_MONTH,
                              csv_format.ORI_DAY,
                              csv_format.MAG,
                              csv_format.DEPTH,
                              csv_format.ORI_HOUR,
                              csv_format.ORI_MINUTE,
                              csv_format.ORI_SECOND,
                              csv_format.ERLAT,
                              csv_format.ERZ,
                              csv_format.ERMAG]],
                   fmt = '%s')
        

    #---------------------------------------------------------------------------
    #
    # Return minimum magnitude for raw catalog data retrieved from data source
    #
    # Input: None.
    #
    # Output: minimum magnitude of raw catalog data
    #
    def __getMinMagnitude (self):
        """ Return minimum magnitude for raw catalog data retrieved from data source."""

        return self.__args[GeoNetNZDataSource.__magLowerOption]
    
    MinMagnitude = property(__getMinMagnitude, 
                            doc = "Minimum magnitude for raw catalog data retrieved from data source")


    #--------------------------------------------------------------------
    #
    # Download catalog data from specified source.
    #
    # Input:
    #        test_date - Date for raw catalog data.
    #
    # Output: None.
    #
    def download (self, test_date):
        """ Extract GEONET New Zealand catalog for specified test date.
        
            Input:
                   test_date - Date for raw catalog data.
                   
            Output: None.
            
            Raises RuntimeError if focal mechanism data is requested, but 
            could not be retrieved.
        
            The catalog is requested in QuakeML format and saved to 
            self.RawFile. If test date is on or after 2012/01/01, that 
            request is limited to events before 2012/01/01, and the remaining
            events are requested in CSV format from the WFS service and 
            saved to self.Post2012CatalogFile. If focal mechanism option is 
            enabled, focal mechanism file is downloaded to the directory of 
            self.RawFile."""

        # Format URL address to query the data: 
        # according to the http://www.geonet.org.nz/resources/earthquake/quake-web-services.html,
        # "Depth or magnitude queries require both upper and lower bounds."
       
        # Even though GeoNet docs state that dates are treated inclusively, 00:00:00 time
        # is implied for the endDate ---> increment test_date by one-day to get events
        # for the test_date within catalog
        day_after_test_date = test_date + datetime.timedelta(hours=24)
        
        use_new_service = test_date >= GeoNetNZDataSource.__newCatalogDate
       
        # Old service provides events up to the date of the new catalog only 
        end_date = day_after_test_date
        if use_new_service:
            end_date = GeoNetNZDataSource.__newCatalogDate
           
        command = "%sstartDate=%s&endDate=%s&%s=%s&%s=%s&%s=%s&%s=%s" %(GeoNetNZDataSource.__downloadURL,
                                                                        self.StartDate.date(),
                                                                        end_date.date(),
                                                                        GeoNetNZDataSource.__depthLowerOption,
                                                                        self.__args[GeoNetNZDataSource.__depthLowerOption],
                                                                        GeoNetNZDataSource.__depthUpperOption,
                                                                        self.__args[GeoNetNZDataSource.__depthUpperOption],
                                                                        GeoNetNZDataSource.__magLowerOption,
                                                                        self.__args[GeoNetNZDataSource.__magLowerOption],
                                                                        GeoNetNZDataSource.__magUpperOption,
                                                                        self.__args[GeoNetNZDataSource.__magUpperOption])

        # Download QuakeML data and save it in XML format:
        GeoNetNZDataSource.__logger.info("Invoking %s\n" %command)       
        _, info = urllib.urlretrieve(command, 
                                     self.RawFile)

        file_type = info.getheader(GeoNetNZDataSource.__contentType)
        urllib.urlcleanup()
              
        # Header is None if server did not report content type at all
        if file_type is None or \
           file_type.find(GeoNetNZDataSource.__XMLPageType) < 0:
            # File content is not of expected type: it's reported, but is not
            # considered a failure
            error_msg = "Downloaded '%s' file content has unexpected type: got '%s'," \
                        "expected '%s'" %(self.RawFile, 
                                          file_type, 
                                          GeoNetNZDataSource.__XMLPageType)

            GeoNetNZDataSource.__logger.info(error_msg)
        
        # Check if focal mechanism should be retrieved
        if MatlabLogical.Boolean[self.__args[GeoNetNZDataSource.__focalMechanismOption]] == \
           MatlabLogical.Boolean[True]:
           
            # Extract directory path to download data to:
            data_path, _ = os.path.split(self.RawFile)
           
            local_focal_mechanism = os.path.join(data_path,
                                                 GeoNetNZDataSource.RawFocalMechanismFile)
           
            if CSEP.URL.retrieve(GeoNetNZDataSource.__downloadFocalMechanismURL, 
                                 local_focal_mechanism,
                                 GeoNetNZDataSource.__CMTPageType) is False:
                # Raise an exception
                error_msg = "Failed to retrieve %s from %s" \
                            %(local_focal_mechanism,
                              GeoNetNZDataSource.__downloadFocalMechanismURL)
              
                GeoNetNZDataSource.__logger.error(error_msg)
                raise RuntimeError(error_msg)
       
        # Test date is post 2012/1/1, need to retrieve newest data from
        # new web service
        if use_new_service:
        
            # Download latest post-2012/1/1 data from new server:
            # http://wfs.geonet.org.nz/geonet/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=geonet:quake_search_v1&outputFormat=csv
            # &cql_filter=origintime>='2012-01-01'+AND+origintime<'2014-09-01'+AND+magnitude>=3+AND+magnitude<=10+AND+depth>=0+AND+depth<=40
            command = "%s&cql_filter=origintime>='%s'+AND+origintime<'%s'+AND+depth>=%s+AND+depth<=%s+AND+magnitude>=%s+AND+magnitude<=%s" \
                      %(GeoNetNZDataSource.__downloadURLPost2012,
                        GeoNetNZDataSource.__newCatalogDate.date(),
                        day_after_test_date.date(),
                        self.__args[GeoNetNZDataSource.__depthLowerOption],
                        self.__args[GeoNetNZDataSource.__depthUpperOption],
                        self.__args[GeoNetNZDataSource.__magLowerOption],
                        self.__args[GeoNetNZDataSource.__magUpperOption])
    
            # Download catalog data and save it in CSV format:
            GeoNetNZDataSource.__logger.info("Invoking %s\n" %command)       
            urllib.urlretrieve(command, self.Post2012CatalogFile)
            urllib.urlcleanup()
          

    #--------------------------------------------------------------------
    #
    # Pre-process catalog data into ZMAP-format.
    #
    # Input:
    #        raw_data_file - Raw catalog data file in QuakePy format.
    #        preprocessed_data_file - Filename for output pre-processed data.
    #
    # Output: None.
    #
    def preProcess (self, 
                    raw_data_file, 
                    preprocessed_data_file):
        """ Pre-process raw QuakePy data into catalog ZMAP format.
        
            Input:
                   raw_data_file - Raw catalog data file that is passed to
                                   QPCatalog.
                   preprocessed_data_file - Filename for output pre-processed 
                                            data.
                                            
            Output: None.
            
            If self.Post2012CatalogFile exists, its events are appended to 
            the pre-processed file with 'NaN' for all uncertainties. If focal 
            mechanism option is enabled, raw focal mechanism file is 
            converted from comma- to tab-separated file within directory of 
            self.RawFile."""

        qml_catalog = QPCatalog(raw_data_file)
        qml_catalog.exportZMAP(preprocessed_data_file,
                               withUncertainties = True)
       
        # If new data was retrieved, include it into catalog
        if os.path.exists(self.Post2012CatalogFile):
            # NOTE(review): WFSFormat places depth before magnitude, while 
            # 'propertyName' of the download URL lists magnitude before 
            # depth - confirm column order of the file returned by the service
            wfs_format = GeoNetNZDataSource.WFSFormat
            
            # Builtin 'object' is used instead of 'np.object' alias, which is 
            # not available in recent numpy releases
            csv_np = CSEPFile.read(self.Post2012CatalogFile,
                                   object,
                                   ',')
           
            # Append events to the catalog, make sure the file is closed
            # even if some event can't be parsed
            fhandle = CSEPFile.openFile(preprocessed_data_file, 
                                        CSEPFile.Mode.APPEND)
            try:
                # First row of the file is skipped
                for each_event in csv_np[1:, :]:
                    
                    origin_time = each_event[wfs_format.Origintime]
                   
                    event_format = CSEP.Time.ISO8601Format
                    # Some time stamps miss microsecond part 
                    if '.' in origin_time:
                        event_format += '.%f'
                       
                    event_date = datetime.datetime.strptime(origin_time, 
                                                            event_format)
                    
                    # Uncertainties are not available: append missing columns
                    # as NaN's
                    line = '\t'.join([each_event[wfs_format.Longitude],
                                      each_event[wfs_format.Latitude],
                                      str(CSEPUtils.decimalYear(event_date)),
                                      str(event_date.month),
                                      str(event_date.day),
                                      each_event[wfs_format.Magnitude],
                                      each_event[wfs_format.Depth],
                                      str(event_date.hour),
                                      str(event_date.minute),
                                      event_date.strftime('%S.%f'),
                                      'NaN', 'NaN', 'NaN', '\n'])
        
                    fhandle.write(line)
            finally:
                fhandle.close()

        # Check if focal mechanism should be retrieved
        if MatlabLogical.Boolean[self.__args[GeoNetNZDataSource.__focalMechanismOption]] == \
           MatlabLogical.Boolean[True]:
           
            # Pre-process focal mechanism file if it was downloaded
            # Extract directory path to download data to:
            data_path, _ = os.path.split(self.RawFile)
           
            # Open pre-processed file for writing
            fh_out = CSEPFile.openFile(os.path.join(data_path,
                                                    GeoNetNZDataSource.ProcessedFocalMechanismFile),
                                       CSEPFile.Mode.WRITE)
            try:
                with CSEPFile.openFile(os.path.join(data_path,
                                                    GeoNetNZDataSource.RawFocalMechanismFile)) as fh:
                    for each_row in fh:
                        # Reached footer of the file, exit the loop
                        if each_row.startswith(','):
                            break
                        
                        # Skip empty lines
                        if not each_row.strip():
                            continue
                       
                        fh_out.write(each_row.replace(',', '\t'))
            finally:
                fh_out.close()
   
      
    #----------------------------------------------------------------------------
    #
    # Import utility for pre-processed catalog data into internal CSEP ZMAP format
    #
    # Input: 
    #        raw_file - Pre-processed catalog data file
    #        catalog_file - Filename for imported catalog data. Default is None,
    #                       meaning don't save imported catalog to the file.
    #
    # Output: Numpy.array object with catalog data
    #
    @classmethod
    def importToCSEP (cls,
                      raw_file, 
                      catalog_file = None):
        """ Import pre-processed catalog data through 
            CSEPGeneric.Catalog.importZMAP() and return its result.
            
            'raw_file' is the pre-processed catalog data file, 'catalog_file' 
            is passed as the file to store imported catalog to."""

        # Fixed values that are passed to the import utility 
        import_options = {'horizontal_error' : 2.0,
                          'depth_error' : 5.0,
                          'seismic_network' : 1.0}
        
        imported_catalog = CSEPGeneric.Catalog.importZMAP(raw_file,
                                                          result_file = catalog_file,
                                                          **import_options)
        return imported_catalog
        
