"""
Module PDEDataSource
"""

__version__ = "$Revision$"
__revision__ = "$Id$"

import os, datetime, shutil, operator, time, ftplib

import CSEPFile, CSEPLogging, Environment, CSEP, CSEPGeneric
from CatalogDataSource import CatalogDataSource
from CSEPInputParams import CSEPInputParams


# Name of the environment variable that defines the directory path for the
# working copy of the SVN repository used to store raw PDE data as downloaded
# from the FTP site. When the variable is set, PDEDataSource.__init__ passes
# its value to the base class as 'svn_working_dir'.
ARCHIVE_ENV = 'PDE_ARCHIVE_DIR'


#--------------------------------------------------------------------------------
#
# PDEDataSource
#
# This class provides an interface to extract USGS/NEIC EHDF format of the PDE 
# catalog data. The web-based data source provides catalog in "final" monthly 
# format, and preliminary weekly data that overlaps with monthly catalog.
# This class acquires both data, and combines them in one file.
#
class PDEDataSource (CatalogDataSource):

    # Static data of the class
    Type = "PDE"
    
    # Class to represent EHDF catalog format
    class CharFormat (object):
        """ Column layout of one EHDF catalog record.

        The table below lists 1-based, inclusive character positions. The
        class attributes store the same fields as 0-based, half-open
        (begin, end) index pairs suitable for Python slicing, e.g.
        line[Date[0]:Date[1]].
        """
        #Char. Pos.       Description
        # 1-2             "GS"
        # 3-4             blank
        # 5-12            EVENT DATE in YYYYMMDD format
        #13-20            ORIGIN TIME in HHMMSSTH format
        #21-26            LATITUDE
        #        21-25        geographic latitude value in f5.3 format (DDTHT)
        #        26           N or S
        #27-33            LONGITUDE
        #        27-32        geographic longitude value in f6.3 format (DDDTHT)
        #        33           E or W
        #34-40            DEPTH
        #        34-37        depth value in f4.1 format (NNNT)
        #        38           depth control or quality indicator (D, G, N, * or ?)
        #                     Note: beginning with PDE 01, 2004, "N" no longer used and
        #                     "A" (=computer assigned depth) has been added.
        #        39-40        number of depth phases used (i2 - shows 99 if >=99)
        #41-47            HYPOCENTER QUALITY INDICATORS
        #        41-43        number of P or PKP arrivals used in solution (i3 format)
        #        44-46        standard deviation in f3.2 format (NTH)
        #        47           authority/quality indicator (&, *, % or ?) -- note that
        #                       for the PDE this has always been the horizontal or
        #                       epicenter quality.
        #48-51            GS AVERAGE MB
        #        48-49        MB value in f2.1 format (NT)
        #        50-51        number of amplitudes used (i2 - shows 99 if >=99)
        #52-56            GS AVERAGE MS
        #        52-53        Ms value in f2.1 format (NT)
        #        54-55        number of amplitudes used (i2 - shows 99 if >=99)
        #        56           component (currently only Z)
        #57-66            CONTRIBUTED MAGNITUDE NUMBER 1
        #        57-59        magnitude value in f3.2 format (NTH - with H usually 0)
        #        60-61        type (MW, ME, MS, MB, ML, LG, RG, MD or CL)
        #        62-66        contributor (left justified a5 format, blank if source
        #                         is NEIS)
        #67-76            CONTRIBUTED MAGNITUDE NUMBER 2
        #        67-69        magnitude value in f3.2 format (NTH - with H usually 0)
        #        70-71        type (MW, ME, MS, MB, ML, LG, RG, MD, CL or MG)
        #        72-76        contributor (left justified a5 format, blank if source
        #                         is NEIS)
        #77-79            FLINN-ENGDAHL GEOGRAPHIC REGION NUMBER (in i3 format)
        #                     uses 1995 revision - 757 regions - since 1/1/2000
        #80               MAXIMUM INTENSITY (a1 format - 1-9, X, E or T)
        #81-92            FLAGS
        #        81           macroseismic (H=heard, F=felt, D=damage, C=casualties)
        #        82           moment tensor (any source) published in monthly listing (M)
        #        83           isoseismal/intensity map (P = PDE or Monthly Listing or 
        #                         U = U.S. Earthquakes)
        #        84           GS fault plane solution (F)
        #        85           IDE event (X) -- prior to PDE 01, 2004
        #                     event quality flag (A,B,C,D,H,N) -- begin with PDE 01, 2004
        #        86           diastrophic phenomena (U = uplift, S = subsidence, 
        #                         F = faulting, 3 = U & S, 4 = U & F, 5 = S & F, 6 = all)
        #        87           tsunami (T or Q)
        #        88           seiche (S or Q)
        #        89           volcanism (V)
        #        90           non-tectonic source (E = explosion, I = collapse, 
        #                         C = coalbump or rockburst in coal mine, R = rockburst,
        #                         M = meteoritic source)
        #        91           guided waves in atmosphere/ocean (T = t wave, 
        #                         A = acoustic wave, G = gravity wave, B = gravity and
        #                     acoustic waves, M = multiple effects)
        #        92           ground, soil, water table and atmospheric phenomena
        #                        (L = liquefaction, G = geyser, S = landslide/avalanche,
        #                         B = sand blows, C = ground cracks not known to be
        #                         an expression of faulting, V = visual/lights,
        #                         O = unusual odors, M = multiple effects)
        #93-99            HYPOCENTER CONTRIBUTOR
        #        93           "<"
        #        94-98        contributor (left justified, "-P" means preliminary)
        #        99           ">"
        # NOTE(review): the line for position 99 originally ended with the text
        # "EventId = 0", which looks like a stray fragment rather than part of
        # the format description - confirm before relying on it.

        # EVENT DATE field: slice [4:12] covers character positions 5-12
        Date = (4, 12)
        # ORIGIN TIME field: slice [12:20] covers character positions 13-20
        Time = (12, 20)

    
    # Host name of the FTP site to download from (prepend 'ftp://' if using 
    # web browser)
    __downloadSite = 'hazards.cr.usgs.gov'
    
    # Directory for SVN working copy if used to archive raw data; stays None 
    # unless the environment variable named by ARCHIVE_ENV is set when an 
    # instance is created
    __archiveDir = None
    
    # Date of the first available yearly catalog (only the year is used by 
    # download())
    __catalogStart = datetime.date(1990, 1, 1)

    # Subdirectory of the FTP site that stores yearly and monthly catalogs
    __monthlyCatalogDirectory = "pde"
    
    # Subdirectory of the FTP site that stores weekly catalogs
    __weeklyCatalogDirectory = "weekly"
    
    # Prefix for catalog filenames
    __catalogFilenamePrefix = 'ehdf'
    
    # File extensions to store in SVN repository
    __dataTypes = ['.dat']

    # Class-wide logger, created by the first instance of the class
    __logger = None
    
    
    #----------------------------------------------------------------------------
    #
    # Initialization.
    #
    # Input:
    #        start_date - Start date for the catalog data. Default is 1/1/1990.
    #        download_data - Flag if raw data should be downloaded. Default is
    #                        True.
    #        pre_process_data - Flag if raw data should be pre-processed.
    #                           Default is True.
    #        args - Optional list of arguments that is specific to the
    #               data source. For example, flag if preliminary data
    #               should be downloaded from the CMT data source. Default
    #               is None.
    # 
    def __init__ (self, 
                  start_date = datetime.datetime(1990, 1, 1), 
                  download_data = True,
                  pre_process_data = True,
                  args = None):
        """ Initialization for PDEDataSource class.

        Input:
               start_date - Start date for the catalog data.
                            Default is 1/1/1990.
               download_data - Flag if raw data should be downloaded.
                               Default is True.
               pre_process_data - Flag if raw data should be pre-processed.
                                  Default is True.
               args - Optional data source specific arguments. Accepted
                      for interface compatibility, not used by this class.
        """

        # The logger is shared by all instances: create it on first use only
        if PDEDataSource.__logger is None:
            logger_name = PDEDataSource.__name__
            PDEDataSource.__logger = CSEPLogging.CSEPLogging.getLogger(logger_name)

        # SVN repository is used to archive downloaded raw catalog data only
        # if the environment variable is set; otherwise the class-level
        # value is left untouched
        try:
            PDEDataSource.__archiveDir = os.environ[ARCHIVE_ENV]
        except KeyError:
            pass

        CatalogDataSource.__init__(self,
                                   start_date,
                                   download_data,
                                   pre_process_data,
                                   svn_working_dir = PDEDataSource.__archiveDir)
        

    #--------------------------------------------------------------------
    #
    # Return source type as defined by the class.
    #
    # Input: None.
    #
    # Output: string representing the type of the source.
    #
    def type (self):
        """ Return string representation of the source.

        Output: value of the class attribute PDEDataSource.Type ("PDE").
        """

        return PDEDataSource.Type
     

    #---------------------------------------------------------------------------
    #
    # Return file format of pre-processed catalog data.
    #
    # Input: None.
    #
    # Output: String representing the file format of pre-processed catalog data.
    #
    def fileFormat (self):
        """ String representing the file format of pre-processed catalog data.

        Output: fixed description string that names the raw EHDF format and
                points to the format description on the FTP site.
        """

        return 'raw EHDF (please refer to ftp://hazards.cr.usgs.gov/pde/ehdf.txt)'


    #--------------------------------------------------------------------
    #
    # Download catalog data from specified source.
    #
    # Input:
    #        test_date - Date for raw catalog data.
    #
    # Output: None.
    #
    def download (self, test_date):
        """ Extract raw PDE catalog for specified test date.

        The cumulative raw catalog file (self.RawFile) is assembled from
        files retrieved from the FTP site, in this order:
          1) yearly files, beginning with the first catalog year, until a
             yearly file can not be retrieved;
          2) monthly files, beginning with January of the first year that
             has no yearly file, until a monthly file can not be retrieved
             or the month of the test date has been downloaded;
          3) weekly files, beginning with the ISO week of the last month
             attempted in step 2, through the year of the test date;
          4) the 'qed' file from the weekly directory.
        Yearly and monthly files are appended as a whole. Weekly and 'qed'
        files may overlap with data already stored, therefore only events
        dated after the last stored event date are appended.

        Input:
               test_date - datetime object: date for raw catalog data.

        Output: None.

        Raises RuntimeError if the 'qed' file can not be retrieved. The raw
        catalog file handle is closed even if an error occurs.
        """

        site = PDEDataSource.__downloadSite
        monthly_dir = PDEDataSource.__monthlyCatalogDirectory
        weekly_dir = PDEDataSource.__weeklyCatalogDirectory
        prefix = PDEDataSource.__catalogFilenamePrefix
        extension = CSEPFile.Extension.ASCII

        # Directory path to download data to (directory of the raw file)
        data_path = os.path.dirname(self.RawFile)

        # Download yearly (concatenated monthly) catalogs until the first
        # year that is not available
        download_year = PDEDataSource.__catalogStart.year

        while True:
            time.sleep(2)
            local_yearly_catalog = os.path.join(data_path,
                                                '%s%s%s' %(prefix,
                                                           download_year,
                                                           extension))

            if CSEP.FTP.retrieve(site,
                                 monthly_dir,
                                 local_yearly_catalog) is not True:
                # No more yearly catalogs are available, proceed to
                # monthly catalogs
                break

            # Concatenate downloaded file to the cumulative file
            Environment.invokeCommand('cat %s >> %s' %(local_yearly_catalog,
                                                       self.RawFile))
            download_year += 1

        # Download final catalog that is provided individually (by month)
        curr_date = datetime.datetime(download_year, 1, 1)

        # Keep track of last attempted month for final catalog - to figure
        # out beginning of the weekly catalog
        download_month = curr_date

        while curr_date.year <= test_date.year:

            # Get all months if it's not the testing year, otherwise stop
            # at the month of the test date
            end_month = 12
            if curr_date.year == test_date.year:
                end_month = test_date.month

            # Step through all months of the current download year
            for month_index in range(1, end_month + 1):

                download_month = curr_date.replace(month=month_index)

                # Format filename for the current month of the year
                filename = os.path.join(data_path,
                                        "%s%s%s" %(prefix,
                                                   download_month.strftime('%Y%m'),
                                                   extension))

                time.sleep(2)
                if CSEP.FTP.retrieve(site,
                                     monthly_dir,
                                     filename) is False:

                    # First month that is missing from the final catalog is
                    # the start date for the weekly catalog
                    PDEDataSource.__logger.info('Weekly catalog start date is %s' %download_month)

                    # To break outermost loop - set year to last year
                    curr_date = curr_date.replace(year=test_date.year)
                    break

                # Concatenate downloaded file to the cumulative file
                Environment.invokeCommand('cat %s >> %s' %(filename,
                                                           self.RawFile))

            # Increment year for download
            curr_date = curr_date.replace(year=curr_date.year + 1)

        # Location of weekly catalogs (used in log and error messages only)
        url = os.path.join(site,
                           weekly_dir)

        # Weekly downloads begin with the last "download_month" (YYYY/MM/1)
        curr_date = download_month

        # Last downloaded date (monthly and weekly files may overlap in
        # events they contain - have to append only new events from freshly
        # downloaded weekly files)
        last_download_date = download_month - datetime.timedelta(days=1)

        # Week info from ISO calendar: (year, week number, weekday)
        year_num, week_num = curr_date.isocalendar()[:2]

        # On the year border, ISO week might belong to the previous year
        if year_num != curr_date.year:
            week_num = 1

        raw_fhandle = CSEPFile.openFile(self.RawFile,
                                        CSEPFile.Mode.APPEND)

        # try/finally guarantees that the raw file is closed even if
        # a download or parsing step fails
        try:
            # Download weekly catalog data
            while curr_date.year <= test_date.year:

                PDEDataSource.__logger.info("Weekly downloads: current date=%s, test date=%s" %(curr_date,
                                                                                                test_date))
                # Last week of the year - PDE may use 52 weeks even if there
                # are 53 weeks in calendar, so have to check
                end_week = 53

                # If it's the year of the test date, then retrieve up to the
                # test date. PDE sometimes includes last days of the week
                # into next ISO week, hence one extra week.
                if curr_date.year == test_date.year:
                    end_week = test_date.isocalendar()[1] + 1

                # Download all available weeks for the year
                while week_num <= end_week:
                    # Format file for next week to download
                    last_weekly_file = os.path.join(data_path,
                                                    '%s%s%sw%s' %(prefix,
                                                                  curr_date.year,
                                                                  "{0:02d}".format(week_num),
                                                                  extension))

                    # Due to unknown name formatting on PDE's FTP site, just
                    # increment the week and try again to download if
                    # expected week file doesn't exist: for example, PDE's
                    # 2001140w.dat includes 2011/10/1 events which should be
                    # in ISO week #39
                    time.sleep(2)
                    if CSEP.FTP.retrieve(site,
                                         weekly_dir,
                                         last_weekly_file) is False:
                        PDEDataSource.__logger.info('%s: file %s does not exist' %(url,
                                                                                   last_weekly_file))

                    else:
                        event_date = PDEDataSource.__appendNewEvents(last_weekly_file,
                                                                     raw_fhandle,
                                                                     last_download_date)
                        if event_date is not None:
                            last_download_date = event_date

                    week_num += 1

                # Start new year
                curr_date = curr_date.replace(year=curr_date.year + 1, month=1, day=1)
                week_num = 1

            # Download last few weeks of the catalog from weekly 'qed' file
            filename = os.path.join(data_path,
                                    '%sqed%s' %(prefix,
                                                extension))

            time.sleep(2)
            if CSEP.FTP.retrieve(site,
                                 weekly_dir,
                                 filename):

                PDEDataSource.__logger.info("Extracting events since %s from %s" %(last_download_date,
                                                                                   filename))
                PDEDataSource.__appendNewEvents(filename,
                                                raw_fhandle,
                                                last_download_date)
            else:

                error_msg = "Failed to retrieve %s from %s" \
                            %(filename,
                              url)

                PDEDataSource.__logger.error(error_msg)
                raise RuntimeError(error_msg)

        finally:
            raw_fhandle.close()

        ### 1) Check for data change if any and commit updates to SVN if there is a change,
        ### 2) Tag main trunk in repository
        self.SVN.commit("%s data to process %s test date, committed on %s" 
                        %(PDEDataSource.Type,
                          test_date.date(),
                          datetime.datetime.now()),
                        PDEDataSource.__dataTypes)


    #----------------------------------------------------------------------------
    #
    # Append events of downloaded catalog file, which are dated after specified 
    # date, to the open raw catalog file.
    #
    # Input:
    #        catalog_file - Path to downloaded EHDF catalog file.
    #        raw_fhandle - Open handle to the cumulative raw catalog file.
    #        last_download_date - datetime object: only events dated after 
    #                             this date are appended.
    #
    # Output: Date of the last event read from the file, or None if the file
    #         has no events.
    #
    @staticmethod
    def __appendNewEvents (catalog_file, raw_fhandle, last_download_date):
        """ Append events dated after 'last_download_date' to the raw file
            and return date of the last event read (None if there are none)."""

        date_begin, date_end = PDEDataSource.CharFormat.Date
        event_date = None

        # 'rU' (universal newlines) is kept for the Python 2 interpreter
        # this module targets
        with open(catalog_file, 'rU') as fhandle:
            for line in fhandle:
                # Blank lines carry no event and can't be parsed as a date
                if not line.strip():
                    continue

                # Extract date of the event
                event_date = datetime.datetime.strptime(line[date_begin:date_end],
                                                        "%Y%m%d")
                if event_date > last_download_date:
                    # Append dates after last downloaded date
                    raw_fhandle.write(line)

        return event_date
      

    #----------------------------------------------------------------------------
    #
    # Pre-process catalog data into reduced format: not done for now.
    # M8 evaluation test (the only place where PDE is used) does all filtering
    # internally for now.
    #
    def preProcess (self, raw_data_file, preprocessed_data_file):
        """ EHDF format of the PDE catalog is used in original raw format.

        Input:
               raw_data_file - Path to the raw catalog file.
               preprocessed_data_file - Path to write pre-processed data to.

        Output: None. The raw file is copied to 'preprocessed_data_file'
                without any changes.
        """
            
        shutil.copyfile(raw_data_file,
                        preprocessed_data_file)
        
        
    #----------------------------------------------------------------------------
    #
    # Import utility for pre-processed catalog data into internal CSEP ZMAP format
    #
    # Input: 
    #        raw_file - Pre-processed catalog data file
    #        catalog_file - Optional file to save imported catalog to. Default
    #                       is None.
    #
    # Output: catalog_file
    #
    @classmethod
    def importToCSEP (cls,
                      raw_file,
                      catalog_file = None):
        """ Import utility for pre-processed catalog data.

        Input:
               raw_file - Pre-processed catalog data file.
               catalog_file - Optional file to save imported catalog to.
                              Accepted for interface compatibility, not used.

        Output: list of text lines as read from 'raw_file'; the lines are 
                not converted to any other format.
        """

        with CSEPFile.openFile(raw_file) as raw_handle:
            return raw_handle.readlines()

    
    #----------------------------------------------------------------------------
    #
    # Cut catalog data to geographical area.
    #
    # Input: 
    #         catalog_data - Numpy.array object with catalog data stored in
    #                         np.object datatype.
    #         area_file - Filename for geographical area.
    #         result_file - Filename for result catalog data.
    #
    # Output: result_file
    #
    @classmethod
    def cutToArea (cls,
                   catalog_data, 
                   area_file, 
                   result_file = None):
        """ Cut catalog data to geographical area.

        No filtering is applied by this data source: 'area_file' and
        'result_file' are ignored, and 'catalog_data' is returned unchanged.
        """

        return catalog_data
     

    #----------------------------------------------------------------------------
    #
    # Cut catalog data by time period.
    #
    # Input: 
    #         catalog_data - Numpy.array object with catalog data stored in
    #                         np.object datatype.
    #         start_time - datetime object that represents start date for 
    #                      the period.
    #         stop_time - datetime object that represents end date for the period.
    #         result_file - Filename for result catalog data.      
    #         start_time_sign - Matlab sign for start_time boundary. 
    #                           Default is greater or equal sign (>=).
    #         stop_time_sign - Matlab sign for stop_time boundary. 
    #                           Default is less or equal sign (<=).
    #
    # Output: result_file
    #
    @classmethod
    def cutToTimePeriod (cls,
                         catalog_data, 
                         start_time, 
                         stop_time,
                         result_file = None,
                         start_time_sign = operator.ge,
                         stop_time_sign = operator.le):
        """ Cut catalog data to time period.

        Filtering by time is not implemented yet: all arguments other than
        'catalog_data' are ignored, and 'catalog_data' is returned unchanged.
        """

        # TODO: filter by time
        return catalog_data
               
        
    #-----------------------------------------------------------------------------
    #
    # modifications
    # 
    # This method applies uncertainties to the catalog data. The filtering of the 
    # result catalogs is kind of hidden from the caller (using Matlab...). 
    # It applies the same filtering as for original catalog defined by 
    # CSEPGeneric.Catalog.filter() method.
    #
    # Input: 
    #         catalog_file - Filename for catalog data
    #         area_file - Area file for catalog filtering
    #         result_file - Filename for result data
    #         probability_column - Column index for independence probability. This
    #                              column is available for declustered catalog only. 
    #                              Default is 0.
    #
    # Output: Directory that stores catalog modifications
    # 
    @classmethod
    def modifications (cls,
                       catalog_file, 
                       area_file, 
                       threshold,
                       result_file, 
                       probability_column = 0):
        """ Create catalog modifications by applying randomized uncertainties 
            to the original catalog.

            This data source does not generate catalog uncertainties: all
            arguments are ignored and None is always returned."""

        # Don't generate catalog uncertainties
        return None


    #----------------------------------------------------------------------------
    #
    # Filter catalog data based on specified geographical location, 
    # minimum magnitude, maximum depth, and starting date for the forecast 
    # model.
    #
    # Input: 
    #         catalog_data - Numpy.array object with catalog data stored in
    #                         np.object datatype.
    #         area_file - Area file for catalog filtering
    #         result_file - Filename for result data.
    #         result_variable - Name of Matlab variable to store results to. 
    #                           Default is 'mCatalog'.
    #
    # Output: result_file
    #
    @classmethod
    def filter (cls,
                catalog_data, 
                area_file, 
                threshold,
                result_file = None):
        """ Filter catalog data.

        No filtering is applied by this data source: 'area_file' and
        'threshold' are not used. When 'result_file' is given, every element
        of 'catalog_data' is written to that file in order.

        Output: 'catalog_data' as passed in.
        """

        # Nothing to store - hand the data back as is
        if result_file is None:
            return catalog_data

        with CSEPFile.openFile(result_file,
                               CSEPFile.Mode.WRITE) as result_handle:
            for event_line in catalog_data:
                result_handle.write(event_line)

        return catalog_data


    #----------------------------------------------------------------------------
    #
    # decluster
    # 
    # This method declusters catalog data according to the Reasenberg declustering
    # algorithm.
    #
    # Input: 
    #         catalog_file - Filename for catalog data.
    #         result_file - Filename for result data.
    #
    # Output: result_file
    #
    @classmethod
    def declusterReasenberg (cls,
                             catalog_file):
        """ Decluster catalog.

        Declustering is not implemented for this data source.

        Input:
               catalog_file - Filename for catalog data (not used).

        Raises RuntimeError on every call.
        """

        # Call form of 'raise' is valid for both Python 2 and Python 3
        raise RuntimeError("declusterReasenberg() method is not implemented")

