"""
Utility to stage raw catalogs that were downloaded by the testing
center in real time, so they can be used for retrospective evaluation

"""

import os, glob, datetime, calendar, shutil

import CSEPFile, ANSSDataSource, CMTDataSource, CSEP
from CatalogDataSource import CatalogDataSource
from CSEPPropertyFile import CSEPPropertyFile
from DataSourceFactory import DataSourceFactory
from CSEPLogging import CSEPLogging


# Class to locate existing raw catalog files and to stage them in 
# forecast group directory for retrospective evaluation
class CatalogFiles:
   """ Locate raw catalog files downloaded in real time and stage them for
       retrospective evaluation.

       Runtime directories are searched under
       <rawcatalog_dir>/YYYY_MM/YYYYMMDD* (optionally with a 'pid_*'
       sub-directory) for every day within [StartDate; NowDate]. The raw
       catalog is identified through the metadata files stored next to it, and
       is copied to <stage_dir>/YYYY-MM-DD/import_raw.dat, where the directory
       date is the runtime date minus the waiting period."""

   # Search file system for all dates within [StartDate, NowDate] interval
   # Raw catalog data is available since 2010/9/9
   StartDate = datetime.datetime(2010, 9, 9)
   #NowDate = datetime.datetime.now()
   NowDate = datetime.datetime(2012, 4, 15)

   # Class-level default of the dry-run flag; __init__ sets it per instance
   __dryRun = True
   
   # Number of days subtracted from the runtime date to get the stage date
   __waitingPeriodDays = 31
  
   # Glob pattern of optional per-process sub-directories of a runtime directory
   __pidPattern = 'pid_*'
   __runtimeDirFormatString = "%Y%m%d%H%M%S"
   
   # Logger for the class
   __logger = None

    
   def __init__ (self,
                 rawcatalog_dir,
                 dry_run): # dry run: don't stage the files, only display the info
       """ Remember where raw catalogs are stored and if files should be staged.

           rawcatalog_dir - top-level directory path to real-time catalogs
           dry_run - if True, nothing is written to the staging directory"""

       # Top level directory path to real-time catalogs
       self.__rawCatalogPath = rawcatalog_dir
       self.__dryRun = dry_run

       # Logger is shared by all instances, create it only once
       if CatalogFiles.__logger is None:
           CatalogFiles.__logger = CSEPLogging.getLogger(CatalogFiles.__name__)

       
   def stage (self,
              stage_dir):         
       """ Search file system for directories with raw catalogs - step through
           defined date interval [StartDate; NowDate], one day at a time.

           For every day the first runtime directory (in name order) that
           provides an existing, non-empty raw catalog is used. The catalog is
           staged under 'stage_dir'/YYYY-MM-DD, where the date is the runtime
           date minus the waiting period. In dry-run mode nothing is written,
           the intended copy is only logged.

           stage_dir - top-level directory to stage raw catalogs in"""

       # Only the date part of StartDate is used: test dates are at midnight.
       # Stepping by timedelta visits the same days, in the same order, as
       # nested year/month/day loops would
       test_date = datetime.datetime(CatalogFiles.StartDate.year,
                                     CatalogFiles.StartDate.month,
                                     CatalogFiles.StartDate.day)
       one_day = datetime.timedelta(days=1)

       while test_date <= CatalogFiles.NowDate:
           self.__stageDay(test_date,
                           stage_dir)
           test_date += one_day


   #----------------------------------------------------------------------------
   # Stage raw catalog of a single day
   #----------------------------------------------------------------------------
   def __stageDay (self,
                   test_date,
                   stage_dir):
       """ Stage raw catalog that was retrieved on 'test_date', if any.

           test_date - datetime object (midnight) of the runtime day
           stage_dir - top-level directory to stage raw catalogs in"""

       # Dispatcher runtime directories are organized by YYYY_MM folders:
       # <rawCatalogPath>/YYYY_MM/YYYYmmddHHMMSS[/pid_<PID>]
       # NOTE(review): runtime directories placed directly under the top-level
       # directory (without YYYY_MM folder) are not searched - confirm that
       # no such data exists for the interval of interest
       month_directory = os.path.join(self.__rawCatalogPath,
                                      test_date.strftime("%Y_%m"))

       # glob() returns entries in arbitrary order: sort them to make the
       # choice of the "very first" directory reproducible
       runtime_dirs = sorted(glob.glob(os.path.join(month_directory,
                                                    "%s*" %test_date.strftime("%Y%m%d"))))

       if not runtime_dirs:
           CatalogFiles.__logger.warning("No runtime directories are found for %s under %s" %(test_date,
                                                                                              self.__rawCatalogPath))
           return

       # Take waiting period into consideration: runtimeDate - waitingPeriodDays
       stage_date = test_date - datetime.timedelta(days=CatalogFiles.__waitingPeriodDays)

       # Each runtime directory is expected to have raw catalog file, just use
       # very first usable one since catalogs are most likely identical
       # when retrieved within minutes on the same day
       for catalog_dir in runtime_dirs:

           # At some point CSEP added "pid_*" sub-directory to store
           # raw catalogs to - to prevent multiple processes using
           # the same runtime directory
           pid_dirs = sorted(glob.glob(os.path.join(catalog_dir,
                                                    CatalogFiles.__pidPattern)))

           # If multiple pid directories exist, take very first one
           if pid_dirs:
               catalog_dir = pid_dirs[0]

           ### Find raw catalog file in runtime directory through metadata files
           raw_catalog = self.__findByMetadataFile(catalog_dir)

           if not raw_catalog:
               # Raw catalog is not present in runtime directory
               CatalogFiles.__logger.warning("===>No catalog is found for %s (stageDate=%s) under %s" %(test_date,
                                                                                                        stage_date,
                                                                                                        catalog_dir))
               continue

           # Metadata may refer to a file that is gone or was never filled:
           # such catalog is of no use, try next runtime directory instead
           if not os.path.exists(raw_catalog) or \
              os.path.getsize(raw_catalog) == 0:
               CatalogFiles.__logger.warning("Raw catalog %s is missing or empty, trying next runtime directory" %raw_catalog)
               continue

           stage_day_dir = os.path.join(stage_dir,
                                        stage_date.strftime("%Y-%m-%d"))

           if self.__dryRun:
               CatalogFiles.__logger.info("Dry run: %s would be copied to %s" %(raw_catalog,
                                                                                stage_day_dir))
           else:
               self.__copyToStage(raw_catalog,
                                  stage_day_dir)

           # One catalog per day is enough
           return


   #----------------------------------------------------------------------------
   # Copy raw catalog and its metadata file to the staging directory
   #----------------------------------------------------------------------------
   def __copyToStage (self,
                      raw_catalog,
                      stage_day_dir):
       """ Copy 'raw_catalog' to 'stage_day_dir' as "import_raw.dat", the
           metadata file of the catalog (if it exists) is copied under its
           original name."""

       # Directory may exist if the program was invoked before:
       # os.makedirs() raises an exception for existing directory
       if not os.path.isdir(stage_day_dir):
           os.makedirs(stage_day_dir)

       CatalogFiles.__logger.info("Copying %s to %s" %(raw_catalog,
                                                       stage_day_dir))

       shutil.copyfile(raw_catalog,
                       os.path.join(stage_day_dir,
                                    "import_raw.dat"))

       meta_file = raw_catalog + CSEPPropertyFile.Metadata.Extension

       if os.path.exists(meta_file):
           # Copy metadata file to the stage directory for traceability
           shutil.copyfile(meta_file,
                           os.path.join(stage_day_dir,
                                        os.path.basename(meta_file)))


   #----------------------------------------------------------------------------
   # Locate raw catalog files based on existing metadata files under provided
   # directory
   #----------------------------------------------------------------------------
   def __findByMetadataFile (self, 
                             dir_path): 
       """ Search metadata files for raw catalog in 'dir_path'.

           Returns path of the raw catalog data file under 'dir_path' as
           recorded by the first matching metadata file, or a false value
           (None or an empty entry) if there is no such metadata file.
           Existence of the returned file is not checked."""
     
       ### Search for metadata files. glob() does not guarantee any order, the
       ### files are checked in the order they are returned
       meta_files = glob.glob('%s/%s*%s' %(dir_path,
                                           CSEP.NAMESPACE,
                                           CSEPPropertyFile.Metadata.Extension))
       
       if not meta_files:

           # File does not exist
           CatalogFiles.__logger.warning("No metadata files exist under %s directory" %dir_path)
           return None
       
       # Find metadata file that corresponds to raw catalog: original name of
       # the file it describes is the one of raw catalog file
       for each_file in meta_files:

           meta_obj = CSEPPropertyFile.Metadata(each_file)
           original_name = os.path.basename(meta_obj.originalDataFilename)
           
           if original_name != CatalogDataSource._RawFile:
               continue

           data_file = meta_obj.info[CSEPPropertyFile.Metadata.DataFileKeyword]

           CatalogFiles.__logger.info("Found metadata file for raw catalog in %s: %s" %(dir_path,
                                                                                       data_file))
           CatalogFiles.__logger.info("Found raw catalog by metadata: %s" %data_file)

           if data_file:
               # Metadata may store a path that is not valid any longer: the
               # file is expected in the directory being searched
               data_file = os.path.join(dir_path,
                                        os.path.basename(data_file))

           # Only the first matching metadata file is considered
           return data_file

       return None


def main (argv=None):
    """ Command-line entry point: parse options and stage raw catalogs.

        argv - list of command-line arguments without the program name,
               sys.argv[1:] is used if None

        Terminates through optparse error handling (usage message and
        SystemExit) if --rawCatalogDir or --stageDir option is missing."""

    import optparse
    
    command_options = optparse.OptionParser()

    command_options.add_option('--rawCatalogDir',
                               dest='catalog_dir',
                               default=None,
                               help='Path to directory with raw catalog files')

    command_options.add_option('--stageDir',
                               dest='stage_dir',
                               default=None,
                               help='Directory to stage raw catalogs in')

    # The option turns dry run OFF: without it no files are staged
    command_options.add_option('--disableDryRun',
                               dest='dry_run',
                               default=True,
                               action='store_false',
                               help='Disable dry run and stage the files. By default dry run is enabled: the files are only reported, not staged.')
    
    
    (values, args) = command_options.parse_args(argv)

    # Both directories are used to build file paths: report missing options
    # right away instead of failing later within path manipulations
    if values.catalog_dir is None:
        command_options.error("--rawCatalogDir option is required")

    if values.stage_dir is None:
        command_options.error("--stageDir option is required")

    
    c = CatalogFiles(values.catalog_dir,
                     values.dry_run)
    c.stage(values.stage_dir)


if __name__ == "__main__":
    
    main()
    
