//*********************************************************
// Copyright  2018 by FM . Please copy and distribute. "Share the fun"
//*********************************************************
// Description:
//  This library contains the functions to analyse script
//  code. Analysis will result in:
//
//  - total number of lines
//  - Percentage of white lines
//  - percentage of comment versus code
//    - we take the comment but only count a-z, 0-9
//    - we take the code but remove all the blanks
//
// ********************************************************
  
//module:TopAce_Script_analysis
//prefix:ta_ScriptCode_


// ********************************************************
// Description:
//  will analyse a script and return various 
//  'metrics'
//  These include:
//
//  - Total number of lines
//  - Number of white lines in the code
//  - Real number of bytes in code
//  - How much white lines per line of code
//  - How many bytes real code
//  - How many bytes real comment
//  - How many bytes of comment per byte of code
//  - McCabe index
//  - McCabe per lines of code
//
// Usage:
//  ta_ScriptCode_AnalyseScript( "c:\\tmp\\script.txt", mResult );
//
// Returns:
//  Nothing. Everything returned through the mapping
//
// ********************************************************

void ta_ScriptCode_AnalyseScript(
  string strScriptFile,    // absolute path to a script file
  mapping &mResult         // The mapping where the results are returned
)
{
  // Read the complete file content into one string.
  // NOTE(review): the result of fileToString is not checked here, so a
  // file that cannot be read is analysed as whatever ends up in strSource.
  string strSource;
  fileToString( strScriptFile, strSource );

  // The actual analysis works on the source text only
  ta_ScriptCode_AnalyseScriptSourceCode( strSource, mResult );
}

void ta_ScriptCode_AnalysePanel(
  string strPanelFile,     // Relative path to a panel
  mapping &mResult         // The mapping where the results are returned
)
{
  // Isolate the script code from the XML converted panel and hand it
  // straight over to the source code analysis
  ta_ScriptCode_AnalyseScriptSourceCode( ta_XML_IsolateScriptFromPanel( strPanelFile ), mResult );
}
  
// ********************************************************
// Description:
//  will analyse actual script source code and return various 
//  'metrics'
//  These include:
//
//  - Total number of lines
//  - Number of white lines in the code
//  - Real number of bytes in code
//  - How much white lines per line of code
//  - How many bytes real code
//  - How many bytes real comment
//  - How many bytes of comment per byte of code
//  - McCabe index
//  - McCabe per lines of code
//
// Usage:
//  ta_ScriptCode_AnalyseScriptSourceCode( strScript, mResult );
//
// Returns:
//  Nothing. Everything returned through the mapping
//
// ********************************************************

void   ta_ScriptCode_AnalyseScriptSourceCode(
  string strScript,        // Just the script code
  mapping &mResult         // The mapping where the results are returned
)
{
  // *****************************************************************
  // Line counts
  // *****************************************************************

  // How many lines do we have in total (counted before anything is stripped)
  int iTotalLines = dynlen( strsplit( strScript, "\n" ));

  // Cut the comments (both "//" and "/* .. */") out of strScript.
  // strScript is passed by reference and contains only code afterwards;
  // the comments themselves are kept so that they can be measured below.
  string strComments = ta_ScriptCode_IsolateComments( strScript );

  // Count the white lines and remove them from strScript as well
  int iNumwhiteLines = ta_ScriptCode_IsolateWhiteLines( strScript );

  // What is left now are the lines that carry real code
  int iNumLinesRealCode = dynlen( strsplit( strScript, "\n" ));

  // How many white lines / line of (real) code.
  // Stays 0.0 when there is no code at all (avoids a division by zero).
  float fWhiteRatio = 0.0;
  if( iNumLinesRealCode > 0 )
  {
    fWhiteRatio = (float)iNumwhiteLines / (float)iNumLinesRealCode;
  }

  // *****************************************************************
  // Now determine the 'pure' characters for code and comments
  // *****************************************************************
  string strPureCode     = ta_ScriptCode_PureCode( strScript );
  string strPureComments = ta_ScriptCode_PureComments( strComments );

  int iBytesRealCode     = strlen( strPureCode );
  int iBytesRealComments = strlen( strPureComments );

  // Ratio comments/code, explicitly 0.0 when there is no code
  float fRatio = 0.0;
  if( iBytesRealCode > 0 )
  {
    fRatio = (float)iBytesRealComments / (float)iBytesRealCode;
  }

  // *****************************************************************
  // Determine McCabe (on the code without comments and white lines)
  // *****************************************************************
  int iMcCabe = ta_ScriptCode_DetermineMcCabe( strScript );

  // McCabe/lines ratio, explicitly 0.0 when there is no code
  float fMcCabeRatio = 0.0;
  if( iNumLinesRealCode > 0 )
  {
    fMcCabeRatio = (float)iMcCabe / (float)iNumLinesRealCode;
  }

  // Now store the various results
  mResult[ "numlines"                 ] = iTotalLines;        // Total number of lines
  mResult[ "whitelinescode"           ] = iNumwhiteLines;     // Number of white lines in the code
  mResult[ "reallinescode"            ] = iNumLinesRealCode;  // Number of lines of real code
  mResult[ "whitelinesperlinecode"    ] = fWhiteRatio;        // How many white lines per line of code
  mResult[ "bytesrealcode"            ] = iBytesRealCode;     // How many bytes real code
  mResult[ "bytesrealcomment"         ] = iBytesRealComments; // How many bytes real comment
  mResult[ "bytescommentperbytescode" ] = fRatio;             // How many bytes of comment per byte of code
  mResult[ "mccabe"                   ] = iMcCabe;            // McCabe index
  mResult[ "mccabeperlinescode"       ] = fMcCabeRatio;       // McCabe per lines of code
}

// ********************************************************
// Description:
//  Will take a source code and will isolate all the comments.
//  The script will then just contain the pure code (without comments)
//  
//
// Usage:
//   ta_ScriptCode_IsolateComments( strMyScript );
//
// Returns:
//   The comments that have been cut out of the source code
//   (so that they can be counted later on)
//
// ********************************************************

string ta_ScriptCode_IsolateComments(
  string &strScript        // The whole script; on return: the script without comments
)
{
  string strComments;    // receives the comments that we stripped out

  // *****************************************************************
  // Pass 1: the "//" line comments
  // *****************************************************************
  // NOTE: a "//" inside a string literal (e.g. "http://...") is also
  // treated as the start of a comment; this is a known simplification.
  dyn_string dstrLines = strsplit( strScript, "\n" );

  // Walk backwards so that dynRemove does not disturb the lines that
  // still have to be visited
  for( int t = dynlen( dstrLines ); t > 0; t-- )
  {
    string strLine = dstrLines[t];

    int iCommentPos = strpos( strLine, "//" );
    if( iCommentPos < 0 )
    {
      // no comment on this line
      continue;
    }

    // Because we walk backwards the comment is put in front, which keeps
    // the collected comments in their original order
    strComments = substr( strLine, iCommentPos + 2 ) + "\n" + strComments;
    strLine     = substr( strLine, 0, iCommentPos );

    // When the remainder of the line is empty or just blanks/tabs the
    // whole line is dropped, otherwise the code part is kept
    if( strlen( strltrim( strLine, " \t" )) == 0 )
    {
      dynRemove( dstrLines, t );
    }
    else
    {
      dstrLines[t] = strLine;
    }
  }

  // Now make the script whole again (every line is terminated by "\n")
  strScript = "";
  for( int t = 1; t <= dynlen( dstrLines ); t++ )
  {
    strScript += dstrLines[t] + "\n";
  }

  // *****************************************************************
  // Pass 2: the "/* .. */" block comments
  // *****************************************************************
  int iStart;
  while( (iStart = strpos( strScript, "/*" )) >= 0 )
  {
    // Look for the closing "*/" only BEHIND the opening "/*".
    // Searching from the start of the script would pick up a stray "*/"
    // located before the "/*"; that yields a negative comment length and
    // the "/*" would never be removed, so the loop would never end.
    string strTail = substr( strScript, iStart + 2 );
    int iRelEnd    = strpos( strTail, "*/" );

    // when no */ is found, then something is wrong and we should
    // jump out here (the unterminated comment stays in strScript)
    if( iRelEnd < 0 )
    {
      return strComments;
    }

    // Absolute position of the closing "*/" within strScript
    int iEnd = iStart + 2 + iRelEnd;

    // Isolate the comment, including its /* and */ markers
    strComments += substr( strScript, iStart, iEnd - iStart + 2 );

    // The script is what was before the /* plus what comes after the */
    strScript = substr( strScript, 0, iStart ) + substr( strScript, iEnd + 2 );
  }

  return strComments;
}


// ********************************************************
// Description:
//  Will take a source and determine the true white
//  lines. Will then count these white lines and take them
//  out of the script code
//  
//
// Usage:
//   int iNumWhite = ta_ScriptCode_IsolateWhiteLines( strMyScript );
//
// Returns:
//   The number of white lines
//   (and the cleaned script through 'strScript' )
//
// ********************************************************

int ta_ScriptCode_IsolateWhiteLines( 
  string &strScript        // The script; on return: the script without white lines
)
{
  // Number of white lines found so far
  int iNumWhite = 0;

  dyn_string dstrScriptLines = strsplit( strScript, "\n" );

  // Look for fully white lines.
  // We walk backwards so that dynRemove does not shift the lines that
  // still have to be checked.
  for( int t = dynlen( dstrScriptLines ); t > 0; t-- )
  {
    // A line is white when nothing is left after removing blanks and tabs.
    // The carriage return is removed as well: a script with CR/LF line ends
    // leaves a "\r" on every line after the split on "\n", and an otherwise
    // empty line must still be recognised as a white line.
    string strLineCheckCode = strltrim( dstrScriptLines[t], " \t\r" );

    if( strlen( strLineCheckCode ) == 0 )
    {
      dynRemove( dstrScriptLines, t );
      iNumWhite++;
    }
  }

  // Rebuild the script from the remaining lines
  // (newline between the lines, none after the last one)
  strScript = "";
  for( int t = 1; t <= dynlen( dstrScriptLines ); t++ )
  {
    if( t > 1 )
    {
      strScript += "\n";
    }
    strScript += dstrScriptLines[t];
  }

  return iNumWhite;
}

// ********************************************************
// Description:
//  Will take a piece of script code and will remove
//  all tabs, blanks and newline to get the
//  'pure' code
//  
//
// Usage:
//   strScript = ta_ScriptCode_PureCode( strScript );
//
// Returns:
//   The pure script code
//   (without blanks, tabs, newlines)
// ********************************************************

string ta_ScriptCode_PureCode( string strScript )
{
  // Everything in this list does not count as 'pure' code:
  // the blank, the tab and the newline
  dyn_string dstrToStrip = makeDynString( " ", "\t", "\n" );

  // Take them out one after the other
  for( int i = 1; i <= dynlen( dstrToStrip ); i++ )
  {
    strreplace( strScript, dstrToStrip[i], "" );
  }

  // strScript is a by-value copy, so the caller's string is untouched
  return strScript;
}

// ********************************************************
// Description:
//  A piece of comments might contain blanks, "/", "\"
//  and quite often a "*" or "=" is used to construct lines.
//
//  We determine the real characters which we qualify as 
//  'pure' comments
//  
//
// Usage:
//   strPureComment = ta_ScriptCode_PureComments( strComments );
//
// Returns:
//   Returns the 'pure' comments
//   That is : the real characters
// ********************************************************

string ta_ScriptCode_PureComments( string strAllComments )
{
  // Only these characters are regarded as 'real' comment content
  string strAllowed = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.,-_+=[]{}@#$%^&";

  string strPure;                             // collects the accepted characters
  int iLength = strlen( strAllComments );

  for( int i = 0; i < iLength; i++ )
  {
    char cCurrent = strAllComments[i];

    // Skip everything that is not in the allowed set
    // (blanks, slashes, '*' used to draw lines, newlines, ...)
    if( strpos( strAllowed, (string)cCurrent ) < 0 )
    {
      continue;
    }

    strPure += cCurrent;
  }

  return strPure;
}


// ********************************************************
// Description:
// The McCabe index (according to the Topaas document) is determined
// by the amount of if-then's, while, switch and for statements
//
// We iterate through the source and then count these keywords
//  
//
// Usage:
//   iMcCabe = ta_ScriptCode_DetermineMcCabe( strScript );
//
// Returns:
//   The number of 'statements' that we count according to McCabe
// ********************************************************

int ta_ScriptCode_DetermineMcCabe( string strScript )
{
  // The keywords that each add one to the McCabe index
  dyn_string dstrKeywords = makeDynString( "for(", "if(", "else", "switch(", "while(" );

  // The index starts at 1
  int iCount = 1;

  // Remove the blanks so that e.g. "if (" is found as "if("
  strreplace( strScript, " ", "" );

  for( int k = 1; k <= dynlen( dstrKeywords ); k++ )
  {
    // Count every occurrence of the current keyword
    int iHit = strpos( strScript, dstrKeywords[k] );
    while( iHit >= 0 )
    {
      iCount++;

      // Overwrite the first character of the hit so that the same
      // occurrence is not found again by the next search
      strchange( strScript, iHit, 1, 'x' );

      iHit = strpos( strScript, dstrKeywords[k] );
    }
  }

  return iCount;
}

// ********************************************************
// Description:
//  Is being called from the TopAce window
//  to do an analysis of various files
//
//  
//
// Usage:
//   ta_ScriptCode_AnalyseFiles( dstrFiles, t );
//
// Returns:
//   nothing
//
// ********************************************************
void ta_ScriptCode_AnalyseFiles( 
  dyn_string &dstrModules,              // What files to analyse
  shape PROGRESS
)
{
  // Handle the modules one by one
  for( int iModule = 1; iModule <= dynlen( dstrModules ); iModule++ )
  {
    // Look up the module object and ask it for its file names
    shared_ptr<ta_Module> pModule = g_ta_GlobalData.GetModule( dstrModules[iModule] );
    dyn_string dstrFiles = pModule.GetFileNames();

    for( int iFile = 1; iFile <= dynlen( dstrFiles ); iFile++ )
    {
      shared_ptr<ta_File> pFile = pModule.GetFile( dstrFiles[iFile] );

      // Script files are analysed through their absolute file name
      if( pFile.ScriptFile() )
      {
        string strAbsolutePath = pFile.GetAbsoluteFile();

        mapping mScriptMetrics;
        ta_ScriptCode_AnalyseScript( strAbsolutePath, mScriptMetrics );

        pFile.SetProps( mScriptMetrics );
      }

      // Panel files are analysed through their name relative to panels/.
      // This is a separate check (not an 'else') on purpose: both checks
      // are evaluated for every file.
      if( pFile.PanelFile() )
      {
        string strPanel = pFile.GetName();

        // The name is something like "panels/sample/camera.xml"
        // but we have to cut off the leading "panels/" part (7 characters)
        if( strpos( strPanel, "panels/" ) == 0 )
        {
          strPanel = substr( strPanel, 7 );
        }

        mapping mPanelMetrics;
        ta_ScriptCode_AnalysePanel( strPanel, mPanelMetrics );

        pFile.SetProps( mPanelMetrics );
      }
    }

    // Report the number of modules handled so far to the progress shape
    PROGRESS.progress( iModule );
  }
}
