// --------------------------------------------------------------------
// CAuthor.cxx
// Whatis:  Class for detecting the author of a file
// Authors: Esko 'Varpu' Ilola  EIL
// History: EIL 02-JUN-2002     Created this source
// --------------------------------------------------------------------
#include    "CAuthor.hxx"
#include    "CStorage.hxx"
#include    "CTableAuth.hxx"
#include    "CTableArep.hxx"
#include    "CTableFile.hxx"
#include    "CTablePack.hxx"
#include    "CTablePackFile.hxx"
#include    "CUnUtils.hxx"
#include    "CUnLevelInfo.hxx"
#include    "CMySqlConnect.hxx"
#include    "CMySqlWhere.hxx"
#include    <stdio.h>
#include    <string.h>

// --------------------------------------------------------------------
// Getting rid of html tags
// --------------------------------------------------------------------
// NULL-terminated list of lowercase HTML tag *prefixes* that
// CAuthor::PrepareLine strips from a line. Each entry is searched for with
// strstr() in a lowercased copy of the line, and everything from the match up
// to the next '>' is replaced by a single space. Because the entries carry no
// closing '>', an entry such as "<p" also matches any longer tag that starts
// with the same characters.
static const char *	__htmltag[] = {
	"<center", "</center",
	"<table", "</table",
	"<p", "</p",
	"<i", "</i",
	"<b", "</b",
	"<hr", "<br",
	"<tr", "</tr",
	"<td", "</td",
	"<font", "</font",
	"<ul", "</ul",
	"<span", "</span",
	"<strong", "</strong",
	"<html", "</html",
	"<a ", "</a",
	"<em", "</em",
	"<u", "</u",
	"<h1", "</h1",
	"<h2", "</h2",
	"<h3", "</h3",
	"<h4", "</h4",
	"<h5", "</h5",
	"<h6", "</h6",
	"<h7", "</h7",
	"<h8", "</h8",
	"<h9", "</h9",
	"<o", "</o",
	"<div", "</div",
	NULL	// terminator: PrepareLine stops scanning here
};

// --------------------------------------------------------------------
// local: Identifying author line using substring search
// --------------------------------------------------------------------
// NULL-terminated list of lowercase keywords that may introduce an author
// name on a text line. The ARRAY INDEX IS THE RANK: CAuthor::RankLine returns
// the index of the first keyword found on the line, CAuthor::TryTEXT keeps the
// line with the lowest index, and CAuthor::UseRankLine uses the same index to
// find where the name starts. Do not reorder entries without intending to
// change the ranking.
// NOTE(review): the trailing " ", "1" and "2" entries match almost any line;
// presumably they act as a last-resort fallback - confirm before changing.
static const char *	__authstr[] = {
	"autor=",  "developer=", "author=", "author(s)", "author (s)",
	"[author]", "[autor]",
	"author :", "authors :", "autor :", "author:", "autor:",
	"design by:", "design by ",
	"author info:", "author info ",
	"lead designer:", "lead designer ",
	"created by:", "created by ",
	"modified by:", "modified by ",
	"entwickler:", "developer:", 
	"author(skins)", "***author ",
	"copyrighted by ", "courtesy of ", " ", "1", "2",
	NULL	// terminator: ends the keyword scan in RankLine
};

// --------------------------------------------------------------------
// local:	These are used to identify the infamous "aka" string
// --------------------------------------------------------------------
// NULL-terminated list of lowercase "also known as" separators.
// CAuthor::TryTextLine searches for them in a lowercased copy of the author
// string and splits the string into two parts around the first usable match.
static const char *	__akastr[] = {
	" aka.", " aka ", " a.k.a.", " a.k.a ",
	NULL	// terminator
};

// --------------------------------------------------------------------
// public:  Constructors
// --------------------------------------------------------------------
// Detects the author of the file stored under aFileIdnt.
//
// The file record is selected from the file table. If the record's checksum
// is already known (TryCsum) the names come from there; otherwise the file is
// loaded from storage into a fresh temporary directory, analysed with
// AuthorForFile and removed again. When a long name was found and
// aCreateRecord is true, the author record is looked up / created.
//
// Any exception is swallowed (best effort): the temporary file and directory
// are removed and the object is left with whatever was detected so far.
CAuthor::CAuthor ( dword_t  aFileIdnt, bool aCreateRecord ) {
    CAuthor::Cleanup();
    CStorage        storage;
    CMySqlConnect   db( "quest", "", "UTCMS" );
    CMySqlWhere     w;
    CTableFile      file;
    data_file_tl    flist;
    data_file_t     fdata;
    char            temp    [1024];
    char            path    [1024];

    try {
        // Empty strings mean "nothing to clean up" for the catch handler
        *temp = 0;
        *path = 0;

        w << "file_idnt=" << aFileIdnt;
        flist = file.Select( db, w );
        if  ( flist.size() > 0 ) {
            fdata = *(flist.begin());

            if  ( ! CAuthor::TryCsum( fdata.file_csum ) ) {
                ::my_make_tempdir( temp, "." );

                // Bounded formatting: a too long name must not overrun path[]
                int len = ::snprintf( path, sizeof( path ), "%s/%s.%s",
                                      temp, fdata.file_name, fdata.file_suff );
                if  ( ( len < 0 ) || ( len >= (int)sizeof( path ) ) ) {
                    *path = 0;      // nothing was written under that name
                    throw 0;        // handler below removes the temp dir
                }

                storage.Load( path, aFileIdnt );
                CAuthor::AuthorForFile( path );
                ::unlink( path );
                ::rmdir( temp );

                // Already cleaned up - keep the handler from repeating it
                *path = 0;
                *temp = 0;
            }
            if  ( ( itsLongName != NULL ) && ( aCreateRecord ) ) {
                CAuthor::CreateAuthor();
            }
        }
    }

    catch   ( ... ) {
        if  ( *path )   ::unlink( path );
        if  ( *temp )   ::rmdir( temp );
    }
}

// --------------------------------------------------------------------
// Detects the author directly from the named file and, when a long name was
// found and aCreateRecord is set, looks up / creates the author record.
CAuthor::CAuthor ( const char * aFileName, bool aCreateRecord ) {
    CAuthor::Cleanup();
    CAuthor::AuthorForFile( aFileName );

    const bool  haveName = ( itsLongName != NULL );
    if  ( haveName && aCreateRecord ) {
        CAuthor::CreateAuthor();
    }
}

// --------------------------------------------------------------------
// Detects the author for a file whose checksum is known: the checksum lookup
// is tried first and the file itself is analysed only when that fails. When a
// long name was found and aCreateRecord is set, the author record is looked
// up / created.
CAuthor::CAuthor ( const char * aFileName, const char * aFileCsum, bool aCreateRecord ) {
    CAuthor::Cleanup();

    const bool  knownByCsum = CAuthor::TryCsum( aFileCsum );
    if  ( ! knownByCsum ) {
        CAuthor::AuthorForFile( aFileName );
    }

    if  ( aCreateRecord && ( itsLongName != NULL ) ) {
        CAuthor::CreateAuthor();
    }
}

// --------------------------------------------------------------------
// Creates an empty author object: no names, identifier 0, not complete.
CAuthor::CAuthor ( ) {
    this->Cleanup();
}

// --------------------------------------------------------------------
// public:  Destructor
// --------------------------------------------------------------------
// Releases the name buffers owned by this object.
CAuthor::~CAuthor() {
    this->Free();
}

// --------------------------------------------------------------------
// public:  Set author names using a string
// --------------------------------------------------------------------
void    CAuthor::SetNames   ( const char * aLongName ) {
	CAuthor::Free();
	CAuthor::TryTextLine( aLongName );
}

// --------------------------------------------------------------------
// private: Cleaning woman
// --------------------------------------------------------------------
// Resets every member to its "empty" value WITHOUT releasing memory.
// Use Free() when the name pointers may own buffers.
void    CAuthor::Cleanup( void ) {
    // Name buffers
    itsLongName     = NULL;
    itsLastName     = NULL;
    itsNickName     = NULL;
    itsFirstName    = NULL;

    // Scalar state
    itsComplete     = false;
    itsAuthorId     = 0;
}

// --------------------------------------------------------------------
// private: Free used memory
// --------------------------------------------------------------------
void    CAuthor::Free   ( void ) {
    if  ( itsFirstName )    delete [] itsFirstName;
    if  ( itsNickName )     delete [] itsNickName;
    if  ( itsLastName )     delete [] itsLastName;
    if  ( itsLongName )     delete [] itsLongName;
    CAuthor::Cleanup();
}

// --------------------------------------------------------------------
// private: Create author record
// --------------------------------------------------------------------
void    CAuthor::CreateAuthor   ( void ) {
    CMySqlConnect   db( "quest", "", "UTCMS" );
    CMySqlWhere     w;
    CMySqlQuote     q;
    CTableAuth      auth;
    data_auth_tl    list;
    data_auth_t     data;

    ::memset( &data, 0, sizeof( data ) );
    ::my_strfit( data.auth_name, sizeof( data.auth_name ), itsLongName );

    w << "auth_name='" << q.Quote( data.auth_name ) << "'";
    list = auth.Select( db, w );
    if  ( list.size() < 1 ) {
        data.auth_idnt = auth.NextIdnt( db );
        auth.Insert( db, &data );
        itsAuthorId = data.auth_idnt;
    }
    else {
        itsAuthorId = (*(list.begin())).auth_idnt;
    }
}

// --------------------------------------------------------------------
// private: Detect author for a file
// --------------------------------------------------------------------
// Tries to detect the author of aFileName: first as an Unreal file and, if
// that yields no name, as a text file. Returns itsComplete.
bool    CAuthor::AuthorForFile  ( const char * aFileName ) {
    const bool  foundInUnreal = CAuthor::TryUnreal( aFileName );
    if  ( ! foundInUnreal ) {
        CAuthor::TryTEXT( aFileName );
    }
    return  itsComplete;
}

// --------------------------------------------------------------------
// private: Try assuming this to be an Unreal file
// --------------------------------------------------------------------
bool    CAuthor::TryUnreal      ( const char * aFileName ) {
	char *	mycopy = NULL;
    try {
        CUnUtils        u( aFileName );
        CUnLevelInfo    l( u.UnFile(), u.UnNameTable(), u.UnExportTable(), u.UnImportTable() );
        mycopy = ::my_private_strdup( l.Author() );

		CAuthor::PrepareLine( mycopy );

		if	( ::strlen( mycopy ) > 100 )	mycopy[100] = 0;

        CAuthor::TryTextLine( mycopy );
        delete [] mycopy;
    }

    catch   ( ... ) {
        if	( mycopy ) delete [] mycopy;
        CAuthor::Free();
    }

    return	itsLongName != NULL;
}

// --------------------------------------------------------------------
// private: Try assuming this to be TEXT file
// --------------------------------------------------------------------
bool    CAuthor::TryTEXT        ( const char * aFileName ) {
    FILE *  stg = NULL;
    try {
        if  ( ! CAuthor::IsTEXT( aFileName ) )  throw 0;
        FILE *  stg = ::fopen( aFileName, "r" );
        if  ( ! stg )   throw 0;

		// The idea is to rank all lines in the file and choose the one
		// that had the highest ranking

		dword_t		bestrank	= 10000;	// High enough to be past number of keywords
		dword_t		bestoffs	= 0;		// Start offset of that line
		dword_t		thisrank	= 0;
		dword_t		thisoffs	= 0;
		char		buffer		[1024];

		// So we first loop through the entire file
		thisoffs = 0;
        while   ( ::fgets( buffer, sizeof( buffer) - 1, stg ) ) {

			// Prepare the line for use
			CAuthor::PrepareLine( buffer );

			// Now, rank the line
			thisrank = CAuthor::RankLine( buffer, bestrank );

			// Did it rank better than the existing one ?
			if	( thisrank < bestrank ) {
				bestrank = thisrank;
				bestoffs = thisoffs;
				if	( bestrank == 0 )	break;	// Can't get any better
			}

			// Set up position for next line
			thisoffs = ::ftell( stg );
		}

		// Did we find any suitable line ?
		if	( bestrank < 10000 ) {

			::fseek( stg, bestoffs, SEEK_SET );
			::fgets( buffer, sizeof( buffer) - 1, stg );
			CAuthor::PrepareLine( buffer );
			CAuthor::TryTextLine( CAuthor::UseRankLine( buffer, bestrank ) );
		}

        ::fclose( stg );
        stg = NULL;     
    }

    catch   ( ... ) {
        if  ( stg ) ::fclose( stg );
        CAuthor::Free();
    }

    return  itsComplete;
}

// --------------------------------------------------------------------
// private: Test if this is a TEXT file
// --------------------------------------------------------------------
// Tells whether aFileName has one of the suffixes treated as text.
// The suffix is everything from the LAST '.' in the name and is compared
// with my_stricmp. Returns false for NULL and for names without a '.'.
bool    CAuthor::IsTEXT ( const char * aFileName ) {
    static const char * const   suffixes[] = {
        ".txt", ".text", ".readme", ".1st", ".doc",
        ".int", ".det",  ".est",    ".itt", ".frt",
        ".htm", ".html", ".uhtm",
        NULL
    };

    if  ( ! aFileName )     return false;

    // const-correct: the name itself is never modified
    const char *    p = ::strrchr( aFileName, '.' );
    if  ( ! p )             return false;

    for ( int i = 0; suffixes[i] != NULL; i++ ) {
        if  ( ! ::my_stricmp( p, suffixes[i] ) )    return true;
    }
    return false;
}

// --------------------------------------------------------------------
// private: Trying out a text line
// --------------------------------------------------------------------
// Parses an author string into first / nick / last / long name.
//
// The previous names are always released first, so aS must NOT point into
// one of this object's own name buffers.
//
// Tried in this order:
//   1. the whole string is a known search string in the arep table
//   2. a single word                 -> long name only
//   3. exactly two words             -> first + last (nick "player1")
//   4. first 'nick' last, the nick quoted with ' ` or "
//   5. two parts separated by one of the __akastr markers
//   6. anything else                 -> long name only
// For 3..5 the composed long name is looked up in the arep table once more
// and, when found, replaced by the names stored there.
//
// Returns false when aS is NULL or shorter than 3 characters, true
// otherwise. Only the paths that go through MakeLongName set itsComplete.
bool    CAuthor::TryTextLine    ( const char * aS ) {
    CMySqlConnect   db( "quest", "", "UTCMS" );
    CMySqlWhere     w;
    CMySqlQuote     q;
    CTableArep      arep;
    data_arep_tl    list;
    data_arep_t     data;

    // We always get rid of previous crap
    CAuthor::Free();

    // Nothing at all, or too short to be a name
    if  ( ( aS == NULL ) || ( ::strlen( aS ) < 3 ) )    return false;

    // Maybe it is listed by the author string itself ?
    w = "";
    w << "arep_find='" << q.Quote( aS ) << "'";
    list = arep.Select( db, w );
    if  ( list.size() > 0 ) {
        data = *(list.begin());
        itsFirstName    = ::my_private_strdup( data.arep_fnam );
        itsNickName     = ::my_private_strdup( data.arep_nnam );
        itsLastName     = ::my_private_strdup( data.arep_lnam );
        CAuthor::MakeLongName();
        return  true;
    }

    // Only one word ? (it is known to be at least 3 characters long here)
    const char *    space   = ::strchr( aS, ' ' );
    if  ( space == NULL ) {
        itsLongName = ::my_private_strdup( aS );
        return  true;
    }

    // Two words ?
    if  ( ::strchr( space + 1, ' ' ) == NULL ) {
        dword_t fnl = (dword_t)( space - aS );

        itsFirstName    = new char [fnl + 1];
        ::memcpy( itsFirstName, aS, fnl );
        itsFirstName[fnl] = 0;
        itsLastName     = ::my_private_strdup( space + 1 );
        itsNickName     = ::my_private_strdup( "player1" );

        ::my_strfix( itsFirstName );
        ::my_strfix( itsLastName );

        CAuthor::MakeLongName();

        // Just check that the name is not already there
        w = "";
        w << "arep_find='" << q.Quote( itsLongName ) << "'";
        list = arep.Select( db, w );
        if  ( list.size() > 0 ) {
            data = *(list.begin());
            CAuthor::Free();
            itsFirstName    = ::my_private_strdup( data.arep_fnam );
            itsNickName     = ::my_private_strdup( data.arep_nnam );
            itsLastName     = ::my_private_strdup( data.arep_lnam );
            CAuthor::MakeLongName();
        }
        return  true;
    }

    // Maybe it is a standard 3-part name having the nick in between ?
    const char *    quot    = "'`\"";
    const char *    sn      = NULL;     // opening quote
    const char *    en      = NULL;     // closing quote
    int             qi;

    for ( qi = 0; quot[qi] != 0; qi++ ) {
        sn = ::strchr   ( aS, quot[qi] );
        en = ::strrchr  ( aS, quot[qi] );
        if  ( ( sn ) && ( en ) ) {
            if  ( en > sn ) {
                if  ( sn > aS ) {
                    // Quotes must be surrounded by spaces: first 'nick' last
                    if  ( ( en[1] == ' ' ) && ( *(sn - 1) == ' ' ) ) {
                        break;
                    }
                }
            }
        }
        sn = en = NULL;
    }

    if  ( sn != NULL ) {
        dword_t fnl = (dword_t)( sn - aS ) - 1;     // without the space before the quote
        dword_t nnl = (dword_t)( en - sn ) - 1;     // between the quotes

        itsFirstName    = new char [fnl + 1];
        ::memcpy( itsFirstName, aS, fnl );
        itsFirstName[fnl] = 0;

        itsNickName     = new char [nnl + 1];
        if  ( nnl ) ::memcpy( itsNickName, sn + 1, nnl );
        itsNickName[nnl] = 0;

        // Skip the closing quote and the space after it
        itsLastName     = ::my_private_strdup( en + 2 );

        ::my_strfix( itsFirstName );
        ::my_strfix( itsNickName );
        ::my_strfix( itsLastName );

        CAuthor::MakeLongName();

        w = "";
        w << "arep_find='" << q.Quote( itsLongName ) << "'";
        list = arep.Select( db, w );
        if  ( list.size() > 0 ) {
            data = *(list.begin());
            CAuthor::Free();
            itsFirstName    = ::my_private_strdup( data.arep_fnam );
            itsNickName     = ::my_private_strdup( data.arep_nnam );
            itsLastName     = ::my_private_strdup( data.arep_lnam );
            CAuthor::MakeLongName();
        }

        return  true;
    }

    // Does it prolly contain the 'aka' ???
    char * locopy = ::my_private_strdup( aS );
    ::my_strlower( locopy );

    for ( qi = 0; __akastr[qi]; qi++ ) {
        sn = ::strstr( locopy, __akastr[qi] );
        if  (  ( sn )
            && ( sn > locopy )
            && ( ::strlen( sn ) > ::strlen( __akastr[qi] ) ) ) {
            break;      // text on both sides of the marker
        }
        // Rejected or not found: never leave a stale match behind, qi may
        // end up on the NULL terminator after the loop
        sn = NULL;
    }

    if  ( sn ) {
        char *  nnp = (char *)sn;   // points into locopy, which is writable
        char *  fnp = locopy;
        char *  lnp;

        // Restore the original case, then cut the copy at the marker
        ::strcpy( locopy, aS );
        nnp[0] = 0;
        nnp = nnp + ::strlen( __akastr[qi] );

        ::my_strfix( nnp );
        ::my_strfix( fnp );

        lnp = ::strrchr( fnp, ' ' );
        if  ( lnp ) {
            // "first last aka nick"
            lnp[0] = 0;
            lnp++;
        }
        else {
            lnp = ::strrchr( nnp, ' ' );
            if  ( lnp ) {
                // "nick aka first last"
                fnp = nnp;
                lnp[0] = 0;
                lnp++;
                nnp = locopy;
            }
            else {
                // One word on each side: no last name available
                lnp = (char *)"Author";
            }
        }

        itsFirstName    = ::my_private_strdup( fnp );
        itsNickName     = ::my_private_strdup( nnp );
        itsLastName     = ::my_private_strdup( lnp );

        delete [] locopy;

        ::my_strfix( itsFirstName );
        ::my_strfix( itsNickName );
        ::my_strfix( itsLastName );

        CAuthor::MakeLongName();

        w = "";
        w << "arep_find='" << q.Quote( itsLongName ) << "'";
        list = arep.Select( db, w );
        if  ( list.size() > 0 ) {
            data = *(list.begin());
            CAuthor::Free();
            itsFirstName    = ::my_private_strdup( data.arep_fnam );
            itsNickName     = ::my_private_strdup( data.arep_nnam );
            itsLastName     = ::my_private_strdup( data.arep_lnam );
            CAuthor::MakeLongName();
        }

        return true;
    }

    delete [] locopy;

    // None of above - we just make the copy and do it manually later
    itsLongName = ::my_private_strdup( aS );
    return  true;
}

// --------------------------------------------------------------------
// private:     Make up the long name
// --------------------------------------------------------------------
// Composes itsLongName as  first 'nick' last , leaving out empty parts and
// the separators that would belong to them, and marks the object complete.
// All three name parts must be set (they may be empty strings).
void    CAuthor::MakeLongName ( void ) {

    // Worst case: two separating spaces, two quotes and the terminator
    const size_t    room =  ::strlen( itsFirstName )
                          + ::strlen( itsNickName )
                          + ::strlen( itsLastName )
                          + 5;
    itsLongName = new char [ room ];

    ::strcpy( itsLongName, itsFirstName );

    if  ( itsNickName[0] != 0 ) {
        if  ( itsLongName[0] != 0 ) {
            ::strcat( itsLongName, " " );
        }
        ::strcat( itsLongName, "'" );
        ::strcat( itsLongName, itsNickName );
        ::strcat( itsLongName, "'" );
    }

    if  ( itsLastName[0] != 0 ) {
        if  ( itsLongName[0] != 0 ) {
            ::strcat( itsLongName, " " );
        }
        ::strcat( itsLongName, itsLastName );
    }

    itsComplete = true;
}

// --------------------------------------------------------------------
// private:     Prepare a line for author detection
// --------------------------------------------------------------------
void    CAuthor::PrepareLine ( char *	aB ) {
	char *	lcbuff;
	char *	p;
	dword_t	pix;
	char *	e;
	dword_t	eix;
	int		hix;

	// Replace control characters with spaces
	for	( p = aB; *p; p++ ) {
		if	( ( *p > 0 ) && ( *p < ' ' ) )	*p = ' ';
	}

	// Get rid of leading/trailing white space
	::my_strfix( aB );

	// Do we still have something left ?
	if	( *aB == 0 )	return;

	// Get rid of embedded double spaces
	p = aB;
	while	( *p ) {
		if	( ( p[0] == ' ' ) && ( p[1] == ' ' ) ) {
			::strcpy( p, p + 1 );
		}
		else {
			p++;
		}
	}

	// Make a lowercase copy of the buffer for easier search for removal words (HTML)
	lcbuff = ::my_private_strdup( aB );
	::my_strlower( lcbuff );

	// Replace the HTML &quot; with the quote character
	p = ::strstr( lcbuff, "&quot;" );
	while	( p ) {
		pix = (dword_t)( p - lcbuff );
		::strcpy( lcbuff + pix, lcbuff + pix + 5 );
		::strcpy( aB + pix, aB + pix + 5 );
		lcbuff[pix] = '"';
		aB[pix] = '"';
		p = ::strstr( lcbuff, "&quot;" );
	}

	// Replace the HTML &nbsp; with the space
	p = ::strstr( lcbuff, "&nbsp;" );
	while	( p ) {
		pix = (dword_t)( p - lcbuff );
		::strcpy( lcbuff + pix, lcbuff + pix + 5 );
		::strcpy( aB + pix, aB + pix + 5 );
		lcbuff[pix] = '"';
		aB[pix] = '"';
		p = ::strstr( lcbuff, "&nbsp;" );
	}

	// Replace HTML tags with spaces
	hix = 0;
	while	( __htmltag[hix] ) {
		p = ::strstr( lcbuff, __htmltag[hix] );
		if	( p ) {
			pix = (dword_t)( p - lcbuff );
			e = ::strchr( p, '>' );
			if	( e ) {
				eix = (dword_t)( e - lcbuff );
				::strcpy( lcbuff + pix, lcbuff + eix );
				::strcpy( aB + pix, aB + eix );
				lcbuff[pix] = ' ';
				aB[pix] = ' ';
			}
			else {
				lcbuff[pix] = 0;
				aB[pix] = 0;
			}
		}
		else {
			hix++;
		}
	}
	delete [] lcbuff;

	// Get rid of leading/trailing white space
	::my_strfix( aB );

	// Get rid of embedded double spaces
	p = aB;
	while	( *p ) {
		if	( ( p[0] == ' ' ) && ( p[1] == ' ' ) ) {
			::strcpy( p, p + 1 );
		}
		else {
			p++;
		}
	}
}

// --------------------------------------------------------------------
// private:		Rank a line for authorness
// --------------------------------------------------------------------
// Ranks a line: returns the index of the first __authstr keyword (lower is
// better) that occurs on the line and is followed, after any spaces and
// colons, by more than two characters. Only keywords with an index below
// aBestRank are tried. When the keyword table is exhausted without a hit,
// aBestRank is returned.
dword_t	CAuthor::RankLine( const char * aLine, dword_t aBestRank ) {
    // Keywords are lowercase, so search a lowercased copy
    char *  lower = ::my_private_strdup( aLine );
    ::my_strlower( lower );

    dword_t idx = 0;
    for ( ; ( __authstr[idx] ) && ( idx < aBestRank ); idx++ ) {
        char *  hit = ::strstr( lower, __authstr[idx] );
        if  ( ! hit )   continue;

        // Skip the keyword and any separators after it
        char *  tail = hit + ::strlen( __authstr[idx] );
        while   ( ( *tail == ' ' ) || ( *tail == ':' ) ) {
            tail++;
        }

        // Enough left to be a name ?
        if  ( ::strlen( tail ) > 2 )    break;
    }

    delete [] lower;

    if  ( __authstr[idx] )  return idx;
    return  aBestRank;
}

// --------------------------------------------------------------------
// private:		Use a ranked line
// --------------------------------------------------------------------
// Returns the part of aLine that follows the keyword __authstr[aRank],
// skipping spaces and colons after the keyword and cutting the result to at
// most 100 characters. The returned pointer points INTO aLine, and aLine is
// modified when the cut is applied. The keyword is located case-insensitively
// through a lowercased copy of the line.
// When the keyword does not occur on the line, a pointer to the end of aLine
// (an empty string) is returned. aRank must be a valid index into __authstr.
char *	CAuthor::UseRankLine( char * aLine, dword_t aRank ) {
    const char *    key     = __authstr[aRank];
    char *          lcbuff  = ::my_private_strdup( aLine );
    ::my_strlower( lcbuff );

    char *  hit = ::strstr( lcbuff, key );
    char *  p;

    if  ( hit ) {
        // Same offset in the original line, just past the keyword
        dword_t pix = (dword_t)( hit - lcbuff );
        p = aLine + pix + ::strlen( key );
        while   ( ( *p == ' ' ) || ( *p == ':' ) )  p++;

        if  ( ::strlen( p ) > 100 ) p[100] = 0;
    }
    else {
        // Keyword not on this line: nothing usable
        p = aLine + ::strlen( aLine );
    }

    delete [] lcbuff;
    return  p;
}

// --------------------------------------------------------------------
// private:		Try to locate a checksum
// --------------------------------------------------------------------
bool	CAuthor::TryCsum	( const char * aFileCsum ) {
    CMySqlConnect   db( "quest", "", "UTCMS" );
    CMySqlWhere     w;
    CMySqlQuote     q;
    CTableArep      arep;
    data_arep_tl    list;
    data_arep_t     data;

	// If there is a checksum we have good chance that the record is already there
	if	( aFileCsum ) {
		w << "arep_find='" << aFileCsum << "'";
		list = arep.Select( db, w );
		if	( list.size() > 0 ) {
			data = *(list.begin());
			itsFirstName	= ::my_private_strdup( data.arep_fnam );
			itsNickName		= ::my_private_strdup( data.arep_nnam );
			itsLastName		= ::my_private_strdup( data.arep_lnam );
			CAuthor::MakeLongName();
			return	true;
		}
	}

	return	false;
}

// --------------------------------------------------------------------
// EOF: CAuthor.cxx
// --------------------------------------------------------------------
