CifSchemaMap.h

Go to the documentation of this file.
00001 /*$$FILE$$*/
00002 /*$$VERSION$$*/
00003 /*$$DATE$$*/
00004 /*$$LICENSE$$*/
00005 
00006 
00014 #ifndef CIFSCHEMAMAP_H
00015 #define CIFSCHEMAMAP_H
00016 
00017 
00018 #include <string>
00019 
00020 #include "CifFile.h"
00021 
00022 
// Per-attribute descriptor made of numeric fields plus a type string.
// In this header it is passed as vector<ATTRIBINFO>& to
// SchemaMapping::GetAttributeInfo()/GetAllAttributeInfo(), as
// const vector<ATTRIBINFO>& to the row-checking helpers, and as ATTRIBINFO&
// to DbOutput::_FormatData().
// NOTE(review): field meanings below are inferred from names only and from
// the parallel string fields of AttrInfo — confirm in the implementation.
// NOTE(review): the tag name _attribInfo starts with an underscore at
// namespace scope, which the C++ standard reserves for the implementation.
00023 typedef struct _attribInfo
00024 {
          // presumably the index flag (cf. AttrInfo::indexFlag) — TODO confirm
00025     int iIndex;
          // presumably the null flag (cf. AttrInfo::nullFlag) — TODO confirm
00026     int iNull;
          // presumably declared width / precision (cf. AttrInfo) — TODO confirm
00027     int iWidth;
00028     int iPrecision;
          // unsigned, unlike iWidth; DbOutput formatters take an unsigned maxWidth
00029     unsigned int iMaxWidth; 
          // data type as text
00030     string dType;
          // presumably one of SchemaMapping::_TYPE_CODE_* — TODO confirm
00031     int iTypeCode;
00032 } ATTRIBINFO;
00033 
00034 
// All-string description of one attribute. Used in this header as
// vector<AttrInfo> by SchemaMapping::GetAttributesInfo(),
// SchemaMapping::IsTablePopulated() and SqlOutput::CreateTableSql().
// The same seven values appear as separate string parameters of
// SchemaMapping::getSchemaMapDetails().
00035 typedef struct
00036 {
00037     string attribName;
00038     string dataType;
00039     string indexFlag;
00040     string nullFlag;
00041     string width;
00042     string precision;
00043     string populated;   // Not taken from the file, but constructed afterwards
00044 } AttrInfo;
00045 
00046 
// Holds schema-mapping state: two schema file names, a CifFile pointer and
// six ISTable pointers (see the private section), with public accessors that
// report table names, attribute names and attribute descriptors.
// NOTE(review): this text is a doxygen source listing, so the original doc
// comments are not visible here. Notes added below are derived from the
// declarations alone — confirm behavior against the implementation file.
// NOTE(review): none of the Get* member functions is const-qualified, so
// they cannot be called through a const SchemaMapping&.
00061 class SchemaMapping
00062 {
00063 
00064   public:
00065     // TODO - All these constants can be made
00066     // private, except _TYPE_CODE_TEXT that is used outside of this
00067     // class. Try to fix that and move all these constants to private
          // Type codes; ATTRIBINFO::iTypeCode presumably holds one of these
          // values — TODO confirm.
00068     static const int _TYPE_CODE_INT        = 1;
00069     static const int _TYPE_CODE_FLOAT      = 2;
00070     static const int _TYPE_CODE_STRING     = 3;
00071     static const int _TYPE_CODE_TEXT       = 4;
00072     static const int _TYPE_CODE_DATETIME   = 5;
00073     static const int _TYPE_CODE_BIGINT     = 6;
00074 
00075     static const int _MAX_LINE_LENGTH = 255;
00076 
          // Every parameter has a default (String::Empty, String::Empty, false),
          // so this declaration also acts as the default constructor.
          // NOTE(review): not declared explicit, so a single string converts
          // implicitly to SchemaMapping.
00104     SchemaMapping(const string& schemaFile = String::Empty,
00105       const string& schemaFileOdb = String::Empty, bool verbose = false);
00106 
00120     virtual ~SchemaMapping();
00121 
          // presumably setter/getter for _reviseSchemaMode — TODO confirm
00141     void SetReviseSchemaMode(bool mode = true);
00142 
00157     bool GetReviseSchemaMode();
00158 
00174     void ReviseSchemaMap(const string& revisedSchemaFile);
00175 
00176 
          // Takes the other mapping by non-const reference.
00191     void updateSchemaMapDetails(SchemaMapping& revSchMap);
00192 
00209     void CreateTables(CifFile& cifFile, const string& blockName);
00210 
          // Result is delivered through the tableNames reference parameter.
00225     void GetTableNames(vector<string>& tableNames);
00226 
          // NOTE(review): raw ISTable* returns; ownership of the pointee is not
          // visible from this header — confirm who deletes it.
00227     ISTable* CreateTableInfo();
00228     ISTable* CreateColumnInfo();
00229 
          // The following accessors take a table name and deliver their result
          // through the first (non-const reference) parameter.
00230     void GetAttributeNames(vector<string>& attributes,
00231       const string& tableName);
00232     void GetAttributesInfo(vector<AttrInfo>& attrInfo,
00233       const string& tableName);
00234     void GetMappedAttributesInfo(vector<vector<string> >& mappedAttrInfo,
00235       const string& tableName);
00236 
00237     void GetMappedConditions(vector<vector<string> >& mappedConditions,
00238       const string& tableName);
00239 
          // NOTE(review): target/targetTable/targetAttribute are taken by value
          // here, whereas sibling methods take const string&.
00240     void GetTableNameAbbrev(string &abbrev, string target);
00241     void GetAttributeNameAbbrev(string &abbrev, string targetTable,
00242       string targetAttribute);
00243 
          // The six trailing string& parameters mirror the fields of AttrInfo
          // (dataType ... populated).
00244     void getSchemaMapDetails(const string& tableName,
00245       const string& attribName, string& dataType,
00246       string& indexFlag, string& nullFlag,
00247       string& width, string& precision, string& populated);
00248 
00249     void UpdateAttributeDef(const string& tableName, const string& columnName,
00250       int type, int iWidth, int newWidth);
00251 
          // NOTE(review): tableName is taken by value in GetAllAttributeInfo.
00252     void GetAttributeInfo(vector<ATTRIBINFO>& aI, ISTable *tIn);
00253     void GetAllAttributeInfo(vector<ATTRIBINFO>& aI, string tableName);
00254 
          // Static helpers: they take a row and/or attribute descriptors and do
          // not need a SchemaMapping instance.
00255     static bool AreAttributesMandatory(const vector<string>& row,
00256       const vector<ATTRIBINFO>& aI);
00257     static bool IsTablePopulated(const vector<AttrInfo>& attrInfo);
00258     void GetMasterIndexAttribName(string& masterIndexAttribName);
00259     static int CheckIndexAttributes(const vector<string>& row,
00260       const vector<ATTRIBINFO>& aI);
00261 
00262   private:
00263     bool _verbose;
00264 
00265     string _schemaFile;    // Schema definition file name (mmCIF)
00266     string _schemaFileOdb; // Schema definition file name (odb)
00267 
          // NOTE(review): raw pointer members; the class declares a destructor
          // but no copy constructor/assignment in this header, so copies would
          // share these pointers — confirm ownership in the implementation.
00268     CifFile* _fobjS;
00269 
00270     ISTable* _tableSchema;
00271     ISTable* _attribSchema;
00272     ISTable* _schemaMap;
00273     ISTable* _mapConditions;
00274     ISTable* _tableAbbrev;    
00275     ISTable* _attributeAbbrev;
00276 
00277     bool _reviseSchemaMode;
00278     int  _compatibilityMode;
00279 
00280     void _AssignAttribIndices(void);
00281     void GetGroupNames(vector<string>& groupNames);
00282 
00283     void Clear();
00284 };
00285 
00286 
// Base class of DbOracle, DbDb2, DbMySql and DbSybase (declared below).
// It stores separator, command and option strings (protected section) and
// declares the virtual member functions that those classes override.
// NOTE(review): the original doc comments are not visible in this listing;
// notes below are derived from the declarations only.
00296 class Db
00297 {
00298   public:
          // Default for the dbName constructor argument; only declared here.
00299     static const string DB_DEFAULT_NAME;
00300 
          // NOTE(review): public reference member. A reference member makes the
          // implicit copy-assignment operator deleted; the referenced
          // SchemaMapping presumably has to outlive this object — confirm.
00301     SchemaMapping& _schemaMapping;
00302 
          // NOTE(review): callable with one argument and not explicit.
00303     Db(SchemaMapping& schemaMapping, const string& dbName = DB_DEFAULT_NAME);
00304     virtual ~Db();
00305 
          // presumably accessors for _useOnlyPopulated / _appendFlag — confirm
00306     void SetUseOnlyPopulated(bool mode=true);
00307     bool GetUseOnlyPopulated();
00308 
00309     void SetAppendFlag(const bool appendFlag);
00310     bool GetAppendFlag();
00311 
00312     void SetFieldSeparator(const string& fieldSeparator);
00313     void SetRowSeparator(const string& rowSeparator);
00314 
          // These three return string by value, unlike the const string&
          // getters further down.
00315     string GetCommandTerm();
00316 
00317     string GetFieldSeparator();
00318     string GetRowSeparator();
00319 
00320     virtual void GetStart(string& start);
00321 
00322     virtual void WriteSchemaStart(ostream& io);
00323     void WriteDeleteTable(ostream& io, const string& table,
00324       const string& where, const string& what);
00325 
00326     virtual void DropTableSql(ostream& io, const string& tableNameDb);
00327 
          // Virtual getters returning const string&; presumably they expose the
          // same-named protected members (_exec, _execOption, ...) — confirm.
00328     virtual const string& GetExec();
00329     virtual const string& GetExecOption();
00330     virtual const string& GetUserOption();
00331     virtual const string& GetPassOption();
00332 
00333     virtual const string& GetEnvDbUser();
00334     virtual const string& GetEnvDbPass();
00335 
00336     virtual const string& GetConnect();
00337     virtual const string& GetTerminate();
00338     virtual const string& GetDbCommand();
00339 
00340     virtual void WriteLoad(ostream& io);
00341 
00342     const string& GetDataLoadingFileName();
00343 
00344     virtual void WriteLoadingStart(ostream& io);
00345     virtual void WriteLoadingEnd(ostream& io);
00346     virtual void WriteLoadingTable(ostream& io, const string& tableName,
00347       const string& path);
00348 
00349     virtual void WritePrint(ostream& io, const string& tableNameDb);
00350 
          // Type-name hooks: each delivers its result through dType.
00351     virtual void GetChar(string& dType, const unsigned int width);
00352     virtual void GetFloat(string& dType);
00353     virtual void GetText(string& dType, const unsigned int width);
00354     virtual void GetDate(string& dType);
00355     virtual void WriteNull(ostream& io, const int iNull,
00356       const unsigned int curr, const unsigned int attSize);
00357     virtual void WriteTableIndex(ostream& io, const string& tableNameDb,
00358       const vector<string>& indexList);
00359 
00360     const string& GetBcpStringDelimiter();
00361     virtual void WriteBcpDoubleQuotes(ostream& io);
00362 
          // Virtual with a default argument: the default is selected from the
          // static type of the call expression, so overrides should repeat
          // "special = false" (DbOracle and DbMySql below do).
00363     virtual void WriteNewLine(ostream& io, bool special = false);
00364 
          // Non-virtual here; DbOutput declares its own virtual function of the
          // same name.
00365     bool IsFirstTextNewLineSpecial();
00366 
00367     virtual void ConvertDate(string& dbDate, const string& cifDate);
00368     virtual void ConvertTimestamp(string& dbTimestamp,
00369       const string& cifTimestamp);
00370 
00371   protected:
00372     bool _useOnlyPopulated;
00373 
00374     bool _appendFlag;
00375 
00376     // Field and row separators for compact output (eg. BCP)
00377     string _fieldSeparator; 
00378     string _rowSeparator;   
00379 
00380     string _cmdTerm;            // SQL command terminator.
00381 
00382     // Used in assigning permissions when exporting schema in SQL. 
00383     string _dbName; // Target database name 
00384 
00385     string _exec;
00386     string _execOption;
00387     string _userOption;
00388     string _passOption;
00389 
00390     string _connect;
00391     string _terminate;
00392     string _dbCommand;
00393     string _envDbUser;
00394     string _envDbPass;
00395 
00396     string _dataLoadingFileName;
00397 
00398     string _bcpStringDelimiter;
00399 
00400     bool _firstTextNewLineSpecial;
00401 
00402   private:
          // Only declared here; private, so not accessible to derived classes.
00403     static const string _SCRIPT_LOADING_FILE;
00404 
00405 };
00406 
00407 
// Db specialization for Oracle. Every member function declared here has the
// same signature as a virtual function of Db and therefore overrides it.
// Same constructor parameters and dbName default as Db.
// NOTE(review): the overrides are not marked virtual/override; they are
// virtual only because the Db declarations are.
00415 class DbOracle : public Db
00416 {
00417 
00418   // Oracle 8.1.6 SQLLDR
00419 
00420   public:
00421     DbOracle(SchemaMapping& schemaMapping,
00422       const string& dbName = DB_DEFAULT_NAME);
00423     ~DbOracle();
00424 
00425     void WriteSchemaStart(ostream& io);
00426 
00427     void WriteLoadingStart(ostream& io);
00428     void WriteLoadingTable(ostream& io, const string& tableName,
00429       const string& path);
00430 
00431     void GetDate(string& dType);
00432     void GetText(string& dType, const unsigned int width);
00433 
00434     void WriteNull(ostream& io, const int iNull,
00435       const unsigned int curr, const unsigned int attSize);
00436     void WriteTableIndex(ostream& io, const string& tableNameDb,
00437       const vector<string>& indexList);
00438 
          // Repeats Db's default argument (special = false).
00439     void WriteNewLine(ostream& io, bool special = false);
00440 };
00441 
00442 
// Db specialization for DB2 (per the class name). Every member function
// declared here has the same signature as a virtual function of Db and
// therefore overrides it; no new data members are added in this header.
00450 class DbDb2 : public Db
00451 {
00452 
00453   public:
00454     DbDb2(SchemaMapping& schemaMapping,
00455       const string& dbName = DB_DEFAULT_NAME);
00456     ~DbDb2();
00457 
00458 
00459     void GetStart(string& start);
00460 
00461     void WriteSchemaStart(ostream& io);
00462 
00463     void WriteLoadingStart(ostream& io);
00464     void WriteLoadingEnd(ostream& io);
00465     void WriteLoadingTable(ostream& io, const string& tableName,
00466       const string& path);
00467 
          // Type-name hooks; GetChar is not overridden in this class.
00468     void GetFloat(string& dType);
00469     void GetDate(string& dType);
00470     void GetText(string& dType, const unsigned int width);
00471     void WriteNull(ostream& io, const int iNull,
00472       const unsigned int curr, const unsigned int attSize);
00473 
00474     void WriteTableIndex(ostream& io, const string& tableNameDb,
00475       const vector<string>& indexList);
00476 
          // Among the four Db subclasses in this header, only this one
          // overrides WriteBcpDoubleQuotes.
00477     void WriteBcpDoubleQuotes(ostream& io);
00478 };
00479 
00480 
// Db specialization for MySQL (per the class name). The public member
// functions match virtual functions of Db and therefore override them.
00488 class DbMySql : public Db
00489 {
00490 
00491   public:
00492     DbMySql(SchemaMapping& schemaMapping,
00493       const string& dbName = DB_DEFAULT_NAME);
00494     ~DbMySql();
00495 
00496     void GetStart(string& start);
00497 
          // Among the four Db subclasses in this header, only this one
          // overrides DropTableSql and WriteLoad.
00498     void DropTableSql(ostream& io, const string& tableNameDb);
00499 
00500     void WriteLoad(ostream& io);
00501     void WriteLoadingStart(ostream& io);
00502     void WriteLoadingTable(ostream& io, const string& tableName,
00503       const string& path);
00504 
00505     void WriteTableIndex(ostream& io, const string& tableNameDb,
00506       const vector<string>& indexList);
00507 
00508     void WriteNull(ostream& io, const int iNull,
00509       const unsigned int curr, const unsigned int attSize);
00510 
          // Repeats Db's default argument (special = false).
00511     void WriteNewLine(ostream& io, bool special = false);
00512 
00513   private:
          // Only declared here; presumably a file name used by the loading
          // overrides — TODO confirm in the implementation.
00514     static const string _SQL_LOADING_FILE;
00515 
00516 };
00517 
00518 
// Db specialization for Sybase (per the class name). The member functions
// match virtual functions of Db and therefore override them.
00526 class DbSybase : public Db
00527 {
00528   public:
00529     DbSybase(SchemaMapping& schemaMapping,
00530       const string& dbName = DB_DEFAULT_NAME);
00531     ~DbSybase();
00532 
00533     void GetStart(string& start);
00534 
00535     void WriteLoadingStart(ostream& io);
00536     void WriteLoadingTable(ostream& io, const string& tableName,
00537       const string& path);
00538 
          // Among the four Db subclasses in this header, only this one
          // overrides WritePrint.
00539     void WritePrint(ostream& io, const string& tableNameDb);
00540     void WriteNull(ostream& io, const int iNull,
00541       const unsigned int curr, const unsigned int attSize);
00542 
00543     void WriteTableIndex(ostream& io, const string& tableNameDb,
00544       const vector<string>& indexList);
00545 
          // The ConvertDate override is compiled only when VLAD_DATE_OBSOLETE
          // is defined; otherwise Db::ConvertDate is inherited.
00546 #ifdef VLAD_DATE_OBSOLETE
00547     void ConvertDate(string& dbDate, const string& cifDate);
00548 #endif
00549 };
00550 
00551 
// Base class of BcpOutput, SqlOutput and XmlOutput (declared below).
// It holds a Db reference plus delimiter/separator strings and declares the
// virtual writing and formatting hooks that those classes override.
// NOTE(review): the original doc comments are not visible in this listing;
// notes below are derived from the declarations only.
00561 class DbOutput
00562 {
00563 
00564   public:
          // NOTE(review): public reference member; it makes the implicit
          // copy-assignment operator deleted. The Db presumably has to outlive
          // this object — confirm.
00565     Db& _db;
00566 
          // NOTE(review): single-argument constructor, not explicit.
00567     DbOutput(Db& db);
00568     virtual ~DbOutput();
00569 
          // Virtual entry points; path defaults to String::Empty in each.
          // Overrides in the subclasses repeat the same default.
00570     virtual void WriteSchema(const string& path = String::Empty);
00571     virtual void WriteDataLoadingScripts(const string& path = String::Empty);
00572     virtual void WriteData(Block& block, const string& path = String::Empty);
00573 
00574     void SetInputFile(const string& inpFile);
00575 
00576     const string& GetCommandScriptName();
00577 
00578   protected:
00579     static const string _DATA_LOADING_SCRIPT;
00580 
          // NOTE(review): upper-case names, but these two are ordinary
          // non-static, non-const string members.
00581     string _SCHEMA_FILE;
00582 
00583     string _INPUT_FILE;
00584 
          // presumably consulted by IsSpecialChar()/IsSpecialDateChar() below
          // — TODO confirm
00585     string _stringDelimiter;
00586     vector<char> _specialChars;
00587 
00588     string _dateDelimiter;
00589     vector<char> _specialDateChars;
00590 
00591     string _itemSeparator; 
00592     string _rowSeparator; 
00593 
          // Note the differing indentLevel defaults: 0 here, 1 in
          // WriteDbExecOnly.
00594     void WriteDbExec(ostream& io, const string& fileName,
00595       const unsigned int indentLevel = 0);
00596     void WriteDbExecOnly(ostream& io, const string& fileName,
00597       const unsigned int indentLevel = 1);
00598 
00599     void WriteHeader(ostream& io);
00600 
          // Non-virtual formatters, one per kind of value; each writes the
          // string cs to io.
00601     void _FormatNumericData(ostream &io, const string &cs);
00602     void _FormatStringData(ostream &io, const string &cs,
00603       unsigned int maxWidth);
00604     void _FormatTextData(ostream &io, const string &cs);
00605     void _FormatDateData(ostream &io, const string &cs, unsigned int maxWidth);
00606 
          // attribInfo is taken by non-const reference; presumably dispatches
          // to the formatters above by attribInfo.iTypeCode — TODO confirm.
00607     void _FormatData(ostream &io, const string &cs,
00608       ATTRIBINFO& attribInfo, const bool noRevise);
00609 
00610     bool IsSpecialChar(const char& character);
00611     bool IsSpecialDateChar(const char& character);
00612 
          // Virtual hooks overridden by the output subclasses below.
00613     virtual void WriteEmptyNumeric(ostream& io);
00614     virtual void WriteEmptyString(ostream& io);
00615     virtual void WriteSpecialChar(ostream& io, const char& specChar);
00616 
00617     virtual void WriteEmptyDate(ostream& io);
00618     virtual void WriteSpecialDateChar(ostream& io, const char& specDateChar);
00619 
          // Db declares member functions with these same two names; presumably
          // the base versions forward to _db — TODO confirm.
00620     virtual bool IsFirstTextNewLineSpecial();
00621     virtual void WriteNewLine(ostream& io, bool special = false);
00622 
00623     virtual void GetTableStart(string& tableStart, const string& tableName);
00624     virtual void GetTableEnd(string& tableEnd);
00625     const string& GetItemSeparator();
00626     const string& GetRowSeparator();
00627 
          // Virtual; XmlOutput overrides it.
00628     virtual void _WriteTable(ostream& io, ISTable* tIn, bool noRevise = false);
00629     void GetMasterIndexAttribValue(string& masterIndexAttribValue,
00630       Block& block, const string& masterIndexAttribName,
00631       const vector<string>& tableNames);
00632 
00633   private:
          // Static variant with the same parameter list as _FormatStringData.
00634     static void _FormatStringDataSql(ostream &io, const string &cs,
00635       unsigned int maxWidth);
00636 
00637 };
00638 
00639 
// DbOutput subclass for BCP output (per the class name). It overrides
// WriteDataLoadingScripts and WriteData, but not WriteSchema.
00648 class BcpOutput : public DbOutput
00649 {
00650   public:
          // NOTE(review): single-argument constructor, not explicit.
00651     BcpOutput(Db& db);
00652     virtual ~BcpOutput();
00653 
00654     void WriteDataLoadingScripts(const string& path = String::Empty);
00655     void WriteData(Block& block, const string& path = String::Empty);
00656 
00657   private:
00658     static const string _DATA_DELETE_FILE;
00659 
          // Private helpers new to this class; note that only
          // WriteDataLoadingFile has a default for path.
00660     void WriteDataLoadingScript(const string& path);
00661     void WriteDataLoadingFile(const string& path = String::Empty);
00662 
00663     void WriteDelete(ostream& io);
00664 
          // Overrides of DbOutput's protected virtual hooks, declared private
          // here.
00665     void WriteEmptyString(ostream& io);
00666 
00667     void WriteSpecialDateChar(ostream& io, const char& specDateChar);
00668 };
00669  
00670 
// DbOutput subclass for SQL output (per the class name). It overrides all
// three public virtual entry points and several protected hooks of DbOutput.
00679 class SqlOutput : public DbOutput
00680 {
00681 
00682   public:
          // NOTE(review): single-argument constructor, not explicit.
00683     SqlOutput(Db& db);
00684     virtual ~SqlOutput();
00685 
00686     void WriteSchema(const string& path = String::Empty);
00687     void WriteDataLoadingScripts(const string& path = String::Empty);
00688     void WriteData(Block& block, const string& path = String::Empty);
00689 
00690   protected:
          // Overrides of DbOutput's virtual hooks (same signatures).
00691     void WriteEmptyNumeric(ostream& io);
00692     bool IsFirstTextNewLineSpecial();
00693     void WriteNewLine(ostream& io, bool special = false);
00694     void GetTableStart(string& tableStart, const string& tableName);
00695     void GetTableEnd(string& tableEnd);
00696 
00697   private:
          // presumably an upper bound on generated SQL identifier length
          // — TODO confirm
00698     static const unsigned int _MAX_SQL_NAME_LENGTH = 60;
00699     static const string _SCHEMA_LOADING_SCRIPT;
00700     static const string _SCHEMA_DELETE_FILE;
00701     static const string _DATA_FILE;
00702 
00703     void WriteSqlScriptSchemaInfo(ostream& io);
00704     void WriteDataLoadingScript(const string& path);
00705 
00706     void CreateTableSql(ostream& io, const string& tableName,
00707       const vector<AttrInfo>& attrInfo);
00708 
00709     void WriteAuxTables(ostream& io, ISTable* infoP,
00710       const vector<string>& tableNames);
00711 
          // Overrides of DbOutput's protected virtual hooks, declared private
          // here.
00712     void WriteEmptyString(ostream& io);
00713     void WriteEmptyDate(ostream& io);
00714 };
00715 
00716 
// DbOutput subclass for XML output (per the class name). It overrides
// WriteSchema and WriteData, but not WriteDataLoadingScripts.
00725 class XmlOutput : public DbOutput
00726 {
00727   public:
          // NOTE(review): single-argument constructor, not explicit.
00728     XmlOutput(Db& db);
00729     virtual ~XmlOutput();
00730 
00731     void WriteSchema(const string& path = String::Empty);
00732     void WriteData(Block& block, const string& path = String::Empty);
00733 
00734   private:
          // Signed int here, whereas SqlOutput::_MAX_SQL_NAME_LENGTH is
          // unsigned int.
00735     static const int _MAX_NAME_LENGTH = 80;
          // NOTE(review): upper-case name, but an ordinary non-static string
          // member; presumably set by _SetNamespaceDefaultXML — TODO confirm.
00736     string _NS_DEFAULT;
00737 
00738     void _CreateComplexTypeXML(ostream &io, const string& tableName);
00739     void _SetNamespaceDefaultXML(const string& name); 
          // Despite the Get prefix, this takes an ostream and returns void.
00740     void _GetDefaultNamespaceXML(ostream &io); 
          // Same signature as DbOutput::_WriteTable (virtual), so an override.
00741     void _WriteTable(ostream& io, ISTable* tIn, bool noRevise = false);
00742 
00743     void _GetAttributeNameAbbrevXML(string &abbrev,
00744       const string& targetAttribute);
00745     void _QualifyNameXML(ostream &io,  const string& name); 
00746 
          // NOTE(review): target is taken by value here.
00747     void _GetTableNameAbbrevXML(string &abbrev, string target);
00748 
          // Override of DbOutput's protected virtual hook, declared private
          // here.
00749     void WriteSpecialChar(ostream& io, const char& specChar);
00750 };
00751 
00752 
// Loader front end: built from a SchemaMapping and a DbOutput, it exposes
// three "...ToDb" entry points that take an ASCII file name, a "ser" file
// name, or a CifFile object, each with a conversion option.
// NOTE(review): the original doc comments are not visible in this listing;
// notes below are derived from the declarations only.
// NOTE(review): the reference members and the ofstream member make objects
// of this class non-copy-assignable; std::ofstream is also not copyable.
00768 class DbLoader
00769 {
00770   public:
          // Selects what the ...ToDb entry points generate.
00771     enum eConvOpt
00772     {
00773         // Generate only data loading files
00774         eDATA_ONLY = 0,
00775 
00776         // Generate both data loading files and loading shell scripts
00777         eDATA_WITH_SCRIPTS,
00778 
00779         // Generate only loading shell scripts
00780         eSCRIPTS_ONLY
00781     };
00782 
          // verbose defaults to false and workDir to String::Empty.
00803     DbLoader(SchemaMapping& schemaMapping, DbOutput& dbOutput,
00804       bool verbose = false, const string& workDir = String::Empty);
00805 
00819     virtual ~DbLoader();
00820 
00835     void SetWorkDir(const string& workDir);
00836 
00855     void AsciiFileToDb(const string& asciiFile, const eConvOpt convOpt);
00856 
00876     void SerFileToDb(const string& serFile, const eConvOpt convOpt);
00877 
00896     void FileObjToDb(CifFile& cifFile, const eConvOpt convOpt);
00897 
          // Static helper; needs no DbLoader instance.
00901     static unsigned int GetTableColumnIndex(const ISTable& isTable,
00902       const string& colName);
00903 
          // Hash-mode support is compiled only when DB_HASH_ID is defined.
00904 #ifdef DB_HASH_ID
00905     void SetHashMode(int mode);
00906 #endif
00907 
00908   private:
00909     static const string _LOG_FILE;
00910 
00911     string _workDir; // Working directory for all generated files.
00912     string _INPUT_FILE;
00913 
00914 #ifdef DB_HASH_ID
00915     string _HASH_ID;
00916     int _hashMode;
00917 #endif
00918 
00919     // Block name of loadable data in mmCIF format
00920     string _blockName;
00921 
00922     bool _verbose;
00923 
          // presumably opened by _OpenLog() — TODO confirm
00924     ofstream _log;
00925 
          // Reference members; the referenced objects presumably have to
          // outlive this loader — confirm.
00926     SchemaMapping& _schemaMapping;
00927     DbOutput& _dbOutput;
00928 
00929     void _LoadBlock(Block& rBlock, Block& wBlock);
00930     bool _Search(vector<vector<string> >& dMap, const unsigned int iAttr,
00931       ISTable* isTableP, const string& blockName,
00932       const vector<string>& cNameMap, const string& sItem,
00933       const string& sCnd, const string& sFnct);
00934  
00935     void _DoFunc(vector<string>& s, const vector<string>& r,
00936       const string& sFnct);
00937 
00938     void _OpenLog(const string& logName);
00939 
          // Helpers over the dMap table; dMap is taken by non-const reference
          // even in the Get* helpers.
00940     int _GetMapColumnIndex(const vector<string>& cNameMap, const string& vOf);
00941     void _GetMapColumnValue(string& p, vector<vector<string> >& dMap,
00942       int iCol, int irow);
00943     int _GetMapColumnLength(vector<vector<string> >& dMap,   int iCol);
00944 
          // Date reformatting helpers working on raw char buffers; compiled
          // only when VLAD_DATE_OBSOLETE is defined.
00945 #ifdef VLAD_DATE_OBSOLETE
00946     void _dformat_1(const char *date, char *odate);
00947     void _dformat_2(const char *date, char *odate);
00948     void _dformat_3(const char *date, char *odate, int shortFlag);
00949     void _dformat_4(const char *date, char *odate);
00950     void _dformat_5(const char *date, char *odate);
00951 #endif
00952 
          // NOTE(review): the second parameter is named "string", which hides
          // the type name string for the rest of this declaration.
00953     void _ReorderName(string& res, char *string, int mode);
00954     void _ToUpperString(string& aString);
00955     void _StripString(string& aString, int mode);
00956 
00957     int _CheckNullRow(const vector<string>& row, const vector<ATTRIBINFO>& aI);
00958 
00959 #ifdef DB_HASH_ID
00960     long long pdbIdHash(const string& id);
00961 #endif
00962 
00963     static void CleanString(string& aString);
00964 
00965     void Clear();
00966 };
00967 
00968 
00969 #endif

Generated on Fri Feb 8 10:24:42 2008 for db-loader-v4.0 by  doxygen 1.5.1