ISTable.h

Go to the documentation of this file.
00001 //$$FILE$$
00002 //$$VERSION$$
00003 //$$DATE$$
00004 //$$LICENSE$$
00005 
00006 
00014 #ifndef ISTABLE_H
00015 #define ISTABLE_H
00016 
00017 
00018 #include <float.h>
00019 
00020 #include <string>
00021 #include <vector>
00022 #include <map>
00023 
00024 #include "mapped_vector.h"
00025 #include "mapped_vector.C"
00026 #include "GenString.h"
00027 #include "ITTable.h"
00028 #include "Serializer.h"
00029 
00030 
00031 using namespace std;
00032 
00033 
00034 typedef multimap<string, unsigned int, StringCompare> tIndex;
00035 
00036 
00056 class ISTable
00057 {
00058   public:
00059     typedef ITTable::eOrientation eOrientation;
00060 
00061     static const eOrientation eCOLUMN_WISE = ITTable::eCOLUMN_WISE;
00062     static const eOrientation eROW_WISE = ITTable::eROW_WISE;
00063 
00064     enum eTableDiff
00065     {
00066         eNONE = 0,
00067         eCASE_SENSE,
00068         eMORE_COLS,
00069         eLESS_COLS,
00070         eCOL_NAMES,
00071         eMORE_ROWS,
00072         eLESS_ROWS,
00073         eCELLS,
00074         // Used only in block diff to indicate missing table in first block
00075         eMISSING,
00076         // Used only in block diff to indicate extra table in first block
00077         eEXTRA
00078     };
00079 
00080     typedef ITTable::eSearchType eSearchType;
00081 
00082     static const eSearchType eEQUAL = ITTable::eEQUAL;
00083     static const eSearchType eLESS_THAN = ITTable::eLESS_THAN;
00084     static const eSearchType eLESS_THAN_OR_EQUAL = ITTable::eLESS_THAN_OR_EQUAL;
00085     static const eSearchType eGREATER_THAN = ITTable::eGREATER_THAN;
00086     static const eSearchType eGREATER_THAN_OR_EQUAL = ITTable::eGREATER_THAN_OR_EQUAL;
00087 
00088 #ifdef VLAD_SECOND_ITTABLE
00089     enum eSearchType
00090     {
00091         eEQUAL = 0,
00092         eLESS_THAN,
00093         eLESS_THAN_OR_EQUAL,
00094         eGREATER_THAN,
00095         eGREATER_THAN_OR_EQUAL
00096     };
00097 #endif
00098 
00099     typedef ITTable::eSearchDir eSearchDir;
00100 
00101     static const eSearchDir eFORWARD = ITTable::eFORWARD;
00102     static const eSearchDir eBACKWARD = ITTable::eBACKWARD;
00103 
00104 #ifdef VLAD_SECOND_ITTABLE
00105     enum eSearchDir
00106     {
00107         eFORWARD = 0,
00108         eBACKWARD
00109     };
00110 #endif
00111 
00112     static const unsigned char DT_STRING_VAL = 1; 
00113     static const unsigned char DT_INTEGER_VAL = 2;
00114     // static const unsigned char DT_DOUBLE_VAL = 3;
00115 
00116     // Sets string comparison case sensitive
00117     static const unsigned char CASE_SENSE = 0x00;
00118     // Sets string comparison case insensitive
00119     static const unsigned char CASE_INSENSE = 0x01;
00120     // Sets string comparison to be sensitive to whitespace
00121     static const unsigned char W_SPACE_SENSE = 0x00;
00122     // Sets string comparison to ignore repeating whitspace.  
00123     // Also ignores leading and trailing whitespace
00124     static const unsigned char W_SPACE_INSENSE = 0x02;
00125     // string datatype
00126     static const unsigned char DT_STRING  = DT_STRING_VAL  << 4;
00127     // integer datatype
00128     static const unsigned char DT_INTEGER = DT_INTEGER_VAL << 4;
00129     // VLAD FEATURE NOT WORKING double is not working, maybe integer. check it      // double datatype
00130     // static const unsigned char DT_DOUBLE  = DT_DOUBLE_VAL  << 4;
00131 
00149     ISTable(const StringCompare::eCompareType colCaseSense =
00150       StringCompare::eCASE_SENSITIVE);
00151 
00172     ISTable(eOrientation orient, const StringCompare::eCompareType
00173       colCaseSense = StringCompare::eCASE_SENSITIVE);
00174 
00192     ISTable(const string& name,
00193       const StringCompare::eCompareType colCaseSense =
00194       StringCompare::eCASE_SENSITIVE);
00195 
00216     ISTable(const string& name, eOrientation orient,
00217       const StringCompare::eCompareType colCaseSense =
00218       StringCompare::eCASE_SENSITIVE);
00219 
00236     ISTable(const ISTable& inTable);
00237 
00251     ~ISTable();
00252  
00268     ISTable& operator=(const ISTable& inTable);
00269 
00291     eTableDiff operator==(ISTable& inTable);
00292 
00306     inline const string& GetName() const; 
00307 
00321     void SetName(const string& name);
00322 
00336     inline unsigned int GetNumColumns() const;
00337 
00351     const vector<string>& GetColumnNames() const;
00352 
00367     bool IsColumnPresent(const string& colName);
00368 
00397     void AddColumn(const string& colName, const vector<string>& col =
00398       vector<string> (0));
00399 
00442     void InsertColumn(const string& colName,
00443       const string& afColName, const vector<string>& col =
00444       vector<string> (0));
00445 
00474     void FillColumn(const string& colName, const vector<string>& col);
00475 
00494     void GetColumn(vector<string>& col, const string& colName);
00495 
00527     void GetColumn(vector<string>& col, const string& colName,
00528       const unsigned int fromRowIndex, unsigned int toRowIndex);
00529 
00553     void GetColumn(vector<string>& col, const string& colName,
00554       const vector<unsigned int>& rowIndex);
00555 
00578     void RenameColumn(const string& oldColName, const string& newColName);
00579 
00596     void ClearColumn(const string& colName);
00597 
00614     void DeleteColumn(const string& colName);
00615 
00629     inline unsigned int GetNumRows() const;
00630 
00667     unsigned int AddRow(const vector<string>& row = vector<string> (0));
00668 
00715     unsigned int InsertRow(const unsigned int atRowIndex,
00716       const vector<string>& row = vector<string> (0));
00717 
00742     void FillRow(const unsigned int rowIndex, const vector<string>& row);
00743 
00783     void GetRow(vector<string>& row, const unsigned int rowIndex,
00784       const string& fromColName = String::Empty, const string& toColName =
00785       String::Empty);
00786 
00803     const vector<string>& GetRow(const unsigned int rowIndex);
00804 
00820     void ClearRow(const unsigned int rowIndex);
00821 
00840     void DeleteRow(const unsigned int rowIndex);
00841 
00859     void DeleteRows(const vector<unsigned int>& rows);
00860 
00874     inline unsigned int GetLastRowIndex();
00875 
00899     void UpdateCell(const unsigned int rowIndex, const string& colName,
00900       const string& value);
00901 
00923     const string& operator()(const unsigned int rowIndex,
00924       const string& colName) const;
00925 
00950     void SetFlags(const string& colName, const unsigned char flags);
00951 
00969     unsigned char GetDataType(const string& colName);
00970 
00999     unsigned int FindFirst(const vector<string>& targets,
01000       const vector<string>& colNames,
01001       const string& indexName = String::Empty);
01002 
01026     void Search(vector<unsigned int>& res, const string& target,
01027       const string& colName, const unsigned int fromRowIndex = 0,
01028       const eSearchDir searchDir = eFORWARD,
01029       const eSearchType searchType = eEQUAL);
01030 
01060     void Search(vector<unsigned int>& res, const vector<string>& targets,
01061       const vector<string>& colNames, const unsigned int fromRowIndex = 0,
01062       const eSearchDir searchDir = eFORWARD,
01063       const eSearchType searchType = eEQUAL,
01064       const string& indexName = String::Empty);
01065 
01097     void FindDuplicateRows(vector<pair<unsigned int, unsigned int> >& duplRows,
01098       const vector<string>& colNames, const bool keepDuplRows,
01099       const eSearchDir searchDir = eFORWARD);
01100 
01115     inline StringCompare::eCompareType GetColCaseSense() const;
01116 
01120     inline void SetModified(const bool modified);
01121 
01125     inline bool GetModified();
01126 
01130     void SetSerializer(Serializer* ser);
01131 
01135     int WriteObject(Serializer* ser, int& size);
01136 
01140     int GetObject(UInt32 index, Serializer* ser);
01141 
01145     void Read(unsigned int indexInFile);
01146 
01150     int Write();
01151 
01155     // typeOfMerge is 0 for overwrite, 1 for overlap
01156     static ISTable* Merge(ISTable& firstTable, ISTable& secondTable,
01157       unsigned int typeOfMerge = 0); 
01158 
01162     bool PrintDiff(ISTable& inTable);
01163 
01167     inline bool IndexExists(const string& indexName);
01168 
01172     void CreateIndex(const string& indexName, const vector<string>& colNames,
01173       const unsigned int unique = 0);
01174 
01178     void UpdateIndex(const string& indexName, const unsigned int rowIndex);
01179 
01183     void RebuildIndex(const string& indexName);
01184 
01188     void RebuildIndices();
01189 
01193     void DeleteIndex(const string& indexName);
01194 
01198     inline unsigned int GetNumIndices();
01199 
01203     void CreateKey(const vector<string>& colNames);
01204 
01208     void DeleteKey();
01209 
01213     static void SetUnion(const vector<unsigned int>& a,
01214       const vector<unsigned int>& b, vector<unsigned int>& ret);
01215 
01219     static void SetIntersect(const vector<unsigned int>& a,
01220       const vector<unsigned int>& b, vector<unsigned int>& ret);
01221 
01225     void GetColumnsIndices(vector<unsigned int>& colIndices,
01226       const vector<string>& colNames);
01227 
01231     void GetColumn(vector<string>& col, const string& colName,
01232       const string& indexName);
01233 
01234   private:
01235     static const unsigned int MAX_NUM_ITTABLE_ROWS = 1000;
01236 
01237     // number of digit DBL_MIN_10_EXP, letter e is not included in size
01238     static const unsigned int EXPONENT      =  4;
01239     static const unsigned int MAX_PRECISION = DBL_DIG;
01240     //???DBL_MANT_DIG;
01241     static const unsigned int MANTISSA       =  MAX_PRECISION + 2;
01242     static const unsigned int INT_LIMIT      = 11;
01243 
01244     // datatype mask
01245     static const unsigned char DT_MASK        = 15 << 4;
01246     // string comparison sensitivity mask
01247     static const unsigned char SC_MASK        = 0x01;
01248     // white space sensitivity mask
01249     static const unsigned char WS_MASK        = 0x02;
01250     static const unsigned char LAST_DT_VALUE  = 3;
01251     static const unsigned int  DEFAULT_PRECISION = MAX_PRECISION;
01252     static const unsigned char DEFAULT_OPTIONS;
01253 
01254     static const string _version;
01255 
01256     string _name;
01257 
01258     vector<ITTable> _ittables;
01259 
01260     ITTable::eOrientation _orient;
01261 
01262     StringCompare::eCompareType _colCaseSense;
01263 
01264     mapped_vector<string, StringCompare> _colNames;
01265  
01266     vector<unsigned int> _precision;
01267     vector<unsigned char> _compare_opts;
01268 
01269     vector<string> _indexNames;
01270     vector<vector<unsigned int> > _listsOfColumns;
01271     vector<unsigned int> _unique;
01272 
01273     Serializer* _ser;
01274 
01275     bool _modified; // Indicates whether table has been modified
01276 
01277     unsigned int _numRows;
01278 
01279     mutable unsigned int _rowIndexCache;
01280     mutable pair<unsigned int, unsigned int> _rowLocCache;
01281 
01282     void InsertColumn(const string& colName, const unsigned int atColIndex,
01283       const vector<string>& col = vector<string> (0));
01284     void CreateColumn(const string& colName, const unsigned int atColIndex,
01285       const vector<string>& col = vector<string> (0));
01286     int UpdateCell(const string& cell, const unsigned int colIndex,
01287       const unsigned int rowIndex);
01288     const string& operator()(const unsigned int rowIndex,
01289       const unsigned int colIndex) const;
01290     int SetFlags(const unsigned char newOpts, const unsigned int colIndex);
01291     void FindDuplicateRows(const vector<unsigned int>& colIndices,
01292       vector<pair<unsigned int, unsigned int> >& duplRows,
01293       const unsigned int keep, const eSearchDir searchDir = eFORWARD);
01294     void VerifyColumnsIndices(const vector<unsigned int>& colIndices);
01295     bool AreListsOfColumnsValid(const vector<unsigned int>& colIndices);
01296     void CreateIndex(const string& indexName,
01297       const vector<unsigned int>& colIndices, const unsigned int unique = 0);
01298     void CreateKey(const vector<unsigned int>& colIndices);
01299     unsigned int FindFirst(const vector<string>& targets,
01300       const vector<unsigned int>& colIndices,
01301       const string& indexName = String::Empty);
01302     void Search(vector<unsigned int>& res, const vector<string>& targets,
01303       const vector<unsigned int>& colIndices,
01304       const unsigned int fromRowIndex = 0,
01305       const eSearchDir searchDir = eFORWARD,
01306       const eSearchType searchType = eEQUAL,
01307       const string& indexName = String::Empty);
01308 
01309     void Init();
01310     void Clear();
01311 
01312     StringCompare::eCompareType
01313       GetCompareType(const vector<unsigned int>& colIndices);
01314 
01315     string CellValue(const unsigned int colIndex,
01316       const unsigned int rowIndex);
01317     string ConvertString(const string& value, const unsigned int colIndex);
01318     string MultiStringsValue(const vector<string>& values,
01319       const vector<unsigned int>& colIndices);
01320     string SubRowValue(const vector<unsigned int>& colIndices,
01321       const unsigned int rowIndex);
01322     string AggregateRow(const vector<unsigned int>& colIndices,
01323       const unsigned int rowIndex);
01324 
01325     inline void AppendToAndDelimit(string& to, const string& appending);
01326 
01327     void ValidateOptions(unsigned int colIndex);
01328 
01329     string CreateInternalIndexName(const unsigned int indexIndex);
01330     void UpdateIndex(const unsigned int indexIndex,
01331       const unsigned int rowIndex);
01332     void RebuildIndex(const unsigned int indexIndex);
01333     void ClearIndex(const unsigned int indexIndex);
01334     void DeleteIndex(const unsigned int indexIndex);
01335 
01336     int FindIndex(const string& indexName);
01337     int FindIndex(const vector<unsigned int>& colIndices);
01338 
01339     void UpdateIndices(const unsigned int rowIndex);
01340     void ClearIndices();
01341 
01342     bool IsColumnInIndex(const unsigned int indexIndex,
01343       const unsigned int colIndex);
01344 
01345     int FindKeyIndex();
01346 
01347     void UpdateColListOnColInsert(const unsigned int colIndex);
01348     void UpdateColListOnColDelete(const unsigned int colIndex);
01349     void UpdateColListOnCellUpdate(const unsigned int rowIndex,
01350       const unsigned int colIndex);
01351 
01352     unsigned int FindFirst(const vector<string>& targets,
01353       const vector<unsigned int>& colIndices, const unsigned int indexIndex);
01354 
01355     int WriteObjectV9(Serializer*, int& size);
01356 
01357     int GetObjectV9(UInt32 index, Serializer*);
01358     int GetObjectV8(UInt32 index, Serializer*);
01359     int GetObjectV7(UInt32 index, Serializer*);
01360     int GetObjectV6(UInt32 index, Serializer*);
01361     int GetObjectV3(UInt32 index, Serializer*);
01362     int GetObjectV2(UInt32 index, Serializer*);
01363     int GetObjectV1(UInt32 index, Serializer*);
01364     int GetObjectV1_1(UInt32 index, Serializer*);
01365 
01366     void ConvertToInt(const string& a, string& ret);
01367     void ConvertDouble(const string& a, string& ret);
01368     void ConvertToLowerNoWhiteSpace(const string& a, string& ret);
01369 
01370     void GetRowLocation(pair<unsigned int, unsigned int>& rowLoc,
01371       const unsigned int rowIndex) const;
01372     void CacheRowLocation(const unsigned int rowIndex) const;
01373 
01374     void CreateSubtables(const unsigned int numRows);
01375     void CreateSubtableColumns(const unsigned int colIndex,
01376       const vector<string>& col);
01377     void CreateColumn(const unsigned int atColIndex,
01378       const vector<string>& col);
01379 
01380     void Print(const string& indexName);
01381 
01382     unsigned int GetColumnIndex(const string& colName) const;
01383 
01384 };
01385 
01386 
01387 ostream& operator<<(ostream& out, const ISTable& isTable);
01388 
01389 
01390 inline unsigned int ISTable::GetLastRowIndex()
01391 {
01392 
01393     return(GetNumRows() - 1);
01394 
01395 }
01396 
01397 
01398 inline unsigned int ISTable::GetNumIndices()
01399 {
01400 
01401     return(_indexNames.size());
01402 
01403 }
01404 
01405 
01406 inline bool ISTable::IndexExists(const string& indexName)
01407 {
01408 
01409     int ret = FindIndex(indexName);
01410 
01411     if (ret == -1)
01412     {
01413         return(false);
01414     }
01415     else
01416     {
01417         return(true);
01418     }
01419 
01420 }
01421 
01422 
01423 inline void ISTable::AppendToAndDelimit(string& to, const string& appending)
01424 {
01425 
01426     to += appending;
01427     // VLAD HARDCODED CONST
01428     to += " ";
01429 
01430 }
01431 
01432 
01433 inline void ISTable::SetModified(const bool modified)
01434 {
01435     _modified = modified;
01436 }
01437 
01438 
01439 inline bool ISTable::GetModified()
01440 {
01441     return _modified;
01442 }
01443 
01444 
01445 inline const string& ISTable::GetName() const
01446 {
01447     return(_name);
01448 }
01449 
01450 
01451 inline unsigned int ISTable::GetNumRows() const
01452 {
01453     return(_numRows);
01454 }
01455 
01456 
01457 inline unsigned int ISTable::GetNumColumns() const
01458 {
01459     return(_colNames.size());
01460 }
01461 
01462 
01463 inline StringCompare::eCompareType ISTable::GetColCaseSense() const
01464 {
01465     return(_colCaseSense);
01466 }
01467 
01468 
01469 #endif // ISTABLE_H

Generated on Tue Feb 5 09:01:26 2008 for tables-v8.0 by  doxygen 1.5.1