From 702ce223052d37a75d90d1e9a7c23cce604c68e7 Mon Sep 17 00:00:00 2001 From: Felippe de Meirelles Motta Date: Sat, 21 Jun 2008 00:01:57 +0000 Subject: SSAHA is a software tool for very fast matching and alignment of DNA sequences. It stands for Sequence Search and Alignment by Hashing Algorithm. It achieves its fast search speed by converting sequence information into a `hash table' data structure, which can then be searched very rapidly for matches. WWW: http://www.sanger.ac.uk/Software/analysis/SSAHA/ PR: ports/124525 Submitted by: Fernan Aguero Approved by: gabor (mentor, implicit) --- biology/Makefile | 1 + biology/ssaha/Makefile | 33 ++++ biology/ssaha/distinfo | 3 + biology/ssaha/files/patch-GlobalDefinitions.cpp | 11 ++ biology/ssaha/files/patch-GlobalDefinitions.h | 82 ++++++++++ biology/ssaha/files/patch-HashTable.cpp | 20 +++ biology/ssaha/files/patch-HashTableGeneric.cpp | 48 ++++++ biology/ssaha/files/patch-HashTablePacked.cpp | 29 ++++ biology/ssaha/files/patch-HashTablePacked.h | 16 ++ biology/ssaha/files/patch-HashTableTranslated.cpp | 77 +++++++++ biology/ssaha/files/patch-MatchAligner.cpp | 94 +++++++++++ biology/ssaha/files/patch-MatchAligner.h | 33 ++++ biology/ssaha/files/patch-MatchStore.h | 54 +++++++ biology/ssaha/files/patch-MatchStoreGapped.h | 25 +++ biology/ssaha/files/patch-MatchStoreUngapped.h | 13 ++ biology/ssaha/files/patch-QueryManager.cpp | 54 +++++++ biology/ssaha/files/patch-QueryManager.h | 19 +++ biology/ssaha/files/patch-README | 13 ++ biology/ssaha/files/patch-SSAHAMain.cpp | 48 ++++++ biology/ssaha/files/patch-SSAHAMain.h | 19 +++ biology/ssaha/files/patch-SequenceEncoder.cpp | 75 +++++++++ biology/ssaha/files/patch-SequenceReader.cpp | 20 +++ biology/ssaha/files/patch-SequenceReader.h | 11 ++ biology/ssaha/files/patch-SequenceReaderFasta.cpp | 32 ++++ biology/ssaha/files/patch-SequenceReaderFasta.h | 11 ++ biology/ssaha/files/patch-SequenceReaderFilter.h | 20 +++ biology/ssaha/files/patch-SequenceReaderLocal.cpp | 28 ++++ 
biology/ssaha/files/patch-SequenceReaderMulti.cpp | 30 ++++ biology/ssaha/files/patch-SequenceReaderMulti.h | 13 ++ biology/ssaha/files/patch-SequenceReaderString.h | 26 +++ biology/ssaha/files/patch-makefile | 15 ++ .../ssaha/files/patch-testHashTableNoOverlap.cpp | 178 +++++++++++++++++++++ biology/ssaha/files/patch-testQueryManager.cpp | 20 +++ .../ssaha/files/patch-testSequenceReaderFasta.cpp | 171 ++++++++++++++++++++ biology/ssaha/pkg-descr | 11 ++ biology/ssaha/pkg-plist | 9 ++ 36 files changed, 1362 insertions(+) create mode 100644 biology/ssaha/Makefile create mode 100644 biology/ssaha/distinfo create mode 100644 biology/ssaha/files/patch-GlobalDefinitions.cpp create mode 100644 biology/ssaha/files/patch-GlobalDefinitions.h create mode 100644 biology/ssaha/files/patch-HashTable.cpp create mode 100644 biology/ssaha/files/patch-HashTableGeneric.cpp create mode 100644 biology/ssaha/files/patch-HashTablePacked.cpp create mode 100644 biology/ssaha/files/patch-HashTablePacked.h create mode 100644 biology/ssaha/files/patch-HashTableTranslated.cpp create mode 100644 biology/ssaha/files/patch-MatchAligner.cpp create mode 100644 biology/ssaha/files/patch-MatchAligner.h create mode 100644 biology/ssaha/files/patch-MatchStore.h create mode 100644 biology/ssaha/files/patch-MatchStoreGapped.h create mode 100644 biology/ssaha/files/patch-MatchStoreUngapped.h create mode 100644 biology/ssaha/files/patch-QueryManager.cpp create mode 100644 biology/ssaha/files/patch-QueryManager.h create mode 100644 biology/ssaha/files/patch-README create mode 100644 biology/ssaha/files/patch-SSAHAMain.cpp create mode 100644 biology/ssaha/files/patch-SSAHAMain.h create mode 100644 biology/ssaha/files/patch-SequenceEncoder.cpp create mode 100644 biology/ssaha/files/patch-SequenceReader.cpp create mode 100644 biology/ssaha/files/patch-SequenceReader.h create mode 100644 biology/ssaha/files/patch-SequenceReaderFasta.cpp create mode 100644 biology/ssaha/files/patch-SequenceReaderFasta.h create 
mode 100644 biology/ssaha/files/patch-SequenceReaderFilter.h create mode 100644 biology/ssaha/files/patch-SequenceReaderLocal.cpp create mode 100644 biology/ssaha/files/patch-SequenceReaderMulti.cpp create mode 100644 biology/ssaha/files/patch-SequenceReaderMulti.h create mode 100644 biology/ssaha/files/patch-SequenceReaderString.h create mode 100644 biology/ssaha/files/patch-makefile create mode 100644 biology/ssaha/files/patch-testHashTableNoOverlap.cpp create mode 100644 biology/ssaha/files/patch-testQueryManager.cpp create mode 100644 biology/ssaha/files/patch-testSequenceReaderFasta.cpp create mode 100644 biology/ssaha/pkg-descr create mode 100644 biology/ssaha/pkg-plist (limited to 'biology') diff --git a/biology/Makefile b/biology/Makefile index 2f591aa84d19..cd0e5740f286 100644 --- a/biology/Makefile +++ b/biology/Makefile @@ -81,6 +81,7 @@ SUBDIR += seaview SUBDIR += seqio SUBDIR += sim4 + SUBDIR += ssaha SUBDIR += tRNAscan-SE SUBDIR += t_coffee SUBDIR += tinker diff --git a/biology/ssaha/Makefile b/biology/ssaha/Makefile new file mode 100644 index 000000000000..8855ac6ec0fa --- /dev/null +++ b/biology/ssaha/Makefile @@ -0,0 +1,33 @@ +# New ports collection makefile for: ssaha +# Date created: 12.Jun.2008 +# Whom: Fernan Aguero +# +# $FreeBSD$ +# + +PORTNAME= ssaha +PORTVERSION= 3.1c +CATEGORIES= biology +MASTER_SITES= http://www.sanger.ac.uk/Software/analysis/${PORTNAME:U}/ +DISTNAME= ${PORTNAME}_v${PORTVERSION:S/.//} + +MAINTAINER= fernan@iib.unsam.edu.ar +COMMENT= Very fast matching and alignment of DNA sequences + +NO_WRKSUBDIR= yes + +USE_GMAKE= yes +MAKEFILE= makefile +BUILD_WRKSRC= ${WRKDIR}/Binary +ALL_TARGET= ssaha + +DATA_FILES= test.fasta test_extract.fasta test_filter.fail \ + test_filter.fastq test_protein.fasta README + +do-install: + ${INSTALL_PROGRAM} ${BUILD_WRKSRC}/ssaha ${PREFIX}/bin + @${MKDIR} ${DATADIR} + ${INSTALL_SCRIPT} ${BUILD_WRKSRC}/testSSAHA.csh ${DATADIR}/ + ${INSTALL_DATA} ${DATA_FILES:S,^,${WRKSRC}/,} ${DATADIR}/ + +.include <bsd.port.mk> 
diff --git a/biology/ssaha/distinfo b/biology/ssaha/distinfo new file mode 100644 index 000000000000..1d978ee099cd --- /dev/null +++ b/biology/ssaha/distinfo @@ -0,0 +1,3 @@ +MD5 (ssaha_v31c.tar.gz) = 0260a0cce67c5c465f5b54a45b3f65ae +SHA256 (ssaha_v31c.tar.gz) = 63fa38ccd2725db6ba10881f8cc94d899afd2eba9c2f6436223d9284c5abfced +SIZE (ssaha_v31c.tar.gz) = 251510 diff --git a/biology/ssaha/files/patch-GlobalDefinitions.cpp b/biology/ssaha/files/patch-GlobalDefinitions.cpp new file mode 100644 index 000000000000..dbf6ed8a038f --- /dev/null +++ b/biology/ssaha/files/patch-GlobalDefinitions.cpp @@ -0,0 +1,11 @@ +--- ./Global/GlobalDefinitions.cpp.orig 2004-03-01 13:51:28.000000000 -0300 ++++ ./Global/GlobalDefinitions.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -119,7 +119,7 @@ + MakeIntoWord::MakeIntoWord( int bitsPerSymbol, const char* tt ): + bitsPerSymbol_( bitsPerSymbol ), tt_( tt ) + { +- for ( unsigned int i(0) ; i < (1<(tolower(tt[i]))),i ) ); + map_.insert( make_pair( (static_cast(toupper(tt[i]))),i ) ); diff --git a/biology/ssaha/files/patch-GlobalDefinitions.h b/biology/ssaha/files/patch-GlobalDefinitions.h new file mode 100644 index 000000000000..fb7851574cc7 --- /dev/null +++ b/biology/ssaha/files/patch-GlobalDefinitions.h @@ -0,0 +1,82 @@ +--- ./Global/GlobalDefinitions.h.orig 2004-03-01 13:51:28.000000000 -0300 ++++ ./Global/GlobalDefinitions.h 2008-06-12 15:39:31.000000000 -0300 +@@ -726,8 +726,6 @@ + template class Allocator + { + public: +- typedef T MyType; +- + Allocator( T** ptr, const string& name, ostream& monStream=cerr ) : + ptr_(ptr), name_(name), size_(0), isAllocated_(false), + monStream_( monStream ) +@@ -788,32 +786,34 @@ + + virtual void allocate( unsigned long size ) + { +- size_=size; +- (*ptr_)=new T[size_]; +- isAllocated_=true; ++ Allocator::size_=size; ++ (*Allocator::ptr_)=new T[Allocator::size_]; ++ Allocator::isAllocated_=true; + } + + virtual void allocateAndZero( unsigned long size ) + { + const unsigned char zero(0); + 
allocate(size); +- memset( (void*)(*ptr_), zero, size_*sizeof(MyType) ); ++ memset( (void*)(*Allocator::ptr_), zero, Allocator::size_*sizeof(T) ); + } + virtual void load( unsigned long size ) + { + allocate(size); +- loadFromFile( name_, (char*)(*ptr_), size_*sizeof(MyType), monStream_ ); ++ loadFromFile( Allocator::name_, (char*)(*Allocator::ptr_), ++ Allocator::size_*sizeof(T), Allocator::monStream_ ); + } + virtual void save() + { +- saveToFile( name_, (char*)(*ptr_), size_*sizeof(MyType), monStream_ ); ++ saveToFile( Allocator::name_, (char*)(*Allocator::ptr_), ++ Allocator::size_*sizeof(T), Allocator::monStream_ ); + } + + virtual void deallocate() + { +- if (!isAllocated_) return; +- delete [] (*ptr_); +- isAllocated_=false; ++ if (!Allocator::isAllocated_) return; ++ delete [] (*Allocator::ptr_); ++ Allocator::isAllocated_=false; + } + protected: + }; +@@ -892,7 +892,7 @@ + if (isAllocated_) return; + mode_ = MemoryMapper::createMap; + size_ = size; +- (*ptr_) = (T*) linkToMap(mode_,name_,size_*sizeof(MyType)); ++ (*ptr_) = (T*) linkToMap(mode_,name_,size_*sizeof(T)); + isAllocated_ = true; + } + +@@ -908,7 +908,7 @@ + if (isAllocated_) return; + mode_ = MemoryMapper::readMap; + size_ = size; +- (*ptr_) = (T*) linkToMap(mode_,name_,size_*sizeof(MyType)); ++ (*ptr_) = (T*) linkToMap(mode_,name_,size_*sizeof(T)); + isAllocated_ = true; + } + +@@ -921,7 +921,7 @@ + virtual void deallocate() + { + if (!isAllocated_) return; +- if(munmap((caddr_t)(*ptr_), size_*sizeof(MyType)) < 0) ++ if(munmap((caddr_t)(*ptr_), size_*sizeof(T)) < 0) + perror("unmap error"); // don't throw - called from destructor! 
+ close(fileDesc_); + if (mode_.deleteFileOnExit) shm_unlink(name_.c_str()); diff --git a/biology/ssaha/files/patch-HashTable.cpp b/biology/ssaha/files/patch-HashTable.cpp new file mode 100644 index 000000000000..c57e3745c639 --- /dev/null +++ b/biology/ssaha/files/patch-HashTable.cpp @@ -0,0 +1,20 @@ +--- ./HashTable/HashTable.cpp.orig 2004-03-01 13:51:28.000000000 -0300 ++++ ./HashTable/HashTable.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -265,7 +265,7 @@ + void HashTable::countWords( SequenceAdapter& thisSeq ) + { + +- for ( int j(0) ; j < thisSeq.size() ; ++ j ) ++ for ( WordSequence::size_type j(0) ; j < thisSeq.size() ; ++ j ) + { + // only count words that have not been flagged + pWordPositionInHitList_[(thisSeq[j]&(~gCursedWord))] +@@ -284,7 +284,7 @@ + // NB We stop at the last but one element of the + // sequence (as the last isn't a full word) + +- for ( int j(0) ; j < thisSeq.size() ; ++ j ) ++ for ( WordSequence::size_type j(0) ; j < thisSeq.size() ; ++ j ) + { + thisWord = thisSeq[j]; + // only hash words that have not been flagged diff --git a/biology/ssaha/files/patch-HashTableGeneric.cpp b/biology/ssaha/files/patch-HashTableGeneric.cpp new file mode 100644 index 000000000000..0d1ffb441303 --- /dev/null +++ b/biology/ssaha/files/patch-HashTableGeneric.cpp @@ -0,0 +1,48 @@ +--- ./HashTable/HashTableGeneric.cpp.orig 2005-06-21 05:48:27.000000000 -0300 ++++ ./HashTable/HashTableGeneric.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -63,18 +63,18 @@ + ( ostream& monitoringStream, + const string& name, + Allocator& arrayAllocator ) : +- isInitialized_( false ), +- monitoringStream_( monitoringStream ), + name_( name ), +- bitsPerSymbol_( gBaseBits ), // default: may be overwritten in subclass ctor ++ isInitialized_( false ), + maxNumHits_( defaultMaxNumHits ), ++ bitsPerSymbol_( gBaseBits ), // default: may be overwritten in subclass ctor + hitListFormat_( gNotSpecified ), ++ monitoringStream_( monitoringStream ), + pArrayAllocator_ + ( 
arrayAllocator.clone(&pWordPositionInHitList_, + name+(string)".head", + monitoringStream_) ), +- pSequenceSizes_(NULL), +- pNameReader_(NULL) ++ pNameReader_(NULL), ++ pSequenceSizes_(NULL) + { + monitoringStream_ << "constructing HashTableGeneric\n"; + if (name_=="") +@@ -290,7 +290,7 @@ + WordSequence thisSeq; + + // NB sequences are numbered 1...n not 0...n-1 +- for ( unsigned int i(1); i <= numSeqs ; i++ ) ++ for ( int i(1); i <= numSeqs ; i++ ) + { + if( sequenceReader.getNextSequence( thisSeq, wordLength_) == -1 ) + { +@@ -973,10 +973,10 @@ + + SequenceAdapterWithOverlap::SequenceAdapterWithOverlap + ( int bitsPerSymbol, int wordLength, int stepLength ) : ++SequenceAdapter(), + bitsPerSymbol_( bitsPerSymbol ), + wordLength_( wordLength ), +-stepLength_( stepLength ), +-SequenceAdapter() ++stepLength_( stepLength ) + { + + maskLeft_ = new Word[ wordLength_ ]; diff --git a/biology/ssaha/files/patch-HashTablePacked.cpp b/biology/ssaha/files/patch-HashTablePacked.cpp new file mode 100644 index 000000000000..6efc6d24bd77 --- /dev/null +++ b/biology/ssaha/files/patch-HashTablePacked.cpp @@ -0,0 +1,29 @@ +--- ./HashTable/HashTablePacked.cpp.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./HashTable/HashTablePacked.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -132,7 +132,7 @@ + void HashTablePacked::countWords( SequenceAdapter& thisSeq ) + { + +- for ( int j(0) ; j < thisSeq.size() ; ++ j ) ++ for ( WordSequence::size_type j(0) ; j < thisSeq.size() ; ++ j ) + { + // only count words that have not been flagged + pWordPositionInHitList_[(thisSeq[j]&(~gCursedWord))] +@@ -151,7 +151,7 @@ + // NB We stop at the last but one element of the + // sequence (as the last isn't a full word) + +- for ( int j(0) ; j < thisSeq.size() ; ++ j ) ++ for ( WordSequence::size_type j(0) ; j < thisSeq.size() ; ++ j ) + { + thisWord = thisSeq[j]; + +@@ -200,7 +200,7 @@ + for ( WordSequence::const_iterator thisWord(seq.begin()); + thisWord != last ; ++thisWord ) + { +- int 
oldSize(packedHits.size()); // %%%%%% ++// int oldSize(packedHits.size()); // %%%%%% + matchWordDeluxe( *thisWord, packedHits, baseOffset ); + // cout << printResidue(*thisWord, wordLength_) << " " + // << packedHits.size()-oldSize; diff --git a/biology/ssaha/files/patch-HashTablePacked.h b/biology/ssaha/files/patch-HashTablePacked.h new file mode 100644 index 000000000000..c00860d1bbfd --- /dev/null +++ b/biology/ssaha/files/patch-HashTablePacked.h @@ -0,0 +1,16 @@ +--- ./HashTable/HashTablePacked.h.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./HashTable/HashTablePacked.h 2008-06-12 15:39:31.000000000 -0300 +@@ -236,11 +236,11 @@ + HashTableView + (monitoringStream, name, hitListAllocator, arrayAllocator), + wordNum_(0), ++ numRepeats_(0), ++ substituteThreshold_(0), + pMatchSequence_(&HashTablePacked::matchSequenceStandard), + pMatchWord_(&HashTablePacked::matchWordStandard), + pGenerateSubstitutes_(&generateSubstitutesDNA), +- numRepeats_(0), +- substituteThreshold_(0), + sorter_(4,(sizeof(PositionPacked)*8)/4) + { + hitListFormat_ = g32BitPacked; diff --git a/biology/ssaha/files/patch-HashTableTranslated.cpp b/biology/ssaha/files/patch-HashTableTranslated.cpp new file mode 100644 index 000000000000..e4ab9af82423 --- /dev/null +++ b/biology/ssaha/files/patch-HashTableTranslated.cpp @@ -0,0 +1,77 @@ +--- ./HashTable/HashTableTranslated.cpp.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./HashTable/HashTableTranslated.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -50,9 +50,9 @@ + Allocator& hitListAllocator, + Allocator& arrayAllocator + ): +- queryFrame_(0), + HashTablePacked( monitoringStream, name, +- hitListAllocator, arrayAllocator ) ++ hitListAllocator, arrayAllocator ), ++ queryFrame_(0) + { + hitListFormat_ = g32BitPackedProtein; + bitsPerSymbol_ = gResidueBits; +@@ -135,12 +135,12 @@ + Allocator& hitListAllocator, + Allocator& arrayAllocator + ): ++ HashTablePacked( monitoringStream, name, ++ hitListAllocator, arrayAllocator ), ++ codonEncoder_(5), + 
queryFrame_(0), + queryMult_(1), +- codonEncoder_(5), +- pMatchSequence_( &HashTablePackedProtein::matchSequenceProtein ), +- HashTablePacked( monitoringStream, name, +- hitListAllocator, arrayAllocator ) ++ pMatchSequence_( &HashTablePackedProtein::matchSequenceProtein ) + { + hitListFormat_ = g32BitPackedProtein; + bitsPerSymbol_ = gResidueBits; +@@ -249,14 +249,14 @@ + Allocator& hitListAllocator, + Allocator& arrayAllocator + ) : ++ HashTableGeneric( monitoringStream, name, arrayAllocator ), + hashFwd_( monitoringStream, name+(string)"_fwd", + hitListAllocator, arrayAllocator ), + hashRev_( monitoringStream, name+(string)"_rev", + hitListAllocator, arrayAllocator ), + pHash_(&hashFwd_), + codonEncoder_(5), +- pMatchSequence_( &HashTableTranslated::matchSequenceProtein ), +- HashTableGeneric( monitoringStream, name, arrayAllocator ) ++ pMatchSequence_( &HashTableTranslated::matchSequenceProtein ) + { + bitsPerSymbol_=gResidueBits; + hitListFormat_ = gTranslated; +@@ -344,8 +344,8 @@ + + // Change encoding mode + SequenceReaderModeFlagReplace mode('X'); +- assert(ttCodon['X']==ttProtein['X']); +- assert(ttCodon['X']!=nv); ++ assert(ttCodon[(int)'X']==ttProtein[(int)'X']); ++ assert(ttCodon[(int)'X']!=nv); + codonEncoder_.changeMode( &mode ); + + +@@ -418,7 +418,7 @@ + seq->link( translatedSeq ); + + // NB sequences are numbered 1...n not 0...n-1 +- for ( unsigned int i(1); i <= numSeqs ; i++ ) ++ for ( int i(1); i <= numSeqs ; i++ ) + { + // cout << "hashing sequence " << i << endl; + if( sequenceReader.getNextSequence( thisSeq, eDNAWordSizeForHashing) == -1 ) +@@ -520,7 +520,7 @@ + int HashTableTranslated::getHitTypeSize( void ) const + { + assert(1==0); +- return NULL; ++ return 0; + } + void HashTableTranslated::allocateHitList( unsigned long size ) + { diff --git a/biology/ssaha/files/patch-MatchAligner.cpp b/biology/ssaha/files/patch-MatchAligner.cpp new file mode 100644 index 000000000000..e7420b1ddec4 --- /dev/null +++ 
b/biology/ssaha/files/patch-MatchAligner.cpp @@ -0,0 +1,94 @@ +--- ./QueryManager/MatchAligner.cpp.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./QueryManager/MatchAligner.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -53,13 +53,13 @@ + bool reverseQueryCoords, + bool doAlignment, + ostream& outputStream ): ++ pAlign_(pAlign), ++ outputStream_(outputStream), + querySource_(querySource), + subjectSource_(subjectSource), +- pAlign_(pAlign), + // numCols_(numCols), + reverseQueryCoords_(reverseQueryCoords), +- doAlignment_(doAlignment), +- outputStream_(outputStream) ++ doAlignment_(doAlignment) + { + } // ~MatchTaskAlign::MatchTaskAlign + +@@ -194,10 +194,10 @@ + int bandExtension, + ScoreTable* pTable, + ostream& outputStream ) : ++ outputStream_(outputStream), + numCols_(numCols), + bandExtension_(bandExtension), +- pTable_(pTable), +- outputStream_(outputStream) ++ pTable_(pTable) + { + pBufSeq1_= new char [numCols+1]; + pBufSeq2_= new char [numCols+1]; +@@ -563,17 +563,17 @@ + // cout << int ( ttDNA[ *(pChar++ ] << 4 + // | (ttDNA[ *(pChar++) ] << 2) + // | ttDNA[ *(pChar++) ] ) << endl; +- if ( (ttDNA[ *(pChar) ]==nv) +- || (ttDNA[ *(pChar+1) ]==nv) +- || (ttDNA[ *(pChar+2) ]==nv) ) ++ if ( (ttDNA[ (int) *(pChar) ]==nv) ++ || (ttDNA[ (int) *(pChar+1) ]==nv) ++ || (ttDNA[ (int) *(pChar+2) ]==nv) ) + { + *i='X'; + } // ~if + else + { +- *i= gResidueNames[ ttCodon[ ttDNA[ *(pChar) ] << 4 +- | ttDNA[ *(pChar+1) ] << 2 +- | ttDNA[ *(pChar+2) ] ] ]; ++ *i= gResidueNames[ ttCodon[ ttDNA[ (int) *(pChar) ] << 4 ++ | ttDNA[ (int) *(pChar+1) ] << 2 ++ | ttDNA[ (int) *(pChar+2) ] ] ]; + } // ~else + + } // ~for i +@@ -983,7 +983,7 @@ + + void print( PathMatrix& p ) + { +- for (int i(0); i< p.front().size(); i++) ++ for (size_t i(0); i< p.front().size(); i++) + { + for (vector >::iterator j(p.begin()); + j!=p.end();++j) +@@ -1249,16 +1249,16 @@ + bandWidth_(p2Size-p1Size+1), + bandLength_(p1Size+1), + colSize_(p2Size-p1Size+1+(2*bandExtension_)), +- fillCell_(), + 
finalFrame1_(p1FinalFrame), + finalFrame2_(p2FinalFrame), + numFrames1_((p1Trans[1]==NULL)?1:gNumReadingFrames), + numFrames2_((p2Trans[1]==NULL)?1:gNumReadingFrames), ++ fillCell_(), ++ getScore_(scoreTable), + v1_(colSize_, veryBadScore3D ), + v2_(colSize_, veryBadScore3D ), + pLast_(&v1_), +- pCurrent_(&v2_), +- getScore_(scoreTable) ++ pCurrent_(&v2_) + { + + p1_[0] = p1Trans[0]; +@@ -1287,7 +1287,7 @@ + { + + // ScoreType lastScore, prevFrameScore1, prevFrameScore2; +- int i,j,k,l; ++ int i,j; + + matrix.resize(bandLength_, vector(colSize_) ); + diff --git a/biology/ssaha/files/patch-MatchAligner.h b/biology/ssaha/files/patch-MatchAligner.h new file mode 100644 index 000000000000..1eafad595c91 --- /dev/null +++ b/biology/ssaha/files/patch-MatchAligner.h @@ -0,0 +1,33 @@ +--- ./QueryManager/MatchAligner.h.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./QueryManager/MatchAligner.h 2008-06-12 15:39:31.000000000 -0300 +@@ -315,13 +315,13 @@ + + static char getCodon( const char* pChar ) + { +- return ( ( (ttDNA[ *pChar ] ==nv) +- || (ttDNA[ *(pChar+1) ] ==nv) +- || (ttDNA[ *(pChar+2) ] ==nv) ) ++ return ( ( (ttDNA[ (int) *pChar ] ==nv) ++ || (ttDNA[ (int) *(pChar+1) ] ==nv) ++ || (ttDNA[ (int) *(pChar+2) ] ==nv) ) + ? 
'X' +- : gResidueNames[ ttCodon[ ttDNA[ *(pChar) ] << 4 +- | ttDNA[ *(pChar+1) ] << 2 +- | ttDNA[ *(pChar+2) ] ] ] ); ++ : gResidueNames[ ttCodon[ ttDNA[ (int) *(pChar) ] << 4 ++ | ttDNA[ (int) *(pChar+1) ] << 2 ++ | ttDNA[ (int) *(pChar+2) ] ] ] ); + } // ~getCodon + + +@@ -519,8 +519,8 @@ + : public vector > + { + public: +- typedef pair >::iterator, +- vector::iterator> CellIterator; ++ typedef pair >::iterator, ++ typename vector::iterator> CellIterator; + + template ScoreType fillIn( MATRIX_FILLER& doMatrix ) + { diff --git a/biology/ssaha/files/patch-MatchStore.h b/biology/ssaha/files/patch-MatchStore.h new file mode 100644 index 000000000000..076639489869 --- /dev/null +++ b/biology/ssaha/files/patch-MatchStore.h @@ -0,0 +1,54 @@ +--- ./QueryManager/MatchStore.h.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./QueryManager/MatchStore.h 2008-06-12 15:39:31.000000000 -0300 +@@ -93,20 +93,20 @@ + virtual SequenceNumber getSubjectNum( void ) const + { return subjectNum_; } + // virtual inline string getSubjectName( void ) const; +- virtual inline const char* getSubjectName( void ) const; ++ virtual const char* getSubjectName( void ) const; + virtual SequenceOffset getSubjectStart( void ) const + { return subjectStart_; } + virtual SequenceOffset getSubjectEnd( void ) const + { return subjectEnd_; } + +- virtual inline SequenceNumber getQueryNum( void ) const; +- virtual inline string getQueryName( void ) const; ++ virtual SequenceNumber getQueryNum( void ) const; ++ virtual string getQueryName( void ) const; + virtual SequenceOffset getQueryStart( void ) const + { return queryStart_; } + virtual SequenceOffset getQueryEnd( void ) const + { return queryEnd_; } + +- virtual inline int getQuerySize( void ) const; ++ virtual int getQuerySize( void ) const; + virtual int getNumBases(void ) const + { return numBases_; } + virtual bool isQueryForward( void ) const +@@ -127,7 +127,6 @@ + SequenceOffset subjectEnd, + bool isQueryForward, + bool isSubjectForward ): +- 
myStore_( myStore ), + subjectNum_( subjectNum ), + numBases_( numBases ), + queryStart_( queryStart ), +@@ -135,7 +134,8 @@ + subjectStart_( subjectStart ), + subjectEnd_( subjectEnd ), + isQueryForward_( isQueryForward ), +- isSubjectForward_( isSubjectForward ){} ++ isSubjectForward_( isSubjectForward ), ++ myStore_( myStore ) {} + + SequenceNumber subjectNum_; + SequenceOffset numBases_; +@@ -444,7 +444,7 @@ + ( + unsigned int maxToSort = 1<<30, + double partialThreshold = 0.0 +- ) : sorter_(), maxToSort_(maxToSort), partialThreshold_(partialThreshold) {} ++ ) : maxToSort_(maxToSort), partialThreshold_(partialThreshold), sorter_() {} + // TBD should be virtual??? + void operator()(MatchStore& store ) + { diff --git a/biology/ssaha/files/patch-MatchStoreGapped.h b/biology/ssaha/files/patch-MatchStoreGapped.h new file mode 100644 index 000000000000..7252f06a6ad8 --- /dev/null +++ b/biology/ssaha/files/patch-MatchStoreGapped.h @@ -0,0 +1,25 @@ +--- ./QueryManager/MatchStoreGapped.h.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./QueryManager/MatchStoreGapped.h 2008-06-12 15:39:31.000000000 -0300 +@@ -57,7 +57,8 @@ + { + public: + MatchAlgorithm( int numRepeats ) : +- numRepeats_( numRepeats ), sortNeeded_(true) {} ++ sortNeeded_(true), numRepeats_( numRepeats ) {} ++ virtual ~MatchAlgorithm() {}; + void operator() + ( WordSequence& querySeq, + MatchAdder& addMatch, +@@ -82,10 +83,10 @@ + public: + MatchAlgorithmGapped + ( int maxGap, int maxInsert, int minToProcess, int numRepeats ): ++ MatchAlgorithm( numRepeats ), + maxGap_( maxGap ), + maxInsert_( maxInsert ), +- minToProcess_( minToProcess ), +- MatchAlgorithm( numRepeats ) ++ minToProcess_( minToProcess ) + {} + + virtual void generateMatches diff --git a/biology/ssaha/files/patch-MatchStoreUngapped.h b/biology/ssaha/files/patch-MatchStoreUngapped.h new file mode 100644 index 000000000000..f71815fa673c --- /dev/null +++ b/biology/ssaha/files/patch-MatchStoreUngapped.h @@ -0,0 +1,13 @@ +--- 
./QueryManager/MatchStoreUngapped.h.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./QueryManager/MatchStoreUngapped.h 2008-06-12 15:39:31.000000000 -0300 +@@ -60,8 +60,8 @@ + public: + MatchAlgorithmUngapped + ( int minToProcess, int numRepeats ): +- minToProcess_( minToProcess ), +- MatchAlgorithm( numRepeats ) ++ MatchAlgorithm( numRepeats ), ++ minToProcess_( minToProcess ) + {} + + virtual void generateMatches diff --git a/biology/ssaha/files/patch-QueryManager.cpp b/biology/ssaha/files/patch-QueryManager.cpp new file mode 100644 index 000000000000..bb80ea676c1d --- /dev/null +++ b/biology/ssaha/files/patch-QueryManager.cpp @@ -0,0 +1,54 @@ +--- ./QueryManager/QueryManager.cpp.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./QueryManager/QueryManager.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -164,8 +164,8 @@ + // MatchPolicy member function definitions + + MatchPolicy::MatchPolicy( HashTableGeneric& subjectTable ) : +- subjectTable_( subjectTable ), +- queryWordLength_( subjectTable.getWordLength() ) ++ queryWordLength_( subjectTable.getWordLength() ), ++ subjectTable_( subjectTable ) + {} + + +@@ -203,8 +203,8 @@ + + MatchPolicyProteinProtein::MatchPolicyProteinProtein + ( HashTablePackedProtein& subjectTable ) : +-subjectTable_( subjectTable ), +-MatchPolicy( subjectTable ) ++MatchPolicy( subjectTable ), ++subjectTable_( subjectTable ) + { + + subjectTable_.setQueryProtein(); +@@ -386,9 +386,9 @@ + QueryManager::QueryManager + ( SequenceReader& querySeqs, + HashTableGeneric& subjectSeqs, ostream& monitoringStream ) : +- queryReader_( querySeqs ), + subjectTable_( subjectSeqs ), +- monitoringStream_( monitoringStream ) ++ monitoringStream_( monitoringStream ), ++ queryReader_( querySeqs ) + { + monitoringStream_ << "constructing QueryManager\n"; + +@@ -518,7 +518,7 @@ + + task( store ); + +- if ( queryReader_.getLastSequenceNumber() == queryEnd ) break; ++ if ( (int)queryReader_.getLastSequenceNumber() == queryEnd ) break; + + // clear the query sequence 
ready to read in next query + querySeqFwd.clear(); +@@ -530,7 +530,7 @@ + } // ~while + while ( numBasesInLast != -1 ); + +- if ( ( queryReader_.getLastSequenceNumber() < queryEnd ) ++ if ( ( (int)queryReader_.getLastSequenceNumber() < queryEnd ) + && ( queryEnd != - 1 ) ) + { + monitoringStream_ << "Info: requested final sequence (" << queryEnd diff --git a/biology/ssaha/files/patch-QueryManager.h b/biology/ssaha/files/patch-QueryManager.h new file mode 100644 index 000000000000..fef80e7b3865 --- /dev/null +++ b/biology/ssaha/files/patch-QueryManager.h @@ -0,0 +1,19 @@ +--- ./QueryManager/QueryManager.h.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./QueryManager/QueryManager.h 2008-06-12 15:39:31.000000000 -0300 +@@ -145,6 +145,7 @@ + // readFrame_(0), + pStore_(NULL), + subjectTable_( subjectTable ) {} ++ virtual ~MatchAdder() {}; + virtual void operator()( SequenceNumber subjectNum, + SequenceOffset numBases, + SequenceOffset queryStart, +@@ -172,7 +173,7 @@ + { + public: + MatchAdderImp( HashTableGeneric& subjectTable ) : +- lastSubjectNum_(0), name_(), MatchAdder( subjectTable ) {} ++ MatchAdder( subjectTable ), name_(), lastSubjectNum_(0) {} + virtual void operator()( SequenceNumber subjectNum, + SequenceOffset numBases, + SequenceOffset queryStart, diff --git a/biology/ssaha/files/patch-README b/biology/ssaha/files/patch-README new file mode 100644 index 000000000000..e1a8461090b7 --- /dev/null +++ b/biology/ssaha/files/patch-README @@ -0,0 +1,13 @@ +--- ./Binary/README.orig 2004-08-25 11:35:58.000000000 -0300 ++++ ./Binary/README 2008-06-12 15:39:31.000000000 -0300 +@@ -34,6 +34,10 @@ + + Patched for linux/g++3.2.2 25/8/4 AWS. + ++This version was patched for Linux/GCC 4.0.2 and Mac OS X/GCC 4.0.1 by ++Conrad Halling on 26 January 2006. See http://www.bifx.org/SSAHA/index.html ++for more information. ++ + 1. 
To compile the main ssaha executable + + make ssaha diff --git a/biology/ssaha/files/patch-SSAHAMain.cpp b/biology/ssaha/files/patch-SSAHAMain.cpp new file mode 100644 index 000000000000..c65c50796f9f --- /dev/null +++ b/biology/ssaha/files/patch-SSAHAMain.cpp @@ -0,0 +1,48 @@ +--- ./Global/SSAHAMain.cpp.orig 2004-03-01 14:12:38.000000000 -0300 ++++ ./Global/SSAHAMain.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -86,7 +86,8 @@ + Copyright (C) 2004 by Genome Research Limited\n\ + This software is released under the terms of version 2 of the GNU General\n\ + Public Licence, as published by the Free Software Foundation.\n\ +-This is SSAHA Version 3.2, released 1st March 2004.\n\n"; ++This is SSAHA Version 3.2, released 1st March 2004,\n\ ++patched 26 January 2006.\n\n"; + + Timer timeStamp; + +@@ -623,7 +624,7 @@ + { + pHashTable = new HashTablePacked( cerr, queryParams.saveName ); + if ( ( queryParams.wordLength <= 0 ) +- || ( queryParams.wordLength*gBaseBits > ( 8*sizeof(Word)) -1 ) ) ++ || ( (size_t) queryParams.wordLength*gBaseBits > ( 8*sizeof(Word)) -1 ) ) + { + cerr << "Warning: word length (" << queryParams.wordLength + << ") outside valid range (0 to " +@@ -656,7 +657,7 @@ + } // ~if + + if ( ( queryParams.wordLength <= 0 ) +- || ( queryParams.wordLength*gResidueBits > ( 8*sizeof(Word)) -1 ) ) ++ || ( (size_t) queryParams.wordLength*gResidueBits > ( 8*sizeof(Word)) -1 ) ) + { + cerr << "Warning: word length (" << queryParams.wordLength + << ") outside valid range (0 to " +@@ -782,7 +783,7 @@ + cerr << "Info: would expect " << expectedNumHits + << " hits per word for a random database of this size." << endl; + +- queryParams.maxStore=1+(int)(expectedNumHits*queryParams.maxStore); ++ queryParams.maxStore=(int)(expectedNumHits*queryParams.maxStore); + + cerr << "Info: will ignore hits on words that occur more than " + << queryParams.maxStore << " times in the database." 
<< endl; +@@ -1151,7 +1152,7 @@ + } // ~if + dirent* dirEntry; + string entryName; +- while( dirEntry = readdir(pDir) ) ++ while( 0 != ( dirEntry = readdir(pDir) ) ) + { + entryName = (string) dirEntry->d_name; + if ((entryName == ".")||(entryName=="..")) continue; diff --git a/biology/ssaha/files/patch-SSAHAMain.h b/biology/ssaha/files/patch-SSAHAMain.h new file mode 100644 index 000000000000..ee646b476fc1 --- /dev/null +++ b/biology/ssaha/files/patch-SSAHAMain.h @@ -0,0 +1,19 @@ +--- ./Global/SSAHAMain.h.orig 2004-03-01 13:51:28.000000000 -0300 ++++ ./Global/SSAHAMain.h 2008-06-12 15:39:31.000000000 -0300 +@@ -131,7 +131,7 @@ + -1, // int queryEnd; + -1, // int wordLength; + -1, // int stepLength; +- 100000, // int maxToStore; ++ 10000, // int maxToStore; + 1, // int minToPrint; + -1, // int maxGap; + 0, // int maxInsert; +@@ -174,6 +174,7 @@ + + CommandLineArg( const string& nameLong, const string& nameShort ) : + nameLong_( nameLong ), nameShort_( nameShort ) {} ++ virtual ~CommandLineArg() {}; // base class with virtual functions requires virtual destructor + // Is the current argument equal to 'my' argument name? 
+ virtual bool isThisMe( const string& argName ) + { diff --git a/biology/ssaha/files/patch-SequenceEncoder.cpp b/biology/ssaha/files/patch-SequenceEncoder.cpp new file mode 100644 index 000000000000..5832a96a40bb --- /dev/null +++ b/biology/ssaha/files/patch-SequenceEncoder.cpp @@ -0,0 +1,75 @@ +--- ./SequenceReader/SequenceEncoder.cpp.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./SequenceReader/SequenceEncoder.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -54,10 +54,10 @@ + ostream& monitoringStream): + monitoringStream_( monitoringStream ), + tt_(tt), sourceData_( sourceData ), bitsPerSymbol_(bitsPerSymbol), ++ symbolMask_((1<>1), + // oddNumSymbols_(wordLength%1==1), + { +@@ -71,13 +71,13 @@ + ett_( rhs.ett_ ), + sourceData_( rhs.sourceData_ ), + bitsPerSymbol_( rhs.bitsPerSymbol_ ), ++ symbolMask_(rhs.symbolMask_), + wordLength_( rhs.wordLength_ ), + // pSeq_( rhs.pSeq_ ),// don't want 2 encoders linking to same seq + pState_( rhs.pState_->clone() ), + wordFlag_(0), + // numSymbolPairs_(rhs.numSymbolPairs_), + // oddNumSymbols_(rhs.oddNumSymbols_), +- symbolMask_(rhs.symbolMask_), + doubleBitShift_(rhs.doubleBitShift_) + { + monitoringStream_ << "copy constructing SequenceEncoder" << endl; +@@ -218,7 +218,7 @@ + { + pTemp = (uchar*)p; + // cout << basesInLast << " doing odd char at end " << *pTemp << endl; +- encodeChar( *p, thisWord, wordFlag_, basesInLast ); ++ encodeChar( *pTemp, thisWord, wordFlag_, basesInLast ); + } + + pSeq_->setNumBasesInLast(basesInLast); +@@ -453,7 +453,7 @@ + + for( ; i!=lastWord ; ++i ) + { +- if (toCarry!=~0) ++ if (toCarry!=(Word)~0) + (*pCodon++) = ( ((*i)&gCursedWord)|lastWordFlag ) + ? flaggedChar + : ( toCarry | (((*i) >> (4*gCodonBits + 2*gBaseBits))&maskBase )); +@@ -488,7 +488,7 @@ + + for( ; i!=lastWord ; ++i ) + { +- if (toCarry!=~0) ++ if (toCarry!=(Word)~0) + (*pCodon++) = ( ((*i)&gCursedWord)|lastWordFlag ) + ? 
flaggedChar + : ( toCarry | (((*i) >> (4*gCodonBits + gBaseBits))&mask2Bases )); +@@ -610,7 +610,7 @@ + do + { + i--; +- if (toCarry!=~0) ++ if (toCarry!=(Word)~0) + (*pCodon++) = ( ((*i)&gCursedWord)|lastWordFlag ) + ? flaggedChar + : ( toCarry | ( ((*i) & mask2Bases ) << gBaseBits ) ); +@@ -650,7 +650,7 @@ + do + { + i--; +- if (toCarry!=~0) ++ if (toCarry!=(Word)~0) + (*pCodon++) = ( ((*i)&gCursedWord)|lastWordFlag ) + ? flaggedChar + : ( toCarry | ( ((*i) & maskBase ) << (2*gBaseBits) ) ); diff --git a/biology/ssaha/files/patch-SequenceReader.cpp b/biology/ssaha/files/patch-SequenceReader.cpp new file mode 100644 index 000000000000..356c6e8363d9 --- /dev/null +++ b/biology/ssaha/files/patch-SequenceReader.cpp @@ -0,0 +1,20 @@ +--- ./SequenceReader/SequenceReader.cpp.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./SequenceReader/SequenceReader.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -179,7 +179,7 @@ + + for ( ; i != reverseBuffer_.end() ; i++, j-- ) + { +- *i= reverseChar[ *j ]; ++ *i= reverseChar[ (int) *j ]; + // cout << "char: " << (int)*j << *j << " - " << (int)*i << *i << endl; + } + +@@ -356,7 +356,7 @@ + throw SSAHAException + ("Requested seq start exceeds requested seq end in SourceReaderIndex::extractSource"); + } // ~if +- else if (seqEnd>lastSourceSeq_.size() ) ++ else if (seqEnd>(int)lastSourceSeq_.size() ) + { + cout << seqEnd << " " << lastSourceSeq_.size() << endl; + throw SSAHAException diff --git a/biology/ssaha/files/patch-SequenceReader.h b/biology/ssaha/files/patch-SequenceReader.h new file mode 100644 index 000000000000..c230fa08cbd8 --- /dev/null +++ b/biology/ssaha/files/patch-SequenceReader.h @@ -0,0 +1,11 @@ +--- ./SequenceReader/SequenceReader.h.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./SequenceReader/SequenceReader.h 2008-06-12 15:39:31.000000000 -0300 +@@ -264,6 +264,8 @@ + pReader_( rhs.pReader_ ), + seqNum_( rhs.seqNum_ ) {} + ++ // A virtual destructor is required because this is a base class. 
++ virtual ~SequenceReaderPrinter() {}; + + SequenceReaderPrinter& operator()( SequenceNumber inSeqNum ) + { diff --git a/biology/ssaha/files/patch-SequenceReaderFasta.cpp b/biology/ssaha/files/patch-SequenceReaderFasta.cpp new file mode 100644 index 000000000000..eaf3a2a5477f --- /dev/null +++ b/biology/ssaha/files/patch-SequenceReaderFasta.cpp @@ -0,0 +1,32 @@ +--- ./SequenceReader/SequenceReaderFasta.cpp.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./SequenceReader/SequenceReaderFasta.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -135,9 +135,9 @@ + seqStopChar_( rhs.seqStopChar_ ), + pInputFileStream_( new ifstream( rhs.fileName_.c_str() ) ), + fileName_( rhs.fileName_.c_str() ), +- seqPositions_( rhs.seqPositions_ ), + // lastSourceSeqNum_(0), +- pEncoder_( rhs.pEncoder_->clone() ) ++ pEncoder_( rhs.pEncoder_->clone() ), ++ seqPositions_( rhs.seqPositions_ ) + { + monitoringStream_ << "copy constructing SequenceReaderFile" << this + << endl; +@@ -603,7 +603,7 @@ + throw SSAHAException + ("Requested seq start exceeds requested seq end in SequenceReaderFile::extractSource"); + } // ~if +- else if (seqEnd>lastSourceSeq_.size() ) ++ else if (seqEnd>(SequenceOffset)lastSourceSeq_.size() ) + { + throw SSAHAException + ("Requested last byte exceeds end of seq in SequenceReaderFile::extractSource"); +@@ -628,7 +628,7 @@ + computeNumSequencesInFile(); // ensure have scanned to end of file + fileFile << fileName_ << endl; + SeqIndexInfo* pIndex = new SeqIndexInfo[seqPositions_.size()]; +- for (int i(0) ; i < seqPositions_.size() ; i++) ++ for (unsigned int i(0) ; i < seqPositions_.size() ; i++) + { + pIndex[i].fileNum=fileNumber; + pIndex[i].seqPos=seqPositions_[i]; diff --git a/biology/ssaha/files/patch-SequenceReaderFasta.h b/biology/ssaha/files/patch-SequenceReaderFasta.h new file mode 100644 index 000000000000..a2988fbb91d8 --- /dev/null +++ b/biology/ssaha/files/patch-SequenceReaderFasta.h @@ -0,0 +1,11 @@ +--- ./SequenceReader/SequenceReaderFasta.h.orig 2004-03-01 
13:51:29.000000000 -0300 ++++ ./SequenceReader/SequenceReaderFasta.h 2008-06-12 15:39:31.000000000 -0300 +@@ -55,7 +55,7 @@ + { + public: + SequenceReaderFileState( SequenceNumber lsn, std::streampos fp ) : +- filePos_(fp), SequenceReaderState(lsn) {} ++ SequenceReaderState(lsn), filePos_(fp) {} + // no point in making this private as it's const + const std::streampos filePos_; + }; diff --git a/biology/ssaha/files/patch-SequenceReaderFilter.h b/biology/ssaha/files/patch-SequenceReaderFilter.h new file mode 100644 index 000000000000..38c65183fb7a --- /dev/null +++ b/biology/ssaha/files/patch-SequenceReaderFilter.h @@ -0,0 +1,20 @@ +--- ./SequenceReader/SequenceReaderFilter.h.orig 2004-03-01 13:51:30.000000000 -0300 ++++ ./SequenceReader/SequenceReaderFilter.h 2008-06-12 15:39:31.000000000 -0300 +@@ -80,7 +80,7 @@ + + size_t max( void ) const + { +- int max(0); ++ size_t max(0); + for (vector >::const_iterator i(bins_.begin()); i!= bins_.end(); i++ ) + if (i->size()>max) max=i->size(); + return max; +@@ -109,7 +109,7 @@ + public: + SequenceReaderFilterState + ( SequenceNumber lsn, SequenceReader* ps ) : +- pState_(ps->saveState()), SequenceReaderState(lsn) {} ++ SequenceReaderState(lsn), pState_(ps->saveState()) {} + virtual ~SequenceReaderFilterState() {} //delete pState_; + // no point in making this private as it's const + // this is state info for *ps, whatever it is diff --git a/biology/ssaha/files/patch-SequenceReaderLocal.cpp b/biology/ssaha/files/patch-SequenceReaderLocal.cpp new file mode 100644 index 000000000000..f8629b6ffafe --- /dev/null +++ b/biology/ssaha/files/patch-SequenceReaderLocal.cpp @@ -0,0 +1,28 @@ +--- ./SequenceReader/SequenceReaderLocal.cpp.orig 2004-03-01 13:51:30.000000000 -0300 ++++ ./SequenceReader/SequenceReaderLocal.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -46,10 +46,10 @@ + // and seqNames_ + SequenceReaderLocal::SequenceReaderLocal + ( SequenceReader& seqFile, int wordLength, ostream& monitoringStream ) : +-sourceData_( 
seqFile.getSourceDataType() ), ++SequenceReader( monitoringStream ), + wordLength_( wordLength ), + bitsPerSymbol_( seqFile.getBitsPerSymbol() ), +-SequenceReader( monitoringStream ) ++sourceData_( seqFile.getSourceDataType() ) + { + monitoringStream_ + << "constructing SequenceReaderLocal from SequenceReader" << endl; +@@ -72,10 +72,10 @@ + + SequenceReaderLocal::SequenceReaderLocal + ( int wordLength, int bitsPerSymbol, ostream& monitoringStream ) : +-sourceData_( gUnknownData ), ++SequenceReader( monitoringStream ), + wordLength_( wordLength ), + bitsPerSymbol_( bitsPerSymbol ), +-SequenceReader( monitoringStream ) ++sourceData_( gUnknownData ) + { + monitoringStream_ + << "constructing empty SequenceReaderLocal" << endl; diff --git a/biology/ssaha/files/patch-SequenceReaderMulti.cpp b/biology/ssaha/files/patch-SequenceReaderMulti.cpp new file mode 100644 index 000000000000..1e160249d180 --- /dev/null +++ b/biology/ssaha/files/patch-SequenceReaderMulti.cpp @@ -0,0 +1,30 @@ +--- ./SequenceReader/SequenceReaderMulti.cpp.orig 2004-03-01 13:51:30.000000000 -0300 ++++ ./SequenceReader/SequenceReaderMulti.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -166,7 +166,7 @@ + { + if (thisReader_->allSeqsRead_) + { +- if ( currentSeqNum_ <= thisReader_->size_ ) ++ if ( currentSeqNum_ <= (SequenceNumber)thisReader_->size_ ) + { + // assert( thisReader_->ptr_->findSequence( currentSeqNum_ ) == true ); + lastSequenceNumber_ = --seqNum; // last read = 1 behind current +@@ -240,7 +240,7 @@ + for ( vector::iterator i = allReaders_.begin(); + i != allReaders_.end(); i++ ) + { +- if ( seqNum <= i->size_ ) { thisReader_ = i; break; } // %%%%% ++ if ( seqNum <= (SequenceNumber)i->size_ ) { thisReader_ = i; break; } // %%%%% + seqNum -= i->size_; + } // ~for i + +@@ -276,7 +276,8 @@ + { + DEBUG_L2( "SequenceReaderMulti::getNextSequence" ); + +- int numInLast; ++ int numInLast = -1; // Initialized to avoid -Wall possibly used before initialized warning. 
++ // The algorithm should be checked carefully. + + while + ( ( thisReader_ diff --git a/biology/ssaha/files/patch-SequenceReaderMulti.h b/biology/ssaha/files/patch-SequenceReaderMulti.h new file mode 100644 index 000000000000..72f079f97add --- /dev/null +++ b/biology/ssaha/files/patch-SequenceReaderMulti.h @@ -0,0 +1,13 @@ +--- ./SequenceReader/SequenceReaderMulti.h.orig 2004-03-01 13:51:30.000000000 -0300 ++++ ./SequenceReader/SequenceReaderMulti.h 2008-06-12 15:39:31.000000000 -0300 +@@ -81,8 +81,9 @@ + ( SequenceNumber lsn, + vector::iterator tr, + SequenceReaderState* ps ) : ++ SequenceReaderState(lsn), + thisReader_(tr), +- pState_(ps), SequenceReaderState(lsn) {} ++ pState_(ps) {} + virtual ~SequenceReaderMultiState() {} //delete pState_; + // no point in making this private as it's const + const vector::iterator thisReader_; diff --git a/biology/ssaha/files/patch-SequenceReaderString.h b/biology/ssaha/files/patch-SequenceReaderString.h new file mode 100644 index 000000000000..a35c0b7cfc01 --- /dev/null +++ b/biology/ssaha/files/patch-SequenceReaderString.h @@ -0,0 +1,26 @@ +--- ./SequenceReader/SequenceReaderString.h.orig 2004-03-01 13:51:30.000000000 -0300 ++++ ./SequenceReader/SequenceReaderString.h 2008-06-12 15:39:31.000000000 -0300 +@@ -65,9 +65,9 @@ + // NB SequenceReaderStringBase takes ownership of *pEncoder + SequenceEncoder* pEncoder, + ostream& monitoringStream = cerr ) : ++ SequenceReader( monitoringStream ), + sequenceString_( sequenceString ), +- pEncoder_( pEncoder ), +- SequenceReader( monitoringStream ) ++ pEncoder_( pEncoder ) + { + monitoringStream_ << "constructing SequenceReaderStringBase" << endl; + } // constructor +@@ -78,9 +78,9 @@ + // TYPE NAME IN/OUT COMMENT + // Returns: TYPE COMMENT + SequenceReaderStringBase( const SequenceReaderStringBase& rhs ) : ++SequenceReader( rhs.monitoringStream_ ), + sequenceString_( rhs.sequenceString_ ), +-pEncoder_( rhs.pEncoder_->clone() ), +-SequenceReader( rhs.monitoringStream_ ) ++pEncoder_( 
rhs.pEncoder_->clone() ) + { + monitoringStream_ << "copy constructing SequenceReaderStringBase" << endl; + } // copy constructor diff --git a/biology/ssaha/files/patch-makefile b/biology/ssaha/files/patch-makefile new file mode 100644 index 000000000000..0154077a92ce --- /dev/null +++ b/biology/ssaha/files/patch-makefile @@ -0,0 +1,15 @@ +--- ./Binary/makefile.orig 2008-06-12 16:00:19.000000000 -0300 ++++ ./Binary/makefile 2008-06-12 16:00:43.000000000 -0300 +@@ -11,10 +11,10 @@ + # copy of the SSAHA directory structure you should be able to make files + # straight away. + # +-# SSAHA_TOP_DIR=$(SSAHA_DIR?$(SSAHA_DIR):$(PWD)/../) ++SSAHA_TOP_DIR=$(PWD)/../ + # Above is a nice idea, but syntax doesn't work for all versions of make + # so must define CURRENT_SSAHA_VERSION, eg in your .cshrc file - TC 14.3.01 +-SSAHA_TOP_DIR=$(CURRENT_SSAHA_VERSION) ++# SSAHA_TOP_DIR=$(CURRENT_SSAHA_VERSION) + + GLOBAL_DIR=${SSAHA_TOP_DIR}/Global + SEQ_DIR=${SSAHA_TOP_DIR}/SequenceReader diff --git a/biology/ssaha/files/patch-testHashTableNoOverlap.cpp b/biology/ssaha/files/patch-testHashTableNoOverlap.cpp new file mode 100644 index 000000000000..721e03ae9134 --- /dev/null +++ b/biology/ssaha/files/patch-testHashTableNoOverlap.cpp @@ -0,0 +1,178 @@ +--- ./HashTable/testHashTableNoOverlap.cpp.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./HashTable/testHashTableNoOverlap.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -70,10 +70,10 @@ + cout << " Test of class HashTable" << endl << endl; + cout << "*******************************************" << endl << endl; + +- int numSeqs = 10; +- int seqSize = 100; +- int wordLength = 10; +- int maxHits = 50; ++ unsigned int numSeqs = 10; ++ unsigned int seqSize = 100; ++ unsigned int wordLength = 10; ++ unsigned int maxHits = 50; + + // Generate a random sequence of (numSeqs*seqSize) base pairs ... 
+ // 1128 is the seed value for the random number generator +@@ -116,7 +116,7 @@ + // so we are checking that all sequence data 'finds itself' in the hash + // table in the correct position + +- for ( int i(1) ; i <= numSeqs ; i++ ) ++ for ( unsigned int i(1) ; i <= numSeqs ; i++ ) + { // for each sequence in testReader ... + testReader.getNextSequence(seq,wordLength); + +@@ -131,7 +131,7 @@ + cout << seq.getNumBasesInLast() << "!!\n"; + // ... go through the Words in the sequence one by one and look + // for matches in the hash table +- for ( int j(0) ; j < (seqSize/wordLength) ; j++ ) ++ for ( unsigned int j(0) ; j < (seqSize/wordLength) ; j++ ) + { + cout << j << " " << printBase(seq[j],wordLength) << endl; + +@@ -146,7 +146,7 @@ + // ... and that the sequence number and offset for the hit are OK + + assert(hits[0].subjectNum == i ); +- assert(hits[0].diff == j*wordLength ); ++ assert((unsigned int)hits[0].diff == j*wordLength ); + assert(hits[0].queryPos == 1); + + hits.clear(); +@@ -157,12 +157,12 @@ + testHash.matchWord(seq,hits); + cout << hits.size() << "!!!!!\n"; + assert( hits.size() == (seqSize/wordLength) ); +- for ( int j(0) ; j < hits.size() ; j ++ ) ++ for ( unsigned int j(0) ; j < hits.size() ; j ++ ) + { + + assert(hits[j].subjectNum == i); + assert(hits[j].diff == 0); +- assert(hits[j].queryPos == 1 + ( j * wordLength ) ); ++ assert((unsigned int)hits[j].queryPos == 1 + ( j * wordLength ) ); + + } // ~for j + +@@ -194,7 +194,7 @@ + assert( testLoad.isInitialized() == true ); + + // and that word length matches original +- assert( testLoad.getWordLength() == wordLength ); ++ assert( (unsigned int)testLoad.getWordLength() == wordLength ); + + testLoad.setMaxNumHits( testHash.getMaxNumHits() ); + +@@ -208,7 +208,7 @@ + // and new hash tables, storing the resulting hits in hitsOrig and + // hitsCopy respectively. + +- for ( int i(0) ; i < numSeqs ; i++ ) ++ for ( unsigned int i(0) ; i < numSeqs ; i++ ) + { + + // ... 
check that the name strings match +@@ -258,7 +258,7 @@ + HashTable shiftHash(cout); + creator.createHashTable(shiftHash,shiftReader,wordLength,maxHits); + +- for ( int i(0) ; i < wordLength ; i++ ) ++ for ( unsigned int i(0) ; i < wordLength ; i++ ) + { + testSeq = testSeq.substr(1); // delete first character + { // braces ensure a new instance is created each time round loop +@@ -460,7 +460,7 @@ + SequenceEncoderCodon encoder; + encoder.setWordLength(wordLength); + +- for ( int i(1) ; i < hashTrans.getNumSequences() ; i++ ) ++ for ( unsigned int i(1) ; i < hashTrans.getNumSequences() ; i++ ) + { + + hashTrans.getSequenceName( s1, i ); +@@ -477,7 +477,7 @@ + assert(hashTrans.getSequenceSize(i)==hashTrans2.getSequenceSize(i)); + + assert( (((w1.size()-1) * gMaxBasesPerWord ) + w1.getNumBasesInLast()) +- == hashTrans.getSequenceSize(i)); ++ == (unsigned int)hashTrans.getSequenceSize(i)); + + + // Sequence should produce same (nonzero) num hits in fwd direction +@@ -573,19 +573,19 @@ + string s, s1; + Word w; + vector subs; +- int wl=15; ++ unsigned int wl=15; + + // test substitution for DNA + // + +- for (int i(0); i