/* FILE: lcsubstr.cpp last change: 04-May-2001 author: Marco Rospocher * * This program reads two strings from the prompt line * and finds a longest common substring of them. * An optimal LCS is computed based on a dynamic programming approach. */ #include #include #include #include #include const int time_fact = (int) sysconf(_SC_CLK_TCK); // _SC_CLK_TCK gives the number of tics per second template string toString(genType n) { strstream ss; ss << n; return ss.str(); } /************************************************************************************************/ /* RECURSION PROCEDURE COMPUTING THE LCS BETWEEN TWO STRINGS * Finds a longest common substring of string1 from 0 to indexstring1 * and string2 from 0 to indexstring2. * INPUT: two strings, two end-index for the two strings * OUTPUT: the LCS of the two strings between the specified end-index, the lenght of the LCS */ void RecProcedure(string string1, string string2, int indexstring1, int indexstring2, string & substring, int &len) { if ((indexstring1>-1)&&(indexstring2>-1)) { if (string1[indexstring1]==string2[indexstring2]) { RecProcedure(string1,string2,indexstring1-1,indexstring2-1,substring, len); //substring+=toString(string1[indexstring1]); substring+=string1[indexstring1]; len++; } else { string tmp_substring1=string(), tmp_substring2=string(); int tmp_len1=0,tmp_len2=0; RecProcedure(string1,string2,indexstring1-1,indexstring2,tmp_substring1, tmp_len1); RecProcedure(string1,string2,indexstring1,indexstring2-1,tmp_substring2, tmp_len2); if (tmp_len1>=tmp_len2) { len+=tmp_len1; substring=tmp_substring1+substring; } else { len+=tmp_len2; substring=tmp_substring2+substring; } } } else { substring=string(); len=0; // istruzione superflua: la stringa viene posta a zero nel main, ma non si sa mai ... } } /************************************************************************************************/ /* RECURSION FUNCTION WITH MEMOIZAZION COMPUTING THE LCS BETWEEN TWO STRINGS * Finds a longest common substring between string1 from 0 to indexstring1 * and string2 from 0 to indexstring2. * INPUT: two strings, two end-index for the two strings * OUTPUT: the lenght of the LCS between the specified end-index, * the matrix len (where len[i][j] contains the length of a LCS * of the string1 (ended at index i) and string2 (ended at index j) * ), * and the matrix track the contains the path to follow to build The LCS */ int RecMemoized(string string1, string string2, int indexstring1, int indexstring2, int** len, char** track) { int tmp_len1,tmp_len2; if( len[indexstring1][indexstring2]> -1) return len[indexstring1][indexstring2]; if (string1[indexstring1-1]==string2[indexstring2-1]) { track[indexstring1][indexstring2]='*'; len[indexstring1][indexstring2]=RecMemoized(string1,string2,indexstring1-1,indexstring2-1, len,track)+1; } else { tmp_len1=RecMemoized(string1,string2,indexstring1-1,indexstring2,len,track); tmp_len2=RecMemoized(string1,string2,indexstring1,indexstring2-1,len,track); if (tmp_len1>=tmp_len2) { len[indexstring1][indexstring2]=tmp_len1; track[indexstring1][indexstring2]='|'; } else { len[indexstring1][indexstring2]=tmp_len2; track[indexstring1][indexstring2]='-'; } } return len[indexstring1][indexstring2]; } /************************************************************************************************/ /* DINAMIC PROGRAMMING PROCEDURE COMPUTING THE LCS BETWEEN TWO STRINGS * Finds a longest common substring between string1 and string2 * INPUT: two strings, their lenghts * OUTPUT: the matrix len (where len[i][j] contains the length of a LCS * of the string1 (ended at index i) * and string2 (ended at index j) * ), * and the matrix track the contains the path to follow to build The LCS */ void LenghtLCS(string string1, string string2, int lenstr1, int lenstr2, int** & len, char** & track) { for (int i=1;i<=lenstr1;i++) for (int j=1;j<=lenstr2;j++) if (string1[i-1]==string2[j-1]) { len[i][j]=len[i-1][j-1]+1; track[i][j]='*'; } else if (len[i-1][j]>=len[i][j-1]) { len[i][j]=len[i-1][j]; track[i][j]='|'; } else { len[i][j]=len[i][j-1]; track[i][j]='-'; } } /************************************************************************************************/ /* INITIALIZING PROCEDURE FOR MATRIX len AND track * This procedure creates tow matrix with a row and a colomn more than it's necessary * In matrix len it sets the first row and the first colomn to zero (this will allow to cut some * conditional lines of code in procedure LenghtLCS and RecMemoized). * The rest of matrix len is sets to -1; * Matrix tarck is set to generic char 'A' */ void Initialize(int** & len, char** & track, int lenstr1, int lenstr2) { track = new char*[lenstr1+1]; len = new int*[lenstr1+1]; for(int i=0; i <= lenstr1; i++) { track[i] = new char[lenstr2+1]; len[i] = new int[lenstr2+1]; for(int j=0; j <= lenstr2; j++) { len[i][j] = -1; // -1 minimal lenght track[i][j]= 'A'; // casual char } } for(int i=0; i <= lenstr1; i++) len[i][0]=0; for(int i=0; i <= lenstr2; i++) len[0][i]=0; } /************************************************************************************************/ // THIS PROCEDURE RELEASE THE MEMORY OCCUPIED BY MATRIX len AND track void Release(int** & len, char** & track, int lenstr1, int lenstr2) { for(int i=0; i <= lenstr1; i++) { delete [] track[i]; delete [] len[i]; } delete [] track; delete [] len; } /************************************************************************************************/ // THIS PROCEDURE BUILDS THE LCS USING THE MATRIX track void PrintLCS(int i, int j, char **track, string string1) { if (!(i==0||j==0)) { if (track[i][j]=='*') { PrintLCS(i-1,j-1,track,string1); cout << string1[i-1]; } else if (track[i][j]=='|') PrintLCS(i-1,j,track,string1); else PrintLCS(i,j-1,track,string1); } } /************************************************************************************************/ void main(int argc, char **argv) { string string1; string string2; int len=0; if (argc<3) { cout << "Inserire una stringa: "; cin >> string1; if (argc==1) { cout << "Inserire l'altra stringa: "; cin >> string2; } else string2=*(argv+1); } else { string1=*(argv+1); string2=*(argv+2); } cout << endl; cout << "PROGRAMMA PER LA RICERCA DELLA MASSIMA SOTTOSTRINGA COMUNE TRA 2 STRINGHE" << endl; cout << endl; cout << "Stringhe in input:" << endl; cout << string1<< endl; cout << string2<< endl; cout << endl; int **memolen; char **track; struct tms ttmmss; int startingTime; // INIZIO PROGRAMMAZIONE DINAMICA cout << "Soluzione Programmazione Dinamica" << endl; times(&ttmmss); startingTime = (int) ttmmss.tms_utime; Initialize(memolen, track, string1.size(),string2.size()); LenghtLCS(string1,string2,string1.size(),string2.size(),memolen,track); times(&ttmmss); double tot_cpu = (ttmmss.tms_utime - startingTime) / (double)time_fact; // E' stata trovata la massima stringa comune cout << "Tempo di CPU impiegato: " << tot_cpu << endl; cout << "La massima stringa comune è lunga " << memolen[string1.size()][string2.size()] << " caratteri." << endl; cout << "La massima stringa comune é "; PrintLCS(string1.size(),string2.size(),track,string1); cout << endl; Release(memolen,track,string1.size(),string2.size()); times(&ttmmss); tot_cpu = (ttmmss.tms_utime - startingTime) / (double)time_fact; cout << "Tempo totale di CPU impiegato dalla Programmazione Dinamica: " << tot_cpu << endl << endl; // FINE PROGRAMMAZIONE DINAMICA // INIZIO PROCEDURA RICORSIVA string substring=string(); times(&ttmmss); startingTime = (int) ttmmss.tms_utime; RecProcedure(string1,string2,string1.size()-1,string2.size()-1,substring,len); cout << "Soluzione Ricorsiva" << endl; cout << "La massima stringa comune è lunga " << len << " caratteri." << endl; cout << "La massima stringa comune é " << substring << endl; times(&ttmmss); tot_cpu = (ttmmss.tms_utime - startingTime) / (double)time_fact; cout << "Tempo totale di CPU impiegato dalla soluzione ricorsiva: " << tot_cpu << endl << endl; // FINE PROCEDURA RICORSIVA // INIZIO PROCEDURA RICORSIVA MEMOIZED times(&ttmmss); startingTime = (int) ttmmss.tms_utime; Initialize(memolen, track, string1.size(),string2.size()); cout << "Soluzione Ricorsiva Memoizzata" << endl; cout << "La massima stringa comune è lunga " << RecMemoized(string1,string2,string1.size(),string2.size(),memolen,track) << " caratteri." << endl; cout << "La massima stringa comune é "; PrintLCS(string1.size(),string2.size(),track,string1); cout << endl; Release(memolen,track,string1.size(),string2.size()); times(&ttmmss); tot_cpu = (ttmmss.tms_utime - startingTime) / (double)time_fact; cout << "Tempo totale di CPU impiegato dalla soluzione ricorsiva con memoizzazione: " << tot_cpu << endl << endl; // FINE PROCEDURA RICORSIVA MEMOIZED }