Algorithms-UCSanDiego

Data Structures and Algorithms by UCSanDiego

View project on GitHub

1. Knuth-Morris-Pratt

Lecture

Exact Pattern Matching

Brute Force

Skipping Positions

Example 1

Example 2

Is Skipping Safe?

Quiz 1

Before shift:    ??abcdabc??????
                   abcdabcef
                   1234
                   
After shift:     ??abcdabc??????
                   ->  abcdabcef
                   1234

Quiz 2

Prefix Function

s[ 0 ] is always 0 since a string cannot be a prefix of itself.

Lemma

Corollary

The border can “grow” at most 1-character at at time when computing s( i + 1 ) from s( i )

Enumerating Borders

Example

Enumerating borders is similiar to the concept of Matryoshka dolls (i.e. one fits inside the other until there’s nothing left)

Quiz 3

Computing s( i + 1 )

Quiz 4

    P = ababcababac
    i = 012345678901
 s(i) = 00220123430
                ^
                i=8

Pseudocode

Example

Lemma and Proof

Knuth-Morris-Pratt Algorithm

Problem

Solutions

CPP

    #include <algorithm>
    #include <iostream>
    #include <iterator>
    #include <sstream>
    #include <vector>
    
    //#define BRUTE_FORCE_SEARCH
    //#define OUTPUT_PREFIX_FUNCTION
    #define OUTPUT_INDEXES_OF_NEEDLE_IN_HAYSTACK
    
    using namespace std;
    
    #ifdef BRUTE_FORCE_SEARCH // Failed case #57/64: time limit exceeded (Time used: 1.98/1.00, memory used: 72019968/536870912.)
    using Indexes = vector< int >;
    int main() {
        string needle, haystack; cin >> needle >> haystack;
        auto M = needle.size(),
             N = haystack.size();
        Indexes I; {
            for( auto i{ 0 }, j{ 0 }; i + M <= N; ++i ){
                for( j = 0; j < M && needle[ j ] == haystack[ i+j ]; ++j );
                if( j == M )
                    I.push_back( i );
            }
        }
    #ifdef OUTPUT_INDEXES_OF_NEEDLE_IN_HAYSTACK
        copy( I.begin(), I.end(), ostream_iterator< int >( cout, " " ) ); cout << endl;
    #endif
    }
    #else // Knuth-Morris-Pratt
    using PrefixFunction = vector< int >;
    using Indexes = vector< int >;
    int main() {
        string needle, haystack; cin >> needle >> haystack;
        string S; {
            ostringstream os; os << needle << '$' << haystack;
            S = os.str();
        }
        auto M = needle.size(),
             N = S.size();
        PrefixFunction P( N, 0 ); {
            for( auto i{ 1 }, j{ 0 }; i < N; ++i ){ // let j denote the "border" of the string
                while( 0 < j && S[ i ] != S[ j ] )
                    j = P[ j-1 ];
                P[ i ] = j = ( S[ i ] == S[ j ] )? j+1 : 0;
            }
        }
    #ifdef OUTPUT_PREFIX_FUNCTION
        cout << S << endl;
        copy( P.begin(), P.end(), ostream_iterator< int >( cout, "" ) ); cout << endl;
    #endif
        Indexes I; {
            auto offset = 2 * M;
            for( auto i{ offset }; i < S.size(); ++i )
                if( P[ i ] == M )
                    I.push_back( i - offset );
        }
    #ifdef OUTPUT_INDEXES_OF_NEEDLE_IN_HAYSTACK
        copy( I.begin(), I.end(), ostream_iterator< int >( cout, " " ) ); cout << endl;
    #endif
        return 0;
    }
    #endif