1. Knuth-Morris-Pratt
Lecture
Exact Pattern Matching
Brute Force
Skipping Positions
Example 1
Example 2
Is Skipping Safe?
Quiz 1
Before shift: ??abcdabc??????
abcdabcef
1234
After shift: ??abcdabc??????
-> abcdabcef
1234
Quiz 2
Prefix Function
s[ 0 ]
is always 0
since a string cannot be a prefix of itself.
Lemma
Corollary
The border can “grow” at most 1-character at at time when computing s( i + 1 )
from s( i )
Enumerating Borders
Example
Enumerating borders is similiar to the concept of Matryoshka dolls (i.e. one fits inside the other until there’s nothing left)
Quiz 3
Computing s( i + 1 )
Quiz 4
P = ababcababac
i = 012345678901
s(i) = 00220123430
^
i=8
Pseudocode
Example
Lemma and Proof
Knuth-Morris-Pratt Algorithm
Problem
Solutions
CPP
#include <algorithm>
#include <iostream>
#include <iterator>
#include <sstream>
#include <vector>
//#define BRUTE_FORCE_SEARCH
//#define OUTPUT_PREFIX_FUNCTION
#define OUTPUT_INDEXES_OF_NEEDLE_IN_HAYSTACK
using namespace std;
#ifdef BRUTE_FORCE_SEARCH // Failed case #57/64: time limit exceeded (Time used: 1.98/1.00, memory used: 72019968/536870912.)
using Indexes = vector< int >;
int main() {
string needle, haystack; cin >> needle >> haystack;
auto M = needle.size(),
N = haystack.size();
Indexes I; {
for( auto i{ 0 }, j{ 0 }; i + M <= N; ++i ){
for( j = 0; j < M && needle[ j ] == haystack[ i+j ]; ++j );
if( j == M )
I.push_back( i );
}
}
#ifdef OUTPUT_INDEXES_OF_NEEDLE_IN_HAYSTACK
copy( I.begin(), I.end(), ostream_iterator< int >( cout, " " ) ); cout << endl;
#endif
}
#else // Knuth-Morris-Pratt
using PrefixFunction = vector< int >;
using Indexes = vector< int >;
int main() {
string needle, haystack; cin >> needle >> haystack;
string S; {
ostringstream os; os << needle << '$' << haystack;
S = os.str();
}
auto M = needle.size(),
N = S.size();
PrefixFunction P( N, 0 ); {
for( auto i{ 1 }, j{ 0 }; i < N; ++i ){ // let j denote the "border" of the string
while( 0 < j && S[ i ] != S[ j ] )
j = P[ j-1 ];
P[ i ] = j = ( S[ i ] == S[ j ] )? j+1 : 0;
}
}
#ifdef OUTPUT_PREFIX_FUNCTION
cout << S << endl;
copy( P.begin(), P.end(), ostream_iterator< int >( cout, "" ) ); cout << endl;
#endif
Indexes I; {
auto offset = 2 * M;
for( auto i{ offset }; i < S.size(); ++i )
if( P[ i ] == M )
I.push_back( i - offset );
}
#ifdef OUTPUT_INDEXES_OF_NEEDLE_IN_HAYSTACK
copy( I.begin(), I.end(), ostream_iterator< int >( cout, " " ) ); cout << endl;
#endif
return 0;
}
#endif