Download as pdf or txt
Download as pdf or txt
You are on page 1 of 16

Given an input text and an array of k words, arr[], find all occurrences of all words in the input text.

Let n be the length of the text and m be the total number of characters in all words, i.e. m = length(arr[0]) +
length(arr[1]) + … + length(arr[k-1]). Here k is the total number of input words.

#include <bits/stdc++.h>

using namespace std;
// Capacity of the automaton tables below: the largest number of states they can hold.
const int MAXS = 500;
// Alphabet size; characters are mapped to column indices with `c - 'a'`.
const int MAXC = 26;
// out[s]: bitmask of matched words; bit i is set when word arr[i] ends at state s.
int out[MAXS];
// f[s]: failure link of state s (-1 until buildMatchingMachine assigns it).
int f[MAXS];
// g[s][c]: goto transition from state s on character index c; -1 means "no transition".
int g[MAXS][MAXC];
// Builds the Aho-Corasick matching machine for the k words in arr.
//
// Fills the global tables:
//   g   - goto function (trie edges, with missing root edges looping to state 0),
//   f   - failure links, computed breadth-first,
//   out - per-state bitmask of the words recognised at that state.
//
// Preconditions (not checked here): every character of every word is in
// 'a'..'z', the words need fewer than MAXS states in total, and k does not
// exceed the number of bits in an int (word i is recorded as bit i of out[]).
//
// Returns the number of states created, including the root state 0.
int buildMatchingMachine(string arr[], int k)
{
    memset(out, 0, sizeof out);
    memset(g, -1, sizeof g);

    // State 0 is the root, so the next free state number is 1.
    int states = 1;

    // Phase 1: insert every word into the trie stored in g[][].
    for (int i = 0; i < k; ++i)
    {
        const string &word = arr[i];
        int currentState = 0;
        for (size_t j = 0; j < word.size(); ++j)
        {
            int ch = word[j] - 'a';
            if (g[currentState][ch] == -1)
                g[currentState][ch] = states++;

            currentState = g[currentState][ch];
        }
        // Mark word i as ending at the state reached by its last character.
        out[currentState] |= (1 << i);
    }

    // Characters with no edge out of the root keep the machine in the root.
    for (int ch = 0; ch < MAXC; ++ch)
        if (g[0][ch] == -1)
            g[0][ch] = 0;

    // Phase 2: compute failure links breadth-first.
    memset(f, -1, sizeof f);
    queue<int> q;

    // Every depth-1 state fails back to the root.
    for (int ch = 0; ch < MAXC; ++ch)
    {
        if (g[0][ch] != 0)
        {
            f[g[0][ch]] = 0;
            q.push(g[0][ch]);
        }
    }

    while (!q.empty())
    {
        int state = q.front();
        q.pop();

        // Valid character indices are 0..MAXC-1. The previous bound
        // (ch <= MAXC) read g[state][MAXC], one element past the row.
        for (int ch = 0; ch < MAXC; ++ch)
        {
            if (g[state][ch] != -1)
            {
                // Follow failure links until a state with an edge on ch is
                // found; the root always has one, so this terminates.
                int failure = f[state];
                while (g[failure][ch] == -1)
                    failure = f[failure];

                failure = g[failure][ch];
                f[g[state][ch]] = failure;

                // Words ending at the failure state also end here.
                out[g[state][ch]] |= out[failure];
                q.push(g[state][ch]);
            }
        }
    }

    return states;
}
// Returns the state the machine moves to from currentState on nextInput.
// When the current state has no goto edge for the character, failure links
// are followed until a state that has one is reached.
int findNextState(int currentState, char nextInput)
{
    const int symbol = nextInput - 'a';

    for (int state = currentState;; state = f[state])
    {
        const int target = g[state][symbol];
        if (target != -1)
            return target;
    }
}
// Builds the matching machine for arr[0..k-1], runs it over text, and prints
// one line per occurrence with the start and end index of the matched word.
void searchWords(string arr[], int k, string text)
{
    buildMatchingMachine(arr, k);

    int state = 0;
    for (int pos = 0; pos < text.size(); ++pos)
    {
        state = findNextState(state, text[pos]);

        // Bitmask of the words that end at this position.
        const int matched = out[state];
        if (matched != 0)
        {
            for (int w = 0; w < k; ++w)
            {
                if ((matched & (1 << w)) == 0)
                    continue;

                cout << "Word " << arr[w] << " appears from "
                     << pos - arr[w].size() + 1 << " to " << pos << endl;
            }
        }
    }
}
// Reads the word count, the words, and the text from standard input, then
// prints every occurrence of every word in the text.
//
// Example input: 4 he she hers his ahishers
int main()
{
    int n = 0;
    // Reject a missing or negative count instead of sizing the array with it.
    if (!(cin >> n) || n < 0)
        return 1;

    // std::vector replaces the non-standard variable-length array of
    // std::string; n is already the word count, so no sizeof arithmetic.
    vector<string> arr(n);
    string text;
    for (int i = 0; i < n; i++)
        cin >> arr[i];
    cin >> text;

    // NOTE(review): the "cpp" markers look like leftover debug output; they
    // are kept so the program's output is unchanged - confirm before removing.
    printf("cpp");
    searchWords(arr.data(), n, text);
    printf("cpp");

    return 0;
}
-------------

Second solution (Java):

import java.util.*;

/**
 * Aho-Corasick multi-pattern search over the lowercase alphabet 'a'..'z'.
 *
 * <p>The automaton lives in static tables, so each call to
 * {@link #buildMatchingMachine} overwrites the previously built machine.
 */
class Test {

    /** Largest number of automaton states the tables can hold. */
    static int MAXS = 500;

    /** Alphabet size; a character maps to a column index via {@code c - 'a'}. */
    static int MAXC = 26;

    /** {@code out[s]}: bitmask whose bit i is set when word i ends at state s. */
    static int[] out = new int[MAXS];

    /** {@code f[s]}: failure link of state s. */
    static int[] f = new int[MAXS];

    /** {@code g[s][c]}: goto transition from state s on character index c, or -1. */
    static int[][] g = new int[MAXS][MAXC];

    /**
     * Builds the goto, failure and output tables for the first {@code k} words.
     *
     * @param arr the words to search for; only characters 'a'..'z' are allowed
     * @param k   number of words of {@code arr} to use
     * @return the number of states created, including the root state 0
     * @throws IllegalArgumentException if {@code k} exceeds the 32 bits of the
     *     output mask, a word contains a character outside 'a'..'z', or the
     *     words need more than {@code MAXS} states
     */
    static int buildMatchingMachine(String arr[], int k) {
        // Word i is stored as bit i of an int; a 33rd word would alias bit 0.
        if (k > Integer.SIZE) {
            throw new IllegalArgumentException(
                    "at most " + Integer.SIZE + " words are supported, got " + k);
        }

        Arrays.fill(out, 0);
        for (int i = 0; i < MAXS; i++) {
            Arrays.fill(g[i], -1);
        }

        // State 0 is the root, so the next free state number is 1.
        int states = 1;

        // Phase 1: insert every word into the trie stored in g[][].
        for (int i = 0; i < k; ++i) {
            String word = arr[i];
            int currentState = 0;

            for (int j = 0; j < word.length(); ++j) {
                int ch = word.charAt(j) - 'a';
                if (ch < 0 || ch >= MAXC) {
                    throw new IllegalArgumentException(
                            "word \"" + word + "\" contains a character outside 'a'..'z'");
                }
                if (g[currentState][ch] == -1) {
                    if (states >= MAXS) {
                        throw new IllegalArgumentException(
                                "words need more than " + MAXS + " states");
                    }
                    g[currentState][ch] = states++;
                }
                currentState = g[currentState][ch];
            }

            // Mark word i as ending at the state reached by its last character.
            out[currentState] |= (1 << i);
        }

        // Characters with no edge out of the root keep the machine in the root.
        for (int ch = 0; ch < MAXC; ++ch) {
            if (g[0][ch] == -1) {
                g[0][ch] = 0;
            }
        }

        // Phase 2: compute failure links breadth-first.
        Arrays.fill(f, -1);
        Queue<Integer> q = new ArrayDeque<>();

        // Every depth-1 state fails back to the root.
        for (int ch = 0; ch < MAXC; ++ch) {
            if (g[0][ch] != 0) {
                f[g[0][ch]] = 0;
                q.add(g[0][ch]);
            }
        }

        while (!q.isEmpty()) {
            int state = q.remove();

            for (int ch = 0; ch < MAXC; ++ch) {
                if (g[state][ch] != -1) {
                    // Follow failure links until a state with an edge on ch is
                    // found; the root always has one, so this terminates.
                    int failure = f[state];
                    while (g[failure][ch] == -1) {
                        failure = f[failure];
                    }
                    failure = g[failure][ch];
                    f[g[state][ch]] = failure;

                    // Words ending at the failure state also end here.
                    out[g[state][ch]] |= out[failure];
                    q.add(g[state][ch]);
                }
            }
        }

        return states;
    }

    /**
     * Returns the state reached from {@code currentState} on {@code nextInput}.
     *
     * <p>A character outside 'a'..'z' cannot be part of any word, so it sends
     * the machine back to the root instead of indexing outside the table.
     */
    static int findNextState(int currentState, char nextInput) {
        int ch = nextInput - 'a';
        if (ch < 0 || ch >= MAXC) {
            return 0;
        }

        int answer = currentState;
        while (g[answer][ch] == -1) {
            answer = f[answer];
        }
        return g[answer][ch];
    }

    /**
     * Builds the machine for the first {@code k} words of {@code arr} and prints
     * every occurrence of every word in {@code text} with its start and end index.
     */
    static void searchWords(String arr[], int k, String text) {
        buildMatchingMachine(arr, k);

        int currentState = 0;
        for (int i = 0; i < text.length(); ++i) {
            currentState = findNextState(currentState, text.charAt(i));

            if (out[currentState] == 0) {
                continue;
            }

            for (int j = 0; j < k; ++j) {
                // "!= 0" rather than "> 0": bit 31 makes the masked value negative.
                if ((out[currentState] & (1 << j)) != 0) {
                    System.out.print("Word " + arr[j]
                            + " appears from "
                            + (i - arr[j].length() + 1)
                            + " to " + i + "\n");
                }
            }
        }
    }

    /**
     * Reads the word count, one word per line, and then the text from standard
     * input, and prints all matches.
     */
    public static void main(String[] args) {
        Scanner sc = new Scanner(System.in);

        int n = Integer.parseInt(sc.nextLine().trim());
        String[] arr = new String[n];
        for (int i = 0; i < n; i++) {
            arr[i] = sc.nextLine();
        }

        String text = sc.nextLine();
        searchWords(arr, n, text);
    }
}
Given a text txt[0..n-1] and a pattern pat[0..m-1], write a function search(char pat[], char txt[]) that
prints all occurrences of pat[] in txt[]. You may assume that n > m.

Note: Use Rabin-Karp Algorithm

#include<string.h>

#include<stdio.h>

/* Radix used by the rolling hash: the number of distinct byte values. */
#define d 256

/*
 * Rabin-Karp search: prints the start index of every occurrence of pat in txt.
 *
 * pat - NUL-terminated pattern
 * txt - NUL-terminated text
 * q   - modulus for the rolling hash; must be positive
 *
 * Nothing is printed when the pattern is longer than the text or q is not
 * positive.
 */
void search(char pat[], char txt[], int q)
{
    int M = strlen(pat);
    int N = strlen(txt);
    int i, j;
    int p = 0; /* hash of the pattern */
    int t = 0; /* hash of the current text window */
    int h = 1; /* d^(M-1) mod q, the weight of the window's leading character */

    /* A longer pattern cannot match, and hashing it would read txt past its
     * terminator; q <= 0 would make the modulo undefined or meaningless. */
    if (M > N || q <= 0)
        return;

    for (i = 0; i < M - 1; i++)
        h = (int)(((long long)h * d) % q);

    /* Hash the pattern and the first window. Characters are read as unsigned
     * char so bytes >= 0x80 hash consistently whatever the signedness of char. */
    for (i = 0; i < M; i++)
    {
        p = (int)(((long long)d * p + (unsigned char)pat[i]) % q);
        t = (int)(((long long)d * t + (unsigned char)txt[i]) % q);
    }

    /* Slide the window over the text one position at a time. */
    for (i = 0; i <= N - M; i++)
    {
        /* Equal hashes only suggest a match; confirm character by character. */
        if (p == t)
        {
            for (j = 0; j < M; j++)
            {
                if (txt[i + j] != pat[j])
                    break;
            }
            if (j == M)
                printf("Pattern found at index %d \n", i);
        }

        /* Roll the hash: drop txt[i], append txt[i + M]. */
        if (i < N - M)
        {
            long long next =
                ((long long)d * (t - (long long)(unsigned char)txt[i] * h)
                 + (unsigned char)txt[i + M]) % q;

            /* The subtraction can make the remainder negative. */
            if (next < 0)
                next += q;
            t = (int)next;
        }
    }
}

/*
 * Reads a text and then a pattern (whitespace-delimited tokens) from standard
 * input and prints every index at which the pattern occurs in the text.
 */
int main()
{
    char txt[50], pat[25];

    /* Field widths keep each token inside its buffer (one byte is left for
     * the terminating NUL). */
    if (scanf("%49s", txt) != 1)
        return 1;

    if (scanf("%24s", pat) != 1)
        return 1;

    int q = 101; /* prime modulus for the rolling hash */

    search(pat, txt, q);

    return 0;
}

Write a program to insert a set of strings into the trie and search for a word in it.

import java.io.*;

import java.util.*;

/**
 * Trie over the lowercase alphabet 'a'..'z' with insert and exact-word search.
 *
 * <p>The trie is held in the static {@link #root}; assign a fresh
 * {@code TrieNode} to it to start over with an empty trie.
 */
class Trie {

    /** Number of child slots per node; a character maps to a slot via {@code c - 'a'}. */
    static final int ALPHABET_SIZE = 26;

    /** One trie node: a child slot per letter plus an end-of-word marker. */
    static class TrieNode {
        TrieNode[] children = new TrieNode[ALPHABET_SIZE];

        /** True when the path from the root to this node spells an inserted key. */
        boolean isEndOfWord;

        TrieNode() {
            // Array elements already default to null; only the flag is set here.
            isEndOfWord = false;
        }
    }

    static TrieNode root;

    /**
     * Inserts {@code key} into the trie, creating the root if none exists yet.
     *
     * @throws IllegalArgumentException if {@code key} contains a character
     *     outside 'a'..'z'
     */
    static void insert(String key) {
        if (root == null) {
            root = new TrieNode();
        }

        TrieNode pCrawl = root;
        for (int level = 0; level < key.length(); level++) {
            int index = key.charAt(level) - 'a';
            if (index < 0 || index >= ALPHABET_SIZE) {
                throw new IllegalArgumentException(
                        "key \"" + key + "\" contains a character outside 'a'..'z'");
            }
            if (pCrawl.children[index] == null) {
                pCrawl.children[index] = new TrieNode();
            }
            pCrawl = pCrawl.children[index];
        }

        // The node reached by the last character marks the end of the key.
        pCrawl.isEndOfWord = true;
    }

    /**
     * Returns true when {@code key} was inserted as a whole word; a mere prefix
     * of an inserted word does not count.
     *
     * <p>Keys containing characters outside 'a'..'z' cannot be stored, so they
     * are reported as absent.
     */
    static boolean search(String key) {
        if (root == null) {
            return false;
        }

        TrieNode pCrawl = root;
        for (int level = 0; level < key.length(); level++) {
            int index = key.charAt(level) - 'a';
            if (index < 0 || index >= ALPHABET_SIZE) {
                return false;
            }
            if (pCrawl.children[index] == null) {
                return false;
            }
            pCrawl = pCrawl.children[index];
        }

        return pCrawl.isEndOfWord;
    }

    /**
     * Reads the key count, one key per line, and then the word to look up from
     * standard input, and prints whether the word is present in the trie.
     *
     * <p>Example keys: the, a, there, answer, any, by, bye, their.
     */
    public static void main(String args[]) {
        Scanner sc = new Scanner(System.in);

        int n = Integer.parseInt(sc.nextLine().trim());
        String[] keys = new String[n];
        for (int i = 0; i < n; i++) {
            keys[i] = sc.nextLine();
        }

        String word = sc.nextLine();

        String output[] = {"Not present in trie", "Present in trie"};

        root = new TrieNode();
        for (int i = 0; i < keys.length; i++) {
            insert(keys[i]);
        }

        if (search(word)) {
            System.out.println(output[1]);
        } else {
            System.out.println(output[0]);
        }
    }
}

Write a program to find how many times a given pattern (p) occurs in the given text (t).

Use KMP (Knuth Morris Pratt) Pattern Searching algorithm to solve this.

#include<stdio.h>

#include<string.h>

void computeLPSArray(char* pat, int M, int* lps);

/*
 * Counts the occurrences (overlapping ones included) of pat in txt using the
 * Knuth-Morris-Pratt algorithm.
 *
 * pat - NUL-terminated pattern
 * txt - NUL-terminated text
 *
 * Returns the number of occurrences; 0 when the pattern is empty or longer
 * than the text.
 */
int KMPSearch(char* pat, char* txt)
{
    int c = 0; /* number of matches found so far */

    int M = strlen(pat);
    int N = strlen(txt);

    /* An empty pattern would need a zero-length lps array, and a pattern
     * longer than the text cannot match. */
    if (M == 0 || M > N)
        return 0;

    /* lps[k]: length of the longest proper prefix of pat[0..k] that is also
     * a suffix of it. */
    int lps[M];

    computeLPSArray(pat, M, lps);

    int i = 0; /* index into txt */
    int j = 0; /* index into pat */

    while (i < N) {
        if (pat[j] == txt[i]) {
            j++;
            i++;
        }

        if (j == M) {
            /* Full match ending at i - 1; continue from the longest border so
             * overlapping occurrences are counted too. */
            c++;
            j = lps[j - 1];
        }
        else if (i < N && pat[j] != txt[i]) {
            /* Mismatch after j matches: reuse the matched prefix instead of
             * re-reading the text. */
            if (j != 0)
                j = lps[j - 1];
            else
                i = i + 1;
        }
    }

    return c;
}

/*
 * Fills lps[0..M-1] for pat, where lps[i] is the length of the longest proper
 * prefix of pat[0..i] that is also a suffix of pat[0..i].
 *
 * pat - pattern with at least M characters
 * M   - number of pattern characters to process
 * lps - output array with room for M ints
 *
 * Does nothing when M <= 0.
 */
void computeLPSArray(char* pat, int M, int* lps)
{
    /* Without any characters there is no lps[0] to write. */
    if (M <= 0)
        return;

    int len = 0; /* length of the previous longest prefix-suffix */

    lps[0] = 0;

    int i = 1;

    while (i < M) {
        if (pat[i] == pat[len]) {
            len++;
            lps[i] = len;
            i++;
        }
        else {
            if (len != 0) {
                /* Fall back to the next shorter border; i is not advanced. */
                len = lps[len - 1];
            }
            else {
                lps[i] = 0;
                i++;
            }
        }
    }
}

/*
 * Reads a text and then a pattern (whitespace-delimited tokens) from standard
 * input and prints how many times the pattern occurs in the text.
 *
 * Example: text "ABABDABACDABABCABAB", pattern "ABABCABAB".
 */
int main()
{
    char txt[50], pat[25];

    /* Field widths keep each token inside its buffer (one byte is left for
     * the terminating NUL). */
    if (scanf("%49s", txt) != 1)
        return 1;

    if (scanf("%24s", pat) != 1)
        return 1;

    printf("%d", KMPSearch(pat, txt));

    return 0;
}

Given a text txt[0..n-1] and a pattern pat[0..m-1], write a function search(char pat[], char txt[]) that
prints all occurrences of pat[] in txt[]. You may assume that n > m.

Note: Use Finite Automata Algorithm

#include<stdio.h>

#include<string.h>

#define NO_OF_CHARS 256

/*
 * Returns the automaton state reached from `state` after reading character
 * code x, for the pattern pat of length M.
 *
 * The result is the length of the longest prefix of pat that is a suffix of
 * pat[0..state-1] followed by x.
 *
 * x is a character code in 0..255; pattern bytes are compared as unsigned
 * char so that bytes >= 0x80 match where plain char is signed.
 */
int getNextState(char *pat, int M, int state, int x)
{
    /* The next pattern character matches: simply advance. */
    if (state < M && x == (unsigned char)pat[state])
        return state + 1;

    int ns, i;

    /* Try candidate prefix lengths from longest to shortest. */
    for (ns = state; ns > 0; ns--)
    {
        if ((unsigned char)pat[ns - 1] == x)
        {
            /* The candidate's last character is x; check that the rest of the
             * prefix lines up with the end of what has been matched. */
            for (i = 0; i < ns - 1; i++)
            {
                if (pat[i] != pat[state - ns + 1 + i])
                    break;
            }

            if (i == ns - 1)
                return ns;
        }
    }

    return 0;
}

void computeTF(char *pat, int M, int TF[][NO_OF_CHARS])

int state, x;

for (state = 0; state <= M; ++state)

for (x = 0; x < NO_OF_CHARS; ++x)

TF[state][x] = getNextState(pat, M, state, x);

/*
 * Finite-automaton search: prints the start index of every occurrence of pat
 * in txt. Both arguments are NUL-terminated strings.
 */
void search(char *pat, char *txt)
{
    int M = strlen(pat);
    int N = strlen(txt);

    /* One row per automaton state 0..M. */
    int TF[M + 1][NO_OF_CHARS];

    computeTF(pat, M, TF);

    int i, state = 0;

    for (i = 0; i < N; i++)
    {
        /* Index with unsigned char: a negative plain char would address
         * memory in front of the row. */
        state = TF[state][(unsigned char)txt[i]];

        /* State M means the last M characters read equal the pattern. */
        if (state == M)
            printf("Pattern found at index %d\n", i - M + 1);
    }
}

/*
 * Reads a text and then a pattern (whitespace-delimited tokens) from standard
 * input and prints every index at which the pattern occurs in the text.
 *
 * Example: text "AABAACAADAABAAABAA", pattern "AABA".
 */
int main()
{
    char txt[50], pat[25];

    /* Field widths keep each token inside its buffer (one byte is left for
     * the terminating NUL). */
    if (scanf("%49s", txt) != 1)
        return 1;

    if (scanf("%24s", pat) != 1)
        return 1;

    search(pat, txt);

    return 0;
}

You might also like