Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1of 4

# Standard library
import random
import pickle

# Third-party
import numpy as np
import pandas as pd
from nltk.tokenize import RegexpTokenizer
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Activation
from tensorflow.keras.optimizers import RMSprop

# Load the corpus of texts from CSV.
# NOTE(review): "data koo" is the path as transcribed — presumably a CSV file
# with a `text` column; confirm the real filename/extension.
text_df = pd.read_csv("data koo")

text = list(text_df.text.values)

# Join with a space so the last word of one text does not fuse with the
# first word of the next before tokenization.
joined_text = " ".join(text)

# Keep only the first 10k characters to keep training tractable.
partial_text = joined_text[:10000]

# \w+ keeps word characters only, dropping punctuation.
tokenizer = RegexpTokenizer(r"\w+")
tokens = tokenizer.tokenize(partial_text.lower())

# Vocabulary: sorted unique tokens, plus a token -> index lookup table
# used to one-hot encode words for the model.
unique_tokens = np.unique(tokens)
unique_token_index = {token: idx for idx, token in enumerate(unique_tokens)}


# Context window size: the model sees n_words tokens and predicts the next one.
n_words = 10

# Sliding window over the token stream: each sample is n_words consecutive
# tokens (input) paired with the token that follows them (target).
input_words = []
next_words = []
for i in range(len(tokens) - n_words):
    input_words.append(tokens[i:i + n_words])
    next_words.append(tokens[i + n_words])

# One-hot encode:
#   X[sample, position, vocab_index] = 1 for each word in the window
#   y[sample, vocab_index]           = 1 for the following word
# bool dtype keeps the arrays small; Keras casts to float internally.
X = np.zeros((len(input_words), n_words, len(unique_tokens)), dtype=bool)
y = np.zeros((len(input_words), len(unique_tokens)), dtype=bool)
for i, words in enumerate(input_words):
    for j, word in enumerate(words):
        X[i, j, unique_token_index[word]] = 1
    y[i, unique_token_index[next_words[i]]] = 1

# Two stacked LSTM layers followed by a softmax over the vocabulary.
# return_sequences=True on the first LSTM so the second receives the
# full sequence of hidden states rather than only the final one.
model = Sequential()
model.add(LSTM(128, input_shape=(n_words, len(unique_tokens)),
               return_sequences=True))
model.add(LSTM(128))
model.add(Dense(len(unique_tokens)))
model.add(Activation("softmax"))

# Categorical cross-entropy matches the one-hot targets in y.
model.compile(loss="categorical_crossentropy",
              optimizer=RMSprop(learning_rate=0.01),
              metrics=["accuracy"])

model.fit(X, y, batch_size=128, epochs=10, shuffle=True)

# Persist the trained model, then reload it (demonstrates round-tripping;
# the path must be a string).
model.save("my_model_next_word")
model = load_model("my_model_next_word")

# to predict next word

def predict_next_word(input_text, n_best):
    """Return the vocabulary indices of the n_best most likely next words.

    input_text: whitespace-separated context string; each word must be in
        the training vocabulary (unknown words raise KeyError).
    n_best: how many top candidates to return.

    Returns a numpy array of n_best indices into unique_tokens
    (unordered — argpartition only guarantees they are the top-n).
    """
    input_text = input_text.lower()
    # One-hot encode the context the same way the training data was built.
    X = np.zeros((1, n_words, len(unique_tokens)))
    for i, word in enumerate(input_text.split()):
        X[0, i, unique_token_index[word]] = 1
    predictions = model.predict(X)[0]
    # argpartition is O(n) vs a full sort; the last n_best entries are
    # the indices with the highest predicted probabilities.
    return np.argpartition(predictions, -n_best)[-n_best:]

# Demo: top-5 candidate next words for a sample context.
# NOTE(review): the context words must exist in the training vocabulary,
# otherwise predict_next_word raises KeyError.
possible = predict_next_word("caalaan galeera jedheen", 5)
print([unique_tokens[idx] for idx in possible])

You might also like