Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1 of 10

import java.io.

*;
import de.bwaldvogel.liblinear.Feature;
import de.bwaldvogel.liblinear.FeatureNode;
import de.bwaldvogel.liblinear.Linear;
import de.bwaldvogel.liblinear.Model;
import de.bwaldvogel.liblinear.Parameter;
import de.bwaldvogel.liblinear.Problem;
import de.bwaldvogel.liblinear.SolverType;
import java.util.*;
import de.bwaldvogel.liblinear.Problem;
public class filereader {
public static int articlecount=0;
public static String [] emotions={"stupid","scary","amusing","sad","stra
nge","interesting","hero","weird","dumbass"};
public static HashMap<String, Integer> map =new HashMap<String, Integer>
();
public static HashMap<String,Float> idf1=new HashMap<String,Float>();
public static FeatureNode [][] featurearray=new FeatureNode[3000][90000]
;
public static double [] outputarray=new double[3000];
public static int NUM_OF_TS_EXAMPLES = 0;
public static Feature[] instance=new FeatureNode[90000];
public static void test()
{
int accurate=0;
int wrong=0;
NUM_OF_TS_EXAMPLES =articlecount;
Problem problem = new Problem();
problem.l = articlecount; // number of training examples
problem.n = map.size(); // number of features
problem.x = featurearray; // feature nodes
problem.y = outputarray; // target values
SolverType solver = SolverType.L1R_LR; // -s 0
double C = 0.01; // cost of constraints violation
double eps = 0.1; // stopping criteria
Parameter parameter = new Parameter(solver, C, eps);
Model model = Linear.train(problem, parameter);
File modelFile = new File("model");
try {
model.save(modelFile);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}

int cc;
int it=0;
cc=1;
FileReader fr1 = null;
try {
fr1 = new FileReader("C:\\Users\
\sai preethy\\Dropbox\\nlp\\headlinedata\\sai_svm_test_data.txt");
} catch (FileNotFoundException e1) {
// TODO Auto-generated catch blo
ck
e1.printStackTrace();
}
BufferedReader bf1=new BufferedReader(fr
1);
String aLine;
//int count=1;
try {
while((aLine=bf1.readLin
e())!=null)
{
String[] arr = a
Line.split("\t");
cc=0;
for ( String ss:
arr)
{
System.o
ut.println(ss);
if(cc==0)
it=Integer.parseInt(ss);
else
instance[cc-1]=n
ew FeatureNode(cc,Float.parseFloat(ss));
//instance[cc-1]
=new FeatureNode(cc,1);
cc++;
}
// load model or use it directly
try {
model = Model.load(model
File);
} catch (IOException e) {
// TODO Auto-generated c
atch block
e.printStackTrace();
}
double prediction = Linear.predict(model, instance);
// System.out.println("prediction"+prediction+" "+fileindex);
if(prediction== it)
{
accurate++;
}
else
{
wrong++;
}
}
} catch (IOException e) {
// TODO Auto-generated c
atch block
e.printStackTrace();
}
System.out.println("accurate"+accurate+ " "+wrong);
}
/**
 * Increments by one the dictionary count of every key of {@code m1} that is already
 * present in {@code map}. Keys absent from {@code map} are ignored; the values stored
 * in {@code m1} are not read.
 *
 * @param m1 per-article term map whose key set is applied to the dictionary
 */
public static void addtodictionary(HashMap<String, Integer> m1)
{
    Iterator<String> terms = m1.keySet().iterator();
    while (terms.hasNext()) {
        String term = terms.next();
        if (!map.containsKey(term)) {
            continue; // only terms already in the dictionary are counted
        }
        int previous = map.get(term);
        map.put(term, previous + 1);
    }
}
/**
 * Fills {@code idf1} with {@code ln(articlecount / count)} for every term in
 * {@code map}, where {@code count} is the term's current value in {@code map}.
 *
 * <p>The ratio is computed in floating point; it used to be an integer division, which
 * truncated the quotient before the logarithm was taken. A term whose count is zero gets
 * weight 0 instead of triggering a division by zero.
 */
public static void idf()
{
    System.out.println("map size"+map.size());
    for (Map.Entry<String, Integer> entry : map.entrySet())
    {
        int count = entry.getValue();
        float weight = 0f;
        if (count > 0) {
            weight = (float) Math.log((double) articlecount / count);
        }
        idf1.put(entry.getKey(), weight);
    }
    System.out.println("idf map size"+idf1.size());
}
public static void writeidftfproduct()
{
int index=0;
int cc;
int it=0;
cc=1;
FileReader fr1 = null;
try {
fr1 = new FileReader("C:\\Users\
\sai preethy\\Dropbox\\nlp\\headlinedata\\sai_svm_train_data.txt");
} catch (FileNotFoundException e1) {
// TODO Auto-generated catch blo
ck
e1.printStackTrace();
}
BufferedReader bf1=new BufferedReader(fr
1);
String aLine;
//int count=1;
try {
while((aLine=bf1
.readLine())!=null)
{
String[]
arr = aLine.split("\t");
cc=0;
for ( St
ring ss:arr)
{
System.out.println(ss);
if(cc==0)
outputarray[index]=Integer.parseInt(ss);
else
featurearray[index][cc-1]=new FeatureNode(cc,Float.parseFloat(ss));
//instan
ce[cc-1]=new FeatureNode(cc,1);
cc++;
}
index++;
}
} catch (NumberFormatExc
eption | IOException e) {
// TODO Auto-gen
erated catch block
e.printStackTrac
e();
}
}
public static void readarticle()
{
HashMap<String,Integer> m1=new HashMap<String,Integer>();
PrintWriter writer1=null;
try {

writer1 = new PrintWriter("C:\\Users\\sai preethy\\Drop
box\\nlp\\headlinedata\\tfidf.txt", "UTF-8");
// writer2 = new PrintWriter("C:\\Users\\Hi\\Dropbox\\nlp\
\headlinedata\\highfrequentdictlables.txt", "UTF-8");
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
int cc=1;

cc=1;
FileReader fr1 = null;
try {
fr1 = new FileReader("C:\\Users\\sai pre
ethy\\Dropbox\\nlp\\allemotionsheadlinecontent.txt");
} catch (FileNotFoundException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
BufferedReader bf1=new BufferedReader(fr1);
String aLine1;
int count1=1;
String tag=null;
int index=0;
try {
while((aLine1=bf1.readLine())!=n
ull)
{ cc=1;
articlecount++;
//System.out.println(aLi
ne);
int categorytag=0;
String[] arr = aLine1.sp
lit(" ");
//for (int ij1=0;ij1<arr
.length-1;ij1++)
//{
// String ss=arr
[ij1]+" "+arr[ij1+1];
for ( String ss : arr)
{
categorytag++;

if(categorytag==1)
{
tag=ss;
}
if(!m1.
containsKey(ss) )
{
m1.put(
ss, new Integer(1));
// System.
out.println(ss);
}
else
{
count1=m1.get(ss);
m1.put(ss,count1+1);

}
}
int c1= m1.size();
// writeidftfproduct( m1 ,tag,writer1,index);
//System.out.println("count "+c1+" "+cc);
m1.clear();
index++;
}
} catch (IOException e) {
// TODO Auto-generated catch blo
ck
e.printStackTrace();
}
writer1.close();
}
public static void createmaparticle()
{
HashMap<String,Integer> m1=new HashMap<String,Integer>();
int cc=1;

cc=1;
FileReader fr1 = null;
try {
fr1 = new FileReader("C:\\Users\\sai pre
ethy\\Dropbox\\nlp\\allemotionsheadlinecontent.txt");
} catch (FileNotFoundException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
BufferedReader bf1=new BufferedReader(fr1);
String aLine1;
int count1=1;
String tag=null;
try {
while((aLine1=bf1.readLine())!=null)
{ cc=1;
articlecount++;
//System.out.println(aLine);
int categorytag=0;
String[] arr = aLine1.split(" ")
;
//for (int ij1=0;ij1<arr.length-
1;ij1++)
//{
// String ss=arr[ij1]+"
"+arr[ij1+1];
for ( String ss : arr)
{
categorytag++;

if(categorytag==1)
{
tag=ss;
}
if(!m1.contains
Key(ss) )
{
m1.put(ss, new
Integer(1));
// System.out.prin
tln(ss);
}
else
{
count1=m
1.get(ss);
m1.put(s
s,count1+1);

}
}
int c1= m1.size();
addtodictionary(m1);
//System.out.println("count "+c1+" "+cc);
m1.clear();
}
bf1.close();
} catch (IOException e)
{
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public static void main(String[] args) throws FileNotFoundException
{
int [] emotionscount=new int[emotions.length];
PrintWriter writer = null;

//for(int it=0;it<=6;it++)
// {
FileReader fr=new FileReader("C:\\Us
ers\\sai preethy\\Dropbox\\nlp\\allemotionsheadlinecontent.txt");
BufferedReader bf=new Bu
fferedReader(fr);
String aLine;
int count=1;
try {
while((aLine=bf.
readLine())!=null)
{
int firs
t=0;
//System
.out.println(aLine);
String[]
arr = aLine.split(" ");
// for (int
ij1=0;ij1<arr.length-1;ij1++)
// {

//
String ss=arr[ij1]+" "+arr[ij1+1];
for ( String ss
: arr)
{
String category;
first++;
if(first==1)
{category=ss;

for(int it=0;it<emotions.length;it++)
{
if(emotions[it].equalsIgnoreCase(category))
{
emotionscount[it]++;
System.out.println("category "+category+"count "+emotionscount[it]);
}
}
}
else
{
if(!map.containsKey(ss) )
{
map.put(ss, new Integer(1));
// System.out.println(ss);
}
else
{
count=map.get(ss);
map.put(ss,count+1);

}
}
}
}
bf.close();
} catch (IOException e)
{
// TODO Auto-generated c
atch block
e.printStackTrace();
}

// }

writeidftfproduct();
test();

System.out.println("map before "+map.size());

for(Iterator<Map.Entry<String,Integer>>it1=map.entry
Set().iterator();it1.hasNext();){

Map.Entry<String, Integer> entry = it1.next();

if (entry.getValue() < 15) {//removing elements
with total frequency count in training corpus less than 5 from dictionary

it1.remove();


}


}

System.out.println("map after "+map.size());
for (String k : map.keySet())
map.put(k, 0);
System.out.println("keys "+map);

int c= map.size();
System.out.println("count of dic "+c);
createmaparticle();
System.out.println("called function createmaparticle "+map.size());
System.out.println("article count "+articlecount);
idf();
System.out.println("idf "+idf1);
readarticle();
//}
}
}

You might also like