Professional Documents
Culture Documents
Java
Java
Java
classes to handle this, but the Apache POI package, developed by the Apache Software
Foundation, lets you read Microsoft Word documents in Java. More information on the
Apache POI package can be found on the Apache POI website.
import org.apache.poi.poifs.filesystem.*;
import org.apache.poi.hwpf.*;
import org.apache.poi.hwpf.extractor.*;
import java.io.*;
public class readDoc
{
public static void main( String[] args )
{
String filesname = "Hello.doc";
POIFSFileSystem fs = null;
try
{
fs = new POIFSFileSystem(new FileInputStream(filesname;
//Couldn't close the braces at the end as my site did not
allow it to close
HWPFDocument doc = new HWPFDocument(fs);
WordExtractor we = new WordExtractor(doc);
String[] paragraphs = we.getParagraphText();
System.out.println( "Word Document has " + paragraphs.length
+ " paragraphs" );
for( int i=0; i<paragraphs .length; i++ ) {
paragraphs[i] =
paragraphs[i].replaceAll("\\cM?\r?\n","");
System.out.println( "Length:"+paragraphs[ i
].length());
}
}
catch(Exception e) {
e.printStackTrace();
}
}