The following code extracts the text content of an .xlsx file:
package com.finra;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.PrintWriter;
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
 * Command-line utility that extracts the text content of an .xlsx workbook,
 * echoes it to standard output, and writes a pipe-delimited copy to a text file.
 */
public class TestExcelExtractor {

    /** Workbook that is read. */
    private static final String INPUT_PATH = "c:/users/dgandikota/Test.xlsx";

    /** Destination text file; any existing content is overwritten. */
    private static final String OUTPUT_PATH = "c:/users/dgandikota/excelextract.txt";

    /**
     * Reads {@link #INPUT_PATH}, prints the raw extracted text to standard output,
     * then writes the text to {@link #OUTPUT_PATH} with tabs replaced by {@code |}
     * and every occurrence of the literal {@code null} removed.
     *
     * @param args unused
     * @throws Exception if the workbook cannot be read or the output file cannot be written
     */
    public static void main(String[] args) throws Exception {
        String allTxt;
        // try-with-resources releases the input stream and the workbook even when
        // extraction fails part-way.
        try (FileInputStream file = new FileInputStream(new File(INPUT_PATH));
                // Create Workbook instance holding reference to .xlsx file
                XSSFWorkbook workbook = new XSSFWorkbook(file)) {
            // The extractor is built on top of the workbook opened above, which is
            // closed by this try block.
            XSSFExcelExtractor excelExtractor = new XSSFExcelExtractor(workbook);
            allTxt = excelExtractor.getText();
        }

        // Echo the unmodified extraction before any post-processing.
        System.out.println(allTxt);

        // Use '|' as the field separator in the output file.
        allTxt = allTxt.replace('\t', '|');
        // NOTE(review): this removes the substring "null" wherever it appears,
        // including inside legitimate cell text - confirm that is intended.
        allTxt = allTxt.replace("null", "");

        // FileOutputStream truncates an existing file, so no explicit delete is needed.
        try (PrintWriter pw = new PrintWriter(new FileOutputStream(new File(OUTPUT_PATH)))) {
            pw.print(allTxt);
            // PrintWriter swallows I/O errors; checkError() flushes and reports them.
            if (pw.checkError()) {
                throw new java.io.IOException("Failed to write extracted text to " + OUTPUT_PATH);
            }
        }
    }
}
Sheet names are included in the output by default.
The end of a sheet can be detected by the absence of the field separator on a line (e.g. line.indexOf("|") == -1).