Results 1 to 2 of 2
- 03-16-2011, 06:46 AM #1
Member
- Join Date
- Nov 2010
- Posts
- 2
- Rep Power
- 0
No response is returned when the document contains special characters
Please help with the following issue: no response is returned when the document contains special characters. To reproduce, use any .doc file containing special characters (e.g. &, $, <, >, ...), such as TestErrorFour.doc
Error message:
System.FormatException: Invalid length for a Base-64 char array. at
System.Convert.FromBase64String(String s) at
Summarize.Summarizer.AccumulateBroadcast(String filedata, String givenWords) in
c:\DocumentSummarizer\App_Code\Summarizer.cs:line 66
Code:
--------------------------------------------------------------------------File 1:
Note: Use any doc with special char(ex: &, $, <, >,.....) TestErrorFour.docJava Code:import java.io.BufferedReader; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStream; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLConnection; import java.util.Properties; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.hwpf.*; import org.apache.poi.hwpf.extractor.*; import com.lowagie.text.Document; import com.lowagie.text.pdf.PRTokeniser; import com.lowagie.text.pdf.PdfReader; public class DocumentSummarizerClient { static Properties loadProperties() { Properties prop = new Properties(); try { prop.load(DocumentSummarizerClient.class.getClassLoader().getResourceAsStream("vep.properties")); } catch (Exception ioe) { ioe.printStackTrace(); } return prop; } public String getSummary(String fileName,String noOfWordsOrPercentage ){ String summaryInputData =""; String summarizedData=""; String summarizerURL = loadProperties().getProperty("Summarizer.serviceURL"); try { String fileExtension=fileName.substring(fileName.lastIndexOf(".")+1, fileName.length()); if (fileExtension.equalsIgnoreCase("doc")|| fileExtension.equalsIgnoreCase("txt")|| fileExtension.equalsIgnoreCase("pdf")) { if (fileExtension.equalsIgnoreCase("txt")) { BufferedReader bufferedReader = new BufferedReader( new FileReader(fileName)); String line = null; while ((line = bufferedReader.readLine()) != null) { summaryInputData += line; } } if(fileExtension.equalsIgnoreCase("doc")){ POIFSFileSystem fs = null; fs = new POIFSFileSystem(new FileInputStream(fileName)); HWPFDocument doc = new HWPFDocument(fs); WordExtractor we = new WordExtractor(doc); String[] paragraphs = we.getParagraphText(); for( int i=0; i<paragraphs .length; i++ ) { paragraphs[i] = paragraphs[i].replaceAll("\\cM?\r?\n",""); summaryInputData+= paragraphs[i]; } } 
if(fileExtension.equalsIgnoreCase("pdf")){ Document document = new Document(); document.open(); PdfReader reader = new PdfReader(fileName); int pageCount =reader.getNumberOfPages(); for(int i=1;i<=pageCount;i++){ byte[] bytes = reader.getPageContent(i); PRTokeniser tokenizer = new PRTokeniser(bytes); StringBuffer buffer = new StringBuffer(); while (tokenizer.nextToken()) { if (tokenizer.getTokenType() == PRTokeniser.TK_STRING) { buffer.append(tokenizer.getStringValue()); } } summaryInputData += buffer.toString(); } } } else{ System.out.println("This is Invalid document. Presntly we support only text,word and PDF documents "); } // String encoded =new String (summaryInputData.getBytes("ISO-8859-1"),"UTF-8"); String encoded=Base64Utils.base64Encode(summaryInputData.getBytes()); // encoded =new String (summaryInputData.getBytes("ISO-8859-1"),"UTF-8"); String parameters= "base64String="+encoded+"&noOfWordsOrPercentage="+noOfWordsOrPercentage; summarizedData= postRequest(parameters,summarizerURL); String slength= "<string xmlns=\"http://tempuri.org/\">"; if(summarizedData.contains("</string>")){ summarizedData= summarizedData.substring(summarizedData.indexOf(slength)+slength.length(),summarizedData.indexOf("</string>")); summarizedData = replaceVal(summarizedData); //System.out.println("<?xml version=\"1.0\" encoding=\"utf-8\"?><![CDATA["+summarizedData+"]]>"); System.out.println("Summarized data "+summarizedData); if(summarizedData.contains("Please enter the percentage")){ summarizedData="Data given cannot be summarized further"; } } else{ System.out.println("Data given cannot be summarized further"); summarizedData=""; } } catch (FileNotFoundException e) { return("The File is not found \n\n"+e.toString()); } catch (IOException e) { return("The File is already in use \n\n"+e.toString()); } catch (Exception e) { return(e.toString()); } return summarizedData; } public static String postRequest(String parameters,String webServiceURL) throws Exception{ Properties 
systemSettings = System.getProperties(); systemSettings.put("http.proxyHost", loadProperties().getProperty("proxyHost")); systemSettings.put("http.proxyPort", loadProperties().getProperty("proxyPort")); System.setProperties(systemSettings); String responseXML = ""; try { URL url = new URL(webServiceURL); URLConnection connection = url.openConnection(); HttpURLConnection httpConn = (HttpURLConnection) connection; byte[] requestXML = parameters.getBytes(); httpConn.setRequestProperty("Content-Length", String .valueOf(requestXML.length)); httpConn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); httpConn.setRequestMethod("POST"); httpConn.setDoOutput(true); httpConn.setDoInput(true); OutputStream out = httpConn.getOutputStream(); out.write(requestXML, 0, requestXML.length); out.close(); InputStreamReader isr = new InputStreamReader(httpConn .getInputStream()); BufferedReader br = new BufferedReader(isr); String temp; String tempResponse = null; while ((temp = br.readLine()) != null) tempResponse = tempResponse + temp; responseXML = tempResponse; br.close(); isr.close(); } catch (java.net.MalformedURLException e) { System.out .println("Error in postRequest(): Secure Service Required"); } catch (Exception e) { System.out.println("Error in postRequest(): " + e.getMessage()); } return responseXML; } public String replaceVal(String value) { if (value == null) { value = null; } value = value.replace("<", "<"); value = value.replace(">", ">"); value = value.replace("&", "&"); return value; } public static void main(String[] args) { DocumentSummarizerClient testdoc=new DocumentSummarizerClient(); System.out.println("hello"); testdoc.getSummary("C:\\working_folder\\vep\\UnitTestCases\\VEP1.0\\DocumentSummarizerTestData\\TestOne.txt","100%"); } }
File 2:
--------------------------------------------------------------------------
// issue 2: Exception when passing 2MB .txt file
/**
 * Base64 encoder and decoder using the alphabet specified in RFC-2045 (Section 6.8).
 */
public class Base64Utils {

    /** Maps a 6-bit value (0-63) to its Base64 character. */
    private static final char[] ENC_MAP =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();

    /** Maps an ASCII code to its 6-bit value, or -1 when the character is not in the alphabet. */
    private static final byte[] DEC_MAP = new byte[128];

    /**
     * Class initializer. Builds the reverse lookup table from the Base64 alphabet.
     */
    static {
        // Mark every slot invalid first so that 'A' (value 0) can be told apart from
        // characters that are not part of the alphabet.
        for (int i = 0; i < DEC_MAP.length; i++) {
            DEC_MAP[i] = -1;
        }
        for (int i = 0; i < ENC_MAP.length; i++) {
            DEC_MAP[ENC_MAP[i]] = (byte) i;
        }
    }

    /**
     * This class isn't meant to be instantiated.
     */
    private Base64Utils() {
    }

    /**
     * Encodes the given byte[] using the Base64-encoding,
     * as specified in RFC-2045 (Section 6.8).
     *
     * @param aData the data to be encoded
     * @return the Base64-encoded <var>aData</var>, padded with '=' to a multiple of 4 characters
     * @exception IllegalArgumentException if NULL or empty array is passed
     */
    public static String base64Encode(byte[] aData) {
        if ((aData == null) || (aData.length == 0)) {
            throw new IllegalArgumentException("Can not encode NULL or empty byte array.");
        }

        // Characters are produced directly so the result does not depend on the
        // platform default charset.
        char[] encoded = new char[((aData.length + 2) / 3) * 4];
        int src = 0;
        int dest = 0;

        // 3-byte to 4-character conversion
        for (; src + 2 < aData.length; src += 3) {
            int b0 = aData[src] & 0xFF;
            int b1 = aData[src + 1] & 0xFF;
            int b2 = aData[src + 2] & 0xFF;
            encoded[dest++] = ENC_MAP[b0 >>> 2];
            encoded[dest++] = ENC_MAP[((b0 << 4) | (b1 >>> 4)) & 0x3F];
            encoded[dest++] = ENC_MAP[((b1 << 2) | (b2 >>> 6)) & 0x3F];
            encoded[dest++] = ENC_MAP[b2 & 0x3F];
        }

        // Convert the last 1 or 2 bytes and pad the final group with '='
        int remaining = aData.length - src;
        if (remaining == 1) {
            int b0 = aData[src] & 0xFF;
            encoded[dest++] = ENC_MAP[b0 >>> 2];
            encoded[dest++] = ENC_MAP[(b0 << 4) & 0x3F];
            encoded[dest++] = '=';
            encoded[dest++] = '=';
        } else if (remaining == 2) {
            int b0 = aData[src] & 0xFF;
            int b1 = aData[src + 1] & 0xFF;
            encoded[dest++] = ENC_MAP[b0 >>> 2];
            encoded[dest++] = ENC_MAP[((b0 << 4) | (b1 >>> 4)) & 0x3F];
            encoded[dest++] = ENC_MAP[(b1 << 2) & 0x3F];
            encoded[dest++] = '=';
        }

        return new String(encoded);
    }

    /**
     * Decodes the given Base64-encoded data,
     * as specified in RFC-2045 (Section 6.8).
     *
     * <p>Whitespace (such as line breaks in wrapped data) is skipped, and trailing '='
     * padding is optional.
     *
     * @param aData the Base64-encoded aData.
     * @return the decoded <var>aData</var>.
     * @exception IllegalArgumentException if NULL or empty data is passed, if the data
     *            contains a character outside the Base64 alphabet, or if its length
     *            cannot correspond to a whole number of bytes
     */
    public static byte[] base64Decode(String aData) {
        if ((aData == null) || (aData.length() == 0)) {
            throw new IllegalArgumentException("Can not decode NULL or empty string.");
        }

        // Character to 0-63 conversion, dropping whitespace and trailing padding
        byte[] sextets = new byte[aData.length()];
        int count = 0;
        boolean paddingSeen = false;
        for (int i = 0; i < aData.length(); i++) {
            char c = aData.charAt(i);
            if (Character.isWhitespace(c)) {
                continue;
            }
            if (c == '=') {
                paddingSeen = true;
                continue;
            }
            if (paddingSeen || c >= DEC_MAP.length || DEC_MAP[c] < 0) {
                throw new IllegalArgumentException(
                        "Invalid Base64 character at index " + i + ".");
            }
            sextets[count++] = DEC_MAP[c];
        }

        int tail = count % 4;
        if (count == 0 || tail == 1) {
            // A single leftover character carries only 6 bits, which is less than one byte.
            throw new IllegalArgumentException("Invalid Base64 data length.");
        }

        byte[] decoded = new byte[(count / 4) * 3 + (tail == 0 ? 0 : tail - 1)];
        int src = 0;
        int dest = 0;

        // 4-character to 3-byte conversion; the byte cast discards the overflowing high bits
        for (; src + 3 < count; src += 4) {
            decoded[dest++] = (byte) ((sextets[src] << 2) | (sextets[src + 1] >>> 4));
            decoded[dest++] = (byte) ((sextets[src + 1] << 4) | (sextets[src + 2] >>> 2));
            decoded[dest++] = (byte) ((sextets[src + 2] << 6) | sextets[src + 3]);
        }

        // Handle last 1 or 2 bytes
        if (tail >= 2) {
            decoded[dest++] = (byte) ((sextets[src] << 2) | (sextets[src + 1] >>> 4));
        }
        if (tail == 3) {
            decoded[dest++] = (byte) ((sextets[src + 1] << 4) | (sextets[src + 2] >>> 2));
        }

        return decoded;
    }
}
--------------------------------------------------------------------------------
Steps to reproduce:
Call getSummary() with 2MB .txt file
Actual:
The following exception has occurred:
--------------------------------------------------------------------------1. Error in postRequest(): Unexpected end of file from server
java.lang.NullPointerException
Please provide your precious feedback/suggestions.
Thanks in advance…..Last edited by Eranga; 03-16-2011 at 09:54 AM. Reason: code tags added
- 03-16-2011, 09:04 AM #2
Moderator
- Join Date
- Apr 2009
- Posts
- 10,481
- Rep Power
- 16
Similar Threads
-
Intaking String and splitting into chars, returning individual chars as string array
By Gokul138 in forum New To JavaReplies: 1Last Post: 02-07-2011, 08:22 PM -
Searching index with special chars
By noorws in forum LuceneReplies: 1Last Post: 03-29-2010, 09:13 PM -
List of chars
By SwEeTAcTioN in forum New To JavaReplies: 8Last Post: 01-05-2010, 12:05 PM -
filter special chars
By Strubbl in forum LuceneReplies: 0Last Post: 08-17-2009, 10:23 AM -
chars
By whosadork in forum New To JavaReplies: 6Last Post: 10-03-2008, 09:40 PM


LinkBack URL
About LinkBacks

Bookmarks