Results 1 to 2 of 2
- 07-23-2012, 02:45 PM #1
Member
- Join Date
- Oct 2011
- Posts
- 19
- Rep Power
- 0
JDOM XMl parser following a tree formart "parent-child-grandchild..."
Good day,
I am working on extracting data from an MS excel file using the Apache POI API and parsing the data through to an XML file. I have achieved that, however, the data is supposed to be in a tree format. Here is the java code I came up with and the proposed XML output.
Java Code:package za.co.sita.extraction; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.util.Iterator; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Row; import org.w3c.dom.Attr; import org.w3c.dom.Document; import org.w3c.dom.Element; public class ExtractWorkbookData { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder dBuilder; Document doc; Element globalElement, element, secondLevel, thirdLevel, fourthLevel, fifthElement; public void workbook(String filename) throws Exception { dBuilder = dbf.newDocumentBuilder(); doc = dBuilder.newDocument(); // doc.getDocumentElement(); try { HSSFWorkbook workbook = new HSSFWorkbook(new FileInputStream(filename)); HSSFSheet sheet = workbook.getSheetAt(0); Iterator<Row> rows = sheet.rowIterator(); int num = 0; globalElement = namespace(); while(rows.hasNext()) { Iterator<Cell> cells = rows.next().cellIterator(); String id = ""; while(cells.hasNext()) { Cell cell = (Cell)cells.next(); switch(cell.getColumnIndex()) { case 0 : if(cell.CELL_TYPE_NUMERIC == cell.getCellType()) { if(cell.getNumericCellValue() > 0) { id = String.valueOf(cell.getNumericCellValue()).substring(0, 1); } } else { if(!cell.getStringCellValue().equals("")) { id = String.valueOf(cell.getStringCellValue()); } } break; case 1 : if(!cell.getStringCellValue().equals("")) { element = baseElement(globalElement, String.valueOf(cell.getStringCellValue()), id); } break; case 2 : if(!cell.getStringCellValue().equals("")) { secondLevel = childHeadElement(element, String.valueOf(cell.getStringCellValue()), id); } case 3 : if(!cell.getStringCellValue().equals("")) { thirdLevel = levelthree(globalElement, String.valueOf(cell.getStringCellValue()), id); } break; case 4 : if(!cell.getStringCellValue().equals("")) { fourthLevel = levelfour(thirdLevel, id, String.valueOf(cell.getStringCellValue())); } break; case 5 : if(!cell.getStringCellValue().equals("")) { // fifthElement = levelFive(fourthLevel, String.valueOf(cell.getStringCellValue()), id); } break; } } num++; System.out.println("New Row " + num); } TransformerFactory tFactory = TransformerFactory.newInstance(); Transformer transformer = tFactory.newTransformer(); DOMSource source = new DOMSource(doc); StreamResult result = new StreamResult(new File("D:\\ExcelXML.xml")); transformer.transform(source, result); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } private Element namespace() { Element namespaceElement = doc.createElement("view:view"); doc.appendChild(namespaceElement); Attr attr = doc.createAttribute("xmlns:view"); attr.setValue("http://www.alfresco.org/view/repository/1.0"); namespaceElement.setAttributeNode(attr); attr = doc.createAttribute("xmlns:nt"); attr.setValue("http://www.jcp.org/jcr/nt/1.0"); namespaceElement.setAttributeNode(attr); attr = doc.createAttribute("xmlns:alf"); attr.setValue("http://www.alfresco.org"); namespaceElement.setAttributeNode(attr); attr = doc.createAttribute("xmlns:dod"); attr.setValue("http://www.alfresco.org/model/dod5015/1.0"); namespaceElement.setAttributeNode(attr); attr = doc.createAttribute("xmlns:cm"); attr.setValue("http://www.alfresco.org/model/content/1.0"); namespaceElement.setAttributeNode(attr); attr = doc.createAttribute("xmlns:rma"); attr.setValue("http://www.alfresco.org/model/recordsmanagement/1.0"); namespaceElement.setAttributeNode(attr); return namespaceElement; } private Element baseElement(Element root, String value, String id) { Element rootElement = doc.createElement("dod:recordSeries"); Attr attr = doc.createAttribute("view:childName"); attr.setValue("cm:" + value); rootElement.setAttributeNode(attr); Element headElement = doc.createElement("view:properties"); Element name = doc.createElement("cm:name"); Element identifier = doc.createElement("rma:identifier"); Element title = doc.createElement("cm:title"); Element desc = doc.createElement("cm:description"); name.setTextContent(id + "-" +value); identifier.setTextContent(id); title.setTextContent(value); desc.setTextContent(value); headElement.appendChild(name); headElement.appendChild(identifier); headElement.appendChild(title); headElement.appendChild(desc); rootElement.appendChild(headElement); Element baseElement = doc.createElement("view:associations"); Element neckElement = doc.createElement("cm:contains"); baseElement.appendChild(neckElement); rootElement.appendChild(baseElement); root.appendChild(rootElement); return neckElement; } private Element childHeadElement(Element baseElement, String value, String id) { Element record = doc.createElement("rma:recordFolder"); Element returnRecord = doc.createElement("rma:recordFolder"); Attr attr = doc.createAttribute("view:childName"); attr.setValue("cm:" + value); record.setAttributeNode(attr); Attr attrb = doc.createAttribute("view:childName"); attrb.setValue("cm:" + value); returnRecord.setAttributeNode(attrb); returnRecord.setAttributeNode(attrb); Element properties = doc.createElement("view:properties"); Element name = doc.createElement("cm:name"); Element identifier = doc.createElement("rma:identifier"); Element title = doc.createElement("cm:title"); name.setTextContent(id + "-" +value); identifier.setTextContent(id); title.setTextContent(value); properties.appendChild(name); properties.appendChild(identifier); properties.appendChild(title); record.appendChild(properties); baseElement.appendChild(record); return returnRecord; } private Element levelthree(Element level2Element, String value, String id) { Element record = doc.createElement("rma:recordFolder"); Attr attr = doc.createAttribute("view:childName"); attr.setValue("cm:" + value); record.setAttributeNode(attr); Element properties = doc.createElement("view:properties"); Element name = doc.createElement("cm:name"); Element identifier = doc.createElement("rma:identifier"); Element title = doc.createElement("cm:title"); name.setTextContent(id + "-" +value); identifier.setTextContent(id); title.setTextContent(value); properties.appendChild(name); properties.appendChild(identifier); properties.appendChild(title); record.appendChild(properties); level2Element.appendChild(record); return record; } private Element levelfour(Element level3Element, String id, String value) { Element headElement = doc.createElement("view:associations"); Element neckElement = doc.createElement("cm:contains"); Element record = doc.createElement("rma:recordFolder"); Attr attr = doc.createAttribute("view:childName"); attr.setValue("cm:" + value); record.setAttributeNode(attr); Element properties = doc.createElement("view:properties"); Element name = doc.createElement("cm:name"); Element identifier = doc.createElement("rma:identifier"); Element title = doc.createElement("cm:title"); name.setTextContent(id + "-" + value); identifier.setTextContent(id); title.setTextContent(value); properties.appendChild(name); properties.appendChild(identifier); properties.appendChild(title); record.appendChild(properties); neckElement.appendChild(record); headElement.appendChild(neckElement); level3Element.appendChild(headElement); return headElement; } private Element levelFive(Element level4Element, String value, String id) { Element headElement = doc.createElement("view:associations"); Element neckElement = doc.createElement("cm:contains"); Element record = doc.createElement("rma:recordFolder"); Attr attr = doc.createAttribute("view:childName"); attr.setValue("cm:" + value); record.setAttributeNode(attr); Element properties = doc.createElement("view:properties"); Element name = doc.createElement("cm:name"); Element identifier = doc.createElement("rma:identifier"); Element title = doc.createElement("cm:title"); name.setTextContent(id + "-" +value); identifier.setTextContent(id); title.setTextContent(value); properties.appendChild(name); properties.appendChild(identifier); properties.appendChild(title); record.appendChild(properties); neckElement.appendChild(record); headElement.appendChild(neckElement); level4Element.appendChild(headElement); return headElement; } public static void main(String[] args) { try { new ExtractWorkbookData().workbook("D:\\MultipleLevel.xls"); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } }The above XML file is the required format, what I have been unable to do is to nest them in the "parent-child-grandchild..." and so on format.XML Code:- <view:properties> <cm:name>7-Records Management</cm:name> <rma:identifier>7</rma:identifier> <cm:title>7-Records Management</cm:title> <cm:description>Record series for Records Management files</cm:description> </view:properties> - <view:associations> - <cm:contains> - <dod:recordCategory view:childName="cm:Matters regarding RM in general"> - <view:properties> <cm:name>Matters regarding RM in general</cm:name> <rma:identifier>7-1</rma:identifier> <cm:title>Matters regarding RM in general</cm:title> <cm:description>Matters regarding RM in general</cm:description> </view:properties> - <view:associations> - <cm:contains> - <rma:recordFolder view:childName="cm:Policy"> - <view:properties> <cm:name>Policy</cm:name> <rma:identifier>7-1-P</rma:identifier> <cm:title>Policy</cm:title> </view:properties> </rma:recordFolder> - <rma:recordFolder view:childName="cm:Routine enquiries"> - <view:properties> <cm:name>Routine enquiries</cm:name> <rma:identifier>7-1-R</rma:identifier> <cm:title>Routine enquiries</cm:title> </view:properties> </rma:recordFolder> - <rma:recordFolder view:childName="cm:Training"> - <view:properties> <cm:name>Training</cm:name> <rma:identifier>7-1-1</rma:identifier> <cm:title>Training</cm:title> </view:properties> - <view:associations> - <cm:contains> - <rma:recordFolder view:childName="cm:Registry training"> - <view:properties> <cm:name>Registry training</cm:name> <rma:identifier>7-1-1-1</rma:identifier> <cm:title>Registry training</cm:title> </view:properties> - <view:associations> - <cm:contains> - <rma:recordFolder view:childName="cm:Development of courses"> - <view:properties> <cm:name>Development of courses</cm:name> <rma:identifier>7-1-1-1-1</rma:identifier> <cm:title>Development of courses</cm:title> </view:properties> </rma:recordFolder>
Please help me figure this out, though I have a feeling it's the way I am "appending" the children elements.
Regards
Dark-RedD
- 07-27-2012, 08:41 AM #2
Member
- Join Date
- Oct 2011
- Posts
- 19
- Rep Power
- 0
Similar Threads
-
loop "play again" in an 8 ball game , loops but wont let me answer my "out.print"
By IareSmart in forum New To JavaReplies: 1Last Post: 02-01-2012, 08:37 PM -
Recursion to check the "rightness" of a search tree.
By kyameron in forum New To JavaReplies: 0Last Post: 12-11-2011, 10:58 PM -
how to create a "child" frame.
By Shellback3 in forum AWT / SwingReplies: 4Last Post: 02-02-2011, 04:48 AM -
Java subprocesses via Runtime.exec() and windows "end process tree"...
By fxRichard in forum Advanced JavaReplies: 2Last Post: 01-06-2009, 03:53 PM -
the dollar sign "$", prints like any other normal char in java like "a" or "*" ?
By lse123 in forum New To JavaReplies: 1Last Post: 10-20-2008, 07:35 AM


LinkBack URL
About LinkBacks
Reply With Quote
Bookmarks