JDOM XMl parser following a tree formart "parent-child-grandchild..."
Good day,
I am working on extracting data from an MS excel file using the Apache POI API and parsing the data through to an XML file. I have achieved that, however, the data is supposed to be in a tree format. Here is the java code I came up with and the proposed XML output.
Code:
package za.co.sita.extraction;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Iterator;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
public class ExtractWorkbookData
{
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder;
Document doc;
Element globalElement, element, secondLevel, thirdLevel, fourthLevel,
fifthElement;
public void workbook(String filename) throws Exception
{
dBuilder = dbf.newDocumentBuilder();
doc = dBuilder.newDocument();
// doc.getDocumentElement();
try
{
HSSFWorkbook workbook = new HSSFWorkbook(new FileInputStream(filename));
HSSFSheet sheet = workbook.getSheetAt(0);
Iterator<Row> rows = sheet.rowIterator();
int num = 0;
globalElement = namespace();
while(rows.hasNext())
{
Iterator<Cell> cells = rows.next().cellIterator();
String id = "";
while(cells.hasNext())
{
Cell cell = (Cell)cells.next();
switch(cell.getColumnIndex())
{
case 0 :
if(cell.CELL_TYPE_NUMERIC == cell.getCellType())
{
if(cell.getNumericCellValue() > 0)
{
id = String.valueOf(cell.getNumericCellValue()).substring(0, 1);
}
}
else
{
if(!cell.getStringCellValue().equals(""))
{
id = String.valueOf(cell.getStringCellValue());
}
}
break;
case 1 :
if(!cell.getStringCellValue().equals(""))
{
element = baseElement(globalElement, String.valueOf(cell.getStringCellValue()), id);
}
break;
case 2 :
if(!cell.getStringCellValue().equals(""))
{
secondLevel = childHeadElement(element, String.valueOf(cell.getStringCellValue()), id);
}
case 3 :
if(!cell.getStringCellValue().equals(""))
{
thirdLevel = levelthree(globalElement, String.valueOf(cell.getStringCellValue()), id);
}
break;
case 4 :
if(!cell.getStringCellValue().equals(""))
{
fourthLevel = levelfour(thirdLevel, id, String.valueOf(cell.getStringCellValue()));
}
break;
case 5 :
if(!cell.getStringCellValue().equals(""))
{
// fifthElement = levelFive(fourthLevel, String.valueOf(cell.getStringCellValue()), id);
}
break;
}
}
num++;
System.out.println("New Row " + num);
}
TransformerFactory tFactory = TransformerFactory.newInstance();
Transformer transformer = tFactory.newTransformer();
DOMSource source = new DOMSource(doc);
StreamResult result = new StreamResult(new File("D:\\ExcelXML.xml"));
transformer.transform(source, result);
}
catch (FileNotFoundException e)
{
// TODO Auto-generated catch block
e.printStackTrace();
}
catch (IOException e)
{
// TODO Auto-generated catch block
e.printStackTrace();
}
}
private Element namespace()
{
Element namespaceElement = doc.createElement("view:view");
doc.appendChild(namespaceElement);
Attr attr = doc.createAttribute("xmlns:view");
attr.setValue("http://www.alfresco.org/view/repository/1.0");
namespaceElement.setAttributeNode(attr);
attr = doc.createAttribute("xmlns:nt");
attr.setValue("http://www.jcp.org/jcr/nt/1.0");
namespaceElement.setAttributeNode(attr);
attr = doc.createAttribute("xmlns:alf");
attr.setValue("http://www.alfresco.org");
namespaceElement.setAttributeNode(attr);
attr = doc.createAttribute("xmlns:dod");
attr.setValue("http://www.alfresco.org/model/dod5015/1.0");
namespaceElement.setAttributeNode(attr);
attr = doc.createAttribute("xmlns:cm");
attr.setValue("http://www.alfresco.org/model/content/1.0");
namespaceElement.setAttributeNode(attr);
attr = doc.createAttribute("xmlns:rma");
attr.setValue("http://www.alfresco.org/model/recordsmanagement/1.0");
namespaceElement.setAttributeNode(attr);
return namespaceElement;
}
private Element baseElement(Element root, String value, String id)
{
Element rootElement = doc.createElement("dod:recordSeries");
Attr attr = doc.createAttribute("view:childName");
attr.setValue("cm:" + value);
rootElement.setAttributeNode(attr);
Element headElement = doc.createElement("view:properties");
Element name = doc.createElement("cm:name");
Element identifier = doc.createElement("rma:identifier");
Element title = doc.createElement("cm:title");
Element desc = doc.createElement("cm:description");
name.setTextContent(id + "-" +value);
identifier.setTextContent(id);
title.setTextContent(value);
desc.setTextContent(value);
headElement.appendChild(name);
headElement.appendChild(identifier);
headElement.appendChild(title);
headElement.appendChild(desc);
rootElement.appendChild(headElement);
Element baseElement = doc.createElement("view:associations");
Element neckElement = doc.createElement("cm:contains");
baseElement.appendChild(neckElement);
rootElement.appendChild(baseElement);
root.appendChild(rootElement);
return neckElement;
}
private Element childHeadElement(Element baseElement, String value, String id)
{
Element record = doc.createElement("rma:recordFolder");
Element returnRecord = doc.createElement("rma:recordFolder");
Attr attr = doc.createAttribute("view:childName");
attr.setValue("cm:" + value);
record.setAttributeNode(attr);
Attr attrb = doc.createAttribute("view:childName");
attrb.setValue("cm:" + value);
returnRecord.setAttributeNode(attrb);
returnRecord.setAttributeNode(attrb);
Element properties = doc.createElement("view:properties");
Element name = doc.createElement("cm:name");
Element identifier = doc.createElement("rma:identifier");
Element title = doc.createElement("cm:title");
name.setTextContent(id + "-" +value);
identifier.setTextContent(id);
title.setTextContent(value);
properties.appendChild(name);
properties.appendChild(identifier);
properties.appendChild(title);
record.appendChild(properties);
baseElement.appendChild(record);
return returnRecord;
}
private Element levelthree(Element level2Element, String value, String id)
{
Element record = doc.createElement("rma:recordFolder");
Attr attr = doc.createAttribute("view:childName");
attr.setValue("cm:" + value);
record.setAttributeNode(attr);
Element properties = doc.createElement("view:properties");
Element name = doc.createElement("cm:name");
Element identifier = doc.createElement("rma:identifier");
Element title = doc.createElement("cm:title");
name.setTextContent(id + "-" +value);
identifier.setTextContent(id);
title.setTextContent(value);
properties.appendChild(name);
properties.appendChild(identifier);
properties.appendChild(title);
record.appendChild(properties);
level2Element.appendChild(record);
return record;
}
private Element levelfour(Element level3Element, String id, String value)
{
Element headElement = doc.createElement("view:associations");
Element neckElement = doc.createElement("cm:contains");
Element record = doc.createElement("rma:recordFolder");
Attr attr = doc.createAttribute("view:childName");
attr.setValue("cm:" + value);
record.setAttributeNode(attr);
Element properties = doc.createElement("view:properties");
Element name = doc.createElement("cm:name");
Element identifier = doc.createElement("rma:identifier");
Element title = doc.createElement("cm:title");
name.setTextContent(id + "-" + value);
identifier.setTextContent(id);
title.setTextContent(value);
properties.appendChild(name);
properties.appendChild(identifier);
properties.appendChild(title);
record.appendChild(properties);
neckElement.appendChild(record);
headElement.appendChild(neckElement);
level3Element.appendChild(headElement);
return headElement;
}
private Element levelFive(Element level4Element, String value, String id)
{
Element headElement = doc.createElement("view:associations");
Element neckElement = doc.createElement("cm:contains");
Element record = doc.createElement("rma:recordFolder");
Attr attr = doc.createAttribute("view:childName");
attr.setValue("cm:" + value);
record.setAttributeNode(attr);
Element properties = doc.createElement("view:properties");
Element name = doc.createElement("cm:name");
Element identifier = doc.createElement("rma:identifier");
Element title = doc.createElement("cm:title");
name.setTextContent(id + "-" +value);
identifier.setTextContent(id);
title.setTextContent(value);
properties.appendChild(name);
properties.appendChild(identifier);
properties.appendChild(title);
record.appendChild(properties);
neckElement.appendChild(record);
headElement.appendChild(neckElement);
level4Element.appendChild(headElement);
return headElement;
}
public static void main(String[] args)
{
try
{
new ExtractWorkbookData().workbook("D:\\MultipleLevel.xls");
}
catch (Exception e)
{
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
Code:
- <view:properties>
<cm:name>7-Records Management</cm:name>
<rma:identifier>7</rma:identifier>
<cm:title>7-Records Management</cm:title>
<cm:description>Record series for Records Management files</cm:description>
</view:properties>
- <view:associations>
- <cm:contains>
- <dod:recordCategory view:childName="cm:Matters regarding RM in general">
- <view:properties>
<cm:name>Matters regarding RM in general</cm:name>
<rma:identifier>7-1</rma:identifier>
<cm:title>Matters regarding RM in general</cm:title>
<cm:description>Matters regarding RM in general</cm:description>
</view:properties>
- <view:associations>
- <cm:contains>
- <rma:recordFolder view:childName="cm:Policy">
- <view:properties>
<cm:name>Policy</cm:name>
<rma:identifier>7-1-P</rma:identifier>
<cm:title>Policy</cm:title>
</view:properties>
</rma:recordFolder>
- <rma:recordFolder view:childName="cm:Routine enquiries">
- <view:properties>
<cm:name>Routine enquiries</cm:name>
<rma:identifier>7-1-R</rma:identifier>
<cm:title>Routine enquiries</cm:title>
</view:properties>
</rma:recordFolder>
- <rma:recordFolder view:childName="cm:Training">
- <view:properties>
<cm:name>Training</cm:name>
<rma:identifier>7-1-1</rma:identifier>
<cm:title>Training</cm:title>
</view:properties>
- <view:associations>
- <cm:contains>
- <rma:recordFolder view:childName="cm:Registry training">
- <view:properties>
<cm:name>Registry training</cm:name>
<rma:identifier>7-1-1-1</rma:identifier>
<cm:title>Registry training</cm:title>
</view:properties>
- <view:associations>
- <cm:contains>
- <rma:recordFolder view:childName="cm:Development of courses">
- <view:properties>
<cm:name>Development of courses</cm:name>
<rma:identifier>7-1-1-1-1</rma:identifier>
<cm:title>Development of courses</cm:title>
</view:properties>
</rma:recordFolder>
The above XML file is the required format, what I have been unable to do is to nest them in the "parent-child-grandchild..." and so on format.
Please help me figure this out, though I have a feeling it's the way I am "appending" the children elements.
Regards
Dark-RedD
Re: JDOM XMl parser following a tree formart "parent-child-grandchild..."
Solved.
Please close the thread.