7 Replies Latest reply: Dec 4, 2006 6:39 AM by 807607 RSS

    Sax Writing

    807607
      Hi guys, I have a large app which used to write to disk using DOM, but I had to change to using a SAX parser for reading the object back into memory, as the DOM was too large to build. So I now have the following for writing the XML file:
      public class WriteToXML 
      {
              
          // Shared JAXP state. initialise() assigns all three of these on each call.
          private static DocumentBuilderFactory docBuilderFactory;
          private static DocumentBuilder docBuilder;
          // The DOM tree that buildDom() populates and transform() serialises.
          private static Document document;
          
          //private static Collection collection;
          
          // Not read by any of the code visible in this listing.
          private boolean debug = true;
          
          public static void initialise()
          {
              docBuilderFactory = DocumentBuilderFactory.newInstance();
              
              try 
              {
                  docBuilder = docBuilderFactory.newDocumentBuilder();
              } 
              catch (ParserConfigurationException ex) 
              {
                  ex.printStackTrace();
              }
              
              document = docBuilder.newDocument();
          }
          
          public static void writeCollection( Collection aCollection, File fileName )
          {
              initialise();
              System.out.println( "Building Dom" );
              buildDom( aCollection );
              System.out.println( "Writing To Disk" );
              transform( fileName );
              System.out.println( "Done...");
          }
          
          
          public static void buildDom( Collection collection )
          {
              TestCollection[ ] testCollections = collection.getTestCollections();
              
              System.out.println( "Writing Collection to disk..." );
              System.out.println( "Writing " + testCollections.length + " test collections..." );
              
              //RE = rootElement
              Element root = document.createElement( "RE" );
              
              Element protocol = document.createElement( "p" );
              Protocol p = collection.getProtocol();
              protocol.setAttribute( "c", p.isConcatenated().toString() );
              protocol.setAttribute( "s", p.isStatic().toString() );
              
              Element trainDirArray = document.createElement( "trdl" );
              File[ ] tempArray = collection.getTrainingDirs();
              
              for( int i = 0; i < tempArray.length; i++ )
              {
                  Element trainDir = document.createElement( "trd" );
                  trainDir.setAttribute( "fn", tempArray[ i ].getAbsolutePath() );
                  trainDirArray.appendChild( trainDir );
              }
              
              Element testElement = document.createElement( "td" );
              File testDir = collection.getTestingDir();
              testElement.setAttribute( "fn", testDir.getAbsolutePath() );
              
              for (int i = 0; i < testCollections.length; i++) 
              {
                  TestCollection currentCollection = collection.getTestCollection( i );
                  //System.out.println( "Saving "+ currentCollection.getName() + "..." );
                  
                  //TC = testCollection
                  //tfn = testFileName
                  //tc = testCategory
                  //fl = fileLength
                  Element testCollection = document.createElement( "TC" );
                  testCollection.setAttribute( "tfn", currentCollection.getTestFileName() );
                  //testCollection.setAttribute( "tc", currentCollection.getTestCategory() ); 
                  testCollection.setAttribute( "fl", currentCollection.getFileLength().toString() );
      
                  for( int j = 0; j < currentCollection.numComparisons(); j++ )
                  {
                      Comparison currentComparison = currentCollection.getComparison( j );
                      //System.out.println( "Current comparison: " + currentComparison.getNameOfComparisons() );
                      
                      //c = comparison
                      //trfn = trainFileName
                      //trc = trainCategory
                      Element comparison = document.createElement( "c" );                
                      comparison.setAttribute( "trfn", currentComparison.getTrainFileName() );
                      //comparison.setAttribute( "trc", currentComparison.getTrainCategory() );
                      
                      System.out.print("$");
                      
                      for( Integer k = 0; k < currentComparison.getNumCCounts(); k++ )
                      {
                          // r = result
                          // t = type
                          // c = c measure
                          // o = order
                          // v = value
                          Element result = document.createElement( "r" );
                          result.setAttribute( "t", "C" );
                          result.setAttribute( "o", k.toString() );
                          result.setAttribute( "v", currentComparison.getcCount( k ).toString() );
                          
                          comparison.appendChild( result );
                      }
                      
                      testCollection.appendChild( comparison );                                                                                       
                  }
                  
                  root.appendChild( testCollection );
                  root.appendChild( protocol );
                  root.appendChild( trainDirArray );
                  root.appendChild( testElement );
              }
              
              document.appendChild( root );
              
      
          
          }
          
          public static void transform( File fileName )
          {
               try 
               {
               // Prepare the DOM document for writing
               Source source = new DOMSource( document );
               
               // Prepare the output file
               Result result = new StreamResult( fileName );
               
               // Write the DOM document to the file
               Transformer xformer = TransformerFactory.newInstance().newTransformer();
               xformer.transform( source, result );
               
              } 
              catch( TransformerConfigurationException e ) 
              {
                  System.out.println( "TransformerConfigurationException has occurred" );
              } 
              catch(TransformerException e) 
              {
                  System.out.println( "TransformerException has occurred" );
              }
          }
      and i use the following to read it back in:
      public class XMLParser extends DefaultHandler
      {
          static Collection collection = new Collection();
          List<Short> cList;
          List<Comparison> comparisonList;
          //String trainCategory;
          File trainFileName;
          File testFileName;
          //String testCategory;
          TestCollection tc;
          List<TestCollection> testCollectionList;
          List<File> testFileNameList = new ArrayList<File>();
          List<File> trainFileNameList = new ArrayList<File>();
          //List<String> testCategoryList = new ArrayList<String>();
          //List<String> trainCategoryList = new ArrayList<String>();
          boolean allTrainsAdded = false;
          Protocol protocol;
          List<File> trainingDirList;
          File testingDir;
          
          /** Creates a new instance of XMLParser */
          public XMLParser() {
          }
          
          public static Collection read( File aFile )
          {
              SAXParserFactory spf = SAXParserFactory.newInstance();
              SAXParser sp;
              try {
                  sp = spf.newSAXParser();
      
                  sp.parse( aFile, new XMLParser() );
                  
              } catch (IOException ex) {
                  ex.printStackTrace();
              } catch (SAXException ex) {
                  ex.printStackTrace();
              } catch (ParserConfigurationException ex) {
                  ex.printStackTrace();
              }
              return collection;
      
          }
          
          public void startElement(String uri,String localName,String qName, Attributes attributes)
          {
              /*System.out.println("uri: " + uri);
              System.out.println("localName: " + localName);
              System.out.println("qname: " + qName);
              String[ ] attArray = new String[ attributes.getLength() ];
              
              for( int i = 0; i < attArray.length; i++ )
              {
                  String localN = attributes.getLocalName();
                  attArray[ i ] = attributes.getValue( i );
                  System.out.println( "attArray[ " + i + " ] = " + attArray[ i ] );
              }*/
              //System.out.println("Start element: " + qName + ", numElements: " + attributes.getLength());
              if( qName.equals( "RE" ) )
              {
                  testCollectionList = new ArrayList<TestCollection>();
              }
              else if( qName.equals( "p") )
              {
                  boolean isConcatenated = new Boolean( attributes.getValue( "c" ) );
                  boolean isStatic = new Boolean( attributes.getValue( "s" ) );
                  protocol = new Protocol( isConcatenated, isStatic );
              }
              else if( qName.equals( "trdl" ) )
              {
                  trainingDirList = new ArrayList<File>();
              }
              else if( qName.equals( "trd" ) )
              {
                  File trainDir = new File( attributes.getValue( "fn" ) );
                  trainingDirList.add( trainDir );
              }
              else if( qName.equals( "td" ) )
              {
                  testingDir = new File( attributes.getValue( "fn" ) );
              }
              else if( qName.equals( "TC" ) )
              {
                  comparisonList = new ArrayList<Comparison>();
                 
                  testFileName = new File( attributes.getValue( "tfn" ) );
                  testFileNameList.add( testFileName );
                  //testCategory = attributes.getValue( "tc" );
                  //testCategoryList.add( testCategory );
                  int fileLength = Integer.parseInt( attributes.getValue( "fl" ) );
                  
                  tc = new TestCollection( );
                  
                  tc.setFileLength( fileLength );
                  tc.setTestFileName( testFileName );
                  //tc.setTestCategory( testCategory );
              }
              else if ( qName.equals( "r" ) )
              {
               String order = attributes.getValue( "o" );
                  String type = attributes.getValue( "t" );
                  String value = attributes.getValue( "v" );
      
                  cList.add( Short.parseShort( order ), new Short( value ) );
                  
              }
              else if( qName.equals( "c" ) )
              {
                  cList = new ArrayList<Short>();
      
                  //trainCategory = attributes.getValue( "trc" );
                  trainFileName = new File( attributes.getValue( "trfn" ) );
                  if( !allTrainsAdded )
                  {
                      trainFileNameList.add( trainFileName );
                      //trainCategoryList.add( trainCategory );
                  }
              }
      
          }
          public void characters(char []ch,int start,int length)
          {
              String str=new String(ch,start,length);
              System.out.print(str);
          }
          public void endElement(String uri,String localName,String qName)
          {
              if (qName.equals( "c") )
              {
                  allTrainsAdded = true;
                  short[ ] cCounts = new short[ cList.size() ];       
                  for( int i = 0; i < cCounts.length; i++ )
                  {
                      cCounts[ i ] = cList.get( i );
                  }
                  
                  Comparison c = new Comparison( testFileName, trainFileName, tc );
                  c.setcCounts( cCounts );
                  this.comparisonList.add( c );
      
              }
              else if( qName.equals( "TC" ) )
              {
                  Comparison[ ] comparisons = new Comparison[ comparisonList.size() ];
                  comparisonList.toArray( comparisons );            
                  
                  tc.setComparisons( comparisons );
                  
                  testCollectionList.add( tc );
              }
              else if( qName.equals( "RE" ) )
              {
                  TestCollection[ ] testCollections = new TestCollection[ testCollectionList.size() ];
                  testCollectionList.toArray( testCollections );
                  collection.setTestCollections( testCollections );
                  
                  File[ ] testFileNames = new File[ testFileNameList.size() ];
                  testFileNameList.toArray( testFileNames );
                  collection.setTestingFiles( testFileNames );
                  
                  //String[ ] testCategories = new String[ testCategoryList.size() ];
                  //testCategoryList.toArray( testCategories );
                  //collection.setTestCategories( testCategories );
                  
                  File[ ] trainingFileNames = new File[ trainFileNameList.size() ];
                  trainFileNameList.toArray( trainingFileNames );
                  collection.setTrainingFiles( trainingFileNames );
                  
                  //String[ ] trainingCategories = new String[ trainCategoryList.size() ];
                  //trainCategoryList.toArray( trainingCategories );
                  //collection.setTrainingCategories( trainingCategories );
                  
                  collection.setProtocol( protocol );
                  
                  File[ ] trainingDirs = new File[ trainingDirList.size() ];
                  trainingDirList.toArray( trainingDirs );            
                  collection.setTrainingDirs( trainingDirs );
                  
                  collection.setTestingDir( testingDir );
              }
           //else
               //System.out.println("End element:   {" + uri + "}" + localName);
      
          }
      }
      Now the problem is that, since I am using larger datasets, I run out of memory before I even get the chance to write the XML file. So what I think I need is a SAX writer, as I need to be able to append data to the current XML file. This is because my code is two for loops which test a training file against each of the testing files, so after each training file has been processed I could write that data before continuing on to the next.

      So the part of the writer where I create a test collection is the bit that will change. What I want to do is read up to the point where the last test collection ends: /TC is the end of a test collection, but after the end of the last test collection there will be /RE, I guess, since once I am at the end of the last test collection I am at the end of the collection.

      Could someone with knowledge of XML writing help me with the code needed to do this, preferably using SAX? I don't like to use external packages; I prefer to use Java's own packages so that I get a better understanding in future.

      Thanks in advance to anyone who is able to help with this. I realise you guys are probably busy, but a few moments of your time would be greatly appreciated.

      thanks

      Danny =)

      Message was edited by:
      dannythomas13
        • 1. Re: Sax Writing
          807607
          sorry but has nobody got any idea how to do this?
          • 2. Re: Sax Writing
            DrClap
            The standard way to write XML using the JAXP classes is to use an identity Transformer (one without an XSLT attached to it). You can send SAX events to such a thing using a SAX filter. But I believe that the Transformer builds an internal tree before writing anything. I know that it does when there's an XSLT involved but I don't know if it's smart enough to skip that step when it's an identity Transformer. You could try that; read this tutorial for more information on the SAX filter idea:

            http://www.cafeconleche.org/books/xmljava/chapters/ch08s05.html

            (Disregard its title, the concepts apply to you even though your data is coming from an XML source.)

            If that doesn't work then find an XML serializer that can accept SAX events or some similar serialization, and use that.
            • 3. Re: Sax Writing
              807607
              Doesn't the standard way of writing using the SAX classes not build the tree in memory? This is what I was led to believe, and this is what I would like help with coding.
              • 4. Re: Sax Writing
                807607
                Hi, I have now found the following code, which seems to be along the right lines:
                import org.xml.sax.ContentHandler;
                
                import org.xml.sax.SAXException;
                
                import org.xml.sax.AttributesImpl;
                
                public void parse(InputSource input) throws SAXException {
                
                  PersonInputSource pin = (PersonInputSource)input;
                
                  Person person = pin.getPerson();
                
                  ContentHandler handler = getContentHandler();
                
                  AttributesImpl atts = new AttributesImpl();
                
                  // uri, localName, xml1.0name, type, value
                
                  atts.addAttribute ("", "name", "", "CDATA", person.getName());
                
                  atts.addAttribute ("", "age", "", "CDATA", person.getAge());
                
                  // fire SAX events
                
                  handler.startDocument();
                
                  handler.startElement ("", "person", "", atts);
                
                  handler.endElement ("", "person", "");
                
                  handler.endDocument ();
                
                }
                and:
                // The object to be written out.
                Person fred = new Person("Fred", "52");

                // The reader is the event source; the pretty printer receives the events.
                PersonXMLReader reader = new PersonXMLReader();
                reader.setContentHandler( new XMLPrettyPrinter() );

                // Hand the wrapped Person to the reader.
                reader.parse( new PersonInputSource(fred) );
                Now can anyone tell me what code is missing to write the file? Also, as the parser in this case only deals with one object type, and in my case I use an object which holds other objects, would I take in my outer object and then call parse on the others, using instanceof to differentiate between the different objects that are passed?
                • 5. Re: Sax Writing
                  807607
                  is this going down the right path?
                  public class XMLWriter {
                      
                      /** Creates a new instance of XMLWriter */
                      public XMLWriter() {
                      }
                      
                      public void parse(Object input) throws SAXException {
                  
                    Collection c = (Collection)input;
                    
                    ContentHandler handler = getContentHandler();
                  
                    AttributesImpl atts = new AttributesImpl();
                  
                    // uri, localName, xml1.0name, type, value
                  
                    atts.addAttribute ("", "name", "", "CDATA", person.getName());
                  
                    atts.addAttribute ("", "age", "", "CDATA", person.getAge());
                  
                    // fire SAX events
                  
                    handler.startDocument();
                  
                    
                  
                    handler.endDocument ();
                    
                    TestCollection[ ] tcArray = c.getTestCollections();
                    
                    for( TestCollection i : tcArray )
                    {
                          handler.startElement ("", "tc", "", atts);
                  
                          handler.endElement ("", "tc", "");
                    }
                  
                    
                  
                  }
                  And then get startElement to worry about the elements within a TestCollection? Also, my getContentHandler method cannot be found; can anyone help with this?
                  thanks
                  • 6. Re: Sax Writing
                    807607
                    Sorry, but I am still having problems with this, as the tree is being built in memory; can someone please help? I could output the data as text, but XML really does look much better, unless there is a way I could change the text representation of the data into XML for when it is read back in or for when I want to read it myself.
                    thanks
                    Danny =)
                    • 7. Re: Sax Writing
                      807607
                      Does this help?

                      http://www-128.ibm.com/developerworks/xml/library/x-tipstx4/