Results 1 to 2 of 2
  1. #1
    mahalakshmi is offline Member
    Join Date
    Feb 2009
    Posts
    1
    Rep Power
    0

    Default retrieval of tags

    hello everybody,

    we r doing project on focussed web crawler. for doing it v want to extract the content of the web page. v have wriiten a program which retrives meta tags. but it is getting first meta tag only for the url in the internet.Here i have attached our code for retriving meta tags. If anyone know this plz edit it and post it to my mail.(mahalakshmideepa@gmail.com)

    import javax.swing.text.*;
    import javax.swing.text.html.*;
    import javax.swing.text.html.parser.*;
    import java.io.*;
    import java.net.*;
    import java.util.*;
    import java.lang.*;
    import java.lang.Object.*;
    import java.net.URISyntaxException;
    import javax.swing.text.html.HTMLEditorKit.ParserCallback ;
    import java.net.URL;
    import javax.swing.text.html.HTMLDocument.Iterator;
    import javax.swing.text.AttributeSet;
    import javax.swing.text.Element;
    import javax.swing.text.ElementIterator;
    import javax.swing.text.StyleConstants;
    import javax.swing.text.html.HTML;
    import javax.swing.text.html.HTMLDocument;
    import javax.swing.text.html.HTMLEditorKit;
    import javax.swing.text.html.parser.ParserDelegator;
    import java.sql.*;


    public class TagStuff
    {
    public TagStuff(String urltry,String word)
    {

    System.out.println("me tagstuff came" );
    System.out.println(word );
    URI uri=null;
    URL url=null;
    final String word1=word;
    final String relurl=urltry;
    try
    {
    HTMLDocument doc =new HTMLDocument()
    {
    String ar[]=new String[100];
    int j;
    int flag=0;
    // int c;
    public HTMLEditorKit.ParserCallback getReader(int pos)
    {

    return new HTMLEditorKit.ParserCallback()
    {

    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos)
    {



    // c=t.getTagCount();
    // System.out.println("the tag count is "+c);
    if(t==HTML.Tag.META)
    {
    String name1=(String)a.getAttribute(HTML.Attribute.NAME);
    if(name1!=null)
    {
    System.out.println("META name1: " + name1);
    }
    String content1=(String)a.getAttribute(HTML.Attribute.CON TENT);
    if(content1!=null)
    {
    System.out.println("META content1: " + content1);
    ar[j]=content1;
    System.out.println(" the value is"+ ar[j]);
    String spl[]=ar[j].split(" ");
    for(int i=0;i<spl.length;i++)
    {
    if(spl[i].equals(word1))
    {
    System.out.println("equal");
    flag=1;
    }
    }
    j++;

    }
    }
    }
    };
    }

    };




    uri=new URI(urltry);
    url=uri.toURL();

    URLConnection con=url.openConnection();
    Reader rd=new InputStreamReader(con.getInputStream());
    EditorKit kit=new HTMLEditorKit();
    doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);
    int tagCount=0;
    HTMLDocument.Iterator tagIterator=doc.getIterator(HTML.Tag.META);
    while (tagIterator.isValid()){
    tagCount++;
    tagIterator.next();
    System.out.println("tagCount="+tagCount);
    }

    kit.read(rd, doc, 0);

    try {
    Class.forName("sun.jdbc.odbc.JdbcOdbcDriver");
    Connection conn = DriverManager.getConnection("jdbc:odbc:dblink");
    Statement stamt=conn.createStatement();
    stamt.executeUpdate("insert into relatedlinks values('"+relurl+"')");

    }
    catch(Exception e) {
    System.out.println("Error" +e);
    }

    }


    catch(MalformedURLException e){System.out.println(e);}
    catch(URISyntaxException e){System.out.println(e);}
    catch(BadLocationException e){System.out.println(e);}
    catch(IOException e){System.out.println(e);}
    catch (Exception e) {e.printStackTrace();}
    }

    }



    bye.

  2. #2
    masijade is offline Senior Member
    Join Date
    Jun 2008
    Posts
    2,571
    Rep Power
    9

    Default

    Quote Originally Posted by mahalakshmi View Post
    hello everybody,

    we r doing project on focussed web crawler. for doing it v want to extract the content of the web page. v have wriiten a program which retrives meta tags. but it is getting first meta tag only for the url in the internet.Here i have attached our code for retriving meta tags. *If anyone know this plz edit it and post it to my mail*.(mahalakshmideepa@gmail.com)
    Good luck.

    For one, almost noone here is going to read such a large mess of unformatted junk. Use code tags when posting code.

    For two, that "request" (read demand) belongs at Rent A Coder: How Software Gets Done -- Home of the worlds' largest number of completed software projects not here.

    For three, I hope the spam search engines find that email address of yours.

    mahalakshmideepa
    mahalakshmideepa
    mahalakshmideepa
    mahalakshmideepa
    mahalakshmideepa
    mahalakshmideepa
    mahalakshmideepa
    mahalakshmideepa
    Last edited by masijade; 02-23-2009 at 02:01 PM.

Similar Threads

  1. Replies: 6
    Last Post: 08-22-2008, 01:52 PM
  2. Replies: 0
    Last Post: 03-11-2008, 03:20 PM
  3. Lucene Image REtrieval 0.5.4
    By JavaBean in forum Java Software
    Replies: 0
    Last Post: 07-11-2007, 04:54 PM
  4. Help with Tags JSP
    By Marcus in forum JavaServer Pages (JSP) and JSTL
    Replies: 2
    Last Post: 07-04-2007, 03:53 PM

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •