
Thread: Web Crawler

  1. #1
    zelot (Member)

    Web Crawler

    Well, I have been working on this project for a couple of days but cannot seem to get it functioning. Every time one thing gets fixed, something else shows up, so I'm asking for your help. I don't want to post the entire code in this thread (due to its length), so instead I'll post a link to my NetBeans project.
    LINK!

    I have not started working on the "Matched" JList yet, so do not worry about that part.

    Thanks for any help!

    EDIT:
    When I posted this, I wasn't completely awake and forgot to mention what error I was getting and where. Mainly I have been getting a NullPointerException:

    Java Code:
    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.net.URLConnection;
    import java.util.logging.Level;
    import java.util.logging.Logger;

    public class crawlerthread
            implements Runnable {
        String URL_input;
        String keyword;
        String[] URLs;
        char symbol;
        int lnum = 1;
    
        public crawlerthread(String url_input, String word){
            this.URL_input = url_input;
            this.keyword = word;
        }
    
        //public void start() {
        //}
        String visit(String url){
            String html = null;
            String line = null;
            try {
                URL blah = new URL(url);
                URLConnection blahConnection = blah.openConnection();
                blahConnection.connect();
                BufferedReader in = new BufferedReader(new InputStreamReader(blah.openStream()));
                
                //inputLine = in.readLine();
                while ((line = in.readLine()) != null) {
                    System.out.println(line);
                    html += line;
                }
                in.close();
                WebCrawlerGUI.visited.addElement(URL_input);
            } catch (MalformedURLException ex) {
                Logger.getLogger(crawlerthread.class.getName()).log(Level.SEVERE, null, ex);
            } catch (IOException ex) {
                Logger.getLogger(crawlerthread.class.getName()).log(Level.SEVERE, null, ex);
            }
            return html;
        }
    
        String[] findLinks(String html){
            if(html == null)
                return null;
            String[] links = null;
            int index = 0;
            String lower = html.toLowerCase();
            if((lower.contains("<a"))&&(lower.contains("href"))){
    
                String rest = html.substring(index);
                for(int i=0;(rest.toLowerCase().contains("<a")) && (rest.toLowerCase().contains("href"));i++) {
                    index = rest.indexOf("<a");
                    index = rest.indexOf("href=");
                    index+=5;
                    System.out.println(rest.charAt(index));
                    //char quot = html.charAt(index);
                    String[] array = rest.split("\"", 2); // trying to create the string of the link.
                    System.out.println(array); // trying to show what was in the array
                    links[i] = array[0]; // <--------null pointer error here
                    rest = array[1];
                }
            }
    
            return links;
        }
    
        public void run() {
            boolean firstRun = true;
            while (WebCrawlerGUI.started == 1) {
                //findLinks(visit(URL_input));
                String source;
                if(firstRun == true){
                    firstRun = false;
                    source = visit(URL_input);
                    if(source == null)
                        return;
    
                    URLs = findLinks(source);
                    
                    WebCrawlerGUI.visited.addElement(URL_input);
                } else {
                    String[] NewURLs;
                    for(int i=0; i < URLs.length; i++) {
                        NewURLs = findLinks(visit(URLs[i]));
                        for(int b = 0; b < NewURLs.length; b++) {
                            WebCrawlerGUI.visited.addElement(NewURLs[b]);
                        }
                    }
                }
            } // end while
        } // end run()
    } // end class crawlerthread
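
    The marked line throws because links is never pointed at an actual array: String[] links = null; stays null, so links[i] = array[0] dereferences null, and a fixed-size array could not grow as links are found anyway. Below is a minimal sketch, not the original design, of one way to gather the links into a growable ArrayList and only convert to an array at the end; the class name LinkFinderSketch and the indexOf-based href scan are illustrative assumptions.

    Java Code:
    import java.util.ArrayList;
    import java.util.List;

    public class LinkFinderSketch {

        // Collect every href="..." value on the page into a growable list,
        // then hand back an array so callers never receive null.
        static String[] findLinks(String html) {
            if (html == null) {
                return new String[0];
            }
            List<String> links = new ArrayList<String>();
            String lower = html.toLowerCase();
            int index = 0;
            while (true) {
                int href = lower.indexOf("href=\"", index); // next href attribute
                if (href < 0) {
                    break;                                  // no more links
                }
                int start = href + "href=\"".length();      // first character of the URL
                int end = lower.indexOf('"', start);        // closing quote
                if (end < 0) {
                    break;                                  // malformed attribute
                }
                links.add(html.substring(start, end));      // keep the original casing
                index = end + 1;                            // continue after this link
            }
            return links.toArray(new String[0]);
        }

        public static void main(String[] args) {
            String sample = "<p><a href=\"http://example.com\">example</a></p>";
            for (String link : findLinks(sample)) {
                System.out.println(link);                   // prints http://example.com
            }
        }
    }

    Using a List sidesteps having to know the number of links up front, which is what a plain String[] (and especially one left at null) would require.
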
    Last edited by zelot; 12-22-2010 at 10:28 PM.
