Results 1 to 11 of 11
Like Tree1Likes
  • 1 Post By toadaly

Thread: How to use multiple threads to speed processing

  1. #1
    gschrade is offline Member
    Join Date
    Mar 2011
    Posts
    3
    Rep Power
    0

    Default How to use multiple threads to speed processing

    I am new to Java Threads. I would like to use threads to speed the processing of the following program. This is the pojo that does the work:

    Java Code:
       1. import java.io.File;  
       2. import java.io.FileInputStream;  
       3. import java.io.FileNotFoundException;  
       4. import java.io.InputStream;  
       5. import java.math.BigInteger;  
       6. import java.security.MessageDigest;  
       7. import java.util.ArrayList;  
       8. import java.util.HashMap;  
       9.   
      10. public class FileFinder {  // Walks a directory tree, SHA-256 hashes every regular file, and records the paths of files whose hash was already seen.
      11.   
      12.     private File originalFileObject;  // assigned in the constructor; never read anywhere in this listing
      13.     private File fileObject;  // root that traverse() hands to recursiveTraversal()
      14.     private HashMap<String,String> allFiles;  // holds two kinds of entries: hash -> path and path -> file name (see the two put() calls below)
      15.     private ArrayList<String> duplicateFiles;  // absolute paths of files whose hash was already present in allFiles
      16.     private int directoryCount = 0;  // directories visited, the root included when it is a directory
      17.     private int fileCount = 0;  // regular files visited (incremented before the file is opened)
      18.   
      19.     public FileFinder(File fileObject)  // Stores the root to scan and creates the empty collections.
      20.     {  
      21.         this.originalFileObject = fileObject;  
      22.         this.fileObject = fileObject;  
      23.         this.allFiles = new HashMap<String,String>();  
      24.         this.duplicateFiles = new ArrayList<String>();  
      25.     }  
      26.   
      27.     public void traverse()  // Starts the scan at the root passed to the constructor.
      28.     {  
      29.         recursiveTraversal(fileObject);  
      30.     }  
      31.   
      32.     public void recursiveTraversal(File fileObject)  // Recurses into directories; hashes each file and records it as first-seen or duplicate.
      33.     {         
      34.         if (fileObject.isDirectory())  
      35.         {  
      36.             directoryCount++;  
      37.             File allFiles[] = fileObject.listFiles();  // local array shadows the allFiles field; NOTE(review): listFiles() can return null (e.g. unreadable directory), which would make the loop below throw - confirm
      38.             for(File aFile : allFiles)  
      39.             {  
      40.                 recursiveTraversal(aFile);  
      41.             }  
      42.         }  
      43.         else if (fileObject.isFile())  
      44.         {  
      45.             fileCount++;  
      46.             try  
      47.             {  
      48.                 InputStream is = new FileInputStream(fileObject);  // NOTE(review): no close() is called on this stream anywhere in the listing
      49.                 String hashedFile = getHashedFileString(is);  // null when hashing failed (see getHashedFileString)
      50.                 String filePath = fileObject.getAbsolutePath();  
      51.                 String fileName = fileObject.getName();  
      52.                 if (allFiles.containsKey(hashedFile))  // true when an earlier file produced the same hash string (or the same null after a hashing failure)
      53.                 {  
      54.                     //System.out.println("Found duplicate file!");  
      55.                     duplicateFiles.add(filePath);  
      56.                 }  
      57.                 else  
      58.                 {  
      59.                     //System.out.println("No duplicate found.");  
      60.                     allFiles.put(hashedFile,filePath);  // hash -> path of the first file seen with this hash
      61.                     allFiles.put(filePath,fileName);  // path -> name, stored in the same map as the hash entries
      62.                 }  
      63.             }  
      64.             catch (FileNotFoundException e)  // the file could not be opened: logged and skipped (fileCount was already incremented)
      65.             {  
      66.                 e.printStackTrace(System.err);  
      67.             }  
      68.         }         
      69.     }  
      70.       
      71.     public ArrayList<String> getDuplicateFiles ()  // Returns the internal list itself, not a copy.
      72.     {  
      73.         return duplicateFiles;  
      74.     }  
      75.     public int getDirectoryCount()  // Number of directories visited so far.
      76.     {  
      77.         return directoryCount;  
      78.     }  
      79.     public int getFileCount()  // Number of regular files visited so far.
      80.     {  
      81.         return fileCount;  
      82.     }  
      83.         private String getHashedFileString(InputStream is )   // Returns the SHA-256 digest of the stream as a hex string, or null if any exception occurs.
      84.     {          
      85.         String output;          
      86.         int read;          
      87.         byte[] buffer = new byte[8192];          
      88.         try   
      89.         {              
      90.             MessageDigest digest = MessageDigest.getInstance("SHA-256"); //"MD5");              
      91.             while ((read = is.read(buffer)) > 0)   // loop ends when read() returns 0 or -1
      92.             {                  
      93.                 digest.update(buffer, 0, read);              
      94.             }              
      95.             byte[] hash = digest.digest();              
      96.             BigInteger bigInt = new BigInteger(1, hash);              // signum 1: treat the digest bytes as a non-negative number
      97.             output = bigInt.toString(16);          // base-16 text; leading zero digits are not padded
      98.         }           
      99.         catch (Exception e)   // any failure is logged and reported to the caller as null
     100.         {              
     101.             e.printStackTrace( System.err );              
     102.             return null;          
     103.         }          
     104.         return output;      
     105.     }  
     106. }
    The bottleneck appears to be in the getHashedFileString method. This is the main class:

    Java Code:
       1. import java.io.File;  
       2. import java.util.ArrayList;  
       3.   
       4. public class FileFinderTest {  // Command-line driver: scans one folder with FileFinder and prints the counts and elapsed time.
       5.     public static void main(String[] args)   // args is not used; the folder to scan is hard-coded below
       6.     {  
       7.   
       8.         long begin = System.currentTimeMillis();  
       9.         String folderPath = "C:\\Program Files (x86)";  
      10.         System.out.println("Traversing directories");  
      11.         FileFinder ff = new FileFinder(new File(folderPath));  
      12.         ff.traverse();  
      13.         ArrayList<String> identicalFiles = ff.getDuplicateFiles();  
      14.         System.out.println("Directories: "+ff.getDirectoryCount());  
      15.         System.out.println("Files : "+ff.getFileCount());  
      16.         System.out.println("Duplicate files: "+identicalFiles.size());  
      17.         long end = System.currentTimeMillis();  
      18.         long diff = (end - begin)/1000/60;  // integer division: elapsed time truncated to whole minutes
      19.         System.out.println("Time: "+diff+" minutes");  
      20.     }  
      21. }
    Is there a way to speed up the processing using threads? I have heard of ExecutorService Threads and Runnable, but cannot figure out how to use them here.

    Thanks in advance!

  2. #2
    Hibernate's Avatar
    Hibernate is offline Senior Member
    Join Date
    Dec 2010
    Location
    Stockholm, Sweden
    Posts
    222
    Blog Entries
    9
    Rep Power
    4

    Default

    I've never used ExecutorService, but the example in the Javadoc seems to explain how to use it.

    Since Java does not support multiple CPU(-cores), threading will
    only enhance the speed if you are using slow I/O (like writing to a hard drive or sending data over the Internet or an intranet).

    There are different ways to use a Thread, I think the easiest way is:
    Java Code:
    // Build the task first, hand it to a Thread, then start the thread.
    final Runnable task = new RunnableClass(/*parameters*/);
    final Thread thread = new Thread(task);
    thread.start();
    RunnableClass implements Runnable, add the logic to run().
    See Javadoc for the methods setDaemon and setPriority.
    You can specify the maximum stack size, etc., for the thread in the constructor.
    Ex animo! Hibernate
    Java, Arch Linux, C, GPL v3, Bash, Eclipse, Linux VT, GNOME 2 and many buttons on windows.

  3. #3
    gschrade is offline Member
    Join Date
    Mar 2011
    Posts
    3
    Rep Power
    0

    Default

    I have read that documentation and read about Thread and Runnable, but none of that has helped me figure out this real-world problem.

  4. #4
    toadaly is offline Senior Member
    Join Date
    Jan 2009
    Posts
    671
    Rep Power
    6

    Default

    Quote Originally Posted by Hibernate View Post
    Since Java does not support multiple CPU(-cores), threading will
    only enhance the speed if you are using slow I/O (like writing to a hard drive or sending data over the Internet or an intranet).
    Huh? Java can use multiple cores. Did you know IBM's Watson (the one that recently beat the world masters of Jeopardy) was written mostly in Java using thousands of processors?
    Fubarable likes this.

  5. #5
    toadaly is offline Senior Member
    Join Date
    Jan 2009
    Posts
    671
    Rep Power
    6

    Default

    Inside your recursiveTraversal method, you could launch a new Thread (or use a Thread pool) to traverse directories in parallel.

    You might take a look at ThreadPoolExecutor. The way you would use it in this context is to create a Runnable for each directory. You would enqueue those Runnables into the ThreadPoolExecutor's queue (the one you passed to the constructor). Each Runnable would perform what you have in lines 36 through 41. You'll need to be careful around lines 52 to 62 to prevent multiple threads from modifying allFiles and duplicateFiles simultaneously. As a first cut, put a synchronized block around that code.

  6. #6
    gschrade is offline Member
    Join Date
    Mar 2011
    Posts
    3
    Rep Power
    0

    Default examples?

    Do you know of any example code that shows how to do this?

  7. #7
    toadaly is offline Senior Member
    Join Date
    Jan 2009
    Posts
    671
    Rep Power
    6

    Default

    Java Code:
    import java.util.concurrent.*;
    
    /**
     * Minimal demonstration of running tasks on a {@link ThreadPoolExecutor}.
     *
     * <p>Twenty small tasks are queued; each prints the name of the worker
     * thread that executes it, which shows the tasks being shared among the
     * pool's threads.
     */
    public class Demo {

      /**
       * Number of worker threads. Core and maximum pool size are both set to
       * this value: with an unbounded queue the executor never creates more
       * than the core number of threads, so a larger maximum has no effect.
       */
      private static final int POOL_SIZE = 3;

      /** Number of demo tasks submitted to the pool. */
      private static final int TASK_COUNT = 20;

      /** Upper bound, in seconds, on how long main waits for the tasks to finish. */
      private static final long AWAIT_SECONDS = 30;

      /**
       * Submits the demo tasks, shuts the pool down, and waits for the queued
       * tasks to complete before returning.
       *
       * @param args ignored
       * @throws Exception if the wait for termination is interrupted
       */
      public static void main(String[] args) throws Exception {

        BlockingQueue<Runnable> queue = new LinkedBlockingQueue<Runnable>();

        ThreadPoolExecutor executor =
          new ThreadPoolExecutor(POOL_SIZE, POOL_SIZE, 1000, TimeUnit.MILLISECONDS, queue);

        try {
          for (int i = 0; i < TASK_COUNT; i++) {
            executor.execute(new Runnable() {
              @Override
              public void run() {
                System.out.println("Thread name = "
                    + Thread.currentThread().getName());
              }
            });
          }
        } finally {
          // Stop accepting new work even if a submission failed; tasks
          // already queued still run.
          executor.shutdown();
        }

        // shutdown() does not block, so wait here until the queue has drained.
        if (!executor.awaitTermination(AWAIT_SECONDS, TimeUnit.SECONDS)) {
          executor.shutdownNow();
        }
      }

    }

  8. #8
    Hibernate's Avatar
    Hibernate is offline Senior Member
    Join Date
    Dec 2010
    Location
    Stockholm, Sweden
    Posts
    222
    Blog Entries
    9
    Rep Power
    4

    Default

    Quote Originally Posted by toadaly View Post
    Huh? Java can use multiple cores. Did you know IBM's Watson (the one that recently beat the world masters of Jeopardy) was written mostly in Java using thousands of processors?
    Yes, it can, but when I posted that, the only thing I had read was that it couldn't but that Java 7 would.
    Ex animo! Hibernate
    Java, Arch Linux, C, GPL v3, Bash, Eclipse, Linux VT, GNOME 2 and many buttons on windows.

  9. #9
    kjkrum's Avatar
    kjkrum is online now Senior Member
    Join Date
    Apr 2011
    Location
    Tucson, AZ
    Posts
    1,060
    Rep Power
    6

    Default

    Java has taken advantage of OS threads for a long time. Back when Linux used to show every POSIX thread as a separate process, people used to freak out because they'd launch a Java app and see 20 copies running... each appearing to use a large chunk of memory. (In actuality, they were all using the same chunk of memory.)
    Get in the habit of using standard Java naming conventions!

  10. #10
    Hibernate's Avatar
    Hibernate is offline Senior Member
    Join Date
    Dec 2010
    Location
    Stockholm, Sweden
    Posts
    222
    Blog Entries
    9
    Rep Power
    4

    Default

    Quote Originally Posted by kjkrum View Post
    Java app and see 20 copies running...
    Is that why htop shows like ten identical copies of my Java application?
    Ex animo! Hibernate
    Java, Arch Linux, C, GPL v3, Bash, Eclipse, Linux VT, GNOME 2 and many buttons on windows.

  11. #11
    kjkrum's Avatar
    kjkrum is online now Senior Member
    Join Date
    Apr 2011
    Location
    Tucson, AZ
    Posts
    1,060
    Rep Power
    6

    Default

    Maybe. I haven't seen it do that since, like, the 2.2 kernel days. (Isn't 2.2 still used a lot for embedded systems?)
    Get in the habit of using standard Java naming conventions!

Similar Threads

  1. Running multiple threads on multiple CPU cores?
    By Dosta in forum Threads and Synchronization
    Replies: 2
    Last Post: 09-19-2010, 03:48 PM
  2. One socket and multiple threads
    By DC200 in forum Networking
    Replies: 1
    Last Post: 12-30-2009, 06:32 AM
  3. Replies: 4
    Last Post: 09-30-2009, 09:49 AM
  4. multiple threads access the same method.
    By bhanu in forum New To Java
    Replies: 3
    Last Post: 02-16-2009, 06:54 AM
  5. Privileged processing of multiple images?
    By peci1 in forum Java Applets
    Replies: 0
    Last Post: 11-20-2007, 11:57 PM

Tags for this Thread

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •