+ Start a Discussion
Jeffrey Zhang

Large Bulk API query too slow

// Snippet: creates a Bulk API query job for Account (parallel, CSV), submits
// one SOQL batch, polls the batch every 30 seconds until it completes or fails,
// then reads the result CSV into memory and parses it into Account objects.
// `sfc` and `inputStream` are declared outside this snippet.
JobInfo job = new JobInfo();
			    job.setObject("Account");
			    job.setOperation(OperationEnum.query);
			    job.setConcurrencyMode(ConcurrencyMode.Parallel);
			    job.setContentType(ContentType.CSV);
			    job = sfc.getBulkConnection().createJob(job);
			    assert job.getId() != null;
			    // Re-fetch the job by id so `job` holds the server-side status.
			    job = sfc.getBulkConnection().getJobStatus(job.getId());
			    String query = "SELECT Name, Id FROM Account";
			    // NOTE(review): `start` is not read anywhere in the visible snippet.
			    long start = System.currentTimeMillis();
			    BatchInfo info = null;
			    // The SOQL text is the batch payload. getBytes() is called without a
			    // charset argument here.
			    ByteArrayInputStream bout = 
			        new ByteArrayInputStream(query.getBytes());
			    info = sfc.getBulkConnection().createBatchFromStream(job, bout);
			    
			    // Stays null unless the batch reaches the Completed state below.
			    String[] queryResults = null;
			    
			    // Poll the batch state: up to 10000 attempts, sleeping 30 s before each.
			    for(int i=0; i<10000; i++) {
			      Thread.sleep(30000); //30 sec
			      info = sfc.getBulkConnection().getBatchInfo(job.getId(), 
			          info.getId());
			      
			      if (info.getState() == BatchStateEnum.Completed) {
			        // Completed: collect the result ids and stop polling.
			        QueryResultList list = 
			        		sfc.getBulkConnection().getQueryResultList(job.getId(), 
			                info.getId());
			        queryResults = list.getResult();
			        break;
			      } else if (info.getState() == BatchStateEnum.Failed) {
			        // Failed: log and stop polling; queryResults remains null.
			        System.out.println("-------------- failed ----------" 
			            + info);
			        break;
			      } else {
			        // Any other state: log and keep waiting.
			        System.out.println("-------------- waiting ----------" 
			            + info);
			      }
			    }
			    System.out.println("Account!");
			    
			    // NOTE(review): this `if` block is not closed within the snippet.
			    if (queryResults != null) {
			      // Each iteration overwrites `inputStream`, so when there is more than
			      // one result id only the last stream is still referenced after the loop.
			      // NOTE(review): the cast assumes getQueryResultStream returns a
			      // ByteArrayInputStream — confirm against the client library.
			      for (String resultId : queryResults) {
			    	 inputStream= (ByteArrayInputStream) sfc.getBulkConnection().getQueryResultStream(job.getId(), 
			            info.getId(), resultId);
			      }
			      

			      
			      //ArrayList<Account> beans = (ArrayList<Account>) rowProcessor.getBeans();
			      
			     // System.out.println("array of accounts:"+beans.toString());
				    System.out.println("Account2!");

			      
			      // Copy the stream into a byte array with one available()/read() pair.
			      // NOTE(review): this assumes the whole result is already buffered in
			      // memory, as the ByteArrayInputStream cast implies — confirm.
			      int c = inputStream.available();
				    System.out.println("Account3!");

			      byte[] bytes = new byte[c];
			      inputStream.read(bytes, 0, c);
				    System.out.println("Account4!");

			      String s = new String(bytes, StandardCharsets.UTF_8); // Or any encoding.
			     // Removes every double-quote character from the CSV text.
			     s= s.replaceAll("\"", "");
				    System.out.println("Account5!");

		    	  // Prints the entire result text, then its length in characters.
		    	  System.out.println("results!:"+ s);
		    	  System.out.println("size:"+ s.length());
		    	  
		    	  
		    	  // One array element per line of the result text.
		    	  String[] accounts = s.split("\n");
		    	  // List is pre-sized for 1,000,000 entries.
		    	  ArrayList<Account> accs = new ArrayList<Account>(1000000);

		    	  // Starts at index 1, presumably to skip the CSV header row — confirm.
		    	  for(int i=1; i<accounts.length;i++)
		    	  {
		    		  // Field 0 is used as Name and field 1 as Id, matching the SELECT order.
		    		  // NOTE(review): because quotes were stripped above and the line is
		    		  // split on every comma, a Name containing a comma would shift the
		    		  // fields; a line with no comma would make accountparts[1] throw.
		    		  String[] accountparts = accounts[i].split(",");
		    		  Account a = new Account();
		    		  a.setName(accountparts[0]);
		    		  a.setId(accountparts[1]);
		    		  accs.add(a);
		    	  }
I haven't really worked with data this large in Java before, so clearly I'm doing something wrong. The requirements are to pull down all the accounts (~450k), and I'm assigning them to an ArrayList.

From my investigation, this block:
 
// Excerpt: fetches the result stream for each result id. Each iteration
// overwrites `inputStream`, so only the last stream is still referenced
// once the loop ends.
if (queryResults != null) {
			      for (String resultId : queryResults) {
			    	 inputStream= (ByteArrayInputStream) sfc.getBulkConnection().getQueryResultStream(job.getId(), 
			            info.getId(), resultId);
			      }

is where it's hanging (it takes about 5 minutes). Is there anything I can do to speed this up, or are there any other solutions? Thanks!