// Empty stub: accepts `count` and performs no work.
function readOnly(count){ }
Starting November 20, the site will be set to read-only. On December 4, 2023,
forum discussions will move to the Trailblazer Community.
+ Start a Discussion
Lee SinLee Sin 

Batch Apex Error: Batchable Instance Is Too Big

I am trying to find the duplicate emails under same account.
I have limited soql abilities so I didn't use "group" in my query at the time.
I wrote a Batch Apex class to process all the contacts and find the duplicate pairs, but I got the error "Batch Apex Error: Batchable Instance Is Too Big".
I tried the following:
1. Change the batch size to be 150 or 180.  
2. Searched this error in this forum, people encountered this when they were importing csv files or when they mistakenly added "Database.Stateful" keyword.

I don't quite understand the heap limit here: what is the limit for each batch? How can I tell which part of my code consumes the most heap, so I can try to fix it?
Thank you!

 
/**
 * Batch job that groups Contacts by (AccountId, Email) and reports every pair
 * that is shared by more than one Contact.
 *
 * The job is Database.Stateful because duplicatePairs and the counters must
 * accumulate across execute() calls and be read in finish(). To keep that
 * retained state small, only Id-only Contact stubs are stored per pair; the
 * account and email already live in the map key.
 * NOTE(review): the retained state still grows with the number of distinct
 * pairs, so a very large org may still need an aggregate (GROUP BY) approach.
 */
global class ContactsCleaning implements Database.Batchable<sObject>,Database.Stateful{
    //query: select id,Accountid,email from Contact where email !=null and email!='\'\'' and accountid!=null and accountid!='\'\''
    /** (AccountId, Email) -> Contacts seen with that pair (Id-only stubs). */
    public Map<AccountEmail,List<Contact>> duplicatePairs=new Map<AccountEmail,List<Contact>>();
    /** Number of distinct (AccountId, Email) pairs seen. NOTE(review): interpretation of "Total Emails" - confirm. */
    public integer totalemail=0;
    /** Number of Contacts handed to execute() across all batches. */
    public integer totalcontact=0;
    /** SOQL used by start() to select the Contacts to scan. */
    global final String query;

    /**
     * @param q SOQL query selecting the Contacts to scan; it should return Id, AccountId and Email.
     */
    global ContactsCleaning(String q){
        query=q;
    }

    /** Returns the query locator for the SOQL supplied to the constructor. */
    global Database.QueryLocator start(Database.BatchableContext BC){
        return Database.getQueryLocator(query);
    }

    /**
     * Files every Contact in the batch under its (AccountId, Email) key.
     * Contacts missing either value are counted but not grouped, since they
     * cannot form a meaningful account/email pair.
     */
    global void execute(Database.BatchableContext BC, List<Contact> scope)
    {
        totalcontact+=scope.size();
        for(Contact c : scope)
        {
            if(c.AccountId==null || String.isBlank(c.Email))
            {
                continue;
            }
            AccountEmail ae=new AccountEmail(c.AccountId,c.Email);
            List<Contact> matches=duplicatePairs.get(ae);
            if(matches==null)
            {
                matches=new List<Contact>();
                duplicatePairs.put(ae, matches);
                totalemail++;
            }
            // Keep only the Id: finish() needs nothing else, and retaining the
            // full queried record for every Contact bloats the stateful instance.
            matches.add(new Contact(Id=c.Id));
        }
    }

    /**
     * Drops pairs that have a single Contact, writes the remaining duplicates
     * to a text Document in the running user's personal folder, and emails
     * the summary counts.
     */
    global void finish(Database.BatchableContext BC)
    {
        // Keys are collected first and removed after the loop: a map must not
        // be modified while its key set is being iterated.
        List<AccountEmail> singles=new List<AccountEmail>();
        List<String> lines=new List<String>();
        for(AccountEmail ae : duplicatePairs.keySet())
        {
            List<Contact> ls=duplicatePairs.get(ae);
            if(ls.size()<=1)
            {
                singles.add(ae);
                continue;
            }
            List<String> parts=new List<String>();
            parts.add('Account: '+ae.AccountID+'   '+'Email:   '+ae.Email+'   ');
            Integer i=1;
            for(Contact c : ls)
            {
                parts.add(i+'   '+'ContactId:  '+c.Id+'   ');
                i++;
            }
            parts.add('\n');
            lines.add(String.join(parts,''));
        }
        for(AccountEmail single : singles)
        {
            duplicatePairs.remove(single);
        }

        Document d=new Document();
        d.Name='Duplicate Emails';
        d.folderid = UserInfo.getUserId();
        // Joined once instead of repeated concatenation, which copies the
        // growing report string on every append.
        d.Body=Blob.valueOf(String.join(lines,''));
        d.ContentType='text/plain';
        d.Type='txt';
        insert d;

        Messaging.SingleEmailMessage mail = new Messaging.SingleEmailMessage();
        String[] toAddresses = new String[] {'jack.wang@accelerationretirement.com'};
        mail.setToAddresses(toAddresses);
        mail.setSubject('Duplicate Email Stats');
        mail.setPlainTextBody('Total Contacts:   '+totalcontact+'\n'+'Total Emails:   '+totalemail+'\n'+'Duplicate Pairs:   '+duplicatePairs.size());
        mail.setHtmlBody('<div>Total Contacts:   '+totalcontact+'</div><div>Total Emails:   '+totalemail+'</div><div>Duplicate Pairs:   '+duplicatePairs.size()+'</div>');
        Messaging.sendEmail(new Messaging.SingleEmailMessage[] { mail });
    }
}
//This class is the key to dedup email of Contact
/**
 * Composite map key made of an account id and an email address.
 *
 * Two keys are equal when both fields compare equal with ==, which for Apex
 * Strings ignores case. hashCode() therefore hashes the lower-cased values so
 * that equal keys always land in the same bucket.
 */
public class AccountEmail {
    public String AccountID;
    public String Email;

    /** Creates a key with both fields null. */
    public AccountEmail()
    {
    }

    /**
     * @param acctid account id of the Contact
     * @param e      email address of the Contact
     */
    public AccountEmail(String acctid,String e)
    {
        AccountID=acctid;
        Email=e;
    }

    /** Returns true when obj is an AccountEmail with the same account id and email (case-insensitive). */
    public Boolean equals(Object obj) {
        if (obj instanceof AccountEmail) {
            AccountEmail p = (AccountEmail)obj;
            return ((AccountID==p.AccountID) && (Email==p.Email));
        }
        return false;
    }

    /**
     * Hash consistent with equals(): case is folded before hashing, and a
     * null field contributes 0 instead of throwing.
     */
    public Integer hashCode() {
        Integer acctHash = (AccountID==null) ? 0 : AccountID.toLowerCase().hashCode();
        Integer emailHash = (Email==null) ? 0 : Email.toLowerCase().hashCode();
        return (31 * acctHash) ^ emailHash;
    }

}






 
Best Answer chosen by Lee Sin
Roy LuoRoy Luo
Try something like this:
// Aggregate query: one row per (AccountID, Email) group, with the number of Contacts in that group.
AggregateResult[] groupedResults
  = [SELECT AccountID, Email, Count(Id) acctEmailCount FROM Contact GROUP BY AccountID, Email];
// Declared for per-key counts; this snippet never populates it.
Map<String,Integer> nameCountMap = new Map<String, Integer>();
for(AggregateResult r: groupedResults)
{
  // Read the grouped Email value from the aggregate row.
  Object email = r.get('Email');   
}


OR at least use a light weight Map<String, List<Id>> duplicatePairs, since all you need are the emails and counts.
String acctMailKey = AccountId + '_' +  c.Email would just do fine. When you need the email address, just get the keys and parse it out. 

All Answers

Roy LuoRoy Luo
Try something like this:
// Aggregate query: one row per (AccountID, Email) group, with the number of Contacts in that group.
AggregateResult[] groupedResults
  = [SELECT AccountID, Email, Count(Id) acctEmailCount FROM Contact GROUP BY AccountID, Email];
// Declared for per-key counts; this snippet never populates it.
Map<String,Integer> nameCountMap = new Map<String, Integer>();
for(AggregateResult r: groupedResults)
{
  // Read the grouped Email value from the aggregate row.
  Object email = r.get('Email');   
}


OR at least use a light weight Map<String, List<Id>> duplicatePairs, since all you need are the emails and counts.
String acctMailKey = AccountId + '_' +  c.Email would just do fine. When you need the email address, just get the keys and parse it out. 
This was selected as the best answer
Lee SinLee Sin
Hi Luo, 
The result of this query is over 50,000 rows. I tried adding a HAVING clause, "HAVING Count(Id) > 1", but it makes no difference.
AggregateResult[] groupedResults
  = [SELECT AccountID, Email, Count(Id) acctEmailCount FROM Contact GROUP BY AccountID, Email];

So Can I use the aggregate query to create a scope for batch apex?
 
Roy LuoRoy Luo
Surely it should work. Also I guess changing
  public Map<AccountEmail,List<Contact>> duplicatePairs=new Map<AccountEmail,List<Contact>>();
to 
 public Map<String,List<Id>> duplicatePairs=new Map<String,List<Id>>(); 
might also get you away from the heap size issue. 

Good luck.
Lee SinLee Sin
Thank you very much!