Source code for bsmart.tools.MadGraphHackAnalysis

"""
Tool to run the pipeline madgraph to create lhe file then hackanalysis to drive pythia and perform analysis. 


Need to specify in tools:

.. code-block:: text

    "MadGraphHackAnalysis" : {
        "HackAnalysis" : "/path/to/executable", (e.g. analysePYTHIA_LHE.exe)
        "YAML file" "<NAME>.yaml"    (file for configuring HackAnalysis run  )
    }

plus, in addition, settings for running madgraph which can also be found in ../collider/MGaux.py
These include as obligatory:

* "MadGraph" : "/path/to/modeldir/bin/generate_events",   (if you have already generated the output directory for your process)

OR

* "Proc Card": "proc_card.dat",    ( for generating the madgraph directory based on a series of commands to provide to madgraph)
* "MadGraph Path": "/path/to/MG5_aMC_v3.5.XX/bin/mg5_aMC",     (so that we can run MadGraph )
* "Output Directory": "/path/to/where/you/want/to/put/the/output", (this is OPTIONAL: if you have the command output <outputname> in your proc_card.dat then BSMArt will find it)

other optional MadGraph settings include:

.. code-block:: text

    {
        "python2": "True",  (if you need to run madgraph via python2)
        "MadGraph Directory": "/path/to/modeldir",  (if you want to specify the path to the modeldir, otherwise the code will infer it)
        "QNUMBERS file": "myQNUMBERs.slha", (for models that have particles unknown to pythia)
        "MadSpin": "True", (if you want to run MadSpin and have it handled by BSMArt)
        "k-factor": 1.2, (for multiplying the cross-section by a k-factor)
        "Cross-sections" : "/path/to/csv/file/containing/cross-sections.csv",      (if you have tabulated cross-sections as a function of *one* variable to use instead of the one computed in MadGraph; this is documented in the InterpolateXSection.py file.)
    }

---------------------------

25/03/13: Make more robust, in case for some reason HackAnalysis fails to run when doing batches, will give two more tries with the same point






"""

__meta__ = {
    "name": "MadGraphHackAnalysis",
    "requires": ["pyyaml"],
    "settings": {
        "HackAnalysis": "Path to HackAnalysis executable",
        "YAML file": "Path to YAML config file",
        "MadGraph": "Path to generate_events",
        "Proc Card": "Path to proc_card.dat",
        "MadGraph Path": "Path to mg5_aMC",
        "Output Directory": "Path to output directory",
        "Mode": "LHE or HEPMC",
        "GridPack": "Boolean",
        "Check Absolute Convergence": "Boolean",
        "Check Convergence": "Boolean",
        "Efficiency Threshold": "Float",
        "Convergence Margin": "Float",
        "Check Exclusion": "Boolean",
        "Batch Size": "Int",
        "Store Events": "Boolean",
        "Cores": "Int"
    }
}


import os
import shutil

from bsmart import debug
from bsmart.HEPRun import HepTool,DataPoint

from bsmart.collider.pythiacfg import PYTHIACFG
from bsmart.collider.splitlhe import splitlhe
from bsmart.collider.MGaux import MadGraphRunner

import yaml
import json
from collections import OrderedDict
from bsmart import zslha
#import fnmatch
import sys
#import math

try:
    import importlib
except Exception as e:
    print("Failed to import importlib, "+str(e))


###############################################################


references=r'''@article{Goodsell:2021iwc,
    author = "Goodsell, Mark D. and Priya, Lakshmi",
    title = "{Long dead winos}",
    eprint = "2106.08815",
    archivePrefix = "arXiv",
    primaryClass = "hep-ph",
    doi = "10.1140/epjc/s10052-022-10188-1",
    journal = "Eur. Phys. J. C",
    volume = "82",
    number = "3",
    pages = "235",
    year = "2022"
}

@article{Goodsell:2024aig,
    author = "Goodsell, Mark D.",
    title = "{HackAnalysis 2: A powerful and hackable recasting tool}",
    eprint = "2406.10042",
    archivePrefix = "arXiv",
    primaryClass = "hep-ph",
    month = "6",
    year = "2024"
}

@article{Corpe:2025sbw,
    author = "Corpe, Louie and Haddad, Abdelhamid and Goodsell, Mark",
    title = "{Recasting the ATLAS search for displaced hadronic jets in the ATLAS calorimeter with additional jets or leptons using surrogate models}",
     eprint = "2502.10231",
    archivePrefix = "arXiv",
    primaryClass = "hep-ph",
    month = "2",
    year = "2025"
}
'''


 

[docs]
class NewTool(HepTool):
    """ MadGraphHackAnalysis runs madgraph and extracts the cross-section plus uncertainty 
    
    Various settings are possible:


    
    
    """
    def __init__(self, name, settings,global_settings=None):
        HepTool.__init__(self, name, settings,global_settings)
        self.citations=references
        ## Optional "Process" setting to label the process examined
        if 'Process' in self.settings:
            self.process = self.settings['Process'] ## to allow labelling the processes
        else:
            self.process='' 

        
        ## Optional "Mode" setting to choose "LHE" or "HEPMC", otherwise infer from the command name
        if 'Mode' in self.settings:
            
            tmode=self.settings['Mode'].upper()
            if tmode in ['LHE','HEPMC']:
                self.mode = tmode
            else:
                raise NameError("Operating mode not recognised; allowed values are LHE and HEPMC")
        else:
            if 'analysePYTHIA_LHE' in self.settings['HackAnalysis']:
                self.mode='LHE'
            elif 'analyseHEPMC' in self.settings['HackAnalysis']:
                self.mode='HEPMC'
            else:
                raise NameError("Executable not recognised and Mode not specified; allowed values are LHE and HEPMC")

        

        if global_settings is not None and global_settings.varnames is not None:
            self.varnames=global_settings.varnames
        else:
            self.varnames=None

        ## Require "YAML file" setting for running HackAnalysis
        if 'YAML file' not in self.settings:
            #self.log.error('No YAML file present!')
            raise NameError('No YAML file present!')
        tpath=os.path.dirname(self.settings['YAML file'])
        if tpath == '':
            
            self.yamlname=self.settings['YAML file']
            #self.yaml = os.path.join(self.settings['cwd'],self.yamlname)
            #yamldir=self.settings['cwd']
            yamldir=os.getcwd()
            self.yaml = os.path.join(yamldir,self.yamlname)
            
        else:
            self.yaml = self.settings['YAML file']
            self.yamlname=os.path.split(self.yaml)[-1]
            #yamldir=os.path.split(os.path.dirname(self.yaml))[0]  ### I no longer know why we are doing this
            yamldir = tpath
        # READ OTHER INFO FROM THE YAML!!!!
        try:
            with open(self.yaml, 'r') as stream:        
                self.yaml_data = yaml.safe_load(stream)
        except:
            raise NameError('Failed to open HackAnalysis yaml file!')
        
        self.qnumbers_name=None
        if 'QNUMBERS file' in self.yaml_data:
            ## need to know whether we have an absolute path for the QNUMBERS file, or no path:
            ## if there's no path then we need to copy it to the temp directory for running HackAnalysis
            ## We can extract it from the MG runner 
            tpath=os.path.dirname(self.yaml_data['QNUMBERS file'])
            if tpath == '':  # need to copy the QNUMBERS file to the working directory
                self.qnumbers_name=self.yaml_data['QNUMBERS file']
                if 'QNUMBERS file' not in self.settings:
                    self.settings['QNUMBERS file'] = self.yaml_data['QNUMBERS file']
            #else:
            #    self.qnumbers_name=None
                
        ## "GridPack": "True" enables gridpack mode, perfect for running large numbers of events
        ## GridPack not yet supported for HEPMC run because we only run single core HEPMC in HA2
        if 'GridPack' in self.settings and self.mode=='LHE':
            self.useGridPack=eval(self.settings['GridPack'])
        else:
            self.useGridPack=False


        ## "Check Absolute Convergence" setting when running batches of events, to check that cutflows converge to fixed precision
        if "Check Absolute Convergence" in self.settings:
            self.check_convergence = eval(self.settings['Check Absolute Convergence'])
        else:
            self.check_convergence = False

        ## "Check Convergence" setting when running batches of events, to check that cutflows converge to suitable precision depending on experimental sensitivity
        if "Check Convergence" in self.settings:
            self.check_exp_convergence = eval(self.settings['Check Convergence'])
            if self.check_exp_convergence:
                self.check_convergence = True
                
        else:
            self.check_exp_convergence=False

        if "Efficiency Threshold" in self.settings:
            self.convergence_threshold=float(self.settings['Efficiency Threshold'])
        else:
            self.convergence_threshold=1e-5

        if "Convergence Margin" in self.settings:
            self.convergence_error=float(self.settings['Convergence Margin'])
            self.safety_ratio=self.convergence_error
        else:
            self.convergence_error=0.1
            self.safety_ratio=0.25

        HAdir=os.path.dirname(settings['HackAnalysis'])
        #HApythonpath=os.path.join(os.path.dirname(settings['HackAnalysis']),'Statistics')
        HApythonpath=os.path.join(HAdir,'Statistics')
        if not os.path.exists(HApythonpath): # presumably using new cmake, so the stats directory is different
            if 'bin' in HAdir:
                HAdir=os.path.dirname(HAdir)
                HApythonpath=os.path.join(HAdir,'Statistics')

        # Modification 7/2/2025
        #HAstatspath=os.path.join(HApythonpath,'HAStats')
        HAstatspath=HApythonpath
        
        if "Check Exclusion" in self.settings:
            self.check_exclusion = eval(self.settings['Check Exclusion'])   
            if self.check_exclusion:
                #print('Adding stats path '+HAstatspath)

                if os.path.exists(HAstatspath):
                    if HApythonpath not in sys.path:
                        sys.path.insert(0,HApythonpath)
                #self.exclusions_module=importlib.import_module('HAstats') 
                self.exclusions_module=importlib.import_module('HAStats.HAstats')
                self.analysishandlers={}
                infopath=os.path.join(HApythonpath,"HAStats","Analyses")
                for analysis in self.yaml_data['analyses']:
                    analysisUPPER=analysis.upper()
                    try:
                        
                        with open(os.path.join(infopath,analysisUPPER+'_info.json'),'r') as thisinfo:
                            analysis_infodict=json.load(thisinfo)
                        
                        self.analysishandlers[analysis] = self.exclusions_module.AnalysisStats(analysis_infodict)

                    except Exception as e:
                        print("Failed to initialise analysis %s: %s" %(analysis,str(e)))
                        if analysis in self.analysishandlers:
                            print("Still somehow initialised the analysis, deleting")
                            self.analysishandlers.pop(analysis)
                        continue
        else:
            self.check_exclusion=False



        self.nevents=None
        
        ## We get nevents from the yaml file if it is there, otherwise use default of 10000
        if 'nevents' in self.yaml_data['settings']:
            self.nevents=int(self.yaml_data['settings']['nevents'])

        if self.nevents is None and self.useGridPack:
            
            self.nevents=10000
        
        self.combiner = None
        ## "Batch Size" setting if you want batch mode
        if 'Batch Size' in self.settings:
            self.batch_size=int(self.settings['Batch Size'])
            if self.batch_size < self.nevents:
                # need to modify the yaml file to only run the batch-size number of events
                self.yaml_data['settings']['nevents'] = self.batch_size
                # Import routines for merging json files etc
                
                """
                HApythonpath=os.path.join(os.path.dirname(settings['HackAnalysis']),'Statistics')
                if os.path.exists(HApythonpath):
                    sys.path.insert(0,HApythonpath)
                """
                
                if os.path.exists(HApythonpath):
                    if HApythonpath not in sys.path:
                        sys.path.insert(0,HApythonpath)
                    self.combiner=importlib.import_module('combine_ha')
                    if self.check_convergence:
                        self.convergence_module=importlib.import_module('ha_convergence')
                        self.convergence=self.convergence_module.HAconverger()
                else:
                    raise NameError("Cannot find Statistics path "+HApythonpath)
        else:
            self.batch_size=self.nevents

        if 'JSON file' in self.yaml_data['settings']:
            self.HAjson=self.yaml_data['settings']['JSON file']
        else:
            self.HAjson='hackanalysis.json'

        
        
        #Set up MadGraph

        


        #if self.mode=='LHE' and 'Extra Commands' not in self.settings:
        if 'Extra Commands' not in self.settings:    
            # need to make sure showering is turned off in madgraph, if using default settings
            # Otherwise we trust the user to know what they are doing!!
            if self.mode=='LHE':
                commandlist=[['shower=OFF'],['detector=OFF'],['analysis=OFF']]
            else:
                commandlist=[['shower=Pythia8'],['detector=OFF'],['analysis=OFF']]
            
            
            if 'MadSpin' in self.settings and eval(self.settings['MadSpin']):
                commandlist.append(['madspin=ON'])
            else:
                commandlist.append(['madspin=OFF'])
            commandlist.append(['done'])
            if 'run_card Commands' in self.settings:
                ## 'run_card Commands' in settings are commands passed to madgraph to update the run_card or parameters
                # These are in addition to the automatically created ones (e.g. setting the number of events) 
                for rccommand in self.settings['run_card Commands']:
                    commandlist.append(rccommand)
            else:
                commandlist.append(['update dependent'])

            #if 'MadSpin' in self.settings and eval(self.settings['MadSpin']):  
                #self.settings['Extra Commands']=[['shower=OFF'],['detector=OFF'],['analysis=OFF'],['madspin=ON'],['done'],[eventsline],['done']]
            #else:
                #self.settings['Extra Commands']=[['shower=OFF'],['detector=OFF'],['analysis=OFF'],['madspin=OFF'],['done'],[eventsline],['done']]
            
            # if we want to use the GridPack mode but in one big batch, we only need a minimum number of initial events
            # Otherwise we set the initial events equal to the batch size (which could be equal to the total number of events in one-shot runs)
            if self.useGridPack and self.batch_size >= self.nevents: 
                commandlist.append(['set nevents 10000']) ## basic number of events for gridpack run
            else:
                commandlist.append(['set nevents '+str(self.batch_size)])
            """

            if self.useGridPack:
                commandlist.append(['set nevents 10000']) ## basic number of events for gridpack run
            else:
                if 'nevents' in self.yaml_data['settings']:
                    commandlist.append(['set nevents '+str(self.yaml_data['settings']['nevents'])])
            """

            commandlist.append(['done'])
            self.settings['Extra Commands']=commandlist

        
        
        if 'Store Events' in self.settings:
            self.store_events=eval(self.settings['Store Events'])
        else:
            self.store_events = False


        self.hackanalysis_cmd = self.settings['HackAnalysis']+' '+self.yamlname

        
        if 'Cores' in self.settings:
            self.n_cores=int(self.settings['Cores'])
            self.yaml_data['settings']['cores'] = self.n_cores
        else:
            if 'cores' in self.yaml_data['settings']:
                self.n_cores = int(self.yaml_data['settings']['cores'])
                self.settings['Cores']=self.n_cores
            else:
                self.n_cores = 1
                self.yaml_data['settings']['cores'] = 1

        ## We use the temporary directory for the madgraph model directory if not told otherwise
        if global_settings is not None:
            #print("have global settings!")
            if hasattr(global_settings,'work_dir') and global_settings.work_dir is not None:
                #print("Setting TempDir!")
                self.settings['TempDir'] = global_settings.work_dir
                #print(self.settings['TempDir'])

        try:
            self.MGrunner=MadGraphRunner(self.settings)
        except Exception as e:
            raise NameError('Failed to initialise MadGraph: '+str(e))


        if 'LHE file' in self.yaml_data['settings']:
            self.LHEfile=self.yaml_data['settings']['LHE file']
        else:
            self.LHEfile='Split/split'

        if self.mode == 'LHE':
            if 'Config file' not in self.yaml_data['settings']:
                raise NameError('No cfg file for pythia present!')

            self.cfgname=self.yaml_data['settings']['Config file']
            #self.cfgfile = os.path.join(self.settings['cwd'],self.cfgname)
            self.cfgfile =os.path.join(yamldir,self.cfgname)
            self.pythia_cfg = PYTHIACFG()
            try:
                self.pythia_cfg.read(self.cfgfile)
            except Exception as e:
                raise NameError("Failed to initialise pythia configuration file, " + str(e))
        
        if 'Efficiency Filename' in self.yaml_data['settings']:
            self.effname = self.yaml_data['settings']['Efficiency Filename']
        else:
            self.effname = 'test.eff'
            self.yaml_data['settings']['Efficiency Filename'] = self.effname

        if 'Cutflow Filename' in self.yaml_data['settings']:
            self.cutflowname = self.yaml_data['settings']['Cutflow Filename']
        else:
            self.cutflowname = 'test_cf.eff'
            self.yaml_data['settings']['Cutflow Filename'] = self.cutflowname
        
        if 'Histogram Filename' in self.yaml_data['settings']:
            self.histoname = self.yaml_data['settings']['Histogram Filename']
        else:
            self.histoname = 'test.yoda'
            self.yaml_data['settings']['Histogram Filename'] = self.histoname
        



[docs]
    def run(self, spc_file, temp_dir, log,data_point):
        ## set up madgraph configuration
        #var_dict = data_point.get_var_dict(self.varnames)
        ## HackAnalysis yaml file:
        local_yaml = os.path.join(temp_dir, self.yamlname)
        
        if self.useGridPack:
            # First remove any existing gridpack if it already exists
            gridpacktarball=os.path.join(self.MGrunner.MG_Model_Dir,'run_01_gridpack.tar')
            if os.path.exists(gridpacktarball):
                os.remove(gridpacktarball)
            gridpacktarball=os.path.join(self.MGrunner.MG_Model_Dir,'run_01_gridpack.tar.gz')
            if os.path.exists(gridpacktarball):
                os.remove(gridpacktarball)

        MGinfo=self.MGrunner.run(spc_file,temp_dir,log,data_point)
        
        # if the reported cross-section is too small, is there any point in running?
        if self.check_exp_convergence:
            if MGinfo['xs']*1e3*139 < 1:
                if self.check_exclusion: # point clearly not excluded
                    ### ADD CODE HERE!
                    log.info("Not trivially Excluded!") 

                #return
        ## now split lhe file, if needed
        # otherwise we write the full path to the lhe file in the yaml file
        
        if self.mode == 'LHE':
                lhe_events=None
                for tfile in os.listdir(self.MGrunner.MG_Run_Dir):
                    if tfile[-7:] == '.lhe.gz' or tfile[-4:] == '.lhe':
                        lhe_events=os.path.join(self.MGrunner.MG_Run_Dir,tfile)                    
                        break
                if not os.path.exists(lhe_events):
                    raise NameError("Failed to generate lhe events!")
                
                if self.n_cores > 1:
                    splitlhe(self.n_cores,lhe_events,os.path.join(temp_dir,self.LHEfile))
                else:
                    self.yaml_data['settings']['LHE file'] = lhe_events
                    #if os.path.exists(os.path.join(self.MG_Run_Dir,))
        else: ## HEPMC 
                hepmc_events=None
                for tfile in os.listdir(self.MGrunner.MG_Run_Dir):
                    if tfile[-9:] == '.hepmc.gz' or tfile[-6:] == '.hepmc':
                        hepmc_events=os.path.join(self.MGrunner.MG_Run_Dir,tfile)                    
                        break
                if not os.path.exists(hepmc_events):
                    raise NameError("Failed to generate hepmc events!") 
                self.yaml_data['settings']['HEPMC file'] = hepmc_events
            ## Now for procesing the files to set up for HA

        with open(local_yaml,'w') as stream:
                    whatever = yaml.dump(self.yaml_data,stream)


        var_dict = data_point.get_all_dict()
        # Generate pythia config file each time (since we might need to, e.g. for changing qcut )

        ## Now for HackAnalysis stuff

        if self.mode == 'LHE':
            local_cfg = os.path.join(temp_dir, self.cfgname)
            #self.pythia_cfg.write(local_cfg,data_point.inputs,self.varnames)
        
            self.pythia_cfg.write_dict(local_cfg,var_dict)

        ## Now to run
        if os.path.exists(self.HAjson):
            os.remove(self.HAjson)
        if os.path.exists(self.effname):
            os.remove(self.effname)
        if os.path.exists(self.cutflowname):
            os.remove(self.cutflowname)
        if self.histoname is not None:
            if os.path.exists(self.histoname):
                os.remove(self.histoname)

        os.chdir(temp_dir)
        if self.qnumbers_name is not None:
            if not os.path.exists(self.qnumbers_name):
                with open(self.qnumbers_name,'w') as QNF:
                    QNF.write(self.MGrunner.QNUMBERSfile)
        
        failed_attempts=0
        try:
            debug.command_line_log(self.hackanalysis_cmd, log)
        except Exception as e:
            failed_attempts=1
            log.error('Error running HackAnalysis: '+str(e))
        

        if not os.path.exists(self.effname) or not os.path.exists(self.cutflowname):
                ## point has failed
                failed_attempts=1
                log.error('Failed to run HackAnalysis, efficiency or cutflow missing')
                raise NameError('Failed to run HackAnalysis, efficiency or cutflow missing')
        output_json_data=OrderedDict()
        if os.path.exists(self.HAjson):
                    with open(self.HAjson,'r') as jsonfile:
                        output_json_data=json.load(jsonfile,object_pairs_hook=OrderedDict)
        else:
            failed_attempts=1
            log.error('Json file missing')
            #raise NameError('Failed to run HackAnalysis, json missing')

        ### If gridpack mode, now we need the gridpack & batch run
        needed_extra_events=self.nevents
        improve_regions={}
        converged=False
        doneevents=0
        ## here we use the number of  events we *should* have generated. Maybe better to read the number from the output

        if failed_attempts ==0:
            if self.useGridPack and self.batch_size >= self.nevents: 
                doneevents=10000
            else:
                doneevents=self.batch_size

            if self.check_convergence: # check convergence or exclusion
                        if self.check_exp_convergence:
                            #check_convergence_exclusion(self,hackanalysis_output,nloxs=None,input_max_batch_size=None,safety_ratio=0.25,verbose=False)
                            needed_extra_events,improve_regions,_=self.convergence.check_convergence_exclusion(output_json_data,None,self.batch_size,self.safety_ratio,True)
                        else:
                            #check_convergence_fixed_goal(self,efficiencydata,rel_uncertainty=0.1,eff_threshold=1e-5,input_max_batch_size=None,verbose=True)
                            #needed_extra_events,improve_regions,_=self.convergence.check_convergence_fixed_goal(output_json_data,1.e-1,1.e-5,self.batch_size)
                            needed_extra_events,improve_regions,_=self.convergence.check_convergence_fixed_goal(output_json_data,self.convergence_error,self.convergence_threshold,self.batch_size,True)

                        if needed_extra_events > 0:
                            outstr=''
                            log.debug('Estimate need to generate %d more events' %needed_extra_events)
                            for analysis in improve_regions:
                                linestring=','.join(improve_regions[analysis])
                                outstr=outstr+analysis+': '+linestring+'\n'
                            log.debug('Improve regions: \n'+outstr)

                        else:
                            converged=True

        if self.useGridPack and not converged:
            # First need to generate the GridPack
            try:
                if self.MGrunner.python2:
                    debug.command_line_log('python2 '+os.path.join(self.MGrunner.MG_Model_Dir,'bin','madevent')+' create_gridpack',log)
                else:
                    debug.command_line_log(os.path.join(self.MGrunner.MG_Model_Dir,'bin','madevent')+' create_gridpack',log)
            except Exception as e:
                log.error('Could not run madevent to create gridpack: '+str(e))
            
            # then check gridpack was created in the model directory (that's where it gets put ...)
            # since we remove the run_01 should always be called run_01
            gridpacktarball=os.path.join(self.MGrunner.MG_Model_Dir,'run_01_gridpack.tar.gz')
            if not os.path.exists(gridpacktarball):
                gridpacktarball=os.path.join(self.MGrunner.MG_Model_Dir,'run_01_gridpack.tar')
                if not os.path.exists(gridpacktarball):
                    raise NameError('No gridpack found at '+gridpacktarball)

            # now extract it
            log.debug('Extracting gridpack '+gridpacktarball)
            log.debug('Changing directory to '+temp_dir)
            os.chdir(temp_dir) # just to make sure ...

            madeventdir=os.path.join(temp_dir,'madevent')
            if os.path.exists(madeventdir): # clean up if it is already there
                debug.command_line_log('chmod a+w -R madevent',log)
                shutil.rmtree(madeventdir)

            #now unpack it    
            debug.command_line_log('tar -xf '+gridpacktarball,log)
            
            os.chdir(temp_dir)

            # make read only
            debug.command_line_log('chmod a-w -R madevent',log)

            # if python2 -> correct the run.sh script
            if self.MGrunner.python2:
                OFrun=open('run.sh','w')
                OFrun.write('#!/bin/bash\n')
                OFrun.write('basedir=%s\n'%temp_dir)
                OFrun.write('export  LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${basedir}/madevent/lib:${basedir}/HELAS/lib\n')
                OFrun.write('python2 ${basedir}/madevent/bin/gridrun $1 $2 1\n')
                OFrun.write('mv ./Events/GridRun_${2}/unweighted_events.lhe.gz events.lhe.gz\n')
                OFrun.write('rm -rf Events Cards P* *.dat randinit &> /dev/null\n')
                OFrun.close()

            ## remove unnecessary files? 
            
            # Now create the directories for the gridpacks to run
            # New: run directories are created each time we run the gridpack

            import random
            import subprocess
            #seedstart=random.randint(1,sys.maxsize-1)
            seedstart=random.randint(1,1000000)
            log.debug('Setting random seed start to %d' % seedstart)
            if self.batch_size < self.nevents:
                coreevents=int(self.batch_size/self.n_cores)+1
            else:
                coreevents=int(self.nevents/self.n_cores)+1


            var_dict = data_point.get_all_dict()

            if self.mode == 'LHE':
                local_cfg = os.path.join(temp_dir, self.cfgname)
                #self.pythia_cfg.write(local_cfg,data_point.inputs,self.varnames)
                self.pythia_cfg.write_dict(local_cfg,var_dict)

            def rungp(np,seed):
                log.debug('starting run for core %d, requesting %d events with random seed %d' %(np,coreevents,seed+np))
                ## added the /dev/null to clear out the mess
                command='../run.sh %d %d' %(coreevents,seed+np)
                #command='../run.sh %d %d' %(coreevents,seedstart+np)
                wdir=os.path.join(temp_dir,'gpr_%d' %np)
                if os.path.exists(wdir):
                    shutil.rmtree(wdir)
                os.mkdir(wdir)
                """
                command_line = subprocess.Popen(command, shell=True, cwd=wdir,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
                out, err = command_line.communicate()
                """

                # new 22/07/24, don't want the output! Direct to null in a platform indep way
                command_line = subprocess.Popen(command, shell=True, cwd=wdir,
                                    stdout=subprocess.DEVNULL,
                                    stderr=subprocess.PIPE)
                _, err = command_line.communicate()
                

                if err != b'':
                    log.error(err.decode("utf-8"))
                # check events exist
                eventfile=os.path.join(wdir,'events.lhe.gz')
                ending='.lhe.gz'
                if not os.path.exists(eventfile):
                    eventfile=os.path.join(wdir,'events.lhe')
                    ending='.lhe'
                    if not os.path.exists(eventfile):
                        raise NameError('Failed to produce events for core %d' %np)
                    
                # move file to desired location
                
                shutil.move(eventfile,self.LHEfile+'_'+str(np)+ending)
                ## clean up
                shutil.rmtree(wdir)
                # return the name of the file produced
                return self.LHEfile+'_'+str(np)+ending

            # now have already set doneevents and converged check
            #doneevents=0
            #converged = False

            while doneevents < self.nevents and not converged:
                seedstart+=self.n_cores
                doneevents += self.batch_size

                if self.n_cores > 1:
                    import multiprocessing as mp
                    import time
                
                    lhe_events=None
                    for tfile in os.listdir(self.MGrunner.MG_Run_Dir):
                        if tfile[-7:] == '.lhe.gz' or tfile[-4:] == '.lhe':
                            lhe_events=os.path.join(self.MGrunner.MG_Run_Dir,tfile)                    
                            break
                    processes=[]
                    splitdir=os.path.dirname(self.LHEfile)
    
                    if splitdir != '':
                        if not os.path.exists(splitdir):
                            os.makedirs(splitdir)
                        
                        # move events to desired location
                        


                    for np in range(self.n_cores):
                        #processes.append(mp.Process(target=debug.command_line_log_PID,args=('../run.sh %d %d' %(coreevents,seedstart+np),os.path.join(temp_dir,'gpr_%d' %np))))
                        processes.append(mp.Process(target=rungp,args=(np,seedstart)))
                    for p in processes:
                        p.start()
                        time.sleep(0.1)
                    
                    while True:
                        time.sleep(10)
                        allExit = True
                        for t in processes:
                            if t.exitcode is None:
                                allExit = False
                                break

                        if allExit:
                            break
                    for p in processes:
                        p.join()               
                else:
                    lhe_events=rungp(0,seedstart)
                    self.yaml_data['settings']['LHE file'] = lhe_events
                os.chdir(temp_dir)
                ## Now to run
                if os.path.exists(self.HAjson):
                    os.remove(self.HAjson)
                if os.path.exists(self.effname):
                    os.remove(self.effname)
                if os.path.exists(self.cutflowname):
                    os.remove(self.cutflowname)
                if self.histoname is not None:
                    if os.path.exists(self.histoname):
                        os.remove(self.histoname)
                #self.yaml_data['settings']['LHE file'] = lhe_events
                with open(local_yaml,'w') as stream:
                    whatever = yaml.dump(self.yaml_data,stream)

                debug.command_line_log(self.hackanalysis_cmd, log)
        
                if not os.path.exists(self.effname) or not os.path.exists(self.cutflowname):
                    ## point has failed
                    log.error('Failed to run HackAnalysis')
                    raise NameError('Failed to run HackAnalysis')
                
                if os.path.exists(self.HAjson):
                    with open(self.HAjson,'r') as jsonfile:
                        new_json_data=json.load(jsonfile,object_pairs_hook=OrderedDict)
                    if self.combiner is None:
                        output_json_data=new_json_data
                    else:
                        output_json_data=self.combiner.addresults(output_json_data,new_json_data)

                # write merged files so the user can keep track (or in case of crashes!)
            
                if self.batch_size < self.nevents:
                    with open('temp_HA_output.json','w') as fp:
                        json.dump(output_json_data,fp,indent=4)
                    if self.combiner is not None:
                        self.combiner.write_efficiency_file(output_json_data,'temp_HA_efficiency_file.eff')

                if self.check_convergence: # check convergence or exclusion
                    if self.check_exp_convergence:
                        #check_convergence_exclusion(self,hackanalysis_output,nloxs=None,input_max_batch_size=None,safety_ratio=0.25,verbose=False)
                        needed_extra_events,improve_regions,_=self.convergence.check_convergence_exclusion(output_json_data,None,self.batch_size,self.safety_ratio)
                    else:
                        #check_convergence_fixed_goal(self,efficiencydata,rel_uncertainty=0.1,eff_threshold=1e-5,input_max_batch_size=None,verbose=True)
                        #needed_extra_events,improve_regions,_=self.convergence.check_convergence_fixed_goal(output_json_data,1.e-1,1.e-5,self.batch_size)
                        needed_extra_events,improve_regions,_=self.convergence.check_convergence_fixed_goal(output_json_data,self.convergence_error,self.convergence_threshold,self.batch_size)

                    if needed_extra_events > 0:
                        outstr=''
                        log.debug('Estimate need to generate %d more events' %needed_extra_events)
                        for analysis in improve_regions:
                            linestring=','.join(improve_regions[analysis])
                            outstr=outstr+analysis+': '+linestring+'\n'
                        log.debug('Improve regions: \n'+outstr)

                    else:
                        converged=True



            os.chdir(temp_dir)

            # If we have multiple batches, need to merge the outputs into one file,
            # but otherwise we will just keep the original
            if self.batch_size < self.nevents:
                with open(self.HAjson,'w') as jsonfile:
                    json.dump(output_json_data,jsonfile,indent=4)
                if self.combiner is not None:
                    self.combiner.write_efficiency_file(output_json_data,self.effname)

                
            if os.path.exists('temp_HA_output.json'):
                os.remove('temp_HA_output.json')
            if os.path.exists('temp_HA_efficiency_file.eff'):
                os.remove('temp_HA_efficiency_file.eff')

            debug.command_line_log('chmod a+w -R madevent',log)
            shutil.rmtree('madevent')
                
        ####################################################  
        elif self.batch_size < self.nevents: # batch mode but not gridpack
            ## first write the temporary files before first loop, since we already ran HA
            # Let's now harden the tool since on my computer MadGraph has a tendency to randomly crash

            if failed_attempts ==0:
                with open('temp_HA_output.json','w') as fp:
                            json.dump(output_json_data,fp,indent=4)
                if self.combiner is not None:
                    self.combiner.write_efficiency_file(output_json_data,'temp_HA_efficiency_file.eff')
            
            while doneevents < self.nevents and not converged  and failed_attempts < 3: # this is for 3 *consecutive* failures we give up
                # first generate more events
                
                try:
                    MGinfo=self.MGrunner.run(spc_file,temp_dir,log,data_point)
                except Exception as e:
                    log.debug('Problem running MadGraph: '+str(e))
                    failed_attempts+=1
                    continue
                if self.mode == 'LHE':
                    lhe_events=None
                    for tfile in os.listdir(self.MGrunner.MG_Run_Dir):
                        if tfile[-7:] == '.lhe.gz' or tfile[-4:] == '.lhe':
                            lhe_events=os.path.join(self.MGrunner.MG_Run_Dir,tfile)                    
                            break
                    if not os.path.exists(lhe_events):
                        # Allow for failure
                        failed_attempts+=1
                        continue
                        
                        #raise NameError("Failed to generate lhe events!")
                    if self.n_cores > 1:
                        splitlhe(self.n_cores,lhe_events,os.path.join(temp_dir,self.LHEfile))
                    else:
                        self.yaml_data['settings']['LHE file'] = lhe_events
                else: ## HEPMC 
                    hepmc_events=None
                    for tfile in os.listdir(self.MGrunner.MG_Run_Dir):
                        if tfile[-9:] == '.hepmc.gz' or tfile[-6:] == '.hepmc':
                            hepmc_events=os.path.join(self.MGrunner.MG_Run_Dir,tfile)                    
                            break
                    if not os.path.exists(hepmc_events):
                        failed_attempts+=1
                        continue
                        #raise NameError("Failed to generate hepmc events!") 
                    self.yaml_data['settings']['HEPMC file'] = hepmc_events

                if os.path.exists(self.HAjson):
                    os.remove(self.HAjson)
                if os.path.exists(self.effname):
                    os.remove(self.effname)
                if os.path.exists(self.cutflowname):
                    os.remove(self.cutflowname)
                if self.histoname is not None:
                    if os.path.exists(self.histoname):
                        os.remove(self.histoname)
                #self.yaml_data['settings']['LHE file'] = lhe_events
                with open(local_yaml,'w') as stream:
                    whatever = yaml.dump(self.yaml_data,stream)

                attempt_ok = True
                try:
                    debug.command_line_log(self.hackanalysis_cmd, log)
                    if not os.path.exists(self.effname) or not os.path.exists(self.cutflowname):
                        ## point has failed
                        log.error('Failed to run HackAnalysis')
                        attempt_ok=False
                        #raise NameError('Failed to run HackAnalysis')
                
                    if os.path.exists(self.HAjson):
                        with open(self.HAjson,'r') as jsonfile:
                                new_json_data=json.load(jsonfile,object_pairs_hook=OrderedDict)
                        output_json_data=self.combiner.addresults(output_json_data,new_json_data)
                    else:
                        attempt_ok=False
                except Exception as e:
                    log.error('Error running HackAnalysis: '+str(e))
                    log.error('That makes %d consecutive failed attempts' %failed_attempts)
                    attempt_ok=False
        
                
                if attempt_ok: 
                    failed_attempts=0   
                    doneevents += self.batch_size
                    # write merged files so the user can keep track (or in case of crashes!)
            
                    if self.batch_size < self.nevents:
                        with open('temp_HA_output.json','w') as fp:
                            json.dump(output_json_data,fp,indent=4)
                        self.combiner.write_efficiency_file(output_json_data,'temp_HA_efficiency_file.eff')
                else:
                    failed_attempts+=1

                

                if self.check_convergence and attempt_ok: # check convergence or exclusion
                    if self.check_exp_convergence:
                        #check_convergence_exclusion(self,hackanalysis_output,nloxs=None,input_max_batch_size=None,safety_ratio=0.25,verbose=False)
                        needed_extra_events,improve_regions,_=self.convergence.check_convergence_exclusion(output_json_data,None,self.batch_size,self.safety_ratio)
                    else:
                        #check_convergence_fixed_goal(self,efficiencydata,rel_uncertainty=0.1,eff_threshold=1e-5,input_max_batch_size=None,verbose=True)
                        #needed_extra_events,improve_regions,_=self.convergence.check_convergence_fixed_goal(output_json_data,1.e-1,1.e-5,self.batch_size)
                        needed_extra_events,improve_regions,_=self.convergence.check_convergence_fixed_goal(output_json_data,self.convergence_error,self.convergence_threshold,self.batch_size)

                    if needed_extra_events > 0:
                        outstr=''
                        log.debug('Estimate need to generate %d more events' %needed_extra_events)
                        for analysis in improve_regions:
                            linestring=','.join(improve_regions[analysis])
                            outstr=outstr+analysis+': '+linestring+'\n'
                        log.debug('Improve regions: \n'+outstr)

                    else:
                        log.debug('Point has converged!')
                        converged=True

            # finished event loop for non-gridpack mode, dump the final file
            with open(self.HAjson,'w') as jsonfile:
                    json.dump(output_json_data,jsonfile,indent=4)
            self.combiner.write_efficiency_file(output_json_data,self.effname)    
            if os.path.exists('temp_HA_output.json'):
                os.remove('temp_HA_output.json')
            if os.path.exists('temp_HA_efficiency_file.eff'):
                os.remove('temp_HA_efficiency_file.eff')

        ####################################################  
        ## Here we have finished running 

        # Now to append results to the spc file

        HA_spc=zslha.read(self.effname)
        if self.check_exclusion:
            for analysis in output_json_data:
                if analysis not in self.analysishandlers:  # have to check that it was initialised correctly...
                    continue
                
                #getstatsfromdict(self,effdict,inputxs=None,with_significance=True,with_upper_limits=False)
                try:
                    exclusion_results=self.analysishandlers[analysis].getstatsfromdict(output_json_data[analysis],None,False,True)
                except Exception as e:
                    print("Exception %s running exclusion results for analyis %s, giving mu = 10" %(str(e),analysis))
                    exclusion_results={}
                    exclusion_results['mu95exp']=10.0
                    exclusion_results['mu95obs']=10.0
                    exclusion_results['CLs exp']=-1.0
                    exclusion_results['CLs obs']=-1.0
                    #continue
                print("============ %s ============== " %analysis)
                print(exclusion_results)
                HA_spc.analyses[analysis.upper()]['PROCESS']['MU95EXP']=float(exclusion_results['mu95exp'])
                HA_spc.analyses[analysis.upper()]['PROCESS']['MU95OBS']=float(exclusion_results['mu95obs'])
                #HA_spc.blockcomments['PROCESS'][analysis.upper()]['MU95EXP']=analysis+' (expected)'
                #HA_spc.blockcomments['PROCESS'][analysis.upper()]['MU95OBS']=analysis+' (observed)'
                if float(exclusion_results['mu95obs']) < 1:
                    HA_spc.analyses[analysis.upper()]['PROCESS']['EXCLUDED']=0
                    #HA_spc.blockcomments['PROCESS'][analysis.upper()]['EXCLUDED']='Point Excluded (0)'
                else:
                    HA_spc.analyses[analysis.upper()]['PROCESS']['EXCLUDED']=1

                HA_spc.analyses[analysis.upper()]['PROCESS']['CLSEXP']=float(exclusion_results['CLs exp'])
                HA_spc.analyses[analysis.upper()]['PROCESS']['CLSOBS']=float(exclusion_results['CLs obs'])
                
                if 'bayes' in exclusion_results:
                    HA_spc.analyses[analysis.upper()]['PROCESS']['BAYES']=float(exclusion_results['bayes'])
                    #HA_spc.blockcomments['PROCESS'][analysis.upper()]['EXCLUDED']='Point Allowed (1)'
            ## overwrite the efficiency file so we can append it to spc
            #HA_spc.write(self.effname)
            """
            mubest=10.0
            muexpbest=10.0
            #analysisresults=[]
            newblock={}
            newblockcomments={}

            newindex=10
            for analysis in output_json_data:
                #getstatsfromdict(self,effdict,inputxs=None,with_significance=True,with_upper_limits=False)
                exclusion_results=self.analysishandlers[analysis].getstatsfromdict(output_json_data[analysis],None,False,True)
                print("============ %s ============== " %analysis)
                print(exclusion_results)
                newblock[str(newindex)]=exclusion_results['mu95exp']
                newblock[str(newindex+1)]=exclusion_results['mu95obs']
                newblockcomments[str(newindex)]=analysis+' (expected)'
                newblockcomments[str(newindex+1)]=analysis+' (observed)'
                #analysisresults.append([analysis.upper(),exclusion_results['mu95exp'],exclusion_results['mu95obs']])
                mubest=min(exclusion_results['mu95obs'],mubest)
                muexpbest=min(exclusion_results['mu95exp'],muexpbest)
                newindex=newindex+2
            if mubest < 1:
                newblock['1']=0
                newblockcomments['1']='Point Excluded (0)'
            else:
                newblock['1']=1
                newblockcomments['1']='Point Allowed (0)'
            newblock['2']=mubest
            newblockcomments['2']='Observed limit on signal strength'
            newblock['3']=muexpbest
            newblockcomments['3']='Expected limit on signal strength'
            data_point.spc.blocks['HACKANALYSIS '+self.process]=
            """
        if data_point.spc is None:
            data_point.spc=HA_spc
        else:
            data_point.spc.add(HA_spc)
        
       
        with open(spc_file,'a') as FO:
            for analysis in output_json_data:
                upanalysis=analysis.upper()
                FO.write('BLOCK PROCESS %s\n' % upanalysis)
                FO.write('XS    %.4e   # (fb)\n' % HA_spc.analyses[upanalysis]['PROCESS']['XS'])
                FO.write('Events   %d   \n' % HA_spc.analyses[upanalysis]['PROCESS']['Events'])
                if self.check_exclusion and 'MU95EXP' in HA_spc.analyses[upanalysis]['PROCESS']:
                    FO.write('MU95EXP    %.4e   # \n' % HA_spc.analyses[upanalysis]['PROCESS']['MU95EXP'])
                    FO.write('MU95OBS    %.4e   # \n' % HA_spc.analyses[upanalysis]['PROCESS']['MU95OBS'])
                    FO.write('CLSEXP    %.5f   # \n' % HA_spc.analyses[upanalysis]['PROCESS']['CLSEXP'])
                    FO.write('CLSOBS    %.5f   # \n' % HA_spc.analyses[upanalysis]['PROCESS']['CLSOBS'])

                    excl=int(HA_spc.analyses[upanalysis]['PROCESS']['EXCLUDED'])

                    if excl == 0:
                        FO.write('EXCLUDED    %d   # Point excluded (0) \n' %excl)
                    else:
                        FO.write('EXCLUDED    %d   # Point allowed (1) \n' %excl)
                        
                if 'BAYES' in HA_spc.analyses[upanalysis]['PROCESS']:
                    FO.write('BAYES    %.4e   # \n' % HA_spc.analyses[upanalysis]['PROCESS']['BAYES'])

                FO.write('BLOCK EFFICIENCIES %s # name, eff, rel. uncert. [systup   systdown]\n' % analysis)
                for region in output_json_data[analysis]["Regions"]:
                    if "sysup" in output_json_data[analysis]["Regions"][region]:
                        FO.write('%s   %.8e   %.8e   %.8e   %.8e\n' %(region,output_json_data[analysis]["Regions"][region]["efficiency"],output_json_data[analysis]["Regions"][region]["stat"]*0.01,output_json_data[analysis]["Regions"][region]["sysup"]*0.01,output_json_data[analysis]["Regions"][region]["sysdown"]*0.01))
                    else:
                        FO.write('%s   %.8e   %.8e\n' %(region,output_json_data[analysis]["Regions"][region]["efficiency"],output_json_data[analysis]["Regions"][region]["stat"]*0.01))
        #else:
            #debug.command_line_log("cat " + self.effname + " >> " + os.path.join(spc_file), log)   

            #for b in HA_spc.blocks:
            #    zslha.write_block_head(b,SF)

        #debug.command_line_log("cat " + self.effname + " >> " + os.path.join(spc_file), log)
        # Now do we store the cutflows and yoda files too? These can be picked up by copying everything

        ###Clean up the split directory -- otherwise it will be copied too!!!
        if not self.store_events and self.mode == 'LHE' and (self.n_cores > 1 or self.useGridPack):
            if lhe_events[-3:]=='.gz':
                ending = '.lhe.gz'
            else:
                ending = '.lhe'
            for x in range(self.n_cores):
                try:
                    os.remove(self.LHEfile+'_'+str(x)+ending)
                except Exception as e:
                    log.warning('Failed to remove split file, '+str(e))