Source code for bsmart.tools.MadGraphHackAnalysis

"""
Tool to run the pipeline madgraph to create lhe file then hackanalysis to drive pythia and perform analysis. 


Need to specify in tools:

.. code-block:: text

    "MadGraphHackAnalysis" : {
        "HackAnalysis" : "/path/to/executable", (e.g. analysePYTHIA_LHE.exe)
        "YAML file" "<NAME>.yaml"    (file for configuring HackAnalysis run  )
    }

plus, in addition, settings for running madgraph which can also be found in ../collider/MGaux.py
These include as obligatory:

* "MadGraph" : "/path/to/modeldir/bin/generate_events",   (if you have already generated the output directory for your process)

OR

* "Proc Card": "proc_card.dat",    ( for generating the madgraph directory based on a series of commands to provide to madgraph)
* "MadGraph Path": "/path/to/MG5_aMC_v3.5.XX/bin/mg5_aMC",     (so that we can run MadGraph )
* "Output Directory": "/path/to/where/you/want/to/put/the/output", (this is OPTIONAL: if you have the command output <outputname> in your proc_card.dat then BSMArt will find it)

other optional MadGraph settings include:

.. code-block:: text

    {
        "python2": "True",  (if you need to run madgraph via python2)
        "MadGraph Directory": "/path/to/modeldir",  (if you want to specify the path to the modeldir, otherwise the code will infer it)
        "QNUMBERS file": "myQNUMBERs.slha", (for models that have particles unknown to pythia)
        "MadSpin": "True", (if you want to run MadSpin and have it handled by BSMArt)
        "k-factor": 1.2, (for multiplying the cross-section by a k-factor)
        "Cross-sections" : "/path/to/csv/file/containing/cross-sections.csv",      (if you have tabulated cross-sections as a function of *one* variable to use instead of the one computed in MadGraph; this is documented in the InterpolateXSection.py file.)
    }

---------------------------

25/03/13: Make more robust, in case for some reason HackAnalysis fails to run when doing batches, will give two more tries with the same point






"""

__meta__ = {
    "name": "MadGraphHackAnalysis",
    "requires": ["pyyaml"],
    "settings": {
        "HackAnalysis": "Path to HackAnalysis executable",
        "YAML file": "Path to YAML config file",
        "MadGraph": "Path to generate_events",
        "Proc Card": "Path to proc_card.dat",
        "MadGraph Path": "Path to mg5_aMC",
        "Output Directory": "Path to output directory",
        "Mode": "LHE or HEPMC",
        "GridPack": "Boolean",
        "Check Absolute Convergence": "Boolean",
        "Check Convergence": "Boolean",
        "Efficiency Threshold": "Float",
        "Convergence Margin": "Float",
        "Check Exclusion": "Boolean",
        "Batch Size": "Int",
        "Store Events": "Boolean",
        "Cores": "Int"
    }
}


import os
import shutil

from bsmart import debug
from bsmart.HEPRun import HepTool,DataPoint

from bsmart.collider.pythiacfg import PYTHIACFG
from bsmart.collider.splitlhe import splitlhe
from bsmart.collider.MGaux import MadGraphRunner

import yaml
import json
from collections import OrderedDict
from bsmart import zslha
#import fnmatch
import sys
#import math

try:
    import importlib
except Exception as e:
    print("Failed to import importlib, "+str(e))


###############################################################


references=r'''@article{Goodsell:2021iwc,
    author = "Goodsell, Mark D. and Priya, Lakshmi",
    title = "{Long dead winos}",
    eprint = "2106.08815",
    archivePrefix = "arXiv",
    primaryClass = "hep-ph",
    doi = "10.1140/epjc/s10052-022-10188-1",
    journal = "Eur. Phys. J. C",
    volume = "82",
    number = "3",
    pages = "235",
    year = "2022"
}

@article{Goodsell:2024aig,
    author = "Goodsell, Mark D.",
    title = "{HackAnalysis 2: A powerful and hackable recasting tool}",
    eprint = "2406.10042",
    archivePrefix = "arXiv",
    primaryClass = "hep-ph",
    month = "6",
    year = "2024"
}

@article{Corpe:2025sbw,
    author = "Corpe, Louie and Haddad, Abdelhamid and Goodsell, Mark",
    title = "{Recasting the ATLAS search for displaced hadronic jets in the ATLAS calorimeter with additional jets or leptons using surrogate models}",
     eprint = "2502.10231",
    archivePrefix = "arXiv",
    primaryClass = "hep-ph",
    month = "2",
    year = "2025"
}
'''


 
[docs] class NewTool(HepTool): """ MadGraphHackAnalysis runs madgraph and extracts the cross-section plus uncertainty Various settings are possible: """ def __init__(self, name, settings,global_settings=None): HepTool.__init__(self, name, settings,global_settings) self.citations=references ## Optional "Process" setting to label the process examined if 'Process' in self.settings: self.process = self.settings['Process'] ## to allow labelling the processes else: self.process='' ## Optional "Mode" setting to choose "LHE" or "HEPMC", otherwise infer from the command name if 'Mode' in self.settings: tmode=self.settings['Mode'].upper() if tmode in ['LHE','HEPMC']: self.mode = tmode else: raise NameError("Operating mode not recognised; allowed values are LHE and HEPMC") else: if 'analysePYTHIA_LHE' in self.settings['HackAnalysis']: self.mode='LHE' elif 'analyseHEPMC' in self.settings['HackAnalysis']: self.mode='HEPMC' else: raise NameError("Executable not recognised and Mode not specified; allowed values are LHE and HEPMC") if global_settings is not None and global_settings.varnames is not None: self.varnames=global_settings.varnames else: self.varnames=None ## Require "YAML file" setting for running HackAnalysis if 'YAML file' not in self.settings: #self.log.error('No YAML file present!') raise NameError('No YAML file present!') tpath=os.path.dirname(self.settings['YAML file']) if tpath == '': self.yamlname=self.settings['YAML file'] #self.yaml = os.path.join(self.settings['cwd'],self.yamlname) #yamldir=self.settings['cwd'] yamldir=os.getcwd() self.yaml = os.path.join(yamldir,self.yamlname) else: self.yaml = self.settings['YAML file'] self.yamlname=os.path.split(self.yaml)[-1] #yamldir=os.path.split(os.path.dirname(self.yaml))[0] ### I no longer know why we are doing this yamldir = tpath # READ OTHER INFO FROM THE YAML!!!! try: with open(self.yaml, 'r') as stream: self.yaml_data = yaml.safe_load(stream) except: raise NameError('Failed to open HackAnalysis yaml file!') self.qnumbers_name=None if 'QNUMBERS file' in self.yaml_data: ## need to know whether we have an absolute path for the QNUMBERS file, or no path: ## if there's no path then we need to copy it to the temp directory for running HackAnalysis ## We can extract it from the MG runner tpath=os.path.dirname(self.yaml_data['QNUMBERS file']) if tpath == '': # need to copy the QNUMBERS file to the working directory self.qnumbers_name=self.yaml_data['QNUMBERS file'] if 'QNUMBERS file' not in self.settings: self.settings['QNUMBERS file'] = self.yaml_data['QNUMBERS file'] #else: # self.qnumbers_name=None ## "GridPack": "True" enables gridpack mode, perfect for running large numbers of events ## GridPack not yet supported for HEPMC run because we only run single core HEPMC in HA2 if 'GridPack' in self.settings and self.mode=='LHE': self.useGridPack=eval(self.settings['GridPack']) else: self.useGridPack=False ## "Check Absolute Convergence" setting when running batches of events, to check that cutflows converge to fixed precision if "Check Absolute Convergence" in self.settings: self.check_convergence = eval(self.settings['Check Absolute Convergence']) else: self.check_convergence = False ## "Check Convergence" setting when running batches of events, to check that cutflows converge to suitable precision depending on experimental sensitivity if "Check Convergence" in self.settings: self.check_exp_convergence = eval(self.settings['Check Convergence']) if self.check_exp_convergence: self.check_convergence = True else: self.check_exp_convergence=False if "Efficiency Threshold" in self.settings: self.convergence_threshold=float(self.settings['Efficiency Threshold']) else: self.convergence_threshold=1e-5 if "Convergence Margin" in self.settings: self.convergence_error=float(self.settings['Convergence Margin']) self.safety_ratio=self.convergence_error else: self.convergence_error=0.1 self.safety_ratio=0.25 HAdir=os.path.dirname(settings['HackAnalysis']) #HApythonpath=os.path.join(os.path.dirname(settings['HackAnalysis']),'Statistics') HApythonpath=os.path.join(HAdir,'Statistics') if not os.path.exists(HApythonpath): # presumably using new cmake, so the stats directory is different if 'bin' in HAdir: HAdir=os.path.dirname(HAdir) HApythonpath=os.path.join(HAdir,'Statistics') # Modification 7/2/2025 #HAstatspath=os.path.join(HApythonpath,'HAStats') HAstatspath=HApythonpath if "Check Exclusion" in self.settings: self.check_exclusion = eval(self.settings['Check Exclusion']) if self.check_exclusion: #print('Adding stats path '+HAstatspath) if os.path.exists(HAstatspath): if HApythonpath not in sys.path: sys.path.insert(0,HApythonpath) #self.exclusions_module=importlib.import_module('HAstats') self.exclusions_module=importlib.import_module('HAStats.HAstats') self.analysishandlers={} infopath=os.path.join(HApythonpath,"HAStats","Analyses") for analysis in self.yaml_data['analyses']: analysisUPPER=analysis.upper() try: with open(os.path.join(infopath,analysisUPPER+'_info.json'),'r') as thisinfo: analysis_infodict=json.load(thisinfo) self.analysishandlers[analysis] = self.exclusions_module.AnalysisStats(analysis_infodict) except Exception as e: print("Failed to initialise analysis %s: %s" %(analysis,str(e))) if analysis in self.analysishandlers: print("Still somehow initialised the analysis, deleting") self.analysishandlers.pop(analysis) continue else: self.check_exclusion=False self.nevents=None ## We get nevents from the yaml file if it is there, otherwise use default of 10000 if 'nevents' in self.yaml_data['settings']: self.nevents=int(self.yaml_data['settings']['nevents']) if self.nevents is None and self.useGridPack: self.nevents=10000 self.combiner = None ## "Batch Size" setting if you want batch mode if 'Batch Size' in self.settings: self.batch_size=int(self.settings['Batch Size']) if self.batch_size < self.nevents: # need to modify the yaml file to only run the batch-size number of events self.yaml_data['settings']['nevents'] = self.batch_size # Import routines for merging json files etc """ HApythonpath=os.path.join(os.path.dirname(settings['HackAnalysis']),'Statistics') if os.path.exists(HApythonpath): sys.path.insert(0,HApythonpath) """ if os.path.exists(HApythonpath): if HApythonpath not in sys.path: sys.path.insert(0,HApythonpath) self.combiner=importlib.import_module('combine_ha') if self.check_convergence: self.convergence_module=importlib.import_module('ha_convergence') self.convergence=self.convergence_module.HAconverger() else: raise NameError("Cannot find Statistics path "+HApythonpath) else: self.batch_size=self.nevents if 'JSON file' in self.yaml_data['settings']: self.HAjson=self.yaml_data['settings']['JSON file'] else: self.HAjson='hackanalysis.json' #Set up MadGraph #if self.mode=='LHE' and 'Extra Commands' not in self.settings: if 'Extra Commands' not in self.settings: # need to make sure showering is turned off in madgraph, if using default settings # Otherwise we trust the user to know what they are doing!! if self.mode=='LHE': commandlist=[['shower=OFF'],['detector=OFF'],['analysis=OFF']] else: commandlist=[['shower=Pythia8'],['detector=OFF'],['analysis=OFF']] if 'MadSpin' in self.settings and eval(self.settings['MadSpin']): commandlist.append(['madspin=ON']) else: commandlist.append(['madspin=OFF']) commandlist.append(['done']) if 'run_card Commands' in self.settings: ## 'run_card Commands' in settings are commands passed to madgraph to update the run_card or parameters # These are in addition to the automatically created ones (e.g. setting the number of events) for rccommand in self.settings['run_card Commands']: commandlist.append(rccommand) else: commandlist.append(['update dependent']) #if 'MadSpin' in self.settings and eval(self.settings['MadSpin']): #self.settings['Extra Commands']=[['shower=OFF'],['detector=OFF'],['analysis=OFF'],['madspin=ON'],['done'],[eventsline],['done']] #else: #self.settings['Extra Commands']=[['shower=OFF'],['detector=OFF'],['analysis=OFF'],['madspin=OFF'],['done'],[eventsline],['done']] # if we want to use the GridPack mode but in one big batch, we only need a minimum number of initial events # Otherwise we set the initial events equal to the batch size (which could be equal to the total number of events in one-shot runs) if self.useGridPack and self.batch_size >= self.nevents: commandlist.append(['set nevents 10000']) ## basic number of events for gridpack run else: commandlist.append(['set nevents '+str(self.batch_size)]) """ if self.useGridPack: commandlist.append(['set nevents 10000']) ## basic number of events for gridpack run else: if 'nevents' in self.yaml_data['settings']: commandlist.append(['set nevents '+str(self.yaml_data['settings']['nevents'])]) """ commandlist.append(['done']) self.settings['Extra Commands']=commandlist if 'Store Events' in self.settings: self.store_events=eval(self.settings['Store Events']) else: self.store_events = False self.hackanalysis_cmd = self.settings['HackAnalysis']+' '+self.yamlname if 'Cores' in self.settings: self.n_cores=int(self.settings['Cores']) self.yaml_data['settings']['cores'] = self.n_cores else: if 'cores' in self.yaml_data['settings']: self.n_cores = int(self.yaml_data['settings']['cores']) self.settings['Cores']=self.n_cores else: self.n_cores = 1 self.yaml_data['settings']['cores'] = 1 ## We use the temporary directory for the madgraph model directory if not told otherwise if global_settings is not None: #print("have global settings!") if hasattr(global_settings,'work_dir') and global_settings.work_dir is not None: #print("Setting TempDir!") self.settings['TempDir'] = global_settings.work_dir #print(self.settings['TempDir']) try: self.MGrunner=MadGraphRunner(self.settings) except Exception as e: raise NameError('Failed to initialise MadGraph: '+str(e)) if 'LHE file' in self.yaml_data['settings']: self.LHEfile=self.yaml_data['settings']['LHE file'] else: self.LHEfile='Split/split' if self.mode == 'LHE': if 'Config file' not in self.yaml_data['settings']: raise NameError('No cfg file for pythia present!') self.cfgname=self.yaml_data['settings']['Config file'] #self.cfgfile = os.path.join(self.settings['cwd'],self.cfgname) self.cfgfile =os.path.join(yamldir,self.cfgname) self.pythia_cfg = PYTHIACFG() try: self.pythia_cfg.read(self.cfgfile) except Exception as e: raise NameError("Failed to initialise pythia configuration file, " + str(e)) if 'Efficiency Filename' in self.yaml_data['settings']: self.effname = self.yaml_data['settings']['Efficiency Filename'] else: self.effname = 'test.eff' self.yaml_data['settings']['Efficiency Filename'] = self.effname if 'Cutflow Filename' in self.yaml_data['settings']: self.cutflowname = self.yaml_data['settings']['Cutflow Filename'] else: self.cutflowname = 'test_cf.eff' self.yaml_data['settings']['Cutflow Filename'] = self.cutflowname if 'Histogram Filename' in self.yaml_data['settings']: self.histoname = self.yaml_data['settings']['Histogram Filename'] else: self.histoname = 'test.yoda' self.yaml_data['settings']['Histogram Filename'] = self.histoname
[docs] def run(self, spc_file, temp_dir, log,data_point): ## set up madgraph configuration #var_dict = data_point.get_var_dict(self.varnames) ## HackAnalysis yaml file: local_yaml = os.path.join(temp_dir, self.yamlname) if self.useGridPack: # First remove any existing gridpack if it already exists gridpacktarball=os.path.join(self.MGrunner.MG_Model_Dir,'run_01_gridpack.tar') if os.path.exists(gridpacktarball): os.remove(gridpacktarball) gridpacktarball=os.path.join(self.MGrunner.MG_Model_Dir,'run_01_gridpack.tar.gz') if os.path.exists(gridpacktarball): os.remove(gridpacktarball) MGinfo=self.MGrunner.run(spc_file,temp_dir,log,data_point) # if the reported cross-section is too small, is there any point in running? if self.check_exp_convergence: if MGinfo['xs']*1e3*139 < 1: if self.check_exclusion: # point clearly not excluded ### ADD CODE HERE! log.info("Not trivially Excluded!") #return ## now split lhe file, if needed # otherwise we write the full path to the lhe file in the yaml file if self.mode == 'LHE': lhe_events=None for tfile in os.listdir(self.MGrunner.MG_Run_Dir): if tfile[-7:] == '.lhe.gz' or tfile[-4:] == '.lhe': lhe_events=os.path.join(self.MGrunner.MG_Run_Dir,tfile) break if not os.path.exists(lhe_events): raise NameError("Failed to generate lhe events!") if self.n_cores > 1: splitlhe(self.n_cores,lhe_events,os.path.join(temp_dir,self.LHEfile)) else: self.yaml_data['settings']['LHE file'] = lhe_events #if os.path.exists(os.path.join(self.MG_Run_Dir,)) else: ## HEPMC hepmc_events=None for tfile in os.listdir(self.MGrunner.MG_Run_Dir): if tfile[-9:] == '.hepmc.gz' or tfile[-6:] == '.hepmc': hepmc_events=os.path.join(self.MGrunner.MG_Run_Dir,tfile) break if not os.path.exists(hepmc_events): raise NameError("Failed to generate hepmc events!") self.yaml_data['settings']['HEPMC file'] = hepmc_events ## Now for procesing the files to set up for HA with open(local_yaml,'w') as stream: whatever = yaml.dump(self.yaml_data,stream) var_dict = data_point.get_all_dict() # Generate pythia config file each time (since we might need to, e.g. for changing qcut ) ## Now for HackAnalysis stuff if self.mode == 'LHE': local_cfg = os.path.join(temp_dir, self.cfgname) #self.pythia_cfg.write(local_cfg,data_point.inputs,self.varnames) self.pythia_cfg.write_dict(local_cfg,var_dict) ## Now to run if os.path.exists(self.HAjson): os.remove(self.HAjson) if os.path.exists(self.effname): os.remove(self.effname) if os.path.exists(self.cutflowname): os.remove(self.cutflowname) if self.histoname is not None: if os.path.exists(self.histoname): os.remove(self.histoname) os.chdir(temp_dir) if self.qnumbers_name is not None: if not os.path.exists(self.qnumbers_name): with open(self.qnumbers_name,'w') as QNF: QNF.write(self.MGrunner.QNUMBERSfile) failed_attempts=0 try: debug.command_line_log(self.hackanalysis_cmd, log) except Exception as e: failed_attempts=1 log.error('Error running HackAnalysis: '+str(e)) if not os.path.exists(self.effname) or not os.path.exists(self.cutflowname): ## point has failed failed_attempts=1 log.error('Failed to run HackAnalysis, efficiency or cutflow missing') raise NameError('Failed to run HackAnalysis, efficiency or cutflow missing') output_json_data=OrderedDict() if os.path.exists(self.HAjson): with open(self.HAjson,'r') as jsonfile: output_json_data=json.load(jsonfile,object_pairs_hook=OrderedDict) else: failed_attempts=1 log.error('Json file missing') #raise NameError('Failed to run HackAnalysis, json missing') ### If gridpack mode, now we need the gridpack & batch run needed_extra_events=self.nevents improve_regions={} converged=False doneevents=0 ## here we use the number of events we *should* have generated. Maybe better to read the number from the output if failed_attempts ==0: if self.useGridPack and self.batch_size >= self.nevents: doneevents=10000 else: doneevents=self.batch_size if self.check_convergence: # check convergence or exclusion if self.check_exp_convergence: #check_convergence_exclusion(self,hackanalysis_output,nloxs=None,input_max_batch_size=None,safety_ratio=0.25,verbose=False) needed_extra_events,improve_regions,_=self.convergence.check_convergence_exclusion(output_json_data,None,self.batch_size,self.safety_ratio,True) else: #check_convergence_fixed_goal(self,efficiencydata,rel_uncertainty=0.1,eff_threshold=1e-5,input_max_batch_size=None,verbose=True) #needed_extra_events,improve_regions,_=self.convergence.check_convergence_fixed_goal(output_json_data,1.e-1,1.e-5,self.batch_size) needed_extra_events,improve_regions,_=self.convergence.check_convergence_fixed_goal(output_json_data,self.convergence_error,self.convergence_threshold,self.batch_size,True) if needed_extra_events > 0: outstr='' log.debug('Estimate need to generate %d more events' %needed_extra_events) for analysis in improve_regions: linestring=','.join(improve_regions[analysis]) outstr=outstr+analysis+': '+linestring+'\n' log.debug('Improve regions: \n'+outstr) else: converged=True if self.useGridPack and not converged: # First need to generate the GridPack try: if self.MGrunner.python2: debug.command_line_log('python2 '+os.path.join(self.MGrunner.MG_Model_Dir,'bin','madevent')+' create_gridpack',log) else: debug.command_line_log(os.path.join(self.MGrunner.MG_Model_Dir,'bin','madevent')+' create_gridpack',log) except Exception as e: log.error('Could not run madevent to create gridpack: '+str(e)) # then check gridpack was created in the model directory (that's where it gets put ...) # since we remove the run_01 should always be called run_01 gridpacktarball=os.path.join(self.MGrunner.MG_Model_Dir,'run_01_gridpack.tar.gz') if not os.path.exists(gridpacktarball): gridpacktarball=os.path.join(self.MGrunner.MG_Model_Dir,'run_01_gridpack.tar') if not os.path.exists(gridpacktarball): raise NameError('No gridpack found at '+gridpacktarball) # now extract it log.debug('Extracting gridpack '+gridpacktarball) log.debug('Changing directory to '+temp_dir) os.chdir(temp_dir) # just to make sure ... madeventdir=os.path.join(temp_dir,'madevent') if os.path.exists(madeventdir): # clean up if it is already there debug.command_line_log('chmod a+w -R madevent',log) shutil.rmtree(madeventdir) #now unpack it debug.command_line_log('tar -xf '+gridpacktarball,log) os.chdir(temp_dir) # make read only debug.command_line_log('chmod a-w -R madevent',log) # if python2 -> correct the run.sh script if self.MGrunner.python2: OFrun=open('run.sh','w') OFrun.write('#!/bin/bash\n') OFrun.write('basedir=%s\n'%temp_dir) OFrun.write('export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${basedir}/madevent/lib:${basedir}/HELAS/lib\n') OFrun.write('python2 ${basedir}/madevent/bin/gridrun $1 $2 1\n') OFrun.write('mv ./Events/GridRun_${2}/unweighted_events.lhe.gz events.lhe.gz\n') OFrun.write('rm -rf Events Cards P* *.dat randinit &> /dev/null\n') OFrun.close() ## remove unnecessary files? # Now create the directories for the gridpacks to run # New: run directories are created each time we run the gridpack import random import subprocess #seedstart=random.randint(1,sys.maxsize-1) seedstart=random.randint(1,1000000) log.debug('Setting random seed start to %d' % seedstart) if self.batch_size < self.nevents: coreevents=int(self.batch_size/self.n_cores)+1 else: coreevents=int(self.nevents/self.n_cores)+1 var_dict = data_point.get_all_dict() if self.mode == 'LHE': local_cfg = os.path.join(temp_dir, self.cfgname) #self.pythia_cfg.write(local_cfg,data_point.inputs,self.varnames) self.pythia_cfg.write_dict(local_cfg,var_dict) def rungp(np,seed): log.debug('starting run for core %d, requesting %d events with random seed %d' %(np,coreevents,seed+np)) ## added the /dev/null to clear out the mess command='../run.sh %d %d' %(coreevents,seed+np) #command='../run.sh %d %d' %(coreevents,seedstart+np) wdir=os.path.join(temp_dir,'gpr_%d' %np) if os.path.exists(wdir): shutil.rmtree(wdir) os.mkdir(wdir) """ command_line = subprocess.Popen(command, shell=True, cwd=wdir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = command_line.communicate() """ # new 22/07/24, don't want the output! Direct to null in a platform indep way command_line = subprocess.Popen(command, shell=True, cwd=wdir, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) _, err = command_line.communicate() if err != b'': log.error(err.decode("utf-8")) # check events exist eventfile=os.path.join(wdir,'events.lhe.gz') ending='.lhe.gz' if not os.path.exists(eventfile): eventfile=os.path.join(wdir,'events.lhe') ending='.lhe' if not os.path.exists(eventfile): raise NameError('Failed to produce events for core %d' %np) # move file to desired location shutil.move(eventfile,self.LHEfile+'_'+str(np)+ending) ## clean up shutil.rmtree(wdir) # return the name of the file produced return self.LHEfile+'_'+str(np)+ending # now have already set doneevents and converged check #doneevents=0 #converged = False while doneevents < self.nevents and not converged: seedstart+=self.n_cores doneevents += self.batch_size if self.n_cores > 1: import multiprocessing as mp import time lhe_events=None for tfile in os.listdir(self.MGrunner.MG_Run_Dir): if tfile[-7:] == '.lhe.gz' or tfile[-4:] == '.lhe': lhe_events=os.path.join(self.MGrunner.MG_Run_Dir,tfile) break processes=[] splitdir=os.path.dirname(self.LHEfile) if splitdir != '': if not os.path.exists(splitdir): os.makedirs(splitdir) # move events to desired location for np in range(self.n_cores): #processes.append(mp.Process(target=debug.command_line_log_PID,args=('../run.sh %d %d' %(coreevents,seedstart+np),os.path.join(temp_dir,'gpr_%d' %np)))) processes.append(mp.Process(target=rungp,args=(np,seedstart))) for p in processes: p.start() time.sleep(0.1) while True: time.sleep(10) allExit = True for t in processes: if t.exitcode is None: allExit = False break if allExit: break for p in processes: p.join() else: lhe_events=rungp(0,seedstart) self.yaml_data['settings']['LHE file'] = lhe_events os.chdir(temp_dir) ## Now to run if os.path.exists(self.HAjson): os.remove(self.HAjson) if os.path.exists(self.effname): os.remove(self.effname) if os.path.exists(self.cutflowname): os.remove(self.cutflowname) if self.histoname is not None: if os.path.exists(self.histoname): os.remove(self.histoname) #self.yaml_data['settings']['LHE file'] = lhe_events with open(local_yaml,'w') as stream: whatever = yaml.dump(self.yaml_data,stream) debug.command_line_log(self.hackanalysis_cmd, log) if not os.path.exists(self.effname) or not os.path.exists(self.cutflowname): ## point has failed log.error('Failed to run HackAnalysis') raise NameError('Failed to run HackAnalysis') if os.path.exists(self.HAjson): with open(self.HAjson,'r') as jsonfile: new_json_data=json.load(jsonfile,object_pairs_hook=OrderedDict) if self.combiner is None: output_json_data=new_json_data else: output_json_data=self.combiner.addresults(output_json_data,new_json_data) # write merged files so the user can keep track (or in case of crashes!) if self.batch_size < self.nevents: with open('temp_HA_output.json','w') as fp: json.dump(output_json_data,fp,indent=4) if self.combiner is not None: self.combiner.write_efficiency_file(output_json_data,'temp_HA_efficiency_file.eff') if self.check_convergence: # check convergence or exclusion if self.check_exp_convergence: #check_convergence_exclusion(self,hackanalysis_output,nloxs=None,input_max_batch_size=None,safety_ratio=0.25,verbose=False) needed_extra_events,improve_regions,_=self.convergence.check_convergence_exclusion(output_json_data,None,self.batch_size,self.safety_ratio) else: #check_convergence_fixed_goal(self,efficiencydata,rel_uncertainty=0.1,eff_threshold=1e-5,input_max_batch_size=None,verbose=True) #needed_extra_events,improve_regions,_=self.convergence.check_convergence_fixed_goal(output_json_data,1.e-1,1.e-5,self.batch_size) needed_extra_events,improve_regions,_=self.convergence.check_convergence_fixed_goal(output_json_data,self.convergence_error,self.convergence_threshold,self.batch_size) if needed_extra_events > 0: outstr='' log.debug('Estimate need to generate %d more events' %needed_extra_events) for analysis in improve_regions: linestring=','.join(improve_regions[analysis]) outstr=outstr+analysis+': '+linestring+'\n' log.debug('Improve regions: \n'+outstr) else: converged=True os.chdir(temp_dir) # If we have multiple batches, need to merge the outputs into one file, # but otherwise we will just keep the original if self.batch_size < self.nevents: with open(self.HAjson,'w') as jsonfile: json.dump(output_json_data,jsonfile,indent=4) if self.combiner is not None: self.combiner.write_efficiency_file(output_json_data,self.effname) if os.path.exists('temp_HA_output.json'): os.remove('temp_HA_output.json') if os.path.exists('temp_HA_efficiency_file.eff'): os.remove('temp_HA_efficiency_file.eff') debug.command_line_log('chmod a+w -R madevent',log) shutil.rmtree('madevent') #################################################### elif self.batch_size < self.nevents: # batch mode but not gridpack ## first write the temporary files before first loop, since we already ran HA # Let's now harden the tool since on my computer MadGraph has a tendency to randomly crash if failed_attempts ==0: with open('temp_HA_output.json','w') as fp: json.dump(output_json_data,fp,indent=4) if self.combiner is not None: self.combiner.write_efficiency_file(output_json_data,'temp_HA_efficiency_file.eff') while doneevents < self.nevents and not converged and failed_attempts < 3: # this is for 3 *consecutive* failures we give up # first generate more events try: MGinfo=self.MGrunner.run(spc_file,temp_dir,log,data_point) except Exception as e: log.debug('Problem running MadGraph: '+str(e)) failed_attempts+=1 continue if self.mode == 'LHE': lhe_events=None for tfile in os.listdir(self.MGrunner.MG_Run_Dir): if tfile[-7:] == '.lhe.gz' or tfile[-4:] == '.lhe': lhe_events=os.path.join(self.MGrunner.MG_Run_Dir,tfile) break if not os.path.exists(lhe_events): # Allow for failure failed_attempts+=1 continue #raise NameError("Failed to generate lhe events!") if self.n_cores > 1: splitlhe(self.n_cores,lhe_events,os.path.join(temp_dir,self.LHEfile)) else: self.yaml_data['settings']['LHE file'] = lhe_events else: ## HEPMC hepmc_events=None for tfile in os.listdir(self.MGrunner.MG_Run_Dir): if tfile[-9:] == '.hepmc.gz' or tfile[-6:] == '.hepmc': hepmc_events=os.path.join(self.MGrunner.MG_Run_Dir,tfile) break if not os.path.exists(hepmc_events): failed_attempts+=1 continue #raise NameError("Failed to generate hepmc events!") self.yaml_data['settings']['HEPMC file'] = hepmc_events if os.path.exists(self.HAjson): os.remove(self.HAjson) if os.path.exists(self.effname): os.remove(self.effname) if os.path.exists(self.cutflowname): os.remove(self.cutflowname) if self.histoname is not None: if os.path.exists(self.histoname): os.remove(self.histoname) #self.yaml_data['settings']['LHE file'] = lhe_events with open(local_yaml,'w') as stream: whatever = yaml.dump(self.yaml_data,stream) attempt_ok = True try: debug.command_line_log(self.hackanalysis_cmd, log) if not os.path.exists(self.effname) or not os.path.exists(self.cutflowname): ## point has failed log.error('Failed to run HackAnalysis') attempt_ok=False #raise NameError('Failed to run HackAnalysis') if os.path.exists(self.HAjson): with open(self.HAjson,'r') as jsonfile: new_json_data=json.load(jsonfile,object_pairs_hook=OrderedDict) output_json_data=self.combiner.addresults(output_json_data,new_json_data) else: attempt_ok=False except Exception as e: log.error('Error running HackAnalysis: '+str(e)) log.error('That makes %d consecutive failed attempts' %failed_attempts) attempt_ok=False if attempt_ok: failed_attempts=0 doneevents += self.batch_size # write merged files so the user can keep track (or in case of crashes!) if self.batch_size < self.nevents: with open('temp_HA_output.json','w') as fp: json.dump(output_json_data,fp,indent=4) self.combiner.write_efficiency_file(output_json_data,'temp_HA_efficiency_file.eff') else: failed_attempts+=1 if self.check_convergence and attempt_ok: # check convergence or exclusion if self.check_exp_convergence: #check_convergence_exclusion(self,hackanalysis_output,nloxs=None,input_max_batch_size=None,safety_ratio=0.25,verbose=False) needed_extra_events,improve_regions,_=self.convergence.check_convergence_exclusion(output_json_data,None,self.batch_size,self.safety_ratio) else: #check_convergence_fixed_goal(self,efficiencydata,rel_uncertainty=0.1,eff_threshold=1e-5,input_max_batch_size=None,verbose=True) #needed_extra_events,improve_regions,_=self.convergence.check_convergence_fixed_goal(output_json_data,1.e-1,1.e-5,self.batch_size) needed_extra_events,improve_regions,_=self.convergence.check_convergence_fixed_goal(output_json_data,self.convergence_error,self.convergence_threshold,self.batch_size) if needed_extra_events > 0: outstr='' log.debug('Estimate need to generate %d more events' %needed_extra_events) for analysis in improve_regions: linestring=','.join(improve_regions[analysis]) outstr=outstr+analysis+': '+linestring+'\n' log.debug('Improve regions: \n'+outstr) else: log.debug('Point has converged!') converged=True # finished event loop for non-gridpack mode, dump the final file with open(self.HAjson,'w') as jsonfile: json.dump(output_json_data,jsonfile,indent=4) self.combiner.write_efficiency_file(output_json_data,self.effname) if os.path.exists('temp_HA_output.json'): os.remove('temp_HA_output.json') if os.path.exists('temp_HA_efficiency_file.eff'): os.remove('temp_HA_efficiency_file.eff') #################################################### ## Here we have finished running # Now to append results to the spc file HA_spc=zslha.read(self.effname) if self.check_exclusion: for analysis in output_json_data: if analysis not in self.analysishandlers: # have to check that it was initialised correctly... continue #getstatsfromdict(self,effdict,inputxs=None,with_significance=True,with_upper_limits=False) try: exclusion_results=self.analysishandlers[analysis].getstatsfromdict(output_json_data[analysis],None,False,True) except Exception as e: print("Exception %s running exclusion results for analyis %s, giving mu = 10" %(str(e),analysis)) exclusion_results={} exclusion_results['mu95exp']=10.0 exclusion_results['mu95obs']=10.0 exclusion_results['CLs exp']=-1.0 exclusion_results['CLs obs']=-1.0 #continue print("============ %s ============== " %analysis) print(exclusion_results) HA_spc.analyses[analysis.upper()]['PROCESS']['MU95EXP']=float(exclusion_results['mu95exp']) HA_spc.analyses[analysis.upper()]['PROCESS']['MU95OBS']=float(exclusion_results['mu95obs']) #HA_spc.blockcomments['PROCESS'][analysis.upper()]['MU95EXP']=analysis+' (expected)' #HA_spc.blockcomments['PROCESS'][analysis.upper()]['MU95OBS']=analysis+' (observed)' if float(exclusion_results['mu95obs']) < 1: HA_spc.analyses[analysis.upper()]['PROCESS']['EXCLUDED']=0 #HA_spc.blockcomments['PROCESS'][analysis.upper()]['EXCLUDED']='Point Excluded (0)' else: HA_spc.analyses[analysis.upper()]['PROCESS']['EXCLUDED']=1 HA_spc.analyses[analysis.upper()]['PROCESS']['CLSEXP']=float(exclusion_results['CLs exp']) HA_spc.analyses[analysis.upper()]['PROCESS']['CLSOBS']=float(exclusion_results['CLs obs']) if 'bayes' in exclusion_results: HA_spc.analyses[analysis.upper()]['PROCESS']['BAYES']=float(exclusion_results['bayes']) #HA_spc.blockcomments['PROCESS'][analysis.upper()]['EXCLUDED']='Point Allowed (1)' ## overwrite the efficiency file so we can append it to spc #HA_spc.write(self.effname) """ mubest=10.0 muexpbest=10.0 #analysisresults=[] newblock={} newblockcomments={} newindex=10 for analysis in output_json_data: #getstatsfromdict(self,effdict,inputxs=None,with_significance=True,with_upper_limits=False) exclusion_results=self.analysishandlers[analysis].getstatsfromdict(output_json_data[analysis],None,False,True) print("============ %s ============== " %analysis) print(exclusion_results) newblock[str(newindex)]=exclusion_results['mu95exp'] newblock[str(newindex+1)]=exclusion_results['mu95obs'] newblockcomments[str(newindex)]=analysis+' (expected)' newblockcomments[str(newindex+1)]=analysis+' (observed)' #analysisresults.append([analysis.upper(),exclusion_results['mu95exp'],exclusion_results['mu95obs']]) mubest=min(exclusion_results['mu95obs'],mubest) muexpbest=min(exclusion_results['mu95exp'],muexpbest) newindex=newindex+2 if mubest < 1: newblock['1']=0 newblockcomments['1']='Point Excluded (0)' else: newblock['1']=1 newblockcomments['1']='Point Allowed (0)' newblock['2']=mubest newblockcomments['2']='Observed limit on signal strength' newblock['3']=muexpbest newblockcomments['3']='Expected limit on signal strength' data_point.spc.blocks['HACKANALYSIS '+self.process]= """ if data_point.spc is None: data_point.spc=HA_spc else: data_point.spc.add(HA_spc) with open(spc_file,'a') as FO: for analysis in output_json_data: upanalysis=analysis.upper() FO.write('BLOCK PROCESS %s\n' % upanalysis) FO.write('XS %.4e # (fb)\n' % HA_spc.analyses[upanalysis]['PROCESS']['XS']) FO.write('Events %d \n' % HA_spc.analyses[upanalysis]['PROCESS']['Events']) if self.check_exclusion and 'MU95EXP' in HA_spc.analyses[upanalysis]['PROCESS']: FO.write('MU95EXP %.4e # \n' % HA_spc.analyses[upanalysis]['PROCESS']['MU95EXP']) FO.write('MU95OBS %.4e # \n' % HA_spc.analyses[upanalysis]['PROCESS']['MU95OBS']) FO.write('CLSEXP %.5f # \n' % HA_spc.analyses[upanalysis]['PROCESS']['CLSEXP']) FO.write('CLSOBS %.5f # \n' % HA_spc.analyses[upanalysis]['PROCESS']['CLSOBS']) excl=int(HA_spc.analyses[upanalysis]['PROCESS']['EXCLUDED']) if excl == 0: FO.write('EXCLUDED %d # Point excluded (0) \n' %excl) else: FO.write('EXCLUDED %d # Point allowed (1) \n' %excl) if 'BAYES' in HA_spc.analyses[upanalysis]['PROCESS']: FO.write('BAYES %.4e # \n' % HA_spc.analyses[upanalysis]['PROCESS']['BAYES']) FO.write('BLOCK EFFICIENCIES %s # name, eff, rel. uncert. [systup systdown]\n' % analysis) for region in output_json_data[analysis]["Regions"]: if "sysup" in output_json_data[analysis]["Regions"][region]: FO.write('%s %.8e %.8e %.8e %.8e\n' %(region,output_json_data[analysis]["Regions"][region]["efficiency"],output_json_data[analysis]["Regions"][region]["stat"]*0.01,output_json_data[analysis]["Regions"][region]["sysup"]*0.01,output_json_data[analysis]["Regions"][region]["sysdown"]*0.01)) else: FO.write('%s %.8e %.8e\n' %(region,output_json_data[analysis]["Regions"][region]["efficiency"],output_json_data[analysis]["Regions"][region]["stat"]*0.01)) #else: #debug.command_line_log("cat " + self.effname + " >> " + os.path.join(spc_file), log) #for b in HA_spc.blocks: # zslha.write_block_head(b,SF) #debug.command_line_log("cat " + self.effname + " >> " + os.path.join(spc_file), log) # Now do we store the cutflows and yoda files too? These can be picked up by copying everything ###Clean up the split directory -- otherwise it will be copied too!!! if not self.store_events and self.mode == 'LHE' and (self.n_cores > 1 or self.useGridPack): if lhe_events[-3:]=='.gz': ending = '.lhe.gz' else: ending = '.lhe' for x in range(self.n_cores): try: os.remove(self.LHEfile+'_'+str(x)+ending) except Exception as e: log.warning('Failed to remove split file, '+str(e))