HIPO4 C++ Library 4.4.1
Columnar I/O library for CLAS12 physics data
Loading...
Searching...
No Matches
hipo::chain Class Reference

Chain multiple HIPO files for unified processing. More...

#include <chain.h>

Public Types

using iterator = ChainIterator
 
using const_iterator = ChainIterator
 

Public Member Functions

 chain (int threads=0, bool progress=true, bool verbose=false)
 Construct a chain with specified thread count.
 
int add (std::string_view filename)
 Add a single file to the chain.
 
int add (std::vector< std::string > &filenames)
 Add multiple files to the chain.
 
int add_pattern (std::string_view pattern)
 Add files matching a glob pattern.
 
void clear ()
 Remove all files from the chain.
 
std::size_t size () const noexcept
 Number of files in the chain.
 
bool empty () const noexcept
 Check if chain is empty.
 
int get_nb_files () const noexcept
 Legacy alias for size()
 
const FileInfo & operator[] (std::size_t index) const
 Get file info by index.
 
const FileInfo & file_info (int index) const
 Legacy alias for operator[].
 
std::vector< FileInfo > & files () noexcept
 Get all file infos.
 
void set_tags (const std::vector< long > &tags)
 Set event tags for filtering.
 
void set_threads (int n)
 Set number of processing threads.
 
void set_progress (bool show)
 Enable/disable progress display.
 
void set_verbose (bool verbose)
 Enable/disable verbose output.
 
void open (bool validate_all=true)
 Validate and optionally load metadata for all files.
 
void scan ()
 Scan and display detailed information about all files.
 
void list () const
 Print list of files in chain.
 
banklist getBanks (const std::vector< std::string > &names)
 Create a banklist from bank names using the first file's dictionary.
 
banklist get_banks (const std::vector< std::string > &names)
 Alias for getBanks (snake_case)
 
iterator begin ()
 Get iterator to first event.
 
iterator end ()
 Get end iterator.
 
template<typename ProcessFunc >
void process (ProcessFunc &&process_func, double percentage=100.0)
 Process events in parallel across all files (record-level parallelism)
 
template<typename ProcessFunc >
void process (ProcessFunc &&process_func, long num_events)
 Process a specific number of events in parallel across all files.
 
template<typename ProcessFunc >
void process (const banklist &banks, ProcessFunc &&process_func, double percentage=100.0)
 Process events in parallel using a banklist (record-level parallelism)
 
template<typename ProcessFunc >
void process_filtered (ProcessFunc &&process_func, const std::vector< std::string > &required_banks, double percentage=100.0)
 Process events with bank filtering (record-level parallelism)
 
template<typename ProcessFunc >
void process_filtered (ProcessFunc &&process_func, const std::vector< std::string > &required_banks, long num_events)
 Process filtered events with an absolute event count.
 
template<typename FileFunc >
void for_each_file (FileFunc &&func)
 Apply a function to each file (for file-level operations)
 
const ChainStatistics & statistics () const noexcept
 Get processing statistics.
 
long total_events ()
 Get total event count (loads metadata if needed)
 
long total_events_count () const
 Legacy alias.
 
void print_statistics () const
 Print processing statistics.
 
void show_all_info ()
 Show detailed info for all files.
 
bool any_has_config (std::string_view name)
 Check if any file has a configuration key.
 
std::optional< std::string > get_config (std::string_view name)
 Get configuration from first file that has it.
 
ThreadPool & threadpool () noexcept
 Get the thread pool for advanced use.
 

Friends

class ChainIterator
 

Detailed Description

Chain multiple HIPO files for unified processing.

The chain class provides:

  • File management (add, remove, pattern matching)
  • Sequential iteration via range-based for loops
  • Parallel processing with configurable thread pools
  • Lazy metadata loading for minimal memory overhead
  • Progress tracking and statistics
hipo::chain ch(4); // 4 threads
ch.add_pattern("data/*.hipo");
// Sequential iteration
for (auto& [event, file_idx, event_idx] : ch) {
    auto particles = event.getBank("REC::Particle");
}
// Parallel processing
ch.process([](auto& event, int file_idx, long event_idx) {
    // Thread-safe processing...
});
Chain multiple HIPO files for unified processing.
Definition chain.h:274
Definition event.h:62

Member Typedef Documentation

◆ iterator

◆ const_iterator

Constructor & Destructor Documentation

◆ chain()

hipo::chain::chain ( int  threads = 0,
bool  progress = true,
bool  verbose = false 
)
inline explicit

Construct a chain with specified thread count.

Parameters
threads: Number of threads (0 = auto-detect hardware concurrency)
progress: Show progress bar during processing
verbose: Enable verbose output

Member Function Documentation

◆ add() [1/2]

int hipo::chain::add ( std::string_view  filename)
inline

Add a single file to the chain.

Parameters
filename: Path to the HIPO file
Returns
Number of files in chain after addition

◆ add() [2/2]

int hipo::chain::add ( std::vector< std::string > &  filenames)
inline

Add multiple files to the chain.

Parameters
filenames: Vector of file paths
Returns
Number of files in chain after addition

◆ add_pattern()

int hipo::chain::add_pattern ( std::string_view  pattern)
inline

Add files matching a glob pattern.

Parameters
pattern: Glob pattern (e.g., "data/*.hipo", "run_???.hipo")
Returns
Number of files matched and added

◆ clear()

void hipo::chain::clear ( )
inline

Remove all files from the chain.

◆ size()

std::size_t hipo::chain::size ( ) const
inline noexcept

Number of files in the chain.

◆ empty()

bool hipo::chain::empty ( ) const
inline noexcept

Check if chain is empty.

◆ get_nb_files()

int hipo::chain::get_nb_files ( ) const
inline noexcept

Legacy alias for size()

◆ operator[]()

const FileInfo & hipo::chain::operator[] ( std::size_t  index) const
inline

Get file info by index.

◆ file_info()

const FileInfo & hipo::chain::file_info ( int  index) const
inline

Legacy alias for operator[].

◆ files()

std::vector< FileInfo > & hipo::chain::files ( )
inline noexcept

Get all file infos.

◆ set_tags()

void hipo::chain::set_tags ( const std::vector< long > &  tags)
inline

Set event tags for filtering.

◆ set_threads()

void hipo::chain::set_threads ( int  n)
inline

Set number of processing threads.

◆ set_progress()

void hipo::chain::set_progress ( bool  show)
inline

Enable/disable progress display.

◆ set_verbose()

void hipo::chain::set_verbose ( bool  verbose)
inline

Enable/disable verbose output.

◆ open()

void hipo::chain::open ( bool  validate_all = true)
inline

Validate and optionally load metadata for all files.

Parameters
validate_all: If true, open each file to validate; if false, only check existence
Exceptions
std::runtime_error: if no valid files in chain

◆ scan()

void hipo::chain::scan ( )
inline

Scan and display detailed information about all files.

◆ list()

void hipo::chain::list ( ) const
inline

Print list of files in chain.

◆ getBanks()

banklist hipo::chain::getBanks ( const std::vector< std::string > &  names)
inline

Create a banklist from bank names using the first file's dictionary.

Creates bank objects with the correct schemas, ready to be filled by chain_event::readBanks(). The chain must have at least one file added.

◆ get_banks()

banklist hipo::chain::get_banks ( const std::vector< std::string > &  names)
inline

Alias for getBanks (snake_case)

◆ begin()

iterator hipo::chain::begin ( )
inline

Get iterator to first event.

Enables range-based for loops:

for (auto& [event, file_idx, event_idx] : chain) {
    // Process event...
}

◆ end()

iterator hipo::chain::end ( )
inline

Get end iterator.

◆ process() [1/3]

template<typename ProcessFunc >
void hipo::chain::process ( ProcessFunc &&  process_func,
double  percentage = 100.0 
)
inline

Process events in parallel across all files (record-level parallelism)

Uses record-level parallelism for efficient I/O: each thread grabs entire records and processes all events within them sequentially. This minimizes random access and maximizes cache efficiency.

Parameters
process_func: Callable with signature: void(chain_event&, int file_idx, long event_idx)
percentage: Percentage of events to process (0-100)
chain.process([](auto& event, int file_idx, long event_idx) {
    auto particles = event.getBank("REC::Particle");
    // Thread-safe processing...
});
void process(ProcessFunc &&process_func, double percentage=100.0)
Process events in parallel across all files (record-level parallelism)
Definition chain.h:659

◆ process() [2/3]

template<typename ProcessFunc >
void hipo::chain::process ( ProcessFunc &&  process_func,
long  num_events 
)
inline

Process a specific number of events in parallel across all files.

Same as process(func, percentage) but accepts an absolute event count.

Parameters
process_func: Callable with signature: void(chain_event&, int file_idx, long event_idx)
num_events: Number of events to process
chain.process([](auto& event, int file_idx, long event_idx) {
    auto particles = event.getBank("REC::Particle");
}, 10000L);

◆ process() [3/3]

template<typename ProcessFunc >
void hipo::chain::process ( const banklist &  banks,
ProcessFunc &&  process_func,
double  percentage = 100.0 
)
inline

Process events in parallel using a banklist (record-level parallelism)

Each thread gets its own copy of the banklist for thread safety. For each event, the event data is read into the thread-local banklist before calling the user function.

Parameters
banks: Template banklist (created via chain::getBanks); each thread copies it
process_func: Callable with signature: void(banklist&, int file_idx, long event_idx)
percentage: Percentage of events to process (0-100)
hipo::banklist banks = chain.getBanks({"REC::Particle", "REC::Event"});
auto b_particle = hipo::getBanklistIndex(banks, "REC::Particle");
chain.process(banks, [b_particle](auto& banks, int file_idx, long event_idx) {
    for (auto const& row : banks[b_particle].getRowList()) {
        // process...
    }
});
banklist getBanks(const std::vector< std::string > &names)
Create a banklist from bank names using the first file's dictionary.
Definition chain.h:606
banklist::size_type getBanklistIndex(banklist &banks, std::string const &bankName) noexcept(false)
Definition bank.cpp:585
std::vector< bank > banklist
Definition bank.h:678

◆ process_filtered() [1/2]

template<typename ProcessFunc >
void hipo::chain::process_filtered ( ProcessFunc &&  process_func,
const std::vector< std::string > &  required_banks,
double  percentage = 100.0 
)
inline

Process events with bank filtering (record-level parallelism)

Only processes events containing all specified banks. Uses record-level parallelism for efficient I/O.

Parameters
process_func: Callable with signature: void(chain_event&, int file_idx, long event_idx)
required_banks: List of bank names that must be present
percentage: Percentage of matching events to process

◆ process_filtered() [2/2]

template<typename ProcessFunc >
void hipo::chain::process_filtered ( ProcessFunc &&  process_func,
const std::vector< std::string > &  required_banks,
long  num_events 
)
inline

Process filtered events with an absolute event count.

Same as process_filtered(func, banks, percentage) but accepts an absolute count of matching events to process.

Parameters
process_func: Callable with signature: void(chain_event&, int file_idx, long event_idx)
required_banks: List of bank names that must be present
num_events: Number of matching events to process

◆ for_each_file()

template<typename FileFunc >
void hipo::chain::for_each_file ( FileFunc &&  func)
inline

Apply a function to each file (for file-level operations)

Parameters
func: Callable with signature: void(reader&, const FileInfo&)

◆ statistics()

const ChainStatistics & hipo::chain::statistics ( ) const
inline noexcept

Get processing statistics.

◆ total_events()

long hipo::chain::total_events ( )
inline

Get total event count (loads metadata if needed)

◆ total_events_count()

long hipo::chain::total_events_count ( ) const
inline

Legacy alias.

◆ print_statistics()

void hipo::chain::print_statistics ( ) const
inline

Print processing statistics.

◆ show_all_info()

void hipo::chain::show_all_info ( )
inline

Show detailed info for all files.

◆ any_has_config()

bool hipo::chain::any_has_config ( std::string_view  name)
inline

Check if any file has a configuration key.

◆ get_config()

std::optional< std::string > hipo::chain::get_config ( std::string_view  name)
inline

Get configuration from first file that has it.

◆ threadpool()

ThreadPool & hipo::chain::threadpool ( )
inline noexcept

Get the thread pool for advanced use.

Friends And Related Symbol Documentation

◆ ChainIterator

friend class ChainIterator
friend

The documentation for this class was generated from the following file: