HIPO  4.3.0
High Performance Output data format for experimental physics
reader.h
Go to the documentation of this file.
1 //******************************************************************************
2 //* ██╗ ██╗██╗██████╗ ██████╗ ██╗ ██╗ ██████╗ *
3 //* ██║ ██║██║██╔══██╗██╔═══██╗ ██║ ██║ ██╔═████╗ *
4 //* ███████║██║██████╔╝██║ ██║ ███████║ ██║██╔██║ *
5 //* ██╔══██║██║██╔═══╝ ██║ ██║ ╚════██║ ████╔╝██║ *
6 //* ██║ ██║██║██║ ╚██████╔╝ ██║██╗╚██████╔╝ *
7 //* ╚═╝ ╚═╝╚═╝╚═╝ ╚═════╝ ╚═╝╚═╝ ╚═════╝ *
8 //************************ Jefferson National Lab (2017) ***********************
9 /*
10  * Copyright (c) 2017. Jefferson Lab (JLab). All rights reserved. Permission
11  * to use, copy, modify, and distribute this software and its documentation
12  * for educational, research, and not-for-profit purposes, without fee and
13  * without a signed licensing agreement.
14  *
15  * IN NO EVENT SHALL JLAB BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL
16  * INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING
17  * OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF JLAB HAS
18  * BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19  *
20  * JLAB SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE. THE HIPO DATA FORMAT SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF
23  * ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". JLAB HAS NO OBLIGATION TO
24  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25  *
26  * This software was developed under the United States Government license.
27  * For more information contact author at gavalian@jlab.org
28  * Department of Experimental Nuclear Physics, Jefferson Lab.
29  */
92 
93 /*
94  * File: reader.h
95  * Author: gavalian
96  *
97  * Created on April 11, 2017, 2:07 PM
98  */
99 
100 #ifndef HIPOREADER_H
101 #define HIPOREADER_H
102 
103 
104 
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
#include <utility>
#include <vector>
#include "record.h"
#include "utils.h"
#include "bank.h"
118 
119 namespace hipo {
120 
/// HIPO file header structure (documented as 56 bytes / 14 words in the file).
///
/// Member order follows the declaration lines given by the project
/// documentation (reader.h:128-139). Every member is value-initialised, so a
/// default-constructed header is all zeros.
struct fileHeader_t {
  int  uniqueid{};            ///< File format identifier (HIPO: 0x43455248).
  int  filenumber{};          ///< Split file number.
  int  headerLength{};        ///< Header length in 32-bit words (usually 14).
  int  recordCount{};         ///< Number of records in the file.
  int  indexArrayLength{};    ///< Length of the index array in bytes.
  int  bitInfo{};             ///< Bit-packed info (version, flags, padding).
  int  version{};             ///< File format version.
  int  userHeaderLength{};    ///< User header length in bytes.
  int  magicNumber{};         ///< Magic number for endianness detection (0xc0da0100).
  long userRegister{};        ///< User-defined 64-bit register.
  long trailerPosition{};     ///< File offset to trailer header (0 = none).
  long firstRecordPosition{}; ///< File offset to the first record.
};
141 
142 
/// Metadata for a single record in a HIPO file.
///
/// Member order follows the declaration lines given by the project
/// documentation (reader.h:147-151). All members are value-initialised to zero.
struct recordInfo_t {
  long recordPosition{}; ///< Byte position of the record in the file.
  int  recordLength{};   ///< Total record length in bytes.
  int  recordEntries{};  ///< Number of events in the record.
  long userWordOne{};    ///< First user-defined word.
  long userWordTwo{};    ///< Second user-defined word.
};
153 
154 
/// Bookkeeping for the records of a file and the reader's current position.
///
/// Holds one entry per registered record (event bookkeeping and file position)
/// plus three cursors: current record, current event, and current event
/// within the record. Member functions that are only declared here are
/// defined in reader.cpp.
class readerIndex {

private:

  std::vector<int>  recordEvents;   ///< Per-record event bookkeeping, filled by addSize().
  std::vector<long> recordPosition; ///< Record positions, filled by addPosition().

  int currentRecord{};
  int currentEvent{};
  int currentRecordEvent{};

public:

  readerIndex() = default;
  ~readerIndex() = default;

  /// Check if there are more events to read.
  bool canAdvance();
  /// Advance to the next event, loading new records as needed.
  bool advance();

  /// Check if more events remain in the current record.
  bool canAdvanceInRecord();
  /// Load a specific record by index.
  bool loadRecord(int irec);
  /// Jump to a specific event number.
  bool gotoEvent(int eventNumber);
  /// Jump to a specific record.
  bool gotoRecord(int irec);

  /// @return the current event cursor.
  int getEventNumber() const { return currentEvent; }
  /// @return the current record cursor.
  int getRecordNumber() const { return currentRecord; }
  /// @return the current event cursor within the current record.
  int getRecordEventNumber() const { return currentRecordEvent; }
  int getMaxEvents();
  /// Register a record with the given number of events.
  void addSize(int size);
  /// Register a record position.
  void addPosition(long position) { recordPosition.push_back(position); }
  /// @return the position registered at @p index.
  /// The index is not range-checked; the caller must pass a valid one.
  long getPosition(int index) const { return recordPosition[index]; }

  /// @return the number of entries registered through addSize().
  int getNRecords() const { return static_cast<int>(recordEvents.size()); }
  /// Print index information to stdout.
  void show();
  /// Reset the index to the beginning (before the first event): all cursors become -1.
  void rewind() {
    currentRecord = -1;
    currentEvent = -1;
    currentRecordEvent = -1;
  }
  /// Remove all record entries from the index; the cursors are left unchanged.
  void clear() {
    recordEvents.clear();
    recordPosition.clear();
  }
  /// Reset counters to the first event of the first record: all cursors become 0.
  void reset() {
    currentRecord = 0;
    currentEvent = 0;
    currentRecordEvent = 0;
  }
};
230 
250  class reader {
251 
252  private:
253 
254  fileHeader_t header{};
255  hipo::utils hipoutils;
256  std::ifstream inputStream;
257  long inputStreamSize{};
258 
259  hipo::record inputRecord;
260  hipo::readerIndex readerEventIndex;
261  std::vector<long> tagsToRead;
262 
263  short _verbose = {0} ;
264 
265  std::map<std::string,std::string> userConfig;
266 
267 
269  hipo::dictionary factory;
270 
271  void readHeader();
272  void readIndex();
273 
274 
275  public:
276 
278  reader();
281  reader(const char *file){ open(file);}
282 
288  reader(const char *file, std::vector<int> tags){
289  //for(int t = 0; t < tags.size(); t++) setTags(tags[t]);
290  for(auto tag : tags) setTags(tag);
291  open(file);
292  }
293 
294  reader(const reader &r){}
295 
296  ~reader();
297 
299  void about();
301  void rewind(){ readerEventIndex.rewind();}
304  void readDictionary(hipo::dictionary &dict);
311  void getStructure(hipo::structure &structure,int group, int item);
318  void getStructureNoCopy(hipo::structure &structure,int group, int item);
319 
322  void readUserConfig(std::map<std::string,std::string> &mapConfig);
323 
326  void open(const char *filename);
328  bool is_open(){ return inputStream.is_open();}
331  void setTags(int tag){ tagsToRead.push_back(tag);}
334  void setTags(std::vector<long> tags){ tagsToRead=std::move(tags);}
337  void setVerbose(short level=1){_verbose=level;}
338 
340  bool hasNext();
343  bool next();
346  bool gotoEvent(int eventNumber);
349  bool gotoRecord(int irec);
352  bool next(hipo::event &dataevent);
353 
356  bool next(std::vector<hipo::bank> &list);
360  std::vector<hipo::bank> getBanks(std::vector<std::string> names);
361 
364  void read(hipo::event &dataevent);
366  void printWarning();
368  int getNRecords() const {return readerEventIndex.getNRecords()-1;}
370  bool nextInRecord();
373  bool loadRecord(int irec);
377  bool loadRecord(hipo::record &record, int irec);
379  int getEntries(){return readerEventIndex.getMaxEvents();}
386  std::vector<int> getInt( const char *bank, const char *column, int max = -1);
393  std::vector<float> getFloat(const char *bank, const char *column, int max = -1);
394  };
395 
404  private:
405  hipo::reader hr;
406  //hipo::writer hw;
407  hipo::dictionary factory;
408  std::mutex obj;
409  long nProcessed = 0;
410  long nDataLimit = -1;
411  public:
412 
413  //static int eof_printout;
414 
415  readerstream(){ /*datastream::eof_printout = 0;*/}
416 
417  virtual ~readerstream(){}
418 
421  void open(const char *input){
422  hr.open(input);
423  hr.readDictionary(factory);
424  //hw.addDictionary(factory);
425  //hw.open(output);
426  }
427 
430  void setLimit(long limit){
431  nDataLimit = limit;
432  }
433 
437  void run(std::function<int(int)> &&function, int nthreads){
438  std::vector<std::thread*> threads;
439  for(int i = 0; i < nthreads; i++){
440  threads.push_back(new std::thread(function,i));
441  }
442  printf("-- created denoiser with %lu threads\n", threads.size());
443  for(int k = 0; k < (int) threads.size(); k++) threads[k]->join();
444  for(int k = 0; k < (int) threads.size(); k++) delete threads[k];
445  }
446 
448  hipo::reader &reader(){return hr;}
450  hipo::dictionary &dictionary(){ return factory;}
451 
455  void pull(hipo::record &record, int index){
456  std::unique_lock<std::mutex> lock(obj);
457  hr.loadRecord(record,index);
458  }
459 
462  void pull(std::vector<hipo::event> &events){
463 
464  std::unique_lock<std::mutex> lock(obj);
465  bool finished = false;
466  if(nDataLimit>0){ if(nProcessed>nDataLimit) finished = true;}
467 
468  if(hr.hasNext()==false){ printf("\n");}
469 
470  for(int n = 0; n < (int) events.size(); n++){
471  // write the event in the output if it's not empty
472  //if(events[n].getSize()>16){ hw.addEvent(events[n]);}
473  // reset event and read next in the file if any left
474  events[n].reset();
475  if(hr.next()==true&&finished==false){
476  hr.read(events[n]); nProcessed++;
477  if(nProcessed%250==0) { printf("."); fflush(stdout);}
478  if(nProcessed%10000==0) printf(" : %9lu \n",nProcessed);
479  }
480  }
481  }
482  };
483 
484 }
486 #endif /* HIPOREADER_H */
Core HIPO data structures: structure, composite, and bank classes for tabular data access.
Represents a HIPO bank, a tabular data structure with rows and typed columns.
Definition: bank.h:352
Collection of schema definitions, typically read from a HIPO file header.
Definition: dictionary.h:248
Represents a HIPO event, a container for multiple structures/banks.
Definition: event.h:77
readerIndex()=default
~readerIndex()=default
int getMaxEvents()
Definition: reader.cpp:536
void addPosition(long position)
Register a record position.
Definition: reader.h:209
bool gotoRecord(int irec)
Jump to a specific record.
Definition: reader.cpp:545
bool loadRecord(int irec)
Load a specific record by index.
Definition: reader.cpp:560
void clear()
Remove all record entries from the index.
Definition: reader.h:222
void show()
Print index information to stdout.
Definition: reader.cpp:527
void reset()
Reset counters to the first event of the first record.
Definition: reader.h:226
bool canAdvanceInRecord()
Check if more events remain in the current record.
Definition: reader.cpp:581
void rewind()
Reset the index to the beginning (before the first event).
Definition: reader.h:218
int getRecordEventNumber()
Definition: reader.h:201
bool canAdvance()
Check if there are more events to read.
Definition: reader.cpp:475
int getEventNumber()
Definition: reader.h:197
bool advance()
Advance to the next event, loading new records as needed.
Definition: reader.cpp:484
void addSize(int size)
Register a record with the given number of events.
Definition: reader.cpp:462
int getRecordNumber()
Definition: reader.h:199
bool gotoEvent(int eventNumber)
Jump to a specific event number.
Definition: reader.cpp:504
int getNRecords() const
Definition: reader.h:214
long getPosition(int index)
Definition: reader.h:211
Sequential reader for HIPO files.
Definition: reader.h:250
void read(hipo::event &dataevent)
Read the current event into the given event object.
Definition: reader.cpp:233
std::vector< hipo::bank > getBanks(std::vector< std::string > names)
Create bank objects for the given bank names.
Definition: reader.cpp:273
void readDictionary(hipo::dictionary &dict)
Read the schema dictionary from the file header.
Definition: reader.cpp:311
void getStructureNoCopy(hipo::structure &structure, int group, int item)
Extract a structure without copying (zero-copy).
Definition: reader.cpp:254
void setTags(std::vector< long > tags)
Set the event tag filter list.
Definition: reader.h:334
bool nextInRecord()
Advance to the next event within the current record.
Definition: reader.cpp:433
reader(const char *file)
Construct and open a HIPO file.
Definition: reader.h:281
void printWarning()
Print a warning message (e.g., end of file).
Definition: reader.cpp:442
reader()
Default constructor.
Definition: reader.cpp:58
void rewind()
Rewind to the beginning of the file.
Definition: reader.h:301
reader(const char *file, std::vector< int > tags)
Construct and open a HIPO file with event tag filtering.
Definition: reader.h:288
void setVerbose(short level=1)
Set the verbosity level.
Definition: reader.h:337
bool is_open()
Definition: reader.h:328
void setTags(int tag)
Add an event tag to the read filter.
Definition: reader.h:331
bool loadRecord(int irec)
Load a specific record by index into the internal buffer.
Definition: reader.cpp:390
void getStructure(hipo::structure &structure, int group, int item)
Extract a structure from the current event by group and item.
Definition: reader.cpp:241
reader(const reader &r)
Definition: reader.h:294
int getEntries()
Definition: reader.h:379
bool next()
Advance to the next event.
Definition: reader.cpp:334
int getNRecords() const
Definition: reader.h:368
std::vector< int > getInt(const char *bank, const char *column, int max=-1)
Read all integer values from a bank column across events.
Definition: reader.cpp:402
void open(const char *filename)
Open a HIPO file for reading.
Definition: reader.cpp:81
bool hasNext()
Check if more events are available without advancing.
Definition: reader.cpp:207
void about()
Print file information to stdout.
Definition: reader.cpp:72
bool gotoRecord(int irec)
Jump to a specific record.
Definition: reader.cpp:379
bool gotoEvent(int eventNumber)
Jump to a specific event number.
Definition: reader.cpp:353
std::vector< float > getFloat(const char *bank, const char *column, int max=-1)
Read all float values from a bank column across events.
Definition: reader.cpp:417
void readUserConfig(std::map< std::string, std::string > &mapConfig)
Read user configuration key-value pairs from the file header.
Definition: reader.cpp:283
Thread-safe reader for parallel event processing.
Definition: reader.h:403
void pull(hipo::record &record, int index)
Thread-safe loading of a record by index.
Definition: reader.h:455
void run(std::function< int(int)> &&function, int nthreads)
Launch worker threads that execute the given function.
Definition: reader.h:437
void pull(std::vector< hipo::event > &events)
Thread-safe batch pull: resets each event in the vector, reads next events, and refills.
Definition: reader.h:462
void open(const char *input)
Open a HIPO file for multi-threaded reading.
Definition: reader.h:421
virtual ~readerstream()
Definition: reader.h:417
hipo::dictionary & dictionary()
Definition: reader.h:450
hipo::reader & reader()
Definition: reader.h:448
void setLimit(long limit)
Set a limit on the number of events to process.
Definition: reader.h:430
Represents a single HIPO record containing multiple events.
Definition: record.h:196
Low-level data structure representing a HIPO structure.
Definition: bank.h:64
Utility functions for string manipulation, serialization, and HIPO file generation.
Definition: utils.h:36
Definition: bank.cpp:47
HIPO record reading and event extraction.
HIPO file header structure (56 bytes / 14 words).
Definition: reader.h:127
int userHeaderLength
User header length in bytes.
Definition: reader.h:135
long userRegister
User-defined 64-bit register.
Definition: reader.h:137
int filenumber
Split file number.
Definition: reader.h:129
int recordCount
Number of records in the file.
Definition: reader.h:131
int magicNumber
Magic number for endianness detection (0xc0da0100)
Definition: reader.h:136
int bitInfo
Bit-packed info (version, flags, padding)
Definition: reader.h:133
int indexArrayLength
Length of the index array in bytes.
Definition: reader.h:132
int uniqueid
File format identifier (HIPO: 0x43455248)
Definition: reader.h:128
long firstRecordPosition
File offset to the first record.
Definition: reader.h:139
long trailerPosition
File offset to trailer header (0 = none)
Definition: reader.h:138
int headerLength
Header length in 32-bit words (usually 14)
Definition: reader.h:130
int version
File format version.
Definition: reader.h:134
Metadata for a single record in a HIPO file.
Definition: reader.h:146
long userWordOne
First user-defined word.
Definition: reader.h:150
long userWordTwo
Second user-defined word.
Definition: reader.h:151
int recordLength
Total record length in bytes.
Definition: reader.h:148
long recordPosition
Byte position of the record in the file.
Definition: reader.h:147
int recordEntries
Number of events in the record.
Definition: reader.h:149
Utility functions and benchmark timer for HIPO library operations.