Python Quick Start¶
Reading a HIPO File¶
High-Level API¶
The simplest way to read HIPO files:
import hipopy
import numpy as np
with hipopy.open("data.hipo", banks=["REC::Particle"]) as f:
for banks in f:
bank = banks["REC::Particle"]
pid = bank["pid"] # NumPy int32 array
px = bank["px"] # NumPy float32 array
py = bank["py"]
pt = np.sqrt(px**2 + py**2)
print(f"{len(pid)} particles, max pT = {pt.max():.3f}")
Low-Level API¶
Mirrors the C++ API directly:
import hipopy
reader = hipopy.Reader("data.hipo")
dict_ = hipopy.Dictionary()
reader.read_dictionary(dict_)
bank = hipopy.Bank(dict_["REC::Particle"])
event = hipopy.Event()
while reader.next():
reader.read(event)
event.read(bank)
for row in range(bank.get_rows()):
pid = bank.get_int("pid", row)
px = bank.get_float("px", row)
py = bank.get_float("py", row)
print(f" pid={pid}, px={px:.3f}, py={py:.3f}")
Writing a HIPO File¶
import hipopy
# 1. Define schemas
schema = hipopy.Schema("event::particle", 100, 1)
schema.parse("pid/S,px/F,py/F,pz/F")
# 2. Create dictionary and writer
d = hipopy.Dictionary()
d.add_schema(schema)
writer = hipopy.Writer()
writer.add_dictionary(d) # must be before open()
writer.open("output.hipo")
# 3. Write events
for i in range(1000):
event = hipopy.Event()
bank = hipopy.Bank(schema, 5)
for row in range(5):
bank.put_short("pid", row, 211)
bank.put_float("px", row, 0.5 * row)
bank.put_float("py", row, 0.3 * row)
bank.put_float("pz", row, 1.0 + 0.1 * row)
event.add_structure(bank)
writer.add_event(event)
writer.close()
Important
Schemas must be added to the writer's dictionary before calling open(). They are written into the file header.
NumPy Array Access¶
Bank columns are accessible as NumPy arrays:
# Explicit method -- copy by default (safe)
arr = bank.get_array("px")
# Zero-copy view (unsafe across event boundaries)
arr = bank.get_array("px", copy=False)
# Shorthand via __getitem__ (always copies)
arr = bank["px"]
Warning
copy=False returns a view into the bank's internal buffer. This buffer is overwritten when event.read(bank) is called for the next event. Only use copy=False when you will consume the data before advancing.
Batch Reading¶
Flat NumPy Arrays¶
Read all events into flat concatenated arrays (no per-event structure):
import hipopy
import numpy as np
data = hipopy.read_columns("data.hipo", "REC::Particle", ["pid", "px", "py"])
pt = np.sqrt(data["px"]**2 + data["py"]**2)
electrons = np.abs(data["pid"]) == 11
print(f"Total particles: {len(data['pid'])}, electrons: {electrons.sum()}")
Ragged Awkward Arrays¶
Read with per-event structure preserved:
import hipopy
import awkward as ak
particles = hipopy.read_bank("data.hipo", "REC::Particle", ["pid", "px", "py"])
# particles.px[0] = first event's px values
# particles.px[1] = second event's px values, etc.
pt = np.sqrt(particles.px**2 + particles.py**2)
max_pt_per_event = ak.max(pt, axis=1)
Next Steps¶
- API Reference -- complete class and method documentation
- Examples -- annotated example scripts