Skip to content

Python Quick Start

Reading a HIPO File

High-Level API

The simplest way to read HIPO files:

import hipopy
import numpy as np

with hipopy.open("data.hipo", banks=["REC::Particle"]) as f:
    for banks in f:
        bank = banks["REC::Particle"]
        pid = bank["pid"]       # NumPy int32 array
        px  = bank["px"]        # NumPy float32 array
        py  = bank["py"]
        pt  = np.sqrt(px**2 + py**2)
        print(f"{len(pid)} particles, max pT = {pt.max():.3f}")

Low-Level API

Mirrors the C++ API directly:

import hipopy

reader = hipopy.Reader("data.hipo")
dict_  = hipopy.Dictionary()
reader.read_dictionary(dict_)

bank  = hipopy.Bank(dict_["REC::Particle"])
event = hipopy.Event()

while reader.next():
    reader.read(event)
    event.read(bank)
    for row in range(bank.get_rows()):
        pid = bank.get_int("pid", row)
        px  = bank.get_float("px", row)
        py  = bank.get_float("py", row)
        print(f"  pid={pid}, px={px:.3f}, py={py:.3f}")

Writing a HIPO File

import hipopy

# 1. Define schemas
schema = hipopy.Schema("event::particle", 100, 1)
schema.parse("pid/S,px/F,py/F,pz/F")

# 2. Create dictionary and writer
d = hipopy.Dictionary()
d.add_schema(schema)

writer = hipopy.Writer()
writer.add_dictionary(d)    # must be before open()
writer.open("output.hipo")

# 3. Write events
for i in range(1000):
    event = hipopy.Event()
    bank = hipopy.Bank(schema, 5)
    for row in range(5):
        bank.put_short("pid", row, 211)
        bank.put_float("px", row, 0.5 * row)
        bank.put_float("py", row, 0.3 * row)
        bank.put_float("pz", row, 1.0 + 0.1 * row)
    event.add_structure(bank)
    writer.add_event(event)

writer.close()

Important

Schemas must be added to the writer's dictionary before calling open(). They are written into the file header.

NumPy Array Access

Bank columns are accessible as NumPy arrays:

# Explicit method -- copy by default (safe)
arr = bank.get_array("px")

# Zero-copy view (unsafe across event boundaries)
arr = bank.get_array("px", copy=False)

# Shorthand via __getitem__ (always copies)
arr = bank["px"]

Warning

copy=False returns a view into the bank's internal buffer. This buffer is overwritten when event.read(bank) is called for the next event. Only use copy=False when you will consume the data before advancing.

Batch Reading

Flat NumPy Arrays

Read all events into flat concatenated arrays (no per-event structure):

import hipopy
import numpy as np

data = hipopy.read_columns("data.hipo", "REC::Particle", ["pid", "px", "py"])
pt = np.sqrt(data["px"]**2 + data["py"]**2)
electrons = np.abs(data["pid"]) == 11
print(f"Total particles: {len(data['pid'])}, electrons: {electrons.sum()}")

Ragged Awkward Arrays

Read with per-event structure preserved:

import hipopy
import awkward as ak

particles = hipopy.read_bank("data.hipo", "REC::Particle", ["pid", "px", "py"])
# particles.px[0] = first event's px values
# particles.px[1] = second event's px values, etc.

pt = np.sqrt(particles.px**2 + particles.py**2)
max_pt_per_event = ak.max(pt, axis=1)

Next Steps