Tutorial: register multiple tables and join
Goal: Register multiple tables in one Session and join them with SQL.
Prereqs: Finished Create, append, query (or equivalent).
What you’ll learn:
- How Session can hold multiple registered tables at once
- How to structure joins on (time, entity) columns
Session supports registering multiple tables into one SQL session and running joins.
This tutorial creates two time-series tables, registers them as prices and volumes,
and joins them on (ts, symbol).
from __future__ import annotations
from pathlib import Path
import pyarrow as pa
import pyarrow.parquet as pq
import timeseries_table_format as ttf
def _write_prices_parquet(path: Path) -> None:
tbl = pa.table(
{
"ts": pa.array([0, 3_600 * 1_000_000], type=pa.timestamp("us")),
"symbol": pa.array(["NVDA", "NVDA"], type=pa.string()),
"close": pa.array([1.0, 2.0], type=pa.float64()),
}
)
pq.write_table(tbl, str(path))
def _write_volumes_parquet(path: Path) -> None:
tbl = pa.table(
{
"ts": pa.array([0, 3_600 * 1_000_000], type=pa.timestamp("us")),
"symbol": pa.array(["NVDA", "NVDA"], type=pa.string()),
"volume": pa.array([10, 20], type=pa.int64()),
}
)
pq.write_table(tbl, str(path))
def run(*, base_dir: Path) -> pa.Table:
prices_root = base_dir / "prices_tbl"
prices = ttf.TimeSeriesTable.create(
table_root=str(prices_root),
time_column="ts",
bucket="1h",
entity_columns=["symbol"],
timezone=None,
)
prices_seg = base_dir / "prices.parquet"
_write_prices_parquet(prices_seg)
prices.append_parquet(str(prices_seg))
volumes_root = base_dir / "volumes_tbl"
volumes = ttf.TimeSeriesTable.create(
table_root=str(volumes_root),
time_column="ts",
bucket="1h",
entity_columns=["symbol"],
timezone=None,
)
volumes_seg = base_dir / "volumes.parquet"
_write_volumes_parquet(volumes_seg)
volumes.append_parquet(str(volumes_seg))
sess = ttf.Session()
sess.register_tstable("prices", str(prices_root))
sess.register_tstable("volumes", str(volumes_root))
return sess.sql(
"""
select p.ts as ts, p.symbol as symbol, p.close as close, v.volume as volume
from prices p
join volumes v
on p.ts = v.ts and p.symbol = v.symbol
order by p.ts
"""
)
def main() -> None:
out = run(base_dir=Path("./my_tables"))
print(out)
if __name__ == "__main__":
main()
Tips
- Keep your SQL table names stable (e.g.
prices,volumes,symbols) and register them at startup. - You can mix time-series tables (
register_tstable) and plain Parquet datasets (register_parquet) in the sameSession.
Next: - Tutorial: Parameterized queries - Reference: Session