Meta gRPC API
Catalog management, ingestion, merge tasks, and cluster operations
The meta service exposes a gRPC API on port 9560 (configurable via META_BIND). It manages all cluster metadata: databases, tables, views, ingestion streams, merge tasks, and segment lifecycle. gRPC reflection is enabled for tooling support.
Services Overview
| Service | Description |
|---|---|
| DatabaseService | Manages top-level databases (the unit that owns tables and views). |
| TableService | Manages tables within a database, including sharding fields, retention, and merge-span configuration. |
| ViewService | Manages named, database-scoped unions over tables. A view name lowers to the same plan shape as an explicit union T1, T2, ... in KQL. |
| ClusterService | Cluster membership registry — meta tracks live cluster members for leader-election and routing. |
| IngestionService | Authenticates ingest tokens and manages collector streams. |
| IngestTokenService | Manages ingest tokens for authentication and signal routing. |
| NurseryService | Manages ingest stream lifecycle, merged segment registration, and offset tracking. |
| MergeTaskService | Coordinates segment merge tasks between the janitor and merge workers. |
| SegmentDeletionService | Manages cleanup of tombstoned segments from storage. |
| SegmentLookupService | Discovers segments matching a time range and table filter. |
| JanitorService | Provides cluster statistics and merge task management for maintenance operations. |
DatabaseService
Manages top-level databases (the unit that owns tables and views).
RPCs
| RPC | Description |
|---|---|
| CreateDatabase | Create a new database |
| ListDatabases | List all databases |
| DeleteDatabase | Tombstone a database. Without cascade, rejects when any live table / view / ingest token still lives in the database. With cascade, tombstones every catalog row in the database in one transaction. |
| ResurrectDatabase | Lift the tombstone on a database and every currently-tombstoned child within the grace window. |
| PreviewDeleteDatabase | Live children a cascade DeleteDatabase would tombstone. Read-only and advisory (children may change before the delete). |
Proto Definition
// DatabaseService manages top-level databases, the unit that owns tables
// and views.
service DatabaseService {
  // Creates a new database.
  rpc CreateDatabase(CreateDatabaseRequest) returns (CreateDatabaseResponse);

  // Lists all databases.
  rpc ListDatabases(ListDatabasesRequest) returns (ListDatabasesResponse);

  // Tombstones a database. Without cascade the call is rejected while any
  // live child (table, view, or ingest token) remains; tombstoned children
  // do not block.
  rpc DeleteDatabase(DeleteDatabaseRequest) returns (DeleteDatabaseResponse);

  // Lifts the tombstone on a database.
  rpc ResurrectDatabase(ResurrectDatabaseRequest) returns (ResurrectDatabaseResponse);

  // Returns the live children a cascade DeleteDatabase would tombstone.
  // Read-only and advisory: children may change before the delete happens.
  rpc PreviewDeleteDatabase(PreviewDeleteDatabaseRequest) returns (PreviewDeleteDatabaseResponse);
}
// A top-level database.
message Database {
  // Database identifier.
  string id = 1;
  // Database name.
  string name = 2;
  // Tombstone metadata; absent for live rows.
  Tombstone tombstone = 3;
}
// Request for DatabaseService.CreateDatabase.
message CreateDatabaseRequest {
  // Name of the database to create.
  string name = 1;
}

// Response for DatabaseService.CreateDatabase.
message CreateDatabaseResponse {
  // NOTE(review): presumably the newly created database — confirm.
  Database database = 1;
}
// Request for DatabaseService.ListDatabases.
message ListDatabasesRequest {
  // NOTE(review): presumably also returns tombstoned databases when true
  // (inferred from the field name) — confirm.
  bool include_deleted = 1;
}

// Response for DatabaseService.ListDatabases.
message ListDatabasesResponse {
  // The listed databases.
  repeated Database items = 1;
}
// Request for DatabaseService.DeleteDatabase.
message DeleteDatabaseRequest {
  // Id of the database to tombstone.
  string database_id = 1;
  // Selects a cascade delete. Without it, the delete is rejected while any
  // live child remains in the database.
  bool cascade = 2;
  // Uses a zero grace window. Stored bytes are still bound by the segment
  // tombstone_min_age floor (~12 min).
  bool immediate = 3;
}

// Response for DatabaseService.DeleteDatabase.
message DeleteDatabaseResponse {
  // NOTE(review): presumably the database row after tombstoning — confirm.
  Database database = 1;
}
// Request for DatabaseService.ResurrectDatabase.
message ResurrectDatabaseRequest {
  // Id of the tombstoned database to resurrect.
  string database_id = 1;
}

// Response for DatabaseService.ResurrectDatabase.
message ResurrectDatabaseResponse {
  // NOTE(review): presumably the database row after resurrection — confirm.
  Database database = 1;
}
// Request for DatabaseService.PreviewDeleteDatabase.
message PreviewDeleteDatabaseRequest {
  // Id of the database whose cascade delete is being previewed.
  string database_id = 1;
}

// A single live child that a cascade delete would tombstone, identified by
// its id and display name.
message DeletableChild {
  // Child identifier.
  string id = 1;
  // Child display name.
  string name = 2;
}
message PreviewDeleteDatabaseResponse {
repeated DeletableChild tables = 1;
repeated DeletableChild views = 2;
repeated DeletableChild ingest_tokens = 3;
// Grace window (seconds) a non-immediate delete schedules before the
// database is permanently removed — meta's effective value, so the UI
// need not hardcode it.
int64 grace_window_seconds = 4;
}
TableService
Manages tables within a database, including sharding fields, retention, and merge-span configuration.
RPCs
| RPC | Description |
|---|---|
| ListTables | List tables in a database |
| CreateTable | Create a new table |
| GetTable | Get table by ID |
| DeleteTable | Tombstone a table. Rejects when live views or live ingest tokens still reference the table, returning a structured inventory of the blockers. |
| ResurrectTable | Lift the tombstone on a table within the grace window. |
| SetShardingFields | Configure table sharding fields |
| GetShardingFields | Get table sharding configuration |
| SetTableRetention | Set per-table retention (unset clears the override) |
| SetTableMaxMergeTimeSpan | Set the per-table merge-span cap (unset reverts to retention-derived default) |
Proto Definition
// TableService manages tables within a database.
//
// Every RPC is database-scoped through an explicit `database_id`. Writes
// are keyed on `(database_id, name)`, matching the per-database uniqueness
// of tables; id-keyed reads use `(database_id, table_id)`, and the server
// verifies membership.
service TableService {
  // Lists tables.
  rpc ListTables(ListTablesRequest) returns (ListTablesResponse);

  // --- Table CRUD ---

  // Creates a table.
  rpc CreateTable(CreateTableRequest) returns (CreateTableResponse);
  // Fetches a table.
  rpc GetTable(GetTableRequest) returns (GetTableResponse);
  // Rejects with a TableReferences inventory when any live view or ingest
  // token still references the table.
  rpc DeleteTable(DeleteTableRequest) returns (DeleteTableResponse);
  // Resurrects a table.
  rpc ResurrectTable(ResurrectTableRequest) returns (ResurrectTableResponse);

  // --- Table sharding fields ---

  // Sets the table's sharding fields.
  rpc SetShardingFields(SetShardingFieldsRequest) returns (SetShardingFieldsResponse);
  // Reads the table's sharding fields.
  rpc GetShardingFields(GetShardingFieldsRequest) returns (GetShardingFieldsResponse);

  // --- Table retention & merge span ---

  // Sets per-table retention.
  rpc SetTableRetention(SetTableRetentionRequest) returns (SetTableRetentionResponse);
  // Sets the per-table merge-span override.
  rpc SetTableMaxMergeTimeSpan(SetTableMaxMergeTimeSpanRequest) returns (SetTableMaxMergeTimeSpanResponse);
}
// region: Common Types
// Value type of a column.
//
// NOTE(review): the zero value is BQL_TYPE_BOOL rather than an
// *_UNSPECIFIED sentinel, so a `BqlType` field that was never set reads
// back as BOOL. The numbers are part of the wire contract and must not be
// renumbered.
enum BqlType {
  BQL_TYPE_BOOL = 0;
  BQL_TYPE_INT = 1;
  BQL_TYPE_LONG = 2;
  BQL_TYPE_REAL = 3;
  BQL_TYPE_STRING = 4;
  BQL_TYPE_DATETIME = 5;
  BQL_TYPE_TIMESPAN = 6;
  BQL_TYPE_GUID = 7;
  BQL_TYPE_DYNAMIC = 8;
}
// A named, typed column.
message Column {
  // Column name.
  string name = 1;
  // Column value type.
  BqlType type = 2;
}
// endregion
// region: Table Messages
// Request for TableService.CreateTable.
message CreateTableRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // Table name.
  string name = 2;
}

// Response for TableService.CreateTable.
message CreateTableResponse {
  // Table identifier.
  string id = 1;
  // Table name.
  string name = 2;
  // Database identifier.
  string database_id = 3;
}
// Request for TableService.GetTable.
message GetTableRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // Id of the table to fetch.
  string table_id = 2;
  // NOTE(review): presumably allows a tombstoned table to be returned
  // (inferred from the field name) — confirm.
  bool include_deleted = 3;
}

// Response for TableService.GetTable.
message GetTableResponse {
  // Table identifier.
  string id = 1;
  // Table name.
  string name = 2;
  // Database identifier.
  string database_id = 3;
  // Retention in nanoseconds. Unset means no retention is configured.
  optional int64 retention_ns = 4;
  // Explicit per-table merge-span cap in nanoseconds. Unset means the cap
  // is derived from retention (or falls back to the global cap).
  optional int64 max_merge_time_span_ns = 5;
  // Tombstone metadata; absent for live rows.
  Tombstone tombstone = 6;
}
// Request for TableService.ListTables.
message ListTablesRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // NOTE(review): presumably also returns tombstoned tables when true
  // (inferred from the field name) — confirm.
  bool include_deleted = 2;
}

// Response for TableService.ListTables.
message ListTablesResponse {
  // The listed tables.
  repeated Table items = 1;
  // Grace window, in seconds, that a non-immediate table delete schedules.
  // This is meta's effective TABLE_GRACE, so the UI need not hardcode it.
  int64 grace_window_seconds = 2;
}
// A table.
message Table {
  // Table identifier.
  string id = 1;
  // Table name.
  string name = 2;
  // Database identifier.
  string database_id = 3;
  // NOTE(review): presumably retention in nanoseconds, as on
  // GetTableResponse.retention_ns — confirm.
  optional int64 retention_ns = 4;
  // NOTE(review): presumably the merge-span cap in nanoseconds, as on
  // GetTableResponse.max_merge_time_span_ns — confirm.
  optional int64 max_merge_time_span_ns = 5;
  // Tombstone metadata; absent for live rows.
  Tombstone tombstone = 6;
}
// Request for TableService.DeleteTable.
message DeleteTableRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // Id of the table to tombstone.
  string table_id = 2;
  // Uses a zero grace window. Stored bytes are still bound by the segment
  // tombstone_min_age floor (~12 min).
  bool immediate = 3;
}

// Response for TableService.DeleteTable: either the tombstoned table or
// the references that blocked the delete.
message DeleteTableResponse {
  oneof outcome {
    // NOTE(review): presumably the table after tombstoning — confirm.
    Table tombstoned = 1;
    // Live views and tokens that still reference the table. Tombstoned
    // references are omitted because they do not block.
    TableReferences blocked = 2;
  }
}
// Inventory of references to a table; carried by
// DeleteTableResponse.blocked.
message TableReferences {
  // Referencing views.
  repeated ViewReference views = 1;
  // Referencing ingest tokens.
  repeated IngestTokenReference ingest_tokens = 2;
}
// Request for TableService.ResurrectTable.
message ResurrectTableRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // Id of the tombstoned table to resurrect.
  string table_id = 2;
}

// Response for TableService.ResurrectTable.
message ResurrectTableResponse {
  // NOTE(review): presumably the table after resurrection — confirm.
  Table table = 1;
}
// A view that references the target table.
message ViewReference {
  // View identifier.
  string id = 1;
  // View name.
  string name = 2;
  // The other tables this view spans; the target table is excluded.
  repeated TableRef other_members = 3;
  // Tombstone metadata; absent for live rows.
  Tombstone tombstone = 4;
}
// An ingest token that references the target table.
message IngestTokenReference {
  // Token identifier.
  string id = 1;
  // Token name.
  string name = 2;
  // Signal columns of this token ("traces"/"logs"/"metrics") that point at
  // the target table.
  repeated string signals = 3;
  // Tombstone metadata; absent for live rows.
  Tombstone tombstone = 4;
}
// Lightweight reference to a table by id and name.
message TableRef {
  // Table identifier.
  string id = 1;
  // Table name.
  string name = 2;
}
// Request for TableService.SetShardingFields.
message SetShardingFieldsRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // Id of the table to configure.
  string table_id = 2;
  // Sharding fields to set on the table.
  repeated ShardingField fields = 3;
}

// Response for TableService.SetShardingFields.
message SetShardingFieldsResponse {
  // NOTE(review): presumably the table's sharding fields after the
  // update — confirm.
  repeated ShardingField fields = 1;
}
// Request for TableService.GetShardingFields.
message GetShardingFieldsRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // Id of the table to read.
  string table_id = 2;
  // NOTE(review): presumably allows reading a tombstoned table's
  // configuration (inferred from the field name) — confirm.
  bool include_deleted = 3;
}

// Response for TableService.GetShardingFields.
message GetShardingFieldsResponse {
  // The table's sharding fields.
  repeated ShardingField fields = 1;
}
// Request for TableService.SetTableRetention. Leaving `retention_ns` unset
// clears the setting, reverting the table to "no retention configured".
message SetTableRetentionRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // Id of the table to configure.
  string table_id = 2;
  // Retention to set; unset clears it. NOTE(review): presumably
  // nanoseconds, per the `_ns` suffix — confirm.
  optional int64 retention_ns = 3;
}

// Response for TableService.SetTableRetention.
message SetTableRetentionResponse {
  // NOTE(review): presumably the table's retention after the update —
  // confirm.
  optional int64 retention_ns = 1;
}
// Request for TableService.SetTableMaxMergeTimeSpan. Leaving
// `max_merge_time_span_ns` unset clears the override, and the planner
// falls back to the retention formula.
message SetTableMaxMergeTimeSpanRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // Id of the table to configure.
  string table_id = 2;
  // Merge-span override to set; unset clears it. NOTE(review): presumably
  // nanoseconds, per the `_ns` suffix — confirm.
  optional int64 max_merge_time_span_ns = 3;
}

// Response for TableService.SetTableMaxMergeTimeSpan.
message SetTableMaxMergeTimeSpanResponse {
  // NOTE(review): presumably the table's override after the update —
  // confirm.
  optional int64 max_merge_time_span_ns = 1;
}
// endregion
ViewService
Manages named, database-scoped unions over tables. A view name lowers to the same plan shape as an explicit union T1, T2, ... in KQL.
RPCs
| RPC | Description |
|---|---|
| CreateView | Create a new view |
| GetView | Get view by ID |
| ListViews | List views in a database |
| SetViewTables | Atomic full replacement of a view's member tables |
| DeleteView | Tombstone a view. Views always use the 5-minute SHORT_GRACE (no immediate flag). |
| ResurrectView | Lift the tombstone on a view within the grace window. |
| LookupTableOrView | Database-scoped name resolution returning either a table or a view (used by the binder) |
Proto Definition
// ViewService manages named, database-scoped unions over tables. A view is
// "this database has a thing called `MyView` that means: union over
// T1, T2, T3" — the engine binder lowers a view-named reference into the
// same plan shape as an explicit `union T1, T2, T3` in KQL.
//
// All RPCs are database-scoped via an explicit `database_id`. Cross-type
// name collisions (a table and a view sharing a name in the same
// database) are rejected by CreateView / CreateTable at the service
// layer. Views are removed with DeleteView and restored with
// ResurrectView.
service ViewService {
  // Creates a view.
  rpc CreateView(CreateViewRequest) returns (CreateViewResponse);

  // Fetches a view.
  rpc GetView(GetViewRequest) returns (GetViewResponse);

  // Lists views.
  rpc ListViews(ListViewsRequest) returns (ListViewsResponse);

  // Atomic full replacement of the view's member list. Server rejects
  // an empty `table_ids` (a view with no members has no defined
  // schema and can't be lowered to a plan).
  rpc SetViewTables(SetViewTablesRequest) returns (SetViewTablesResponse);

  // Short default grace (5 min): views hold no recoverable data, so the
  // window is only fat-finger insurance.
  rpc DeleteView(DeleteViewRequest) returns (DeleteViewResponse);

  // Resurrects a view.
  rpc ResurrectView(ResurrectViewRequest) returns (ResurrectViewResponse);

  // Database-scoped name resolution for the engine binder. Returns
  // either a Table or a View. Replaces the unscoped
  // `list_tables() + find_by_name` fallback that lived in the binder
  // before PR 4b. Cross-type collisions are blocked at write time, so
  // a name resolves to exactly one variant.
  rpc LookupTableOrView(LookupTableOrViewRequest) returns (LookupTableOrViewResponse);
}
// A named union over tables.
message View {
  // View identifier.
  string id = 1;
  // View name.
  string name = 2;
  // Optional description.
  optional string description = 3;
  // Member tables, ordered by `ordinal`. The order is stable across
  // `SetViewTables` and surfaces directly in the plan.
  repeated ViewMember members = 4;
  // Tombstone metadata; absent for live rows.
  Tombstone tombstone = 5;
}
// A single member of a view's union. Both id and name are carried so the
// binder can resolve without a second round-trip.
message ViewMember {
  // Member identifier.
  string id = 1;
  // Member name.
  string name = 2;
}
// Request for ViewService.CreateView.
message CreateViewRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // View name.
  string name = 2;
  // Member table UUIDs, in order. Must be non-empty.
  repeated string table_ids = 3;
  // Optional description.
  optional string description = 4;
}

// Response for ViewService.CreateView.
message CreateViewResponse {
  // NOTE(review): presumably the newly created view — confirm.
  View view = 1;
}
// Request for ViewService.GetView.
message GetViewRequest {
  // Database that owns the view. Required: the server rejects the call
  // with INVALID_ARGUMENT when `database_id` does not own `view_id`.
  string database_id = 2;
  // Id of the view to fetch.
  string view_id = 1;
  // NOTE(review): presumably allows a tombstoned view to be returned
  // (inferred from the field name) — confirm.
  bool include_deleted = 3;
}

// Response for ViewService.GetView.
message GetViewResponse {
  // The view carried in the response.
  View view = 1;
}
// Request for ViewService.ListViews.
message ListViewsRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // NOTE(review): presumably also returns tombstoned views when true
  // (inferred from the field name) — confirm.
  bool include_deleted = 2;
}

// Response for ViewService.ListViews.
message ListViewsResponse {
  // The listed views.
  repeated View items = 1;
}
// Request for ViewService.SetViewTables.
message SetViewTablesRequest {
  // Database that owns the view. Required: the server rejects the call
  // with INVALID_ARGUMENT when `database_id` does not own `view_id`.
  string database_id = 3;
  // Id of the view being updated.
  string view_id = 1;
  // New member list, applied as an atomic full replacement. Must be
  // non-empty.
  repeated string table_ids = 2;
}

// Response for ViewService.SetViewTables.
message SetViewTablesResponse {
  // NOTE(review): presumably the view after the replacement — confirm.
  View view = 1;
}
// Request for ViewService.DeleteView.
message DeleteViewRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // Id of the view to delete.
  string view_id = 2;
}

// Response for ViewService.DeleteView.
message DeleteViewResponse {
  // NOTE(review): presumably the view after tombstoning — confirm.
  View view = 1;
}
// Request for ViewService.ResurrectView.
message ResurrectViewRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // Id of the view to resurrect.
  string view_id = 2;
}

// Response for ViewService.ResurrectView.
message ResurrectViewResponse {
  // NOTE(review): presumably the view after resurrection — confirm.
  View view = 1;
}
// Request for ViewService.LookupTableOrView.
message LookupTableOrViewRequest {
  // UUID of the database to resolve `name` against. Callers are expected
  // to have resolved the database already. DatabaseService declares no
  // `GetDatabase` RPC; ListDatabases is the RPC it offers for discovering
  // database ids.
  string database_id = 1;
  // Name to resolve within that database.
  string name = 2;
  // NOTE(review): presumably lets the lookup match tombstoned rows
  // (inferred from the field name) — confirm.
  bool include_deleted = 3;
}
// Outcome of a `LookupTableOrView` call: the name resolves to exactly one
// of a Table or a View. Callers dispatch by matching on the `kind` oneof.
message LookupTableOrViewResponse {
  oneof kind {
    // Set when the name resolved to a table.
    LookupTable table = 1;
    // Set when the name resolved to a view.
    LookupView view = 2;
  }
}

// Table variant of a lookup result.
message LookupTable {
  // Table identifier.
  string id = 1;
  // Table name.
  string name = 2;
}
message LookupView {
string id = 1;
string name = 2;
repeated ViewMember members = 3;
}
ClusterService
Cluster membership registry — meta tracks live cluster members for leader-election and routing.
RPCs
| RPC | Description |
|---|---|
| RegisterMember | Register the calling node and refresh its heartbeat |
| ListMembers | List currently-registered cluster members |
Proto Definition
// ClusterService is an ephemeral, in-memory registry of long-lived
// services (nursery, etc). Members heartbeat periodically and meta evicts
// stale entries on TTL. After a meta restart the registry starts empty;
// services re-register on their next heartbeat.
service ClusterService {
  // Registers a member and refreshes its heartbeat in a single call.
  // Idempotent: repeated calls update last_heartbeat. Members should call
  // this periodically (e.g. every 5s) to stay in the registry.
  rpc RegisterMember(RegisterMemberRequest) returns (RegisterMemberResponse);

  // Lists the members of a given kind that are currently registered (not
  // evicted).
  rpc ListMembers(ListMembersRequest) returns (ListMembersResponse);
}
// Kind of cluster member. The same QWS code can register in either lane;
// the kind tells QC which work-scope the member can handle. Unknown values
// are treated as MEMBER_KIND_UNSPECIFIED.
enum MemberKind {
  MEMBER_KIND_UNSPECIFIED = 0;

  // QWS in the cloud-segment lane: scans S3-resident segments via
  // cache_server.
  MEMBER_KIND_QWS_CLOUD = 2;

  // QWS in the nursery-local lane: scans nursery-local .ttseg files via
  // NurseryLocalCache and holds no S3 credentials. Registered by the
  // nursery service itself today; Phase 8.4 splits it into a separate qws
  // child.
  MEMBER_KIND_QWS_NURSERY = 1;
}
// Request for ClusterService.RegisterMember.
message RegisterMemberRequest {
  // Stable identity of this member (e.g. nursery_node_id). Registering
  // again with the same id replaces the previous record.
  string id = 1;

  // Kind of member being registered.
  MemberKind kind = 2;

  // gRPC endpoint that other services can dial
  // (e.g. "http://nursery-0:9531").
  string endpoint = 3;

  // Relative capacity of this member. The QC's partition_ring uses it to
  // route proportionally more partitions to stronger nodes. The first
  // version is the effective vCPU count visible to the registering process
  // (cgroup-aware via num_cpus::get on Linux, so under k8s it is the pod's
  // CPU limit rather than the host's full capacity). Zero means "unset by
  // an old client"; the QC treats that as weight=1 (equal share).
  uint32 weight = 4;
}
// Response for ClusterService.RegisterMember.
message RegisterMemberResponse {
  // Cluster identity string this meta is configured with, set per
  // environment in helm values (e.g. "dev", "valhalla", a UUID). It is
  // neither cryptographic nor secret; it is a deliberate mixup-detection
  // mechanism. Services that talked to this meta tag their outbound gRPC
  // calls with this id, and the receiver rejects mismatches. That cheaply
  // catches "I accidentally pointed at the wrong meta_endpoint and now I'm
  // dialing valhalla's qws from a dev process."
  string cluster_id = 1;
}
// Request for ClusterService.ListMembers.
message ListMembersRequest {
  // Kind filter. MEMBER_KIND_UNSPECIFIED returns all members; any other
  // value restricts the result to the requested kind.
  MemberKind kind = 1;
}
// One registered cluster member.
message MemberRecord {
  // Member identity.
  string id = 1;
  // Member kind.
  MemberKind kind = 2;
  // Member endpoint.
  string endpoint = 3;
  // Time of the last heartbeat meta received, in nanoseconds since the
  // unix epoch.
  int64 last_heartbeat_nanos = 4;
  // Relative capacity, set by the member at registration time (see
  // RegisterMemberRequest.weight). The QC's partition_ring uses it to
  // route proportionally more partitions to stronger nodes. Zero from
  // pre-weighting clients is treated as weight=1 by the consumer.
  uint32 weight = 5;
}
message ListMembersResponse {
repeated MemberRecord members = 1;
// Same cluster id as RegisterMemberResponse.cluster_id — see there.
string cluster_id = 2;
}
IngestionService
Authenticates ingest tokens and manages collector streams.
RPCs
| RPC | Description |
|---|---|
| RegisterForStream | Authenticate an ingest token and get/create a stream for a collector |
Proto Definition
// IngestionService is the stream-registration entry point for collectors.
service IngestionService {
  // Authenticates an ingest token and returns a stream for this collector:
  // the existing active stream for the collector_id, or a newly created
  // one.
  rpc RegisterForStream(RegisterForStreamRequest) returns (RegisterForStreamResponse);
}
// Request for IngestionService.RegisterForStream.
message RegisterForStreamRequest {
  // Field number 1 is reserved and must not be reused.
  reserved 1;

  // NOTE(review): meaning is not documented here; presumably a descriptive
  // string for the collector — confirm.
  string collector_string = 2;
  // Collector identifier.
  string collector_id = 3;
  // Ingest token presented by the collector.
  string ingest_token = 4;
}
// Stream configuration.
message StreamConfig {
  // Cloud object prefix for the stream.
  CloudObjectPrefix cloud_object_prefix = 1;
  // Flush timeout, in milliseconds.
  uint64 flush_timeout_ms = 2;
  // Maximum segment size, in bytes.
  uint64 max_segment_size_bytes = 3;
}
message RegisterForStreamResponse {
string stream_id = 1;
StreamConfig config = 2;
// UUID of the ingest token (from the meta database).
// Tjalfe uses this as the key in CombinedSignals.token_signals
// so the plaintext token is never persisted to S3.
string token_id = 3;
}
IngestTokenService
Manages ingest tokens for authentication and signal routing.
RPCs
| RPC | Description |
|---|---|
| CreateIngestToken | Create a new token (plaintext shown only once) |
| ValidateIngestToken | Validate a token and return its routing |
| DeleteIngestToken | Tombstone a token; the catalog row sits for the grace window so a fat-finger delete can be resurrected |
| ResurrectIngestToken | Lift the tombstone on a token within the grace window |
| ListIngestTokens | List all tokens |
| GetIngestToken | Get token details by ID |
| UpdateIngestTokenRouting | Update signal routing for a token |
Proto Definition
// IngestTokenService manages ingest tokens.
//
// Every RPC except ValidateIngestToken requires `database_id`: tokens are
// auth credentials, so each operation carries the database scope
// explicitly. Validate is the exception because the ingest path receives
// only a plaintext token header, and the response carries `database_id`
// outward.
service IngestTokenService {
  // Creates a token.
  rpc CreateIngestToken(CreateIngestTokenRequest) returns (CreateIngestTokenResponse);

  // Validates a plaintext token.
  rpc ValidateIngestToken(ValidateIngestTokenRequest) returns (ValidateIngestTokenResponse);

  // New stream registrations are rejected at tombstone time, before the
  // grace window expires.
  rpc DeleteIngestToken(DeleteIngestTokenRequest) returns (DeleteIngestTokenResponse);

  // Resurrects a token.
  rpc ResurrectIngestToken(ResurrectIngestTokenRequest) returns (ResurrectIngestTokenResponse);

  // Lists tokens.
  rpc ListIngestTokens(ListIngestTokensRequest) returns (ListIngestTokensResponse);

  // Fetches a token.
  rpc GetIngestToken(GetIngestTokenRequest) returns (GetIngestTokenResponse);

  // Updates a token's routing.
  rpc UpdateIngestTokenRouting(UpdateIngestTokenRoutingRequest) returns (UpdateIngestTokenRoutingResponse);
}
// Routing of a token: one table id per signal.
message IngestTokenRouting {
  // Table id for traces.
  string traces_table_id = 1;
  // Table id for logs.
  string logs_table_id = 2;
  // Table id for metrics.
  string metrics_table_id = 3;
}
// Details of an ingest token.
message IngestTokenInfo {
  // Token identifier.
  string id = 1;
  // Token name.
  string name = 2;
  // Creation time, carried as a string. NOTE(review): the string format is
  // not specified here — confirm.
  string created_at = 3;
  // Routing of this token.
  IngestTokenRouting routing = 4;
  // Last 4 chars of the plaintext token.
  string token_hint = 5;
  // Database identifier.
  string database_id = 6;
  // Tombstone metadata; absent for live rows.
  Tombstone tombstone = 7;
}
// Request for IngestTokenService.CreateIngestToken.
message CreateIngestTokenRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // Token name.
  string name = 2;

  // All three routing targets below are required. A "logs-only" token
  // points all three signals at the logs table; wrong-signal data lands
  // there and schema-on-read absorbs it. Tightening to per-signal optional
  // is a future, non-breaking change.
  string traces_table_id = 3;
  string logs_table_id = 4;
  string metrics_table_id = 5;
}

// Response for IngestTokenService.CreateIngestToken.
message CreateIngestTokenResponse {
  // Plaintext token; shown only once.
  string plaintext_token = 1;
  // NOTE(review): presumably the details of the created token — confirm.
  IngestTokenInfo token = 2;
}
// Request for IngestTokenService.ValidateIngestToken.
message ValidateIngestTokenRequest {
  // Plaintext token to validate.
  string plaintext_token = 1;
}

// Response for IngestTokenService.ValidateIngestToken.
message ValidateIngestTokenResponse {
  // Token identifier.
  string token_id = 1;
  // Routing of the token.
  IngestTokenRouting routing = 2;
  // Database identifier.
  string database_id = 3;
}
// Request for IngestTokenService.DeleteIngestToken.
message DeleteIngestTokenRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // Id of the token to delete.
  string token_id = 2;
}

// Response for IngestTokenService.DeleteIngestToken.
message DeleteIngestTokenResponse {
  // Carries the grace window: grace = delete_at - tombstoned_at.
  Tombstone tombstone = 1;
}
// Request for IngestTokenService.ResurrectIngestToken.
message ResurrectIngestTokenRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // Id of the token to resurrect.
  string token_id = 2;
}

// Response for IngestTokenService.ResurrectIngestToken.
message ResurrectIngestTokenResponse {
  // NOTE(review): presumably the token after resurrection — confirm.
  IngestTokenInfo token = 1;
}
// Request for IngestTokenService.ListIngestTokens.
message ListIngestTokensRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // NOTE(review): presumably also returns tombstoned tokens when true
  // (inferred from the field name) — confirm.
  bool include_deleted = 2;
}

// Response for IngestTokenService.ListIngestTokens.
message ListIngestTokensResponse {
  // The listed tokens.
  repeated IngestTokenInfo tokens = 1;
}
// Request for IngestTokenService.GetIngestToken.
message GetIngestTokenRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // Id of the token to fetch.
  string token_id = 2;
  // NOTE(review): presumably allows a tombstoned token to be returned
  // (inferred from the field name) — confirm.
  bool include_deleted = 3;
}

// Response for IngestTokenService.GetIngestToken.
message GetIngestTokenResponse {
  // The token carried in the response.
  IngestTokenInfo token = 1;
}
// Request for IngestTokenService.UpdateIngestTokenRouting.
message UpdateIngestTokenRoutingRequest {
  // Database the request is scoped to.
  string database_id = 1;
  // Id of the token to update.
  string token_id = 2;
  // Routing to apply to the token.
  IngestTokenRouting routing = 3;
}
message UpdateIngestTokenRoutingResponse {
// empty on success
}
NurseryService
Manages ingest stream lifecycle, merged segment registration, and offset tracking.
RPCs
| RPC | Description |
|---|---|
| GetIngestStreamsForNursery | Get all active streams and table routing info |
| MarkStreamStopped | Mark a stream as stopped (client disconnect or timeout) |
| RegisterMergedSegments | Register one or more merged segments for a single table, atomically |
| UpdateDeletedOffset | Update the deleted offset after cleaning baby segments |
| DeleteIngestStreamTable | Remove a fully-processed table entry from a stream |
Proto Definition
// NurseryService is the nursery-facing API of meta.
service NurseryService {
  // Returns ingest streams for the nursery.
  rpc GetIngestStreamsForNursery(GetIngestStreamsRequest) returns (GetIngestStreamsResponse);

  // Marks a stream as stopped.
  rpc MarkStreamStopped(MarkStreamStoppedRequest) returns (MarkStreamStoppedResponse);

  // Registers merged segments.
  rpc RegisterMergedSegments(RegisterMergedSegmentsRequest) returns (RegisterMergedSegmentsResponse);

  // Updates a stream's deleted offset.
  rpc UpdateDeletedOffset(UpdateDeletedOffsetRequest) returns (UpdateDeletedOffsetResponse);

  // Deletes a table entry from an ingest stream.
  rpc DeleteIngestStreamTable(DeleteIngestStreamTableRequest) returns (DeleteIngestStreamTableResponse);
}
// Request for NurseryService.GetIngestStreamsForNursery. Carries no fields.
message GetIngestStreamsRequest {
}
// Checkpoint that maps a merged segment offset to a stream offset.
message OffsetCheckpoint {
  // MVCC version of the merged segment.
  int64 segment_offset = 1;
  // Stream offset at the time of this merge.
  int64 stream_offset = 2;
  // Ingest timestamp of the baby segment, in nanoseconds since the unix
  // epoch.
  int64 ingest_timestamp_nanos = 3;
}
// Best-effort (stream_offset, ingest_time) sample. Used to pick the ~1h
// "old enough to delete" boundary for a stream's shared blobs.
message OffsetTimestamp {
  // Stream offset of the sample.
  int64 stream_offset = 1;
  // Ingest time of the sample. NOTE(review): presumably nanoseconds since
  // the unix epoch, like the other `*_nanos` fields — confirm.
  int64 ingest_time_nanos = 2;
}
// Table-specific routing information within a stream.
message IngestStreamTableInfo {
  // Field numbers 4 and 5 are not used by this message. They are reserved
  // so they cannot be reassigned with a different meaning.
  // NOTE(review): presumably they belonged to removed fields — confirm
  // against the schema history.
  reserved 4, 5;

  // Table identifier.
  string table_id = 1;

  // Last merged offset. Nursery should fetch from merged_offset + 1.
  int64 merged_offset = 2;

  // Ingest timestamp of the last merged baby segment (nanoseconds since
  // unix epoch).
  // NOTE: Source precision is seconds (from S3 last_modified), sub-second
  // portion is always zero.
  // Used by query engine to determine which segments to query.
  int64 merged_ingest_time_nanos = 3;

  // Table stream creation time in nanoseconds since unix epoch.
  int64 created_at_nanos = 6;

  // S3 location for uploading merged segments: endpoint, bucket, path
  // prefix.
  // Path format: tables/<table_id>/segments/
  CloudObjectPrefix cloud_merged_segments_prefix = 7;

  // Checkpoints tracking recent merge points (segment_offset,
  // stream_offset, ingest_timestamp).
  // Pruned to keep only checkpoints from the last 30 minutes.
  repeated OffsetCheckpoint offset_checkpoints = 8;
}
// External stream information. Mirrors the two-table design
// (ingest_streams + ingest_stream_tables).
message IngestStreamInfo {
  // Stream identifier.
  string stream_id = 1;

  // S3 location for baby segments: endpoint, bucket, path prefix.
  // Path format: ingest-stream/<stream_id>/
  CloudObjectPrefix cloud_stream_prefix = 2;

  // When set, the stream is stopped and no more baby segments will arrive.
  // Nursery can release a stopped stream once the remaining segments are
  // processed.
  optional int64 stopped_at_nanos = 3;

  // Offset of the stop message; only set when stopped_at_nanos is set.
  // A fully-merged stream has merged_offset == stopped_offset.
  optional int64 stopped_offset = 4;

  // Table-specific routing information for this stream.
  repeated IngestStreamTableInfo tables = 5;

  // Durable completeness frontier (nanos): the minimum of
  // merged_ingest_time_nanos over this stream's tables, or the stream's
  // creation time while it has no tables yet. Serves as the nursery's
  // ingest watermark floor for a stream it hasn't fetched.
  int64 known_complete_through_nanos = 6;

  // Highest stream offset whose shared baby-segment blobs are deleted from
  // S3 (default -1). Nursery only deletes offsets <= min(merged_offset)
  // over tables.
  int64 deleted_offset = 7;

  // Best-effort ingest time near the deletion frontier (nanoseconds since
  // unix epoch). For observability only; correctness rides on
  // deleted_offset.
  int64 deleted_ingest_time_nanos = 8;

  // Sparse age-ladder samples used to pick the deletion boundary.
  repeated OffsetTimestamp offset_timestamps = 9;
}
// Per-token routing that maps each signal type to a target table. Keyed by
// token_id (UUID), matching the key in CombinedSignals.token_signals.
message IngestTokenRoutingInfo {
  // UUID of the ingest token (from the meta database). Matches the key
  // used in CombinedSignals.token_signals.
  string token_id = 1;
  // Target table for traces.
  string traces_table_id = 2;
  // Target table for logs.
  string logs_table_id = 3;
  // Target table for metrics.
  string metrics_table_id = 4;
  // PR 5: the token's owning database. Nursery uses it to scope
  // `bzrk.table` attribute lookups and to reject cross-DB routing.
  string database_id = 5;
}
// Response for NurseryService.GetIngestStreamsForNursery.
message GetIngestStreamsResponse {
  // The ingest streams.
  repeated IngestStreamInfo streams = 1;

  // All known tables (name → id mapping for routing resolution). Each
  // `TableInfo.database_id` scopes the table to a single database, which
  // is the only legal target for that table's routing (cross-database
  // routing is impossible by construction).
  repeated TableInfo tables = 2;

  // Monotonic sequence number for staleness detection. Nursery should
  // discard responses whose sequence is lower than one already seen.
  int64 meta_response_seq = 3;

  // Active ingest token routing, for token-based signal routing.
  repeated IngestTokenRoutingInfo token_routing = 4;

  // The `streams` list is complete through this time (nanos): any stream
  // missing from this response was created strictly after it. The nursery
  // caps its ingest watermark here so that a just-created, not-yet-polled
  // stream can't be skipped.
  int64 stream_list_complete_through_nanos = 6;

  // Field 5 used to be `repeated DatabaseInfo databases`, consumed by the
  // old qualified-routing path (`bzrk.table=db.foo`). PR 7 removed the
  // qualifier: every routing key is unqualified and resolves strictly in
  // the token's database, so the nursery never needs a name→id database
  // map.
  reserved 5;
  reserved "databases";
}
// Why a stream was stopped.
enum StopReason {
  STOP_REASON_UNSPECIFIED = 0;

  // The client (ingester) wrote a stop message.
  STOP_REASON_CLIENT = 1;

  // Nursery detected an inactivity timeout.
  STOP_REASON_TIMEOUT = 2;
}
// Request for NurseryService.MarkStreamStopped.
message MarkStreamStoppedRequest {
  // Stream identifier.
  string stream_id = 1;
  // Time the stream was stopped, in nanoseconds since the unix epoch.
  int64 stopped_at_nanos = 2;
  // Final processed offset in the stream.
  int64 final_offset = 3;
  // Why the stream was stopped.
  StopReason reason = 4;
}

// Response for NurseryService.MarkStreamStopped. Carries no fields.
message MarkStreamStoppedResponse {
}
// Per-stream offset information used when registering merged segments.
message StreamOffsetInfo {
  // Stream identifier.
  string stream_id = 1;
  // Table identifier.
  string table_id = 2;

  // New merged_offset after this merge: the max baby segment offset
  // included.
  int64 new_merged_offset = 3;

  // Ingest timestamp (S3 last_modified) of the highest-offset baby
  // segment. NOTE: S3 provides second-level precision, so the sub-second
  // portion is always zero.
  int64 ingest_time_nanos = 4;

  // Value of merged_offset when nursery selected segments for the merge.
  // Meta validates that it matches the current value to detect conflicts.
  int64 start_merged_offset = 5;
}
// A single merged-segment registration: the new segment together with the
// per-stream offset advances for the streams that contributed data to it.
message MergeRegistration {
  // The new segment.
  CreateSegment segment = 1;

  // Per-stream offset advance for each stream that contributed segments to
  // this merge. Every item's start_merged_offset must equal the current
  // merged_offset for that (stream, table). Items in a batch are applied
  // in order, so item N+1's start_merged_offset must equal item N's
  // new_merged_offset (for the same stream).
  repeated StreamOffsetInfo stream_offsets = 2;
}
// Registers one or more merged segments for a single table, atomically.
//
// Every merge in the batch must share the same table_id (server enforced),
// and that table must live in `database_id` (server enforced). `merges`
// are applied in order inside one Postgres transaction; any failure rolls
// the whole transaction back. Once all merges are applied,
// `idle_advances` follow. They advance merged_offset for streams that did
// not contribute segments to any merge in this batch (e.g. streams whose
// follower processed keep-alives or a stop message). Idle-advance
// checkpoints reuse the last segment version allocated in the batch.
message RegisterMergedSegmentsRequest {
  // Database that owns the batch's table. Required: the server rejects
  // the call with INVALID_ARGUMENT when `database_id` does not own the
  // batch's table_id.
  string database_id = 3;

  // Merges to register. Must be non-empty.
  repeated MergeRegistration merges = 1;

  // Optional. Applied last, in the same transaction.
  repeated StreamOffsetInfo idle_advances = 2;

  // Idempotency key: a stable per-batch id, reused across retries of the
  // same batch. Meta uses it to make registration replay-safe, so a retry
  // after a committed-but-lost response returns the original result rather
  // than re-applying or wedging on the unique storage-path index.
  string register_request_id = 4;
}
// Response for NurseryService.RegisterMergedSegments.
message RegisterMergedSegmentsResponse {
  // segment_versions[i] is the MVCC version assigned to merges[i].
  repeated int64 segment_versions = 1;

  // Updated stream info for nursery to cache; avoids a separate poll.
  repeated IngestStreamInfo streams = 2;

  // Monotonic sequence number for staleness detection. Shares its counter
  // with GetIngestStreamsResponse.
  int64 meta_response_seq = 3;
}
// Request for NurseryService.UpdateDeletedOffset.
message UpdateDeletedOffsetRequest {
  // Field number 2 is not used by this message. It is reserved so it
  // cannot be reassigned with a different meaning.
  // NOTE(review): presumably it belonged to a removed field — confirm
  // against the schema history.
  reserved 2;

  // Stream identifier.
  string stream_id = 1;

  // Deleted offset to record for the stream.
  int64 deleted_offset = 3;

  // Best-effort ingest time near the deletion frontier (nanoseconds since
  // unix epoch). Observability only — correctness rides on deleted_offset.
  int64 deleted_ingest_time_nanos = 4;
}

// Response for NurseryService.UpdateDeletedOffset. Carries no fields.
message UpdateDeletedOffsetResponse {}
// Request for NurseryService.DeleteIngestStreamTable.
message DeleteIngestStreamTableRequest {
  // Stream identifier.
  string stream_id = 1;
  // Table identifier.
  string table_id = 2;

  // Current (highest) offset nursery is processing for this stream. As a
  // sanity check, meta validates that it is >= the table's merged_offset.
  int64 current_offset = 3;

  // Nursery's view of the table's merged_offset. Meta validates that it
  // matches its own merged_offset and deleted_offset, which catches stale
  // nursery state.
  int64 merged_offset = 4;
}
// Empty acknowledgement; this response defines no fields.
message DeleteIngestStreamTableResponse {
}
MergeTaskService
Coordinates segment merge tasks between the janitor and merge workers.
RPCs
| RPC | Description |
|---|---|
PollForSegmentMergeTask | Worker polls for an available merge task |
CompleteSegmentMergeTask | Worker reports successful merge with the new segment |
FailSegmentMergeTask | Worker reports merge failure |
Proto Definition
// Coordinates segment merge tasks between the janitor and merge workers.
service MergeTaskService {
  // Worker polls for an available merge task.
  rpc PollForSegmentMergeTask(PollForSegmentMergeTaskRequest)
      returns (PollForSegmentMergeTaskResponse);

  // Worker reports a successful merge together with the new segment.
  rpc CompleteSegmentMergeTask(CompleteSegmentMergeTaskRequest)
      returns (CompleteSegmentMergeTaskResponse);

  // Worker reports a merge failure.
  rpc FailSegmentMergeTask(FailSegmentMergeTaskRequest)
      returns (FailSegmentMergeTaskResponse);
}
// Poll issued by a merge worker looking for work.
message PollForSegmentMergeTaskRequest {
  // Identifier of the worker issuing the poll.
  string worker_id = 1;
}
// Answer to a merge-task poll.
message PollForSegmentMergeTaskResponse {
  // Merge task handed to the worker. Declared `optional`; presumably left
  // unset when no task is available — confirm against the server.
  optional SegmentMergeTaskInfo task = 1;
}
// Sent by a worker to report a successful merge.
message CompleteSegmentMergeTaskRequest {
  // Task being reported as complete.
  string task_id = 1;

  // The new segment produced by the merge.
  CreateSegment merged_segment = 2;

  // Relative path of the rewrite journal for this merge.
  // NOTE(review): what the path is relative to is not stated here — confirm.
  string rewrite_journal_relative_path = 3;
}
// Outcome of CompleteSegmentMergeTask: exactly one of `success` or `error`
// can be set, since both live in the same oneof.
message CompleteSegmentMergeTaskResponse {
  oneof result {
    // Set when the completion was accepted.
    CompleteSegmentMergeTaskSuccess success = 1;
    // Set when the completion was rejected; carries a message.
    CompleteSegmentMergeTaskError error = 2;
  }
}
// Success arm of CompleteSegmentMergeTaskResponse; defines no fields.
message CompleteSegmentMergeTaskSuccess {
}
// Error arm of CompleteSegmentMergeTaskResponse.
message CompleteSegmentMergeTaskError {
  // Error message text.
  string message = 1;
}
// Sent by a worker to report that a merge failed.
message FailSegmentMergeTaskRequest {
  // Task that failed.
  string task_id = 1;

  // Error message supplied by the worker.
  string error_message = 2;
}
// Empty acknowledgement; this response defines no fields.
message FailSegmentMergeTaskResponse {
}
SegmentDeletionService
Manages cleanup of tombstoned segments from storage.
RPCs
| RPC | Description |
|---|---|
GetTombstonedSegmentsForDeletion | Get segments marked for deletion |
ConfirmTombstoneDeletion | Confirm segments have been deleted from storage |
ConfirmRewriteJournalDeletion | Confirm rewrite journals (identified by output segment ID) have been deleted from storage |
Proto Definition
// Manages cleanup of tombstoned segments from storage.
service SegmentDeletionService {
  // Get segments marked for deletion.
  rpc GetTombstonedSegmentsForDeletion(GetTombstonedSegmentsForDeletionRequest)
      returns (GetTombstonedSegmentsForDeletionResponse);

  // Confirm segments have been deleted from storage.
  rpc ConfirmTombstoneDeletion(ConfirmTombstoneDeletionRequest)
      returns (ConfirmTombstoneDeletionResponse);

  // Takes a list of output segment ids and returns a cleared count.
  // Presumably confirms that the matching rewrite journals were deleted from
  // storage — confirm against the server implementation.
  rpc ConfirmRewriteJournalDeletion(ConfirmRewriteJournalDeletionRequest)
      returns (ConfirmRewriteJournalDeletionResponse);
}
// Carries no fields: the service relies on its internal configuration.
message GetTombstonedSegmentsForDeletionRequest {
}
// Segments marked for deletion.
message GetTombstonedSegmentsForDeletionResponse {
  // One entry per tombstoned segment returned by the call.
  repeated TombstonedSegmentInfo segments = 1;
}
// Describes one tombstoned segment.
message TombstonedSegmentInfo {
  // Id of the tombstoned segment.
  string segment_id = 1;

  // Id of the table the segment is recorded under.
  string table_id = 2;

  // Storage key of the segment's object.
  CloudObjectKey cloud_object_key = 3;

  // Tombstone time in nanoseconds (presumably since the unix epoch, like
  // other `*_nanos` fields in this API — confirm).
  int64 tombstone_time_nanos = 4;

  // Version recorded with the tombstone.
  // NOTE(review): which counter this comes from is not stated here.
  int64 tombstone_version = 5;
}
// Confirms that segments have been deleted from storage.
message ConfirmTombstoneDeletionRequest {
  // Ids of the segments being confirmed.
  repeated string segment_ids = 1;
}
// Outcome of ConfirmTombstoneDeletion: exactly one of `success` or `error`
// can be set, since both live in the same oneof.
message ConfirmTombstoneDeletionResponse {
  oneof result {
    // Set when the confirmation succeeded.
    ConfirmTombstoneDeletionSuccess success = 1;
    // Set when the confirmation failed; carries a message.
    ConfirmTombstoneDeletionError error = 2;
  }
}
// Success arm of ConfirmTombstoneDeletionResponse.
message ConfirmTombstoneDeletionSuccess {
  // Count of deletions recorded by this call.
  int32 deleted_count = 1;

  // Rewrite journals that are ready for deletion.
  repeated RewriteJournalReadyForDeletion journals_ready = 2;
}
// Identifies one rewrite journal that is ready for deletion.
message RewriteJournalReadyForDeletion {
  // Id of the output segment the journal is associated with.
  string output_segment_id = 1;

  // Storage key of the journal object.
  CloudObjectKey journal_key = 2;
}
// Error arm of ConfirmTombstoneDeletionResponse.
message ConfirmTombstoneDeletionError {
  // Error message text.
  string message = 1;
}
// Request for ConfirmRewriteJournalDeletion.
message ConfirmRewriteJournalDeletionRequest {
  // Output segment ids identifying the rewrite journals concerned
  // (presumably the `output_segment_id` values from
  // RewriteJournalReadyForDeletion — confirm).
  repeated string output_segment_ids = 1;
}
// Response for ConfirmRewriteJournalDeletion.
message ConfirmRewriteJournalDeletionResponse {
  // Count of entries cleared by this call.
  int32 cleared_count = 1;
}
SegmentLookupService
Discovers segments matching a time range and table filter.
RPCs
| RPC | Description |
|---|---|
FindSegments | Find segments by time range and table names (paginated) |
GetSegmentsByIds | Get specific segments by their IDs |
ListWarmCandidates | Returns segments ordered for cache warming priority (newest-first by time_range.end_time, globally across all tables). Paginated. |
Proto Definition
// Discovers segments matching a time range and table filter.
service SegmentLookupService {
  // Find segments by time range and table names (paginated).
  rpc FindSegments(FindSegmentsRequest) returns (FindSegmentsResponse);

  // Get specific segments by their IDs.
  rpc GetSegmentsByIds(GetSegmentsByIdsRequest)
      returns (GetSegmentsByIdsResponse);

  // Lists segments in cache-warming priority order: newest-first by
  // time_range.end_time, globally across all tables. Paginated.
  rpc ListWarmCandidates(ListWarmCandidatesRequest)
      returns (ListWarmCandidatesResponse);
}
// Query for segments by time range and table names, one page at a time.
message FindSegmentsRequest {
  // Field number 2 is not assigned in this message. Reserving it keeps a
  // future field from reusing the number (presumably it belonged to a removed
  // field — confirm against the schema history).
  reserved 2;

  // Time range the returned segments must match.
  TimeRange time_range = 1;

  // Table names to search; resolved against `database_id`.
  repeated string table_names = 3;

  // Page size limit for this call.
  int32 limit = 4;

  // Pagination cursor (presumably the `next_cursor` of the previous page,
  // unset on the first call — confirm).
  optional string cursor = 5;

  // UUID of the database to resolve `table_names` against. Required;
  // server rejects empty. Callers (query service, admin tools) are
  // expected to have already resolved any name via `GetDatabase`.
  string database_id = 6;
}
// Pagination info embedded in paginated responses.
message PagedResponse {
  // Cursor for fetching the next page. Declared `optional`; presumably unset
  // when there are no further pages — confirm.
  optional string next_cursor = 1;
}
// One page of FindSegments results.
message FindSegmentsResponse {
  // Segments in this page.
  repeated SegmentInfo segments = 1;

  // Pagination info for this page.
  PagedResponse page = 2;

  // Value of meta's monotonic segment-version counter (`counters.version`),
  // read in the same transaction that produced the segment list. QC pins it
  // as the query's watermark `V`: the cloud QWS lane sees only segments with
  // `version <= V`, while a future nursery lane (Phase 4-5 of
  // docs/dev/query-exec-in-nursery.md) fills the gap with locally-staged
  // segments that are either unregistered or registered at `version > V`.
  // Those two sets are disjoint by construction, so the merged result never
  // double-counts.
  uint64 current_version = 3;
}
// Lookup of specific segments by id.
message GetSegmentsByIdsRequest {
  // Ids of the segments to fetch.
  repeated string segment_ids = 1;
}
// Result of GetSegmentsByIds.
message GetSegmentsByIdsResponse {
  // Segments returned for the requested ids.
  repeated SegmentInfo segments = 1;
}
// Request for one page of cache-warming candidates.
message ListWarmCandidatesRequest {
  // Maximum number of segments per page; the server applies its own cap.
  int32 limit = 1;

  // Opaque cursor returned with the previous page; empty on the first call.
  optional string cursor = 2;

  // Shard-filter fields are to be added later, as part of the sharding
  // design.
}
// One page of cache-warming candidates.
message ListWarmCandidatesResponse {
  // Sorted newest-first by time_range.end_time, with ties ordered by id
  // descending.
  repeated SegmentInfo segments = 1;

  // Pagination info for this page.
  PagedResponse page = 2;
}
JanitorService
Provides cluster statistics and merge task management for maintenance operations.
RPCs
| RPC | Description |
|---|---|
GetJanitorStats | Get segment count, sizes, and outstanding merge tasks |
GetSegmentSizeStats | Get segment size distribution by tier |
ListMergeTasks | List active merge tasks |
GetMergeTask | Get detailed info about a specific merge task |
UnclaimMergeTask | Release a stale merge task claim |
CreateMergeTasks | Create new merge tasks for eligible segments |
RewriteAllSegments | Create one single-segment merge task per existing segment not already in a merge task. This forces every segment through the rewrite pipeline to pick up index fixes. |
DeleteMergeTask | Delete a merge task by ID |
Proto Definition
// Provides cluster statistics and merge task management for maintenance
// operations.
service JanitorService {
  // Get segment count, sizes, and outstanding merge tasks.
  rpc GetJanitorStats(GetJanitorStatsRequest)
      returns (GetJanitorStatsResponse);

  // Get segment size distribution by tier.
  rpc GetSegmentSizeStats(GetSegmentSizeStatsRequest)
      returns (GetSegmentSizeStatsResponse);

  // List active merge tasks.
  rpc ListMergeTasks(ListMergeTasksRequest) returns (ListMergeTasksResponse);

  // Get detailed info about a specific merge task.
  rpc GetMergeTask(GetMergeTaskRequest) returns (GetMergeTaskResponse);

  // Release a stale merge task claim.
  rpc UnclaimMergeTask(UnclaimMergeTaskRequest)
      returns (UnclaimMergeTaskResponse);

  // Create new merge tasks for eligible segments.
  rpc CreateMergeTasks(CreateMergeTasksRequest)
      returns (CreateMergeTasksResponse);

  // Creates a single-segment merge task for each existing segment that is
  // not already part of a merge task, forcing every segment through the
  // rewrite pipeline so it picks up index fixes.
  rpc RewriteAllSegments(RewriteAllSegmentsRequest)
      returns (RewriteAllSegmentsResponse);

  // Delete a merge task by id; the response says whether it was found and
  // deleted.
  rpc DeleteMergeTask(DeleteMergeTaskRequest)
      returns (DeleteMergeTaskResponse);
}
// Optional filters for the segment size distribution returned by
// GetSegmentSizeStats.
message GetSegmentSizeStatsRequest {
// Optional maximum compressed size in bytes (e.g., 10485760 for 10MB).
// Only segments with compressed_size <= this value are included.
optional int64 max_compressed_size = 1;
// Optional minimum age in seconds. Only segments at least this old are
// included, i.e. segments created before now - min_age_secs.
optional int64 min_age_secs = 2;
// Optional maximum age in seconds. Only segments at most this old are
// included, i.e. segments created after now - max_age_secs.
optional int64 max_age_secs = 3;
// Optional: scope to a specific table by UUID. Pre-PR-4a this took a
// name; switching to id avoids cross-database collisions when multi-DB
// arrives.
optional string table_id = 4;
}
// Aggregate statistics for one size tier of segments.
// NOTE(review): units and string formats are not stated in this definition;
// sizes are presumably bytes and times presumably formatted timestamps —
// confirm against the server.
message SegmentSizeTier {
  // Tier number.
  int32 tier = 1;

  // Lower bound of the tier, as a string.
  string tier_min = 2;

  // Upper bound of the tier, as a string.
  string tier_max = 3;

  // Number of segments in the tier.
  int64 segment_count = 4;

  // Combined size of the tier's segments.
  int64 total_size = 5;

  // Size of the smallest segment in the tier.
  int64 smallest = 6;

  // Size of the largest segment in the tier.
  int64 largest = 7;

  // Average segment size in the tier.
  int64 avg_size = 8;

  // Earliest time in the tier, as a string.
  string earliest_time = 9;

  // Latest time in the tier, as a string.
  string latest_time = 10;
}
// Segment size distribution, broken down by tier.
message GetSegmentSizeStatsResponse {
  // One entry per size tier.
  repeated SegmentSizeTier tiers = 1;
}
// Request for janitor statistics, optionally scoped to one table.
message GetJanitorStatsRequest {
  // Optional table UUID to scope the statistics to. Before PR-4a this field
  // carried a table name; using the id avoids cross-database collisions.
  optional string table_id = 1;
}
// Segment count, sizes, and outstanding merge tasks.
message GetJanitorStatsResponse {
  // Number of segments (presumably excluding tombstoned ones, given the
  // separate `segments_incl_tombstoned` field — confirm).
  int64 segment_count = 1;

  // Number of outstanding merge tasks.
  int64 outstanding_merge_tasks = 2;

  // Total size of the counted segments, in bytes.
  int64 total_size_bytes = 3;

  // Total compressed size of the counted segments, in bytes.
  int64 total_compressed_size_bytes = 4;

  // Number of segments including tombstoned ones.
  int64 segments_incl_tombstoned = 5;
}
// Request to list merge tasks, optionally scoped to one table.
message ListMergeTasksRequest {
  // Optional table UUID to scope the listing to. Before PR-4a this field
  // carried a table name; using the id avoids cross-database collisions.
  optional string table_id = 1;
}
// Result of ListMergeTasks.
message ListMergeTasksResponse {
  // One summary per listed merge task.
  repeated MergeTaskSummary tasks = 1;
}
// Summary row for one merge task.
message MergeTaskSummary {
  // Id of the merge task.
  string task_id = 1;

  // Id of the table the task is recorded under.
  string table_id = 2;

  // Number of segments in the task.
  int32 segment_count = 3;

  // Worker the task is assigned to (presumably empty when unassigned —
  // confirm).
  string assigned_to_worker = 4;

  // Creation time, as a string; the format is not specified here.
  string created_at = 5;

  // Assignment time, as a string; the format is not specified here.
  string assigned_at = 6;
}
// Request for detailed info about one merge task.
message GetMergeTaskRequest {
  // Id of the merge task to look up.
  string task_id = 1;
}
// Request to release a stale claim on a merge task.
message UnclaimMergeTaskRequest {
  // Id of the merge task to unclaim.
  string task_id = 1;

  // Minimum time, in seconds, that the task must have been claimed before it
  // may be unclaimed. Defaults to 3600 (1 hour) if not set.
  // NOTE(review): the field has no `optional` label, so an explicit 0 is
  // presumably indistinguishable from "not set" — confirm how the server
  // treats 0.
  int64 min_claimed_secs = 2;
}
// Result of UnclaimMergeTask.
message UnclaimMergeTaskResponse {
  // True when the task was unclaimed; false when it was not found or has not
  // been claimed for long enough yet.
  bool unclaimed = 1;
}
// Optional filters selecting the segments for which merge tasks are created.
message CreateMergeTasksRequest {
  // Optional upper bound on compressed size, in bytes: only segments with
  // compressed_size <= this value.
  optional int64 max_compressed_size = 1;

  // Optional minimum age, in seconds: only segments older than
  // now - min_age.
  optional int64 min_age_secs = 2;

  // Optional maximum age, in seconds: only segments newer than
  // now - max_age.
  optional int64 max_age_secs = 3;

  // Optional table UUID to scope the operation to. Before PR-4a this field
  // carried a table name; using the id avoids cross-database collisions when
  // multi-DB arrives.
  optional string table_id = 4;
}
// Result of CreateMergeTasks.
message CreateMergeTasksResponse {
  // How many merge tasks were created.
  int32 tasks_created = 1;

  // How many segments were included, summed over all created tasks.
  int64 segments_included = 2;
}
// Detailed info about one merge task.
// NOTE(review): size units and time string formats are not stated in this
// definition; sizes are presumably bytes — confirm against the server.
message GetMergeTaskResponse {
  // Id of the merge task.
  string task_id = 1;

  // Id of the table the task is recorded under.
  string table_id = 2;

  // Number of segments in the task.
  int32 segment_count = 3;

  // Worker the task is assigned to (presumably empty when unassigned —
  // confirm).
  string assigned_to_worker = 4;

  // Creation time, as a string.
  string created_at = 5;

  // Size of the smallest segment in the task.
  int64 smallest_segment_size = 6;

  // Size of the largest segment in the task.
  int64 largest_segment_size = 7;

  // Compressed size of the smallest segment in the task.
  int64 smallest_segment_compressed_size = 8;

  // Compressed size of the largest segment in the task.
  int64 largest_segment_compressed_size = 9;

  // Earliest time covered by the task's segments, as a string.
  string earliest_time = 10;

  // Latest time covered by the task's segments, as a string.
  string latest_time = 11;

  // Combined size of the task's segments.
  int64 total_size = 12;

  // Combined compressed size of the task's segments.
  int64 total_compressed_size = 13;

  // Assignment time, as a string.
  string assigned_at = 14;
}
// Request to create single-segment rewrite tasks, optionally scoped to one
// table.
message RewriteAllSegmentsRequest {
// Optional: scope to a specific table by UUID. Pre-PR-4a this took a
// name; switching to id avoids cross-database collisions.
optional string table_id = 1;
}
// Result of RewriteAllSegments.
message RewriteAllSegmentsResponse {
  // How many single-segment merge tasks were created.
  int32 tasks_created = 1;

  // How many segments in total will be rewritten.
  int64 segments_included = 2;
}
// Request to delete one merge task.
message DeleteMergeTaskRequest {
  // Id of the merge task to delete.
  string task_id = 1;
}
// Result of DeleteMergeTask.
message DeleteMergeTaskResponse {
  // True when the task was found and deleted.
  bool deleted = 1;
}
Common Types
Shared message types used across multiple services.
// Tombstone timestamps. Either both are set or neither is, and the server
// enforces delete_at >= tombstoned_at. Values are unix nanoseconds rather
// than google.protobuf.Timestamp, to dodge the well-known-types include path.
message Tombstone {
  // Tombstone time, in unix nanoseconds.
  int64 tombstoned_at_unix_ns = 1;

  // Delete time, in unix nanoseconds.
  int64 delete_at_unix_ns = 2;
}
// Location of a cloud storage bucket.
message CloudBucket {
  // Storage endpoint.
  string endpoint = 1;

  // Bucket name.
  string bucket = 2;

  // Region; empty means the provider default.
  string region = 3;

  // Customer-configured base path within the bucket (can be empty).
  string path = 4;

  // Storage provider: "s3" (default) or "gcs".
  string provider = 5;
}
// A key prefix inside a cloud bucket.
message CloudObjectPrefix {
  // Bucket the prefix lives in.
  CloudBucket bucket = 1;

  // Prefix string, e.g. "tables/ds-123/segments".
  string prefix = 2;
}
// A single object, addressed as a prefix plus a key.
message CloudObjectKey {
  // Prefix (including the bucket) the object sits under.
  CloudObjectPrefix prefix = 1;

  // Object key, e.g. "seg-456.ttseg".
  string key = 2;
}
// A time range with both bounds in nanoseconds since the unix epoch.
message TimeRange {
  // Start of the range, in nanoseconds since the unix epoch.
  int64 start_time = 1;

  // End of the range, in nanoseconds since the unix epoch.
  int64 end_time = 2;
}
// Describes a segment to be created.
message CreateSegment {
  // Id of the table the segment is created for.
  string table_id = 1;

  // Time range of the segment.
  TimeRange time_range = 2;

  // Segment size, in bytes.
  int64 size_bytes = 3;

  // Compressed segment size, in bytes.
  int64 compressed_size_bytes = 4;

  // Number of events in the segment.
  int32 number_of_events = 5;

  // Storage key of the segment's object.
  CloudObjectKey cloud_object_key = 6;

  // 32-byte AUDT digest of the segment file's audit chunk. Left empty for
  // legacy segments that have no AUDT chunk; new merger outputs fill it in.
  bytes audt_hash = 7;

  // Hash algorithm behind `audt_hash`. The wire codes match
  // libs/segment_files/src/audt.rs::AudtAlgo
  // (0 = sha256, 1 = blake3-256, 2 = sha3-256). Ignored whenever
  // `audt_hash` is empty.
  uint32 audt_algo = 8;

  // Min/max ingest time over the segment's rows, taken from the merger's
  // verifier (MIMX on `ingest_time`). It drives ingest-time slicing in the
  // coordinator and the freeze watermark for the map-reduce-state cache.
  // The invariant `event_time <= ingest_time` is enforced at write.
  TimeRange ingest_time_range = 9;
}
// Describes an existing segment.
message SegmentInfo {
  // Segment id.
  string id = 1;

  // Time range of the segment.
  TimeRange time_range = 2;

  // Segment size, in bytes.
  int64 size_bytes = 3;

  // Compressed segment size, in bytes.
  int64 compressed_size_bytes = 4;

  // Number of events in the segment.
  int32 number_of_events = 5;

  // Id of the table the segment is recorded under.
  string table_id = 6;

  // Storage key of the segment's object.
  CloudObjectKey cloud_object_key = 7;

  // 32-byte AUDT digest of the segment file's audit chunk. Empty for
  // pre-existing v0 rows whose `audt_hash` is NULL in meta. When present, a
  // query worker opening the segment uses it at load time to verify the
  // file against tampering.
  bytes audt_hash = 8;

  // Wire code of the hash algorithm behind `audt_hash` (see AudtAlgo).
  // Ignored whenever `audt_hash` is empty.
  uint32 audt_algo = 9;

  // Min/max ingest time over the segment's rows, populated from the merger's
  // verifier. For rows older than migration 010 it is seeded from
  // `time_range`, which is safe under the `event_time <= ingest_time`
  // invariant.
  TimeRange ingest_time_range = 10;
}
// One sharding field together with its weight.
message ShardingField {
  // Name of the field.
  string field_name = 1;

  // Weight given to the field.
  // NOTE(review): how weights are interpreted is not documented here.
  double weight = 2;
}
// A merge task as handed to a merge worker.
message SegmentMergeTaskInfo {
  // Field number 3 is not assigned in this message. Reserving it keeps a
  // future field from reusing the number (presumably it belonged to a removed
  // field — confirm against the schema history).
  reserved 3;

  // Id of the merge task.
  string task_id = 1;

  // Id of the table the task is recorded under.
  string table_id = 2;

  // Segments included in the task.
  repeated SegmentInfo segments = 4;

  // Cloud object prefix supplied with the task (presumably where the merged
  // output is written — confirm).
  CloudObjectPrefix cloud_object_prefix = 5;

  // Sharding fields supplied with the task.
  repeated ShardingField sharding_fields = 6;
}
// Describes a table.
message TableInfo {
  // Table id.
  string id = 1;

  // Table name.
  string name = 2;

  // Cloud object prefix recorded for the table.
  CloudObjectPrefix cloud_object_prefix = 3;

  // PR 5: the database this table belongs to. Mirrors the
  // `tables.database_id` column introduced by migration 026. Nursery routing
  // needs it in order to scope name lookups by database.
  string database_id = 4;
}