AQL (ArangoDB Query Language) Reference
AQL (ArangoDB Query Language) is a declarative query language for multi-model databases that combines the power of SQL-like syntax with native support for graph traversals and document operations. Orbit-RS provides a comprehensive AQL implementation optimized for distributed graph and time series data.
Table of Contents
- Overview
- Syntax Fundamentals
- Data Types
- Operations
- Graph Traversals
- Aggregations
- Functions
- Time Series Integration
- Performance Optimization
- Examples
- API Reference
Overview
AQL in Orbit-RS provides:
- Multi-model support: Query graphs, documents, and time series data
- ACID transactions: Full transaction support with consistency guarantees
- Graph traversals: Native graph pattern matching and path finding
- High performance: Optimized query execution with smart indexing
- Distributed execution: Scale queries across cluster nodes
- Type safety: Strong typing with compile-time query validation
Key Features
- Declarative syntax: Focus on what you want, not how to get it
- Flexible joins: Join across different data models
- Advanced analytics: Built-in statistical and mathematical functions
- Streaming results: Process large result sets efficiently
- Query optimization: Automatic query plan optimization
Syntax Fundamentals
Basic Query Structure
FOR variable IN collection
[FILTER condition]
[SORT expression]
[LIMIT count]
RETURN expression
Variables and Expressions
// Variable binding
FOR doc IN users
RETURN doc.name
// Expression evaluation
FOR doc IN users
RETURN {
fullName: CONCAT(doc.firstName, " ", doc.lastName),
age: doc.age,
isAdult: doc.age >= 18
}
Comments
// Single line comment
FOR doc IN users // Another comment
/* Multi-line
comment */
RETURN doc
Data Types
Primitive Types
// Numbers
FOR doc IN collection
RETURN {
integer: 42,
float: 3.14159,
scientific: 1.23e-4
}
// Strings
FOR doc IN collection
RETURN {
simple: "Hello World",
escaped: "Line 1\nLine 2",
unicode: "Unicode: caf\u00E9 \u2615"
}
// Booleans
FOR doc IN collection
RETURN {
isTrue: true,
isFalse: false,
isNull: null
}
Complex Types
// Arrays
FOR doc IN collection
RETURN {
numbers: [1, 2, 3, 4, 5],
mixed: [1, "hello", true, null],
nested: [[1, 2], [3, 4]]
}
// Objects
FOR doc IN collection
RETURN {
user: {
name: "Alice",
age: 30,
preferences: {
theme: "dark",
language: "en"
}
}
}
Operations
FOR Loops
// Simple iteration
FOR user IN users
RETURN user
// Multiple collections
FOR user IN users
FOR post IN posts
FILTER post.authorId == user._id
RETURN {user: user.name, post: post.title}
// Array iteration
FOR item IN [1, 2, 3, 4, 5]
RETURN item * 2
// Object iteration
FOR key IN ATTRIBUTES({name: "Alice", age: 30})
RETURN key
FILTER Operations
// Basic filters
FOR user IN users
FILTER user.age > 18
RETURN user
// Complex conditions
FOR user IN users
FILTER user.age BETWEEN 25 AND 65
AND user.status == "active"
AND user.email LIKE "%@company.com"
RETURN user
// Array filters
FOR user IN users
FILTER "javascript" IN user.skills
RETURN user
// Null checks
FOR user IN users
FILTER user.lastLogin != null
RETURN user
SORT Operations
// Simple sorting
FOR user IN users
SORT user.name
RETURN user
// Multiple sort criteria
FOR user IN users
SORT user.department, user.salary DESC, user.name
RETURN user
// Expression sorting
FOR user IN users
SORT CONCAT(user.lastName, user.firstName)
RETURN user
LIMIT Operations
// Simple limit
FOR user IN users
LIMIT 10
RETURN user
// Offset and limit
FOR user IN users
LIMIT 20, 10 // Skip 20, take 10
RETURN user
// Dynamic limits
FOR user IN users
LIMIT @offset, @count
RETURN user
Graph Traversals
Basic Traversals
// Outbound traversal
FOR vertex, edge, path IN 1..3 OUTBOUND 'users/alice' follows
RETURN {vertex, edge, path}
// Inbound traversal
FOR vertex, edge IN 1..2 INBOUND 'posts/123' authored
RETURN vertex
// Any direction
FOR vertex IN 1..5 ANY 'users/alice' GRAPH 'social'
RETURN vertex
Advanced Traversals
// Multiple edge collections
FOR vertex, edge IN 1..3 OUTBOUND 'users/alice' follows, likes
RETURN {vertex, edge}
// Path filtering
FOR vertex, edge, path IN 1..4 OUTBOUND 'users/alice' follows
FILTER path.edges[*].weight > 0.5
RETURN vertex
// Shortest path
FOR path IN OUTBOUND SHORTEST_PATH 'users/alice' TO 'users/bob' follows
RETURN path
Named Graphs
// Define and use named graphs
FOR vertex, edge IN 1..3 OUTBOUND 'users/alice' GRAPH 'social_network'
RETURN {
person: vertex.name,
relationship: edge.type,
since: edge.created_at
}
// Multiple graphs
FOR vertex IN 1..2 OUTBOUND 'users/alice'
GRAPH ['professional', 'social']
RETURN vertex
Aggregations
Basic Aggregations
// COUNT
FOR user IN users
COLLECT status = user.status WITH COUNT INTO count
RETURN {status, count}
// SUM, AVERAGE
FOR sale IN sales
COLLECT year = DATE_YEAR(sale.date)
AGGREGATE total = SUM(sale.amount), avg = AVERAGE(sale.amount)
RETURN {year, total, avg}
// MIN, MAX
FOR product IN products
COLLECT category = product.category
AGGREGATE
minPrice = MIN(product.price),
maxPrice = MAX(product.price)
RETURN {category, minPrice, maxPrice}
Advanced Aggregations
// Multiple grouping levels
FOR sale IN sales
COLLECT
year = DATE_YEAR(sale.date),
month = DATE_MONTH(sale.date),
region = sale.region
AGGREGATE total = SUM(sale.amount)
RETURN {year, month, region, total}
// COLLECT with arrays
FOR user IN users
COLLECT department = user.department INTO departmentUsers
RETURN {
department,
users: departmentUsers[*].user.name,
count: LENGTH(departmentUsers)
}
Statistical Functions
// Advanced statistics
FOR metric IN metrics
COLLECT hour = DATE_HOUR(metric.timestamp)
AGGREGATE
count = COUNT(),
avg = AVERAGE(metric.value),
stddev = STDDEV(metric.value),
variance = VARIANCE(metric.value),
percentile_95 = PERCENTILE(metric.value, 95)
RETURN {hour, count, avg, stddev, variance, percentile_95}
Functions
String Functions
FOR user IN users
RETURN {
name: user.name,
upper: UPPER(user.name),
lower: LOWER(user.name),
length: LENGTH(user.name),
substring: SUBSTRING(user.name, 0, 3),
concat: CONCAT(user.firstName, " ", user.lastName),
split: SPLIT(user.email, "@"),
replace: SUBSTITUTE(user.phone, "-", ""),
regex: REGEX_MATCHES(user.email, "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"),
soundex: SOUNDEX(user.lastName),
uuid: UUID(),
find_last: FIND_LAST(user.name, "a")
}
Type Functions
FOR val IN data
RETURN {
is_null: IS_NULL(val),
is_bool: IS_BOOL(val),
is_number: IS_NUMBER(val),
is_string: IS_STRING(val),
is_array: IS_ARRAY(val),
is_object: IS_OBJECT(val),
is_date: IS_DATETIME(val),
is_key: IS_KEY(val),
to_int: TO_INT(val),
to_string: TO_STRING(val),
typename: TYPENAME(val)
}
Numeric Functions
FOR data IN dataset
RETURN {
value: data.value,
abs: ABS(data.value),
floor: FLOOR(data.value),
ceil: CEIL(data.value),
round: ROUND(data.value, 2),
sqrt: SQRT(ABS(data.value)),
power: POW(data.value, 2),
log: LOG(ABS(data.value)),
sin: SIN(data.angle),
cos: COS(data.angle),
random: RAND(),
variance: VARIANCE_SAMPLE([1, 2, 3, 4, 5]),
stddev: STDDEV_SAMPLE([1, 2, 3, 4, 5]),
median: MEDIAN([1, 2, 3, 4, 5]),
percentile: PERCENTILE([1, 2, 3, 4, 5], 90)
}
Date/Time Functions
FOR event IN events
RETURN {
timestamp: event.timestamp,
year: DATE_YEAR(event.timestamp),
month: DATE_MONTH(event.timestamp),
day: DATE_DAY(event.timestamp),
hour: DATE_HOUR(event.timestamp),
formatted: DATE_FORMAT(event.timestamp, "%Y-%m-%d %H:%M:%S"),
unix: DATE_TIMESTAMP(event.timestamp),
from_unix: DATE_ISO8601(event.unix_timestamp),
diff: DATE_DIFF(event.end_time, event.start_time, "minutes"),
add: DATE_ADD(event.timestamp, 1, "day"),
quarter: DATE_QUARTER(event.timestamp),
leapyear: DATE_LEAPYEAR(DATE_YEAR(event.timestamp)),
days_in_month: DATE_DAYS_IN_MONTH(event.timestamp),
iso_week: DATE_ISOWEEK(event.timestamp)
}
Array Functions
FOR user IN users
RETURN {
skills: user.skills,
first: FIRST(user.skills),
last: LAST(user.skills),
length: LENGTH(user.skills),
sorted: SORTED(user.skills),
unique: UNIQUE(user.skills),
reversed: REVERSE(user.skills),
slice: SLICE(user.skills, 1, 2),
contains: "javascript" IN user.skills,
intersection: INTERSECTION(user.skills, ["javascript", "python", "rust"]),
union: UNION(user.skills, ["go", "typescript"]),
flatten: FLATTEN([user.skills, user.certifications]),
outersection: OUTERSECTION(user.skills, ["java", "c++"]),
jaccard: JACCARD(user.skills, ["python", "rust"]),
interleave: INTERLEAVE(user.skills, [1, 2, 3])
}
Graph Functions
// Graph analysis functions
RETURN {
neighbors: GRAPH_NEIGHBORS("social", "users/alice", {direction: "outbound"}),
distance: GRAPH_DISTANCE_TO("social", "users/alice", "users/bob", {weight: "cost"}),
shortest: GRAPH_SHORTEST_PATH("social", "users/alice", "users/charlie", {}),
paths: GRAPH_PATHS("social", "users/alice", {maxDepth: 3}),
common: GRAPH_COMMON_NEIGHBORS("social", "users/alice", "users/bob", {}),
eccentricity: GRAPH_ECCENTRICITY("social", "users/alice", {}),
radius: GRAPH_RADIUS("social", {}),
diameter: GRAPH_DIAMETER("social", {})
}
Object Functions
FOR doc IN docs
RETURN {
keys: KEYS(doc),
values: VALUES(doc),
merge: MERGE(doc, {updated: true}),
matches: MATCHES(doc, {status: "active", type: "user"}),
has: HAS(doc, "email"),
unset: UNSET(doc, ["internal_id", "version"]),
keep: KEEP(doc, ["name", "email"]),
zip: ZIP(["a", "b"], [1, 2])
}
Time Series Integration
Time Series Queries
// Query time series data
FOR point IN timeseries
FILTER point.series_id == "temperature_sensor_01"
AND point.timestamp >= DATE_SUBTRACT(DATE_NOW(), 1, "hour")
SORT point.timestamp
RETURN {
time: point.timestamp,
value: point.value,
labels: point.labels
}
// Aggregate time series data
FOR point IN timeseries
FILTER point.series_id LIKE "cpu_usage_%"
AND point.timestamp >= DATE_SUBTRACT(DATE_NOW(), 24, "hour")
COLLECT
hour = DATE_HOUR(point.timestamp),
server = point.labels.server
AGGREGATE
avg_cpu = AVERAGE(point.value),
max_cpu = MAX(point.value),
count = COUNT()
RETURN {hour, server, avg_cpu, max_cpu, count}
Combined Graph and Time Series
// Join graph and time series data
FOR sensor IN sensors
FOR point IN timeseries
FILTER point.series_id == sensor._id
AND point.timestamp >= DATE_SUBTRACT(DATE_NOW(), 1, "hour")
COLLECT
sensor_name = sensor.name,
location = sensor.location
AGGREGATE
avg_value = AVERAGE(point.value),
sample_count = COUNT()
RETURN {sensor_name, location, avg_value, sample_count}
// Graph traversal with time series
FOR vertex, edge IN 1..2 OUTBOUND 'sensors/temp_01' connected_to
FOR point IN timeseries
FILTER point.series_id == vertex._id
AND point.timestamp >= @start_time
COLLECT sensor = vertex
AGGREGATE latest_value = LAST(point.value)
RETURN {
sensor: sensor.name,
type: sensor.type,
latest_reading: latest_value,
location: sensor.location
}
Performance Optimization
Index Usage
// Efficient filtering with indexes
FOR user IN users
FILTER user.email == @email // Uses hash index
RETURN user
FOR post IN posts
FILTER post.created_at >= @start_date // Uses skiplist index
SORT post.created_at DESC
RETURN post
// Compound index usage
FOR order IN orders
FILTER order.customer_id == @customer_id
AND order.status == "completed"
AND order.total >= @min_total
RETURN order
Query Optimization
// Early filtering
FOR user IN users
FILTER user.active == true // Filter early
FOR order IN orders
FILTER order.user_id == user._id
AND order.status == "completed"
RETURN {user, order}
// Efficient joins
FOR user IN users
FILTER user.department == @department
FOR order IN orders
FILTER order.user_id == user._id
COLLECT user_info = user INTO orders_group = order
RETURN {
user: user_info,
order_count: LENGTH(orders_group),
total_amount: SUM(orders_group[*].amount)
}
Batch Operations
// Efficient batch processing
FOR batch IN 1..100
FOR user IN users
LIMIT (batch - 1) * 1000, 1000 // Process in batches
// Process each batch
RETURN user
Examples
Social Network Analysis
// Find mutual friends
FOR user IN users
FILTER user._id == @user_id
FOR friend1 IN 1..1 OUTBOUND user follows
FOR friend2 IN 1..1 OUTBOUND @other_user_id follows
FILTER friend1._id == friend2._id
RETURN DISTINCT friend1
// Friend recommendations
FOR user IN users
FILTER user._id == @user_id
FOR friend IN 1..1 OUTBOUND user follows
FOR friend_of_friend IN 1..1 OUTBOUND friend follows
FILTER friend_of_friend._id != @user_id
AND friend_of_friend._id NOT IN (
FOR direct_friend IN 1..1 OUTBOUND user follows
RETURN direct_friend._id
)
COLLECT recommended_user = friend_of_friend WITH COUNT INTO mutual_count
SORT mutual_count DESC
LIMIT 10
RETURN {user: recommended_user, mutual_friends: mutual_count}
E-commerce Analytics
// Customer lifetime value
FOR customer IN customers
FOR order IN orders
FILTER order.customer_id == customer._id
COLLECT customer_info = customer
AGGREGATE
total_spent = SUM(order.total),
order_count = COUNT(),
avg_order_value = AVERAGE(order.total),
first_order = MIN(order.created_at),
last_order = MAX(order.created_at)
LET days_active = DATE_DIFF(last_order, first_order, "day")
RETURN {
customer: customer_info.name,
lifetime_value: total_spent,
orders: order_count,
avg_order: avg_order_value,
days_active: days_active,
value_per_day: total_spent / MAX(days_active, 1)
}
// Product recommendation based on purchase history
FOR customer IN customers
FILTER customer._id == @customer_id
FOR order IN orders
FILTER order.customer_id == customer._id
FOR item IN order.items
FOR similar_item IN products
FILTER similar_item.category == item.category
AND similar_item._id != item._id
AND similar_item._id NOT IN (
FOR past_order IN orders
FILTER past_order.customer_id == @customer_id
FOR past_item IN past_order.items
RETURN past_item._id
)
COLLECT recommended_product = similar_item WITH COUNT INTO relevance_score
SORT relevance_score DESC
LIMIT 5
RETURN {product: recommended_product, score: relevance_score}
IoT Data Analysis
// Sensor anomaly detection
FOR sensor IN sensors
FOR point IN timeseries
FILTER point.series_id == sensor._id
AND point.timestamp >= DATE_SUBTRACT(DATE_NOW(), 1, "hour")
COLLECT sensor_info = sensor
AGGREGATE
avg_value = AVERAGE(point.value),
stddev_value = STDDEV(point.value),
points = point[*]
LET threshold = avg_value + (2 * stddev_value)
LET anomalies = (
FOR p IN points
FILTER p.value > threshold
RETURN p
)
FILTER LENGTH(anomalies) > 0
RETURN {
sensor: sensor_info.name,
location: sensor_info.location,
avg_value: avg_value,
threshold: threshold,
anomaly_count: LENGTH(anomalies),
anomalies: anomalies
}
// Equipment maintenance prediction
FOR equipment IN equipment
FOR metric IN timeseries
FILTER metric.series_id == equipment.sensor_id
AND metric.timestamp >= DATE_SUBTRACT(DATE_NOW(), 30, "day")
COLLECT
equipment_info = equipment,
day = DATE_DAY(metric.timestamp)
AGGREGATE daily_avg = AVERAGE(metric.value)
COLLECT equipment_final = equipment_info INTO daily_values = daily_avg
LET trend = SLOPE(daily_values) // Custom function for trend analysis
LET maintenance_needed = ABS(trend) > 0.1 // Degradation threshold
FILTER maintenance_needed
RETURN {
equipment: equipment_final.name,
location: equipment_final.location,
trend: trend,
priority: ABS(trend) > 0.2 ? "high" : "medium",
next_maintenance: DATE_ADD(DATE_NOW(), CEIL(30 / ABS(trend)), "day")
}
API Reference
Query Execution
use orbit_protocols::aql::{AQLEngine, QueryOptions};
// Execute AQL query
let result = aql_engine.execute(
"FOR user IN users FILTER user.age > @min_age RETURN user",
[("min_age", 18)].iter().cloned().collect()
).await?;
// Execute with options
let options = QueryOptions {
max_runtime: Duration::from_secs(30),
memory_limit: 1024 * 1024 * 100, // 100MB
intermediate_commit_size: 10000,
..Default::default()
};
let result = aql_engine.execute_with_options(query, params, options).await?;
Streaming Results
use futures_util::stream::StreamExt;
// Stream large result sets
let mut stream = aql_engine.execute_stream(
"FOR doc IN large_collection RETURN doc"
).await?;
while let Some(batch) = stream.next().await {
for document in batch? {
// Process each document
println!("{:?}", document);
}
}
Prepared Statements
// Prepare query for multiple executions
let prepared = aql_engine.prepare(
"FOR user IN users FILTER user.department == @dept RETURN user"
).await?;
// Execute with different parameters
let results1 = prepared.execute([("dept", "engineering")]).await?;
let results2 = prepared.execute([("dept", "marketing")]).await?;
Transaction Support
// Execute within transaction
let transaction = aql_engine.begin_transaction().await?;
let result1 = transaction.execute(
"FOR user IN users FILTER user._id == @id UPDATE user WITH {last_login: DATE_NOW()} IN users",
[("id", "users/123")]
).await?;
let result2 = transaction.execute(
"INSERT {user_id: @id, action: 'login', timestamp: DATE_NOW()} IN audit_log",
[("id", "users/123")]
).await?;
transaction.commit().await?;
AQL in Orbit-RS provides a powerful, flexible query language that seamlessly integrates graph traversals, document operations, and time series analytics in a single, optimized execution engine.