SAP HANA Performance MCP Server

MCP server for SAP HANA performance analysis: query plan capture,
PlanViz export, expensive statement monitoring, plan cache inspection,
table statistics, memory stats, and active session tracking.
Erhan Keseli
2026-04-06 20:38:29 +02:00
commit 8412e1801b
22 changed files with 2247 additions and 0 deletions
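For orientation, here is a minimal local launch sketch. The environment variable names come from HanaConnectionPool.from_env in connection.py below; the host and credentials are placeholders, and running the package as `python -m hana_performance_mcp` assumes the nine-line entry file in this commit is packaged as __main__.py:

import os

# Connection settings read by HanaConnectionPool.from_env (placeholder values).
os.environ.setdefault("HANA_HOST", "hana.example.com")
os.environ.setdefault("HANA_PORT", "443")
os.environ.setdefault("HANA_USER", "PERF_MONITOR")
os.environ.setdefault("HANA_PASSWORD", "changeme")
# Optional: HANA_ENCRYPT, HANA_SSLVALIDATECERTIFICATE, HANA_DATABASE_NAME

# Start the MCP server (FastMCP defaults to the stdio transport).
from hana_performance_mcp.server import mcp
mcp.run()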

hana_performance_mcp/__init__.py

@@ -0,0 +1 @@
__version__ = "0.1.0"

hana_performance_mcp/__main__.py

@@ -0,0 +1,9 @@
from hana_performance_mcp.server import mcp
def main():
mcp.run()
if __name__ == "__main__":
main()

hana_performance_mcp/analysis/plan_analyzer.py

@@ -0,0 +1,256 @@
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass
class PerformanceFinding:
severity: str # "critical", "warning", "info"
category: str
operator_id: int | None
description: str
recommendation: str
class PlanAnalyzer:
"""Operators are dicts with keys matching EXPLAIN_PLAN_TABLE columns:
OPERATOR_ID, PARENT_OPERATOR_ID, OPERATOR_NAME, OPERATOR_DETAILS,
OPERATOR_PROPERTIES, EXECUTION_ENGINE, SCHEMA_NAME, TABLE_NAME, TABLE_TYPE,
TABLE_SIZE, OUTPUT_SIZE, SUBTREE_COST, LEVEL, POSITION.
"""
def __init__(self, operators: list[dict]) -> None:
self.operators = operators
self._findings: list[PerformanceFinding] = []
self._children_map: dict[int | None, list[dict]] = {}
for op in operators:
parent = op.get("PARENT_OPERATOR_ID")
self._children_map.setdefault(parent, []).append(op)
def analyze(self) -> list[PerformanceFinding]:
self._findings = []
self._check_full_table_scans()
self._check_inefficient_joins()
self._check_memory_intensive_ops()
self._check_execution_engine_mismatch()
self._check_cost_hotspots()
severity_order = {"critical": 0, "warning": 1, "info": 2}
self._findings.sort(key=lambda f: severity_order.get(f.severity, 3))
return self._findings
def check_estimated_vs_actual(self, runtime_stats: dict) -> None:
"""Flag >10x discrepancy between estimated and actual rows."""
actual_records = runtime_stats.get("records_processed")
if actual_records is None or actual_records == 0:
return
root = self._get_root()
if root is None:
return
estimated = _safe_int(root.get("OUTPUT_SIZE"))
if estimated is None or estimated == 0:
return
ratio = max(estimated, actual_records) / max(min(estimated, actual_records), 1)
if ratio > 10:
self._findings.append(PerformanceFinding(
severity="warning",
category="Statistics Mismatch",
operator_id=root.get("OPERATOR_ID"),
description=(
f"Estimated rows ({estimated:,}) vs actual rows ({actual_records:,}) "
f"differ by {ratio:.0f}x — table statistics may be stale."
),
recommendation="Run UPDATE STATISTICS on the involved tables to give the optimizer accurate cardinality estimates.",
))
def format_plan_tree(self) -> str:
if not self.operators:
return "(empty plan)"
lines = []
for op in self.operators:
level = _safe_int(op.get("LEVEL")) or 0
indent = " " * level
op_id = op.get("OPERATOR_ID", "?")
name = op.get("OPERATOR_NAME") or "UNKNOWN"
table = op.get("TABLE_NAME") or ""
engine = op.get("EXECUTION_ENGINE") or ""
cost = op.get("SUBTREE_COST")
rows = op.get("OUTPUT_SIZE")
parts = [f"[{op_id}] {indent}{name}"]
details = []
if table:
details.append(f"table: {table}")
if engine:
details.append(f"engine: {engine}")
if cost is not None:
details.append(f"cost: {cost}")
if rows is not None:
details.append(f"rows: ~{rows}")
if details:
parts.append(f"({', '.join(details)})")
lines.append(" ".join(parts))
return "\n".join(lines)
def generate_summary(self) -> str:
if not self._findings:
return "No performance issues detected. The query plan looks efficient."
counts = {"critical": 0, "warning": 0, "info": 0}
for f in self._findings:
counts[f.severity] = counts.get(f.severity, 0) + 1
parts = []
total = len(self._findings)
parts.append(f"Found {total} issue{'s' if total != 1 else ''}: ")
severity_parts = []
if counts["critical"]:
severity_parts.append(f"{counts['critical']} critical")
if counts["warning"]:
severity_parts.append(f"{counts['warning']} warning{'s' if counts['warning'] != 1 else ''}")
if counts["info"]:
severity_parts.append(f"{counts['info']} info")
parts.append(", ".join(severity_parts) + ".")
top = self._findings[0]
parts.append(
f"\n\nTop concern: [{top.severity.upper()}] {top.category}{top.description}"
)
return "".join(parts)
def _check_full_table_scans(self) -> None:
scan_names = {"TABLE SCAN", "COLUMN TABLE SCAN"}
for op in self.operators:
name = (op.get("OPERATOR_NAME") or "").upper()
if name not in scan_names:
continue
output_size = _safe_int(op.get("OUTPUT_SIZE"))
if output_size is not None and output_size > 10_000:
table = op.get("TABLE_NAME") or "unknown table"
self._findings.append(PerformanceFinding(
severity="warning",
category="Full Table Scan",
operator_id=op.get("OPERATOR_ID"),
description=(
f"{name} on {table} producing ~{output_size:,} rows. "
"A full scan on a large table may indicate a missing index."
),
recommendation="Add an index on the filter/join columns used in this scan, or verify that a full scan is intentional for analytics workloads.",
))
def _check_inefficient_joins(self) -> None:
for op in self.operators:
name = (op.get("OPERATOR_NAME") or "").upper()
if "NESTED LOOP JOIN" not in name:
continue
op_id = op.get("OPERATOR_ID")
children = self._children_map.get(op_id, [])
if len(children) < 2:
continue
child_sizes = [_safe_int(c.get("OUTPUT_SIZE")) or 0 for c in children]
if all(s > 10_000 for s in child_sizes):
self._findings.append(PerformanceFinding(
severity="critical",
category="Inefficient Join",
operator_id=op_id,
description=(
f"NESTED LOOP JOIN (operator {op_id}) with large inputs "
f"({child_sizes[0]:,} x {child_sizes[1]:,} rows). "
"This can cause quadratic performance."
),
recommendation="Review join conditions for missing indexes. Consider rewriting the query to allow a hash join or adding a hint (WITH HINT(USE_HASH_JOIN)).",
))
def _check_memory_intensive_ops(self) -> None:
memory_ops = {"SORT", "GROUP BY", "GROUPBY", "DISTINCT"}
for op in self.operators:
name = (op.get("OPERATOR_NAME") or "").upper()
if not any(m in name for m in memory_ops):
continue
output_size = _safe_int(op.get("OUTPUT_SIZE"))
if output_size is not None and output_size > 100_000:
self._findings.append(PerformanceFinding(
severity="warning",
category="Memory-Intensive Operation",
operator_id=op.get("OPERATOR_ID"),
description=(
f"{name} (operator {op.get('OPERATOR_ID')}) processing "
f"~{output_size:,} rows in memory."
),
recommendation="Consider adding an index to support ordering, pre-aggregating data, or limiting the result set to reduce memory pressure.",
))
def _check_execution_engine_mismatch(self) -> None:
for op in self.operators:
engine = (op.get("EXECUTION_ENGINE") or "").upper()
table_type = (op.get("TABLE_TYPE") or "").upper()
if engine == "ROW" and "COLUMN" in table_type:
table = op.get("TABLE_NAME") or "unknown table"
self._findings.append(PerformanceFinding(
severity="warning",
category="Engine Mismatch",
operator_id=op.get("OPERATOR_ID"),
description=(
f"Operator {op.get('OPERATOR_ID')} uses ROW engine on "
f"column-store table {table}. This bypasses columnar optimizations."
),
recommendation="Rewrite the query to be column-engine compatible (avoid unsupported functions or constructs that force row-engine fallback).",
))
def _check_cost_hotspots(self) -> None:
root = self._get_root()
if root is None:
return
total_cost = _safe_float(root.get("SUBTREE_COST"))
if not total_cost or total_cost <= 0:
return
for op in self.operators:
op_id = op.get("OPERATOR_ID")
if op_id == root.get("OPERATOR_ID"):
continue
op_cost = _safe_float(op.get("SUBTREE_COST"))
if op_cost is None:
continue
pct = (op_cost / total_cost) * 100
if pct > 40:
name = op.get("OPERATOR_NAME") or "UNKNOWN"
self._findings.append(PerformanceFinding(
severity="info",
category="Cost Hotspot",
operator_id=op_id,
description=(
f"{name} (operator {op_id}) accounts for {pct:.0f}% of "
"the total plan cost."
),
recommendation="Focus optimization efforts on this subtree — improving its efficiency will have the largest impact on overall query performance.",
))
def _get_root(self) -> dict | None:
for op in self.operators:
if _safe_int(op.get("LEVEL")) == 0:
return op
return self.operators[0] if self.operators else None
def _safe_int(val) -> int | None:
if val is None:
return None
try:
return int(val)
except (ValueError, TypeError):
return None
def _safe_float(val) -> float | None:
if val is None:
return None
try:
return float(val)
except (ValueError, TypeError):
return None
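To make the analyzer's contract concrete, a small self-contained sketch; the operator rows are hand-written in the EXPLAIN_PLAN_TABLE shape the class docstring describes, not real HANA output:

from hana_performance_mcp.analysis.plan_analyzer import PlanAnalyzer

# A two-operator plan: a root join above a large column-table scan.
operators = [
    {"OPERATOR_ID": 1, "PARENT_OPERATOR_ID": None, "OPERATOR_NAME": "NESTED LOOP JOIN",
     "EXECUTION_ENGINE": "COLUMN", "OUTPUT_SIZE": 50_000, "SUBTREE_COST": 1.0, "LEVEL": 0},
    {"OPERATOR_ID": 2, "PARENT_OPERATOR_ID": 1, "OPERATOR_NAME": "COLUMN TABLE SCAN",
     "EXECUTION_ENGINE": "COLUMN", "TABLE_NAME": "SALES", "TABLE_TYPE": "COLUMN TABLE",
     "OUTPUT_SIZE": 50_000, "SUBTREE_COST": 0.9, "LEVEL": 1},
]

analyzer = PlanAnalyzer(operators)
# Yields a "Full Table Scan" warning (50,000 rows > 10,000) and a
# "Cost Hotspot" info finding (the scan is 90% of the root's subtree cost).
for finding in analyzer.analyze():
    print(finding.severity, finding.category, finding.description)
print(analyzer.format_plan_tree())  # indented text tree of both operators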

hana_performance_mcp/connection.py

@@ -0,0 +1,130 @@
from __future__ import annotations
import logging
import os
import queue
import threading
from contextlib import asynccontextmanager, contextmanager
from typing import TYPE_CHECKING
from hdbcli import dbapi
if TYPE_CHECKING:
from mcp.server.fastmcp import FastMCP
logger = logging.getLogger(__name__)
class HanaConnectionPool:
def __init__(
self,
host: str,
port: int,
user: str,
password: str,
encrypt: bool = True,
ssl_validate_certificate: bool = True,
pool_size: int = 3,
database_name: str | None = None,
) -> None:
self.host = host
self.port = port
self.user = user
self.password = password
self.encrypt = encrypt
self.ssl_validate_certificate = ssl_validate_certificate
self.pool_size = pool_size
self.database_name = database_name
self._queue: queue.Queue[dbapi.Connection] = queue.Queue(maxsize=pool_size)
self._lock = threading.Lock()
for _ in range(pool_size):
self._queue.put(self._create_connection())
logger.info("HANA connection pool created (%d connections)", pool_size)
@classmethod
def from_env(cls) -> HanaConnectionPool:
return cls(
host=os.environ["HANA_HOST"],
port=int(os.environ["HANA_PORT"]),
user=os.environ["HANA_USER"],
password=os.environ["HANA_PASSWORD"],
encrypt=os.environ.get("HANA_ENCRYPT", "true").lower() == "true",
ssl_validate_certificate=os.environ.get(
"HANA_SSLVALIDATECERTIFICATE", "true"
).lower()
== "true",
database_name=os.environ.get("HANA_DATABASE_NAME"),
)
def _create_connection(self) -> dbapi.Connection:
kwargs = dict(
address=self.host,
port=self.port,
user=self.user,
password=self.password,
encrypt=self.encrypt,
sslValidateCertificate=self.ssl_validate_certificate,
autocommit=True,
)
if self.database_name:
kwargs["databaseName"] = self.database_name
return dbapi.connect(**kwargs)
def _is_alive(self, conn: dbapi.Connection) -> bool:
try:
cur = conn.cursor()
cur.execute("SELECT 1 FROM DUMMY")
cur.close()
return True
except dbapi.Error:
return False
@contextmanager
def get_cursor(self):
conn = self._queue.get()
try:
if not self._is_alive(conn):
logger.warning("Replacing broken HANA connection")
try:
conn.close()
except Exception:
pass
conn = self._create_connection()
cursor = conn.cursor()
try:
yield cursor
finally:
cursor.close()
except dbapi.Error:
try:
conn.close()
except Exception:
pass
conn = self._create_connection()
raise
finally:
self._queue.put(conn)
def close_all(self) -> None:
closed = 0
while not self._queue.empty():
try:
conn = self._queue.get_nowait()
conn.close()
closed += 1
except queue.Empty:
break
except Exception:
closed += 1
logger.info("Closed %d HANA connections", closed)
@asynccontextmanager
async def app_lifespan(server: FastMCP):
pool = HanaConnectionPool.from_env()
try:
yield pool
finally:
pool.close_all()
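A sketch of using the pool outside the MCP lifespan, e.g. in tests; the endpoint and credentials are placeholders, and get_cursor transparently replaces any connection that fails the SELECT 1 FROM DUMMY health check:

from hana_performance_mcp.connection import HanaConnectionPool

pool = HanaConnectionPool(
    host="hana.example.com",  # placeholder endpoint
    port=443,
    user="PERF_MONITOR",      # placeholder credentials
    password="changeme",
    pool_size=1,
)
with pool.get_cursor() as cursor:
    cursor.execute("SELECT CURRENT_TIMESTAMP FROM DUMMY")
    print(cursor.fetchone()[0])
pool.close_all()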

hana_performance_mcp/formatting.py

@@ -0,0 +1,37 @@
from __future__ import annotations
def format_bytes(n: int | None) -> str:
if n is None or n < 0:
return "N/A"
if n < 1024:
return f"{n} B"
if n < 1024**2:
return f"{n / 1024:.1f} KB"
if n < 1024**3:
return f"{n / 1024 ** 2:.1f} MB"
return f"{n / 1024 ** 3:.2f} GB"
def format_duration(microseconds: int | None) -> str:
if microseconds is None:
return "N/A"
if microseconds < 1000:
return f"{microseconds} us"
ms = microseconds / 1000
if ms < 1000:
return f"{ms:.1f} ms"
sec = ms / 1000
if sec < 60:
return f"{sec:.1f} s"
minutes = int(sec // 60)
remaining_sec = sec % 60
return f"{minutes}m {remaining_sec:.0f}s"
def truncate(s: str | None, max_len: int = 200) -> str:
if s is None:
return ""
if len(s) <= max_len:
return s
return s[:max_len] + "..."
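For reference, the thresholds above produce output like this (values follow directly from the functions as written):

from hana_performance_mcp.formatting import format_bytes, format_duration, truncate

format_bytes(1536)           # '1.5 KB'
format_bytes(5 * 1024**3)    # '5.00 GB'
format_duration(950)         # '950 us'
format_duration(2_500_000)   # '2.5 s'
format_duration(90_000_000)  # '1m 30s'
truncate("x" * 300)          # first 200 chars + '...'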

hana_performance_mcp/server.py

@@ -0,0 +1,35 @@
from mcp.server.fastmcp import Context, FastMCP
from hana_performance_mcp.connection import HanaConnectionPool, app_lifespan
mcp = FastMCP(
"hana-performance",
instructions=(
"SAP HANA performance analysis server. "
"Provides tools for query plan analysis, PlanViz export, "
"expensive statement monitoring, plan cache inspection, "
"table statistics, memory stats, and active session tracking."
),
lifespan=app_lifespan,
)
def get_pool(ctx: Context) -> HanaConnectionPool:
return ctx.request_context.lifespan_context
from hana_performance_mcp.tools.memory_stats import get_memory_stats # noqa: E402
from hana_performance_mcp.tools.active_sessions import get_active_sessions # noqa: E402
from hana_performance_mcp.tools.expensive_statements import get_expensive_statements # noqa: E402
from hana_performance_mcp.tools.plan_cache import get_plan_cache # noqa: E402
from hana_performance_mcp.tools.table_statistics import get_table_statistics # noqa: E402
from hana_performance_mcp.tools.analyze_query import analyze_query # noqa: E402
from hana_performance_mcp.tools.download_planviz import download_planviz # noqa: E402
mcp.tool()(get_memory_stats)
mcp.tool()(get_active_sessions)
mcp.tool()(get_expensive_statements)
mcp.tool()(get_plan_cache)
mcp.tool()(get_table_statistics)
mcp.tool()(analyze_query)
mcp.tool()(download_planviz)
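Tool modules are imported only after mcp and get_pool exist, which avoids a circular import; a new tool would follow the same shape. The module and function below are hypothetical:

# hana_performance_mcp/tools/connection_count.py (hypothetical)
from mcp.server.fastmcp import Context

from hana_performance_mcp.server import get_pool

async def get_connection_count(ctx: Context = None) -> str:
    """Count open connections (illustrative only)."""
    pool = get_pool(ctx)
    with pool.get_cursor() as cursor:
        cursor.execute("SELECT COUNT(*) FROM M_CONNECTIONS")
        return f"Open connections: {cursor.fetchone()[0]}"

# Registered in server.py like the tools above:
# from hana_performance_mcp.tools.connection_count import get_connection_count  # noqa: E402
# mcp.tool()(get_connection_count)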

hana_performance_mcp/tools/active_sessions.py

@@ -0,0 +1,74 @@
from __future__ import annotations
from hdbcli import dbapi
from mcp.server.fastmcp import Context
from hana_performance_mcp.formatting import format_bytes, format_duration, truncate
from hana_performance_mcp.server import get_pool
async def get_active_sessions(include_idle: bool = False, ctx: Context = None) -> str:
"""List active SAP HANA sessions, optionally including idle connections.
Args:
include_idle: If True, include connections in IDLE status.
Defaults to False (only active/running connections).
"""
pool = get_pool(ctx)
try:
with pool.get_cursor() as cursor:
cursor.execute(
"SELECT C.CONNECTION_ID, C.USER_NAME, C.SCHEMA_NAME, "
"C.CONNECTION_STATUS, C.CLIENT_HOST, C.CLIENT_IP, "
"A.STATEMENT_STRING, A.STATEMENT_STATUS, A.DURATION_MICROSEC, "
"A.ALLOCATED_MEMORY_SIZE, A.APPLICATION_NAME "
"FROM M_CONNECTIONS C "
"LEFT JOIN M_ACTIVE_STATEMENTS A "
"ON C.CONNECTION_ID = A.CONNECTION_ID "
"WHERE (? = 1 OR C.CONNECTION_STATUS != 'IDLE') "
"ORDER BY COALESCE(A.DURATION_MICROSEC, 0) DESC",
(int(include_idle),),
)
columns = [desc[0] for desc in cursor.description]
rows = cursor.fetchall()
except dbapi.Error as e:
return f"HANA Error ({e.errorcode}): {e.errortext}"
except Exception as e:
return f"Error: {e}"
if not rows:
mode = "all" if include_idle else "active"
return f"No {mode} sessions found."
lines = [f"## Active Sessions ({len(rows)} found)\n"]
for row in rows:
session = dict(zip(columns, row))
conn_id = session["CONNECTION_ID"]
user = session["USER_NAME"] or "N/A"
schema = session["SCHEMA_NAME"] or "N/A"
status = session["CONNECTION_STATUS"] or "N/A"
client = session["CLIENT_HOST"] or session["CLIENT_IP"] or "N/A"
app = session["APPLICATION_NAME"] or ""
lines.append(f"### Connection {conn_id}")
lines.append(f"- User: {user} | Schema: {schema} | Status: {status}")
lines.append(f"- Client: {client}")
if app:
lines.append(f"- Application: {app}")
stmt = session["STATEMENT_STRING"]
if stmt:
stmt_status = session["STATEMENT_STATUS"] or "N/A"
duration = format_duration(session["DURATION_MICROSEC"])
memory = format_bytes(session["ALLOCATED_MEMORY_SIZE"])
stmt = truncate(stmt)
lines.append(f"- Statement Status: {stmt_status} | Duration: {duration} | Memory: {memory}")
lines.append(f"- SQL: `{stmt}`")
else:
lines.append("- No active statement")
lines.append("")
return "\n".join(lines)

hana_performance_mcp/tools/analyze_query.py

@@ -0,0 +1,228 @@
from __future__ import annotations
import re
import uuid
from dataclasses import asdict
from datetime import datetime, timezone
from hdbcli import dbapi
from mcp.server.fastmcp import Context
from hana_performance_mcp.analysis.plan_analyzer import PlanAnalyzer
from hana_performance_mcp.formatting import format_bytes, format_duration
from hana_performance_mcp.server import get_pool
_ALLOWED_FIRST_KEYWORDS = {"SELECT", "WITH"}
_REJECTED_KEYWORDS = {
"INSERT", "UPDATE", "DELETE", "DROP", "ALTER", "CREATE", "TRUNCATE", "MERGE",
}
_EXPLAIN_COLUMNS = (
"OPERATOR_ID, PARENT_OPERATOR_ID, OPERATOR_NAME, OPERATOR_DETAILS, "
"OPERATOR_PROPERTIES, EXECUTION_ENGINE, SCHEMA_NAME, TABLE_NAME, "
"TABLE_TYPE, TABLE_SIZE, OUTPUT_SIZE, SUBTREE_COST, LEVEL, POSITION"
)
def _validate_sql(sql: str) -> str:
cleaned = sql.strip().rstrip(";").strip()
if not cleaned:
raise ValueError("SQL query is empty.")
if ";" in cleaned:
raise ValueError("Multi-statement SQL is not supported. Provide a single query.")
first_word = re.split(r"\s", cleaned, maxsplit=1)[0].upper()
if first_word in _REJECTED_KEYWORDS:
raise ValueError(
f"{first_word} statements are not allowed. Only SELECT/WITH queries can be analyzed."
)
if first_word not in _ALLOWED_FIRST_KEYWORDS:
raise ValueError(
f"Only SELECT/WITH queries can be analyzed. Got: {first_word}"
)
return cleaned
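# Illustrative behaviour of _validate_sql (hand-picked examples):
#   _validate_sql("SELECT * FROM T;")    -> "SELECT * FROM T"  (trailing ';' stripped)
#   _validate_sql("SELECT 1; SELECT 2")  -> ValueError         (multi-statement)
#   _validate_sql("DELETE FROM T")       -> ValueError         (rejected keyword)
#   _validate_sql("CALL MY_PROC()")      -> ValueError         (not SELECT/WITH)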
def _capture_plan(sql: str, execute: bool, row_limit: int, cursor) -> dict:
sql = _validate_sql(sql)
row_limit = max(1, min(row_limit, 10_000))
stmt_name = f"MCP_{uuid.uuid4().hex[:8]}"
operators = []
runtime_stats: dict | None = None
cursor.execute("SELECT VERSION FROM M_DATABASE")
hana_version = cursor.fetchone()[0]
cursor.execute("SELECT HOST, PORT FROM M_CONNECTIONS WHERE OWN = 'TRUE' LIMIT 1")
conn_row = cursor.fetchone()
server_host = conn_row[0] if conn_row else "unknown"
server_port = conn_row[1] if conn_row else 0
cursor.execute(
"DELETE FROM EXPLAIN_PLAN_TABLE WHERE STATEMENT_NAME = ?",
(stmt_name,),
)
cursor.execute(
f"EXPLAIN PLAN SET STATEMENT_NAME = '{stmt_name}' FOR {sql}"
)
cursor.execute(
f"SELECT {_EXPLAIN_COLUMNS} "
"FROM EXPLAIN_PLAN_TABLE "
"WHERE STATEMENT_NAME = ? "
"ORDER BY OPERATOR_ID",
(stmt_name,),
)
columns = [desc[0] for desc in cursor.description]
for row in cursor.fetchall():
operators.append(dict(zip(columns, row)))
cursor.execute(
"DELETE FROM EXPLAIN_PLAN_TABLE WHERE STATEMENT_NAME = ?",
(stmt_name,),
)
if execute:
start_time = datetime.now(timezone.utc)
try:
cursor.execute(
f"SELECT * FROM ({sql}) LIMIT {row_limit}"
)
cursor.fetchall()
except dbapi.Error:
pass
pattern_text = sql[:80].replace("%", "\\%").replace("_", "\\_")
cursor.execute(
"SELECT DURATION_MICROSEC, CPU_TIME, MEMORY_SIZE, "
"LOCK_WAIT_DURATION, RECORDS, START_TIME, "
"ERROR_CODE, ERROR_TEXT "
"FROM M_EXPENSIVE_STATEMENTS "
"WHERE UPPER(STATEMENT_STRING) LIKE '%' || UPPER(?) || '%' "
"AND START_TIME >= ? "
"ORDER BY START_TIME DESC LIMIT 1",
(pattern_text, start_time),
)
rt_row = cursor.fetchone()
if rt_row:
rt_cols = [desc[0] for desc in cursor.description]
rt = dict(zip(rt_cols, rt_row))
runtime_stats = {
"execution_time_ms": (rt["DURATION_MICROSEC"] or 0) / 1000,
"cpu_time_ms": (rt["CPU_TIME"] or 0) / 1000,
"memory_bytes": rt["MEMORY_SIZE"],
"lock_wait_ms": (rt["LOCK_WAIT_DURATION"] or 0) / 1000,
"records_processed": rt["RECORDS"],
"error_code": rt["ERROR_CODE"],
"error_text": rt["ERROR_TEXT"],
}
analyzer = PlanAnalyzer(operators)
findings = analyzer.analyze()
if runtime_stats and runtime_stats.get("records_processed") is not None:
analyzer.check_estimated_vs_actual(runtime_stats)
findings.sort(key=lambda f: {"critical": 0, "warning": 1, "info": 2}.get(f.severity, 3))
plan_tree = analyzer.format_plan_tree()
summary = analyzer.generate_summary()
return {
"metadata": {
"timestamp": datetime.now(timezone.utc).isoformat(),
"hana_version": hana_version,
"sql": sql,
"server_host": server_host,
"server_port": server_port,
},
"compile_plan": {
"operators": operators,
"plan_tree_text": plan_tree,
},
"runtime_stats": runtime_stats,
"analysis": {
"findings": [asdict(f) for f in findings],
"summary": summary,
},
}
async def analyze_query(
sql: str,
execute: bool = True,
row_limit: int = 1000,
ctx: Context = None,
) -> str:
"""Analyze a SQL query's execution plan with optional runtime statistics.
Generates the compile-time execution plan via EXPLAIN PLAN, optionally
executes the query to collect runtime statistics from M_EXPENSIVE_STATEMENTS,
and runs rule-based analysis to detect performance issues.
Args:
sql: The SQL SELECT/WITH query to analyze.
execute: If True, also execute the query to gather runtime stats.
row_limit: Maximum rows to fetch during execution (default 1000, max 10000).
"""
pool = get_pool(ctx)
try:
with pool.get_cursor() as cursor:
data = _capture_plan(sql, execute, row_limit, cursor)
except ValueError as e:
return f"Validation error: {e}"
except dbapi.Error as e:
return f"HANA Error ({e.errorcode}): {e.errortext}"
except Exception as e:
return f"Error: {e}"
operators = data["compile_plan"]["operators"]
if not operators:
return "No execution plan was generated. The query may be invalid."
plan_tree = data["compile_plan"]["plan_tree_text"]
runtime_stats = data["runtime_stats"]
findings = data["analysis"]["findings"]
summary = data["analysis"]["summary"]
sql = data["metadata"]["sql"]
sections = []
sections.append("## Query Execution Plan Analysis\n")
display_sql = sql if len(sql) <= 500 else sql[:500] + "..."
sections.append(f"### SQL\n```sql\n{display_sql}\n```\n")
sections.append(f"### Execution Plan Tree\n```\n{plan_tree}\n```\n")
if runtime_stats:
rt = runtime_stats
error_info = ""
if rt.get("error_code") and rt["error_code"] != 0:
error_info = f" | **Error {rt['error_code']}**: {rt.get('error_text', '')}"
sections.append(
f"### Runtime Statistics\n"
f"Execution Time: {rt['execution_time_ms']:.1f} ms | "
f"CPU Time: {rt['cpu_time_ms']:.1f} ms | "
f"Memory: {format_bytes(rt['memory_bytes'])} | "
f"Lock Wait: {rt['lock_wait_ms']:.1f} ms | "
f"Rows: {rt['records_processed']}{error_info}\n"
)
elif execute:
sections.append(
"### Runtime Statistics\n"
"No runtime statistics captured (query may not have appeared in M_EXPENSIVE_STATEMENTS).\n"
)
else:
sections.append("### Runtime Statistics\nSkipped (execute=False).\n")
if findings:
sections.append("### Performance Findings\n")
for f in findings:
severity_label = f["severity"].upper()
op_info = f" (operator {f['operator_id']})" if f.get("operator_id") is not None else ""
sections.append(
f"**{severity_label}** [{f['category']}]{op_info}: "
f"{f['description']}\n"
f" *Recommendation*: {f['recommendation']}\n"
)
else:
sections.append("### Performance Findings\nNo issues detected.\n")
sections.append(f"### Summary\n{summary}\n")
return "\n".join(sections)

hana_performance_mcp/tools/download_planviz.py

@@ -0,0 +1,80 @@
from __future__ import annotations
import json
from datetime import datetime, timezone
from pathlib import Path
from hdbcli import dbapi
from mcp.server.fastmcp import Context
from hana_performance_mcp.server import get_pool
from hana_performance_mcp.tools.analyze_query import _capture_plan
async def download_planviz(
sql: str,
output_path: str | None = None,
row_limit: int = 1000,
ctx: Context = None,
) -> str:
"""Export a comprehensive PlanViz JSON file for a SQL query.
Captures the compile-time execution plan, optional runtime statistics,
and performance analysis, then writes everything to a JSON file.
Args:
sql: The SQL SELECT/WITH query to analyze.
output_path: File path for the JSON output. Defaults to /tmp/planviz_<timestamp>.json.
row_limit: Maximum rows to fetch during execution (default 1000, max 10000).
"""
pool = get_pool(ctx)
try:
with pool.get_cursor() as cursor:
data = _capture_plan(sql, execute=True, row_limit=row_limit, cursor=cursor)
except ValueError as e:
return f"Validation error: {e}"
except dbapi.Error as e:
return f"HANA Error ({e.errorcode}): {e.errortext}"
except Exception as e:
return f"Error: {e}"
if not data["compile_plan"]["operators"]:
return "No execution plan was generated. The query may be invalid."
if output_path:
file_path = Path(output_path)
else:
timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
file_path = Path(f"/tmp/planviz_{timestamp}.json")
try:
file_path.parent.mkdir(parents=True, exist_ok=True)
with open(file_path, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2, default=str)
except OSError as e:
return f"File write error: {e}"
findings = data["analysis"]["findings"]
n_operators = len(data["compile_plan"]["operators"])
n_findings = len(findings)
severity_counts = {}
for finding in findings:
sev = finding["severity"]
severity_counts[sev] = severity_counts.get(sev, 0) + 1
summary_parts = [f"PlanViz exported to: {file_path}"]
summary_parts.append(f"Plan operators: {n_operators}")
if data["runtime_stats"]:
rt = data["runtime_stats"]
summary_parts.append(f"Execution time: {rt['execution_time_ms']:.1f} ms")
if n_findings:
sev_str = ", ".join(f"{count} {sev}" for sev, count in severity_counts.items())
summary_parts.append(f"Findings: {n_findings} ({sev_str})")
else:
summary_parts.append("Findings: none")
return "\n".join(summary_parts)

hana_performance_mcp/tools/expensive_statements.py

@@ -0,0 +1,90 @@
from __future__ import annotations
from hdbcli import dbapi
from mcp.server.fastmcp import Context
from hana_performance_mcp.formatting import format_bytes, format_duration, truncate
from hana_performance_mcp.server import get_pool
_ORDER_BY_MAP = {
"DURATION": "DURATION_MICROSEC",
"MEMORY": "MEMORY_SIZE",
"CPU": "CPU_TIME",
}
async def get_expensive_statements(
limit: int = 20,
min_execution_time_ms: int = 0,
order_by: str = "DURATION",
ctx: Context = None,
) -> str:
"""Retrieve the most expensive statements from SAP HANA.
Args:
limit: Maximum number of statements to return (max 100).
min_execution_time_ms: Minimum execution time filter in milliseconds.
order_by: Sort order — one of "DURATION", "MEMORY", or "CPU".
"""
limit = max(1, min(limit, 100))
order_by = order_by.upper()
if order_by not in _ORDER_BY_MAP:
return f"Invalid order_by value '{order_by}'. Must be one of: DURATION, MEMORY, CPU."
pool = get_pool(ctx)
order_column = _ORDER_BY_MAP[order_by]
min_duration_us = min_execution_time_ms * 1000
try:
with pool.get_cursor() as cursor:
cursor.execute(
"SELECT STATEMENT_STRING, DURATION_MICROSEC, CPU_TIME, MEMORY_SIZE, "
"LOCK_WAIT_DURATION, START_TIME, DB_USER, SCHEMA_NAME, "
"ERROR_CODE, ERROR_TEXT, RECORDS, STATEMENT_HASH "
"FROM M_EXPENSIVE_STATEMENTS "
"WHERE DURATION_MICROSEC >= ? "
f"ORDER BY {order_column} DESC "
"LIMIT ?",
(min_duration_us, limit),
)
columns = [desc[0] for desc in cursor.description]
rows = cursor.fetchall()
except dbapi.Error as e:
return f"HANA Error ({e.errorcode}): {e.errortext}"
except Exception as e:
return f"Error: {e}"
if not rows:
return "No expensive statements found matching the criteria."
lines = [
f"## Expensive Statements (top {len(rows)}, ordered by {order_by})\n"
]
for i, row in enumerate(rows, 1):
stmt = dict(zip(columns, row))
sql = truncate(stmt["STATEMENT_STRING"] or "")
duration = format_duration(stmt["DURATION_MICROSEC"])
cpu = format_duration(stmt["CPU_TIME"])
memory = format_bytes(stmt["MEMORY_SIZE"])
lock_wait = format_duration(stmt["LOCK_WAIT_DURATION"])
user = stmt["DB_USER"] or "N/A"
schema = stmt["SCHEMA_NAME"] or "N/A"
records = stmt["RECORDS"] if stmt["RECORDS"] is not None else "N/A"
start = stmt["START_TIME"] or "N/A"
stmt_hash = stmt["STATEMENT_HASH"] or "N/A"
lines.append(f"### {i}. Duration: {duration} | CPU: {cpu} | Memory: {memory}")
lines.append(f"- User: {user} | Schema: {schema}")
lines.append(f"- Lock Wait: {lock_wait} | Rows: {records}")
lines.append(f"- Start: {start} | Hash: {stmt_hash}")
error_code = stmt["ERROR_CODE"]
if error_code and error_code != 0:
lines.append(f"- **Error {error_code}**: {stmt['ERROR_TEXT']}")
lines.append(f"- SQL: `{sql}`")
lines.append("")
return "\n".join(lines)

hana_performance_mcp/tools/memory_stats.py

@@ -0,0 +1,80 @@
from __future__ import annotations
from hdbcli import dbapi
from mcp.server.fastmcp import Context
from hana_performance_mcp.formatting import format_bytes
from hana_performance_mcp.server import get_pool
async def get_memory_stats(ctx: Context) -> str:
"""Return SAP HANA memory usage broken down by host and service.
Queries M_HOST_RESOURCE_UTILIZATION for host-level memory and
M_SERVICE_MEMORY for per-service breakdown.
"""
pool = get_pool(ctx)
try:
with pool.get_cursor() as cursor:
cursor.execute(
"SELECT HOST, FREE_PHYSICAL_MEMORY, USED_PHYSICAL_MEMORY, "
"ALLOCATION_LIMIT, INSTANCE_TOTAL_MEMORY_USED_SIZE, "
"INSTANCE_TOTAL_MEMORY_PEAK_USED_SIZE, "
"INSTANCE_TOTAL_MEMORY_ALLOCATED_SIZE, INSTANCE_CODE_SIZE, "
"INSTANCE_SHARED_MEMORY_ALLOCATED_SIZE "
"FROM M_HOST_RESOURCE_UTILIZATION"
)
host_columns = [desc[0] for desc in cursor.description]
host_rows = cursor.fetchall()
cursor.execute(
"SELECT HOST, PORT, SERVICE_NAME, HEAP_MEMORY_ALLOCATED_SIZE, "
"HEAP_MEMORY_USED_SIZE, SHARED_MEMORY_ALLOCATED_SIZE, "
"SHARED_MEMORY_USED_SIZE, TOTAL_MEMORY_USED_SIZE, "
"EFFECTIVE_ALLOCATION_LIMIT "
"FROM M_SERVICE_MEMORY ORDER BY TOTAL_MEMORY_USED_SIZE DESC"
)
svc_columns = [desc[0] for desc in cursor.description]
svc_rows = cursor.fetchall()
except dbapi.Error as e:
return f"HANA Error ({e.errorcode}): {e.errortext}"
except Exception as e:
return f"Error: {e}"
lines = ["## Host Memory Overview\n"]
for row in host_rows:
host = dict(zip(host_columns, row))
lines.append(f"### Host: {host['HOST']}")
lines.append(f"- Used Physical Memory: {format_bytes(host['USED_PHYSICAL_MEMORY'])}")
lines.append(f"- Free Physical Memory: {format_bytes(host['FREE_PHYSICAL_MEMORY'])}")
lines.append(f"- Allocation Limit: {format_bytes(host['ALLOCATION_LIMIT'])}")
lines.append(f"- Instance Used: {format_bytes(host['INSTANCE_TOTAL_MEMORY_USED_SIZE'])}")
lines.append(f"- Instance Peak Used: {format_bytes(host['INSTANCE_TOTAL_MEMORY_PEAK_USED_SIZE'])}")
lines.append(f"- Instance Allocated: {format_bytes(host['INSTANCE_TOTAL_MEMORY_ALLOCATED_SIZE'])}")
lines.append(f"- Code Size: {format_bytes(host['INSTANCE_CODE_SIZE'])}")
lines.append(f"- Shared Memory: {format_bytes(host['INSTANCE_SHARED_MEMORY_ALLOCATED_SIZE'])}")
used = host['INSTANCE_TOTAL_MEMORY_USED_SIZE']
limit = host['ALLOCATION_LIMIT']
if used and limit and limit > 0:
pct = used / limit * 100
lines.append(f"- **Utilization: {pct:.1f}%**")
lines.append("")
lines.append("## Service Memory Breakdown\n")
lines.append(f"{'Service':<25} {'Host':<20} {'Port':<7} {'Used':>12} {'Heap Alloc':>12} {'Heap Used':>12} {'Limit':>12}")
lines.append("-" * 110)
for row in svc_rows:
svc = dict(zip(svc_columns, row))
lines.append(
f"{svc['SERVICE_NAME']:<25} "
f"{svc['HOST']:<20} "
f"{svc['PORT']:<7} "
f"{format_bytes(svc['TOTAL_MEMORY_USED_SIZE']):>12} "
f"{format_bytes(svc['HEAP_MEMORY_ALLOCATED_SIZE']):>12} "
f"{format_bytes(svc['HEAP_MEMORY_USED_SIZE']):>12} "
f"{format_bytes(svc['EFFECTIVE_ALLOCATION_LIMIT']):>12}"
)
return "\n".join(lines)

hana_performance_mcp/tools/plan_cache.py

@@ -0,0 +1,85 @@
from __future__ import annotations
from hdbcli import dbapi
from mcp.server.fastmcp import Context
from hana_performance_mcp.formatting import format_bytes, format_duration, truncate
from hana_performance_mcp.server import get_pool
async def get_plan_cache(
limit: int = 20,
min_avg_time_ms: float = 0,
statement_filter: str | None = None,
ctx: Context = None,
) -> str:
"""Inspect the SAP HANA SQL plan cache for cached execution plans.
Args:
limit: Maximum number of entries to return (max 100).
min_avg_time_ms: Minimum average execution time filter in milliseconds.
statement_filter: Optional substring filter on statement text (case-insensitive).
"""
limit = max(1, min(limit, 100))
min_avg_time_us = int(min_avg_time_ms * 1000)
pool = get_pool(ctx)
try:
with pool.get_cursor() as cursor:
cursor.execute(
"SELECT PLAN_ID, STATEMENT_STRING, EXECUTION_COUNT, "
"TOTAL_EXECUTION_TIME, AVG_EXECUTION_TIME, "
"TOTAL_LOCK_WAIT_DURATION, TOTAL_RESULT_RECORD_COUNT, "
"LAST_EXECUTION_TIMESTAMP, IS_DISTRIBUTED_EXECUTION, "
"REFERENCE_COUNT, TOTAL_PREPARATION_TIME, "
"STATEMENT_HASH, USER_NAME, SCHEMA_NAME "
"FROM M_SQL_PLAN_CACHE "
"WHERE AVG_EXECUTION_TIME >= ? "
"AND (? IS NULL OR UPPER(STATEMENT_STRING) LIKE '%' || UPPER(?) || '%') "
"ORDER BY TOTAL_EXECUTION_TIME DESC "
"LIMIT ?",
(min_avg_time_us, statement_filter, statement_filter, limit),
)
columns = [desc[0] for desc in cursor.description]
rows = cursor.fetchall()
except dbapi.Error as e:
return f"HANA Error ({e.errorcode}): {e.errortext}"
except Exception as e:
return f"Error: {e}"
if not rows:
return "No plan cache entries found matching the criteria."
lines = [f"## SQL Plan Cache (top {len(rows)} by total execution time)\n"]
for i, row in enumerate(rows, 1):
entry = dict(zip(columns, row))
sql = truncate(entry["STATEMENT_STRING"] or "")
total_time = format_duration(entry["TOTAL_EXECUTION_TIME"])
avg_time = format_duration(entry["AVG_EXECUTION_TIME"])
exec_count = entry["EXECUTION_COUNT"] if entry["EXECUTION_COUNT"] is not None else "N/A"
total_rows = entry["TOTAL_RESULT_RECORD_COUNT"]
exec_ct = entry["EXECUTION_COUNT"]
if total_rows is not None and exec_ct:
avg_rows = total_rows // exec_ct
else:
avg_rows = "N/A"
lock_wait = format_duration(entry["TOTAL_LOCK_WAIT_DURATION"])
prep_time = format_duration(entry["TOTAL_PREPARATION_TIME"])
last_exec = entry["LAST_EXECUTION_TIMESTAMP"] or "N/A"
distributed = "Yes" if entry["IS_DISTRIBUTED_EXECUTION"] == "TRUE" else "No"
user = entry["USER_NAME"] or "N/A"
schema = entry["SCHEMA_NAME"] or "N/A"
plan_id = entry["PLAN_ID"] if entry["PLAN_ID"] is not None else "N/A"
stmt_hash = entry["STATEMENT_HASH"] or "N/A"
lines.append(f"### {i}. Executions: {exec_count} | Total: {total_time} | Avg: {avg_time}")
lines.append(f"- Plan ID: {plan_id} | Hash: {stmt_hash}")
lines.append(f"- User: {user} | Schema: {schema}")
lines.append(f"- Avg Rows: {avg_rows} | Lock Wait: {lock_wait} | Prep: {prep_time}")
lines.append(f"- Distributed: {distributed} | Last Exec: {last_exec}")
lines.append(f"- SQL: `{sql}`")
lines.append("")
return "\n".join(lines)

hana_performance_mcp/tools/table_statistics.py

@@ -0,0 +1,142 @@
from __future__ import annotations
from hdbcli import dbapi
from mcp.server.fastmcp import Context
from hana_performance_mcp.formatting import format_bytes
from hana_performance_mcp.server import get_pool
async def get_table_statistics(
schema_name: str,
table_name: str | None = None,
ctx: Context = None,
) -> str:
"""Retrieve SAP HANA table statistics for a schema, or column-level detail for a specific table.
Args:
schema_name: The database schema to inspect.
table_name: Optional table name. If provided, returns column-level detail.
If omitted, returns an overview of the top 50 tables by size.
"""
pool = get_pool(ctx)
try:
if table_name is None:
return _schema_overview(pool, schema_name)
return _table_detail(pool, schema_name, table_name)
except dbapi.Error as e:
return f"HANA Error ({e.errorcode}): {e.errortext}"
except Exception as e:
return f"Error: {e}"
def _schema_overview(pool, schema_name: str) -> str:
with pool.get_cursor() as cursor:
cursor.execute(
"SELECT TABLE_NAME, TABLE_TYPE, IS_COLUMN_TABLE, RECORD_COUNT, "
"TABLE_SIZE, IS_PARTITIONED, READ_COUNT, WRITE_COUNT, LOADED "
"FROM M_TABLES WHERE SCHEMA_NAME = ? "
"ORDER BY TABLE_SIZE DESC LIMIT 50",
(schema_name,),
)
columns = [desc[0] for desc in cursor.description]
rows = cursor.fetchall()
if not rows:
return f"No tables found in schema '{schema_name}'."
lines = [
f"## Table Statistics for Schema: {schema_name} (top {len(rows)} by size)\n"
]
lines.append(
f"{'Table':<40} {'Type':<8} {'Col?':<5} {'Rows':>12} "
f"{'Size':>12} {'Part?':<6} {'Reads':>10} {'Writes':>10} {'Loaded':<8}"
)
lines.append("-" * 125)
for row in rows:
t = dict(zip(columns, row))
name = t["TABLE_NAME"] or "N/A"
if len(name) > 38:
name = name[:35] + "..."
ttype = t["TABLE_TYPE"] or "N/A"
col = "Yes" if t["IS_COLUMN_TABLE"] == "TRUE" else "No"
records = t["RECORD_COUNT"] if t["RECORD_COUNT"] is not None else 0
size = format_bytes(t["TABLE_SIZE"])
part = "Yes" if t["IS_PARTITIONED"] == "TRUE" else "No"
reads = t["READ_COUNT"] if t["READ_COUNT"] is not None else 0
writes = t["WRITE_COUNT"] if t["WRITE_COUNT"] is not None else 0
loaded = t["LOADED"] or "N/A"
lines.append(
f"{name:<40} {ttype:<8} {col:<5} {records:>12} "
f"{size:>12} {part:<6} {reads:>10} {writes:>10} {loaded:<8}"
)
return "\n".join(lines)
def _table_detail(pool, schema_name: str, table_name: str) -> str:
with pool.get_cursor() as cursor:
cursor.execute(
"SELECT TABLE_NAME, TABLE_TYPE, IS_COLUMN_TABLE, RECORD_COUNT, "
"TABLE_SIZE, IS_PARTITIONED, READ_COUNT, WRITE_COUNT, LOADED "
"FROM M_TABLES WHERE SCHEMA_NAME = ? AND TABLE_NAME = ?",
(schema_name, table_name),
)
table_cols = [desc[0] for desc in cursor.description]
table_row = cursor.fetchone()
if not table_row:
return f"Table '{schema_name}.{table_name}' not found."
cursor.execute(
"SELECT COLUMN_NAME, DATA_TYPE_NAME, DISTINCT_COUNT, "
"MEMORY_SIZE_IN_TOTAL, COMPRESSION_TYPE, LOADED, "
"INTERNAL_ATTRIBUTE_TYPE "
"FROM M_CS_COLUMNS "
"WHERE SCHEMA_NAME = ? AND TABLE_NAME = ? "
"ORDER BY MEMORY_SIZE_IN_TOTAL DESC",
(schema_name, table_name),
)
col_columns = [desc[0] for desc in cursor.description]
col_rows = cursor.fetchall()
t = dict(zip(table_cols, table_row))
lines = [f"## Table Detail: {schema_name}.{table_name}\n"]
lines.append(f"- Type: {t['TABLE_TYPE']} | Column Store: {'Yes' if t['IS_COLUMN_TABLE'] == 'TRUE' else 'No'}")
lines.append(f"- Records: {t['RECORD_COUNT']} | Size: {format_bytes(t['TABLE_SIZE'])}")
lines.append(f"- Partitioned: {'Yes' if t['IS_PARTITIONED'] == 'TRUE' else 'No'} | Loaded: {t['LOADED'] or 'N/A'}")
lines.append(f"- Reads: {t['READ_COUNT']} | Writes: {t['WRITE_COUNT']}")
if not col_rows:
lines.append("\nNo column statistics available (table may not be column-store).")
return "\n".join(lines)
lines.append(f"\n### Column Details ({len(col_rows)} columns)\n")
lines.append(
f"{'Column':<35} {'Data Type':<18} {'Distinct':>10} "
f"{'Memory':>12} {'Compression':<15} {'Loaded':<8} {'Attr Type':<10}"
)
lines.append("-" * 120)
for row in col_rows:
c = dict(zip(col_columns, row))
name = c["COLUMN_NAME"] or "N/A"
if len(name) > 33:
name = name[:30] + "..."
dtype = c["DATA_TYPE_NAME"] or "N/A"
distinct = c["DISTINCT_COUNT"] if c["DISTINCT_COUNT"] is not None else "N/A"
mem = format_bytes(c["MEMORY_SIZE_IN_TOTAL"])
compression = c["COMPRESSION_TYPE"] or "N/A"
loaded = c["LOADED"] or "N/A"
attr_type = c["INTERNAL_ATTRIBUTE_TYPE"] or "N/A"
lines.append(
f"{name:<35} {dtype:<18} {distinct:>10} "
f"{mem:>12} {compression:<15} {loaded:<8} {attr_type:<10}"
)
return "\n".join(lines)