Skip to content

N+1 Query Elimination

Problem: The ScoopJoy franchise dashboard takes seconds to load. Each active outlet triggers its own queries for metrics, top items, and owner details, so the page fires 50+ database round-trips. You need to bring that down to a handful of queries regardless of how many outlets exist.

Solution: Replace iterative frappe.get_doc calls inside a loop with a few batch queries — JOINs and GROUP BY aggregations — then stitch the results together in Python using lookup dictionaries for O(1) access.

The slow version runs 1 + 4N queries: one to list outlets, then four per outlet inside the loop (full document load, invoice count, revenue sum, owner lookup). With 50 outlets that is 201 queries.

scoopjoy/scoopjoy/api/dashboard_slow.py
# BAD -- DO NOT USE -- shown for comparison
import frappe
from frappe.utils import today, get_first_day
@frappe.whitelist()
def get_dashboard_data_slow():
"""This makes 1 + (N * 4) queries for N outlets. With 50 outlets = 201 queries."""
start = frappe.utils.time.time()
month_start = get_first_day(today())
outlets = frappe.get_all("Franchise Outlet", filters={"is_active": 1}) # Query 1
result = []
for outlet in outlets: # Loop: N iterations
# Query 2 per outlet: full document load (fetches ALL fields + child tables)
doc = frappe.get_doc("Franchise Outlet", outlet.name)
# Query 3 per outlet: count invoices
order_count = frappe.db.count("Sales Invoice", filters={
"franchise_outlet": outlet.name,
"docstatus": 1,
"posting_date": [">=", month_start],
})
# Query 4 per outlet: sum revenue
revenue = frappe.db.sql("""
SELECT SUM(grand_total) FROM `tabSales Invoice`
WHERE franchise_outlet = %s AND docstatus = 1 AND posting_date >= %s
""", (outlet.name, month_start))[0][0] or 0
# Query 5 per outlet: get owner name
owner_name = frappe.db.get_value("Supplier", doc.franchise_owner, "supplier_name")
result.append({
"outlet": doc.name,
"outlet_name": doc.outlet_name,
"city": doc.city,
"owner": owner_name,
"orders": order_count,
"revenue": revenue,
})
elapsed = frappe.utils.time.time() - start
frappe.logger().info(f"Dashboard SLOW: {len(outlets)} outlets, {elapsed:.2f}s")
# Typical: 50 outlets = 201 queries, ~2.5 seconds
return result

The fast version is three batch queries no matter how many outlets you have: a JOIN for outlet data plus owner name, a GROUP BY aggregation for sales metrics, and a single window-function query for the top item per outlet. The loop at the end only walks dictionaries — zero extra database calls.

scoopjoy/scoopjoy/api/dashboard_fast.py
import frappe
from frappe.utils import today, get_first_day
from frappe.query_builder.functions import Sum, Count
@frappe.whitelist()
def get_dashboard_data_fast():
"""3 queries total regardless of outlet count."""
start = frappe.utils.time.time()
month_start = get_first_day(today())
# ---- Query 1: All outlet data + owner name in a single JOIN ----
Outlet = frappe.qb.DocType("Franchise Outlet")
Supplier = frappe.qb.DocType("Supplier")
outlets = (
frappe.qb.from_(Outlet)
.left_join(Supplier).on(Outlet.franchise_owner == Supplier.name)
.select(
Outlet.name,
Outlet.outlet_name,
Outlet.city,
Outlet.region,
Outlet.monthly_target,
Supplier.supplier_name.as_("owner_name"),
)
.where(Outlet.is_active == 1)
).run(as_dict=True)
# ---- Query 2: All sales metrics in one aggregation ----
SI = frappe.qb.DocType("Sales Invoice")
sales_data = (
frappe.qb.from_(SI)
.select(
SI.franchise_outlet,
Count(SI.name).as_("order_count"),
Sum(SI.grand_total).as_("total_revenue"),
)
.where(SI.docstatus == 1)
.where(SI.posting_date >= month_start)
.groupby(SI.franchise_outlet)
).run(as_dict=True)
# Build lookup dictionary: O(1) access per outlet
sales_lookup = {row.franchise_outlet: row for row in sales_data}
# ---- Query 3: Top item per outlet (single query with window function) ----
top_items_raw = frappe.db.sql("""
SELECT franchise_outlet, item_name, total_qty
FROM (
SELECT
si.franchise_outlet,
sii.item_name,
SUM(sii.qty) AS total_qty,
ROW_NUMBER() OVER (
PARTITION BY si.franchise_outlet
ORDER BY SUM(sii.qty) DESC
) AS rn
FROM `tabSales Invoice Item` sii
JOIN `tabSales Invoice` si ON sii.parent = si.name
WHERE si.docstatus = 1 AND si.posting_date >= %(month_start)s
GROUP BY si.franchise_outlet, sii.item_name
) ranked WHERE rn = 1
""", {"month_start": month_start}, as_dict=True)
top_items_lookup = {row.franchise_outlet: row.item_name for row in top_items_raw}
# ---- Combine in Python (zero additional queries) ----
result = []
for outlet in outlets:
sales = sales_lookup.get(outlet.name, frappe._dict())
result.append({
"outlet": outlet.name,
"outlet_name": outlet.outlet_name,
"city": outlet.city,
"owner": outlet.owner_name,
"orders": sales.get("order_count", 0),
"revenue": sales.get("total_revenue", 0),
"target": outlet.monthly_target,
"top_item": top_items_lookup.get(outlet.name, "N/A"),
})
elapsed = frappe.utils.time.time() - start
frappe.logger().info(f"Dashboard FAST: {len(outlets)} outlets, {elapsed:.2f}s")
# Typical: 50 outlets = 3 queries, ~0.08 seconds (30x faster)
return result

The three building blocks: the left_join folds the owner’s supplier_name into the outlet row so there is no per-outlet get_value; the GROUP BY aggregation returns one row of metrics per outlet, keyed into sales_lookup for O(1) access; and the ROW_NUMBER() OVER (PARTITION BY ...) window function picks the single top item per outlet in one pass instead of N separate “top item” queries.

TechniqueBefore (slow)After (fast)
Outlet datafrappe.get_doc() per rowSingle frappe.qb query with JOIN
Sales metricsfrappe.db.count() + frappe.db.sql() per rowSingle GROUP BY query + lookup dict
Top itemsNot shown (would be another N queries)Single window function query + lookup dict
Owner namesfrappe.db.get_value() per rowLEFT JOIN in outlet query
Total queries1 + 4N (201 for 50 outlets)3 (constant)
Typical time~2.5s~0.08s

You can also see the two endpoints flipped against each other:

scoopjoy/scoopjoy/api/dashboard_slow.py
for outlet in outlets: # Loop: N iterations
doc = frappe.get_doc("Franchise Outlet", outlet.name) # +1 query
order_count = frappe.db.count("Sales Invoice", filters={...}) # +1 query
revenue = frappe.db.sql("SELECT SUM(grand_total) ...") # +1 query
owner_name = frappe.db.get_value("Supplier", ..., "supplier_name") # +1 query

Cache singleton settings with get_cached_doc

Section titled “Cache singleton settings with get_cached_doc”

For singleton settings documents, reach for frappe.get_cached_doc instead of frappe.get_doc. The cached variant hits Redis (and auto-invalidates on save), so repeat reads never touch the database.

scoopjoy/scoopjoy/api/dashboard_fast.py
def get_franchise_settings():
"""
Use get_cached_doc for singleton settings documents.
This hits Redis, not the database, on subsequent calls.
"""
# GOOD: cached in Redis, auto-invalidated on save
settings = frappe.get_cached_doc("Franchise Settings")
# BAD: hits the database every time
# settings = frappe.get_doc("Franchise Settings")
return {
"royalty_rate": settings.default_royalty_rate,
"minimum_guarantee": settings.minimum_guarantee,
"reporting_currency": settings.reporting_currency,
}

Child-table data is another classic N+1 trap. Both frappe.get_all (with dotted field names) and frappe.qb can pull parent and child rows in a single query.

scoopjoy/scoopjoy/api/dashboard_fast.py
def get_outlets_with_operating_hours():
"""
Fetch parent + child table data without N+1.
frappe.get_all supports fetching linked document fields directly.
"""
# GOOD: Single query with child table fields
outlets = frappe.get_all(
"Franchise Outlet",
filters={"is_active": 1},
fields=[
"name", "outlet_name", "city",
# Pull fields from child table via dot notation
"`tabOutlet Operating Hours`.day as operating_day",
"`tabOutlet Operating Hours`.opening_time",
"`tabOutlet Operating Hours`.closing_time",
],
)
# Or use frappe.qb for more control:
Outlet = frappe.qb.DocType("Franchise Outlet")
Hours = frappe.qb.DocType("Outlet Operating Hours")
data = (
frappe.qb.from_(Outlet)
.left_join(Hours).on(Hours.parent == Outlet.name)
.select(
Outlet.name,
Outlet.outlet_name,
Hours.day,
Hours.opening_time,
Hours.closing_time,
)
.where(Outlet.is_active == 1)
.orderby(Outlet.name)
.orderby(Hours.idx)
).run(as_dict=True)
return data