Northwind Enterprise Agent Notebook¶
Author: Rashad Malik
Objective
This notebook builds an AI-powered assistant that answers business questions by querying two distinct data sources: a structured SQL database (Northwind) and an unstructured document store (company handbooks). The agent automatically determines which source to query based on the user's question.
Key Steps
- Ingest documents into a FAISS vector store using HuggingFace sentence embeddings
- Connect to the Northwind SQLite database containing employee, order, and product data
- Build a LangChain agent equipped with both RAG retrieval and SQL query tools
- Test the agent with three query types: policy lookups, database queries, and hybrid questions requiring both sources
- Verify responses by comparing agent outputs against manual queries
Enterprise Applications
This pattern addresses a common problem in organisations: business data lives in multiple systems that don't talk to each other. An employee might need to check both the HR handbook and the payroll database to answer a simple question about leave entitlement. By combining retrieval-augmented generation with SQL tooling, a single agent can route questions to the appropriate source and synthesise answers from both structured and unstructured data.
The same architecture applies to customer support (querying both product databases and support documentation), legal research (case databases and policy documents), or any domain where answers require context from multiple systems.
Required libraries¶
import os
import sqlite3
import shutil
from dotenv import load_dotenv
from pathlib import Path
# LangChain Core
from langchain.agents import create_agent
# LangChain LLMs
from langchain_openai import ChatOpenAI
# LangChain SQL
from langchain_community.utilities import SQLDatabase
from langchain_community.agent_toolkits import SQLDatabaseToolkit
# LangChain RAG (Vector Store)
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.tools import tool
We use a .env file to store sensitive credentials (such as API keys) separately from our code. This is a common practice that:
- Keeps secrets out of version control (the .env file should be added to .gitignore).
- Makes it easy to use different credentials in development, testing, and production environments.
- Allows collaborators to provide their own keys without modifying the notebook.
To run this notebook yourself, you will need to:
- Obtain an API key from OpenAI.
- Create a file named .env in the root directory of this project (the same folder as this notebook).
- Add the following line to the file, replacing the placeholder with your actual key: OPENAI_API_KEY=sk-your-api-key-here
- Optionally, if you wish to use LangSmith for tracing and debugging, add your LangSmith API key as well: LANGSMITH_API_KEY=your-langsmith-key-here
The load_dotenv() function reads these values and makes them available as environment variables, which the LangChain libraries use automatically.
# Loads all variables from the .env file
load_dotenv()
# Confirmation that keys were loaded correctly
print(f"OpenAI Key Loaded: {'OPENAI_API_KEY' in os.environ}")
print(f"LangSmith Key Loaded: {'LANGSMITH_API_KEY' in os.environ}")
OpenAI Key Loaded: True
LangSmith Key Loaded: True
1 Ingesting Documents to Vector Store¶
This cell performs the one-time ingestion process, which prepares our documents so the AI agent can search through them later. Here's what each step does:
Load the source documents: The code reads three markdown files from the knowledge_base folder:
- company_info.md: company history, mission, values, and global operations.
- hr_policy.md: employee handbook covering leave entitlements, expenses, and working practices.
- it_guide.md: IT support procedures, hardware policy, and security guidelines.
Split into chunks: Large documents are broken into smaller, overlapping pieces (around 1,000 characters each). This is necessary because embedding models work better with shorter text, and it helps the search return more precise results rather than entire documents.
Convert text to vectors: Each chunk is passed through a HuggingFace sentence transformer model (all-MiniLM-L6-v2), which converts the text into a vector. These vectors capture the meaning of the text, so similar concepts end up with similar vectors.
Save to a vector database: The vectors are stored in a FAISS index on disk. FAISS is a library that enables fast similarity search. When a question is asked, the question text is converted to a vector, and the chunks with the most similar vectors are found.
# Load documents
documents = []
for md_file in Path('./knowledge_base/').glob("*.md"):
loader = TextLoader(str(md_file), encoding='utf-8')
documents.extend(loader.load())
# Split documents
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = text_splitter.split_documents(documents)
# Load embedding model
print("Loading embedding model (this may take a moment)...")
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# Create and save the vector store
print("Creating and saving vector store...")
vector_store = FAISS.from_documents(docs, embeddings)
vector_store.save_local("faiss_index")
print("Vector store 'faiss_index' created and saved successfully.")
Loading embedding model (this may take a moment)...
Creating and saving vector store...
Vector store 'faiss_index' created and saved successfully.
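Before moving on, it is worth running a quick retrieval sanity check. The minimal sketch below (the query string is just an illustrative example) confirms that the index returns a sensible chunk:
# Sanity check: query the freshly built index and inspect the closest chunk
hits = vector_store.similarity_search("How much annual leave do employees get?", k=1)
print(f"Source: {hits[0].metadata.get('source', 'N/A')}")
print(hits[0].page_content[:300])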
2 Preparing the Northwind Database¶
The Northwind database is a sample dataset originally created by Microsoft to demonstrate database concepts. It simulates a fictional company called "Northwind Traders" that imports and exports speciality foods. The database contains realistic business data including employees, customers, orders, products, and suppliers. This makes it ideal for testing SQL queries and building data-driven applications.
This database is well-suited for this project because:
- It contains interconnected tables that mirror real enterprise data (e.g. linking employees to orders to products).
- It is small enough to run locally.
- It provides a realistic scenario for business questions like "Who is our top salesperson?" or "What products are running low on stock?"
The cell below copies the source SQLite file to the project root folder, renaming it to northwind.db. This keeps the original file intact and gives us a clean working copy for the agent to query.
# Define the source path and the target path
source_db_path = os.path.join('data', 'northwind-SQLite3-0.1.0', 'Northwind_small.sqlite')
target_db_name = 'northwind.db'
# Check if the target file already exists
if not os.path.exists(target_db_name):
print(f"Database '{target_db_name}' not found.")
# Check if the source file exists
if os.path.exists(source_db_path):
print(f"Copying '{source_db_path}' to '{target_db_name}'...")
shutil.copy(source_db_path, target_db_name)
print(f"Database '{target_db_name}' is ready.")
else:
print(f"ERROR: Source file not found at '{source_db_path}'.")
print("Please ensure the file path is correct based on your project structure.")
else:
print(f"Database '{target_db_name}' already exists. Skipping copy.")
Database 'northwind.db' already exists. Skipping copy.
3 Initialising LLM and Database Connection¶
This cell sets up the two core components that power our agent: the language model (LLM) and the database connection.
The LLM's Role in a LangChain Agent
The LLM acts as the "brain" of the agent. It reasons about what tools to use and in what order. When a user asks a question, the LLM:
- Analyses the question to understand what information is needed.
- Decides which tool (RAG search or SQL query) is most appropriate.
- Formulates the input to pass to that tool (e.g. a search query or an SQL statement).
- Interprets the tool's output and decides whether to call another tool or provide a final answer.
Choosing a Model
LangChain allows us to select from a wide range of LLM providers and models. For this proof of concept, we use gpt-4o. In production environments, model selection should be based on:
- Task performance: Some models excel at reasoning and tool use, while others are better suited to summarisation or code generation. Benchmarking against the specific use case is important.
- Token costs: Models vary significantly in price per token. For high-volume applications, a smaller or more efficient model may be more cost-effective.
The temperature parameter controls the randomness of the model's output. A temperature of 0 produces more focused, deterministic responses (the model picks the most likely next token each time). Higher values (e.g. 0.7 or 1.0) introduce more variation and creativity. For agentic tasks where consistency matters, a low temperature is typically preferred.
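To see the effect in practice, here is a small illustrative sketch (assuming the OpenAI key is loaded; the prompt is hypothetical) comparing a deterministic setting with a more creative one:
# Illustrative comparison of temperature settings (responses will vary per run)
prompt = "Suggest a name for a speciality foods importer."
for temp in (0.0, 1.0):
    model = ChatOpenAI(model="gpt-4o", temperature=temp)
    print(f"temperature={temp}: {model.invoke(prompt).content}")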
The Database Connection
LangChain's SQLDatabase wrapper provides a standardised interface for the agent to interact with our SQLite database. It handles the connection details and allows the SQL tools to inspect the schema, run queries, and return results in a format the LLM can interpret.
# Initialise the LLM
# This assumes your OPENAI_API_KEY is loaded in your environment
llm = ChatOpenAI(model="gpt-4o", temperature=0)
# Initialise the SQLDatabase connection
db_uri = f"sqlite:///{target_db_name}"
db = SQLDatabase.from_uri(db_uri)
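As a quick sanity check (a sketch using the wrapper's standard helpers), we can confirm the connection works and see exactly what the agent will see:
# Inspect the connection: dialect, available tables, and a trivial query
print(f"Dialect: {db.dialect}")
print(f"Tables: {db.get_usable_table_names()}")
print(db.run('SELECT COUNT(*) FROM "Order";'))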
4 Creating the Agent's Tools (RAG + SQL)¶
Tools are functions that the agent can call to perform specific tasks. The agent cannot directly search documents or query databases on its own. Instead, it decides which tool to use based on the question, calls that tool, and then interprets the result.
We will create two types of tools:
- A RAG tool for searching company policy documents stored in the vector database.
- SQL tools for querying the Northwind database to retrieve business data.
The agent framework we are using expects tools to be provided as a list of callable functions. Each function must include a description that tells the agent when to use it.
4.1 The RAG Tool¶
This tool allows the agent to search the company policy documents we ingested earlier into the FAISS vector store.
- The @tool decorator: This converts a standard Python function into a LangChain tool. The decorator reads the function's docstring and uses it as the tool's description. The agent uses this description to decide when to call the tool.
- The function signature: The function takes a query string as input. This is the search term the agent will provide when it calls the tool.
- The similarity search: Inside the function, we call vector_store.similarity_search(query, k=3). This finds the three document chunks whose vector representations are closest to the query's vector. In simpler terms, it finds the three pieces of text that are most relevant to the question.
- Formatting the output: The function returns the matching text chunks as a formatted string. The agent will read this string and use the information to answer the user's question.
The function's docstring is critical, as it explicitly mentions topics like "vacation", "sick leave", and "hardware requests". This helps the agent understand that questions about these topics should be directed to this tool rather than the SQL tools.
# Defining the RAG tool
@tool
def search_company_policies(query: str) -> str:
"""
Searches the company knowledge base for policies on HR, IT, and general company guidelines.
Use this for any questions about vacation, sick leave, hardware requests, or work-from-home policies.
"""
docs = vector_store.similarity_search(query, k=3) # Get top 3 results
if not docs:
return "No relevant company policies were found."
# Format the results
return "\n---\n".join([f"Source: {doc.metadata.get('source', 'N/A')}\nContent: {doc.page_content}" for doc in docs])
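Before handing the tool to an agent, we can call it directly. A minimal sketch (LangChain tools expose an .invoke() method):
# Call the RAG tool directly to confirm it returns policy text
print(search_company_policies.invoke("How many sick days do employees get?"))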
4.2 The SQL Tool¶
Querying a database requires several steps: inspecting the available tables, understanding the column names, writing a valid SQL query, and executing it. Rather than building a single tool to handle all of this, LangChain provides a pre-built toolkit that creates multiple specialised tools automatically.
Initialise the toolkit: We create an SQLDatabaseToolkit object by passing it our database connection (db) and the language model (llm). The toolkit needs the LLM because some of its tools use the model to help generate or validate SQL queries.
Extract the tools: Calling sql_toolkit.get_tools() returns a list of tool objects. Each tool handles a different part of the database interaction process. Typical tools include:
- A tool to list the tables in the database.
- A tool to describe the schema (columns and data types) of a specific table.
- A tool to execute a SQL query and return the results.
- A tool to check whether a query is valid before running it.
Inspect the tool names: The print statement displays the names of the tools that were created. This is useful for debugging and for understanding what capabilities the agent will have.
By using the toolkit, we avoid writing boilerplate code for each database operation. The agent can call these tools in sequence to explore the database schema and construct accurate queries.
# Initialise the toolkit
sql_toolkit = SQLDatabaseToolkit(db=db, llm=llm)
# Get the list of tools from the toolkit
sql_tools = sql_toolkit.get_tools()
# Inspecting the created tools
print(f"SQL Tools Created: {[tool.name for tool in sql_tools]}")
SQL Tools Created: ['sql_db_query', 'sql_db_schema', 'sql_db_list_tables', 'sql_db_query_checker']
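These tools can also be invoked individually, which is handy for debugging. The small sketch below calls the table-listing tool directly (it takes an empty string as input):
# Invoke the list-tables tool outside the agent loop
list_tables_tool = next(t for t in sql_tools if t.name == "sql_db_list_tables")
print(list_tables_tool.invoke(""))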
5 Creating the Main Enterprise Agent¶
In the previous sections, we created two separate capabilities for our agent:
- The RAG tool (search_company_policies): A single function that searches our company policy documents.
- The SQL tools (sql_tools): A list of several functions that allow the agent to explore and query the Northwind database.
Before we can create the agent, we need to combine these into a single list. This gives the agent access to one toolbox containing all available tools, from which it will decide which tool to use based on the question it receives.
Merge the tools into one list: We add our RAG tool to the beginning of the SQL tools list using the + operator. The result is a single list containing all tools the agent can use.
Create the agent executor: The create_agent function builds the agent by combining three components:
- The LLM (llm): The language model that acts as the agent's reasoning engine. It reads the user's question, decides which tools to call, and interprets the results.
- The tools list (tools): The combined list of all available tools.
- The system prompt: A short instruction that tells the agent what role it should play. This prompt is included at the start of every conversation to set the agent's behaviour.
Print confirmation: The final lines confirm that the agent was created successfully and display the names of all available tools.
The agent_executor object returned by this function is what we will use to send questions to the agent. It handles the entire loop of reasoning, tool calling, and response generation.
# Merging the tools into one list
tools = [search_company_policies] + sql_tools
# Creating the agent
agent_executor = create_agent(
llm,
tools,
system_prompt="You are a helpful Enterprise Assistant. You have access to two types of tools: "
"1. A tool for searching company policies (HR, IT, etc.). "
"2. A set of tools for querying the company's Northwind SQL database."
)
print("Enterprise Assistant agent created successfully!")
print(f"Agent has access to {len(tools)} tools: {[tool.name for tool in tools]}")
Enterprise Assistant agent created successfully!
Agent has access to 5 tools: ['search_company_policies', 'sql_db_query', 'sql_db_schema', 'sql_db_list_tables', 'sql_db_query_checker']
6 Testing the Agent¶
To confirm that our agent works correctly, we need to test it with three distinct types of questions. Each test targets a different capability:
RAG-only test: A question that requires searching the company policy documents. This verifies that the agent correctly identifies when to use the vector store tool and can retrieve relevant information from unstructured text.
SQL-only test: A question that requires querying the Northwind database. This verifies that the agent can inspect the database schema, write valid SQL, and return accurate results from structured data.
Hybrid test: A question that requires information from both sources. This is the most challenging scenario because the agent must decide to call multiple tools and combine their outputs into a coherent answer.
If all three tests pass, we have evidence that the agent can route questions to the correct data source based on the question's content.
6.1 First Test: RAG Query (Policy Lookup)¶
This test asks about sick leave entitlement. The answer exists in our HR policy document, not in the database.
When the agent receives this question, it should:
- Recognise that "sick days" relates to company policy rather than business data.
- Call the search_company_policies tool with an appropriate search query.
- Read the retrieved document chunks and extract the relevant figure.
- Return a clear answer based on the policy text.
If successful, this confirms that the agent can retrieve information from unstructured documents stored in the vector database.
# Test 1: Ask a RAG (policy) question
question_rag = "How many sick days do I get per year?"
response_rag = agent_executor.invoke({
"messages": [
{"role": "user", "content": question_rag}
]
})
print("--- RAG Response ---")
print(f"Question: {question_rag}")
print(f"Answer: {response_rag['messages'][-1].content}")
--- RAG Response ---
Question: How many sick days do I get per year?
Answer: You are entitled to up to 10 working days of full pay for sick leave in a rolling 12-month period, provided you have passed your probation period. After that, Statutory Sick Pay (SSP) is payable for up to 28 weeks, subject to qualifying conditions.
Looking at the hr_policy.md file, we can verify whether the agent's answer is correct:
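A minimal sketch for this check (assuming the sick leave rules sit in a section of hr_policy.md whose heading mentions "sick") prints the relevant section directly:
# Print the sick leave section of the HR policy for manual verification
policy_text = Path("./knowledge_base/hr_policy.md").read_text(encoding="utf-8")
for section in policy_text.split("\n#"):
    if "sick" in section.lower():
        print(section.strip())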
We can see that the policy states employees are entitled to 10 sick days per year, confirming that our agent has passed the RAG Query test.
6.2 Second Test: SQL Query (Database Lookup)¶
This test asks which employee has the highest total sales. The answer requires aggregating data from multiple tables in the Northwind database.
When the agent receives this question, it should:
- Recognise that "sales" and "employee" refer to business data stored in the database.
- Use the SQL tools to inspect the available tables and their columns.
- Write a SQL query that joins the Employee, Order, and OrderDetail tables.
- Execute the query and interpret the results.
- Return the employee's name and their total sales figure.
If successful, this confirms that the agent can construct and execute multi-table SQL queries to answer business questions.
# Test 2: Ask a SQL (database) question
question_sql = "Which employee has the most sales? Show their name and total sales amount."
response_sql = agent_executor.invoke({
"messages": [
{"role": "user", "content": question_sql}
]
})
print("--- SQL Response ---")
print(f"Question: {question_sql}")
print(f"Answer: {response_sql['messages'][-1].content}")
--- SQL Response ---
Question: Which employee has the most sales? Show their name and total sales amount.
Answer: The employee with the most sales is Margaret Peacock, with a total sales amount of 232,890.85.
To confirm the agent's answer is correct, we run the same query manually against the database. This gives us a ground truth to compare against.
How the SQL query works:
- SELECT ... AS: We combine the first and last name into a single column called EmployeeName. The SUM(...) calculates the total sales for each employee, accounting for discounts.
- FROM "Employee" e: We start with the Employee table and give it a short alias e so we can reference it easily.
- JOIN "Order" o ON ...: We connect the Employee table to the Order table. The ON clause specifies that an employee's Id must match the EmployeeId in the order. This links each order to the employee who made it.
- JOIN "OrderDetail" od ON ...: We then connect each order to its line items in the OrderDetail table. Each order can have multiple products, and this table stores the price and quantity for each.
- GROUP BY e."Id": We group all rows by employee, so the SUM function adds up all sales for each person rather than across the entire table.
- ORDER BY TotalSales DESC: We sort the results so the highest sales figure appears first.
- LIMIT 1: We only return the top row, giving us the single employee with the most sales.
# Verifying SQL Answer
sql_query = """
SELECT
e."FirstName" || ' ' || e."LastName" AS EmployeeName,
SUM(od."UnitPrice" * od."Quantity" * (1 - od."Discount")) AS TotalSales
FROM "Employee" e
JOIN "Order" o ON e."Id" = o."EmployeeId"
JOIN "OrderDetail" od ON o."Id" = od."OrderId"
GROUP BY e."Id"
ORDER BY TotalSales DESC
LIMIT 1;
"""
print(f"Executing manual query on '{target_db_name}'...")
conn = None
try:
conn = sqlite3.connect(target_db_name)
cursor = conn.cursor()
cursor.execute(sql_query)
result = cursor.fetchone()
print("\n---Manual SQL: Top Salesperson---")
if result:
print(f"Employee: {result[0]}, Total Sales: {result[1]:.2f}")
else:
print("No data returned from manual query.")
except sqlite3.Error as e:
print(f"An error occurred: {e}")
finally:
if conn:
conn.close()
Executing manual query on 'northwind.db'...
---Manual SQL: Top Salesperson---
Employee: Margaret Peacock, Total Sales: 232890.85
We can see that the manual SQL query returns the same top salesperson and sales figure as the agent, confirming it has passed the SQL Query test.
6.3 Third Test: Hybrid Query (Combined Sources)¶
This test asks about the work-from-home policy for a specific employee. It mentions both a named individual (database) and a company policy (documents).
When the agent receives this question, it should:
- Recognise that the question spans two data sources.
- Optionally verify whether "Margaret Peacock" exists in the database (though the question already names her).
- Call the search_company_policies tool to find work-from-home policy information.
- Combine the findings into a single response.
This test is important because real-world questions often require information from multiple systems. An effective enterprise agent must handle these cross-domain queries without requiring the user to ask separate questions.
# Test 3: Ask a complex, hybrid question
question_hybrid = "What is the work-from-home policy for our top salesperson, Margaret Peacock?"
response_hybrid = agent_executor.invoke({
"messages": [
{"role": "user", "content": question_hybrid}
]
})
print("--- Hybrid Response ---")
print(f"Question: {question_hybrid}")
print(f"Answer: {response_hybrid['messages'][-1].content}")
--- Hybrid Response ---
Question: What is the work-from-home policy for our top salesperson, Margaret Peacock?
Answer: The work-from-home policy at Northwind Traders allows roles deemed suitable for hybrid working, such as administrative, finance, HR, and IT, to work from home up to two days per week. Core office days are Tuesday, Wednesday, and Thursday, which are designated for collaboration. Employees must ensure they have a suitable, private workspace and a stable internet connection when working remotely. Margaret Peacock, our top salesperson, holds the title of "Sales Representative." As a sales representative, she may be eligible for the hybrid working model, allowing her to work from home up to two days per week, depending on departmental policies and her specific role requirements.
To verify the agent's answer, we first run an SQL query to confirm Margaret Peacock's job title.
# --- SQL Query: Find Margaret Peacock's title ---
print("---SQL Verification: Margaret Peacock's Title---")
sql_query_margaret = """
SELECT "FirstName", "LastName", "Title"
FROM "Employee"
WHERE "FirstName" = 'Margaret' AND "LastName" = 'Peacock';
"""
conn = None
try:
conn = sqlite3.connect(target_db_name)
cursor = conn.cursor()
cursor.execute(sql_query_margaret)
result = cursor.fetchone()
if result:
print(f"Employee: {result[0]} {result[1]}")
print(f"Title: {result[2]}")
else:
print("No employee found with name 'Margaret Peacock'.")
except sqlite3.Error as e:
print(f"An error occurred: {e}")
finally:
if conn:
conn.close()
---SQL Verification: Margaret Peacock's Title---
Employee: Margaret Peacock
Title: Sales Representative
Next, we can check the HR policy document to see what it says about work-from-home arrangements.
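As before, a minimal sketch (assuming the relevant section's text mentions "remote", "hybrid", or "home") prints the matching policy sections:
# Print HR policy sections mentioning remote/hybrid working for manual verification
policy_text = Path("./knowledge_base/hr_policy.md").read_text(encoding="utf-8")
for section in policy_text.split("\n#"):
    if any(term in section.lower() for term in ("remote", "hybrid", "home")):
        print(section.strip())
        print("---")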
Checking the agent's answer against both sources, we can see that Margaret Peacock is indeed a Sales Representative, and the policy allows work-from-home arrangements. This confirms that our agent has passed the Hybrid Query test.
7 Conclusion¶
This notebook demonstrated how to build an AI-powered enterprise assistant capable of answering questions from both structured and unstructured data sources. By combining a FAISS vector store for document retrieval with LangChain's SQL toolkit for database queries, we created an agent that can route questions to the appropriate source without manual intervention. The three test cases confirmed that the agent handles policy lookups, database queries, and hybrid questions requiring information from both systems.
The architecture presented here is intentionally straightforward. In a production environment, several enhancements would be worth considering: adding authentication and access controls, implementing query caching for frequently asked questions, and fine-tuning the retrieval parameters based on user feedback. The same pattern could be extended to incorporate additional data sources such as APIs, spreadsheets, or real-time data feeds.
Whilst this proof of concept uses a small sample database and a handful of policy documents, the underlying approach scales to enterprise environments where data is scattered across dozens of systems that employees must navigate daily.
8 References¶
- LangChain Documentation: https://docs.langchain.com/oss/python/langchain/overview
- LangChain SQL Toolkit: https://docs.langchain.com/oss/python/integrations/tools/sql_database
- FAISS (Facebook AI Similarity Search): https://github.com/facebookresearch/faiss
- HuggingFace Sentence Transformers: https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
- Northwind Database: https://github.com/jpwhite3/northwind-SQLite3
- OpenAI API: https://platform.openai.com/docs/