Prompt Engineering Code Reference
Complete code implementations for Chapter 6: Prompt Engineering.
Table of Contents
Few-Shot Learning
Dynamic Few-Shot Classifier
Complete implementation for retrieving relevant examples based on query similarity:
from sklearn.metrics.pairwise import cosine_similarity
class DynamicFewShotClassifier:
    """Few-shot classifier that chooses its in-context examples per query.

    The ``input`` text of every example is embedded once at construction.
    For each query, the examples are ranked by cosine similarity to the
    query, a label-diverse subset is selected, and the resulting prompt is
    sent to ``call_llm``.

    Args:
        examples: Labelled examples. Each dict must contain an ``input`` key;
            ``build_prompt`` additionally requires a ``label`` key.
        embedding_model: Object exposing ``encode(list_of_texts)``. Its output
            is passed directly to ``cosine_similarity``.
    """

    def __init__(self, examples: list[dict], embedding_model):
        self.examples = examples
        self.embedder = embedding_model
        # Embed the example inputs once so each query needs one encode call.
        self.example_embeddings = self.embedder.encode([ex['input'] for ex in examples])

    def get_examples(self, query: str, k: int = 5) -> list[dict]:
        """Retrieve up to k examples most similar to the query.

        Args:
            query: Text to find neighbours for.
            k: Maximum number of examples to return.

        Returns:
            At most ``k`` examples. Label-diverse picks come first (in
            descending similarity); any remaining slots are filled with the
            most similar examples that the diversity rule skipped.
        """
        # Nothing to rank: avoid calling the similarity routine on empty input.
        if k <= 0 or not self.examples:
            return []

        query_embedding = self.embedder.encode([query])
        similarities = cosine_similarity(query_embedding, self.example_embeddings)[0]
        ranked_indices = similarities.argsort()[::-1]
        return self._select_diverse(ranked_indices, k)

    def _select_diverse(self, ranked_indices, k: int) -> list[dict]:
        """Pick up to k examples from best-first indices, favouring new labels."""
        selected = []
        skipped = []
        labels_seen = set()

        for idx in ranked_indices:
            if len(selected) >= k:
                break
            example = self.examples[idx]
            label = example.get('label')
            # The first k // 2 slots go to the nearest neighbours regardless of
            # label; after that only labels not yet represented are accepted.
            if label not in labels_seen or len(selected) < k // 2:
                selected.append(example)
                labels_seen.add(label)
            else:
                skipped.append(example)

        # Backfill: without this, a pool dominated by a few labels yields
        # fewer than k examples even though enough examples exist.
        selected.extend(skipped[:k - len(selected)])
        return selected

    def build_prompt(self, examples: list[dict], query: str) -> str:
        """Build few-shot prompt from examples.

        Each example is rendered as an ``Input:`` / ``Label:`` pair, followed
        by the query with an empty ``Label:`` for the model to complete.
        """
        prompt_parts = ["Classify the following input based on these examples:\n"]
        for ex in examples:
            prompt_parts.append(f"Input: {ex['input']}")
            prompt_parts.append(f"Label: {ex['label']}\n")
        prompt_parts.append(f"Input: {query}")
        prompt_parts.append("Label:")
        return "\n".join(prompt_parts)

    def classify(self, query: str) -> str:
        """Classify query using dynamically selected examples.

        Returns whatever ``call_llm`` returns for the assembled prompt.
        """
        examples = self.get_examples(query)
        prompt = self.build_prompt(examples, query)
        return call_llm(prompt)
# Usage example
# NOTE: `embedding_model` is a placeholder that this file does not define;
# supply an object with an `encode(list_of_texts)` method before running.
examples = [
    {"input": "Where is my order?", "label": "ORDER_STATUS"},
    {"input": "I want a refund", "label": "RETURNS"},
    {"input": "Is this in stock?", "label": "PRODUCT_INFO"},
    # ... more examples
]
classifier = DynamicFewShotClassifier(examples, embedding_model)
result = classifier.classify("When will my package arrive?")


# Context Window Management
Context Manager
Complete implementation for allocating context budget across prompt components:
class ContextManager:
    """Split a model's context window between chat history and documents.

    Args:
        max_tokens: Total size of the context window, in tokens.
        tokenizer: Object exposing ``encode(text)`` whose result supports
            ``len()`` (for example a ``tiktoken`` encoding).
    """

    def __init__(self, max_tokens: int, tokenizer):
        self.max_tokens = max_tokens
        self.tokenizer = tokenizer

    def allocate(
        self,
        system_prompt: str,
        reserved_output: int,
        conversation: list[dict],
        documents: list[str],
        *,
        conversation_ratio: float = 0.4,
    ) -> tuple[list[dict], list[str]]:
        """Allocate context budget across components.

        Args:
            system_prompt: Always-included prompt; its tokens are subtracted
                from the window before anything else.
            reserved_output: Tokens held back for the model's reply.
            conversation: Messages, oldest first; each needs a ``content`` key.
            documents: Documents, assumed to be sorted most relevant first.
            conversation_ratio: Share of the remaining budget given to the
                conversation; the rest goes to documents. Defaults to 40%.

        Returns:
            ``(trimmed_conversation, trimmed_documents)``.

        Raises:
            ValueError: If ``conversation_ratio`` is outside ``[0, 1]``.
        """
        if not 0.0 <= conversation_ratio <= 1.0:
            raise ValueError("conversation_ratio must be between 0 and 1")

        system_tokens = self.count(system_prompt)
        # Clamp so an oversized system prompt yields empty budgets rather than
        # negative ones.
        available = max(0, self.max_tokens - system_tokens - reserved_output)

        conversation_budget = int(available * conversation_ratio)
        # Give documents the exact remainder: truncating both shares
        # independently can silently drop a token from the total.
        document_budget = available - conversation_budget

        # Trim conversation (keep most recent)
        trimmed_conversation = self.trim_conversation(conversation, conversation_budget)
        # Trim documents (keep most relevant, assuming sorted by relevance)
        trimmed_documents = self.trim_documents(documents, document_budget)
        return trimmed_conversation, trimmed_documents

    def trim_conversation(self, conversation: list[dict], budget: int) -> list[dict]:
        """Keep most recent messages that fit in budget.

        Walks from the newest message backwards and stops at the first one
        that would overflow ``budget``. The result is in chronological order.
        """
        kept = []
        total = 0
        for message in reversed(conversation):
            tokens = self.count(message['content'])
            if total + tokens > budget:
                break
            kept.append(message)
            total += tokens
        # Collected newest-first; one reverse avoids repeated front insertion.
        kept.reverse()
        return kept

    def trim_documents(self, documents: list[str], budget: int) -> list[str]:
        """Keep most relevant documents that fit in budget.

        Takes documents in the given order and stops at the first one that
        would overflow ``budget``; later documents are not considered.
        """
        result = []
        total = 0
        for doc in documents:
            tokens = self.count(doc)
            if total + tokens > budget:
                break
            result.append(doc)
            total += tokens
        return result

    def count(self, text: str) -> int:
        """Count tokens in text."""
        return len(self.tokenizer.encode(text))
# Usage example
import tiktoken

tokenizer = tiktoken.encoding_for_model("gpt-4")
manager = ContextManager(max_tokens=8000, tokenizer=tokenizer)
# NOTE: `conversation_history` and `retrieved_documents` are placeholders that
# this file does not define; supply your own message list and document list.
trimmed_conv, trimmed_docs = manager.allocate(
    system_prompt="You are a helpful assistant.",
    reserved_output=1000,
    conversation=conversation_history,
    documents=retrieved_documents,
)


# Conversation Summarization
Complete implementation for managing long conversations with summarization:
def manage_conversation_with_summary(
    conversation: list[dict],
    max_tokens: int,
    summary_threshold: float = 0.7,
    *,
    recent_ratio: float = 0.6,
    token_counter=None,
    summarizer=None,
) -> list[dict]:
    """Summarize old messages when conversation gets too long.

    Args:
        conversation: Messages, oldest first; each needs a ``content`` key.
        max_tokens: Token budget for the whole conversation.
        summary_threshold: Fraction of ``max_tokens`` below which the
            conversation is returned untouched.
        recent_ratio: Fraction of ``max_tokens`` reserved for the most recent
            messages, which are kept verbatim.
        token_counter: Callable ``text -> int``; defaults to ``count_tokens``.
        summarizer: Callable ``messages -> str``; defaults to
            ``summarize_conversation``.

    Returns:
        The original list when no summarization is needed; otherwise a new
        list holding one system message with the summary followed by the
        retained recent messages.
    """
    if token_counter is None:
        token_counter = count_tokens
    if summarizer is None:
        summarizer = summarize_conversation

    # Tokenize each message once; the counts serve both the threshold check
    # and the split-point search.
    token_counts = [token_counter(m['content']) for m in conversation]
    if sum(token_counts) < max_tokens * summary_threshold:
        return conversation  # Still fits

    # Find split point: keep recent messages, summarize old
    recent_budget = int(max_tokens * recent_ratio)
    recent_tokens = 0
    split = len(conversation)
    for tokens in reversed(token_counts):
        if recent_tokens + tokens > recent_budget:
            break
        recent_tokens += tokens
        split -= 1

    old_messages = conversation[:split]
    recent_messages = list(conversation[split:])

    # Everything fits the recent budget (possible when summary_threshold is
    # below recent_ratio): there is nothing to summarize, so skip the LLM call.
    if not old_messages:
        return conversation

    summary = summarizer(old_messages)
    # Return summary + recent messages
    return [{"role": "system", "content": f"Previous conversation summary:\n{summary}"}] + recent_messages
def summarize_conversation(messages: list[dict]) -> str:
    """Ask the LLM for a concise summary of the given messages.

    Each message is rendered as ``ROLE: content`` on its own line, wrapped in
    a summarization instruction, and sent to ``call_llm`` with
    ``max_tokens=500``.
    """
    transcript_lines = []
    for m in messages:
        transcript_lines.append(f"{m['role'].upper()}: {m['content']}")
    conversation_text = "\n".join(transcript_lines)

    # The prompt body stays flush-left so the text sent to the model carries
    # no indentation.
    prompt = f"""Summarize this conversation concisely, preserving key information:
{conversation_text}
Summary:"""
    return call_llm(prompt, max_tokens=500)
def count_tokens(text: str) -> int:
    """Count tokens in text (implement with your tokenizer).

    This reference version uses the ``tiktoken`` encoding for ``gpt-4``.
    """
    # Using tiktoken for OpenAI models; imported here so the dependency is
    # only needed when this helper is actually called.
    import tiktoken

    enc = tiktoken.encoding_for_model("gpt-4")
    return len(enc.encode(text))


# Document Chunking
Complete implementation for intelligent document chunking with overlap:
from nltk.tokenize import sent_tokenize
def chunk_document(
    text: str,
    chunk_size: int = 1000,
    overlap: int = 200,
    *,
    token_counter=None,
    sentence_splitter=None,
) -> list[str]:
    """Chunk document with overlap for continuity.

    Sentences are packed greedily into chunks of at most ``chunk_size``
    tokens. Each new chunk starts with up to ``overlap`` tokens of trailing
    sentences from the previous chunk.

    Args:
        text: Document to split.
        chunk_size: Target maximum tokens per chunk. A single sentence longer
            than this still becomes its own, oversized chunk.
        overlap: Maximum tokens carried over from the previous chunk.
        token_counter: Callable ``text -> int``; defaults to ``count_tokens``.
        sentence_splitter: Callable ``text -> list[str]``; defaults to
            ``sent_tokenize``.

    Returns:
        Chunks in document order, each a space-joined run of sentences.
    """
    if token_counter is None:
        token_counter = count_tokens
    if sentence_splitter is None:
        sentence_splitter = sent_tokenize

    # Tokenize every sentence exactly once; overlap computation reuses these.
    sized_sentences = [(s, token_counter(s)) for s in sentence_splitter(text)]

    chunks = []
    current = []  # (sentence, token_count) pairs in the chunk being built
    current_size = 0

    for sentence, tokens in sized_sentences:
        if current and current_size + tokens > chunk_size:
            chunks.append(' '.join(s for s, _ in current))

            # Keep overlap: walk backwards over the finished chunk. Stop when
            # the overlap budget is used up, or when the carried-over text
            # plus the incoming sentence would itself exceed chunk_size.
            tail = []
            tail_size = 0
            for prev, prev_tokens in reversed(current):
                carried = tail_size + prev_tokens
                if carried > overlap or carried + tokens > chunk_size:
                    break
                tail.append((prev, prev_tokens))
                tail_size = carried
            tail.reverse()
            current, current_size = tail, tail_size

        current.append((sentence, tokens))
        current_size += tokens

    if current:
        chunks.append(' '.join(s for s, _ in current))
    return chunks
def chunk_by_semantic_similarity(
    text: str,
    embedding_model,
    similarity_threshold: float = 0.7,
    max_chunk_size: int = 1000,
    *,
    token_counter=None,
    sentence_splitter=None,
) -> list[str]:
    """Chunk document based on semantic similarity between sentences.

    A new chunk starts whenever a sentence's cosine similarity to the
    sentence before it drops below ``similarity_threshold``, or when adding
    it would push the chunk past ``max_chunk_size`` tokens.

    Args:
        text: Document to split.
        embedding_model: Object exposing ``encode(list_of_sentences)`` that
            returns one embedding per sentence.
        similarity_threshold: Minimum similarity for two adjacent sentences
            to share a chunk.
        max_chunk_size: Maximum tokens per chunk.
        token_counter: Callable ``text -> int``; defaults to ``count_tokens``.
        sentence_splitter: Callable ``text -> list[str]``; defaults to
            ``sent_tokenize``.

    Returns:
        Chunks in document order; an empty list when the text contains no
        sentences.
    """
    if token_counter is None:
        token_counter = count_tokens
    if sentence_splitter is None:
        sentence_splitter = sent_tokenize

    sentences = sentence_splitter(text)
    # Empty input: indexing the first sentence below would raise IndexError.
    if not sentences:
        return []

    embeddings = embedding_model.encode(sentences)

    chunks = []
    current_chunk = [sentences[0]]
    current_size = token_counter(sentences[0])
    previous_embedding = embeddings[0]

    for sentence, embedding in zip(sentences[1:], embeddings[1:]):
        # Check similarity with previous sentence
        sim = cosine_similarity([previous_embedding], [embedding])[0][0]
        previous_embedding = embedding
        sentence_tokens = token_counter(sentence)

        # Start new chunk if similarity is low or size exceeded
        if sim < similarity_threshold or current_size + sentence_tokens > max_chunk_size:
            chunks.append(' '.join(current_chunk))
            current_chunk = [sentence]
            current_size = sentence_tokens
        else:
            current_chunk.append(sentence)
            current_size += sentence_tokens

    if current_chunk:
        chunks.append(' '.join(current_chunk))
    return chunks


# Advanced Prompting Techniques
ReAct Agent
Complete implementation for a ReAct (Reasoning + Acting) agent:
def react_agent(question: str, tools: dict[str, callable], max_steps: int = 10) -> str:
    """Simple ReAct implementation with reasoning and tool use.

    The model is called repeatedly with a growing transcript. Each reply is
    appended to the prompt; if it contains an ``Action:`` line, the named tool
    is called and its result is appended as an ``Observation:``.

    Args:
        question: The question to answer.
        tools: Mapping of tool name to a callable taking one string argument.
            Note that the tool list in the prompt text is fixed and is not
            generated from this mapping.
        max_steps: Maximum number of model calls before giving up.

    Returns:
        The text after the last ``Final Answer:`` marker, or a fixed
        "max steps" message if the model never produces one.
    """
    prompt = f"""Answer the following question using available tools.
Tools:
- search(query): Search for information
- calculate(expression): Evaluate a mathematical expression
Format:
Thought: [your reasoning]
Action: [tool_name]("[arguments]")
When you have the final answer, write:
Thought: [final reasoning]
Final Answer: [your answer]
Question: {question}
"""
    for _ in range(max_steps):
        response = call_llm(prompt)
        prompt += response

        # Check for final answer
        if "Final Answer:" in response:
            return response.split("Final Answer:")[-1].strip()

        # Execute action. Lines are stripped before matching so an indented
        # "Action:" line is still found, and a reply that only mentions
        # "Action:" mid-sentence no longer raises IndexError.
        action_line = next(
            (
                line.strip()
                for line in response.splitlines()
                if line.strip().startswith("Action:")
            ),
            None,
        )
        if action_line is None:
            continue

        tool_name, args = parse_action(action_line)
        if tool_name is None:
            # Tell the model its action was malformed rather than reporting
            # an unknown tool called "None".
            prompt += f"\nObservation: Could not parse action: {action_line}\n\n"
        elif tool_name in tools:
            observation = tools[tool_name](args)
            prompt += f"\nObservation: {observation}\n\n"
        else:
            prompt += f"\nObservation: Unknown tool: {tool_name}\n\n"

    return "Max steps reached without final answer"
def parse_action(action_line: str) -> "tuple[str | None, str | None]":
    """Parse action line to extract tool name and arguments.

    Accepts ``Action: tool_name("arguments")``. Leading whitespace, single
    quotes instead of double quotes, and spaces just inside the parentheses
    are tolerated. Text after the closing parenthesis is ignored.

    Returns:
        ``(tool_name, arguments)`` on success, ``(None, None)`` when the line
        does not match the expected format.
    """
    import re

    match = re.match(
        r'''\s*Action:\s*(\w+)\(\s*(?:"([^"]*)"|'([^']*)')\s*\)''',
        action_line,
    )
    if match is None:
        return None, None

    double_quoted, single_quoted = match.group(2), match.group(3)
    # Exactly one of the two quote alternatives participates in a match.
    arguments = double_quoted if double_quoted is not None else single_quoted
    return match.group(1), arguments
# Example tools
def search(query: str) -> str:
    """Return a placeholder search result for *query*.

    This is a stand-in: a real deployment would call a search API here.
    """
    placeholder_result = f"Search results for '{query}': ..."
    return placeholder_result
def calculate(expression: str) -> str:
    """Safe mathematical expression evaluator.

    Supports numbers, parentheses, unary ``+``/``-`` and the binary operators
    ``+ - * / // **``.

    SECURITY: the expression comes from model output, so it is treated as
    untrusted. The previous implementation passed it to ``eval`` behind a
    character whitelist. That blocks names and calls, but still allows inputs
    such as ``9**9**9**9`` that exhaust CPU and memory. The expression is now
    parsed with ``ast`` and evaluated by a small walker that bounds the size
    of integer powers.

    Args:
        expression: Arithmetic expression text.

    Returns:
        The result as a string, ``"Invalid expression"`` if the text contains
        characters outside the arithmetic whitelist, or ``"Error: ..."`` if
        parsing or evaluation fails.
    """
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    max_power_bits = 100_000  # upper bound on the size of an integer power

    def evaluate(node):
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = evaluate(node.left)
            right = evaluate(node.right)
            if (
                isinstance(node.op, ast.Pow)
                and isinstance(left, int)
                and isinstance(right, int)
                and right > 0
                and left.bit_length() * right > max_power_bits
            ):
                raise ValueError("result too large")
            return binary_ops[type(node.op)](left, right)
        raise ValueError("unsupported expression")

    # Only allow safe mathematical operations
    allowed_chars = set('0123456789+-*/.() ')
    if not all(c in allowed_chars for c in expression):
        return "Invalid expression"

    try:
        # eval() ignored leading whitespace; strip so ast.parse does not
        # reject it as unexpected indentation.
        tree = ast.parse(expression.strip(), mode="eval")
        return str(evaluate(tree))
    except Exception as e:
        # Broad on purpose: any failure is reported back to the agent as an
        # observation string instead of aborting the reasoning loop.
        return f"Error: {e}"
# Usage example
# Tool names here must match the names the agent prompt advertises.
tools = {
    "search": search,
    "calculate": calculate,
}
answer = react_agent(
    "What is the elevation difference between the highest and lowest points in France?",
    tools,
)


# Document Analysis Chain
Complete implementation for multi-step document analysis using prompt chaining:
def analyze_document_chain(document: str) -> dict:
    """Run a four-step prompt chain over *document*.

    The first three prompts (entities, themes, sentiment) each see the full
    document. The fourth sees only the first 500 characters plus the three
    earlier outputs and produces the combined analysis.

    Returns:
        Dict with keys ``entities``, ``themes``, ``sentiment`` and
        ``analysis``, each holding the corresponding ``call_llm`` result.
    """
    # Prompt bodies stay flush-left so the text sent to the model carries no
    # indentation.

    # Step 1: named entities
    entities = call_llm(f"""Extract all named entities from this document.
List each entity with its type (PERSON, ORGANIZATION, LOCATION, DATE).
Document:
{document}
Entities:""")

    # Step 2: main themes
    themes = call_llm(f"""Summarize the main themes of this document in 3-5 bullet points.
Document:
{document}
Main themes:""")

    # Step 3: overall sentiment
    sentiment = call_llm(f"""What is the overall sentiment of this document?
Rate from 1 (very negative) to 5 (very positive) and explain briefly.
Document:
{document}
Sentiment analysis:""")

    # Step 4: synthesis built on the three earlier outputs
    analysis = call_llm(f"""Given the following analysis components, write a comprehensive
2-paragraph analysis of the document.
Document excerpt (first 500 chars): {document[:500]}...
Entities found: {entities}
Main themes: {themes}
Sentiment analysis: {sentiment}
Comprehensive analysis:""")

    report = {}
    report["entities"] = entities
    report["themes"] = themes
    report["sentiment"] = sentiment
    report["analysis"] = analysis
    return report
def analyze_document_parallel(document: str) -> dict:
    """Analyze *document*, running the three independent prompts concurrently.

    Entities, themes and sentiment are requested as concurrent asyncio tasks;
    the synthesis prompt runs once all three have finished. The coroutine is
    driven with ``asyncio.run``.
    NOTE(review): ``asyncio.run`` presumably cannot be used from code that is
    already inside a running event loop; confirm for your environment.

    Returns:
        Dict with keys ``entities``, ``themes``, ``sentiment`` and
        ``analysis``.
    """
    import asyncio

    async def run_parallel_analysis():
        # First stage: three prompts with no dependency on each other.
        first_stage_prompts = (
            f"Extract entities from:\n{document}",
            f"Summarize themes from:\n{document}",
            f"Analyze sentiment of:\n{document}",
        )
        pending = [
            asyncio.create_task(async_call_llm(stage_prompt))
            for stage_prompt in first_stage_prompts
        ]
        entities, themes, sentiment = await asyncio.gather(*pending)

        # Second stage: needs every first-stage result.
        analysis = await async_call_llm(
            f"Synthesize analysis from:\nEntities: {entities}\n"
            f"Themes: {themes}\nSentiment: {sentiment}"
        )
        return dict(
            entities=entities,
            themes=themes,
            sentiment=sentiment,
            analysis=analysis,
        )

    return asyncio.run(run_parallel_analysis())
# Usage example
document = """
Apple Inc. announced today that CEO Tim Cook will present the company's
new product lineup at an event in Cupertino on September 12, 2024.
The event is expected to feature the iPhone 16 and updates to the
Apple Watch series...
"""
analysis = analyze_document_chain(document)
# Show two of the four result fields, one per line.
print(
    f"Entities: {analysis['entities']}",
    f"Themes: {analysis['themes']}",
    sep="\n",
)