Prompt Engineering Code Reference

Complete code implementations for Chapter 6: Prompt Engineering.


Table of Contents


Few-Shot Learning

Dynamic Few-Shot Classifier

Complete implementation for retrieving relevant examples based on query similarity:

from sklearn.metrics.pairwise import cosine_similarity

class DynamicFewShotClassifier:
    """Few-shot classifier that chooses its in-prompt examples per query.

    Every example is a dict with an ``'input'`` text and a ``'label'``.
    Example inputs are embedded once at construction; at query time the most
    similar examples are retrieved and formatted into a prompt.

    ``embedding_model`` only needs an ``encode(list_of_texts)`` method whose
    result ``cosine_similarity`` accepts. ``call_llm`` is not defined in this
    snippet and is expected to be provided by the surrounding code.
    """

    def __init__(self, examples: list[dict], embedding_model):
        self.examples = examples
        self.embedder = embedding_model
        # Embed the example pool once so each query costs a single encode call.
        self.example_embeddings = self.embedder.encode([ex['input'] for ex in examples])

    def get_examples(self, query: str, k: int = 5) -> list[dict]:
        """Retrieve up to k examples similar to the query, favouring label diversity.

        Returns ``min(k, len(self.examples))`` examples ordered from most to
        least similar; an empty list when ``k <= 0`` or there are no examples.
        """
        if k <= 0 or not self.examples:
            return []

        query_embedding = self.embedder.encode([query])
        similarities = cosine_similarity(query_embedding, self.example_embeddings)[0]

        # Indices of the examples, most similar first.
        ranked_indices = similarities.argsort()[::-1]
        return self._select_diverse(ranked_indices, k)

    def _select_diverse(self, ranked_indices, k: int) -> list[dict]:
        """Pick k examples from a best-first ranking, preferring unseen labels.

        The first ``k // 2`` picks are taken purely by rank; after that a
        candidate is taken only if its label is new. Candidates passed over
        for diversity are used to backfill when the ranking runs out before k
        picks were made, so label scarcity never shrinks the result.
        """
        chosen_ranks = []
        skipped_ranks = []
        labels_seen = set()

        for rank, idx in enumerate(ranked_indices):
            if len(chosen_ranks) >= k:
                break
            label = self.examples[idx].get('label')
            if label not in labels_seen or len(chosen_ranks) < k // 2:
                chosen_ranks.append(rank)
                labels_seen.add(label)
            else:
                skipped_ranks.append(rank)

        shortfall = k - len(chosen_ranks)
        if shortfall > 0 and skipped_ranks:
            # skipped_ranks is already best-first, so this takes the most
            # similar leftovers; re-sorting restores most-similar-first order.
            chosen_ranks.extend(skipped_ranks[:shortfall])
            chosen_ranks.sort()

        return [self.examples[ranked_indices[rank]] for rank in chosen_ranks]

    def build_prompt(self, examples: list[dict], query: str) -> str:
        """Build few-shot prompt from examples.

        Each example contributes an ``Input:``/``Label:`` pair; the prompt ends
        with the query and an open ``Label:`` for the model to complete.
        """
        prompt_parts = ["Classify the following input based on these examples:\n"]

        for ex in examples:
            prompt_parts.append(f"Input: {ex['input']}")
            # The trailing newline leaves a blank line between examples.
            prompt_parts.append(f"Label: {ex['label']}\n")

        prompt_parts.append(f"Input: {query}")
        prompt_parts.append("Label:")

        return "\n".join(prompt_parts)

    def classify(self, query: str, k: int = 5) -> str:
        """Classify query using up to k dynamically selected examples.

        Returns whatever ``call_llm`` returns for the assembled prompt.
        """
        examples = self.get_examples(query, k)
        prompt = self.build_prompt(examples, query)
        return call_llm(prompt)


# Usage example
# Each example pairs an input text with the label the classifier should emit.
examples = [
    {"input": "Where is my order?", "label": "ORDER_STATUS"},
    {"input": "I want a refund", "label": "RETURNS"},
    {"input": "Is this in stock?", "label": "PRODUCT_INFO"},
    # ... more examples
]

# NOTE(review): `embedding_model` is not defined in this snippet; supply any
# object exposing `encode(list_of_texts)`.
classifier = DynamicFewShotClassifier(examples, embedding_model)
# Retrieves similar examples, builds the few-shot prompt and sends it to call_llm.
result = classifier.classify("When will my package arrive?")

Context Window Management

Context Manager

Complete implementation for allocating context budget across prompt components:

class ContextManager:
    """Split a model's context window between conversation history and documents.

    ``tokenizer`` only needs an ``encode(text)`` method returning a sized
    sequence; its length is used as the token count.
    """

    def __init__(self, max_tokens: int, tokenizer):
        self.max_tokens = max_tokens
        self.tokenizer = tokenizer

    def allocate(
        self,
        system_prompt: str,
        reserved_output: int,
        conversation: list[dict],
        documents: list[str],
        conversation_ratio: float = 0.4
    ) -> tuple[list[dict], list[str]]:
        """Allocate context budget across components.

        Args:
            system_prompt: Always kept in full; its tokens are deducted first.
            reserved_output: Tokens held back for the model's reply.
            conversation: Messages (dicts with a ``'content'`` key), oldest first.
            documents: Document texts, assumed sorted most relevant first.
            conversation_ratio: Share of the remaining budget given to the
                conversation; documents get the rest (tune for your use case).

        Returns:
            ``(trimmed_conversation, trimmed_documents)``.

        Raises:
            ValueError: If ``conversation_ratio`` is outside ``[0, 1]``.
        """
        if not 0.0 <= conversation_ratio <= 1.0:
            raise ValueError(
                f"conversation_ratio must be between 0 and 1, got {conversation_ratio}"
            )

        system_tokens = self.count(system_prompt)
        # Clamp at zero: when the system prompt plus reserved output already
        # fill the window there is simply nothing left to hand out.
        available = max(0, self.max_tokens - system_tokens - reserved_output)

        conversation_budget = int(available * conversation_ratio)
        # Documents get the remainder so no token is lost to int() truncation.
        document_budget = available - conversation_budget

        # Trim conversation (keep most recent)
        trimmed_conversation = self.trim_conversation(conversation, conversation_budget)

        # Trim documents (keep most relevant, assuming sorted by relevance)
        trimmed_documents = self.trim_documents(documents, document_budget)

        return trimmed_conversation, trimmed_documents

    def trim_conversation(self, conversation: list[dict], budget: int) -> list[dict]:
        """Keep most recent messages that fit in budget.

        Walks from the newest message backwards and stops at the first one
        that does not fit, so the result is a contiguous tail of
        ``conversation`` in its original order.
        """
        kept = []
        used = 0
        for message in reversed(conversation):
            tokens = self.count(message['content'])
            if used + tokens > budget:
                break
            kept.append(message)
            used += tokens
        # Collected newest-first; one reverse restores chronological order
        # without repeated front insertions.
        kept.reverse()
        return kept

    def trim_documents(self, documents: list[str], budget: int) -> list[str]:
        """Keep most relevant documents that fit in budget.

        Takes documents in the given order and stops at the first one that
        does not fit; later documents are not considered.
        """
        result = []
        total = 0
        for doc in documents:
            tokens = self.count(doc)
            if total + tokens > budget:
                break
            result.append(doc)
            total += tokens
        return result

    def count(self, text: str) -> int:
        """Count tokens in text."""
        return len(self.tokenizer.encode(text))


# Usage example
import tiktoken

# The tokenizer only has to expose `encode(text)`; a tiktoken encoding does.
tokenizer = tiktoken.encoding_for_model("gpt-4")
manager = ContextManager(max_tokens=8000, tokenizer=tokenizer)

# NOTE(review): `conversation_history` and `retrieved_documents` are not
# defined in this snippet; supply a list of message dicts and a list of
# document strings.
trimmed_conv, trimmed_docs = manager.allocate(
    system_prompt="You are a helpful assistant.",
    reserved_output=1000,
    conversation=conversation_history,
    documents=retrieved_documents
)

Conversation Summarization

Complete implementation for managing long conversations with summarization:

def manage_conversation_with_summary(
    conversation: list[dict],
    max_tokens: int,
    summary_threshold: float = 0.7,
    recent_ratio: float = 0.6
):
    """Summarize old messages when conversation gets too long.

    Args:
        conversation: Messages (dicts with ``'role'`` and ``'content'``),
            oldest first.
        max_tokens: Token budget for the whole conversation.
        summary_threshold: Fraction of ``max_tokens`` at which summarization
            starts.
        recent_ratio: Fraction of ``max_tokens`` reserved for the most recent
            messages, which are kept verbatim.

    Returns:
        ``conversation`` itself when it is below the threshold or when there
        is nothing older to summarize; otherwise a new list made of one
        system message holding the summary followed by the recent messages.
    """
    # Count each message once; the counts are reused when locating the split.
    token_counts = [count_tokens(m['content']) for m in conversation]

    if sum(token_counts) < max_tokens * summary_threshold:
        return conversation  # Still fits

    # Find split point: keep recent messages, summarize old
    recent_budget = int(max_tokens * recent_ratio)
    split = len(conversation)
    recent_tokens = 0

    for index in reversed(range(len(conversation))):
        if recent_tokens + token_counts[index] > recent_budget:
            break
        recent_tokens += token_counts[index]
        split = index

    old_messages = conversation[:split]
    recent_messages = conversation[split:]

    # Everything fits in the recent budget (possible when summary_threshold
    # is at or below recent_ratio): summarizing nothing would waste an LLM
    # call and prepend a summary of an empty conversation.
    if not old_messages:
        return conversation

    summary = summarize_conversation(old_messages)

    # Return summary + recent messages
    return [{"role": "system", "content": f"Previous conversation summary:\n{summary}"}] + recent_messages


def summarize_conversation(messages: list[dict]) -> str:
    """Ask the LLM for a concise summary of the given messages.

    Every message is rendered as ``ROLE: content`` on its own line (role
    upper-cased) and embedded in a summarization prompt. The reply from
    ``call_llm``, requested with ``max_tokens=500``, is returned as-is.
    """
    transcript_lines = []
    for msg in messages:
        speaker = msg['role'].upper()
        transcript_lines.append(f"{speaker}: {msg['content']}")
    transcript = "\n".join(transcript_lines)

    request = f"""Summarize this conversation concisely, preserving key information:

{transcript}

Summary:"""

    return call_llm(request, max_tokens=500)


def count_tokens(text: str, model: str = "gpt-4") -> int:
    """Count tokens in text (implement with your tokenizer).

    Args:
        text: Text to measure.
        model: Model name used to look up the matching tiktoken encoding.

    Returns:
        Number of tokens the encoding produces for ``text``.
    """
    # Using tiktoken for OpenAI models
    import tiktoken
    enc = tiktoken.encoding_for_model(model)
    # By default encode() raises ValueError when the text contains a
    # special-token string such as "<|endoftext|>". When merely counting
    # arbitrary user or document text, encode such strings as plain text.
    return len(enc.encode(text, disallowed_special=()))

Document Chunking

Complete implementation for intelligent document chunking with overlap:

from nltk.tokenize import sent_tokenize

def chunk_document(text: str, chunk_size: int = 1000, overlap: int = 200) -> list[str]:
    """Chunk document with overlap for continuity.

    Sentences are packed greedily into chunks of at most ``chunk_size``
    tokens. Each new chunk starts with trailing sentences of the previous one
    (up to ``overlap`` tokens) so context carries across the boundary.

    Args:
        text: Document text; split into sentences with ``sent_tokenize``.
        chunk_size: Target maximum tokens per chunk. A single sentence longer
            than this still becomes its own (oversized) chunk.
        overlap: Maximum tokens of trailing sentences repeated at the start
            of the next chunk.

    Returns:
        Chunk strings in document order; empty for empty or blank text.
    """
    if not text.strip():
        return []

    chunks = []
    current = []  # (sentence, token_count) pairs in the chunk being built
    current_size = 0

    for sentence in sent_tokenize(text):
        sentence_tokens = count_tokens(sentence)

        if current and current_size + sentence_tokens > chunk_size:
            chunks.append(' '.join(s for s, _ in current))

            # Carry trailing sentences over as overlap, but never so many
            # that overlap plus the incoming sentence exceeds chunk_size.
            carry_budget = min(overlap, chunk_size - sentence_tokens)
            carried = []
            carried_size = 0
            for pair in reversed(current):
                if carried_size + pair[1] > carry_budget:
                    break
                carried.append(pair)
                carried_size += pair[1]
            carried.reverse()  # collected newest-first; restore reading order

            current = carried
            current_size = carried_size

        # Token counts are stored with the sentence so the overlap step
        # never has to re-tokenize.
        current.append((sentence, sentence_tokens))
        current_size += sentence_tokens

    if current:
        chunks.append(' '.join(s for s, _ in current))

    return chunks


def chunk_by_semantic_similarity(
    text: str,
    embedding_model,
    similarity_threshold: float = 0.7,
    max_chunk_size: int = 1000
) -> list[str]:
    """Chunk document based on semantic similarity between sentences.

    Consecutive sentences stay in the same chunk while each is at least
    ``similarity_threshold`` cosine-similar to the sentence before it and the
    chunk stays within ``max_chunk_size`` tokens.

    Args:
        text: Document text; split into sentences with ``sent_tokenize``.
        embedding_model: Object with ``encode(list_of_sentences)`` returning
            one embedding per sentence.
        similarity_threshold: Similarity below which a new chunk starts.
        max_chunk_size: Maximum tokens per chunk.

    Returns:
        Chunk strings in document order; empty for empty or blank text.
    """
    if not text.strip():
        return []

    sentences = sent_tokenize(text)
    if not sentences:
        # Nothing to embed; indexing sentences[0] below would raise IndexError.
        return []

    embeddings = embedding_model.encode(sentences)

    chunks = []
    current_chunk = [sentences[0]]
    current_size = count_tokens(sentences[0])

    for i in range(1, len(sentences)):
        # Check similarity with previous sentence
        sim = cosine_similarity([embeddings[i-1]], [embeddings[i]])[0][0]
        sentence_tokens = count_tokens(sentences[i])

        # Start new chunk if similarity is low or size exceeded
        if sim < similarity_threshold or current_size + sentence_tokens > max_chunk_size:
            chunks.append(' '.join(current_chunk))
            current_chunk = [sentences[i]]
            current_size = sentence_tokens
        else:
            current_chunk.append(sentences[i])
            current_size += sentence_tokens

    # current_chunk always holds at least one sentence at this point.
    chunks.append(' '.join(current_chunk))

    return chunks

Advanced Prompting Techniques

ReAct Agent

Complete implementation for a ReAct (Reasoning + Acting) agent:

def react_agent(question: str, tools: dict[str, callable], max_steps: int = 10) -> str:
    """Simple ReAct implementation with reasoning and tool use.

    Repeatedly calls ``call_llm`` on a growing prompt. Each reply is appended
    to the prompt; if it contains ``Final Answer:`` the text after the last
    occurrence is returned. Otherwise the first ``Action:`` line is parsed
    with ``parse_action``, the named tool is called with the argument string,
    and the result is appended as an ``Observation:``.

    Args:
        question: The question to answer.
        tools: Mapping of tool name to a callable taking one string argument.
        max_steps: Maximum number of LLM calls.

    Returns:
        The final answer, or ``"Max steps reached without final answer"``.
    """
    # NOTE(review): the tool list in the prompt is fixed text (search,
    # calculate) and is not derived from ``tools``; keep the two in sync.
    prompt = f"""Answer the following question using available tools.

Tools:
- search(query): Search for information
- calculate(expression): Evaluate a mathematical expression

Format:
Thought: [your reasoning]
Action: [tool_name]("[arguments]")

When you have the final answer, write:
Thought: [final reasoning]
Final Answer: [your answer]

Question: {question}

"""
    for _ in range(max_steps):
        response = call_llm(prompt)
        prompt += response

        # Check for final answer
        if "Final Answer:" in response:
            return response.split("Final Answer:")[-1].strip()

        # Find the first line that is an action. Lines are stripped so an
        # indented "Action:" is still recognised, and a reply that merely
        # mentions "Action:" mid-sentence no longer raises IndexError.
        action_line = next(
            (
                line.strip()
                for line in response.splitlines()
                if line.strip().startswith("Action:")
            ),
            None,
        )
        if action_line is None:
            continue

        # Execute action
        tool_name, args = parse_action(action_line)

        if tool_name is None:
            # Tell the model what went wrong instead of "Unknown tool: None".
            prompt += (
                "\nObservation: Could not parse action. "
                'Use the format: Action: tool_name("arguments")\n\n'
            )
        elif tool_name in tools:
            observation = tools[tool_name](args)
            prompt += f"\nObservation: {observation}\n\n"
        else:
            prompt += f"\nObservation: Unknown tool: {tool_name}\n\n"

    return "Max steps reached without final answer"


def parse_action(action_line: str) -> tuple[str, str]:
    """Parse action line to extract tool name and arguments.

    The documented format ``Action: tool_name("arguments")`` is tried first.
    If that fails, a lenient fallback accepts single-quoted or unquoted
    arguments, e.g. ``Action: calculate(2 + 2)``. Surrounding whitespace on
    the line is ignored.

    Returns:
        ``(tool_name, arguments)``, or ``(None, None)`` when the line is not
        a recognisable action.
    """
    import re

    line = action_line.strip()

    # Format: Action: tool_name("arguments")
    match = re.match(r'Action:\s*(\w+)\("([^"]*)"\)', line)
    if match:
        return match.group(1), match.group(2)

    # Fallback: take everything between the first "(" and the final ")".
    match = re.match(r'Action:\s*(\w+)\(\s*(.*?)\s*\)\s*$', line)
    if match:
        tool_name, arguments = match.group(1), match.group(2)
        # Drop one pair of matching surrounding quotes, if present.
        if len(arguments) >= 2 and arguments[0] == arguments[-1] and arguments[0] in "'\"":
            arguments = arguments[1:-1]
        return tool_name, arguments

    return None, None


# Example tools
def search(query: str) -> str:
    """Stand-in search tool that returns a placeholder result string."""
    # In production, this would call a search API
    placeholder_results = f"Search results for '{query}': ..."
    return placeholder_results

def calculate(expression: str) -> str:
    """Safe mathematical expression evaluator.

    Supports numbers, parentheses, unary ``+``/``-`` and the binary operators
    ``+ - * / // **``.

    SECURITY: this tool receives model-generated text. The previous version
    passed it to ``eval`` behind a character whitelist; the whitelist admits
    ``**``, so input such as ``9**9**9**9`` could hang the process.
    Expressions are now parsed with ``ast`` and evaluated by a small
    interpreter that only knows arithmetic and bounds exponentiation.

    Returns:
        The result as a string, ``"Invalid expression"`` if the input
        contains a character outside the whitelist, or ``"Error: ..."`` if
        it cannot be parsed or evaluated.
    """
    import ast
    import operator

    # Only allow safe mathematical operations
    allowed_chars = set('0123456789+-*/.() ')
    if not all(c in allowed_chars for c in expression):
        return "Invalid expression"

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    # Upper bound on the estimated bit length of an integer power.
    max_power_bits = 10_000

    def evaluate(node):
        """Evaluate an arithmetic-only AST node."""
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = evaluate(node.left)
            right = evaluate(node.right)
            if (
                isinstance(node.op, ast.Pow)
                and isinstance(left, int)
                and isinstance(right, int)
                and abs(left) > 1
                and right > 0
                and left.bit_length() * right > max_power_bits
            ):
                # Refuse before computing: the result would be enormous.
                raise OverflowError("result too large")
            return binary_ops[type(node.op)](left, right)
        raise ValueError("unsupported expression")

    try:
        # eval() tolerated surrounding spaces; strip to keep accepting them.
        tree = ast.parse(expression.strip(), filename="<string>", mode="eval")
        return str(evaluate(tree))
    except (ArithmeticError, ValueError, TypeError, SyntaxError, RecursionError, MemoryError) as e:
        return f"Error: {e}"


# Usage example
# Keys are the tool names the model writes in its "Action:" lines.
tools = {
    "search": search,
    "calculate": calculate
}

# Runs the reason/act loop with the default max_steps.
answer = react_agent(
    "What is the elevation difference between the highest and lowest points in France?",
    tools
)

Document Analysis Chain

Complete implementation for multi-step document analysis using prompt chaining:

def analyze_document_chain(document: str) -> dict:
    """Run a four-step prompt chain over a document.

    Three prompts (entities, themes, sentiment) each see the full document;
    a fourth synthesizes their outputs, together with the first 500
    characters of the document, into a written analysis. Calls to
    ``call_llm`` are made sequentially.

    Returns:
        A dict with keys ``"entities"``, ``"themes"``, ``"sentiment"`` and
        ``"analysis"`` holding the raw ``call_llm`` outputs.
    """

    # Step 1: named-entity extraction
    found_entities = call_llm(f"""Extract all named entities from this document.
List each entity with its type (PERSON, ORGANIZATION, LOCATION, DATE).

Document:
{document}

Entities:""")

    # Step 2: theme summary
    main_themes = call_llm(f"""Summarize the main themes of this document in 3-5 bullet points.

Document:
{document}

Main themes:""")

    # Step 3: sentiment rating
    sentiment_report = call_llm(f"""What is the overall sentiment of this document?
Rate from 1 (very negative) to 5 (very positive) and explain briefly.

Document:
{document}

Sentiment analysis:""")

    # Step 4: synthesis built on the three earlier outputs
    synthesis_request = f"""Given the following analysis components, write a comprehensive
2-paragraph analysis of the document.

Document excerpt (first 500 chars): {document[:500]}...

Entities found: {found_entities}

Main themes: {main_themes}

Sentiment analysis: {sentiment_report}

Comprehensive analysis:"""

    written_analysis = call_llm(synthesis_request)

    return dict(
        entities=found_entities,
        themes=main_themes,
        sentiment=sentiment_report,
        analysis=written_analysis,
    )


def analyze_document_parallel(document: str) -> dict:
    """Analyze a document, running the independent first-stage prompts concurrently.

    The entities, themes and sentiment prompts are scheduled as asyncio tasks
    and awaited together; a final synthesis prompt then combines their
    outputs. The coroutine is driven with ``asyncio.run``, so this function
    is a blocking, synchronous entry point.

    Returns:
        A dict with keys ``"entities"``, ``"themes"``, ``"sentiment"`` and
        ``"analysis"`` holding the ``async_call_llm`` outputs.
    """
    import asyncio

    async def run_parallel_analysis():
        # First stage: three prompts that do not depend on each other.
        first_stage_prompts = (
            f"Extract entities from:\n{document}",
            f"Summarize themes from:\n{document}",
            f"Analyze sentiment of:\n{document}",
        )
        pending = [
            asyncio.create_task(async_call_llm(stage_prompt))
            for stage_prompt in first_stage_prompts
        ]
        entities, themes, sentiment = await asyncio.gather(*pending)

        # Second stage: needs all three first-stage results.
        synthesis_prompt = (
            f"Synthesize analysis from:\nEntities: {entities}\n"
            f"Themes: {themes}\nSentiment: {sentiment}"
        )
        analysis = await async_call_llm(synthesis_prompt)

        report = {}
        report["entities"] = entities
        report["themes"] = themes
        report["sentiment"] = sentiment
        report["analysis"] = analysis
        return report

    return asyncio.run(run_parallel_analysis())


# Usage example
# Sample input: a short news-style passage with people, places and dates.
document = """
Apple Inc. announced today that CEO Tim Cook will present the company's
new product lineup at an event in Cupertino on September 12, 2024.
The event is expected to feature the iPhone 16 and updates to the
Apple Watch series...
"""

# Runs the sequential chain; the result dict also holds "sentiment" and
# "analysis", which are not printed here.
analysis = analyze_document_chain(document)
print(f"Entities: {analysis['entities']}")
print(f"Themes: {analysis['themes']}")