from typing import List, Tuple
from pydantic import BaseModel, Field
import instructor
from openai import OpenAI
import numpy as np
from scipy.stats import entropy
class Response(BaseModel):
    """A single structured model response sampled for a candidate prompt.

    Defined here because COSPSelector references it (`response_model=Response`,
    `r.content`, `r.confidence`) — without this definition the module raises
    NameError at runtime.
    """
    # verbatim answer text; identical texts across samples signal consistency
    content: str
    # model's self-reported confidence; feeds the entropy metric
    confidence: float


class Example(BaseModel):
    """A candidate few-shot example together with its COSP quality metrics."""
    # the candidate prompt/example text being scored
    text: str
    score: float = Field(description="Combined quality score")
    entropy: float = Field(description="Entropy of responses")
    repetitiveness: float = Field(description="Repetitiveness of responses")
class COSPSelector:
def __init__(self, client: OpenAI, n_samples: int = 3):
self.client = instructor.from_openai(client)
self.n_samples = n_samples
def generate_responses(self, prompt: str) -> List[Response]:
return [
self.client.chat.completions.create(
model="gpt-4",
messages=[{"role": "user", "content": prompt}],
response_model=Response
)
for _ in range(self.n_samples)
]
def calculate_metrics(self, responses: List[Response]) -> Tuple[float, float]:
confidences = [r.confidence for r in responses]
entropy_score = entropy(confidences)
unique_responses = len(set(r.content for r in responses))
repetitiveness = 1 - (unique_responses / len(responses))
return entropy_score, repetitiveness
def select_examples(self, candidates: List[str], k: int) -> List[Example]:
examples = []
for text in candidates:
responses = self.generate_responses(text)
entropy_score, repetitiveness = self.calculate_metrics(responses)
# Combined score (lower is better)
score = entropy_score - repetitiveness
examples.append(Example(
text=text,
score=score,
entropy=entropy_score,
repetitiveness=repetitiveness
))
# Sort by score (lower is better) and select top k
return sorted(examples, key=lambda x: x.score)[:k]