|
| 1 | +import json |
| 2 | + |
| 3 | +from unitxt import get_logger, load_dataset |
| 4 | +from unitxt.api import LoadFromDictionary, TaskCard, evaluate |
| 5 | +from unitxt.blocks import Rename |
| 6 | +from unitxt.inference import HFPipelineBasedInferenceEngine |
| 7 | +from unitxt.operators import IndexOf, ListFieldValues |
| 8 | +from unitxt.templates import MultipleChoiceTemplate |
| 9 | + |
| 10 | +logger = get_logger() |
| 11 | + |
| 12 | +# Set up question-answer pairs as a list of dictionaries |
| 13 | +data = [ |
| 14 | + {"Question": "What is the capital of Texas?", "Option A": "Austin", "Option B": "Houston", "Option C": "Dallas", "Answer" : "Austin"}, |
| 15 | + {"Question": "What is the color of the sky?", "Option A": "Pink", "Option B": "Red", "Option C": "Blue" , "Answer" : "Blue"}, |
| 16 | +] |
| 17 | + |
| 18 | + |
| 19 | +# Create a unitxt card that converts the input data to the format required by the |
| 20 | +# `tasks.qa.multiple_choice.open` task. |
| 21 | +# |
| 22 | +# It gathers the different option fields into the 'choices' field. |
| 23 | +# It then sets the 'answer' field to the index of the correct answer in the 'choices' field. |
| 24 | +card = TaskCard( |
| 25 | + loader=LoadFromDictionary(data = { "test": data }), |
| 26 | + preprocess_steps=[ |
| 27 | + Rename( |
| 28 | + field_to_field={"Answer": "answer", "Question" : "question"}, |
| 29 | + ), |
| 30 | + ListFieldValues(fields=["Option A", "Option B", "Option C"], to_field="choices"), |
| 31 | + IndexOf(search_in="choices", index_of="answer", to_field="answer") |
| 32 | + ], |
| 33 | + task="tasks.qa.multiple_choice.open" |
| 34 | +) |
| 35 | + |
| 36 | +template = MultipleChoiceTemplate( |
| 37 | + input_format="Answer the following question, returning only a single letter. Do not any add any explanations. \n\nQuestion: {question}\nAnswers:\n{choices}\nAnswer:", |
| 38 | + target_field="answer", |
| 39 | + choices_separator="\n", |
| 40 | + postprocessors=["processors.lower_case","processors.first_character"], |
| 41 | + ) |
| 42 | + |
| 43 | +dataset = load_dataset( |
| 44 | + card = card, |
| 45 | + template=template, |
| 46 | + split="test", |
| 47 | + format="formats.chat_api", |
| 48 | +) |
| 49 | + |
| 50 | +# Infer using SmolLM2-1.7B-Instruct through the Hugging Face pipeline API |
| 51 | +model = HFPipelineBasedInferenceEngine( |
| 52 | + model_name="HuggingFaceTB/SmolLM2-1.7B-Instruct", max_new_tokens=32 |
| 53 | +) |
| 54 | +# Change to this to infer with external APIs: |
| 55 | +#from unitxt.inference import CrossProviderInferenceEngine |
| 56 | +# model = CrossProviderInferenceEngine(model="llama-3-2-1b-instruct", provider="watsonx") |
| 57 | +# The provider can be one of: ["watsonx", "together-ai", "open-ai", "aws", "ollama", "bam"] |
| 58 | + |
| 59 | + |
| 60 | +predictions = model(dataset) |
| 61 | +results = evaluate(predictions=predictions, data=dataset) |
| 62 | + |
| 63 | +print("Example prompt:") |
| 64 | +print(json.dumps(results.instance_scores[0]["source"], indent=4)) |
| 65 | + |
| 66 | + |
| 67 | +print("Instance Results:") |
| 68 | +print(results.instance_scores) |
| 69 | + |
| 70 | +print("Global Results:") |
| 71 | +print(results.global_scores.summary) |
| 72 | + |
| 73 | + |
0 commit comments