# Source: olive-recipes/google-vit-base-patch16-224/QNN/vit-base-patch16-224.py at main · CodeLinaro/olive-recipes · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
from logging import getLogger
from pathlib import Path

import numpy as np
import torchvision.transforms as transforms
import transformers
from torch import from_numpy
from torch.utils.data import Dataset

from olive.data.registry import Registry

# Module-level logger named after this module.
logger = getLogger(__name__)

def get_imagenet_label_map():
    """Return a mapping from each ImageNet class entry's first field to its index.

    The class-index JSON is read from ``./cache/data/imagenet_class_index.json``
    (relative to the current working directory). When that file does not exist
    it is downloaded from the torchvision repository and written to the cache
    path so later calls do not need network access.

    Returns:
        dict: ``{entry[0]: int(index)}`` for every ``index -> entry`` pair in
        the JSON document (``entry[0]`` is presumably the WordNet synset id).

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an error status.
    """
    import json

    cache_file = Path("./cache/data/imagenet_class_index.json")
    if cache_file.exists():
        with open(cache_file, encoding="utf-8") as f:
            content = json.load(f)
    else:
        import requests

        imagenet_class_index_url = (
            "https://raw.githubusercontent.com/pytorch/vision/main/gallery/assets/imagenet_class_index.json"
        )
        # requests applies no timeout by default; without one a stalled
        # connection would block the caller indefinitely.
        response = requests.get(imagenet_class_index_url, timeout=30)
        response.raise_for_status()  # Ensure the request was successful
        content = response.json()
        cache_file.parent.resolve().mkdir(parents=True, exist_ok=True)
        with open(cache_file, "w", encoding="utf-8") as f:
            json.dump(content, f)

    return {v[0]: int(k) for k, v in content.items()}

def adapt_label_for_mini_imagenet(labels: list, label_names: list):
    """Translate dataset-local label indices into ImageNet class indices.

    Args:
        labels: Indices into ``label_names``, one per sample.
        label_names: Class names of the dataset; each name must be a key of
            the mapping returned by ``get_imagenet_label_map``.

    Returns:
        list: The ImageNet index for every entry of ``labels``, in order.

    Raises:
        KeyError: If a class name is missing from the ImageNet label map.
    """
    name_to_imagenet_index = get_imagenet_label_map()
    remapped = []
    for local_index in labels:
        class_name = label_names[local_index]
        remapped.append(name_to_imagenet_index[class_name])
    return remapped

class ImagenetDataset(Dataset):
    """Torch dataset over pre-processed images and their labels.

    ``data`` is any mapping exposing NumPy arrays under the keys ``"images"``
    and ``"labels"``; both are converted to tensors with ``from_numpy`` when
    the dataset is constructed.
    """

    def __init__(self, data):
        images_array = data["images"]
        labels_array = data["labels"]
        self.images = from_numpy(images_array)
        self.labels = from_numpy(labels_array)

    def __len__(self):
        # Only indices valid for both tensors are exposed.
        image_count = len(self.images)
        label_count = len(self.labels)
        return image_count if image_count <= label_count else label_count

    def __getitem__(self, idx):
        # Returns (model inputs keyed by "pixel_values", label).
        model_inputs = {"pixel_values": self.images[idx]}
        target = self.labels[idx]
        return model_inputs, target


@Registry.register_post_process()
def dataset_post_process(output):
    """Reduce model output to the index of the largest value along axis 1.

    Args:
        output: Either a ``transformers`` ``ModelOutput`` (its ``logits``
            attribute is used) or an object that itself supports
            ``argmax(axis=1)``.

    Returns:
        The result of ``argmax(axis=1)`` on the selected scores.
    """
    if isinstance(output, transformers.modeling_outputs.ModelOutput):
        scores = output.logits
    else:
        scores = output
    return scores.argmax(axis=1)

# Image processor for the "google/vit-base-patch16-224" checkpoint. It is created
# once at import time and reused by dataset_pre_process for every sample.
# NOTE(review): from_pretrained presumably needs network access or a local
# Hugging Face cache, so importing this module may fail offline - confirm.
from transformers import AutoImageProcessor
processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224", use_fast=True)

@Registry.register_pre_process()
def dataset_pre_process(output_data, **kwargs):
    """Convert a raw image dataset into an ``ImagenetDataset`` of model inputs.

    Up to ``size`` samples are taken from ``output_data``; each image is
    converted to RGB and run through the module-level ``processor``. For the
    ``"mini-imagenet"`` dataset the labels are remapped with
    ``adapt_label_for_mini_imagenet``.

    Args:
        output_data: Dataset whose samples expose ``"image"`` and ``"label"``
            and which provides ``shuffle``, ``info.dataset_name`` and
            ``features``.
        **kwargs:
            shuffle (bool): Shuffle before sampling. Defaults to ``True``.
            seed (int): Seed passed to ``output_data.shuffle``. Defaults to 42.
            cache_key (str): When truthy, processed arrays are loaded from /
                saved to ``./cache/data/{cache_key}_{dataset_name}_{size}.npz``.
            size (int): Maximum number of samples to process. Defaults to 256.

    Returns:
        ImagenetDataset: The processed (or cached) images and labels.
    """
    size = kwargs.get("size", 256)
    cache_key = kwargs.get("cache_key")
    dataset_name = output_data.info.dataset_name

    # Consult the cache before doing any work on the dataset: a cache hit
    # makes shuffling pointless.
    # NOTE: the cache file name does not encode ``shuffle``/``seed``, so a
    # cache written with other shuffle settings is returned as-is.
    cache_file = None
    if cache_key:
        cache_file = Path(f"./cache/data/{cache_key}_{dataset_name}_{size}.npz")
        if cache_file.exists():
            # ImagenetDataset reads both arrays in its constructor, so the
            # archive can be closed as soon as it returns.
            with np.load(cache_file) as data:
                return ImagenetDataset(data)

    if kwargs.get("shuffle", True):
        output_data = output_data.shuffle(seed=kwargs.get("seed", 42))

    labels = []
    images = []
    for i, sample in enumerate(output_data):
        if i >= size:
            break
        image = sample["image"]
        label = sample["label"]
        image = image.convert("RGB")
        images.append(processor(image)["pixel_values"][0])
        labels.append(label)

    if dataset_name == "mini-imagenet":
        labels = adapt_label_for_mini_imagenet(labels, output_data.features["label"].names)

    # Build each array once and share it between the cache file and the
    # returned dataset instead of converting the lists twice.
    image_array = np.array(images)
    label_array = np.array(labels)
    result_data = ImagenetDataset({"images": image_array, "labels": label_array})

    if cache_file:
        cache_file.parent.resolve().mkdir(parents=True, exist_ok=True)
        np.savez(cache_file, images=image_array, labels=label_array)

    return result_data