|
45 | 45 |
|
46 | 46 | from llava import conversation as conversation_lib |
47 | 47 | from llava.model import * |
48 | | -from llava.mm_utils import process_highres_image, process_anyres_image, process_highres_image_crop_split, tokenizer_image_token |
| 48 | +from llava.mm_utils import process_highres_image, process_native_image, process_anyres_image, process_highres_image_crop_split, tokenizer_image_token |
49 | 49 | from llava.utils import rank0_print, process_video_with_pyav, process_video_with_decord |
50 | 50 |
|
51 | 51 | torch.multiprocessing.set_sharing_strategy("file_system") |
@@ -1086,6 +1086,11 @@ def process_image(self, image_file: Union[bytes, str], overwrite_image_aspect_ra |
1086 | 1086 | image_aspect_ratio = overwrite_image_aspect_ratio |
1087 | 1087 | if image_aspect_ratio == "highres": |
1088 | 1088 | image = process_highres_image(image, self.data_args.image_processor, self.data_args.image_grid_pinpoints) |
| 1089 | + elif image_aspect_ratio == "native": |
| 1090 | + image = process_native_image(image, self.data_args.image_processor) |
| 1091 | + if type(image) is dict: |
| 1092 | + grid_thw = image['grid_thw'] |
| 1093 | + image = image['pixel_values'] |
1089 | 1094 | elif image_aspect_ratio == "anyres" or "anyres_max" in image_aspect_ratio: |
1090 | 1095 | image = process_anyres_image(image, self.data_args.image_processor, self.data_args.image_grid_pinpoints) |
1091 | 1096 | if type(image) is dict: |
@@ -1115,19 +1120,15 @@ def expand2square(pil_img, background_color): |
1115 | 1120 | image = image.resize((504, 504)) |
1116 | 1121 | grid_thw = [1,36,36] |
1117 | 1122 |
|
1118 | | - image = processor.preprocess(image, return_tensors="pt", do_resize=False)["pixel_values"] |
1119 | | - # assert 1==3, f'image.size = {image['pixel_values'].shape}, processor = {processor}' |
1120 | | - |
| 1123 | + image = processor.preprocess(image, return_tensors="pt", do_resize=False)["pixel_values"] |
1121 | 1124 | else: |
1122 | | - # assert 1==3, f'processor:{processor}, image.size={image.size}, image_aspect_ratio={image_aspect_ratio} not supported yet' |
1123 | 1125 | if 'siglip' in str(processor).lower(): |
1124 | 1126 | image = image.resize((512, 512)) |
1125 | 1127 | image = processor.preprocess(image, return_tensors="pt", do_resize=False)["pixel_values"] |
1126 | 1128 | else: |
1127 | 1129 | image = image.resize((504, 504)) |
1128 | 1130 | image = processor.preprocess(image, return_tensors="pt", do_resize=False, do_center_crop=False)["pixel_values"] |
1129 | 1131 |
|
1130 | | - # assert 1==3, f'image.size = {image.shape}, processor = {processor}' |
1131 | 1132 | if grid_thw is None: |
1132 | 1133 | return image, image_size, "image" |
1133 | 1134 | return image, image_size, "image", grid_thw |
|
0 commit comments