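"""BLIP VQA handler.

Loads a pretrained BLIP (ViT-base) visual question answering model once at import
time and exposes handle(), which answers a free-form question about an image given
as a local path or HTTP(S) URL.
"""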
import time

from PIL import Image
import requests
import base64
import torch
from torchvision import transforms
from torchvision.transforms.functional import InterpolationMode
from models.blip_vqa import blip_vqa


image_size = 480
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Local checkpoint of the BLIP base VQA model (CapFilt-L weights).
model_path = './ckpt/model_base_vqa_capfilt_large.pth'
model = blip_vqa(pretrained=model_path, image_size=image_size, vit='base')
model = model.to(device)

def run_time(func):
    """Decorator that prints the wall-clock time of each call."""
    def inner(*args, **kwargs):
        start = time.time()
        result = func(*args, **kwargs)
        print("Run time: {} s".format(round(time.time() - start, 3)))
        return result
    return inner

def load_demo_image(img_src, image_size, device):
    """Load an image from an HTTP(S) URL or a local path and preprocess it for BLIP."""
    if img_src.startswith(('http://', 'https://')):
        raw_image = Image.open(requests.get(img_src, stream=True).raw).convert('RGB')
    else:
        raw_image = Image.open(img_src).convert('RGB')

    transform = transforms.Compose([
        transforms.Resize((image_size, image_size), interpolation=InterpolationMode.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))
    ])
    image = transform(raw_image).unsqueeze(0).to(device)
    return image

@run_time
def inference(model, image, question='what is in the picture?'):
    """Generate a free-form answer for the given image/question pair."""
    model.eval()
    with torch.no_grad():
        answer = model(image, question, train=False, inference='generate')
        return answer[0]


def handle(conf):
    """Entry point: answer a question about the image referenced in `conf`."""
    img_src = conf['Photo']      # value_type: str # description: local image path or HTTP(S) URL
    question = conf['Question']  # value_type: str # description: question about the image
    image = load_demo_image(img_src, image_size, device)
    res = inference(model, image, question)
    print('Answer:', res)
    return {'Answer': res}



# handle({'Photo': './img/demo.jpg', 'Question': 'What is in this image?'})
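
# Note: the original `base64_str` variable name and the (otherwise unused) `base64`
# import suggest that 'Photo' may also arrive as a base64-encoded image rather than
# a path/URL. A minimal sketch of how that case could be handled, assuming the
# payload is a plain base64 string (optionally with a "data:image/...;base64," prefix):
import io

def load_base64_image(b64_str, image_size, device):
    if ',' in b64_str:  # strip an optional data-URI prefix
        b64_str = b64_str.split(',', 1)[1]
    raw_image = Image.open(io.BytesIO(base64.b64decode(b64_str))).convert('RGB')
    transform = transforms.Compose([
        transforms.Resize((image_size, image_size), interpolation=InterpolationMode.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))
    ])
    return transform(raw_image).unsqueeze(0).to(device)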