refine promptiqa.py
- PromptIQA/models/gc_loss.py +0 -99
- PromptIQA/models/monet_IPF.py +0 -397
- PromptIQA/models/monet_test.py +0 -389
- PromptIQA/models/monet_wo_prompt.py +0 -392
- PromptIQA/models/{monet.py → promptiqa.py} +2 -84
- PromptIQA/models/vit_base.py +0 -402
- PromptIQA/models/vit_large.py +0 -405
- PromptIQA/run_promptIQA copy.py +0 -109
- PromptIQA/run_promptIQA.py +2 -2
- PromptIQA/t.py +0 -2
- PromptIQA/test.py +0 -429
- PromptIQA/test.sh +0 -9
- best_model.pth.tar +3 -0
- get_examplt.py +0 -27
PromptIQA/models/gc_loss.py
DELETED
@@ -1,99 +0,0 @@
import torch.nn as nn
import torch
import numpy as np


class GC_Loss(nn.Module):
    def __init__(self, queue_len=800, alpha=0.5, beta=0.5, gamma=1):
        super(GC_Loss, self).__init__()
        self.pred_queue = list()
        self.gt_queue = list()
        self.queue_len = 0

        self.queue_max_len = queue_len
        print('The queue length is: ', self.queue_max_len)
        self.mse = torch.nn.MSELoss().cuda()

        self.alpha, self.beta, self.gamma = alpha, beta, gamma

    def consistency(self, pred_data, gt_data):
        pred_one_batch, pred_queue = pred_data
        gt_one_batch, gt_queue = gt_data

        pred_mean = torch.mean(pred_queue)
        gt_mean = torch.mean(gt_queue)

        diff_pred = pred_one_batch - pred_mean
        diff_gt = gt_one_batch - gt_mean

        x1 = torch.sum(torch.mul(diff_pred, diff_gt))
        x2_1 = torch.sqrt(torch.sum(torch.mul(diff_pred, diff_pred)))
        x2_2 = torch.sqrt(torch.sum(torch.mul(diff_gt, diff_gt)))

        return x1 / (x2_1 * x2_2)

    def ppra(self, x):
        """
        Pairwise Preference-based Rank Approximation
        """
        x_bar, x_std = torch.mean(x), torch.std(x)
        x_n = (x - x_bar) / x_std
        x_n_T = x_n.reshape(-1, 1)

        rank_x = x_n_T - x_n_T.transpose(1, 0)
        rank_x = torch.sum(1 / 2 * (1 + torch.erf(rank_x / torch.sqrt(torch.tensor(2, dtype=torch.float)))), dim=1)

        return rank_x

    @torch.no_grad()
    def enqueue(self, pred, gt):
        bs = pred.shape[0]
        self.queue_len = self.queue_len + bs

        self.pred_queue = self.pred_queue + pred.tolist()
        self.gt_queue = self.gt_queue + gt.cpu().detach().numpy().tolist()

        if self.queue_len > self.queue_max_len:
            self.dequeue(self.queue_len - self.queue_max_len)
            self.queue_len = self.queue_max_len

    @torch.no_grad()
    def dequeue(self, n):
        for _ in range(n):
            self.pred_queue.pop(0)
            self.gt_queue.pop(0)

    def clear(self):
        self.pred_queue.clear()
        self.gt_queue.clear()

    def forward(self, x, y):
        x_queue = self.pred_queue.copy()
        y_queue = self.gt_queue.copy()

        x_all = torch.cat((x, torch.tensor(x_queue).cuda()), dim=0)
        y_all = torch.cat((y, torch.tensor(y_queue).cuda()), dim=0)

        PLCC = self.consistency((x, x_all), (y, y_all))
        PGC = 1 - PLCC

        rank_x = self.ppra(x_all)
        rank_y = self.ppra(y_all)
        SROCC = self.consistency((rank_x[:x.shape[0]], rank_x), (rank_y[:y.shape[0]], rank_y))
        SGC = 1 - SROCC

        GC = (self.alpha * PGC + self.beta * SGC + self.gamma) * self.mse(x, y)
        self.enqueue(x, y)

        return GC


if __name__ == '__main__':
    gc = GC_Loss().cuda()
    x = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float).cuda()
    y = torch.tensor([6, 7, 8, 9, 15], dtype=torch.float).cuda()

    res = gc(x, y)

    print(res)
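The deleted loss combines a Pearson-style group consistency term (PGC), a rank-based group consistency term (SGC) computed over a running queue of past predictions, and an MSE term. Below is a minimal, hypothetical training-step sketch, not part of the commit: the tiny model, the random data and the hyper-parameter values are placeholders chosen only to show how the stateful queue is exercised across steps.

# Hypothetical usage sketch (not from this repository); the model, optimizer and data are
# stand-ins. GC_Loss keeps an internal queue of past predictions and labels, so the loss
# only becomes "group-aware" after a few steps have filled the queue.
import torch
import torch.nn as nn

model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 224 * 224, 1), nn.Sigmoid()).cuda()
criterion = GC_Loss(queue_len=800, alpha=0.5, beta=0.5, gamma=1).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

for step in range(3):
    images = torch.rand(4, 3, 224, 224).cuda()    # placeholder batch
    mos = torch.rand(4).cuda()                     # placeholder ground-truth scores in [0, 1]
    pred = model(images).view(-1)                  # predicted scores, shape (bs,)
    loss = criterion(pred, mos)                    # (alpha * PGC + beta * SGC + gamma) * MSE
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()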
PromptIQA/models/monet_IPF.py
DELETED
@@ -1,397 +0,0 @@
"""
The completion for Mean-opinion Network(MoNet)
"""
import torch
import torch.nn as nn
import timm

from timm.models.vision_transformer import Block
from einops import rearrange
from itertools import combinations

from tqdm import tqdm

class Attention_Block(nn.Module):
    def __init__(self, dim, drop=0.1):
        super().__init__()
        self.c_q = nn.Linear(dim, dim)
        self.c_k = nn.Linear(dim, dim)
        self.c_v = nn.Linear(dim, dim)
        self.norm_fact = dim ** -0.5
        self.softmax = nn.Softmax(dim=-1)
        self.proj_drop = nn.Dropout(drop)

    def forward(self, x):
        _x = x
        B, C, N = x.shape
        q = self.c_q(x)
        k = self.c_k(x)
        v = self.c_v(x)

        attn = q @ k.transpose(-2, -1) * self.norm_fact
        attn = self.softmax(attn)
        x = (attn @ v).transpose(1, 2).reshape(B, C, N)
        x = self.proj_drop(x)
        x = x + _x
        return x


class Self_Attention(nn.Module):
    """ Self attention Layer"""

    def __init__(self, in_dim):
        super(Self_Attention, self).__init__()

        self.qConv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1)
        self.kConv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1)
        self.vConv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1)
        self.gamma = nn.Parameter(torch.zeros(1))

        self.softmax = nn.Softmax(dim=-1)

    def forward(self, inFeature):
        bs, C, w, h = inFeature.size()

        proj_query = self.qConv(inFeature).view(bs, -1, w * h).permute(0, 2, 1).contiguous()
        proj_key = self.kConv(inFeature).view(bs, -1, w * h)
        energy = torch.bmm(proj_query, proj_key)
        attention = self.softmax(energy)
        proj_value = self.vConv(inFeature).view(bs, -1, w * h)

        out = torch.bmm(proj_value, attention.permute(0, 2, 1).contiguous())
        out = out.view(bs, C, w, h)

        out = self.gamma * out + inFeature

        return out


class MAL(nn.Module):
    """
    Multi-view Attention Learning (MAL) module
    """

    def __init__(self, in_dim=768, feature_num=4, feature_size=28):
        super().__init__()

        self.channel_attention = Attention_Block(in_dim * feature_num)  # Channel-wise self attention
        self.feature_attention = Attention_Block(feature_size ** 2 * feature_num)  # Pixel-wise self attention

        # Self attention module for each input feature
        self.attention_module = nn.ModuleList()
        for _ in range(feature_num):
            self.attention_module.append(Self_Attention(in_dim))

        self.feature_num = feature_num
        self.in_dim = in_dim

    def forward(self, features):
        feature = torch.tensor([]).cuda()
        for index, _ in enumerate(features):
            feature = torch.cat((feature, self.attention_module[index](features[index]).unsqueeze(0)), dim=0)
        features = feature

        input_tensor = rearrange(features, 'n b c w h -> b (n c) (w h)')  # bs, 768 * feature_num, 28 * 28
        bs, _, _ = input_tensor.shape  # [2, 3072, 784]

        in_feature = rearrange(input_tensor, 'b (w c) h -> b w (c h)', w=self.in_dim,
                               c=self.feature_num)  # bs, 768, 28 * 28 * feature_num
        feature_weight_sum = self.feature_attention(in_feature)  # bs, 768, 768

        in_channel = input_tensor.permute(0, 2, 1).contiguous()  # bs, 28 * 28, 768 * feature_num
        channel_weight_sum = self.channel_attention(in_channel)  # bs, 28 * 28, 28 * 28

        weight_sum_res = (rearrange(feature_weight_sum, 'b w (c h) -> b (w c) h', w=self.in_dim,
                                    c=self.feature_num) + channel_weight_sum.permute(0, 2, 1).contiguous()) / 2  # [2, 3072, 784]

        weight_sum_res = torch.mean(weight_sum_res.view(bs, self.feature_num, self.in_dim, -1), dim=1)

        return weight_sum_res  # bs, 768, 28 * 28


class SaveOutput:
    def __init__(self):
        self.outputs = []

    def __call__(self, module, module_in, module_out):
        self.outputs.append(module_out)

    def clear(self):
        self.outputs = []

# utils
@torch.no_grad()
def concat_all_gather(tensor):
    """
    Performs all_gather operation on the provided tensors.
    *** Warning ***: torch.distributed.all_gather has no gradient.
    """
    tensors_gather = [
        torch.ones_like(tensor) for _ in range(torch.distributed.get_world_size())
    ]
    torch.distributed.all_gather(tensors_gather, tensor, async_op=False)

    output = torch.cat(tensors_gather, dim=0)
    return output

class Attention(nn.Module):
    def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.):
        super().__init__()
        assert dim % num_heads == 0, 'dim should be divisible by num_heads'
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = head_dim ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, N, C = x.shape
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv.unbind(0)  # make torchscript happy (cannot use tensor as tuple)

        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x

import torch
from functools import partial

class MoNet(nn.Module):
    def __init__(self, patch_size=8, drop=0.1, dim_mlp=768, img_size=224):
        super().__init__()
        self.img_size = img_size
        self.input_size = img_size // patch_size
        self.dim_mlp = dim_mlp

        self.vit = timm.create_model('vit_base_patch8_224', pretrained=True)
        self.vit.norm = nn.Identity()
        self.vit.head = nn.Identity()

        self.save_output = SaveOutput()

        # Register Hooks
        hook_handles = []
        for layer in self.vit.modules():
            if isinstance(layer, Block):
                handle = layer.register_forward_hook(self.save_output)
                hook_handles.append(handle)

        self.MALs = nn.ModuleList()
        for _ in range(3):
            self.MALs.append(MAL())

        # Image Quality Score Regression
        self.fusion_mal = MAL(feature_num=3)
        self.block = Block(dim_mlp, 12)
        self.cnn = nn.Sequential(
            nn.Conv2d(dim_mlp, 256, 5),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((2, 2)),
            nn.Conv2d(256, 128, 3),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((2, 2)),
            nn.Conv2d(128, 128, 3),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((3, 3)),
        )

        self.i_p_fusion = nn.Sequential(
            Block(128, 4),
            Block(128, 4),
            Block(128, 4),
        )
        self.mlp = nn.Sequential(
            nn.Linear(128, 64),
            nn.GELU(),
            nn.Linear(64, 128),
        )

        self.prompt_fusion = nn.Sequential(
            Block(128, 4),
            Block(128, 4),
            Block(128, 4),
        )

        dpr = [x.item() for x in torch.linspace(0, 0, 8)]  # stochastic depth decay rule
        self.blocks = nn.Sequential(*[
            Block(
                dim=128, num_heads=4, mlp_ratio=4, qkv_bias=True, drop=0,
                attn_drop=0, drop_path=dpr[i], norm_layer=partial(nn.LayerNorm, eps=1e-6), act_layer=nn.GELU)
            for i in range(8)])
        self.norm = nn.LayerNorm(128)

        self.score_block = nn.Sequential(
            nn.Linear(128, 128 // 2),
            nn.ReLU(),
            nn.Dropout(drop),
            nn.Linear(128 // 2, 1),
            nn.Sigmoid()
        )

        self.prompt_feature = {}

    @torch.no_grad()
    def clear(self):
        self.prompt_feature = {}

    @torch.no_grad()
    def inference(self, x, data_type):
        prompt_feature = self.prompt_feature[data_type]  # 1, n, 128

        _x = self.vit(x)
        x = self.extract_feature(self.save_output)  # bs, 28 * 28, 768 * 4
        self.save_output.outputs.clear()

        x = x.permute(0, 2, 1).contiguous()  # bs, 768 * 4, 28 * 28
        x = rearrange(x, 'b (d n) (w h) -> b d n w h', d=4, n=self.dim_mlp, w=self.input_size, h=self.input_size)  # bs, 4, 768, 28, 28
        x = x.permute(1, 0, 2, 3, 4).contiguous()  # 4, bs, 768, 28, 28

        # Different Opinion Features (DOF)
        DOF = torch.tensor([]).cuda()
        for index, _ in enumerate(self.MALs):
            DOF = torch.cat((DOF, self.MALs[index](x).unsqueeze(0)), dim=0)
        DOF = rearrange(DOF, 'n c d (w h) -> n c d w h', w=self.input_size, h=self.input_size)  # M, bs, 768, 28, 28

        # Image Quality Score Regression
        fusion_mal = self.fusion_mal(DOF).permute(0, 2, 1).contiguous()  # bs, 28 * 28, 768
        IQ_feature = self.block(fusion_mal).permute(0, 2, 1).contiguous()  # bs, 768, 28 * 28
        IQ_feature = rearrange(IQ_feature, 'c d (w h) -> c d w h', w=self.input_size, h=self.input_size)  # bs, 768, 28, 28
        img_feature = self.cnn(IQ_feature).squeeze(-1).squeeze(-1).unsqueeze(1)  # bs, 1, 128

        prompt_feature = prompt_feature.repeat(img_feature.shape[0], 1, 1)  # bs, n, 128
        prompt_feature = self.prompt_fusion(prompt_feature)  # bs, n, 128

        fusion = self.blocks(torch.cat((img_feature, prompt_feature), dim=1))[:, 0, :]  # bs, 2, 1
        # fusion = self.norm(fusion)[:, 0, :]
        # fusion = self.score_block(fusion)

        # # iq_res = torch.mean(fusion, dim=1).view(-1)
        # iq_res = fusion[:, 0].view(-1)

        return fusion

    @torch.no_grad()
    def check_prompt(self, data_type):
        return data_type in self.prompt_feature

    @torch.no_grad()
    def forward_prompt(self, x, score, data_type):
        if data_type in self.prompt_feature:
            return
        _x = self.vit(x)
        x = self.extract_feature(self.save_output)  # bs, 28 * 28, 768 * 4
        self.save_output.outputs.clear()

        x = x.permute(0, 2, 1).contiguous()  # bs, 768 * 4, 28 * 28
        x = rearrange(x, 'b (d n) (w h) -> b d n w h', d=4, n=self.dim_mlp, w=self.input_size, h=self.input_size)  # bs, 4, 768, 28, 28
        x = x.permute(1, 0, 2, 3, 4).contiguous()  # 4, bs, 768, 28, 28

        # Different Opinion Features (DOF)
        DOF = torch.tensor([]).cuda()
        for index, _ in enumerate(self.MALs):
            DOF = torch.cat((DOF, self.MALs[index](x).unsqueeze(0)), dim=0)
        DOF = rearrange(DOF, 'n c d (w h) -> n c d w h', w=self.input_size, h=self.input_size)  # M, bs, 768, 28, 28

        # Image Quality Score Regression
        fusion_mal = self.fusion_mal(DOF).permute(0, 2, 1).contiguous()  # bs, 28 * 28, 768
        IQ_feature = self.block(fusion_mal).permute(0, 2, 1).contiguous()  # bs, 768, 28 * 28
        IQ_feature = rearrange(IQ_feature, 'c d (w h) -> c d w h', w=self.input_size, h=self.input_size)  # bs, 768, 28, 28
        img_feature = self.cnn(IQ_feature).squeeze(-1).squeeze(-1).unsqueeze(1)  # bs, 1, 128

        # Linearly map the score to a 128-d feature
        # score_feature = self.score_projection(score)  # bs, 128
        score_feature = score.expand(-1, 128)

        # Fuse img_feature and score_feature to obtain funsion_feature
        funsion_feature = self.i_p_fusion(torch.cat((img_feature, score_feature.unsqueeze(1)), dim=1))  # bs, 2, 128
        funsion_feature = self.mlp(torch.mean(funsion_feature, dim=1)).unsqueeze(0)  # 1, n, 128

        # print('Load Prompt For Testing.', funsion_feature.shape)
        # self.prompt_feature = funsion_feature.clone()
        self.prompt_feature[data_type] = funsion_feature.clone()

    def forward(self, x, score):
        _x = self.vit(x)
        x = self.extract_feature(self.save_output)  # bs, 28 * 28, 768 * 4
        self.save_output.outputs.clear()

        x = x.permute(0, 2, 1).contiguous()  # bs, 768 * 4, 28 * 28
        x = rearrange(x, 'b (d n) (w h) -> b d n w h', d=4, n=self.dim_mlp, w=self.input_size, h=self.input_size)  # bs, 4, 768, 28, 28
        x = x.permute(1, 0, 2, 3, 4).contiguous()  # 4, bs, 768, 28, 28

        # Different Opinion Features (DOF)
        DOF = torch.tensor([]).cuda()
        for index, _ in enumerate(self.MALs):
            DOF = torch.cat((DOF, self.MALs[index](x).unsqueeze(0)), dim=0)
        DOF = rearrange(DOF, 'n c d (w h) -> n c d w h', w=self.input_size, h=self.input_size)  # M, bs, 768, 28, 28

        # Image Quality Score Regression
        fusion_mal = self.fusion_mal(DOF).permute(0, 2, 1).contiguous()  # bs, 28 * 28, 768
        IQ_feature = self.block(fusion_mal).permute(0, 2, 1).contiguous()  # bs, 768, 28 * 28
        IQ_feature = rearrange(IQ_feature, 'c d (w h) -> c d w h', w=self.input_size, h=self.input_size)  # bs, 768, 28, 28
        img_feature = self.cnn(IQ_feature).squeeze(-1).squeeze(-1).unsqueeze(1)  # bs, 1, 128

        # Linearly map the score to a 128-d feature
        # score_feature = self.score_projection(score)  # bs, 128
        score_feature = score.expand(-1, 128)  # bs, 128

        # Fuse img_feature and score_feature to obtain funsion_feature
        funsion_feature = self.i_p_fusion(torch.cat((img_feature, score_feature.unsqueeze(1)), dim=1))  # bs, 2, 128
        funsion_feature = self.mlp(torch.mean(funsion_feature, dim=1))  # bs, 128
        funsion_feature = self.expand(funsion_feature)  # bs, bs - 1, 128
        funsion_feature = self.prompt_fusion(funsion_feature)  # bs, bs - 1, 128

        fusion = self.blocks(torch.cat((img_feature, funsion_feature), dim=1))  # bs, 2, 1
        fusion = self.norm(fusion)
        fusion = self.score_block(fusion)
        # iq_res = torch.mean(fusion, dim=1).view(-1)
        iq_res = fusion[:, 0].view(-1)

        # differ_fusion = self.diff_block(torch.cat((img_feature, funsion_feature), dim=1))  # bs, n, 1
        # differ_iq_res = torch.mean(differ_fusion, dim=1).view(-1)

        gt_res = score.view(-1)
        # diff_gt_res = 1 - score.view(-1)

        return (iq_res, 'differ_iq_res'), (gt_res, 'diff_gt_res')

    def extract_feature(self, save_output, block_index=[2, 5, 8, 11]):
        x1 = save_output.outputs[block_index[0]][:, 1:]
        x2 = save_output.outputs[block_index[1]][:, 1:]
        x3 = save_output.outputs[block_index[2]][:, 1:]
        x4 = save_output.outputs[block_index[3]][:, 1:]
        x = torch.cat((x1, x2, x3, x4), dim=2)
        return x

    def expand(self, A):
        A_expanded = A.unsqueeze(0).expand(A.size(0), -1, -1)

        B = None
        for index, i in enumerate(A_expanded):
            rmv = torch.cat((i[:index], i[index + 1:])).unsqueeze(0)
            if B is None:
                B = rmv
            else:
                B = torch.cat((B, rmv), dim=0)

        return B

if __name__ == '__main__':
    in_feature = torch.zeros((10, 3, 224, 224)).cuda()
    gt_feature = torch.tensor([[0, 100, 1], [0, 100, 2], [0, 100, 3], [0, 100, 4], [0, 100, 5], [0, 100, 6], [0, 100, 7], [0, 100, 8], [0, 100, 9], [0, 100, 10]], dtype=torch.float).cuda()
    model = MoNet().cuda()

    iq_res, gt_res = model(in_feature, gt_feature)

    print(iq_res.shape)
    print(gt_res.shape)
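This variant caches one prompt per dataset: forward_prompt() turns a handful of image/score pairs into a 1 x n x 128 prompt stored under a dataset key, and inference() then fuses new images with that cached prompt. The snippet below is a hypothetical sketch of that call sequence, not code from the commit; the batch sizes and the 'koniq' key are invented placeholders, and a CUDA device plus the pretrained ViT weights are assumed.

# Hypothetical prompt workflow for the monet_IPF variant (assumed names and sizes).
import torch

model = MoNet().cuda().eval()

prompt_imgs = torch.rand(5, 3, 224, 224).cuda()     # few-shot example images
prompt_scores = torch.rand(5, 1).cuda()              # their normalized quality scores

if not model.check_prompt('koniq'):
    model.forward_prompt(prompt_imgs, prompt_scores, 'koniq')   # caches a 1 x 5 x 128 prompt

test_imgs = torch.rand(2, 3, 224, 224).cuda()
with torch.no_grad():
    feat = model.inference(test_imgs, 'koniq')        # bs x 128 fused feature in this variant
print(feat.shape)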
PromptIQA/models/monet_test.py
DELETED
@@ -1,389 +0,0 @@
# (The module docstring, imports and the Attention_Block, Self_Attention, MAL, SaveOutput,
#  concat_all_gather and Attention definitions are identical to those in monet_IPF.py above;
#  only the MoNet class differs.)

class MoNet(nn.Module):
    def __init__(self, patch_size=8, drop=0.1, dim_mlp=768, img_size=224):
        super().__init__()
        self.img_size = img_size
        self.input_size = img_size // patch_size
        self.dim_mlp = dim_mlp

        self.vit = timm.create_model('vit_base_patch8_224', pretrained=True)
        self.vit.norm = nn.Identity()
        self.vit.head = nn.Identity()

        self.save_output = SaveOutput()

        # Register Hooks
        hook_handles = []
        for layer in self.vit.modules():
            if isinstance(layer, Block):
                handle = layer.register_forward_hook(self.save_output)
                hook_handles.append(handle)

        self.MALs = nn.ModuleList()
        for _ in range(3):
            self.MALs.append(MAL())

        # Image Quality Score Regression
        self.fusion_mal = MAL(feature_num=3)
        self.block = Block(dim_mlp, 12)
        self.cnn = nn.Sequential(
            nn.Conv2d(dim_mlp, 256, 5),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((2, 2)),
            nn.Conv2d(256, 128, 3),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((2, 2)),
            nn.Conv2d(128, 128, 3),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((3, 3)),
        )

        # self.score_projection = nn.Sequential(
        #     nn.Linear(1, 64),
        #     nn.GELU(),
        #     nn.Linear(64, 128),
        # )

        # self.i_p_fusion = nn.Sequential(
        #     Block(128, 8),
        #     Block(128, 8),
        #     Block(128, 8),
        # )
        self.i_p_fusion = nn.Sequential(
            Block(128, 4),
            Block(128, 4),
            Block(128, 4),
        )
        self.mlp = nn.Sequential(
            nn.Linear(128, 64),
            nn.GELU(),
            nn.Linear(64, 128),
        )

        self.score_block = nn.Sequential(
            Block(128, 4),
            Block(128, 4),
            # Block(128, 4),
            nn.Linear(128, 128 // 2),
            nn.ReLU(),
            nn.Dropout(drop),
            nn.Linear(128 // 2, 1),
            nn.Sigmoid()
        )

        # self.diff_block = nn.Sequential(
        #     Block(128, 8),
        #     Block(128, 8),
        #     Block(128, 8),
        #     nn.Linear(128, 64),
        #     nn.GELU(),
        #     nn.Linear(64, 1),
        # )
        self.prompt_feature = None

    @torch.no_grad()
    def clear(self):
        self.prompt_feature = None

    @torch.no_grad()
    def inference(self, x):
        prompt_feature = self.prompt_feature  # 1, n, 128

        _x = self.vit(x)
        x = self.extract_feature(self.save_output)  # bs, 28 * 28, 768 * 4
        self.save_output.outputs.clear()

        x = x.permute(0, 2, 1).contiguous()  # bs, 768 * 4, 28 * 28
        x = rearrange(x, 'b (d n) (w h) -> b d n w h', d=4, n=self.dim_mlp, w=self.input_size, h=self.input_size)  # bs, 4, 768, 28, 28
        x = x.permute(1, 0, 2, 3, 4).contiguous()  # 4, bs, 768, 28, 28

        # Different Opinion Features (DOF)
        DOF = torch.tensor([]).cuda()
        for index, _ in enumerate(self.MALs):
            DOF = torch.cat((DOF, self.MALs[index](x).unsqueeze(0)), dim=0)
        DOF = rearrange(DOF, 'n c d (w h) -> n c d w h', w=self.input_size, h=self.input_size)  # M, bs, 768, 28, 28

        # Image Quality Score Regression
        fusion_mal = self.fusion_mal(DOF).permute(0, 2, 1).contiguous()  # bs, 28 * 28, 768
        IQ_feature = self.block(fusion_mal).permute(0, 2, 1).contiguous()  # bs, 768, 28 * 28
        IQ_feature = rearrange(IQ_feature, 'c d (w h) -> c d w h', w=self.input_size, h=self.input_size)  # bs, 768, 28, 28
        img_feature = self.cnn(IQ_feature).squeeze(-1).squeeze(-1).unsqueeze(1)  # bs, 1, 128

        prompt_feature = prompt_feature.repeat(img_feature.shape[0], 1, 1)  # bs, n, 128

        fusion = self.score_block(torch.cat((img_feature, prompt_feature), dim=1))  # bs, n, 1

        # iq_res = torch.mean(fusion, dim=1).view(-1)
        iq_res = fusion[:, 0].view(-1)

        return iq_res

    def extract_feature(self, save_output, block_index=[2, 5, 8, 11]):
        x1 = save_output.outputs[block_index[0]][:, 1:]
        x2 = save_output.outputs[block_index[1]][:, 1:]
        x3 = save_output.outputs[block_index[2]][:, 1:]
        x4 = save_output.outputs[block_index[3]][:, 1:]
        x = torch.cat((x1, x2, x3, x4), dim=2)
        return x

    @torch.no_grad()
    def forward_prompt(self, x, score):
        _x = self.vit(x)
        x = self.extract_feature(self.save_output)  # bs, 28 * 28, 768 * 4
        self.save_output.outputs.clear()

        x = x.permute(0, 2, 1).contiguous()  # bs, 768 * 4, 28 * 28
        x = rearrange(x, 'b (d n) (w h) -> b d n w h', d=4, n=self.dim_mlp, w=self.input_size, h=self.input_size)  # bs, 4, 768, 28, 28
        x = x.permute(1, 0, 2, 3, 4).contiguous()  # 4, bs, 768, 28, 28

        # Different Opinion Features (DOF)
        DOF = torch.tensor([]).cuda()
        for index, _ in enumerate(self.MALs):
            DOF = torch.cat((DOF, self.MALs[index](x).unsqueeze(0)), dim=0)
        DOF = rearrange(DOF, 'n c d (w h) -> n c d w h', w=self.input_size, h=self.input_size)  # M, bs, 768, 28, 28

        # Image Quality Score Regression
        fusion_mal = self.fusion_mal(DOF).permute(0, 2, 1).contiguous()  # bs, 28 * 28, 768
        IQ_feature = self.block(fusion_mal).permute(0, 2, 1).contiguous()  # bs, 768, 28 * 28
        IQ_feature = rearrange(IQ_feature, 'c d (w h) -> c d w h', w=self.input_size, h=self.input_size)  # bs, 768, 28, 28
        img_feature = self.cnn(IQ_feature).squeeze(-1).squeeze(-1).unsqueeze(1)  # bs, 1, 128

        # Linearly map the score to a 128-d feature
        # score_feature = self.score_projection(score)  # bs, 128
        score_feature = score.expand(-1, 128)

        # Fuse img_feature and score_feature to obtain funsion_feature
        funsion_feature = self.i_p_fusion(torch.cat((img_feature, score_feature.unsqueeze(1)), dim=1))  # bs, 2, 128
        funsion_feature = self.mlp(torch.mean(funsion_feature, dim=1)).unsqueeze(0)  # 1, n, 128

        print('Load Prompt For Testing.', funsion_feature.shape)
        self.prompt_feature = funsion_feature.clone()

    def expand(self, A):
        A_expanded = A.unsqueeze(0).expand(A.size(0), -1, -1)

        B = None
        for index, i in enumerate(A_expanded):
            rmv = torch.cat((i[:index], i[index + 1:])).unsqueeze(0)
            if B is None:
                B = rmv
            else:
                B = torch.cat((B, rmv), dim=0)

        return B

    def forward(self, x, score):
        _x = self.vit(x)
        x = self.extract_feature(self.save_output)  # bs, 28 * 28, 768 * 4
        self.save_output.outputs.clear()

        x = x.permute(0, 2, 1).contiguous()  # bs, 768 * 4, 28 * 28
        x = rearrange(x, 'b (d n) (w h) -> b d n w h', d=4, n=self.dim_mlp, w=self.input_size, h=self.input_size)  # bs, 4, 768, 28, 28
        x = x.permute(1, 0, 2, 3, 4).contiguous()  # 4, bs, 768, 28, 28

        # Different Opinion Features (DOF)
        DOF = torch.tensor([]).cuda()
        for index, _ in enumerate(self.MALs):
            DOF = torch.cat((DOF, self.MALs[index](x).unsqueeze(0)), dim=0)
        DOF = rearrange(DOF, 'n c d (w h) -> n c d w h', w=self.input_size, h=self.input_size)  # M, bs, 768, 28, 28

        # Image Quality Score Regression
        fusion_mal = self.fusion_mal(DOF).permute(0, 2, 1).contiguous()  # bs, 28 * 28, 768
        IQ_feature = self.block(fusion_mal).permute(0, 2, 1).contiguous()  # bs, 768, 28 * 28
        IQ_feature = rearrange(IQ_feature, 'c d (w h) -> c d w h', w=self.input_size, h=self.input_size)  # bs, 768, 28, 28
        img_feature = self.cnn(IQ_feature).squeeze(-1).squeeze(-1).unsqueeze(1)  # bs, 1, 128

        # Linearly map the score to a 128-d feature
        # score_feature = self.score_projection(score)  # bs, 128
        score_feature = score.expand(-1, 128)  # bs, 128

        # Fuse img_feature and score_feature to obtain funsion_feature
        funsion_feature = self.i_p_fusion(torch.cat((img_feature.detach(), score_feature.unsqueeze(1).detach()), dim=1))  # bs, 2, 128
        funsion_feature = self.mlp(torch.mean(funsion_feature, dim=1))  # bs, 128
        funsion_feature = self.expand(funsion_feature)  # bs, bs - 1, 128

        fusion = self.score_block(torch.cat((img_feature, funsion_feature), dim=1))  # bs, n, 1
        # iq_res = torch.mean(fusion, dim=1).view(-1)
        iq_res = fusion[:, 0].view(-1)

        # differ_fusion = self.diff_block(torch.cat((img_feature, funsion_feature), dim=1))  # bs, n, 1
        # differ_iq_res = torch.mean(differ_fusion, dim=1).view(-1)

        gt_res = score.view(-1)
        # diff_gt_res = 1 - score.view(-1)

        return (iq_res, 'differ_iq_res'), (gt_res, 'diff_gt_res')


if __name__ == '__main__':
    in_feature = torch.zeros((10, 3, 224, 224)).cuda()
    gt_feature = torch.tensor([[0, 100, 1], [0, 100, 2], [0, 100, 3], [0, 100, 4], [0, 100, 5], [0, 100, 6], [0, 100, 7], [0, 100, 8], [0, 100, 9], [0, 100, 10]], dtype=torch.float).cuda()
    model = MoNet().cuda()

    iq_res, gt_res = model(in_feature, gt_feature)

    print(iq_res.shape)
    print(gt_res.shape)
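In this variant the prompts for a batch are built from the batch itself: expand() gives each image the fused image/score features of the other bs - 1 samples. The tiny standalone check below illustrates that leave-one-out behaviour; it is written for this note rather than taken from the repository, and it uses a 3 x 2 tensor instead of the real bs x 128 features.

# Illustration of the leave-one-out expansion performed by MoNet.expand() (assumed toy sizes).
import torch

A = torch.arange(6, dtype=torch.float).view(3, 2)             # pretend bs=3, feature dim=2
A_expanded = A.unsqueeze(0).expand(A.size(0), -1, -1)          # 3 x 3 x 2
rows = [torch.cat((row[:i], row[i + 1:])).unsqueeze(0) for i, row in enumerate(A_expanded)]
B = torch.cat(rows, dim=0)
print(B.shape)   # torch.Size([3, 2, 2]); B[i] holds every row of A except row i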
PromptIQA/models/monet_wo_prompt.py
DELETED
@@ -1,392 +0,0 @@
# (The module docstring, imports and the Attention_Block, Self_Attention, MAL, SaveOutput,
#  concat_all_gather and Attention definitions are identical to those in monet_IPF.py above,
#  except that this file additionally has `import os` and a commented-out
#  `os.environ['CUDA_VISIBLE_DEVICES'] = '7'` near the top; only the MoNet class differs.
#  The listing of this file is cut off in the commit view partway through extract_feature.)

from functools import partial
class MoNet(nn.Module):
    def __init__(self, patch_size=8, drop=0.1, dim_mlp=768, img_size=224):
        super().__init__()
        self.img_size = img_size
        self.input_size = img_size // patch_size
        self.dim_mlp = dim_mlp

        self.vit = timm.create_model('vit_base_patch8_224', pretrained=True)
        self.vit.norm = nn.Identity()
        self.vit.head = nn.Identity()

        self.save_output = SaveOutput()

        # Register Hooks
        hook_handles = []
        for layer in self.vit.modules():
            if isinstance(layer, Block):
                handle = layer.register_forward_hook(self.save_output)
                hook_handles.append(handle)

        self.MALs = nn.ModuleList()
        for _ in range(3):
            self.MALs.append(MAL())

        # Image Quality Score Regression
        self.fusion_mal = MAL(feature_num=3)
        self.block = Block(dim_mlp, 12)
        self.cnn = nn.Sequential(
            nn.Conv2d(dim_mlp, 256, 5),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((2, 2)),
            nn.Conv2d(256, 128, 3),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((2, 2)),
            nn.Conv2d(128, 128, 3),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((3, 3)),
        )

        # self.i_p_fusion = nn.Sequential(
        #     Block(128, 4),
        #     Block(128, 4),
        #     Block(128, 4),
        # )
        # self.mlp = nn.Sequential(
        #     nn.Linear(128, 64),
        #     nn.GELU(),
        #     nn.Linear(64, 128),
        # )

        dpr = [x.item() for x in torch.linspace(0, 0, 8)]  # stochastic depth decay rule
        self.blocks = nn.Sequential(*[
            Block(
                dim=128, num_heads=4, mlp_ratio=4, qkv_bias=True, drop=0,
                attn_drop=0, drop_path=dpr[i], norm_layer=partial(nn.LayerNorm, eps=1e-6), act_layer=nn.GELU)
            for i in range(8)])
        self.norm = nn.LayerNorm(128)

        self.score_block = nn.Sequential(
            nn.Linear(128, 128 // 2),
            nn.ReLU(),
            nn.Dropout(drop),
            nn.Linear(128 // 2, 1),
            nn.Sigmoid()
        )

        self.prompt_feature = {}

    @torch.no_grad()
    def clear(self):
        self.prompt_feature = {}

    @torch.no_grad()
    def inference(self, x, data_type):
        # prompt_feature = self.prompt_feature[data_type]  # 1, n, 128

        _x = self.vit(x)
        x = self.extract_feature(self.save_output)  # bs, 28 * 28, 768 * 4
        self.save_output.outputs.clear()

        x = x.permute(0, 2, 1).contiguous()  # bs, 768 * 4, 28 * 28
        x = rearrange(x, 'b (d n) (w h) -> b d n w h', d=4, n=self.dim_mlp, w=self.input_size, h=self.input_size)  # bs, 4, 768, 28, 28
        x = x.permute(1, 0, 2, 3, 4).contiguous()  # 4, bs, 768, 28, 28

        # Different Opinion Features (DOF)
        DOF = torch.tensor([]).cuda()
        for index, _ in enumerate(self.MALs):
            DOF = torch.cat((DOF, self.MALs[index](x).unsqueeze(0)), dim=0)
        DOF = rearrange(DOF, 'n c d (w h) -> n c d w h', w=self.input_size, h=self.input_size)  # M, bs, 768, 28, 28

        # Image Quality Score Regression
        fusion_mal = self.fusion_mal(DOF).permute(0, 2, 1).contiguous()  # bs, 28 * 28, 768
        IQ_feature = self.block(fusion_mal).permute(0, 2, 1).contiguous()  # bs, 768, 28 * 28
        IQ_feature = rearrange(IQ_feature, 'c d (w h) -> c d w h', w=self.input_size, h=self.input_size)  # bs, 768, 28, 28
        img_feature = self.cnn(IQ_feature).squeeze(-1).squeeze(-1).unsqueeze(1)  # bs, 1, 128

        # prompt_feature = prompt_feature.repeat(img_feature.shape[0], 1, 1)  # bs, n, 128
        # prompt_feature = self.prompt_fusion(prompt_feature)  # bs, n, 128

        fusion = self.blocks(img_feature)  # bs, 2, 1
        fusion = self.norm(fusion)
        fusion = self.score_block(fusion)

        # iq_res = torch.mean(fusion, dim=1).view(-1)
        iq_res = fusion[:, 0].view(-1)

        return iq_res

    @torch.no_grad()
    def check_prompt(self, data_type):
        return data_type in self.prompt_feature

    @torch.no_grad()
    def forward_prompt(self, x, score, data_type):
        pass
        # The rest of the original body is commented out in the deleted file; it mirrors
        # the prompt-construction code of forward_prompt in monet_IPF.py above.

    def forward(self, x, score):
        _x = self.vit(x)
        x = self.extract_feature(self.save_output)  # bs, 28 * 28, 768 * 4
        self.save_output.outputs.clear()

        x = x.permute(0, 2, 1).contiguous()  # bs, 768 * 4, 28 * 28
        x = rearrange(x, 'b (d n) (w h) -> b d n w h', d=4, n=self.dim_mlp, w=self.input_size, h=self.input_size)  # bs, 4, 768, 28, 28
        x = x.permute(1, 0, 2, 3, 4).contiguous()  # 4, bs, 768, 28, 28

        # Different Opinion Features (DOF)
        DOF = torch.tensor([]).cuda()
        for index, _ in enumerate(self.MALs):
            DOF = torch.cat((DOF, self.MALs[index](x).unsqueeze(0)), dim=0)
        DOF = rearrange(DOF, 'n c d (w h) -> n c d w h', w=self.input_size, h=self.input_size)  # M, bs, 768, 28, 28

        # Image Quality Score Regression
        fusion_mal = self.fusion_mal(DOF).permute(0, 2, 1).contiguous()  # bs, 28 * 28, 768
        IQ_feature = self.block(fusion_mal).permute(0, 2, 1).contiguous()  # bs, 768, 28 * 28
        IQ_feature = rearrange(IQ_feature, 'c d (w h) -> c d w h', w=self.input_size, h=self.input_size)  # bs, 768, 28, 28
        img_feature = self.cnn(IQ_feature).squeeze(-1).squeeze(-1).unsqueeze(1)  # bs, 1, 128

        # Linearly map the score to a 128-d feature
        # score_feature = self.score_projection(score)  # bs, 128
        # score_feature = score.expand(-1, 128)  # bs, 128

        # # Fuse img_feature and score_feature to obtain funsion_feature
        # funsion_feature = self.i_p_fusion(torch.cat((img_feature, score_feature.unsqueeze(1)), dim=1))  # bs, 2, 128
        # funsion_feature = self.mlp(torch.mean(funsion_feature, dim=1))  # bs, 128
        # funsion_feature = self.expand(funsion_feature)  # bs, bs - 1, 128
        # funsion_feature = self.prompt_fusion(funsion_feature)  # bs, bs - 1, 128

        fusion = self.blocks(img_feature)  # bs, 2, 1
        fusion = self.norm(fusion)
        fusion = self.score_block(fusion)
        # iq_res = torch.mean(fusion, dim=1).view(-1)
        iq_res = fusion[:, 0].view(-1)

        # differ_fusion = self.diff_block(torch.cat((img_feature, funsion_feature), dim=1))  # bs, n, 1
        # differ_iq_res = torch.mean(differ_fusion, dim=1).view(-1)

        gt_res = score.view(-1)
        # diff_gt_res = 1 - score.view(-1)

        return (iq_res, 'differ_iq_res'), (gt_res, 'diff_gt_res')

    def extract_feature(self, save_output, block_index=[2, 5, 8, 11]):
        x1 = save_output.outputs[block_index[0]][:, 1:]
        x2 = save_output.outputs[block_index[1]][:, 1:]
        x3 = save_output.outputs[block_index[2]][:, 1:]
        x4 = save_output.outputs[block_index[3]][:, 1:]
        x = torch.cat((x1, x2, x3, x4), dim=2)
        return x
|
| 368 |
-
x = torch.cat((x1, x2, x3, x4), dim=2)
|
| 369 |
-
return x
|
| 370 |
-
|
| 371 |
-
def expand(self, A):
|
| 372 |
-
A_expanded = A.unsqueeze(0).expand(A.size(0), -1, -1)
|
| 373 |
-
|
| 374 |
-
B = None
|
| 375 |
-
for index, i in enumerate(A_expanded):
|
| 376 |
-
rmv = torch.cat((i[:index], i[index + 1:])).unsqueeze(0)
|
| 377 |
-
if B is None:
|
| 378 |
-
B = rmv
|
| 379 |
-
else:
|
| 380 |
-
B = torch.cat((B, rmv), dim=0)
|
| 381 |
-
|
| 382 |
-
return B
|
| 383 |
-
|
| 384 |
-
if __name__ == '__main__':
|
| 385 |
-
in_feature = torch.zeros((2, 3, 224, 224)).cuda()
|
| 386 |
-
gt_feature = torch.tensor([[0, 100, 1], [0, 100, 2]], dtype=torch.float).cuda()
|
| 387 |
-
model = MoNet().cuda()
|
| 388 |
-
|
| 389 |
-
iq_res, gt_res = model(in_feature, gt_feature)
|
| 390 |
-
|
| 391 |
-
print(iq_res)
|
| 392 |
-
print(gt_res.shape)
|
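A note on the `expand` helper that appears in the model files above and below: it builds, for every sample in a batch, the stack of the other samples' prompt features (a leave-one-out expansion). The following standalone sketch is not repository code; it only illustrates the shape behaviour under the assumption of a plain 2-D feature tensor.

import torch

def leave_one_out(A: torch.Tensor) -> torch.Tensor:
    # A: (N, C) -> (N, N - 1, C); row i stacks every vector except A[i]
    out = []
    for i in range(A.size(0)):
        out.append(torch.cat((A[:i], A[i + 1:])).unsqueeze(0))
    return torch.cat(out, dim=0)

features = torch.randn(4, 128)        # e.g. 4 image-score prompt features of dim 128
print(leave_one_out(features).shape)  # torch.Size([4, 3, 128])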
PromptIQA/models/{monet.py → promptiqa.py}
RENAMED

@@ -6,10 +6,8 @@ import torch.nn as nn
 import timm
 
 from timm.models.vision_transformer import Block
+from functools import partial
 from einops import rearrange
-from itertools import combinations
-
-from tqdm import tqdm
 
 class Attention_Block(nn.Module):
     def __init__(self, dim, drop=0.1):
@@ -119,20 +117,6 @@ class SaveOutput:
     def clear(self):
         self.outputs = []
 
-# utils
-@torch.no_grad()
-def concat_all_gather(tensor):
-    """
-    Performs all_gather operation on the provided tensors.
-    *** Warning ***: torch.distributed.all_gather has no gradient.
-    """
-    tensors_gather = [
-        torch.ones_like(tensor) for _ in range(torch.distributed.get_world_size())
-    ]
-    torch.distributed.all_gather(tensors_gather, tensor, async_op=False)
-
-    output = torch.cat(tensors_gather, dim=0)
-    return output
 class Attention(nn.Module):
     def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.):
         super().__init__()
@@ -160,8 +144,7 @@ class Attention(nn.Module):
         x = self.proj_drop(x)
         return x
 
-
-class MoNet(nn.Module):
+class PromptIQA(nn.Module):
     def __init__(self, patch_size=8, drop=0.1, dim_mlp=768, img_size=224):
         super().__init__()
         self.img_size = img_size
@@ -273,15 +256,10 @@ class MoNet(nn.Module):
         fusion = self.norm(fusion)
         fusion = self.score_block(fusion)
 
-        # iq_res = torch.mean(fusion, dim=1).view(-1)
         iq_res = fusion[:, 0].view(-1)
 
         return iq_res
 
-    @torch.no_grad()
-    def check_prompt(self, data_type):
-        return data_type in self.prompt_feature
-
     @torch.no_grad()
     def forward_prompt(self, x, score, data_type):
         _x = self.vit(x)
@@ -304,63 +282,13 @@ class MoNet(nn.Module):
         IQ_feature = rearrange(IQ_feature, 'c d (w h) -> c d w h', w=self.input_size, h=self.input_size)  # bs, 768, 28, 28
         img_feature = self.cnn(IQ_feature).squeeze(-1).squeeze(-1).unsqueeze(1)  # bs, 1, 128
 
-        # linearly project the score to 128 dims
-        # score_feature = self.score_projection(score)  # bs, 128
         score_feature = score.expand(-1, 128)
 
-        # fuse img_feature and score_feature into funsion_feature
         funsion_feature = self.i_p_fusion(torch.cat((img_feature, score_feature.unsqueeze(1)), dim=1))  # bs, 2, 128
         funsion_feature = self.mlp(torch.mean(funsion_feature, dim=1)).unsqueeze(0)  # 1, n, 128
 
-        # print('Load Prompt For Testing.', funsion_feature.shape)
-        # self.prompt_feature = funsion_feature.clone()
         self.prompt_feature[data_type] = funsion_feature.clone()
 
-    def forward(self, x, score):
-        _x = self.vit(x)
-        x = self.extract_feature(self.save_output)  # bs, 28 * 28, 768 * 4
-        self.save_output.outputs.clear()
-
-        x = x.permute(0, 2, 1).contiguous()  # bs, 768 * 4, 28 * 28
-        x = rearrange(x, 'b (d n) (w h) -> b d n w h', d=4, n=self.dim_mlp, w=self.input_size, h=self.input_size)  # bs, 4, 768, 28, 28
-        x = x.permute(1, 0, 2, 3, 4).contiguous()  # 4, bs, 768, 28, 28
-
-        # Different Opinion Features (DOF)
-        DOF = torch.tensor([]).cuda()
-        for index, _ in enumerate(self.MALs):
-            DOF = torch.cat((DOF, self.MALs[index](x).unsqueeze(0)), dim=0)
-        DOF = rearrange(DOF, 'n c d (w h) -> n c d w h', w=self.input_size, h=self.input_size)  # M, bs, 768, 28, 28
-
-        # Image Quality Score Regression
-        fusion_mal = self.fusion_mal(DOF).permute(0, 2, 1).contiguous()  # bs, 28 * 28, 768
-        IQ_feature = self.block(fusion_mal).permute(0, 2, 1).contiguous()  # bs, 768, 28 * 28
-        IQ_feature = rearrange(IQ_feature, 'c d (w h) -> c d w h', w=self.input_size, h=self.input_size)  # bs, 768, 28, 28
-        img_feature = self.cnn(IQ_feature).squeeze(-1).squeeze(-1).unsqueeze(1)  # bs, 1, 128
-
-        # linearly project the score to 128 dims
-        # score_feature = self.score_projection(score)  # bs, 128
-        score_feature = score.expand(-1, 128)  # bs, 128
-
-        # fuse img_feature and score_feature into funsion_feature
-        funsion_feature = self.i_p_fusion(torch.cat((img_feature, score_feature.unsqueeze(1)), dim=1))  # bs, 2, 128
-        funsion_feature = self.mlp(torch.mean(funsion_feature, dim=1))  # bs, 128
-        funsion_feature = self.expand(funsion_feature)  # bs, bs - 1, 128
-        funsion_feature = self.prompt_fusion(funsion_feature)  # bs, bs - 1, 128
-
-        fusion = self.blocks(torch.cat((img_feature, funsion_feature), dim=1))  # bs, 2, 1
-        fusion = self.norm(fusion)
-        fusion = self.score_block(fusion)
-        # iq_res = torch.mean(fusion, dim=1).view(-1)
-        iq_res = fusion[:, 0].view(-1)
-
-        # differ_fusion = self.diff_block(torch.cat((img_feature, funsion_feature), dim=1))  # bs, n, 1
-        # differ_iq_res = torch.mean(differ_fusion, dim=1).view(-1)
-
-        gt_res = score.view(-1)
-        # diff_gt_res = 1 - score.view(-1)
-
-        return (iq_res, 'differ_iq_res'), (gt_res, 'diff_gt_res')
-
     def extract_feature(self, save_output, block_index=[2, 5, 8, 11]):
         x1 = save_output.outputs[block_index[0]][:, 1:]
         x2 = save_output.outputs[block_index[1]][:, 1:]
@@ -381,13 +309,3 @@ class MoNet(nn.Module):
             B = torch.cat((B, rmv), dim=0)
 
         return B
-
-if __name__ == '__main__':
-    in_feature = torch.zeros((10, 3, 224, 224)).cuda()
-    gt_feature = torch.tensor([[1], [2], [3], [4], [5], [6], [7], [8], [9], [10]], dtype=torch.float).cuda()
-    model = MoNet().cuda()
-
-    iq_res, gt_res = model(in_feature, gt_feature)
-
-    print(iq_res.shape)
-    print(gt_res.shape)
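For orientation, a minimal usage sketch of the renamed model follows. It is not part of the commit; it assumes the post-commit layout (PromptIQA/models/promptiqa.py exposing the PromptIQA class), a CUDA device (the model allocates tensors with .cuda() internally), and hypothetical example images with scores in [0, 1].

import torch
from PromptIQA.models.promptiqa import PromptIQA  # assumed import path after the rename

model = PromptIQA().cuda().eval()

# 1) Store a prompt built from a few example images with known scores.
prompt_imgs = torch.rand(10, 3, 224, 224).cuda()                 # hypothetical examples
prompt_scores = torch.linspace(0, 1, 10).reshape(-1, 1).cuda()   # one score per image
model.forward_prompt(prompt_imgs, prompt_scores, 'my_dataset')

# 2) Score a new image against the stored prompt.
query = torch.rand(1, 3, 224, 224).cuda()
with torch.no_grad():
    score = model.inference(query, 'my_dataset')
print(score)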
PromptIQA/models/vit_base.py
DELETED

@@ -1,402 +0,0 @@
"""
The completion for Mean-opinion Network(MoNet)
"""
import torch
import torch.nn as nn
import timm

from timm.models.vision_transformer import Block
from einops import rearrange
from itertools import combinations

from tqdm import tqdm


class Attention_Block(nn.Module):
    def __init__(self, dim, drop=0.1):
        super().__init__()
        self.c_q = nn.Linear(dim, dim)
        self.c_k = nn.Linear(dim, dim)
        self.c_v = nn.Linear(dim, dim)
        self.norm_fact = dim ** -0.5
        self.softmax = nn.Softmax(dim=-1)
        self.proj_drop = nn.Dropout(drop)

    def forward(self, x):
        _x = x
        B, C, N = x.shape
        q = self.c_q(x)
        k = self.c_k(x)
        v = self.c_v(x)

        attn = q @ k.transpose(-2, -1) * self.norm_fact
        attn = self.softmax(attn)
        x = (attn @ v).transpose(1, 2).reshape(B, C, N)
        x = self.proj_drop(x)
        x = x + _x
        return x


class Self_Attention(nn.Module):
    """ Self attention Layer"""

    def __init__(self, in_dim):
        super(Self_Attention, self).__init__()

        self.qConv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1)
        self.kConv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1)
        self.vConv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1)
        self.gamma = nn.Parameter(torch.zeros(1))

        self.softmax = nn.Softmax(dim=-1)

    def forward(self, inFeature):
        bs, C, w, h = inFeature.size()

        proj_query = self.qConv(inFeature).view(bs, -1, w * h).permute(0, 2, 1).contiguous()
        proj_key = self.kConv(inFeature).view(bs, -1, w * h)
        energy = torch.bmm(proj_query, proj_key)
        attention = self.softmax(energy)
        proj_value = self.vConv(inFeature).view(bs, -1, w * h)

        out = torch.bmm(proj_value, attention.permute(0, 2, 1).contiguous())
        out = out.view(bs, C, w, h)

        out = self.gamma * out + inFeature

        return out


class three_cnn(nn.Module):
    def __init__(self, in_dim) -> None:
        super().__init__()

        self.three_cnn = nn.Sequential(
            nn.Conv2d(in_dim, in_dim // 2, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_dim // 2, in_dim // 2, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_dim // 2, in_dim, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        )

    def forward(self, input):
        return self.three_cnn(input)


class MAL(nn.Module):
    def __init__(self, in_dim=768, feature_num=4, feature_size=28):
        super().__init__()
        self.attention_module = nn.ModuleList()
        for i in range(feature_num):
            self.attention_module.append(three_cnn(in_dim))

        self.feature_num = feature_num
        self.in_dim = in_dim
        self.feature_size = feature_size

    def forward(self, features):
        feature = torch.tensor([]).cuda()
        for index, _ in enumerate(features):
            feature = torch.cat((feature, self.attention_module[index](features[index]).unsqueeze(1)), dim=1)
        feature = torch.mean(feature, dim=1)
        features = feature.view(-1, self.in_dim, self.feature_size * self.feature_size)

        return features  # bs, 768, 28 * 28


class SaveOutput:
    def __init__(self):
        self.outputs = []

    def __call__(self, module, module_in, module_out):
        self.outputs.append(module_out)

    def clear(self):
        self.outputs = []


# utils
@torch.no_grad()
def concat_all_gather(tensor):
    """
    Performs all_gather operation on the provided tensors.
    *** Warning ***: torch.distributed.all_gather has no gradient.
    """
    tensors_gather = [
        torch.ones_like(tensor) for _ in range(torch.distributed.get_world_size())
    ]
    torch.distributed.all_gather(tensors_gather, tensor, async_op=False)

    output = torch.cat(tensors_gather, dim=0)
    return output


class Attention(nn.Module):
    def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.):
        super().__init__()
        assert dim % num_heads == 0, 'dim should be divisible by num_heads'
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = head_dim ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, N, C = x.shape
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv.unbind(0)  # make torchscript happy (cannot use tensor as tuple)

        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x


from functools import partial


class MoNet(nn.Module):
    def __init__(self, patch_size=8, drop=0.1, dim_mlp=768, img_size=224):
        super().__init__()
        self.img_size = img_size
        self.input_size = img_size // patch_size
        self.dim_mlp = dim_mlp

        self.vit = timm.create_model('vit_base_patch8_224', pretrained=True)
        self.vit.norm = nn.Identity()
        self.vit.head = nn.Identity()

        self.save_output = SaveOutput()

        # Register Hooks
        hook_handles = []
        for layer in self.vit.modules():
            if isinstance(layer, Block):
                handle = layer.register_forward_hook(self.save_output)
                hook_handles.append(handle)

        self.MALs = nn.ModuleList()
        for _ in range(1):
            self.MALs.append(MAL())

        # Image Quality Score Regression
        self.fusion_mal = MAL(feature_num=1)
        self.block = Block(dim_mlp, 12)
        self.cnn = nn.Sequential(
            nn.Conv2d(dim_mlp, 256, 5),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((2, 2)),
            nn.Conv2d(256, 128, 3),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((2, 2)),
            nn.Conv2d(128, 128, 3),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((3, 3)),
        )

        self.i_p_fusion = nn.Sequential(
            Block(128, 4),
            Block(128, 4),
            Block(128, 4),
        )
        self.mlp = nn.Sequential(
            nn.Linear(128, 64),
            nn.GELU(),
            nn.Linear(64, 128),
        )

        self.prompt_fusion = nn.Sequential(
            Block(128, 4),
            Block(128, 4),
            Block(128, 4),
        )

        dpr = [x.item() for x in torch.linspace(0, 0, 8)]  # stochastic depth decay rule
        self.blocks = nn.Sequential(*[
            Block(
                dim=128, num_heads=4, mlp_ratio=4, qkv_bias=True, drop=0,
                attn_drop=0, drop_path=dpr[i], norm_layer=partial(nn.LayerNorm, eps=1e-6), act_layer=nn.GELU)
            for i in range(8)])
        self.norm = nn.LayerNorm(128)

        self.score_block = nn.Sequential(
            nn.Linear(128, 128 // 2),
            nn.ReLU(),
            nn.Dropout(drop),
            nn.Linear(128 // 2, 1),
            nn.Sigmoid()
        )

        self.prompt_feature = {}

    @torch.no_grad()
    def clear(self):
        self.prompt_feature = {}

    @torch.no_grad()
    def inference(self, x, data_type):
        prompt_feature = self.prompt_feature[data_type]  # 1, n, 128

        _x = self.vit(x)
        x = self.extract_feature(self.save_output)  # bs, 28 * 28, 768 * 4
        self.save_output.outputs.clear()

        x = x.permute(0, 2, 1).contiguous()  # bs, 768 * 4, 28 * 28
        x = rearrange(x, 'b (d n) (w h) -> b d n w h', d=4, n=self.dim_mlp, w=self.input_size, h=self.input_size)  # bs, 4, 768, 28, 28
        x = x.permute(1, 0, 2, 3, 4).contiguous()  # bs, 4, 768, 28 * 28

        # Different Opinion Features (DOF)
        DOF = torch.tensor([]).cuda()
        for index, _ in enumerate(self.MALs):
            DOF = torch.cat((DOF, self.MALs[index](x).unsqueeze(0)), dim=0)
        DOF = rearrange(DOF, 'n c d (w h) -> n c d w h', w=self.input_size, h=self.input_size)  # M, bs, 768, 28, 28

        # Image Quality Score Regression
        fusion_mal = self.fusion_mal(DOF).permute(0, 2, 1).contiguous()  # bs, 28 * 28, 768
        IQ_feature = self.block(fusion_mal).permute(0, 2, 1).contiguous()  # bs, 768, 28 * 28
        IQ_feature = rearrange(IQ_feature, 'c d (w h) -> c d w h', w=self.input_size, h=self.input_size)  # bs, 768, 28, 28
        img_feature = self.cnn(IQ_feature).squeeze(-1).squeeze(-1).unsqueeze(1)  # bs, 1, 128

        prompt_feature = prompt_feature.repeat(img_feature.shape[0], 1, 1)  # bs, n, 128
        prompt_feature = self.prompt_fusion(prompt_feature)  # bs, n, 128

        fusion = self.blocks(torch.cat((img_feature, prompt_feature), dim=1))  # bs, 2, 1
        fusion = self.norm(fusion)
        fusion = self.score_block(fusion)

        # iq_res = torch.mean(fusion, dim=1).view(-1)
        iq_res = fusion[:, 0].view(-1)

        return iq_res

    @torch.no_grad()
    def check_prompt(self, data_type):
        return data_type in self.prompt_feature

    @torch.no_grad()
    def forward_prompt(self, x, score, data_type):
        if data_type in self.prompt_feature:
            return
        _x = self.vit(x)
        x = self.extract_feature(self.save_output)  # bs, 28 * 28, 768 * 4
        self.save_output.outputs.clear()

        x = x.permute(0, 2, 1).contiguous()  # bs, 768 * 4, 28 * 28
        x = rearrange(x, 'b (d n) (w h) -> b d n w h', d=4, n=self.dim_mlp, w=self.input_size, h=self.input_size)  # bs, 4, 768, 28, 28
        x = x.permute(1, 0, 2, 3, 4).contiguous()  # 4, bs, 768, 28, 28

        # Different Opinion Features (DOF)
        DOF = torch.tensor([]).cuda()
        for index, _ in enumerate(self.MALs):
            DOF = torch.cat((DOF, self.MALs[index](x).unsqueeze(0)), dim=0)
        DOF = rearrange(DOF, 'n c d (w h) -> n c d w h', w=self.input_size, h=self.input_size)  # M, bs, 768, 28, 28

        # Image Quality Score Regression
        fusion_mal = self.fusion_mal(DOF).permute(0, 2, 1).contiguous()  # bs, 28 * 28, 768
        IQ_feature = self.block(fusion_mal).permute(0, 2, 1).contiguous()  # bs, 768, 28 * 28
        IQ_feature = rearrange(IQ_feature, 'c d (w h) -> c d w h', w=self.input_size, h=self.input_size)  # bs, 768, 28, 28
        img_feature = self.cnn(IQ_feature).squeeze(-1).squeeze(-1).unsqueeze(1)  # bs, 1, 128

        # linearly project the score to 128 dims
        # score_feature = self.score_projection(score)  # bs, 128
        score_feature = score.expand(-1, 128)

        # fuse img_feature and score_feature into funsion_feature
        funsion_feature = self.i_p_fusion(torch.cat((img_feature, score_feature.unsqueeze(1)), dim=1))  # bs, 2, 128
        funsion_feature = self.mlp(torch.mean(funsion_feature, dim=1)).unsqueeze(0)  # 1, n, 128

        # print('Load Prompt For Testing.', funsion_feature.shape)
        # self.prompt_feature = funsion_feature.clone()
        self.prompt_feature[data_type] = funsion_feature.clone()

    def forward(self, x, score):
        _x = self.vit(x)
        x = self.extract_feature(self.save_output)  # bs, 28 * 28, 768 * 4
        self.save_output.outputs.clear()

        x = x.permute(0, 2, 1).contiguous()  # bs, 768 * 4, 28 * 28
        x = rearrange(x, 'b (d n) (w h) -> b d n w h', d=4, n=self.dim_mlp, w=self.input_size, h=self.input_size)  # bs, 4, 768, 28, 28
        x = x.permute(1, 0, 2, 3, 4).contiguous()  # 4, bs, 768, 28, 28

        # Different Opinion Features (DOF)
        DOF = torch.tensor([]).cuda()
        for index, _ in enumerate(self.MALs):
            DOF = torch.cat((DOF, self.MALs[index](x).unsqueeze(0)), dim=0)
        DOF = rearrange(DOF, 'n c d (w h) -> n c d w h', w=self.input_size, h=self.input_size)  # M, bs, 768, 28, 28

        # Image Quality Score Regression
        fusion_mal = self.fusion_mal(DOF).permute(0, 2, 1).contiguous()  # bs, 28 * 28, 768
        IQ_feature = self.block(fusion_mal).permute(0, 2, 1).contiguous()  # bs, 768, 28 * 28
        IQ_feature = rearrange(IQ_feature, 'c d (w h) -> c d w h', w=self.input_size, h=self.input_size)  # bs, 768, 28, 28
        img_feature = self.cnn(IQ_feature).squeeze(-1).squeeze(-1).unsqueeze(1)  # bs, 1, 128

        # linearly project the score to 128 dims
        # score_feature = self.score_projection(score)  # bs, 128
        score_feature = score.expand(-1, 128)  # bs, 128

        # fuse img_feature and score_feature into funsion_feature
        # funsion_feature = self.i_p_fusion(torch.cat((img_feature.detach(), score_feature.unsqueeze(1).detach()), dim=1))  # bs, 2, 128
        funsion_feature = self.i_p_fusion(torch.cat((img_feature, score_feature.unsqueeze(1)), dim=1))  # bs, 2, 128
        funsion_feature = self.mlp(torch.mean(funsion_feature, dim=1))  # bs, 128
        funsion_feature = self.expand(funsion_feature)  # bs, bs - 1, 128
        funsion_feature = self.prompt_fusion(funsion_feature)  # bs, bs - 1, 128

        fusion = self.blocks(torch.cat((img_feature, funsion_feature), dim=1))  # bs, 2, 1
        fusion = self.norm(fusion)
        fusion = self.score_block(fusion)
        # iq_res = torch.mean(fusion, dim=1).view(-1)
        iq_res = fusion[:, 0].view(-1)

        # differ_fusion = self.diff_block(torch.cat((img_feature, funsion_feature), dim=1))  # bs, n, 1
        # differ_iq_res = torch.mean(differ_fusion, dim=1).view(-1)

        gt_res = score.view(-1)
        # diff_gt_res = 1 - score.view(-1)

        return (iq_res, 'differ_iq_res'), (gt_res, 'diff_gt_res')

    def extract_feature(self, save_output, block_index=None):
        block_index = [2, 5, 8, 11]
        x1 = save_output.outputs[block_index[0]][:, 1:]
        x2 = save_output.outputs[block_index[1]][:, 1:]
        x3 = save_output.outputs[block_index[2]][:, 1:]
        x4 = save_output.outputs[block_index[3]][:, 1:]
        x = torch.cat((x1, x2, x3, x4), dim=2)
        return x

    def expand(self, A):
        A_expanded = A.unsqueeze(0).expand(A.size(0), -1, -1)

        B = None
        for index, i in enumerate(A_expanded):
            rmv = torch.cat((i[:index], i[index + 1:])).unsqueeze(0)
            if B is None:
                B = rmv
            else:
                B = torch.cat((B, rmv), dim=0)

        return B


if __name__ == '__main__':
    in_feature = torch.zeros((11, 3, 384, 384)).cuda()
    gt_feature = torch.tensor(
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], dtype=torch.float).cuda()
    gt_feature = gt_feature.reshape(-1, 1)
    model = MoNet().cuda()

    (iq_res, _), (_, _) = model(in_feature, gt_feature)

    print(iq_res.shape)
    # print(gt_res.shape)
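The deleted backbones above all rely on the same hook pattern: a SaveOutput callable is registered as a forward hook on every transformer Block, so a single call to the ViT leaves each block's output available for extract_feature. The following standalone sketch (not repository code; it uses plain Linear layers as stand-ins for the ViT blocks) shows that mechanism in isolation.

import torch
import torch.nn as nn

class SaveOutput:
    def __init__(self):
        self.outputs = []
    def __call__(self, module, module_in, module_out):
        self.outputs.append(module_out)
    def clear(self):
        self.outputs = []

# Stand-in for the ViT: three "blocks" whose intermediate outputs we want to keep.
blocks = nn.Sequential(nn.Linear(8, 8), nn.Linear(8, 8), nn.Linear(8, 8))
save_output = SaveOutput()
handles = [layer.register_forward_hook(save_output) for layer in blocks]

_ = blocks(torch.randn(2, 8))
print(len(save_output.outputs))      # 3 captured activations, one per block
print(save_output.outputs[0].shape)  # torch.Size([2, 8])
save_output.clear()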
PromptIQA/models/vit_large.py
DELETED

@@ -1,405 +0,0 @@
"""
The completion for Mean-opinion Network(MoNet)
"""
import torch
import torch.nn as nn
import timm

from timm.models.vision_transformer import Block
from einops import rearrange
from itertools import combinations

from tqdm import tqdm


class Attention_Block(nn.Module):
    def __init__(self, dim, drop=0.1):
        super().__init__()
        self.c_q = nn.Linear(dim, dim)
        self.c_k = nn.Linear(dim, dim)
        self.c_v = nn.Linear(dim, dim)
        self.norm_fact = dim ** -0.5
        self.softmax = nn.Softmax(dim=-1)
        self.proj_drop = nn.Dropout(drop)

    def forward(self, x):
        _x = x
        B, C, N = x.shape
        q = self.c_q(x)
        k = self.c_k(x)
        v = self.c_v(x)

        attn = q @ k.transpose(-2, -1) * self.norm_fact
        attn = self.softmax(attn)
        x = (attn @ v).transpose(1, 2).reshape(B, C, N)
        x = self.proj_drop(x)
        x = x + _x
        return x


class Self_Attention(nn.Module):
    """ Self attention Layer"""

    def __init__(self, in_dim):
        super(Self_Attention, self).__init__()

        self.qConv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1)
        self.kConv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1)
        self.vConv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1)
        self.gamma = nn.Parameter(torch.zeros(1))

        self.softmax = nn.Softmax(dim=-1)

    def forward(self, inFeature):
        bs, C, w, h = inFeature.size()

        proj_query = self.qConv(inFeature).view(bs, -1, w * h).permute(0, 2, 1).contiguous()
        proj_key = self.kConv(inFeature).view(bs, -1, w * h)
        energy = torch.bmm(proj_query, proj_key)
        attention = self.softmax(energy)
        proj_value = self.vConv(inFeature).view(bs, -1, w * h)

        out = torch.bmm(proj_value, attention.permute(0, 2, 1).contiguous())
        out = out.view(bs, C, w, h)

        out = self.gamma * out + inFeature

        return out


class three_cnn(nn.Module):
    def __init__(self, in_dim) -> None:
        super().__init__()

        self.three_cnn = nn.Sequential(
            nn.Conv2d(in_dim, in_dim // 2, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_dim // 2, in_dim // 2, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_dim // 2, in_dim, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        )

    def forward(self, input):
        return self.three_cnn(input)


class MAL(nn.Module):
    def __init__(self, in_dim=768, feature_num=4, feature_size=28):
        super().__init__()
        self.attention_module = nn.ModuleList()
        for i in range(feature_num):
            self.attention_module.append(three_cnn(in_dim))

        self.feature_num = feature_num
        self.in_dim = in_dim
        self.feature_size = feature_size

    def forward(self, features):
        feature = torch.tensor([]).cuda()
        for index, _ in enumerate(features):
            feature = torch.cat((feature, self.attention_module[index](features[index]).unsqueeze(1)), dim=1)
        feature = torch.mean(feature, dim=1)
        features = feature.view(-1, self.in_dim, self.feature_size * self.feature_size)

        return features  # bs, 768, 28 * 28


class SaveOutput:
    def __init__(self):
        self.outputs = []

    def __call__(self, module, module_in, module_out):
        self.outputs.append(module_out)

    def clear(self):
        self.outputs = []


# utils
@torch.no_grad()
def concat_all_gather(tensor):
    """
    Performs all_gather operation on the provided tensors.
    *** Warning ***: torch.distributed.all_gather has no gradient.
    """
    tensors_gather = [
        torch.ones_like(tensor) for _ in range(torch.distributed.get_world_size())
    ]
    torch.distributed.all_gather(tensors_gather, tensor, async_op=False)

    output = torch.cat(tensors_gather, dim=0)
    return output


class Attention(nn.Module):
    def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.):
        super().__init__()
        assert dim % num_heads == 0, 'dim should be divisible by num_heads'
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = head_dim ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, N, C = x.shape
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv.unbind(0)  # make torchscript happy (cannot use tensor as tuple)

        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x


from functools import partial


class MoNet(nn.Module):
    def __init__(self, patch_size=32, drop=0.1, dim_mlp=1024, img_size=384):
        super().__init__()
        self.img_size = img_size
        self.input_size = img_size // patch_size
        self.dim_mlp = dim_mlp

        self.vit = timm.create_model('vit_large_patch32_384', pretrained=True)
        self.vit.norm = nn.Identity()
        self.vit.head = nn.Identity()
        self.vit.head_drop = nn.Identity()

        self.save_output = SaveOutput()

        # Register Hooks
        hook_handles = []
        for layer in self.vit.modules():
            if isinstance(layer, Block):
                handle = layer.register_forward_hook(self.save_output)
                hook_handles.append(handle)

        self.MALs = nn.ModuleList()
        for _ in range(3):
            self.MALs.append(MAL(in_dim=dim_mlp, feature_size=self.input_size))

        # Image Quality Score Regression
        self.fusion_mal = MAL(in_dim=dim_mlp, feature_num=3, feature_size=self.input_size)
        self.block = Block(dim_mlp, 16)
        self.cnn = nn.Sequential(
            nn.Conv2d(dim_mlp, 512, 5),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((2, 2)),  # 4
            nn.Conv2d(512, 256, 3, 1),  # 2
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((2, 2)),
        )

        self.i_p_fusion = nn.Sequential(
            Block(256, 8),
            Block(256, 8),
            Block(256, 8),
        )
        self.mlp = nn.Sequential(
            nn.Linear(256, 128),
            nn.GELU(),
            nn.Linear(128, 256),
        )

        self.prompt_fusion = nn.Sequential(
            Block(256, 8),
            Block(256, 8),
            Block(256, 8),
        )

        dpr = [x.item() for x in torch.linspace(0, 0, 8)]  # stochastic depth decay rule
        self.blocks = nn.Sequential(*[
            Block(dim=256, num_heads=8, mlp_ratio=4, qkv_bias=True, attn_drop=0, drop_path=dpr[i])
            for i in range(8)])
        self.norm = nn.LayerNorm(256)

        self.score_block = nn.Sequential(
            nn.Linear(256, 256 // 2),
            nn.ReLU(),
            nn.Dropout(drop),
            nn.Linear(256 // 2, 1),
            nn.Sigmoid()
        )
        self.prompt_feature = {}

    @torch.no_grad()
    def clear(self):
        self.prompt_feature = {}

    @torch.no_grad()
    def inference(self, x, data_type):
        prompt_feature = self.prompt_feature[data_type]  # 1, n, 128

        _x = self.vit(x)
        x = self.extract_feature(self.save_output)  # bs, 28 * 28, 768 * 4
        self.save_output.outputs.clear()

        x = x.permute(0, 2, 1).contiguous()  # bs, 768 * 4, 28 * 28
        x = rearrange(x, 'b (d n) (w h) -> b d n w h', d=4, n=self.dim_mlp, w=self.input_size,
                      h=self.input_size)  # bs, 4, 768, 28, 28
        x = x.permute(1, 0, 2, 3, 4).contiguous()  # bs, 4, 768, 28 * 28

        # Different Opinion Features (DOF)
        DOF = torch.tensor([]).cuda()
        for index, _ in enumerate(self.MALs):
            DOF = torch.cat((DOF, self.MALs[index](x).unsqueeze(0)), dim=0)
        DOF = rearrange(DOF, 'n c d (w h) -> n c d w h', w=self.input_size, h=self.input_size)  # M, bs, 768, 28, 28

        # Image Quality Score Regression
        fusion_mal = self.fusion_mal(DOF).permute(0, 2, 1).contiguous()  # bs, 28 * 28, 768
        IQ_feature = self.block(fusion_mal).permute(0, 2, 1).contiguous()  # bs, 768, 28 * 28
        IQ_feature = rearrange(IQ_feature, 'c d (w h) -> c d w h', w=self.input_size,
                               h=self.input_size)  # bs, 768, 28, 28
        img_feature = self.cnn(IQ_feature).squeeze(-1).squeeze(-1).unsqueeze(1)  # bs, 1, 128

        prompt_feature = prompt_feature.repeat(img_feature.shape[0], 1, 1)  # bs, n, 128
        prompt_feature = self.prompt_fusion(prompt_feature)  # bs, n, 128

        fusion = self.blocks(torch.cat((img_feature, prompt_feature), dim=1))  # bs, 2, 1
        fusion = self.norm(fusion)
        fusion = self.score_block(fusion)

        # iq_res = torch.mean(fusion, dim=1).view(-1)
        iq_res = fusion[:, 0].view(-1)

        return iq_res

    @torch.no_grad()
    def check_prompt(self, data_type):
        return data_type in self.prompt_feature

    @torch.no_grad()
    def forward_prompt(self, x, score, data_type):
        if data_type in self.prompt_feature:
            return
        _x = self.vit(x)
        x = self.extract_feature(self.save_output)  # bs, 28 * 28, 768 * 4
        self.save_output.outputs.clear()

        x = x.permute(0, 2, 1).contiguous()  # bs, 768 * 4, 28 * 28
        x = rearrange(x, 'b (d n) (w h) -> b d n w h', d=4, n=self.dim_mlp, w=self.input_size,
                      h=self.input_size)  # bs, 4, 768, 28, 28
        x = x.permute(1, 0, 2, 3, 4).contiguous()  # 4, bs, 768, 28, 28

        # Different Opinion Features (DOF)
        DOF = torch.tensor([]).cuda()
        for index, _ in enumerate(self.MALs):
            DOF = torch.cat((DOF, self.MALs[index](x).unsqueeze(0)), dim=0)
        DOF = rearrange(DOF, 'n c d (w h) -> n c d w h', w=self.input_size, h=self.input_size)  # M, bs, 768, 28, 28

        # Image Quality Score Regression
        fusion_mal = self.fusion_mal(DOF).permute(0, 2, 1).contiguous()  # bs, 28 * 28, 768
        IQ_feature = self.block(fusion_mal).permute(0, 2, 1).contiguous()  # bs, 768, 28 * 28
        IQ_feature = rearrange(IQ_feature, 'c d (w h) -> c d w h', w=self.input_size,
                               h=self.input_size)  # bs, 768, 28, 28
        img_feature = self.cnn(IQ_feature).squeeze(-1).squeeze(-1).unsqueeze(1)  # bs, 1, 128

        # linearly project the score to 128 dims
        # score_feature = self.score_projection(score)  # bs, 128
        score_feature = score.expand(-1, 256)

        # fuse img_feature and score_feature into funsion_feature
        funsion_feature = self.i_p_fusion(torch.cat((img_feature, score_feature.unsqueeze(1)), dim=1))  # bs, 2, 128
        funsion_feature = self.mlp(torch.mean(funsion_feature, dim=1)).unsqueeze(0)  # 1, n, 128

        # print('Load Prompt For Testing.', funsion_feature.shape)
        # self.prompt_feature = funsion_feature.clone()
        self.prompt_feature[data_type] = funsion_feature.clone()

    def forward(self, x, score):
        _x = self.vit(x)
        x = self.extract_feature(self.save_output)  # bs, 28 * 28, 768 * 4
        self.save_output.outputs.clear()

        x = x.permute(0, 2, 1).contiguous()  # bs, 768 * 4, 28 * 28
        x = rearrange(x, 'b (d n) (w h) -> b d n w h', d=4, n=self.dim_mlp, w=self.input_size,
                      h=self.input_size)  # bs, 4, 768, 28, 28
        x = x.permute(1, 0, 2, 3, 4).contiguous()  # 4, bs, 768, 28, 28

        # Different Opinion Features (DOF)
        DOF = torch.tensor([]).cuda()
        for index, _ in enumerate(self.MALs):
            DOF = torch.cat((DOF, self.MALs[index](x).unsqueeze(0)), dim=0)
        DOF = rearrange(DOF, 'n c d (w h) -> n c d w h', w=self.input_size, h=self.input_size)  # M, bs, 768, 28, 28
        # Image Quality Score Regression
        fusion_mal = self.fusion_mal(DOF).permute(0, 2, 1).contiguous()  # bs, 28 * 28, 768
        IQ_feature = self.block(fusion_mal).permute(0, 2, 1).contiguous()  # bs, 768, 28 * 28
        IQ_feature = rearrange(IQ_feature, 'c d (w h) -> c d w h', w=self.input_size,
                               h=self.input_size)  # bs, 768, 28, 28
        img_feature = self.cnn(IQ_feature).squeeze(-1).squeeze(-1).unsqueeze(1)  # bs, 1, 128

        # linearly project the score to 128 dims
        # score_feature = self.score_projection(score)  # bs, 128
        score_feature = score.expand(-1, 256)  # bs, 128

        # fuse img_feature and score_feature into funsion_feature; funsion_feature = self.i_p_fusion(torch.cat((
        # img_feature.detach(), score_feature.unsqueeze(1).detach()), dim=1))  # bs, 2, 128
        funsion_feature = self.i_p_fusion(torch.cat((img_feature, score_feature.unsqueeze(1)), dim=1))  # bs, 2, 128
        funsion_feature = self.mlp(torch.mean(funsion_feature, dim=1))  # bs, 128
        funsion_feature = self.expand(funsion_feature)  # bs, bs - 1, 128
        funsion_feature = self.prompt_fusion(funsion_feature)  # bs, bs - 1, 128

        fusion = self.blocks(torch.cat((img_feature, funsion_feature), dim=1))  # bs, 2, 1
        fusion = self.norm(fusion)
        fusion = self.score_block(fusion)
        # iq_res = torch.mean(fusion, dim=1).view(-1)
        iq_res = fusion[:, 0].view(-1)

        # differ_fusion = self.diff_block(torch.cat((img_feature, funsion_feature), dim=1))  # bs, n, 1
        # differ_iq_res = torch.mean(differ_fusion, dim=1).view(-1)

        gt_res = score.view(-1)
        # diff_gt_res = 1 - score.view(-1)

        return (iq_res, 'differ_iq_res'), (gt_res, 'diff_gt_res')

    def extract_feature(self, save_output, block_index=None):
        if block_index is None:
            block_index = [5, 11, 17, 23]
        x1 = save_output.outputs[block_index[0]][:, 1:]
        x2 = save_output.outputs[block_index[1]][:, 1:]
        x3 = save_output.outputs[block_index[2]][:, 1:]
        x4 = save_output.outputs[block_index[3]][:, 1:]
        x = torch.cat((x1, x2, x3, x4), dim=2)
        return x

    def expand(self, A):
        A_expanded = A.unsqueeze(0).expand(A.size(0), -1, -1)

        B = None
        for index, i in enumerate(A_expanded):
            rmv = torch.cat((i[:index], i[index + 1:])).unsqueeze(0)
            if B is None:
                B = rmv
            else:
                B = torch.cat((B, rmv), dim=0)

        return B


if __name__ == '__main__':
    in_feature = torch.zeros((11, 3, 384, 384)).cuda()
    gt_feature = torch.tensor(
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], dtype=torch.float).cuda()
    gt_feature = gt_feature.reshape(-1, 1)
    model = MoNet().cuda()

    (iq_res, _), (_, _) = model(in_feature, gt_feature)

    print(iq_res.shape)
    # print(gt_res.shape)
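The rearrange patterns in the deleted backbones assume a token grid whose side is img_size // patch_size. A quick illustrative check (not repository code) of those shape assumptions for the two ViT variants used above:

for name, img_size, patch_size in [('vit_large_patch32_384', 384, 32),
                                   ('vit_base_patch8_224', 224, 8)]:
    side = img_size // patch_size
    print(name, side, side * side)
# vit_large_patch32_384 12 144  -> 12 x 12 patch tokens per image
# vit_base_patch8_224 28 784    -> 28 x 28 patch tokens per image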
PromptIQA/run_promptIQA copy.py
DELETED
@@ -1,109 +0,0 @@
-import os
-import random
-import torchvision
-import cv2
-import torch
-from models import monet as MoNet
-import numpy as np
-from utils.dataset.process import ToTensor, Normalize
-from utils.toolkit import *
-import warnings
-warnings.filterwarnings('ignore')
-
-import sys
-sys.path.append(os.path.dirname(__file__))
-
-class PromptIQA():
-    def __init__(self) -> None:
-        pass
-
-def load_image(img_path, size=224):
-    try:
-        d_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
-        d_img = cv2.resize(d_img, (size, size), interpolation=cv2.INTER_CUBIC)
-        d_img = cv2.cvtColor(d_img, cv2.COLOR_BGR2RGB)
-        d_img = np.array(d_img).astype('float32') / 255
-        d_img = np.transpose(d_img, (2, 0, 1))
-    except:
-        print(img_path)
-
-    return d_img
-
-def load_model(pkl_path):
-
-    model = MoNet.MoNet()
-    dict_pkl = {}
-    # prompt_num = torch.load(pkl_path, map_location='cpu').get('prompt_num')
-    for key, value in torch.load(pkl_path, map_location='cpu')['state_dict'].items():
-        dict_pkl[key[7:]] = value
-    model.load_state_dict(dict_pkl)
-    print('Load Model From ', pkl_path)
-
-    return model
-
-def get_an_img_score(img_path, target):
-    transform=torchvision.transforms.Compose([Normalize(0.5, 0.5), ToTensor()])
-    values_to_insert = np.array([0.0, 1.0])
-    position_to_insert = 0
-    target = np.insert(target, position_to_insert, values_to_insert)
-
-    sample = load_image(img_path)
-    samples = {'img': sample, 'gt': target}
-    samples = transform(samples)
-
-    return samples
-
-import random
-if __name__ == '__main__':
-    pkl_path = "./checkpoints/best_model_five_22.pth.tar"
-    model = load_model(pkl_path).cuda()
-    model.eval()
-
-    img_path = '/mnt/storage/PromptIQA_Demo/CSIQ/dst_src'
-
-    img_tensor, gt_tensor = None, None
-    img_list = os.listdir(img_path)
-    random.shuffle(img_list)
-    for idx, img_name in enumerate(img_list):
-        if idx == 10:
-            break
-
-        img_name = os.path.join(img_path, img_name)
-        score = np.array(idx / 10)
-        samples = get_an_img_score(img_name, score)
-
-        if img_tensor is None:
-            img_tensor = samples['img'].unsqueeze(0)
-            gt_tensor = samples['gt'].type(torch.FloatTensor).unsqueeze(0)
-        else:
-            img_tensor = torch.cat((img_tensor, samples['img'].unsqueeze(0)), dim=0)
-            gt_tensor = torch.cat((gt_tensor, samples['gt'].type(torch.FloatTensor).unsqueeze(0)), dim=0)
-
-    print(img_tensor.shape)
-    print(gt_tensor.shape)
-    print(gt_tensor)
-
-    img = img_tensor.squeeze(0).cuda()
-    label = gt_tensor.squeeze(0).cuda()
-    reverse = False
-    if reverse == 2:
-        label = torch.rand_like(label[:, -1]).cuda()
-        print(label)
-    elif reverse == 3:
-        print('Total Random')
-        label = torch.rand_like(label[:, -1]).cuda()
-        img = torch.rand_like(img).cuda()
-    else:
-        label = label[:, -1].cuda() if not reverse else (1 - label[:, -1].cuda())
-    print('input label: ', label)
-    model.forward_prompt(img, label.reshape(-1, 1), 'livec')
-
-    img_name = '/mnt/storage/PromptIQA_Demo/CSIQ/src_imgs/1600.png'
-    score = np.array(random.random())
-    samples = get_an_img_score(img_name, score)
-
-    img = samples['img'].unsqueeze(0).cuda()
-    print(img.shape)
-    pred = model.inference(img, 'livec')
-
-    print(pred)
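
Note: the deleted demo above still imported the old models.monet module and exercised the model's two-step API: forward_prompt caches an image-score-pair prompt under a task key, and inference then scores a new image against that cached prompt. Below is a minimal sketch of the same flow against the renamed promptiqa module kept by this commit; the random tensors, the prompt length of 10, and the 'livec' task key are placeholders, not values fixed by the repository.

import torch
from PromptIQA.models import promptiqa

# Sketch only: untrained weights, random stand-in images, placeholder task key.
model = promptiqa.PromptIQA().cuda().eval()

prompt_imgs = torch.rand(10, 3, 224, 224).cuda()                # 10 prompt images
prompt_scores = torch.linspace(0, 1, 10).reshape(-1, 1).cuda()  # their known scores
model.forward_prompt(prompt_imgs, prompt_scores, 'livec')       # cache the prompt

query_img = torch.rand(1, 3, 224, 224).cuda()
pred = model.inference(query_img, 'livec')                      # score against the cached prompt
print(pred)
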
PromptIQA/run_promptIQA.py
CHANGED
@@ -3,7 +3,7 @@ import random
 import torchvision
 import cv2
 import torch
-from PromptIQA.models import
+from PromptIQA.models import promptiqa
 import numpy as np
 from PromptIQA.utils.dataset.process import ToTensor, Normalize
 from PromptIQA.utils.toolkit import *
@@ -14,7 +14,7 @@ import sys
 sys.path.append(os.path.dirname(__file__))

 def load_model(pkl_path):
-    model =
+    model = promptiqa.PromptIQA()
     dict_pkl = {}
     for key, value in torch.load(pkl_path, map_location='cpu')['state_dict'].items():
         dict_pkl[key[7:]] = value
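
One detail worth noting in the retained loader: key[7:] strips the 'module.' prefix (seven characters) that DataParallel/DistributedDataParallel prepends to every parameter name when a wrapped model is saved, so the checkpoint can be loaded into a bare model. A slightly more explicit sketch of the same step; the function name below is mine, not part of the repository.

import torch

def strip_module_prefix(pkl_path):
    # Drop the 'module.' prefix added by (Distributed)DataParallel when the model was saved.
    ckpt = torch.load(pkl_path, map_location='cpu')
    return {k[len('module.'):] if k.startswith('module.') else k: v
            for k, v in ckpt['state_dict'].items()}

# model.load_state_dict(strip_module_prefix('best_model.pth.tar'))
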
PromptIQA/t.py
DELETED
@@ -1,2 +0,0 @@
-a = "(1+1)**(2**2)"
-print(eval(a))
PromptIQA/test.py
DELETED
|
@@ -1,429 +0,0 @@
|
|
| 1 |
-
import sys
|
| 2 |
-
|
| 3 |
-
from utils import log_writer
|
| 4 |
-
|
| 5 |
-
import argparse
|
| 6 |
-
import builtins
|
| 7 |
-
import os
|
| 8 |
-
import random
|
| 9 |
-
import shutil
|
| 10 |
-
import time
|
| 11 |
-
|
| 12 |
-
import torch
|
| 13 |
-
import torch.distributed as dist
|
| 14 |
-
import torch.multiprocessing as mp
|
| 15 |
-
import torch.nn as nn
|
| 16 |
-
import torch.nn.parallel
|
| 17 |
-
import torch.optim
|
| 18 |
-
import torch.utils.data
|
| 19 |
-
import torch.utils.data.distributed
|
| 20 |
-
# from models import monet as MoNet
|
| 21 |
-
from torch.utils.data import ConcatDataset
|
| 22 |
-
from utils.dataset import data_loader
|
| 23 |
-
|
| 24 |
-
from utils.toolkit import *
|
| 25 |
-
|
| 26 |
-
loger_path = None
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
def init(config):
|
| 30 |
-
global loger_path
|
| 31 |
-
if config.dist_url == "env://" and config.world_size == -1:
|
| 32 |
-
config.world_size = int(os.environ["WORLD_SIZE"])
|
| 33 |
-
|
| 34 |
-
config.distributed = config.world_size > 1 or config.multiprocessing_distributed
|
| 35 |
-
|
| 36 |
-
print("config.distributed", config.distributed)
|
| 37 |
-
|
| 38 |
-
loger_path = os.path.join(config.save_path, "inference_log")
|
| 39 |
-
if not os.path.isdir(loger_path):
|
| 40 |
-
os.makedirs(loger_path)
|
| 41 |
-
|
| 42 |
-
print("----------------------------------")
|
| 43 |
-
print(
|
| 44 |
-
"Begin Time: ", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
|
| 45 |
-
)
|
| 46 |
-
printArgs(config, loger_path)
|
| 47 |
-
# os.environ["CUDA_VISIBLE_DEVICES"] = '2,3,4,5,6,7'
|
| 48 |
-
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
|
| 49 |
-
# os.environ["CUDA_VISIBLE_DEVICES"] = '0,1,2,3,4,5'
|
| 50 |
-
# os.environ["CUDA_VISIBLE_DEVICES"] = '6,7'
|
| 51 |
-
# os.environ["CUDA_VISIBLE_DEVICES"] = '6'
|
| 52 |
-
# setup_seed(config.seed)
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
def main(config):
|
| 56 |
-
init(config)
|
| 57 |
-
ngpus_per_node = torch.cuda.device_count()
|
| 58 |
-
if config.multiprocessing_distributed:
|
| 59 |
-
config.world_size = ngpus_per_node * config.world_size
|
| 60 |
-
|
| 61 |
-
print(config.world_size, ngpus_per_node, ngpus_per_node)
|
| 62 |
-
mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, config))
|
| 63 |
-
else:
|
| 64 |
-
# Simply call main_worker function
|
| 65 |
-
main_worker(config.gpu, ngpus_per_node, config)
|
| 66 |
-
|
| 67 |
-
print("End Time: ", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())))
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
@torch.no_grad()
|
| 71 |
-
def gather_together(data): # helper that collects `data` from every GPU and returns the gathered results as a list
|
| 72 |
-
dist.barrier()
|
| 73 |
-
world_size = dist.get_world_size()
|
| 74 |
-
gather_data = [None for _ in range(world_size)]
|
| 75 |
-
dist.all_gather_object(gather_data, data)
|
| 76 |
-
return gather_data
|
| 77 |
-
|
| 78 |
-
import importlib.util
|
| 79 |
-
def main_worker(gpu, ngpus_per_node, args):
|
| 80 |
-
models_path = os.path.join(args.save_path, "training_files", 'models', 'monet.py')
|
| 81 |
-
spec = importlib.util.spec_from_file_location("monet_module", models_path)
|
| 82 |
-
monet_module = importlib.util.module_from_spec(spec)
|
| 83 |
-
spec.loader.exec_module(monet_module)
|
| 84 |
-
MoNet = monet_module
|
| 85 |
-
|
| 86 |
-
loger_path = os.path.join(args.save_path, "inference_log")
|
| 87 |
-
if gpu == 0:
|
| 88 |
-
sys.stdout = log_writer.Logger(os.path.join(loger_path, f"inference_log_{args.prompt_type}_{args.reverse}.log"))
|
| 89 |
-
args.gpu = gpu
|
| 90 |
-
|
| 91 |
-
# suppress printing if not master
|
| 92 |
-
if args.multiprocessing_distributed and args.gpu != 0:
|
| 93 |
-
def print_pass(*args):
|
| 94 |
-
pass
|
| 95 |
-
|
| 96 |
-
builtins.print = print_pass
|
| 97 |
-
|
| 98 |
-
if args.gpu is not None:
|
| 99 |
-
print("Use GPU: {} for testing".format(args.gpu))
|
| 100 |
-
|
| 101 |
-
if args.distributed:
|
| 102 |
-
if args.dist_url == "env://" and args.rank == -1:
|
| 103 |
-
args.rank = int(os.environ["RANK"])
|
| 104 |
-
if args.multiprocessing_distributed:
|
| 105 |
-
args.rank = args.rank * ngpus_per_node + gpu
|
| 106 |
-
dist.init_process_group(
|
| 107 |
-
backend=args.dist_backend,
|
| 108 |
-
init_method=args.dist_url,
|
| 109 |
-
world_size=args.world_size,
|
| 110 |
-
rank=args.rank,
|
| 111 |
-
)
|
| 112 |
-
|
| 113 |
-
# create model
|
| 114 |
-
model = MoNet.MoNet()
|
| 115 |
-
dict_pkl = {}
|
| 116 |
-
prompt_num = torch.load(args.pkl_path, map_location='cpu').get('prompt_num')
|
| 117 |
-
for key, value in torch.load(args.pkl_path, map_location='cpu')['state_dict'].items():
|
| 118 |
-
dict_pkl[key[7:]] = value
|
| 119 |
-
model.load_state_dict(dict_pkl)
|
| 120 |
-
print('Load Model From ', args.pkl_path)
|
| 121 |
-
|
| 122 |
-
if args.distributed:
|
| 123 |
-
if args.gpu is not None:
|
| 124 |
-
torch.cuda.set_device(args.gpu)
|
| 125 |
-
model.cuda(args.gpu)
|
| 126 |
-
args.batch_size = int(args.batch_size / ngpus_per_node)
|
| 127 |
-
args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
|
| 128 |
-
model = torch.nn.parallel.DistributedDataParallel(
|
| 129 |
-
model, device_ids=[args.gpu]
|
| 130 |
-
)
|
| 131 |
-
print("Model Distribute.")
|
| 132 |
-
else:
|
| 133 |
-
model.cuda()
|
| 134 |
-
model = torch.nn.parallel.DistributedDataParallel(model)
|
| 135 |
-
|
| 136 |
-
if prompt_num is None:
|
| 137 |
-
prompt_num = args.batch_size - 1
|
| 138 |
-
prompt_num = 10
|
| 139 |
-
print('prompt_num', prompt_num)
|
| 140 |
-
|
| 141 |
-
test_prompt_list, test_data_list = {}, []
|
| 142 |
-
# fix_prompt = None
|
| 143 |
-
for dataset in args.dataset:
|
| 144 |
-
print('---Load ', dataset)
|
| 145 |
-
path, train_index, test_index = get_data(dataset=dataset, split_seed=args.seed)
|
| 146 |
-
# if dataset == 'spaq' and False:
|
| 147 |
-
if dataset == 'spaq':
|
| 148 |
-
for column in range(2, 8):
|
| 149 |
-
print('sapq column train', column)
|
| 150 |
-
test_dataset = data_loader.Data_Loader(args.batch_size, dataset, path, test_index, istrain=False, column=column)
|
| 151 |
-
test_data_list.append(test_dataset.get_samples())
|
| 152 |
-
|
| 153 |
-
train_dataset = data_loader.Data_Loader(args.batch_size, dataset, path, train_index, istrain=False, column=column)
|
| 154 |
-
test_prompt_list[dataset+f'_{column}'] = train_dataset.get_prompt(prompt_num, args.prompt_type)
|
| 155 |
-
else:
|
| 156 |
-
test_dataset = data_loader.Data_Loader(args.batch_size, dataset, path, test_index, istrain=False, types=args.types)
|
| 157 |
-
test_data_list.append(test_dataset.get_samples())
|
| 158 |
-
|
| 159 |
-
train_dataset = data_loader.Data_Loader(args.batch_size, dataset, path, train_index, istrain=False, types=args.types)
|
| 160 |
-
test_prompt_list[dataset] = train_dataset.get_prompt(prompt_num, args.prompt_type)
|
| 161 |
-
print('args.prompt_type', args.prompt_type)
|
| 162 |
-
|
| 163 |
-
combined_test_samples = ConcatDataset(test_data_list)
|
| 164 |
-
print("test_dataset", len(combined_test_samples))
|
| 165 |
-
test_sampler = torch.utils.data.distributed.DistributedSampler(combined_test_samples)
|
| 166 |
-
|
| 167 |
-
test_loader = torch.utils.data.DataLoader(
|
| 168 |
-
combined_test_samples,
|
| 169 |
-
batch_size=1,
|
| 170 |
-
shuffle=(test_sampler is None),
|
| 171 |
-
num_workers=args.workers,
|
| 172 |
-
sampler=test_sampler,
|
| 173 |
-
drop_last=False,
|
| 174 |
-
pin_memory=True,
|
| 175 |
-
)
|
| 176 |
-
|
| 177 |
-
if args.distributed:
|
| 178 |
-
test_sampler.set_epoch(0)
|
| 179 |
-
|
| 180 |
-
for idxsa in range(1):
|
| 181 |
-
test_srocc, test_plcc, pred_scores, gt_scores, path = test(
|
| 182 |
-
test_loader, model, test_prompt_list, reverse=args.reverse
|
| 183 |
-
)
|
| 184 |
-
print('gt_scores', len(pred_scores), len(gt_scores))
|
| 185 |
-
print('Summary---')
|
| 186 |
-
|
| 187 |
-
gt_scores = gather_together(gt_scores) # gather the per-rank lists from all GPUs into one list
|
| 188 |
-
pred_scores = gather_together(pred_scores) # gather the per-rank lists from all GPUs into one list
|
| 189 |
-
|
| 190 |
-
gt_score_dict, pred_score_dict = {}, {}
|
| 191 |
-
for sublist in gt_scores:
|
| 192 |
-
for k, v in sublist.items():
|
| 193 |
-
if k not in gt_score_dict:
|
| 194 |
-
gt_score_dict[k] = v
|
| 195 |
-
else:
|
| 196 |
-
gt_score_dict[k] = gt_score_dict[k] + v
|
| 197 |
-
|
| 198 |
-
for sublist in pred_scores:
|
| 199 |
-
for k, v in sublist.items():
|
| 200 |
-
if k not in pred_score_dict:
|
| 201 |
-
pred_score_dict[k] = v
|
| 202 |
-
else:
|
| 203 |
-
pred_score_dict[k] = pred_score_dict[k] + v
|
| 204 |
-
|
| 205 |
-
gt_score_dict = dict(sorted(gt_score_dict.items()))
|
| 206 |
-
test_srocc, test_plcc = 0, 0
|
| 207 |
-
for k, v in gt_score_dict.items():
|
| 208 |
-
test_srocc_, test_plcc_ = cal_srocc_plcc(gt_score_dict[k], pred_score_dict[k])
|
| 209 |
-
print('\t{} Test SROCC: {}, PLCC: {}'.format(k, round(test_srocc_, 4), round(test_plcc_, 4)))
|
| 210 |
-
# print('Pred: ', pred_score_dict[k][:10])
|
| 211 |
-
# print('GT: ', gt_score_dict[k][:10])
|
| 212 |
-
# print('-----')
|
| 213 |
-
|
| 214 |
-
with open('{}_{}.csv'.format(idxsa, k), 'w') as f:
|
| 215 |
-
for i, j in zip(gt_score_dict[k], pred_score_dict[k]):
|
| 216 |
-
f.write('{},{}\n'.format(i, j))
|
| 217 |
-
test_srocc += test_srocc_
|
| 218 |
-
test_plcc += test_plcc_
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
def test(test_loader, MoNet, promt_data_loader, reverse=False):
|
| 222 |
-
"""Training"""
|
| 223 |
-
pred_scores = {}
|
| 224 |
-
gt_scores = {}
|
| 225 |
-
path = []
|
| 226 |
-
|
| 227 |
-
batch_time = AverageMeter("Time", ":6.3f")
|
| 228 |
-
srocc = AverageMeter("SROCC", ":6.2f")
|
| 229 |
-
plcc = AverageMeter("PLCC", ":6.2f")
|
| 230 |
-
progress = ProgressMeter(
|
| 231 |
-
len(test_loader),
|
| 232 |
-
[batch_time, srocc, plcc],
|
| 233 |
-
prefix="Testing ",
|
| 234 |
-
)
|
| 235 |
-
|
| 236 |
-
print('reverse ----', reverse)
|
| 237 |
-
MoNet.train(False)
|
| 238 |
-
with torch.no_grad():
|
| 239 |
-
for index, (img_or, label_or, paths, dataset_type) in enumerate(test_loader):
|
| 240 |
-
# print(dataset_type)
|
| 241 |
-
t = time.time()
|
| 242 |
-
dataset_type = dataset_type[0]
|
| 243 |
-
|
| 244 |
-
has_prompt = False
|
| 245 |
-
if hasattr(MoNet.module, 'check_prompt'):
|
| 246 |
-
has_prompt = MoNet.module.check_prompt(dataset_type)
|
| 247 |
-
|
| 248 |
-
if not has_prompt:
|
| 249 |
-
print('Load Prompt For ', dataset_type)
|
| 250 |
-
prompt_dataset = promt_data_loader[dataset_type]
|
| 251 |
-
for img, label in prompt_dataset:
|
| 252 |
-
img = img.squeeze(0).cuda()
|
| 253 |
-
label = label.squeeze(0).cuda()
|
| 254 |
-
if reverse == 2:
|
| 255 |
-
# label = torch.tensor([random.random() for i in range(len(label[:, -1]))]).cuda()
|
| 256 |
-
#
|
| 257 |
-
label = torch.rand_like(label[:, -1]).cuda()
|
| 258 |
-
print(label)
|
| 259 |
-
elif reverse == 3:
|
| 260 |
-
print('Total Random')
|
| 261 |
-
label = torch.rand_like(label[:, -1]).cuda()
|
| 262 |
-
img = torch.rand_like(img).cuda()
|
| 263 |
-
else:
|
| 264 |
-
label = label[:, -1].cuda() if not reverse else (1 - label[:, -1].cuda())
|
| 265 |
-
MoNet.module.forward_prompt(img, label.reshape(-1, 1), dataset_type)
|
| 266 |
-
|
| 267 |
-
img = img_or.squeeze(0).cuda()
|
| 268 |
-
label = label_or.squeeze(0).cuda()[:, 2]
|
| 269 |
-
|
| 270 |
-
# print(img.shape)
|
| 271 |
-
|
| 272 |
-
pred = MoNet.module.inference(img, dataset_type)
|
| 273 |
-
|
| 274 |
-
if dataset_type not in pred_scores:
|
| 275 |
-
pred_scores[dataset_type] = []
|
| 276 |
-
|
| 277 |
-
if dataset_type not in gt_scores:
|
| 278 |
-
gt_scores[dataset_type] = []
|
| 279 |
-
|
| 280 |
-
pred_scores[dataset_type] = pred_scores[dataset_type] + pred.cpu().tolist()
|
| 281 |
-
gt_scores[dataset_type] = gt_scores[dataset_type] + label.cpu().tolist()
|
| 282 |
-
path = path + list(paths)
|
| 283 |
-
|
| 284 |
-
batch_time.update(time.time() - t)
|
| 285 |
-
|
| 286 |
-
if index % 100 == 0:
|
| 287 |
-
for k, v in pred_scores.items():
|
| 288 |
-
test_srocc, test_plcc = cal_srocc_plcc(pred_scores[k], gt_scores[k])
|
| 289 |
-
# print('\t{}, SROCC: {}, PLCC: {}'.format(k, round(test_srocc, 4), round(test_plcc, 4)))
|
| 290 |
-
srocc.update(test_srocc)
|
| 291 |
-
plcc.update(test_plcc)
|
| 292 |
-
|
| 293 |
-
progress.display(index)
|
| 294 |
-
|
| 295 |
-
MoNet.module.clear()
|
| 296 |
-
# MoNet.train(True)
|
| 297 |
-
return 'test_srocc', 'test_plcc', pred_scores, gt_scores, path
|
| 298 |
-
|
| 299 |
-
if __name__ == "__main__":
|
| 300 |
-
parser = argparse.ArgumentParser()
|
| 301 |
-
parser.add_argument(
|
| 302 |
-
"--seed",
|
| 303 |
-
dest="seed",
|
| 304 |
-
type=int,
|
| 305 |
-
default=570908,
|
| 306 |
-
help="Random seeds for result reproduction.",
|
| 307 |
-
)
|
| 308 |
-
|
| 309 |
-
parser.add_argument(
|
| 310 |
-
"--mal_num",
|
| 311 |
-
dest="mal_num",
|
| 312 |
-
type=int,
|
| 313 |
-
default=2,
|
| 314 |
-
help="The number of the MAL modules.",
|
| 315 |
-
)
|
| 316 |
-
|
| 317 |
-
# data related
|
| 318 |
-
parser.add_argument(
|
| 319 |
-
"--dataset",
|
| 320 |
-
dest="dataset",
|
| 321 |
-
nargs='+', default=None,
|
| 322 |
-
help="Support datasets: livec|koniq10k|bid|spaq",
|
| 323 |
-
)
|
| 324 |
-
|
| 325 |
-
# training related
|
| 326 |
-
parser.add_argument(
|
| 327 |
-
"--queue_ratio",
|
| 328 |
-
dest="queue_ratio",
|
| 329 |
-
type=float,
|
| 330 |
-
default=0.6,
|
| 331 |
-
help="Ratio of queue length used in GC loss to training set length.",
|
| 332 |
-
)
|
| 333 |
-
|
| 334 |
-
parser.add_argument(
|
| 335 |
-
"--loss",
|
| 336 |
-
dest="loss",
|
| 337 |
-
type=str,
|
| 338 |
-
default="MSE",
|
| 339 |
-
help="Loss function to use. Support losses: GC|MAE|MSE.",
|
| 340 |
-
)
|
| 341 |
-
|
| 342 |
-
parser.add_argument(
|
| 343 |
-
"--lr", dest="lr", type=float, default=1e-5, help="Learning rate"
|
| 344 |
-
)
|
| 345 |
-
|
| 346 |
-
parser.add_argument(
|
| 347 |
-
"--weight_decay",
|
| 348 |
-
dest="weight_decay",
|
| 349 |
-
type=float,
|
| 350 |
-
default=1e-5,
|
| 351 |
-
help="Weight decay",
|
| 352 |
-
)
|
| 353 |
-
parser.add_argument(
|
| 354 |
-
"--batch_size", dest="batch_size", type=int, default=11, help="Batch size"
|
| 355 |
-
)
|
| 356 |
-
parser.add_argument(
|
| 357 |
-
"--epochs", dest="epochs", type=int, default=50, help="Epochs for training"
|
| 358 |
-
)
|
| 359 |
-
parser.add_argument(
|
| 360 |
-
"--T_max",
|
| 361 |
-
dest="T_max",
|
| 362 |
-
type=int,
|
| 363 |
-
default=50,
|
| 364 |
-
help="Hyper-parameter for CosineAnnealingLR",
|
| 365 |
-
)
|
| 366 |
-
parser.add_argument(
|
| 367 |
-
"--eta_min",
|
| 368 |
-
dest="eta_min",
|
| 369 |
-
type=int,
|
| 370 |
-
default=0,
|
| 371 |
-
help="Hyper-parameter for CosineAnnealingLR",
|
| 372 |
-
)
|
| 373 |
-
|
| 374 |
-
parser.add_argument(
|
| 375 |
-
"-j",
|
| 376 |
-
"--workers",
|
| 377 |
-
default=32,
|
| 378 |
-
type=int,
|
| 379 |
-
metavar="N",
|
| 380 |
-
help="number of data loading workers (default: 32)",
|
| 381 |
-
)
|
| 382 |
-
|
| 383 |
-
# result related
|
| 384 |
-
parser.add_argument(
|
| 385 |
-
"--save_path",
|
| 386 |
-
dest="save_path",
|
| 387 |
-
type=str,
|
| 388 |
-
default="./save_logs/Matrix_Comparation_Koniq_bs_25",
|
| 389 |
-
help="The path where the model and logs will be saved.",
|
| 390 |
-
)
|
| 391 |
-
|
| 392 |
-
parser.add_argument(
|
| 393 |
-
"--world-size",
|
| 394 |
-
default=-1,
|
| 395 |
-
type=int,
|
| 396 |
-
help="number of nodes for distributed training",
|
| 397 |
-
)
|
| 398 |
-
parser.add_argument(
|
| 399 |
-
"--rank", default=-1, type=int, help="node rank for distributed training"
|
| 400 |
-
)
|
| 401 |
-
parser.add_argument(
|
| 402 |
-
"--dist-url",
|
| 403 |
-
default="tcp://224.66.41.62:23456",
|
| 404 |
-
type=str,
|
| 405 |
-
help="url used to set up distributed training",
|
| 406 |
-
)
|
| 407 |
-
parser.add_argument(
|
| 408 |
-
"--dist-backend", default="nccl", type=str, help="distributed backend"
|
| 409 |
-
)
|
| 410 |
-
parser.add_argument(
|
| 411 |
-
"--multiprocessing-distributed",
|
| 412 |
-
action="store_true",
|
| 413 |
-
help="Use multi-processing distributed training to launch "
|
| 414 |
-
"N processes per node, which has N GPUs. This is the "
|
| 415 |
-
"fastest way to use PyTorch for either single node or "
|
| 416 |
-
"multi node data parallel training",
|
| 417 |
-
)
|
| 418 |
-
|
| 419 |
-
parser.add_argument("--gpu", default=None, type=int, help="GPU id to use.")
|
| 420 |
-
parser.add_argument("--pkl_path", required=True, type=str)
|
| 421 |
-
parser.add_argument("--prompt_type", required=True, type=str)
|
| 422 |
-
parser.add_argument("--reverse", required=True, type=int)
|
| 423 |
-
parser.add_argument("--types", default='SSIM', type=str)
|
| 424 |
-
|
| 425 |
-
config = parser.parse_args()
|
| 426 |
-
|
| 427 |
-
config.save_path = os.path.dirname(config.pkl_path)
|
| 428 |
-
|
| 429 |
-
main(config)
|
|
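
The deleted harness above collected per-dataset prediction and ground-truth lists on each GPU, gathered them across ranks with dist.all_gather_object, merged the per-rank dictionaries, and only then computed SROCC/PLCC through cal_srocc_plcc from utils.toolkit. A minimal sketch of that merge-then-correlate step, assuming the project helper wraps the usual Spearman and Pearson coefficients (that mapping is my assumption, not something the diff states):

from scipy import stats

def merge_rank_dicts(gathered):
    # 'gathered' is a list with one {dataset_name: [scores]} dict per rank.
    merged = {}
    for rank_dict in gathered:
        for name, scores in rank_dict.items():
            merged.setdefault(name, []).extend(scores)
    return merged

def srocc_plcc(gt, pred):
    # SROCC = Spearman rank-order correlation, PLCC = Pearson linear correlation.
    return stats.spearmanr(gt, pred)[0], stats.pearsonr(gt, pred)[0]

# gt_all, pred_all = merge_rank_dicts(gt_scores), merge_rank_dicts(pred_scores)
# for name in gt_all:
#     print(name, srocc_plcc(gt_all[name], pred_all[name]))
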
PromptIQA/test.sh
DELETED
@@ -1,9 +0,0 @@
-# python test.py --dist-url 'tcp://localhost:10055' --dataset spaq tid2013 livec bid spaq flive --batch_size 50 --prompt_type fix --multiprocessing-distributed --world-size 1 --rank 0 --reverse 0 --pkl_path /disk1/chenzewen/OurIdeas/GIQA/GIQA_2024/FourTask/N_F_A_U_RandomScale_MAE_loaderDebug_Rate95/best_model_five_52.pth.tar
-# python test.py --dist-url 'tcp://localhost:12755' --dataset csiq --batch_size 50 --prompt_type fix --multiprocessing-distributed --world-size 1 --rank 0 --reverse 3 --seed 2024 --pkl_path /disk1/chenzewen/OurIdeas/GIQA/GIQA_2024/Training_log/FourTask/N_F_A_U_RandomScale_MAE_loaderDebug_Rate95/best_model_five_52.pth.tar
-python test.py --dist-url 'tcp://localhost:12755' --dataset livec bid csiq --batch_size 50 --prompt_type random --multiprocessing-distributed --world-size 1 --rank 0 --reverse 2 --seed 2026 --pkl_path /disk1/chenzewen/OurIdeas/GIQA/GIQA_2024/Formal/PromptIQA_2026/best_model_five_92.pth.tar
-# reverse 0 no, 1 yes, 2 random
-
-python test.py --dist-url 'tcp://localhost:12755' --dataset tid2013_other --batch_size 50 --prompt_type random --multiprocessing-distributed --world-size 1 --rank 0 --reverse 2 --seed 2026 --pkl_path /disk1/chenzewen/OurIdeas/GIQA/GIQA_2024/Formal/PromptIQA_2026/best_model_five_92.pth.tar --types 'SSIM'
-
-
-CUDA_VISIBLE_DEVICES="0" python test.py --dist-url 'tcp://localhost:12755' --dataset tid2013_other --batch_size 50 --prompt_type random --multiprocessing-distributed --world-size 1 --rank 0 --reverse 2 --seed 2024 --pkl_path /disk1/chenzewen/OurIdeas/GIQA/GIQA_2024/Publication/PromptIQA_2024_WO_Norm_Score/best_model_five_22.pth.tar --types 'SSIM'
best_model.pth.tar
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:993555b9efaeae660d2dd6f4056f13c6957628ca592a2ce74ff2e8eb5a4a2280
+size 1272842308
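
The three lines above are a Git LFS pointer, so a plain clone only contains this stub; the actual checkpoint (roughly 1.27 GB) is fetched with git lfs pull. Once the object is present, it could be handed to the loader kept in run_promptIQA.py. This is a sketch under two assumptions: the repository root is on the Python path, and load_model still returns the loaded model as its deleted copy did.

from PromptIQA.run_promptIQA import load_model  # assumption: importable from the repo root

model = load_model('best_model.pth.tar').cuda()
model.eval()
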
get_examplt.py
DELETED
@@ -1,27 +0,0 @@
-import os
-from copy import deepcopy
-
-
-isp_json = []
-path = './Examples'
-for img_dir in sorted(os.listdir(path)):
-    if os.path.isdir(os.path.join(path, img_dir)):
-        ISPP = os.path.join(path, img_dir, 'ISPP')
-
-        ispp = {}
-        ispp['Example_id'] = img_dir
-        ispp['ISPP'] = []
-        img_list = []
-        for idx, img in enumerate(sorted(os.listdir(ISPP))):
-            ispp['ISPP'].append([os.path.join(ISPP, img), idx / 10 if '1' in img_dir else 1 - idx / 10])
-
-        for file in os.listdir(os.path.join(path, img_dir)):
-            if os.path.isfile(os.path.join(path, img_dir, file)):
-                img_list.append(file)
-        ispp['Image'] = [os.path.join(path, img_dir, file), 7]
-        ispp['Remark'] = []
-        isp_json.append(deepcopy(ispp))
-
-with open('example2.json', 'w') as f:
-    import json
-    json.dump(isp_json, f, indent=4)
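
For context, the deleted helper walked ./Examples and wrote example2.json with one record per example folder: an ISPP list of [image path, normalised score] pairs plus a query Image entry. An illustrative record of the shape it produced is sketched below; the folder name and file names are made up, while the keys and the fixed value 7 come from the script above.

example_record = {
    "Example_id": "Example_1",
    "ISPP": [
        ["./Examples/Example_1/ISPP/img_0.png", 0.0],  # [prompt image, score in [0, 1]]
        ["./Examples/Example_1/ISPP/img_1.png", 0.1],
    ],
    "Image": ["./Examples/Example_1/query.png", 7],    # image to be scored
    "Remark": [],
}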