B-Xi
diff --git a/Diff for: ‎CTF-SSCL-UP-knn.py
+800 b/Diff for: ‎CTF-SSCL-UP-knn.py
+800
diff --git a/Diff for: ‎Overall.png
295 KB b/Diff for: ‎Overall.png
295 KB
diff --git a/Diff for: ‎README.md
+13 b/Diff for: ‎README.md
+13
diff --git a/Diff for: ‎gscvit.py
+358 b/Diff for: ‎gscvit.py
+358
@@ -0,0 +1,13 @@
+# CTF-SSCL
+This repo is the implementation of the following paper:
+
+**CTF-SSCL: CNN-Transformer for Few-shot Hyperspectral Image Classification Assisted by Semisupervised Contrastive Learning** (TGRS 2024), [[paper]](DOI:10.1109/TGRS.2024.3465225)
+
+## Abstract
+Few-shot learning (FSL) has rapidly advanced in the hyperspectral image (HSI) classification, potentially reducing the need for laborious and expensive labeled data collection. Due to the limited receptive field, the convolutional neural network (CNN) struggles to capture long-range dependencies for extracting global features. Additionally, the transformer focuses on global correlation while overlooking the effective representation of local spatial and spectral features. Moreover, contrastive learning has emerged as a powerful technique for improving consistency across different augmented views of samples of the same category.
+To this end, we devise a novel CNN-Transformer network for few-shot HSI classification with semisupervised contrastive learning (CTF-SSCL) to boost the classification performance. Specifically, the cascaded CNN-Transformer incorporates a lightweight spatial-spectral interactive convolution module (LSSICM) and a multi-scale transformer (MSFormer) to exploit local features from submaps and global information from the entire patch. Subsequently, the semisupervised contrastive loss, comprising unsupervised and supervised components, serves as an auxiliary to optimize the model with the classification loss. Wherein, recognizing the unified spectral-spatial information in HSI, we propose a spectral feature shift strategy (SFSS) to create sample pairs for the unsupervised contrastive learning, utilizing unsupervised contrastive loss among groups of samples with identical labels. Extensive experiments on four standard benchmarks demonstrate the effectiveness of the proposed CTF-SSCL with varying amounts of labeled samples. The code will be available online at https://github.com/B-Xi/CTF-SSCL.
+
+## Training and Test Process
+1. Prepare the training and test data as operated in the paper.
+2. Run the 'CTF-SSCL-UP-knn.py' to reproduce the CTF-SSCL results on Pavia University data set.
+
@@ -0,0 +1,358 @@
+import random
+from functools import partial
+import torch
+from torch import nn, einsum
+from einops import rearrange, repeat
+from einops.layers.torch import Rearrange, Reduce
+
+from timm.models.vision_transformer import _cfg
+
+
+def cast_tuple(val, length=1):
+    return val if isinstance(val, tuple) else ((val,) * length)
+
+
+class ChannelAdjustmentLayer1(nn.Module):
+    def __init__(self, target_channels=256):
+        super(ChannelAdjustmentLayer1, self).__init__()
+        self.target_channels = target_channels
+
+    def forward(self, x):
+        B, C, H, W = x.size()
+
+        if C == self.target_channels:
+            return x
+
+        if C < self.target_channels:
+            # 逐个通道复制，放到被复制通道的后面
+            num_channels_to_copy = self.target_channels - C
+            for i in range(num_channels_to_copy):
+                channel_to_copy = torch.randint(0, C, (1,))
+                x = torch.cat([x, x[:, channel_to_copy, :, :]], dim=1)
+
+        else:
+            # 逐个通道删除
+            num_channels_to_remove = C - self.target_channels
+            for i in range(num_channels_to_remove):
+                rand=torch.randint(0,x.shape[1],(1,))
+                x = torch.cat([x[:, :rand, :, :], x[:, rand+1:, :, :]], dim=1)
+        return x
+
+
+# 通道校准策略2
+class ChannelAdjustmentLayer2(nn.Module):
+    def __init__(self, target_channels=256):
+        super(ChannelAdjustmentLayer2, self).__init__()
+        self.target_channels = target_channels
+
+    def forward(self, x):
+        B, C, H, W = x.size()
+
+        if C == self.target_channels:
+            return x
+
+        if C < self.target_channels:
+            # 计算需要扩展的通道数
+            num_channels_to_expand = self.target_channels - C
+            # 计算每一端需要扩展的通道数
+            channels_to_expand_per_side = num_channels_to_expand // 2
+
+            # 两端均匀镜像扩展
+            x = torch.cat([x[:, :channels_to_expand_per_side+1, :, :].flip(dims=(1,)),
+                           x,
+                           x[:, -channels_to_expand_per_side:, :, :].flip(dims=(1,))], dim=1)
+
+        else:
+            # 计算需要删除的通道数
+            num_channels_to_remove = C - self.target_channels
+            # 计算每一端需要删除的通道数
+            channels_to_remove_per_side = num_channels_to_remove // 2
+
+            # 两端均匀镜像删除
+            x = x[:, channels_to_remove_per_side:-channels_to_remove_per_side, :, :]
+
+        return x[:, :self.target_channels, :, :]
+
+
+# 通道正则化
+class ChanLayerNorm(nn.Module):
+    def __init__(self, dim, eps=1e-5):
+        super().__init__()
+        self.eps = eps
+        self.g = nn.Parameter(torch.ones(1, dim, 1, 1))
+        self.b = nn.Parameter(torch.zeros(1, dim, 1, 1))
+
+    def forward(self, x):
+        var = torch.var(x, dim=1, unbiased=False, keepdim=True)
+        mean = torch.mean(x, dim=1, keepdim=True)
+        return (x - mean) / (var + self.eps).sqrt() * self.g + self.b
+
+
+class PreNorm(nn.Module):
+    def __init__(self, dim, fn):
+        super().__init__()
+        self.norm = ChanLayerNorm(dim)
+        self.fn = fn
+
+    def forward(self, x):
+        return self.fn(self.norm(x))
+
+
+# 通道校准模块
+class SpectralCalibration(nn.Module):
+    def __init__(self, dim_in, dim_out):
+        super().__init__()
+        self.conv = nn.Conv2d(dim_in, dim_out, 1)
+        self.bn = nn.BatchNorm2d(dim_out)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.relu(x)
+        return x
+
+
+class GSC(nn.Module):
+    def __init__(self, dim_in, dim_out, padding=1, num_groups=8):
+        super().__init__()
+        self.dim_out=dim_out
+        self.gpwc = nn.Conv2d(dim_in, dim_out, groups=num_groups, kernel_size=1)
+        self.dwc1 = nn.Conv2d(dim_out//2, dim_out//2, groups=dim_out//2,kernel_size=1)
+        self.dwc2 = nn.Conv2d(dim_out//2, dim_out//2, groups=dim_out//2,padding=1, kernel_size=3)
+        self.dwc3 = nn.Conv2d(dim_out//4, dim_out//4, groups=dim_out//4,padding=2,kernel_size=5)
+        self.dwc4 = nn.Conv2d(dim_out//4, dim_out//4, groups=dim_out//4,padding=3, kernel_size=7)
+        self.bn = nn.BatchNorm2d(dim_out)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        x=self.gpwc(x)
+        x1=x[:,:self.dim_out//2,:,:]
+        x2=x[:,self.dim_out//2:,:,:]
+        x3=self.dwc2(x1)
+        x4=self.dwc2(x2)
+        x5=x3+x4/10
+        x6=x4+x3/10
+        x=torch.cat((x5,x6),dim=1)+x
+        return self.relu(self.bn(x))
+
+
+class GSSA(nn.Module):
+    def __init__(
+            self,
+            dim,
+            heads=8,
+            dim_head=16,
+            dropout=0.,
+            group_spatial_size=3
+    ):
+        super().__init__()
+        self.heads = heads
+        self.scale = dim_head ** -0.5
+        self.group_spatial_size = group_spatial_size
+        inner_dim = dim_head * heads
+
+        self.attend = nn.Sequential(
+            nn.Softmax(dim=-1),
+            nn.Dropout(dropout)
+        )
+
+        self.to_qkv = nn.Conv1d(dim, inner_dim * 3, 1, bias=False)
+        self.to_qkv1 = nn.Conv1d(128, 16, 1, bias=False)
+        self.group_tokens = nn.Parameter(torch.randn(dim))
+        dim_out=128
+        self.gc = nn.Conv2d(dim_out, dim_out, kernel_size=7, groups=dim_out, stride=1)
+        
+        self.group_tokens_to_qk = nn.Sequential(
+            nn.LayerNorm(dim_head),
+            nn.GELU(),
+            Rearrange('b h n c -> b (h c) n'),
+            nn.Conv1d(inner_dim, inner_dim * 2, 1),
+            Rearrange('b (h c) n -> b h n c', h=heads),
+        )
+
+        self.group_attend = nn.Sequential(
+            nn.Softmax(dim=-1),
+            nn.Dropout(dropout)
+        )
+
+        self.to_out = nn.Sequential(
+            nn.Conv2d(inner_dim, dim, 1),
+            nn.Dropout(dropout)
+        )
+
+    def forward(self, x):
+
+        batch, height, width, heads, gss = x.shape[0], *x.shape[-2:], self.heads, self.group_spatial_size
+        assert (height % gss) == 0 and (
+                width % gss) == 0, f'height {height} and width {width} must be divisible by group spatial size {gss}'
+        num_groups = (height // gss) * (width // gss)
+        w=self.gc(x)
+
+        w= rearrange(w, 'b c h w -> (b h w) c 1')
+        
+        x = rearrange(x, 'b c (h g1) (w g2) -> (b h w) c (g1 g2)', g1=gss, g2=gss)
+
+        #w = repeat(self.group_tokens, 'c -> b c 1', b=x.shape[0])
+
+        x = torch.cat((w, x), dim=-1)
+        q, k, v = self.to_qkv(x).chunk(3, dim=1)
+
+        q, k, v = map(lambda t: rearrange(t, 'b (h d) ... -> b h (...) d', h=heads), (q, k, v))
+
+        q = q * self.scale
+
+        dots = einsum('b h i d, b h j d -> b h i j', q, k)
+
+        attn = self.attend(dots)
+
+        out = torch.matmul(attn, v)
+        group_tokens, grouped_fmaps = out[:, :, 0], out[:, :, 1:]
+
+        if num_groups == 1:
+            fmap = rearrange(grouped_fmaps, '(b x y) h (g1 g2) d -> b (h d) (x g1) (y g2)', x=height // gss,
+                             y=width // gss, g=gss, g2=gss)
+            return self.to_out(fmap)
+        
+        #group_tokens=group_tokens+w1
+
+        group_tokens = rearrange(group_tokens, '(b x y) h d -> b h (x y) d', x=height // gss, y=width // gss)
+
+        grouped_fmaps = rearrange(grouped_fmaps, '(b x y) h n d -> b h (x y) n d', x=height // gss, y=width // gss)
+
+        w_q, w_k = self.group_tokens_to_qk(group_tokens).chunk(2, dim=-1)
+
+        w_q = w_q * self.scale
+
+        w_dots = einsum('b h i d, b h j d -> b h i j', w_q, w_k)
+
+        w_attn = self.group_attend(w_dots)
+
+        aggregated_grouped_fmap = einsum('b h i j, b h j w d -> b h i w d', w_attn, grouped_fmaps)
+
+        fmap = rearrange(aggregated_grouped_fmap, 'b h (x y) (g1 g2) d -> b (h d) (x g1) (y g2)', x=height // gss,
+                         y=width // gss, g1=gss, g2=gss)
+        return self.to_out(fmap)
+
+
+class Transformer(nn.Module):
+    def __init__(
+            self,
+            dim,
+            depth,
+            dim_head=16,
+            heads=8,
+            dropout=0.,
+            norm_output=True,
+            groupsize=4
+    ):
+        super().__init__()
+        self.layers = nn.ModuleList([])
+
+        for ind in range(depth):
+            self.layers.append(
+                PreNorm(dim, GSSA(dim, group_spatial_size=groupsize, heads=heads, dim_head=dim_head, dropout=dropout))
+            )
+
+        self.norm = ChanLayerNorm(dim) if norm_output else nn.Identity()
+
+    def forward(self, x):
+        for attn in self.layers:
+            x = attn(x)
+
+        return self.norm(x)
+
+
+class GSCViT(nn.Module):
+    def __init__(
+            self,
+            *,
+            num_classes,
+            depth,
+            heads,
+            group_spatial_size,
+            channels=200,
+            dropout=0.1,
+            padding,
+            dims=(256, 128, 64, 32),
+            num_groups=[16,16,16]
+    ):
+        super().__init__()
+        num_stages = 1#len(depth)
+
+        dim_pairs = tuple(zip(dims[:-1], dims[1:]))
+        hyperparams_per_stage = [heads]
+        hyperparams_per_stage = list(map(partial(cast_tuple, length=num_stages), hyperparams_per_stage))
+        assert all(tuple(map(lambda arr: len(arr) == num_stages, hyperparams_per_stage)))
+        self.sc = SpectralCalibration(channels, 256)
+        self.bn_1 = nn.BatchNorm2d(256)
+        self.relu_1 = nn.ReLU(inplace=True)
+        self.layers_trans = nn.ModuleList([])
+        is_last=1
+        layer_dim_in=256
+        layer_dim=128
+        p=1
+        num_group=16
+        layer_depth=1
+        ind=0
+        layer_heads=1
+        self.layers_trans.append(nn.ModuleList([
+                GSC(layer_dim_in, layer_dim, p, num_group),
+                Transformer(dim=int(layer_dim), depth=layer_depth, heads=layer_heads,
+                            groupsize=group_spatial_size[ind], dropout=dropout, norm_output=not is_last),
+                nn.BatchNorm2d(layer_dim),
+                nn.ReLU(inplace=True),
+                nn.Conv2d(layer_dim,layer_dim,1),
+                
+            ]))
+
+        self.conv_last = nn.Conv2d(dims[-1], 2 * dims[-1], 3)
+
+        self.mlp_head = nn.Sequential(
+            Reduce('b d h w -> b d', 'mean'),
+            nn.LayerNorm(dims[-1]),
+            nn.Linear(dims[-1], num_classes)
+        )
+
+    def forward(self, x):
+        x = x.squeeze(dim=1)
+        x = self.sc(x)
+        x = self.bn_1(x)
+        x = self.relu_1(x)
+        
+        for peg, transformer, bn, relu, pw in self.layers_trans:
+            x = peg(x)
+            y = x
+            x = transformer(x)
+            x = pw(x) + y
+            x = bn(x)
+            x = relu(x)
+        return self.mlp_head(x)
+
+
+def gscvit(dataset):
+    model = None
+    if dataset == 'hy':
+        model = GSCViT(
+            num_classes=32,
+            channels=100,
+            heads=(1),#(1,1,1)
+            depth=(1),#(1,1,1)
+            group_spatial_size=[3, 3, 3],
+            dropout=0.1,
+            padding=[1, 1, 1],
+            dims = (256, 128),
+            num_groups=[16, 16, 16],
+        )
+    return model
+
+
+
+if __name__ == '__main__':
+    img = torch.randn(9, 100, 9, 9)
+    print("input shape:", img.shape)
+    net = gscvit(dataset='hy')
+    net.default_cfg = _cfg()
+    print("output shape:", net(img).shape)
+
+