|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +""" |
| 3 | +============================================ |
| 4 | +OTDA unsupervised vs semi-supervised setting |
| 5 | +============================================ |
| 6 | +
|
| 7 | +This example introduces semi-supervised domain adaptation in a 2D setting. |
| 8 | +It illustrates the problem of semi-supervised domain adaptation and introduces |
| 9 | +some optimal transport approaches to solve it. |
| 10 | +
|
| 11 | +Quantities such as optimal couplings, greater coupling coefficients and |
| 12 | +transported samples are represented in order to give a visual understanding |
| 13 | +of what the transport methods are doing. |
| 14 | +""" |
| 15 | + |
| 16 | +# Authors: Remi Flamary <[email protected]> |
| 17 | +# Stanislas Chambon <[email protected]> |
| 18 | +# |
| 19 | +# License: MIT License |
| 20 | + |
| 21 | +import matplotlib.pylab as pl |
| 22 | +import ot |
| 23 | + |
| 24 | + |
##############################################################################
# generate data
##############################################################################

# The source and target domains hold the same number of samples; they are
# drawn from the '3gauss' and '3gauss2' datasets respectively. ``ys`` and
# ``yt`` are the labels that go with ``Xs`` and ``Xt``.
n_samples_source = n_samples_target = 150

Xs, ys = ot.datasets.get_data_classif('3gauss', n_samples_source)
Xt, yt = ot.datasets.get_data_classif('3gauss2', n_samples_target)
| 34 | + |
| 35 | + |
##############################################################################
# Transport source samples onto target samples
##############################################################################

# Two estimators with the same entropic regularisation: the only difference
# between the two runs is whether the labels are handed to ``fit``.
ot_sinkhorn_un = ot.da.SinkhornTransport(reg_e=1e-1)
ot_sinkhorn_semi = ot.da.SinkhornTransport(reg_e=1e-1)

# unsupervised domain adaptation: only the samples are given
ot_sinkhorn_un.fit(Xs=Xs, Xt=Xt)

# semi-supervised domain adaptation: source and target labels are given too
ot_sinkhorn_semi.fit(Xs=Xs, Xt=Xt, ys=ys, yt=yt)

# transport the source samples with each fitted estimator
transp_Xs_sinkhorn_un = ot_sinkhorn_un.transform(Xs=Xs)
transp_Xs_sinkhorn_semi = ot_sinkhorn_semi.transform(Xs=Xs)

# Semi-supervised DA uses the available labeled target samples to modify the
# cost matrix involved in the OT problem: the cost of transporting a source
# sample of class A onto a target sample of class B != A is set to infinity,
# or to a very large value.

# Note that in the present case all the target samples are considered to be
# labeled. In practical applications some target samples might not have
# labels; in that case the entries of yt corresponding to these samples
# should be filled with -1.

# Warning: as a consequence, -1 cannot be used as a class label.
| 61 | + |
| 62 | + |
##############################################################################
# Fig 1 : plots source and target samples + matrix of pairwise distance
##############################################################################

pl.figure(1, figsize=(10, 10))

# Top row: the two point clouds, coloured by their labels. The legend label
# and the subplot title are the same string for each panel.
sample_panels = [
    (1, Xs, ys, '+', 'Source samples'),
    (2, Xt, yt, 'o', 'Target samples'),
]
for position, samples, labels, marker, name in sample_panels:
    pl.subplot(2, 2, position)
    pl.scatter(samples[:, 0], samples[:, 1], c=labels, marker=marker,
               label=name)
    pl.xticks([])
    pl.yticks([])
    pl.legend(loc=0)
    pl.title(name)

# Bottom row: the cost matrix stored by each fitted estimator.
cost_panels = [
    (3, ot_sinkhorn_un, 'Cost matrix - unsupervised DA'),
    (4, ot_sinkhorn_semi, 'Cost matrix - semisupervised DA'),
]
for position, estimator, title in cost_panels:
    pl.subplot(2, 2, position)
    pl.imshow(estimator.cost_, interpolation='nearest')
    pl.xticks([])
    pl.yticks([])
    pl.title(title)

pl.tight_layout()

# The optimal coupling in the semi-supervised DA case will exhibit a shape
# similar to that of the cost matrix (block-diagonal matrix).
| 98 | + |
| 99 | + |
##############################################################################
# Fig 2 : plots optimal couplings for the different methods
##############################################################################

pl.figure(2, figsize=(8, 4))

# One panel per estimator, showing the coupling matrix found by ``fit``.
coupling_panels = [
    (1, ot_sinkhorn_un, 'Optimal coupling\nUnsupervised DA'),
    (2, ot_sinkhorn_semi, 'Optimal coupling\nSemi-supervised DA'),
]
for position, estimator, title in coupling_panels:
    pl.subplot(1, 2, position)
    pl.imshow(estimator.coupling_, interpolation='nearest')
    pl.xticks([])
    pl.yticks([])
    pl.title(title)

pl.tight_layout()
| 119 | + |
| 120 | + |
##############################################################################
# Fig 3 : plot transported samples
##############################################################################

# Display the transported source samples on top of the target samples, once
# for each estimator. The figure is numbered 3 so that it follows figures 1
# and 2 and matches the section heading.
pl.figure(3, figsize=(8, 4))

# Left panel: samples transported by the estimator fitted without labels.
pl.subplot(1, 2, 1)
pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o',
           label='Target samples', alpha=0.5)
pl.scatter(transp_Xs_sinkhorn_un[:, 0], transp_Xs_sinkhorn_un[:, 1], c=ys,
           marker='+', label='Transp samples', s=30)
# Both panels come from SinkhornTransport; what differs is the use of labels,
# so the titles name the setting (as in Fig 2) rather than the solver.
pl.title('Transported samples\nUnsupervised DA')
pl.legend(loc=0)
pl.xticks([])
pl.yticks([])

# Right panel: samples transported by the estimator fitted with labels.
pl.subplot(1, 2, 2)
pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o',
           label='Target samples', alpha=0.5)
pl.scatter(transp_Xs_sinkhorn_semi[:, 0], transp_Xs_sinkhorn_semi[:, 1], c=ys,
           marker='+', label='Transp samples', s=30)
pl.title('Transported samples\nSemi-supervised DA')
pl.legend(loc=0)  # same legend as the left panel, for consistency
pl.xticks([])
pl.yticks([])

pl.tight_layout()
pl.show()
0 commit comments