zama-ai
diff --git a/‎tfhe/src/high_level_api/integers/signed/ops.rs
Lines changed: 112 additions & 2 deletions b/‎tfhe/src/high_level_api/integers/signed/ops.rs
Lines changed: 112 additions & 2 deletions
diff --git a/‎tfhe/src/high_level_api/integers/signed/scalar_ops.rs
Lines changed: 108 additions & 1 deletion b/‎tfhe/src/high_level_api/integers/signed/scalar_ops.rs
Lines changed: 108 additions & 1 deletion
diff --git a/‎tfhe/src/high_level_api/integers/signed/tests/gpu.rs
Lines changed: 77 additions & 3 deletions b/‎tfhe/src/high_level_api/integers/signed/tests/gpu.rs
Lines changed: 77 additions & 3 deletions
@@ -5,8 +5,8 @@ use crate::high_level_api::integers::{FheIntId, FheUintId};
 use crate::high_level_api::keys::InternalServerKey;
 #[cfg(feature = "gpu")]
 use crate::high_level_api::traits::{
-    AddSizeOnGpu, BitAndSizeOnGpu, BitNotSizeOnGpu, BitOrSizeOnGpu, BitXorSizeOnGpu, SizeOnGpu,
-    SubSizeOnGpu,
+    AddSizeOnGpu, BitAndSizeOnGpu, BitNotSizeOnGpu, BitOrSizeOnGpu, BitXorSizeOnGpu,
+    FheMaxSizeOnGpu, FheMinSizeOnGpu, FheOrdSizeOnGpu, SizeOnGpu, SubSizeOnGpu,
 };
 use crate::high_level_api::traits::{
     DivRem, FheEq, FheMax, FheMin, FheOrd, RotateLeft, RotateLeftAssign, RotateRight,
@@ -2253,3 +2253,113 @@ where
         })
     }
 }
+
+/// Size queries for order comparisons between two encrypted [`FheInt`] values.
+///
+/// Each method forwards to the same-named method of the CUDA server key and
+/// yields `0` when the internal server key is not the `Cuda` variant.
+#[cfg(feature = "gpu")]
+impl<Id> FheOrdSizeOnGpu<&Self> for FheInt<Id>
+where
+    Id: FheIntId,
+{
+    /// Value reported by the CUDA key's `get_gt_size_on_gpu` for `self` and `rhs`.
+    fn get_gt_size_on_gpu(&self, rhs: &Self) -> u64 {
+        global_state::with_internal_keys(|server_key| match server_key {
+            InternalServerKey::Cuda(cuda) => with_thread_local_cuda_streams(|streams| {
+                let lhs_gpu = self.ciphertext.on_gpu(streams);
+                let rhs_gpu = rhs.ciphertext.on_gpu(streams);
+                cuda.key.key.get_gt_size_on_gpu(&*lhs_gpu, &*rhs_gpu, streams)
+            }),
+            // Any non-CUDA key: nothing to report.
+            _ => 0,
+        })
+    }
+
+    /// Value reported by the CUDA key's `get_ge_size_on_gpu` for `self` and `rhs`.
+    fn get_ge_size_on_gpu(&self, rhs: &Self) -> u64 {
+        global_state::with_internal_keys(|server_key| match server_key {
+            InternalServerKey::Cuda(cuda) => with_thread_local_cuda_streams(|streams| {
+                let lhs_gpu = self.ciphertext.on_gpu(streams);
+                let rhs_gpu = rhs.ciphertext.on_gpu(streams);
+                cuda.key.key.get_ge_size_on_gpu(&*lhs_gpu, &*rhs_gpu, streams)
+            }),
+            _ => 0,
+        })
+    }
+
+    /// Value reported by the CUDA key's `get_lt_size_on_gpu` for `self` and `rhs`.
+    fn get_lt_size_on_gpu(&self, rhs: &Self) -> u64 {
+        global_state::with_internal_keys(|server_key| match server_key {
+            InternalServerKey::Cuda(cuda) => with_thread_local_cuda_streams(|streams| {
+                let lhs_gpu = self.ciphertext.on_gpu(streams);
+                let rhs_gpu = rhs.ciphertext.on_gpu(streams);
+                cuda.key.key.get_lt_size_on_gpu(&*lhs_gpu, &*rhs_gpu, streams)
+            }),
+            _ => 0,
+        })
+    }
+
+    /// Value reported by the CUDA key's `get_le_size_on_gpu` for `self` and `rhs`.
+    fn get_le_size_on_gpu(&self, rhs: &Self) -> u64 {
+        global_state::with_internal_keys(|server_key| match server_key {
+            InternalServerKey::Cuda(cuda) => with_thread_local_cuda_streams(|streams| {
+                let lhs_gpu = self.ciphertext.on_gpu(streams);
+                let rhs_gpu = rhs.ciphertext.on_gpu(streams);
+                cuda.key.key.get_le_size_on_gpu(&*lhs_gpu, &*rhs_gpu, streams)
+            }),
+            _ => 0,
+        })
+    }
+}
+/// Size query for the minimum of two encrypted [`FheInt`] values.
+#[cfg(feature = "gpu")]
+impl<Id> FheMinSizeOnGpu<&Self> for FheInt<Id>
+where
+    Id: FheIntId,
+{
+    /// Forwards to the CUDA key's `get_min_size_on_gpu` with both operands;
+    /// returns `0` when the internal server key is not the `Cuda` variant.
+    fn get_min_size_on_gpu(&self, rhs: &Self) -> u64 {
+        global_state::with_internal_keys(|server_key| match server_key {
+            InternalServerKey::Cuda(cuda) => with_thread_local_cuda_streams(|streams| {
+                let lhs_gpu = self.ciphertext.on_gpu(streams);
+                let rhs_gpu = rhs.ciphertext.on_gpu(streams);
+                cuda.key.key.get_min_size_on_gpu(&*lhs_gpu, &*rhs_gpu, streams)
+            }),
+            _ => 0,
+        })
+    }
+}
+
+/// Size query for the maximum of two encrypted [`FheInt`] values.
+#[cfg(feature = "gpu")]
+impl<Id> FheMaxSizeOnGpu<&Self> for FheInt<Id>
+where
+    Id: FheIntId,
+{
+    /// Forwards to the CUDA key's `get_max_size_on_gpu` with both operands;
+    /// returns `0` when the internal server key is not the `Cuda` variant.
+    fn get_max_size_on_gpu(&self, rhs: &Self) -> u64 {
+        global_state::with_internal_keys(|server_key| match server_key {
+            InternalServerKey::Cuda(cuda) => with_thread_local_cuda_streams(|streams| {
+                let lhs_gpu = self.ciphertext.on_gpu(streams);
+                let rhs_gpu = rhs.ciphertext.on_gpu(streams);
+                cuda.key.key.get_max_size_on_gpu(&*lhs_gpu, &*rhs_gpu, streams)
+            }),
+            _ => 0,
+        })
+    }
+}
@@ -9,7 +9,8 @@ use crate::high_level_api::integers::FheIntId;
 use crate::high_level_api::keys::InternalServerKey;
 #[cfg(feature = "gpu")]
 use crate::high_level_api::traits::{
-    AddSizeOnGpu, BitAndSizeOnGpu, BitOrSizeOnGpu, BitXorSizeOnGpu, SubSizeOnGpu,
+    AddSizeOnGpu, BitAndSizeOnGpu, BitOrSizeOnGpu, BitXorSizeOnGpu, FheMaxSizeOnGpu,
+    FheMinSizeOnGpu, FheOrdSizeOnGpu, SubSizeOnGpu,
 };
 use crate::high_level_api::traits::{
     DivRem, FheEq, FheMax, FheMin, FheOrd, RotateLeft, RotateLeftAssign, RotateRight,
@@ -406,6 +407,112 @@ where
     }
 }
 
+/// Size queries for order comparisons between an encrypted [`FheInt`] and a clear scalar.
+///
+/// Each method forwards to the scalar size query of the CUDA server key that matches
+/// its own comparison operator, and yields `0` when the internal server key is not the
+/// `Cuda` variant. The clear operand itself is never read: only the encrypted operand
+/// is handed to the key.
+#[cfg(feature = "gpu")]
+impl<Id, Clear> FheOrdSizeOnGpu<Clear> for FheInt<Id>
+where
+    Id: FheIntId,
+    Clear: DecomposableInto<u64>,
+{
+    /// Value reported by the CUDA key's `get_scalar_gt_size_on_gpu` for `self`.
+    fn get_gt_size_on_gpu(&self, _rhs: Clear) -> u64 {
+        global_state::with_internal_keys(|key| {
+            if let InternalServerKey::Cuda(cuda_key) = key {
+                with_thread_local_cuda_streams(|streams| {
+                    // Use the `gt` query, mirroring the ciphertext/ciphertext impl,
+                    // rather than reusing the `le` one.
+                    cuda_key
+                        .key
+                        .key
+                        .get_scalar_gt_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
+                })
+            } else {
+                0
+            }
+        })
+    }
+
+    /// Value reported by the CUDA key's `get_scalar_ge_size_on_gpu` for `self`.
+    fn get_ge_size_on_gpu(&self, _rhs: Clear) -> u64 {
+        global_state::with_internal_keys(|key| {
+            if let InternalServerKey::Cuda(cuda_key) = key {
+                with_thread_local_cuda_streams(|streams| {
+                    cuda_key
+                        .key
+                        .key
+                        .get_scalar_ge_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
+                })
+            } else {
+                0
+            }
+        })
+    }
+
+    /// Value reported by the CUDA key's `get_scalar_lt_size_on_gpu` for `self`.
+    fn get_lt_size_on_gpu(&self, _rhs: Clear) -> u64 {
+        global_state::with_internal_keys(|key| {
+            if let InternalServerKey::Cuda(cuda_key) = key {
+                with_thread_local_cuda_streams(|streams| {
+                    cuda_key
+                        .key
+                        .key
+                        .get_scalar_lt_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
+                })
+            } else {
+                0
+            }
+        })
+    }
+
+    /// Value reported by the CUDA key's `get_scalar_le_size_on_gpu` for `self`.
+    fn get_le_size_on_gpu(&self, _rhs: Clear) -> u64 {
+        global_state::with_internal_keys(|key| {
+            if let InternalServerKey::Cuda(cuda_key) = key {
+                with_thread_local_cuda_streams(|streams| {
+                    cuda_key
+                        .key
+                        .key
+                        .get_scalar_le_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
+                })
+            } else {
+                0
+            }
+        })
+    }
+}
+
+/// Size query for the minimum of an encrypted [`FheInt`] and a clear scalar.
+#[cfg(feature = "gpu")]
+impl<Id, Clear> FheMinSizeOnGpu<Clear> for FheInt<Id>
+where
+    Id: FheIntId,
+    Clear: DecomposableInto<u64>,
+{
+    /// Forwards to the CUDA key's `get_scalar_min_size_on_gpu` with the encrypted
+    /// operand only (the clear value is not read); returns `0` when the internal
+    /// server key is not the `Cuda` variant.
+    fn get_min_size_on_gpu(&self, _rhs: Clear) -> u64 {
+        global_state::with_internal_keys(|server_key| match server_key {
+            InternalServerKey::Cuda(cuda) => with_thread_local_cuda_streams(|streams| {
+                let lhs_gpu = self.ciphertext.on_gpu(streams);
+                cuda.key.key.get_scalar_min_size_on_gpu(&*lhs_gpu, streams)
+            }),
+            _ => 0,
+        })
+    }
+}
+/// Size query for the maximum of an encrypted [`FheInt`] and a clear scalar.
+#[cfg(feature = "gpu")]
+impl<Id, Clear> FheMaxSizeOnGpu<Clear> for FheInt<Id>
+where
+    Id: FheIntId,
+    Clear: DecomposableInto<u64>,
+{
+    /// Forwards to the CUDA key's `get_scalar_max_size_on_gpu` with the encrypted
+    /// operand only (the clear value is not read); returns `0` when the internal
+    /// server key is not the `Cuda` variant.
+    fn get_max_size_on_gpu(&self, _rhs: Clear) -> u64 {
+        global_state::with_internal_keys(|server_key| match server_key {
+            InternalServerKey::Cuda(cuda) => with_thread_local_cuda_streams(|streams| {
+                let lhs_gpu = self.ciphertext.on_gpu(streams);
+                cuda.key.key.get_scalar_max_size_on_gpu(&*lhs_gpu, streams)
+            }),
+            _ => 0,
+        })
+    }
+}
 // DivRem is a bit special as it returns a tuple of quotient and remainder
 macro_rules! generic_integer_impl_scalar_div_rem {
     (
 
@@ -2,10 +2,10 @@ use crate::high_level_api::integers::signed::tests::{
     test_case_ilog2, test_case_leading_trailing_zeros_ones,
 };
 use crate::high_level_api::integers::unsigned::tests::gpu::setup_gpu;
-use crate::high_level_api::traits::AddSizeOnGpu;
 use crate::prelude::{
-    check_valid_cuda_malloc, BitAndSizeOnGpu, BitNotSizeOnGpu, BitOrSizeOnGpu, BitXorSizeOnGpu,
-    FheTryEncrypt, SubSizeOnGpu,
+    check_valid_cuda_malloc, AddSizeOnGpu, BitAndSizeOnGpu, BitNotSizeOnGpu, BitOrSizeOnGpu,
+    BitXorSizeOnGpu, FheMaxSizeOnGpu, FheMinSizeOnGpu, FheOrdSizeOnGpu, FheTryEncrypt,
+    SubSizeOnGpu,
 };
 use crate::shortint::parameters::PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS;
 use crate::{FheInt32, GpuIndex};
@@ -162,3 +162,77 @@ fn test_gpu_get_bitops_size_on_gpu() {
         GpuIndex::new(0)
     ));
 }
+/// Checks that every gt/ge/lt/le/max/min size query, with both an encrypted and a
+/// clear right-hand side, returns a value accepted by `check_valid_cuda_malloc`
+/// for GPU index 0.
+#[test]
+fn test_gpu_get_comparisons_size_on_gpu() {
+    let cks = setup_gpu(Some(PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS));
+    let mut rng = rand::thread_rng();
+    let clear_a = rng.gen_range(1..=i32::MAX);
+    let clear_b = rng.gen_range(1..=i32::MAX);
+
+    let mut enc_a = FheInt32::try_encrypt(clear_a, &cks).unwrap();
+    let mut enc_b = FheInt32::try_encrypt(clear_b, &cks).unwrap();
+    enc_a.move_to_current_device();
+    enc_b.move_to_current_device();
+    let (a, b) = (&enc_a, &enc_b);
+
+    // Shared predicate wrapping `check_valid_cuda_malloc` for GPU index 0.
+    let fits_on_gpu0 = |bytes: u64| check_valid_cuda_malloc(bytes, GpuIndex::new(0));
+
+    // Greater-than: encrypted rhs, then clear rhs.
+    let (enc_size, clear_size) = (a.get_gt_size_on_gpu(b), a.get_gt_size_on_gpu(clear_b));
+    assert!(fits_on_gpu0(enc_size));
+    assert!(fits_on_gpu0(clear_size));
+
+    // Greater-or-equal.
+    let (enc_size, clear_size) = (a.get_ge_size_on_gpu(b), a.get_ge_size_on_gpu(clear_b));
+    assert!(fits_on_gpu0(enc_size));
+    assert!(fits_on_gpu0(clear_size));
+
+    // Less-than.
+    let (enc_size, clear_size) = (a.get_lt_size_on_gpu(b), a.get_lt_size_on_gpu(clear_b));
+    assert!(fits_on_gpu0(enc_size));
+    assert!(fits_on_gpu0(clear_size));
+
+    // Less-or-equal.
+    let (enc_size, clear_size) = (a.get_le_size_on_gpu(b), a.get_le_size_on_gpu(clear_b));
+    assert!(fits_on_gpu0(enc_size));
+    assert!(fits_on_gpu0(clear_size));
+
+    // Maximum.
+    let (enc_size, clear_size) = (a.get_max_size_on_gpu(b), a.get_max_size_on_gpu(clear_b));
+    assert!(fits_on_gpu0(enc_size));
+    assert!(fits_on_gpu0(clear_size));
+
+    // Minimum.
+    let (enc_size, clear_size) = (a.get_min_size_on_gpu(b), a.get_min_size_on_gpu(clear_b));
+    assert!(fits_on_gpu0(enc_size));
+    assert!(fits_on_gpu0(clear_size));
+}