Merge branch 'master' of https://github.com/Texera/texera into Texera-master
Texera · Jan 21, 2025 · 0b7e7bc · 0b7e7bc
2 parents 28ed130 + 586496c
commit 0b7e7bc
Show file tree

Hide file tree

Showing 604 changed files with 9,414 additions and 23,455 deletions.
diff --git a/.github/workflows/github-action-build.yml b/.github/workflows/github-action-build.yml
@@ -56,7 +56,7 @@ jobs:
   core:
     strategy:
       matrix:
-        os: [ ubuntu-latest ]
+        os: [ ubuntu-22.04 ]
         java-version: [ 11 ]
     runs-on: ${{ matrix.os }}
     env:

diff --git a/.gitignore b/.gitignore
@@ -102,4 +102,10 @@ StoredCredential*
 **/apache2/
 **/Apache24/
 **/php/
-Composer-Setup.exe
+Composer-Setup.exe
+
+# Ignoring folders generated by vscode IDE
+.metals/
+.bloop/
+.ammonite/
+metals.sbt
diff --git a/README.md b/README.md
@@ -1,17 +1,21 @@
 <h1 align="center">Texera - Collaborative Data Science and AI/ML Using Workflows</h1>
 
 <p align="center">
-  <img src="core/gui/src/assets/logos/full_logo_small.png" alt="texera-logo" width="192px" height="109px"/>
+  <a href="https://texera.io"> <img src="core/gui/src/assets/logos/full_logo_small.png" alt="texera-logo" width="192px" height="109px"/> </a>
   <br>
   <i>Texera supports scalable data computation and enables advanced AI/ML techniques.</i>
   <br>
   <i>"Collaboration" is a key focus, and we enable an experience similar to Google Docs, but for data science. </i>
   <br>
 
   <h4 align="center">
-    <a href="https://github.com/Texera/texera#videos">Demo Video</a>
+    <a href="https://texera.io">Official Site</a>
     |
-    <a href="https://texera.github.io/blog/">Blogs</a>
+    <a href="https://texera.io/publications/">Publications</a>
+    |
+    <a href="https://texera.io/category/video/">Video</a>
+    | 
+    <a href="https://texera.io/category/blog/">Blog</a>
     |
     <a href="https://github.com/Texera/texera/wiki/Getting-Started">Getting Started</a>
     <br>
@@ -29,13 +33,6 @@
   <img alt="Static Badge" src="https://img.shields.io/badge/Largest_Deployment-100_nodes,_400_cores-green">
 </p>
 
-# Motivation
-
-* Data science is labor-intensive and particularly challenging for non-IT users applying AI/ML.
-* Many workflow-based data science platforms lack parallelism, limiting their ability to handle big datasets.
-* Cloud services and technologies have advanced significantly over the past decade, enabling powerful browser-based interfaces supported by high-speed networks.
-* Existing data science platforms offer limited interaction during long-running jobs, making them difficult to manage after execution begins.
-
 # Goals
 
 * Provide data science as cloud services;
@@ -148,59 +145,6 @@ The workflow in the use case shown below includes data cleaning, ML model traini
   _In JAMIA 2021_ | [PDF](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7989302/pdf/ocab047.pdf)
 </details>
 
-
-# Education
-<table>
-  <tr style="height: 500px;">
-    <td align="center">
-      <a href="https://ds4all.ics.uci.edu/">
-        <img src="https://ds4all.ics.uci.edu/wp-content/uploads/2023/07/banner-1024x576.png">
-      </a>
-      <p><b>Data Science for All</b></p>
-      An NSF-funded summer program to teach high-school students data science and AI/ML
-    </td>
-    <td align="center">
-      <a href="https://canvas.eee.uci.edu/courses/63639/pages/syllabus">
-        <img src="https://github.com/user-attachments/assets/a7569fd3-6857-48b4-80dc-d9f006ae2c8f">
-      </a>
-      <p><b>ICS 80: Data Science and AI/ML Using Workflows</b></p>
-      A Spring 2024 course at UCI, teaching 42 undergraduates, most of whom are not computer science majors, to learn data science and AI/ML
-    </td>
-    <td align="center">
-      <a href="https://sites.google.com/uci.edu/ds-workshop2024/home">
-        <img src="https://www.cerritos.edu/_resources/images/common/cerritos-college-logo.svg">
-      </a>
-      <p><b>Workshop of Data Science for Everyone at Cerritos College</b></p>
-      A two-day workshop designed for non-CS students to learn data science and ML without a single line of coding
-    </td>
-  </tr>
-</table>
-
-
-# Videos
-<table>
-  <tr style="height: 500px;">
-    <td align="center">
-      <a href="https://www.youtube.com/watch?v=B81iMFS5fPc">
-        <img src="https://img.youtube.com/vi/B81iMFS5fPc/0.jpg"  alt="Watch the video">
-      </a>
-      <p><b>dkNET Webinar 04/26/2024</b></p>
-    </td>
-    <td align="center">
-      <a href="https://www.youtube.com/watch?v=SP-XiDADbw0">
-        <img src="https://img.youtube.com/vi/SP-XiDADbw0/0.jpg" alt="Watch the video">
-      </a>
-      <p><b>Texera Demo @ VLDB'20</b></p>
-    </td>
-    <td align="center">
-      <a href="https://www.youtube.com/watch?v=T5ShFRfHmgI">
-        <img src="https://img.youtube.com/vi/T5ShFRfHmgI/0.jpg"  alt="Watch the video">
-      </a>
-      <p><b>Amber Presentation @ VLDB'20</b></p>
-    </td>
-  </tr>
-</table>
-
 # Getting Started
 
 * For users, visit [Guide to Use Texera](https://github.com/Texera/texera/wiki/Getting-Started).

diff --git a/core/amber/src/main/protobuf/edu/uci/ics/amber/engine/architecture/rpc/controlcommands.proto b/core/amber/src/main/protobuf/edu/uci/ics/amber/engine/architecture/rpc/controlcommands.proto
@@ -1,8 +1,9 @@
 syntax = "proto3";
 package edu.uci.ics.amber.engine.architecture.rpc;
 
-import "edu/uci/ics/amber/virtualidentity.proto";
-import "edu/uci/ics/amber/workflow.proto";
+import "edu/uci/ics/amber/core/virtualidentity.proto";
+import "edu/uci/ics/amber/core/workflow.proto";
+import "edu/uci/ics/amber/core/executor.proto";
 import "edu/uci/ics/amber/engine/architecture/worker/statistics.proto";
 import "edu/uci/ics/amber/engine/architecture/sendsemantics/partitionings.proto";
 import "scalapb/scalapb.proto";
@@ -58,8 +59,8 @@ message EmptyRequest{}
 
 message AsyncRPCContext {
   option (scalapb.message).no_box = true;
-  ActorVirtualIdentity sender = 1 [(scalapb.field).no_box = true];
-  ActorVirtualIdentity receiver = 2 [(scalapb.field).no_box = true];
+  core.ActorVirtualIdentity sender = 1 [(scalapb.field).no_box = true];
+  core.ActorVirtualIdentity receiver = 2 [(scalapb.field).no_box = true];
 }
 
 message ControlInvocation {
@@ -79,25 +80,25 @@ enum ChannelMarkerType {
 // Message for ChannelMarkerPayload
 message ChannelMarkerPayload {
   option (scalapb.message).extends = "edu.uci.ics.amber.engine.common.ambermessage.WorkflowFIFOMessagePayload";
-  ChannelMarkerIdentity id = 1 [(scalapb.field).no_box = true];
+  core.ChannelMarkerIdentity id = 1 [(scalapb.field).no_box = true];
   ChannelMarkerType markerType = 2;
-  repeated ChannelIdentity scope = 3;
+  repeated core.ChannelIdentity scope = 3;
   map<string, ControlInvocation> commandMapping = 4;
 }
 
 message PropagateChannelMarkerRequest {
-  repeated PhysicalOpIdentity sourceOpToStartProp = 1;
-  ChannelMarkerIdentity id = 2 [(scalapb.field).no_box = true];
+  repeated core.PhysicalOpIdentity sourceOpToStartProp = 1;
+  core.ChannelMarkerIdentity id = 2 [(scalapb.field).no_box = true];
   ChannelMarkerType markerType = 3;
-  repeated PhysicalOpIdentity scope = 4;
-  repeated PhysicalOpIdentity targetOps = 5;
+  repeated core.PhysicalOpIdentity scope = 4;
+  repeated core.PhysicalOpIdentity targetOps = 5;
   ControlRequest markerCommand = 6;
   string markerMethodName = 7;
 }
 
 message TakeGlobalCheckpointRequest {
   bool estimationOnly = 1;
-  ChannelMarkerIdentity checkpointId = 2 [(scalapb.field).no_box = true];
+  core.ChannelMarkerIdentity checkpointId = 2 [(scalapb.field).no_box = true];
   string destination = 3;
 }
 
@@ -122,7 +123,7 @@ message ModifyLogicRequest {
 }
 
 message RetryWorkflowRequest {
-  repeated ActorVirtualIdentity workers = 1;
+  repeated core.ActorVirtualIdentity workers = 1;
 }
 
 enum ConsoleMessageType{
@@ -147,7 +148,7 @@ message ConsoleMessageTriggeredRequest {
 }
 
 message PortCompletedRequest {
-  PortIdentity portId = 1 [(scalapb.field).no_box = true];
+  core.PortIdentity portId = 1 [(scalapb.field).no_box = true];
   bool input = 2;
 }
 
@@ -156,21 +157,21 @@ message WorkerStateUpdatedRequest {
 }
 
 message LinkWorkersRequest {
-  PhysicalLink link = 1 [(scalapb.field).no_box = true];
+  core.PhysicalLink link = 1 [(scalapb.field).no_box = true];
 }
 
 // Ping message
 message Ping {
   int32 i = 1;
   int32 end = 2;
-  ActorVirtualIdentity to = 3 [(scalapb.field).no_box = true];
+  core.ActorVirtualIdentity to = 3 [(scalapb.field).no_box = true];
 }
 
 // Pong message
 message Pong {
   int32 i = 1;
   int32 end = 2;
-  ActorVirtualIdentity to = 3 [(scalapb.field).no_box = true];
+  core.ActorVirtualIdentity to = 3 [(scalapb.field).no_box = true];
 }
 
 // Pass message
@@ -185,7 +186,7 @@ message Nested {
 
 // MultiCall message
 message MultiCall {
-  repeated ActorVirtualIdentity seq = 1;
+  repeated core.ActorVirtualIdentity seq = 1;
 }
 
 // ErrorCommand message
@@ -194,7 +195,7 @@ message ErrorCommand {
 
 // Collect message
 message Collect {
-  repeated ActorVirtualIdentity workers = 1;
+  repeated core.ActorVirtualIdentity workers = 1;
 }
 
 // GenerateNumber message
@@ -203,7 +204,7 @@ message GenerateNumber {
 
 // Chain message
 message Chain {
-  repeated ActorVirtualIdentity nexts = 1;
+  repeated core.ActorVirtualIdentity nexts = 1;
 }
 
 // Recursion message
@@ -213,44 +214,43 @@ message Recursion {
 
 // Messages for the commands
 message AddInputChannelRequest {
-  ChannelIdentity channelId = 1 [(scalapb.field).no_box = true];
-  PortIdentity portId = 2 [(scalapb.field).no_box = true];
+  core.ChannelIdentity channelId = 1 [(scalapb.field).no_box = true];
+  core.PortIdentity portId = 2 [(scalapb.field).no_box = true];
 }
 
 message AddPartitioningRequest {
-  PhysicalLink tag = 1 [(scalapb.field).no_box = true];
+  core.PhysicalLink tag = 1 [(scalapb.field).no_box = true];
   sendsemantics.Partitioning partitioning = 2 [(scalapb.field).no_box = true];
 }
 
 message AssignPortRequest {
-  PortIdentity portId = 1 [(scalapb.field).no_box = true];
+  core.PortIdentity portId = 1 [(scalapb.field).no_box = true];
   bool input = 2;
   map<string, string> schema = 3;
 }
 
 message FinalizeCheckpointRequest {
-  ChannelMarkerIdentity checkpointId = 1 [(scalapb.field).no_box = true];
+  core.ChannelMarkerIdentity checkpointId = 1 [(scalapb.field).no_box = true];
   string writeTo = 2;
 }
 
 message InitializeExecutorRequest {
   int32 totalWorkerCount = 1;
-  google.protobuf.Any opExecInitInfo = 2 [(scalapb.field).no_box = true];
+  core.OpExecInitInfo opExecInitInfo = 2;
   bool isSource = 3;
-  string language = 4;
 }
 
 message UpdateExecutorRequest {
-  PhysicalOpIdentity targetOpId = 1 [(scalapb.field).no_box = true];
+  core.PhysicalOpIdentity targetOpId = 1 [(scalapb.field).no_box = true];
   google.protobuf.Any newExecutor = 2 [(scalapb.field).no_box = true];
   google.protobuf.Any stateTransferFunc = 3;
 }
 
 message PrepareCheckpointRequest{
-  ChannelMarkerIdentity checkpointId = 1 [(scalapb.field).no_box = true];
+  core.ChannelMarkerIdentity checkpointId = 1 [(scalapb.field).no_box = true];
   bool estimationOnly = 2;
 }
 
 message QueryStatisticsRequest{
-  repeated ActorVirtualIdentity filterByWorkers = 1;
+  repeated core.ActorVirtualIdentity filterByWorkers = 1;
 }
diff --git a/...src/main/protobuf/edu/uci/ics/amber/engine/architecture/sendsemantics/partitionings.proto b/...src/main/protobuf/edu/uci/ics/amber/engine/architecture/sendsemantics/partitionings.proto
@@ -2,6 +2,7 @@ syntax = "proto3";
 
 package edu.uci.ics.amber.engine.architecture.sendsemantics;
 
+import "edu/uci/ics/amber/core/virtualidentity.proto";
 import "scalapb/scalapb.proto";
 
 option (scalapb.options) = {
@@ -10,8 +11,6 @@ option (scalapb.options) = {
   no_default_values_in_constructor: true
 };
 
-import "edu/uci/ics/amber/virtualidentity.proto";
-
 message Partitioning{
   oneof sealed_value{
     OneToOnePartitioning oneToOnePartitioning = 1;
@@ -24,29 +23,29 @@ message Partitioning{
 
 message OneToOnePartitioning{
   int32 batchSize = 1;
-  repeated ChannelIdentity channels = 2;
+  repeated core.ChannelIdentity channels = 2;
 }
 
 message RoundRobinPartitioning{
   int32 batchSize = 1;
-  repeated ChannelIdentity channels = 2;
+  repeated core.ChannelIdentity channels = 2;
 }
 
 message HashBasedShufflePartitioning{
   int32 batchSize = 1;
-  repeated ChannelIdentity channels = 2;
+  repeated core.ChannelIdentity channels = 2;
   repeated string hashAttributeNames = 3;
 }
 
 message RangeBasedShufflePartitioning {
   int32 batchSize = 1;
-  repeated ChannelIdentity channels = 2;
+  repeated core.ChannelIdentity channels = 2;
   repeated string rangeAttributeNames = 3;
   int64 rangeMin = 4;
   int64 rangeMax = 5;
 }
 
 message BroadcastPartitioning{
   int32 batchSize = 1;
-  repeated ChannelIdentity channels = 2;
+  repeated core.ChannelIdentity channels = 2;
 }
diff --git a/core/amber/src/main/protobuf/edu/uci/ics/amber/engine/architecture/worker/statistics.proto b/core/amber/src/main/protobuf/edu/uci/ics/amber/engine/architecture/worker/statistics.proto
@@ -2,7 +2,7 @@ syntax = "proto3";
 
 package edu.uci.ics.amber.engine.architecture.worker;
 
-import "edu/uci/ics/amber/workflow.proto";
+import "edu/uci/ics/amber/core/workflow.proto";
 import "scalapb/scalapb.proto";
 
 option (scalapb.options) = {
@@ -22,7 +22,7 @@ enum WorkerState {
 }
 
 message PortTupleCountMapping {
-  PortIdentity port_id = 1 [(scalapb.field).no_box = true];
+  core.PortIdentity port_id = 1 [(scalapb.field).no_box = true];
   int64 tuple_count = 2;
 }
 

diff --git a/core/amber/src/main/protobuf/edu/uci/ics/amber/engine/common/ambermessage.proto b/core/amber/src/main/protobuf/edu/uci/ics/amber/engine/common/ambermessage.proto
@@ -4,7 +4,7 @@ package edu.uci.ics.amber.engine.common;
 
 import "edu/uci/ics/amber/engine/architecture/rpc/controlcommands.proto";
 import "edu/uci/ics/amber/engine/architecture/rpc/controlreturns.proto";
-import "edu/uci/ics/amber/virtualidentity.proto";
+import "edu/uci/ics/amber/core/virtualidentity.proto";
 import "scalapb/scalapb.proto";
 
 option (scalapb.options) = {
@@ -21,11 +21,11 @@ message ControlPayloadV2 {
 }
 
 message PythonDataHeader {
-  ActorVirtualIdentity tag = 1 [(scalapb.field).no_box = true];
+  core.ActorVirtualIdentity tag = 1 [(scalapb.field).no_box = true];
   string payload_type = 2;
 }
 
 message PythonControlMessage {
-  ActorVirtualIdentity tag = 1 [(scalapb.field).no_box = true];
+  core.ActorVirtualIdentity tag = 1 [(scalapb.field).no_box = true];
   ControlPayloadV2 payload = 2 [(scalapb.field).no_box = true];
 }