Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/Texera/texera into Texera…
Browse files Browse the repository at this point in the history
…-master
  • Loading branch information
kunwp1 committed Jan 21, 2025
2 parents 28ed130 + 586496c commit 0b7e7bc
Show file tree
Hide file tree
Showing 604 changed files with 9,414 additions and 23,455 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/github-action-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ jobs:
core:
strategy:
matrix:
os: [ ubuntu-latest ]
os: [ ubuntu-22.04 ]
java-version: [ 11 ]
runs-on: ${{ matrix.os }}
env:
Expand Down
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,4 +102,10 @@ StoredCredential*
**/apache2/
**/Apache24/
**/php/
Composer-Setup.exe
Composer-Setup.exe

# Ignore folders and files generated by the VS Code IDE
.metals/
.bloop/
.ammonite/
metals.sbt
70 changes: 7 additions & 63 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
<h1 align="center">Texera - Collaborative Data Science and AI/ML Using Workflows</h1>

<p align="center">
<img src="core/gui/src/assets/logos/full_logo_small.png" alt="texera-logo" width="192px" height="109px"/>
<a href="https://texera.io"> <img src="core/gui/src/assets/logos/full_logo_small.png" alt="texera-logo" width="192px" height="109px"/> </a>
<br>
<i>Texera supports scalable data computation and enables advanced AI/ML techniques.</i>
<br>
<i>"Collaboration" is a key focus, and we enable an experience similar to Google Docs, but for data science. </i>
<br>

<h4 align="center">
<a href="https://github.com/Texera/texera#videos">Demo Video</a>
<a href="https://texera.io">Official Site</a>
|
<a href="https://texera.github.io/blog/">Blogs</a>
<a href="https://texera.io/publications/">Publications</a>
|
<a href="https://texera.io/category/video/">Video</a>
|
<a href="https://texera.io/category/blog/">Blog</a>
|
<a href="https://github.com/Texera/texera/wiki/Getting-Started">Getting Started</a>
<br>
Expand All @@ -29,13 +33,6 @@
<img alt="Static Badge" src="https://img.shields.io/badge/Largest_Deployment-100_nodes,_400_cores-green">
</p>

# Motivation

* Data science is labor-intensive and particularly challenging for non-IT users applying AI/ML.
* Many workflow-based data science platforms lack parallelism, limiting their ability to handle big datasets.
* Cloud services and technologies have advanced significantly over the past decade, enabling powerful browser-based interfaces supported by high-speed networks.
* Existing data science platforms offer limited interaction during long-running jobs, making them difficult to manage after execution begins.

# Goals

* Provide data science as cloud services;
Expand Down Expand Up @@ -148,59 +145,6 @@ The workflow in the use case shown below includes data cleaning, ML model traini
_In JAMIA 2021_ | [PDF](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7989302/pdf/ocab047.pdf)
</details>


# Education
<table>
<tr style="height: 500px;">
<td align="center">
<a href="https://ds4all.ics.uci.edu/">
<img src="https://ds4all.ics.uci.edu/wp-content/uploads/2023/07/banner-1024x576.png">
</a>
<p><b>Data Science for All</b></p>
An NSF-funded summer program to teach high-school students data science and AI/ML
</td>
<td align="center">
<a href="https://canvas.eee.uci.edu/courses/63639/pages/syllabus">
<img src="https://github.com/user-attachments/assets/a7569fd3-6857-48b4-80dc-d9f006ae2c8f">
</a>
<p><b>ICS 80: Data Science and AI/ML Using Workflows</b></p>
A Spring 2024 course at UCI that taught data science and AI/ML to 42 undergraduates, most of whom are not computer science majors
</td>
<td align="center">
<a href="https://sites.google.com/uci.edu/ds-workshop2024/home">
<img src="https://www.cerritos.edu/_resources/images/common/cerritos-college-logo.svg">
</a>
<p><b>Workshop of Data Science for Everyone at Cerritos College</b></p>
A two-day workshop designed for non-CS students to learn data science and ML without writing a single line of code
</td>
</tr>
</table>


# Videos
<table>
<tr style="height: 500px;">
<td align="center">
<a href="https://www.youtube.com/watch?v=B81iMFS5fPc">
<img src="https://img.youtube.com/vi/B81iMFS5fPc/0.jpg" alt="Watch the video">
</a>
<p><b>dkNET Webinar 04/26/2024</b></p>
</td>
<td align="center">
<a href="https://www.youtube.com/watch?v=SP-XiDADbw0">
<img src="https://img.youtube.com/vi/SP-XiDADbw0/0.jpg" alt="Watch the video">
</a>
<p><b>Texera Demo @ VLDB'20</b></p>
</td>
<td align="center">
<a href="https://www.youtube.com/watch?v=T5ShFRfHmgI">
<img src="https://img.youtube.com/vi/T5ShFRfHmgI/0.jpg" alt="Watch the video">
</a>
<p><b>Amber Presentation @ VLDB'20</b></p>
</td>
</tr>
</table>

# Getting Started

* For users, visit [Guide to Use Texera](https://github.com/Texera/texera/wiki/Getting-Started).
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
syntax = "proto3";
package edu.uci.ics.amber.engine.architecture.rpc;

import "edu/uci/ics/amber/virtualidentity.proto";
import "edu/uci/ics/amber/workflow.proto";
import "edu/uci/ics/amber/core/virtualidentity.proto";
import "edu/uci/ics/amber/core/workflow.proto";
import "edu/uci/ics/amber/core/executor.proto";
import "edu/uci/ics/amber/engine/architecture/worker/statistics.proto";
import "edu/uci/ics/amber/engine/architecture/sendsemantics/partitionings.proto";
import "scalapb/scalapb.proto";
Expand Down Expand Up @@ -58,8 +59,8 @@ message EmptyRequest{}

// Context of an RPC: the identities of the sending and the receiving actor.
// The message and both fields carry ScalaPB's no_box option.
message AsyncRPCContext {
option (scalapb.message).no_box = true;
// Removed in this diff (rendered without +/- markers): unqualified types.
ActorVirtualIdentity sender = 1 [(scalapb.field).no_box = true];
ActorVirtualIdentity receiver = 2 [(scalapb.field).no_box = true];
// Added in this diff: the same fields, types qualified with `core.`.
core.ActorVirtualIdentity sender = 1 [(scalapb.field).no_box = true];
core.ActorVirtualIdentity receiver = 2 [(scalapb.field).no_box = true];
}

message ControlInvocation {
Expand All @@ -79,25 +80,25 @@ enum ChannelMarkerType {
// Payload of a channel marker: the marker's id and type, the channels in its
// scope, and a string-keyed map of control invocations.
message ChannelMarkerPayload {
// ScalaPB: the generated class extends WorkflowFIFOMessagePayload.
option (scalapb.message).extends = "edu.uci.ics.amber.engine.common.ambermessage.WorkflowFIFOMessagePayload";
// Diff pair: removed line, then its `core.`-qualified replacement.
ChannelMarkerIdentity id = 1 [(scalapb.field).no_box = true];
core.ChannelMarkerIdentity id = 1 [(scalapb.field).no_box = true];
ChannelMarkerType markerType = 2;
// Diff pair: removed line, then its `core.`-qualified replacement.
repeated ChannelIdentity scope = 3;
repeated core.ChannelIdentity scope = 3;
// NOTE(review): what the string key identifies is not visible here — confirm.
map<string, ControlInvocation> commandMapping = 4;
}

// Request to propagate a channel marker. Carries the marker id and type, three
// lists of physical operator ids (start points, scope, targets), and a marker
// command given as a ControlRequest plus a method name.
message PropagateChannelMarkerRequest {
// Removed in this diff (rendered without +/- markers): unqualified types.
repeated PhysicalOpIdentity sourceOpToStartProp = 1;
ChannelMarkerIdentity id = 2 [(scalapb.field).no_box = true];
// Added in this diff: the same fields, types qualified with `core.`.
repeated core.PhysicalOpIdentity sourceOpToStartProp = 1;
core.ChannelMarkerIdentity id = 2 [(scalapb.field).no_box = true];
ChannelMarkerType markerType = 3;
// Removed in this diff: unqualified types.
repeated PhysicalOpIdentity scope = 4;
repeated PhysicalOpIdentity targetOps = 5;
// Added in this diff: the same fields, types qualified with `core.`.
repeated core.PhysicalOpIdentity scope = 4;
repeated core.PhysicalOpIdentity targetOps = 5;
ControlRequest markerCommand = 6;
// NOTE(review): presumably the name of the method invoked with markerCommand — confirm.
string markerMethodName = 7;
}

// Request to take a global checkpoint, identified by a ChannelMarkerIdentity.
message TakeGlobalCheckpointRequest {
// NOTE(review): presumably requests a size estimate without writing — confirm.
bool estimationOnly = 1;
// Diff pair: removed line, then its `core.`-qualified replacement.
ChannelMarkerIdentity checkpointId = 2 [(scalapb.field).no_box = true];
core.ChannelMarkerIdentity checkpointId = 2 [(scalapb.field).no_box = true];
// NOTE(review): format of the destination string is not visible here — confirm.
string destination = 3;
}

Expand All @@ -122,7 +123,7 @@ message ModifyLogicRequest {
}

// Request to retry a workflow; carries a list of worker actor identities.
message RetryWorkflowRequest {
// Diff pair: removed line, then its `core.`-qualified replacement.
repeated ActorVirtualIdentity workers = 1;
repeated core.ActorVirtualIdentity workers = 1;
}

enum ConsoleMessageType{
Expand All @@ -147,7 +148,7 @@ message ConsoleMessageTriggeredRequest {
}

// Notification that a port has completed; carries the port's identity.
message PortCompletedRequest {
// Diff pair: removed line, then its `core.`-qualified replacement.
PortIdentity portId = 1 [(scalapb.field).no_box = true];
core.PortIdentity portId = 1 [(scalapb.field).no_box = true];
// NOTE(review): presumably true for an input port, false for an output port — confirm.
bool input = 2;
}

Expand All @@ -156,21 +157,21 @@ message WorkerStateUpdatedRequest {
}

// Request to link workers; carries the physical link concerned.
message LinkWorkersRequest {
// Diff pair: removed line, then its `core.`-qualified replacement.
PhysicalLink link = 1 [(scalapb.field).no_box = true];
core.PhysicalLink link = 1 [(scalapb.field).no_box = true];
}

// Ping message: two int32 values (`i`, `end`) and the actor it is addressed to.
// NOTE(review): presumably `i` counts up to `end` across a ping/pong exchange — confirm.
message Ping {
int32 i = 1;
int32 end = 2;
// Diff pair: removed line, then its `core.`-qualified replacement.
ActorVirtualIdentity to = 3 [(scalapb.field).no_box = true];
core.ActorVirtualIdentity to = 3 [(scalapb.field).no_box = true];
}

// Pong message: same shape as Ping — two int32 values (`i`, `end`) and the
// actor it is addressed to.
message Pong {
int32 i = 1;
int32 end = 2;
// Diff pair: removed line, then its `core.`-qualified replacement.
ActorVirtualIdentity to = 3 [(scalapb.field).no_box = true];
core.ActorVirtualIdentity to = 3 [(scalapb.field).no_box = true];
}

// Pass message
Expand All @@ -185,7 +186,7 @@ message Nested {

// MultiCall message: a sequence of actor identities.
message MultiCall {
// Diff pair: removed line, then its `core.`-qualified replacement.
repeated ActorVirtualIdentity seq = 1;
repeated core.ActorVirtualIdentity seq = 1;
}

// ErrorCommand message
Expand All @@ -194,7 +195,7 @@ message ErrorCommand {

// Collect message: a list of worker actor identities.
message Collect {
// Diff pair: removed line, then its `core.`-qualified replacement.
repeated ActorVirtualIdentity workers = 1;
repeated core.ActorVirtualIdentity workers = 1;
}

// GenerateNumber message
Expand All @@ -203,7 +204,7 @@ message GenerateNumber {

// Chain message: a list of actor identities (`nexts`).
message Chain {
// Diff pair: removed line, then its `core.`-qualified replacement.
repeated ActorVirtualIdentity nexts = 1;
repeated core.ActorVirtualIdentity nexts = 1;
}

// Recursion message
Expand All @@ -213,44 +214,43 @@ message Recursion {

// Messages for the commands
// Request to add an input channel; carries a channel identity and a port
// identity.
message AddInputChannelRequest {
// Removed in this diff (rendered without +/- markers): unqualified types.
ChannelIdentity channelId = 1 [(scalapb.field).no_box = true];
PortIdentity portId = 2 [(scalapb.field).no_box = true];
// Added in this diff: the same fields, types qualified with `core.`.
core.ChannelIdentity channelId = 1 [(scalapb.field).no_box = true];
core.PortIdentity portId = 2 [(scalapb.field).no_box = true];
}

// Request to add a partitioning; carries a physical link (`tag`) and the
// Partitioning definition from the sendsemantics package.
message AddPartitioningRequest {
// Diff pair: removed line, then its `core.`-qualified replacement.
PhysicalLink tag = 1 [(scalapb.field).no_box = true];
core.PhysicalLink tag = 1 [(scalapb.field).no_box = true];
sendsemantics.Partitioning partitioning = 2 [(scalapb.field).no_box = true];
}

// Request to assign a port; carries the port identity, an input flag and a
// string-to-string schema map.
message AssignPortRequest {
// Diff pair: removed line, then its `core.`-qualified replacement.
PortIdentity portId = 1 [(scalapb.field).no_box = true];
core.PortIdentity portId = 1 [(scalapb.field).no_box = true];
// NOTE(review): presumably true for an input port, false for an output port — confirm.
bool input = 2;
// NOTE(review): presumably attribute name -> attribute type — confirm.
map<string, string> schema = 3;
}

// Request to finalize the checkpoint identified by `checkpointId`.
message FinalizeCheckpointRequest {
// Diff pair: removed line, then its `core.`-qualified replacement.
ChannelMarkerIdentity checkpointId = 1 [(scalapb.field).no_box = true];
core.ChannelMarkerIdentity checkpointId = 1 [(scalapb.field).no_box = true];
// NOTE(review): format of the write target string is not visible here — confirm.
string writeTo = 2;
}

// Request to initialize an executor; carries the total worker count, the
// executor initialization info, a source flag and a language string.
message InitializeExecutorRequest {
int32 totalWorkerCount = 1;
// Diff pair: the removed line used an untyped google.protobuf.Any with no_box;
// the replacement uses the typed core.OpExecInitInfo message and drops no_box.
// NOTE(review): field number 2 keeps its number but changes message type, so
// peers built from the old schema would misread it — confirm all endpoints are
// regenerated together.
google.protobuf.Any opExecInitInfo = 2 [(scalapb.field).no_box = true];
core.OpExecInitInfo opExecInitInfo = 2;
bool isSource = 3;
string language = 4;
}

// Request to update the executor of a target physical operator. The new
// executor and the state-transfer function are both carried as
// google.protobuf.Any.
message UpdateExecutorRequest {
// Diff pair: removed line, then its `core.`-qualified replacement.
PhysicalOpIdentity targetOpId = 1 [(scalapb.field).no_box = true];
core.PhysicalOpIdentity targetOpId = 1 [(scalapb.field).no_box = true];
google.protobuf.Any newExecutor = 2 [(scalapb.field).no_box = true];
// Unlike newExecutor, this field has no no_box option.
google.protobuf.Any stateTransferFunc = 3;
}

// Request to prepare the checkpoint identified by `checkpointId`.
message PrepareCheckpointRequest{
// Diff pair: removed line, then its `core.`-qualified replacement.
ChannelMarkerIdentity checkpointId = 1 [(scalapb.field).no_box = true];
core.ChannelMarkerIdentity checkpointId = 1 [(scalapb.field).no_box = true];
// NOTE(review): presumably requests a size estimate without writing — confirm.
bool estimationOnly = 2;
}

// Request to query statistics; carries a list of worker identities to filter by.
// NOTE(review): behaviour for an empty list is not visible here — confirm.
message QueryStatisticsRequest{
// Diff pair: removed line, then its `core.`-qualified replacement.
repeated ActorVirtualIdentity filterByWorkers = 1;
repeated core.ActorVirtualIdentity filterByWorkers = 1;
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ syntax = "proto3";

package edu.uci.ics.amber.engine.architecture.sendsemantics;

import "edu/uci/ics/amber/core/virtualidentity.proto";
import "scalapb/scalapb.proto";

option (scalapb.options) = {
Expand All @@ -10,8 +11,6 @@ option (scalapb.options) = {
no_default_values_in_constructor: true
};

import "edu/uci/ics/amber/virtualidentity.proto";

message Partitioning{
oneof sealed_value{
OneToOnePartitioning oneToOnePartitioning = 1;
Expand All @@ -24,29 +23,29 @@ message Partitioning{

// One-to-one partitioning: a batch size and the list of channels.
message OneToOnePartitioning{
int32 batchSize = 1;
// Diff pair: removed line, then its `core.`-qualified replacement.
repeated ChannelIdentity channels = 2;
repeated core.ChannelIdentity channels = 2;
}

// Round-robin partitioning: a batch size and the list of channels.
message RoundRobinPartitioning{
int32 batchSize = 1;
// Diff pair: removed line, then its `core.`-qualified replacement.
repeated ChannelIdentity channels = 2;
repeated core.ChannelIdentity channels = 2;
}

// Hash-based shuffle partitioning: a batch size, the list of channels, and the
// names of the attributes used for hashing.
message HashBasedShufflePartitioning{
int32 batchSize = 1;
// Diff pair: removed line, then its `core.`-qualified replacement.
repeated ChannelIdentity channels = 2;
repeated core.ChannelIdentity channels = 2;
repeated string hashAttributeNames = 3;
}

// Range-based shuffle partitioning: a batch size, the list of channels, the
// names of the range attributes, and the int64 range bounds.
message RangeBasedShufflePartitioning {
int32 batchSize = 1;
// Diff pair: removed line, then its `core.`-qualified replacement.
repeated ChannelIdentity channels = 2;
repeated core.ChannelIdentity channels = 2;
repeated string rangeAttributeNames = 3;
// NOTE(review): whether the bounds are inclusive is not visible here — confirm.
int64 rangeMin = 4;
int64 rangeMax = 5;
}

// Broadcast partitioning: a batch size and the list of channels.
message BroadcastPartitioning{
int32 batchSize = 1;
// Diff pair: removed line, then its `core.`-qualified replacement.
repeated ChannelIdentity channels = 2;
repeated core.ChannelIdentity channels = 2;
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ syntax = "proto3";

package edu.uci.ics.amber.engine.architecture.worker;

import "edu/uci/ics/amber/workflow.proto";
import "edu/uci/ics/amber/core/workflow.proto";
import "scalapb/scalapb.proto";

option (scalapb.options) = {
Expand All @@ -22,7 +22,7 @@ enum WorkerState {
}

// Pairs a port identity with an int64 tuple count.
message PortTupleCountMapping {
// Diff pair: removed line, then its `core.`-qualified replacement.
PortIdentity port_id = 1 [(scalapb.field).no_box = true];
core.PortIdentity port_id = 1 [(scalapb.field).no_box = true];
int64 tuple_count = 2;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ package edu.uci.ics.amber.engine.common;

import "edu/uci/ics/amber/engine/architecture/rpc/controlcommands.proto";
import "edu/uci/ics/amber/engine/architecture/rpc/controlreturns.proto";
import "edu/uci/ics/amber/virtualidentity.proto";
import "edu/uci/ics/amber/core/virtualidentity.proto";
import "scalapb/scalapb.proto";

option (scalapb.options) = {
Expand All @@ -21,11 +21,11 @@ message ControlPayloadV2 {
}

// Header for Python data: an actor identity (`tag`) and a payload-type string.
message PythonDataHeader {
// Diff pair: removed line, then its `core.`-qualified replacement.
ActorVirtualIdentity tag = 1 [(scalapb.field).no_box = true];
core.ActorVirtualIdentity tag = 1 [(scalapb.field).no_box = true];
// NOTE(review): the set of valid payload_type values is not visible here — confirm.
string payload_type = 2;
}

// Python control message: an actor identity (`tag`) and a ControlPayloadV2.
message PythonControlMessage {
// Diff pair: removed line, then its `core.`-qualified replacement.
ActorVirtualIdentity tag = 1 [(scalapb.field).no_box = true];
core.ActorVirtualIdentity tag = 1 [(scalapb.field).no_box = true];
ControlPayloadV2 payload = 2 [(scalapb.field).no_box = true];
}
Loading

0 comments on commit 0b7e7bc

Please sign in to comment.