Skip to content

Update megascale stats to leverage new data service. #1566

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion frontend/app/components/megascale_stats/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@ xprof_ng_module(
"@org_xprof//frontend/app/components/chart:chart_options",
"@org_xprof//frontend/app/components/chart:default_data_provider",
"@org_xprof//frontend/app/components/chart/dashboard",
"@org_xprof//frontend/app/services/data_service",
"@org_xprof//frontend/app/components/controls/category_filter",
"@org_xprof//frontend/app/components/controls/export_as_csv",
"@org_xprof//frontend/app/components/diagnostics_view",
"@org_xprof//frontend/app/services/data_service_v2:data_service_v2_interface",
"@org_xprof//frontend/app/store",
],
)
Expand Down
94 changes: 59 additions & 35 deletions frontend/app/components/megascale_stats/megascale_stats.ng.html
Original file line number Diff line number Diff line change
@@ -1,41 +1,65 @@
<div class="section-container">
<div class="mat-h1">Megascale Stats</div>
<div>
<div class="mat-body">
This tool provides insights into the DCN Collective.
</div>
<div class="description">
(1) DCN Collective Name is the name assigned to the collective.
</div>
<div class="description">
(2) Recv Op Name and Send Op Name is TPU recv-done op name and send op name.
</div>
<div class="description">
(3) Slack Time is the network independent time the collective has to transmit the data.
</div>
<div class="description">
(4) Observed Duration is the interval between the start of the send op to the end of the corresponding recv-done op.
</div>
<div class="description">
(5) Stall Duration is the duration of time the collective spends in send/send-done/recv/recv-done ops.
</div>
<div class="description">
(6) Occurrences is the total number of occurrences for each collective in the profiled duration.
<div class="page-container">
<div class="top-section">
<div class="megascale-stats">
<div class="mat-h1">Megascale Stats</div>
<div>
<div class="mat-body">
This tool provides insights into the DCN Collective.
</div>
<div class="description">
(1) DCN Collective Name is the name assigned to the collective.
</div>
<div class="description">
(2) Recv Op Name and Send Op Name is TPU recv-done op name and send op name.
</div>
<div class="description">
(3) Slack Time is the network independent time the collective has to transmit the data.
</div>
<div class="description">
(4) Observed Duration is the interval between the start of the send op to the end of the corresponding recv-done op.
</div>
<div class="description">
(5) Stall Duration is the duration of time the collective spends in send/send-done/recv/recv-done ops.
</div>
<div class="description">
(6) Occurrences is the total number of occurrences for each collective in the profiled duration.
</div>
<div class="description">
(7) Aggregated Total Stall is the total stall duration of the collective in the profiled duration.
</div>
<div class="description">
(8) Data Transmitted Size is the total data that needs to be transmitted over the network.
</div>
<div class="description">
(9) Required Bandwidth is the bandwidth required to transmit the data in the provided slack.
</div>
</div>
</div>
<div class="description">
(7) Aggregated Total Stall is the total stall duration of the collective in the profiled duration.
<div class="export-button-container">
<export-as-csv [tool]="tool" [sessionId]="sessionId" [host]="host"></export-as-csv>
</div>
<div class="description">
(8) Data Transmitted Size is the total data that needs to be transmitted over the network.
</div>
<div class="description">
(9) Required Bandwidth is the bandwidth required to transmit the data in the provided slack.
</div>

<diagnostics-view [diagnostics]="diagnostics"></diagnostics-view>

<div class="section-container">
<div class="row">
<div>
<category-filter
[dataTable]="dataTable"
column="Host"
all="All"
(changed)="updateFilters($event)"
>
</category-filter>
</div>
<div class="flex-space"></div>
</div>
</div>
</div>

<div>
<chart chartType="Table"
[dataInfo]="dataInfo">
</chart>
<div>
<chart chartType="Table"
[dataInfo]="dataInfo">
</chart>
</div>
</div>
31 changes: 31 additions & 0 deletions frontend/app/components/megascale_stats/megascale_stats.scss
Original file line number Diff line number Diff line change
@@ -1,8 +1,39 @@
/** CSS for megascale_stats component. */
@import 'frontend/app/styles/common';

/** Outer wrapper: stacks its children vertically with padding all around. */
.page-container {
display: flex;
flex-direction: column;
padding: 20px;
}

/**
 * Header row: a flex container whose children are pushed to opposite ends
 * and aligned to the top edge.
 */
.top-section {
display: flex;
justify-content: space-between;
align-items: flex-start;
margin-bottom: 20px;
}

/** Keeps a gap between this block and whatever follows it on the right. */
.megascale-stats {
margin-right: 20px;
}

/** Never shrinks when the flex row runs out of horizontal space. */
.export-button-container {
flex-shrink: 0;
}

/** Padded section separated from the content above it. */
.section-container {
padding: 20px;
margin-top: 20px;
}

/** Flex row with vertically centered children. */
.row {
display: flex;
align-items: center;
}

/** Spacer that absorbs the remaining free space of its flex container. */
.flex-space {
flex: 1;
}

chart {
Expand Down
70 changes: 49 additions & 21 deletions frontend/app/components/megascale_stats/megascale_stats.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
import {Component, OnDestroy} from '@angular/core';
import {ActivatedRoute} from '@angular/router';
import {Component, inject, OnDestroy} from '@angular/core';
import {ActivatedRoute, Params} from '@angular/router';
import {Store} from '@ngrx/store';
import {ChartDataInfo} from 'org_xprof/frontend/app/common/interfaces/chart';
import {SimpleDataTable} from 'org_xprof/frontend/app/common/interfaces/data_table';
import {NavigationEvent} from 'org_xprof/frontend/app/common/interfaces/navigation_event';
import {Diagnostics} from 'org_xprof/frontend/app/common/interfaces/diagnostics';
import {parseDiagnosticsDataTable} from 'org_xprof/frontend/app/common/utils/utils';
import {TABLE_OPTIONS} from 'org_xprof/frontend/app/components/chart/chart_options';
import {Dashboard} from 'org_xprof/frontend/app/components/chart/dashboard/dashboard';
import {DefaultDataProvider} from 'org_xprof/frontend/app/components/chart/default_data_provider';
import {DataService} from 'org_xprof/frontend/app/services/data_service/data_service';
import {setLoadingStateAction} from 'org_xprof/frontend/app/store/actions';
import {DATA_SERVICE_INTERFACE_TOKEN, DataServiceV2Interface} from 'org_xprof/frontend/app/services/data_service_v2/data_service_v2_interface';
import {setCurrentToolStateAction, setLoadingStateAction} from 'org_xprof/frontend/app/store/actions';
import {ReplaySubject} from 'rxjs';
import {takeUntil} from 'rxjs/operators';

const MEGASCALE_STATS_INDEX = 0;
const DIAGNOSTICS_INDEX = 1;

/** A Megascale Stats page component. */
@Component({
Expand All @@ -22,8 +24,16 @@ const MEGASCALE_STATS_INDEX = 0;
styleUrls: ['./megascale_stats.scss']
})
export class MegascaleStats extends Dashboard implements OnDestroy {
tool = 'megascale_stats';
/** Handles on-destroy Subject, used to unsubscribe. */
private readonly destroyed = new ReplaySubject<void>(1);
private readonly dataService: DataServiceV2Interface =
inject(DATA_SERVICE_INTERFACE_TOKEN);

sessionId = '';
host = '';

diagnostics: Diagnostics = {info: [], warnings: [], errors: []};

dataInfo: ChartDataInfo = {
data: null,
Expand All @@ -35,28 +45,30 @@ export class MegascaleStats extends Dashboard implements OnDestroy {
},
};

constructor(
route: ActivatedRoute, private readonly dataService: DataService,
private readonly store: Store<{}>) {
constructor(route: ActivatedRoute, private readonly store: Store<{}>) {
super();
route.params.pipe(takeUntil(this.destroyed)).subscribe((params) => {
this.update(params as NavigationEvent);
this.processQuery(params);
this.update();
});
this.store.dispatch(setCurrentToolStateAction({currentTool: this.tool}));
}

update(event: NavigationEvent) {
const run = event.run || '';
const tag = event.tag || 'megascale_stats';
const host = event.host || '';
/**
 * Refreshes the session id, tool name and host from the given route
 * parameters. For the session id, `run` takes precedence over `sessionId`.
 * A parameter that is missing or falsy leaves the corresponding field at
 * its current value.
 */
processQuery(params: Params) {
  const requestedSession = params['run'] || params['sessionId'];
  const requestedTool = params['tag'];
  const requestedHost = params['host'];

  this.sessionId = requestedSession || this.sessionId;
  this.tool = requestedTool || this.tool;
  this.host = requestedHost || this.host;
}

update() {
this.store.dispatch(setLoadingStateAction({
loadingState: {
loading: true,
message: 'Loading Megascale Stats data',
}
}));

this.dataService.getData(run, tag, host)
this.dataService.getData(this.sessionId, this.tool, this.host)
.pipe(takeUntil(this.destroyed))
.subscribe((data) => {
this.store.dispatch(setLoadingStateAction({
Expand All @@ -66,17 +78,33 @@ export class MegascaleStats extends Dashboard implements OnDestroy {
}
}));

const d = data as SimpleDataTable[] | null;
if (d && d.hasOwnProperty(MEGASCALE_STATS_INDEX)) {
this.parseData(d[MEGASCALE_STATS_INDEX]);
this.dataInfo = {
...this.dataInfo,
data: d[MEGASCALE_STATS_INDEX],
};
if (data) {
const d = data as SimpleDataTable[] | null;
if (d) {
if (d.hasOwnProperty(DIAGNOSTICS_INDEX)) {
this.diagnostics = parseDiagnosticsDataTable(
d[DIAGNOSTICS_INDEX],
);
}
if (d.hasOwnProperty(MEGASCALE_STATS_INDEX)) {
this.parseData(d[MEGASCALE_STATS_INDEX]);
this.dataInfo = {
...this.dataInfo,
data: d[MEGASCALE_STATS_INDEX],
};
}
}
}
});
}

/**
 * Replaces `dataInfo` with a fresh copy whose `filters` field holds the
 * value returned by `getFilters()`; all other fields are carried over.
 */
override updateView() {
  // Copy first, then query the filters, mirroring the original evaluation
  // order of the object literal.
  const refreshed: ChartDataInfo = {...this.dataInfo};
  refreshed.filters = this.getFilters();
  this.dataInfo = refreshed;
}

ngOnDestroy() {
// Unsubscribes all pending subscriptions.
this.destroyed.next();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import {NgModule} from '@angular/core';
import {ChartModule} from 'org_xprof/frontend/app/components/chart/chart';
import {CategoryFilterModule} from 'org_xprof/frontend/app/components/controls/category_filter/category_filter_module';
import {ExportAsCsvModule} from 'org_xprof/frontend/app/components/controls/export_as_csv/export_as_csv_module';
import {DiagnosticsViewModule} from 'org_xprof/frontend/app/components/diagnostics_view/diagnostics_view_module';

import {MegascaleStats} from './megascale_stats';

Expand All @@ -8,6 +11,9 @@ import {MegascaleStats} from './megascale_stats';
declarations: [MegascaleStats],
imports: [
ChartModule,
CategoryFilterModule,
DiagnosticsViewModule,
ExportAsCsvModule,
],
exports: [MegascaleStats],
})
Expand Down