From 33dcae2dd5cc1a8fe3f0206e61ff33a9801e826d Mon Sep 17 00:00:00 2001 From: saibulusu Date: Thu, 28 May 2026 15:28:58 -0700 Subject: [PATCH 1/8] migrating stream from internal --- .../TestResources/Results_StreamMsft.txt | 18 + .../StreamProfileTests.cs | 285 ++++++++++ .../Examples/Stream/StreamExample.txt | 33 ++ .../Examples/Stream/StreamExampleWindows.txt | 34 ++ .../Examples/Stream/StreamInvalidExample.txt | Bin 0 -> 56 bytes .../StreamInvalidMetricCountExample.txt | 29 ++ .../Examples/Stream/StreamMsftExample.txt | 18 + .../Stream/StreamMsftLatencyExample.txt | 9 + .../Stream/MsftStreamMetricsParserTests.cs | 126 +++++ .../Stream/StreamExecutorTests.cs | 454 ++++++++++++++++ .../Stream/StreamMetricsParserTests.cs | 81 +++ .../Stream/StreamExecutor.cs | 492 ++++++++++++++++++ .../Stream/StreamMetricsParser.cs | 112 ++++ .../Stream/StreamMsftMetricsParser.cs | 128 +++++ .../profiles/PERF-MEM-STREAM.json | 68 +++ .../profiles/PERF-MEM-STREAMMSFT.json | 53 ++ .../profiles/PERF-MEM-STREAMTRIAD.json | 36 ++ .../workloads/stream/msftstreammakefile.txt | 19 + .../docs/workloads/stream/stream-metrics.md | 82 +++ .../docs/workloads/stream/stream-profiles.md | 190 +++++++ .../workloads/stream/stream-supplemental.md | 110 ++++ website/docs/workloads/stream/stream.md | 70 +++ 22 files changed, 2447 insertions(+) create mode 100644 src/VirtualClient/TestResources/Results_StreamMsft.txt create mode 100644 src/VirtualClient/VirtualClient.Actions.FunctionalTests/StreamProfileTests.cs create mode 100644 src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamExample.txt create mode 100644 src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamExampleWindows.txt create mode 100644 src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamInvalidExample.txt create mode 100644 src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamInvalidMetricCountExample.txt create mode 100644 
src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamMsftExample.txt create mode 100644 src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamMsftLatencyExample.txt create mode 100644 src/VirtualClient/VirtualClient.Actions.UnitTests/Stream/MsftStreamMetricsParserTests.cs create mode 100644 src/VirtualClient/VirtualClient.Actions.UnitTests/Stream/StreamExecutorTests.cs create mode 100644 src/VirtualClient/VirtualClient.Actions.UnitTests/Stream/StreamMetricsParserTests.cs create mode 100644 src/VirtualClient/VirtualClient.Actions/Stream/StreamExecutor.cs create mode 100644 src/VirtualClient/VirtualClient.Actions/Stream/StreamMetricsParser.cs create mode 100644 src/VirtualClient/VirtualClient.Actions/Stream/StreamMsftMetricsParser.cs create mode 100644 src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAM.json create mode 100644 src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMMSFT.json create mode 100644 src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMTRIAD.json create mode 100644 website/docs/workloads/stream/msftstreammakefile.txt create mode 100644 website/docs/workloads/stream/stream-metrics.md create mode 100644 website/docs/workloads/stream/stream-profiles.md create mode 100644 website/docs/workloads/stream/stream-supplemental.md create mode 100644 website/docs/workloads/stream/stream.md diff --git a/src/VirtualClient/TestResources/Results_StreamMsft.txt b/src/VirtualClient/TestResources/Results_StreamMsft.txt new file mode 100644 index 0000000000..fe42ea8fb0 --- /dev/null +++ b/src/VirtualClient/TestResources/Results_StreamMsft.txt @@ -0,0 +1,18 @@ +Read and write vector size is 64.0 MB +Scale and Copy vector size is 128.0 MB +Triad and Add vector size is 192.0 MB +Test executed with 1 cores +Latency Array size is 32.000 MB per core so total size is 32.000 MB +Number of threads is 1 +Loops are using NEON instructions +Number of latency threads is 1 +Number of Iterations is 6 +Number of 
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

namespace VirtualClient.Actions
{
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Runtime.InteropServices;
    using System.Threading;
    using System.Threading.Tasks;
    using Moq;
    using NUnit.Framework;
    using VirtualClient;
    using VirtualClient.Common;
    using VirtualClient.Contracts;

    /// <summary>
    /// Functional tests for the STREAM memory workload profiles
    /// (PERF-MEM-STREAM.json, PERF-MEM-STREAMMSFT.json and PERF-MEM-STREAMTRIAD.json).
    /// Each test loads a profile through a <see cref="ProfileExecutor"/> backed by a
    /// <see cref="DependencyFixture"/> and verifies either parameter inlining, the
    /// commands issued, or the failure raised when the workload package is missing.
    /// </summary>
    [TestFixture]
    [Category("Functional")]
    public class StreamProfileTests
    {
        private DependencyFixture mockFixture;

        /// <summary>
        /// Creates the shared fixture and loads the component types from the test directory.
        /// </summary>
        [OneTimeSetUp]
        public void SetupFixture()
        {
            this.mockFixture = new DependencyFixture();
            ComponentTypeCache.Instance.LoadComponentTypes(TestDependencies.TestDirectory);
        }

        [Test]
        [TestCase("PERF-MEM-STREAMTRIAD.json")]
        public void StreamTriadWorkloadProfileParametersAreInlinedCorrectly(string profile)
        {
            this.mockFixture.Setup(PlatformID.Unix);
            using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies))
            {
                WorkloadAssert.ParameterReferencesInlined(executor.Profile);
            }
        }

        [Test]
        [TestCase("PERF-MEM-STREAMTRIAD.json")]
        public async Task StreamTriadWorkloadProfileExecutesTheExpectedWorkloadsOnUnixPlatform(string profile)
        {
            IEnumerable<string> expectedCommands = StreamProfileTests.GetStreamTriadProfileExpectedCommands();

            // Setup the expectations for the workload
            // - Workload package is installed and exists.
            // - The workload generates valid results.
            this.mockFixture.Setup(PlatformID.Unix);
            this.mockFixture.SystemManagement.Setup(mgr => mgr.GetCpuInfoAsync(It.IsAny<CancellationToken>()))
                .ReturnsAsync(new CpuInfo("Name", "Description", 1, 2, 1, 1, true));

            this.mockFixture.SetupPackage("stream", expectedFiles: "linux-x64/stream");

            this.mockFixture.ProcessManager.OnCreateProcess = (command, arguments, workingDir) =>
            {
                IProcessProxy process = this.mockFixture.CreateProcess(command, arguments, workingDir);
                if (arguments.Contains("Stream", StringComparison.OrdinalIgnoreCase))
                {
                    // The workload invocation: hand back canned STREAM results.
                    process.StandardOutput.Append(TestDependencies.GetResourceFileContents("Results_Stream.txt"));
                }
                else if (arguments.Contains("lscpu | grep 'Flags'"))
                {
                    // The CPU flags query: hand back a flag list that includes AVX-512 entries.
                    process.StandardOutput.AppendLine("Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves avx512vbmi");
                }

                return process;
            };

            using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies))
            {
                executor.ExecuteDependencies = false;

                // NOTE(review): the profile is executed twice in a row; presumably to cover a
                // repeat iteration on an already-initialized workload -- confirm the intent.
                await executor.ExecuteAsync(ProfileTiming.OneIteration(), CancellationToken.None).ConfigureAwait(false);
                await executor.ExecuteAsync(ProfileTiming.OneIteration(), CancellationToken.None).ConfigureAwait(false);

                WorkloadAssert.CommandsExecuted(this.mockFixture, expectedCommands.ToArray());
            }
        }

        [Test]
        [Ignore("We need to rethink how to do dependency testing with extension model.")]
        [TestCase("PERF-MEM-STREAMTRIAD.json")]
        public async Task StreamTriadWorkloadProfileInstallsTheExpectedDependenciesOnUnixPlatform(string profile)
        {
            // The setup in a typical Azure VM scenario
            this.mockFixture.Setup(PlatformID.Unix);

            using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies, dependenciesOnly: true))
            {
                executor.ExecuteDependencies = false;
                await executor.ExecuteAsync(ProfileTiming.OneIteration(), CancellationToken.None).ConfigureAwait(false);

                // Workload dependency package expectations
                // The workload dependency package should have been installed at this point.
                WorkloadAssert.WorkloadPackageInstalled(this.mockFixture, "stream");
            }
        }

        [Test]
        [TestCase("PERF-MEM-STREAMTRIAD.json")]
        public void StreamTriadProfileActionsWillNotBeExecutedIfTheWorkloadPackageDoesNotExist(string profile)
        {
            this.mockFixture.Setup(PlatformID.Unix);
            this.mockFixture.SystemManagement.Setup(mgr => mgr.GetCpuInfoAsync(It.IsAny<CancellationToken>()))
                .ReturnsAsync(new CpuInfo("Name", "Description", 1, 2, 1, 1, true));

            // We ensure the workload package does not exist.
            this.mockFixture.PackageManager.Clear();

            using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies))
            {
                executor.ExecuteDependencies = false;

                DependencyException error = Assert.ThrowsAsync<DependencyException>(
                    () => executor.ExecuteAsync(ProfileTiming.OneIteration(), CancellationToken.None));

                // AreEqual (rather than IsTrue on ==) reports both values when the reason differs.
                Assert.AreEqual(ErrorReason.WorkloadDependencyMissing, error.Reason);
            }
        }

        [Test]
        [TestCase("PERF-MEM-STREAMMSFT.json")]
        public void StreamMsftWorkloadProfileParametersAreInlinedCorrectly(string profile)
        {
            this.mockFixture.Setup(PlatformID.Unix);
            using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies))
            {
                WorkloadAssert.ParameterReferencesInlined(executor.Profile);
            }
        }

        [Test]
        [TestCase("PERF-MEM-STREAMMSFT.json")]
        public async Task StreamMsftWorkloadProfileExecutesTheExpectedWorkloadsOnUnixPlatform(string profile)
        {
            IEnumerable<string> expectedCommands = StreamProfileTests.GetStreamMsftProfileExpectedCommands();

            // Setup the expectations for the workload
            // - Workload package is installed and exists.
            // - The workload generates valid results.
            this.mockFixture.Setup(PlatformID.Unix, Architecture.Arm64);
            this.mockFixture.SetupPackage("streammsft", expectedFiles: "linux-arm64/stream");

            this.mockFixture.SystemManagement.Setup(mgr => mgr.GetCpuInfoAsync(It.IsAny<CancellationToken>()))
                .ReturnsAsync(new CpuInfo("Name", "Description", 1, 2, 1, 1, true));

            this.mockFixture.ProcessManager.OnCreateProcess = (command, arguments, workingDir) =>
            {
                IProcessProxy process = this.mockFixture.CreateProcess(command, arguments, workingDir);

                // Only the perfrunner invocation produces output; any other command
                // (e.g. make) is left to complete without output.
                if (arguments.Contains("perfrunner", StringComparison.OrdinalIgnoreCase))
                {
                    process.StandardOutput.Append(TestDependencies.GetResourceFileContents("Results_StreamMsft.txt"));
                }

                return process;
            };

            using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies))
            {
                executor.ExecuteDependencies = false;
                await executor.ExecuteAsync(ProfileTiming.OneIteration(), CancellationToken.None).ConfigureAwait(false);

                WorkloadAssert.CommandsExecuted(this.mockFixture, expectedCommands.ToArray());
            }
        }

        [Test]
        [TestCase("PERF-MEM-STREAM.json")]
        public void StreamWorkloadProfileParametersAreInlinedCorrectly(string profile)
        {
            this.mockFixture.Setup(PlatformID.Unix);
            using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies))
            {
                WorkloadAssert.ParameterReferencesInlined(executor.Profile);
            }
        }

        [Test]
        [TestCase("PERF-MEM-STREAM.json")]
        public async Task StreamWorkloadProfileExecutesTheExpectedWorkloadsOnUnixPlatform(string profile)
        {
            IEnumerable<string> expectedCommands = StreamProfileTests.GetStreamProfileExpectedCommands();

            // Setup the expectations for the workload
            // - Workload package is installed and exists.
            // - The workload generates valid results.
            this.mockFixture.Setup(PlatformID.Unix);
            this.mockFixture.SetupPackage("stream", expectedFiles: "linux-x64/stream");
            this.mockFixture.SystemManagement.Setup(mgr => mgr.GetCpuInfoAsync(It.IsAny<CancellationToken>()))
                .ReturnsAsync(new CpuInfo("Name", "Description", 1, 2, 1, 1, true));

            this.mockFixture.ProcessManager.OnCreateProcess = (command, arguments, workingDir) =>
            {
                IProcessProxy process = this.mockFixture.CreateProcess(command, arguments, workingDir);

                // The gcc compile command also names the "streamworkload" output binary, so it
                // must be excluded explicitly; only the actual workload run returns results.
                bool isCompilation = arguments.Contains("gcc", StringComparison.OrdinalIgnoreCase);
                bool namesWorkloadBinary = arguments.Contains("streamworkload", StringComparison.OrdinalIgnoreCase);
                if (namesWorkloadBinary && !isCompilation)
                {
                    process.StandardOutput.Append(TestDependencies.GetResourceFileContents("Results_Stream.txt"));
                }

                return process;
            };

            using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies))
            {
                executor.ExecuteDependencies = false;

                // NOTE(review): the profile is executed twice in a row; presumably to cover a
                // repeat iteration on an already-compiled workload -- confirm the intent.
                await executor.ExecuteAsync(ProfileTiming.OneIteration(), CancellationToken.None).ConfigureAwait(false);
                await executor.ExecuteAsync(ProfileTiming.OneIteration(), CancellationToken.None).ConfigureAwait(false);

                WorkloadAssert.CommandsExecuted(this.mockFixture, expectedCommands.ToArray());
            }
        }

        [Test]
        [TestCase("PERF-MEM-STREAM.json")]
        public void StreamProfileActionsWillNotBeExecutedIfTheWorkloadPackageDoesNotExist(string profile)
        {
            this.mockFixture.Setup(PlatformID.Unix);
            this.mockFixture.SystemManagement.Setup(mgr => mgr.GetCpuInfoAsync(It.IsAny<CancellationToken>()))
                .ReturnsAsync(new CpuInfo("Name", "Description", 1, 2, 1, 1, true));

            // We ensure the workload package does not exist.
            this.mockFixture.PackageManager.Clear();

            // Every process reports compiler version banners on standard output.
            this.mockFixture.ProcessManager.OnCreateProcess = (exe, arguments, workingDir) =>
            {
                IProcessProxy process = this.mockFixture.CreateProcess(exe, arguments, workingDir);
                process.StandardOutput.AppendLine("gcc (Ubuntu 10.3.0-1ubuntu1~20.04) 10.3.0");
                process.StandardOutput.AppendLine("cc (Ubuntu 10.3.0-1ubuntu1~20.04) 10.3.0");
                return process;
            };

            using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies))
            {
                executor.ExecuteDependencies = false;

                DependencyException error = Assert.ThrowsAsync<DependencyException>(
                    () => executor.ExecuteAsync(ProfileTiming.OneIteration(), CancellationToken.None));

                Assert.AreEqual(ErrorReason.WorkloadDependencyMissing, error.Reason);
            }
        }

        /// <summary>
        /// Command patterns expected from the PERF-MEM-STREAMTRIAD.json profile.
        /// </summary>
        private static IEnumerable<string> GetStreamTriadProfileExpectedCommands()
        {
            return new List<string>
            {
                "bash -c \"lscpu \\| grep 'Flags'\"",
                "bash -c \"export KMP_AFFINITY=.*&& export OMP_NUM_THREADS=.*&& export LD_LIBRARY_PATH=.*&& chmod \\+x.*&&.*Stream.*\""
            };
        }

        /// <summary>
        /// Command patterns expected from the PERF-MEM-STREAM.json profile (compile, then run).
        /// </summary>
        private static IEnumerable<string> GetStreamProfileExpectedCommands()
        {
            return new List<string>
            {
                "bash -c \"gcc.*stream\\.c.*-o.*streamworkload.*\"",
                "bash -c \"export OMP_NUM_THREADS=.*&&.*chmod.*\\+x.*streamworkload.*&&.*streamworkload.*\"",
            };
        }

        /// <summary>
        /// Command patterns expected from the PERF-MEM-STREAMMSFT.json profile.
        /// </summary>
        private static IEnumerable<string> GetStreamMsftProfileExpectedCommands()
        {
            return new List<string>
            {
                "bash.*make",
                "bash.*perfrunner.*--threads.*--internal-iter",
            };
        }

        /// <summary>
        /// Command patterns for a Windows STREAM run.
        /// NOTE(review): not referenced by any test in this class as shown -- confirm whether
        /// a Windows profile test is planned or this helper can be removed.
        /// </summary>
        private static IEnumerable<string> GetStreamWindowsProfileExpectedCommands()
        {
            return new List<string>
            {
                "cmd\\.exe.*stream\\.exe.*-n 50.*-s 320000000",
            };
        }
    }
}
b/src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamExample.txt @@ -0,0 +1,33 @@ +------------------------------------------------------------- +STREAM version $Revision: 5.10 $ +------------------------------------------------------------- +This system uses 8 bytes per array element. +------------------------------------------------------------- +Array size = 268435456 (elements), Offset = 0 (elements) +Memory per array = 2048.0 MiB (= 2.0 GiB). +Total memory required = 6144.0 MiB (= 6.0 GiB). +Each kernel will be executed 100 times. + The *best* time for each kernel (excluding the first iteration) + will be used to compute the reported bandwidth. +------------------------------------------------------------- +Number of Threads requested = 4 +Number of Threads counted = 4 +------------------------------------------------------------- +Your clock granularity/precision appears to be 1 microseconds. +Each test below will take on the order of 161936 microseconds. + (= 161936 clock ticks) +Increase the size of the arrays if this shows that +you are not getting at least 20 clock ticks per test. +------------------------------------------------------------- +WARNING -- The above is only a rough guideline. +For best results, please be sure you know the +precision of your system timer. 
+------------------------------------------------------------- +Function Best Rate MB/s Avg time Min time Max time +Copy: 18514.5 0.242368 0.231979 0.317779 +Scale: 18333.8 0.244441 0.234265 0.356043 +Add: 23043.7 0.293571 0.279575 0.377037 +Triad: 23314.0 0.291828 0.276334 0.337149 +------------------------------------------------------------- +Solution Validates: avg error less than 1.000000e-13 on all three arrays +------------------------------------------------------------- diff --git a/src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamExampleWindows.txt b/src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamExampleWindows.txt new file mode 100644 index 0000000000..4cb2355a4d --- /dev/null +++ b/src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamExampleWindows.txt @@ -0,0 +1,34 @@ +------------------------------------------------------------- +STREAM version $Revision: 5.10 $ +------------------------------------------------------------- +This system uses 8 bytes per array element. +------------------------------------------------------------- +Array size = 320000000 (elements), Offset = 0 (elements) +Memory per array = 2441.4 MiB (= 2.4 GiB). +Total memory required = 7324.2 MiB (= 7.2 GiB). +Each kernel will be executed 50 times. + The *best* time for each kernel (excluding the first iteration) + will be used to compute the reported bandwidth. +------------------------------------------------------------- +Number of Threads requested = 8 +Number of Threads counted = 8 +------------------------------------------------------------- +Your clock granularity/precision appears to be 100 nanoseconds. +Each test below will take on the order of 29879 microseconds. + (= 298790 clock ticks) +Increase the size of the arrays if this shows that +you are not getting at least 20 clock ticks per test. +------------------------------------------------------------- +WARNING -- The above is only a rough guideline. 
+For best results, please be sure you know the +precision of your system timer. +------------------------------------------------------------- +| Function | Avg (MB/s) | Best (MB/s) | Worst (MB/s) | Avg time | Min time | Max time | +|----------|------------|-------------|--------------|----------|----------|----------| +| Copy | 42156.3 | 42890.7 | 41234.5 | 0.122345 | 0.119876 | 0.124567 | +| Scale | 41987.2 | 42678.9 | 41098.3 | 0.123456 | 0.120987 | 0.125678 | +| Add | 43521.8 | 44234.6 | 42765.4 | 0.165432 | 0.162345 | 0.168765 | +| Triad | 43789.5 | 44512.3 | 42987.6 | 0.164567 | 0.161234 | 0.167890 | +------------------------------------------------------------- +Solution Validates: avg error less than 1.000000e-13 on all three arrays +------------------------------------------------------------- diff --git a/src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamInvalidExample.txt b/src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamInvalidExample.txt new file mode 100644 index 0000000000000000000000000000000000000000..98f71b34476ba58172d6542f2b2a8dcf648f7a4a GIT binary patch literal 56 zcmezWFPNc(p@<=sA(0`ML4hFz$j$_^Qy6#|xEMSc@)*j1yd0od3Q!irFJLGI>Hq+D CSqqE+ literal 0 HcmV?d00001 diff --git a/src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamInvalidMetricCountExample.txt b/src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamInvalidMetricCountExample.txt new file mode 100644 index 0000000000..9d0e5969e4 --- /dev/null +++ b/src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamInvalidMetricCountExample.txt @@ -0,0 +1,29 @@ +------------------------------------------------------------- +STREAM version $Revision: 5.10 $ +------------------------------------------------------------- +This system uses 8 bytes per array element. 
+------------------------------------------------------------- +Array size = 268435456 (elements), Offset = 0 (elements) +Memory per array = 2048.0 MiB (= 2.0 GiB). +Total memory required = 6144.0 MiB (= 6.0 GiB). +Each kernel will be executed 100 times. + The *best* time for each kernel (excluding the first iteration) + will be used to compute the reported bandwidth. +------------------------------------------------------------- +Number of Threads requested = 4 +Number of Threads counted = 4 +------------------------------------------------------------- +Your clock granularity/precision appears to be 1 microseconds. +Each test below will take on the order of 161936 microseconds. + (= 161936 clock ticks) +Increase the size of the arrays if this shows that +you are not getting at least 20 clock ticks per test. +------------------------------------------------------------- +WARNING -- The above is only a rough guideline. +For best results, please be sure you know the +precision of your system timer. 
+------------------------------------------------------------- +Function Best Rate MB/s Avg time Min time Max time +------------------------------------------------------------- +Solution Validates: avg error less than 1.000000e-13 on all three arrays +------------------------------------------------------------- diff --git a/src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamMsftExample.txt b/src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamMsftExample.txt new file mode 100644 index 0000000000..fe42ea8fb0 --- /dev/null +++ b/src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamMsftExample.txt @@ -0,0 +1,18 @@ +Read and write vector size is 64.0 MB +Scale and Copy vector size is 128.0 MB +Triad and Add vector size is 192.0 MB +Test executed with 1 cores +Latency Array size is 32.000 MB per core so total size is 32.000 MB +Number of threads is 1 +Loops are using NEON instructions +Number of latency threads is 1 +Number of Iterations is 6 +Number of Internal Iterations is 1 for BW +Number of Internal Iterations is 1 for Latency +Function Best Rate MB/s +Read: 144 141 140 113 110 114 +Copy: 277 272 265 117 115 120 +Scale: 275 270 268 118 117 119 +Add: 419 412 407 115 114 116 +Triad: 415 411 405 116 114 116 +Write: 128 126 124 127 125 129 diff --git a/src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamMsftLatencyExample.txt b/src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamMsftLatencyExample.txt new file mode 100644 index 0000000000..9fe3903da8 --- /dev/null +++ b/src/VirtualClient/VirtualClient.Actions.UnitTests/Examples/Stream/StreamMsftLatencyExample.txt @@ -0,0 +1,9 @@ +Latency Array size is 32.000 MB per core so total size is 32.000 MB +Number of threads is 0 +Loops are using NEON instructions +Number of latency threads is 1 +Number of Iterations is 6 +Number of Internal Iterations is 1 for BW +Number of Internal Iterations is 1 for Latency +Function Best 
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

namespace VirtualClient.Actions
{
    using System.Collections.Generic;
    using System.IO;
    using System.Reflection;
    using VirtualClient.Contracts;
    using NUnit.Framework;
    using VirtualClient;

    /// <summary>
    /// Unit tests for <see cref="StreamMsftMetricsParser"/>. Each test reads an example
    /// output file from the "Examples/Stream" folder next to the test assembly, parses it
    /// and checks either the metrics produced or the exception raised.
    /// </summary>
    [TestFixture]
    [Category("Unit")]
    internal class StreamMsftResultsParserUnitTests
    {
        /// <summary>
        /// Directory holding the STREAM example outputs, resolved relative to the
        /// location of the executing test assembly.
        /// </summary>
        private string ExamplePath
        {
            get
            {
                string workingDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
                return Path.Combine(workingDirectory, "Examples", "Stream");
            }
        }

        [Test]
        public void StreamMsftResultsParserVerifyMetricsScenarioBandwidth()
        {
            // Read:   144 141 140 113 110 114
            // Copy:   277 272 265 117 115 120
            // Scale:  275 270 268 118 117 119
            // Add:    419 412 407 115 114 116
            // Triad:  415 411 405 116 114 116
            // Write:  128 126 124 127 125 129
            IList<Metric> metrics = this.CreateParser("StreamMsftExample.txt").Parse();

            // 6 functions x 6 values per function.
            Assert.AreEqual(36, metrics.Count);

            MetricAssert.Exists(metrics, "Best Rate Read", 144, "MBps");
            MetricAssert.Exists(metrics, "Best Rate Scale", 275, "MBps");
            MetricAssert.Exists(metrics, "Best Rate Add", 419, "MBps");
            MetricAssert.Exists(metrics, "Best Rate Triad", 415, "MBps");
            MetricAssert.Exists(metrics, "Best Rate Write", 128, "MBps");
            MetricAssert.Exists(metrics, "Best Rate Copy", 277, "MBps");

            MetricAssert.Exists(metrics, "Avg Rate Read", 141, "MBps");
            MetricAssert.Exists(metrics, "Avg Rate Scale", 270, "MBps");
            MetricAssert.Exists(metrics, "Avg Rate Add", 412, "MBps");
            MetricAssert.Exists(metrics, "Avg Rate Triad", 411, "MBps");
            MetricAssert.Exists(metrics, "Avg Rate Write", 126, "MBps");
            MetricAssert.Exists(metrics, "Avg Rate Copy", 272, "MBps");

            MetricAssert.Exists(metrics, "Min Rate Read", 140, "MBps");
            MetricAssert.Exists(metrics, "Min Rate Scale", 268, "MBps");
            MetricAssert.Exists(metrics, "Min Rate Add", 407, "MBps");
            MetricAssert.Exists(metrics, "Min Rate Triad", 405, "MBps");
            MetricAssert.Exists(metrics, "Min Rate Write", 124, "MBps");
            MetricAssert.Exists(metrics, "Min Rate Copy", 265, "MBps");

            MetricAssert.Exists(metrics, "Avg Latency Read", 113, "ns");
            MetricAssert.Exists(metrics, "Avg Latency Scale", 118, "ns");
            MetricAssert.Exists(metrics, "Avg Latency Add", 115, "ns");
            MetricAssert.Exists(metrics, "Avg Latency Triad", 116, "ns");
            MetricAssert.Exists(metrics, "Avg Latency Write", 127, "ns");
            MetricAssert.Exists(metrics, "Avg Latency Copy", 117, "ns");

            MetricAssert.Exists(metrics, "Min Latency Read", 110, "ns");
            MetricAssert.Exists(metrics, "Min Latency Scale", 117, "ns");
            MetricAssert.Exists(metrics, "Min Latency Add", 114, "ns");
            MetricAssert.Exists(metrics, "Min Latency Triad", 114, "ns");
            MetricAssert.Exists(metrics, "Min Latency Write", 125, "ns");
            MetricAssert.Exists(metrics, "Min Latency Copy", 115, "ns");

            MetricAssert.Exists(metrics, "Max Latency Read", 114, "ns");
            MetricAssert.Exists(metrics, "Max Latency Scale", 119, "ns");
            MetricAssert.Exists(metrics, "Max Latency Add", 116, "ns");
            MetricAssert.Exists(metrics, "Max Latency Triad", 116, "ns");
            MetricAssert.Exists(metrics, "Max Latency Write", 129, "ns");
            MetricAssert.Exists(metrics, "Max Latency Copy", 120, "ns");
        }

        [Test]
        public void StreamResultsParserVerifyMetricsScenarioLatency()
        {
            // LATENCY 108.97 108.00 111.43
            IList<Metric> metrics = this.CreateParser("StreamMsftLatencyExample.txt").Parse();

            Assert.AreEqual(3, metrics.Count);
            MetricAssert.Exists(metrics, "Avg Latency ns", 108.97, "ns");
            MetricAssert.Exists(metrics, "Min Latency ns", 108.00, "ns");
            MetricAssert.Exists(metrics, "Max Latency ns", 111.43, "ns");
        }

        [Test]
        public void StreamResultParserThrowsOnInvalidOutputFormat()
        {
            StreamMsftMetricsParser parser = this.CreateParser("StreamInvalidExample.txt");

            SchemaException exception = Assert.Throws<SchemaException>(() => parser.Parse());
            StringAssert.Contains("The Stream results has incorrect format/data for parsing", exception.Message);
        }

        [Test]
        public void StreamResultParserThrowsOnInvalidMetricsCount()
        {
            StreamMsftMetricsParser parser = this.CreateParser("StreamInvalidMetricCountExample.txt");

            SchemaException exception = Assert.Throws<SchemaException>(() => parser.Parse());
            StringAssert.Contains("The Stream results has incorrect format/data for parsing. Output is having 0 metrics.", exception.Message);
        }

        /// <summary>
        /// Reads the named example file from <see cref="ExamplePath"/> and wraps its text in a
        /// new parser. A fresh parser per test avoids sharing mutable state through fixture fields.
        /// </summary>
        /// <param name="exampleFileName">File name within the "Examples/Stream" folder.</param>
        /// <returns>A parser constructed over the file's full text.</returns>
        private StreamMsftMetricsParser CreateParser(string exampleFileName)
        {
            string examplePath = Path.Combine(this.ExamplePath, exampleFileName);
            string exampleText = File.ReadAllText(examplePath);

            return new StreamMsftMetricsParser(exampleText);
        }
    }
}
+ +namespace VirtualClient.Actions +{ + using System; + using System.Collections.Generic; + using System.Diagnostics; + using System.Runtime.InteropServices; + using System.Text; + using System.Text.RegularExpressions; + using System.Threading; + using System.Threading.Tasks; + using VirtualClient; + using VirtualClient.Common; + using VirtualClient.Contracts; + using Moq; + using NUnit.Framework; + + [TestFixture] + [Category("Unit")] + public class StreamExecutorTests + { + private static readonly string ExamplesDirectory = MockFixture.GetDirectory(typeof(StreamExecutorTests), "Examples", "Stream"); + + private MockFixture mockFixture; + private DependencyPath mockPackage; + private ConcurrentBuffer defaultOutput = new ConcurrentBuffer(); + private IProcessProxy defaultMemoryProcess; + + public void SetupTest(PlatformID platformID, Architecture cpuArchiture = Architecture.X64) + { + this.mockFixture = new MockFixture(); + this.mockFixture.Setup(platformID, cpuArchiture); + this.mockPackage = new DependencyPath("stream", this.mockFixture.PlatformSpecifics.GetPackagePath("stream")); + this.mockFixture.SetupPackage(this.mockPackage); + + this.mockFixture.Parameters["PackageName"] = this.mockPackage.Name; + this.mockFixture.Parameters["Toolset"] = "STREAM"; + this.mockFixture.Parameters["CompilerVersion"] = 10; + this.mockFixture.Parameters["CompilerParameters"] = "-fopenmp -mcmodel=large -D_OPENMP -DNTIMES=5000 -DSTREAM_ARRAY_SIZE=100000000"; + this.mockFixture.Parameters["ThreadCount"] = 1; + + string exampleResults = MockFixture.ReadFile(StreamExecutorTests.ExamplesDirectory, "StreamExample.txt"); + + this.defaultOutput.Clear(); + this.defaultOutput.Append(exampleResults); + + this.mockFixture.SystemManagement.Setup(mgr => mgr.GetCpuInfoAsync(It.IsAny())) + .ReturnsAsync(new CpuInfo("Name", "Description", 1, 2, 1, 1, hyperThreadingEnabled: true)); + + this.defaultMemoryProcess = new InMemoryProcess + { + StartInfo = new ProcessStartInfo + { + FileName = "exe", + 
Arguments = "args" + }, + ExitCode = 0, + OnStart = () => true, + OnHasExited = () => true, + StandardOutput = this.defaultOutput + }; + } + + [Test] + public async Task StreamExecutorDefaultScenarioRunsExpectedCommandsOnWindowsx64() + { + this.SetupTest(PlatformID.Win32NT, Architecture.X64); + + this.mockFixture.Parameters[nameof(StreamExecutor.ThreadCount)] = "2"; + + this.mockFixture.File.Setup(f => f.Exists(It.IsAny())).Returns(true); + + string packagePath = this.mockFixture.PlatformSpecifics.ToPlatformSpecificPath(this.mockPackage, PlatformID.Win32NT, Architecture.X64).Path; + + string executablePath = this.mockFixture.PlatformSpecifics.Combine(packagePath, "stream.exe"); + + List commandsExpected = new List + { + $"{executablePath} -n 50 -s 320000000" + }; + + string windowsResults = MockFixture.ReadFile(StreamExecutorTests.ExamplesDirectory, "StreamExampleWindows.txt"); + + StringBuilder builder = new StringBuilder(); + this.mockFixture.ProcessManager.OnProcessCreated = (process) => + { + string fullCommand = process.FullCommand(); + builder.AppendLine(fullCommand); + commandsExpected.Remove(fullCommand); + + if (fullCommand.Contains("stream.exe", StringComparison.OrdinalIgnoreCase)) + { + if (process.StandardOutput != null) + { + process.StandardOutput.Clear(); + process.StandardOutput.Append(windowsResults); + } + } + }; + + using (StreamExecutor streamExecutor = new StreamExecutor(this.mockFixture.Dependencies, this.mockFixture.Parameters)) + { + await streamExecutor.ExecuteAsync(CancellationToken.None).ConfigureAwait(false); + } + + string here = builder.ToString(); + Assert.IsEmpty(commandsExpected, $"Remaining commands not matched. 
Count: {commandsExpected.Count}\nObserved:\n{here}"); + } + + [Test] + public void StreamExecutorThrowsIfANonSupportedToolsetIsProvidedOnWindows() + { + this.SetupTest(PlatformID.Win32NT, Architecture.X64); + this.mockFixture.Parameters["Toolset"] = "STREAMTriad"; + + this.mockFixture.File.Setup(f => f.Exists(It.IsAny())).Returns(true); + + using (StreamExecutor streamExecutor = new StreamExecutor(this.mockFixture.Dependencies, this.mockFixture.Parameters)) + { + var exp = Assert.ThrowsAsync(() => streamExecutor.ExecuteAsync(CancellationToken.None)); + Assert.AreEqual(ErrorReason.InvalidProfileDefinition, exp.Reason); + } + } + + [Test] + public void StreamExecutorThrowsIfANonSupportedToolsetIsProvided() + { + this.SetupTest(PlatformID.Unix, Architecture.X64); + this.mockFixture.Parameters["Toolset"] = "NotSupported"; + + using (StreamExecutor streamExecutor = new StreamExecutor(this.mockFixture.Dependencies, this.mockFixture.Parameters)) + { + var exp = Assert.ThrowsAsync(() => streamExecutor.ExecuteAsync(CancellationToken.None)); + Assert.AreEqual(ErrorReason.InvalidProfileDefinition, exp.Reason); + } + } + + [Test] + public void StreamExecutorTriadScenarioThrowsOnNonSupportedArchitecture() + { + this.SetupTest(PlatformID.Unix, Architecture.Arm64); + + this.mockFixture.Parameters["PackageName"] = "Stream"; + this.mockFixture.Parameters["Toolset"] = "STREAMTriad"; + this.mockFixture.Parameters[nameof(StreamExecutor.ThreadCount)] = "2"; + this.mockFixture.PackageManager.OnGetPackage().ReturnsAsync(this.mockPackage); + + using (StreamExecutor streamExecutor = new StreamExecutor(this.mockFixture.Dependencies, this.mockFixture.Parameters)) + { + WorkloadException error = Assert.ThrowsAsync(() => streamExecutor.ExecuteAsync(CancellationToken.None)); + Assert.IsTrue(error.Reason == ErrorReason.PlatformNotSupported); + } + } + + [Test] + public void StreamExecutorMsftScenarioThrowsOnNonSupportedArchitecture() + { + this.SetupTest(PlatformID.Unix, Architecture.X64); + + 
this.mockFixture.Parameters["PackageName"] = "Stream"; + this.mockFixture.Parameters["Toolset"] = "StreamMsft"; + this.mockFixture.Parameters[nameof(StreamExecutor.ThreadCount)] = "2"; + this.mockFixture.PackageManager.OnGetPackage().ReturnsAsync(this.mockPackage); + + using (StreamExecutor streamExecutor = new StreamExecutor(this.mockFixture.Dependencies, this.mockFixture.Parameters)) + { + WorkloadException error = Assert.ThrowsAsync(() => streamExecutor.ExecuteAsync(CancellationToken.None)); + Assert.IsTrue(error.Reason == ErrorReason.PlatformNotSupported); + } + } + + [Test] + public async Task StreamExecutorDefaultScenarioRunsExpectedCommandsOnx64() + { + this.SetupTest(PlatformID.Unix, Architecture.X64); + + ProcessStartInfo expectedInfo = new ProcessStartInfo(); + string packagePath = this.mockPackage.Path; + + List commandsExpected = new List + { + $"sudo bash -c \"gcc {packagePath}/linux-x64/stream.c -o {packagePath}/linux-x64/streamworkload -fopenmp -mcmodel=large -D_OPENMP -DNTIMES=5000 -DSTREAM_ARRAY_SIZE=100000000\"", + $"sudo bash -c \"export OMP_NUM_THREADS=1 && chmod +x {packagePath}/linux-x64/streamworkload && {packagePath}/linux-x64/streamworkload\"" + }; + + StringBuilder builder = new StringBuilder(); + this.mockFixture.ProcessManager.OnProcessCreated = (process) => + { + string fullCommand = process.FullCommand(); + builder.AppendLine(fullCommand); + commandsExpected.Remove(fullCommand); + + if (fullCommand.Contains("streamworkload") && !fullCommand.Contains("gcc")) + { + if (process.StandardOutput != null) + { + process.StandardOutput.Clear(); + process.StandardOutput.Append(this.defaultOutput.ToString()); + } + } + }; + + using (StreamExecutor streamExecutor = new StreamExecutor(this.mockFixture.Dependencies, this.mockFixture.Parameters)) + { + await streamExecutor.ExecuteAsync(CancellationToken.None).ConfigureAwait(false); + } + + string here = builder.ToString(); + Assert.IsEmpty(commandsExpected, $"Remaining commands not matched. 
Count: {commandsExpected.Count}\nObserved:\n{here}"); + } + + [Test] + public async Task StreamExecutorDefaultScenarioRunsExpectedCommandsOnarm64() + { + this.SetupTest(PlatformID.Unix, Architecture.Arm64); + + ProcessStartInfo expectedInfo = new ProcessStartInfo(); + string packagePath = this.mockPackage.Path; + + List commandsExpected = new List + { + $"sudo bash -c \"gcc {packagePath}/linux-arm64/stream.c -o {packagePath}/linux-arm64/streamworkload -fopenmp -D_OPENMP -DNTIMES=5000 -DSTREAM_ARRAY_SIZE=100000000\"", + $"sudo bash -c \"export OMP_NUM_THREADS=1 && chmod +x {packagePath}/linux-arm64/streamworkload && {packagePath}/linux-arm64/streamworkload\"" + }; + + StringBuilder builder = new StringBuilder(); + this.mockFixture.ProcessManager.OnProcessCreated = (process) => + { + string fullCommand = process.FullCommand(); + builder.AppendLine(fullCommand); + commandsExpected.Remove(fullCommand); + if (fullCommand.Contains("streamworkload") && !fullCommand.Contains("gcc")) + { + if (process.StandardOutput != null) + { + process.StandardOutput.Clear(); + process.StandardOutput.Append(this.defaultOutput.ToString()); + } + } + }; + + using (StreamExecutor streamExecutor = new StreamExecutor(this.mockFixture.Dependencies, this.mockFixture.Parameters)) + { + await streamExecutor.ExecuteAsync(CancellationToken.None).ConfigureAwait(false); + } + + string here = builder.ToString(); + Assert.IsEmpty(commandsExpected); + } + + [Test] + public async Task StreamExecutorMsftScenarioRunsExpectedCommandsOnarm64() + { + this.SetupTest(PlatformID.Unix, Architecture.Arm64); + + ProcessStartInfo expectedInfo = new ProcessStartInfo(); + this.mockFixture.Parameters["Toolset"] = "StreamMsft"; + string packagePath = this.mockPackage.Path; + + List commandsExpected = new List + { + $"sudo bash -c \"make\"", + $"sudo bash -c \"{packagePath}/linux-arm64/perfrunner --threads 1 \"" + }; + + StringBuilder builder = new StringBuilder(); + this.mockFixture.ProcessManager.OnProcessCreated = 
(process) => + { + string fullCommand = process.FullCommand(); + builder.AppendLine(fullCommand); + commandsExpected.Remove(fullCommand); + if (fullCommand.Contains("perfrunner")) + { + if (process.StandardOutput != null) + { + process.StandardOutput.Clear(); + process.StandardOutput.Append(this.defaultOutput.ToString()); + } + } + }; + + using (StreamExecutor streamExecutor = new StreamExecutor(this.mockFixture.Dependencies, this.mockFixture.Parameters)) + { + await streamExecutor.ExecuteAsync(CancellationToken.None).ConfigureAwait(false); + } + + string here = builder.ToString(); + Assert.IsEmpty(commandsExpected); + } + + [Test] + public async Task StreamExecutorTriadScenarioRunsExpectedCommandsWhenHyperThreadingIsEnabled() + { + this.SetupTest(PlatformID.Unix); + + this.mockFixture.Parameters["Toolset"] = "STREAMTriad"; + ProcessStartInfo expectedInfo = new ProcessStartInfo(); + string packagePath = this.mockPackage.Path; + + List commandsExpected = new List + { + $"sudo bash -c \"lscpu | grep 'Flags'\"", + $"sudo bash -c \"export KMP_AFFINITY=granularity=fine,compact,1,0 && export OMP_NUM_THREADS=1 && export LD_LIBRARY_PATH={packagePath}/linux-x64/icclib && chmod +x {packagePath}/linux-x64/StreamTriad && {packagePath}/linux-x64/StreamTriad\"" + }; + + this.mockFixture.ProcessManager.OnProcessCreated = (process) => + { + string fullCommand = process.FullCommand(); + commandsExpected.Remove(fullCommand); + if (fullCommand.Contains("StreamTriad") && !fullCommand.Contains("lscpu")) + { + if (process.StandardOutput != null) + { + process.StandardOutput.Clear(); + process.StandardOutput.Append(this.defaultOutput.ToString()); + } + } + }; + + using (StreamExecutor streamExecutor = new StreamExecutor(this.mockFixture.Dependencies, this.mockFixture.Parameters)) + { + await streamExecutor.ExecuteAsync(CancellationToken.None).ConfigureAwait(false); + } + + Assert.IsEmpty(commandsExpected); + } + + [Test] + public async Task 
StreamExecutorTriadScenarioRunsExpectedCommandsWhenHyperThreadingIsNotEnabled() + { + this.SetupTest(PlatformID.Unix); + + this.mockFixture.SystemManagement.Setup(mgr => mgr.GetCpuInfoAsync(It.IsAny())) + .ReturnsAsync(new CpuInfo("Name", "Description", 1, 2, 1, 1, hyperThreadingEnabled: false)); + + this.mockFixture.Parameters["Toolset"] = "STREAMTriad"; + ProcessStartInfo expectedInfo = new ProcessStartInfo(); + + string packagePath = this.mockPackage.Path; + List commandsExpected = new List + { + $"sudo bash -c \"lscpu | grep 'Flags'\"", + $"sudo bash -c \"export KMP_AFFINITY=compact && export OMP_NUM_THREADS=1 && export LD_LIBRARY_PATH={packagePath}/linux-x64/icclib && chmod +x {packagePath}/linux-x64/StreamTriad && {packagePath}/linux-x64/StreamTriad\"" + }; + + this.mockFixture.ProcessManager.OnProcessCreated = (process) => + { + string fullCommand = process.FullCommand(); + commandsExpected.Remove(fullCommand); + if (fullCommand.Contains("StreamTriad") && !fullCommand.Contains("lscpu")) + { + if (process.StandardOutput != null) + { + process.StandardOutput.Clear(); + process.StandardOutput.Append(this.defaultOutput.ToString()); + } + } + }; + + using (StreamExecutor streamExecutor = new StreamExecutor(this.mockFixture.Dependencies, this.mockFixture.Parameters)) + { + await streamExecutor.ExecuteAsync(CancellationToken.None).ConfigureAwait(false); + } + + Assert.IsEmpty(commandsExpected); + } + + [Test] + [TestCase("StreamTriadAVX512", "Flags: fpu vme de pse avx avx2 avx512f")] + [TestCase("StreamTriadAVX2", "Flags: fpu vme de pse avx avx2")] + [TestCase("StreamTriadAVX", "Flags: fpu vme de pse avx")] + [TestCase("StreamTriad ", "Flags: fpu vme de")] + public async Task StreamExecutorTriadScenarioSelectsCorrectBinaryToExecuteDependingOnAVX(string expectedBinary, string flags) + { + this.SetupTest(PlatformID.Unix); + + this.mockFixture.Parameters["Toolset"] = "STREAMTriad"; + string flagsCmd = $"sudo bash -c \"lscpu | grep 'Flags'\""; + ProcessStartInfo 
expectedInfo = new ProcessStartInfo(); + ConcurrentBuffer flagsOutput = new ConcurrentBuffer(); + flagsOutput.Append(flags); + bool expectedBinaryExecuted = false; + + this.mockFixture.ProcessManager.OnProcessCreated = (process) => + { + string fullCommand = process.FullCommand(); + + if (fullCommand.Contains(expectedBinary, StringComparison.Ordinal)) + { + expectedBinaryExecuted = true; + + var triadOutput = + "Function Best Rate MB/s Avg Rate MB/s Min Rate MB/s" + System.Environment.NewLine + + "Triad 12345.67 12000.00 11000.00" + System.Environment.NewLine; + + if (process.StandardOutput != null) + { + process.StandardOutput.Clear(); + process.StandardOutput.Append(triadOutput); + } + } + + if (string.Equals(fullCommand, flagsCmd, StringComparison.Ordinal)) + { + if (process.StandardOutput != null) + { + process.StandardOutput.Clear(); + process.StandardOutput.Append(flagsOutput.ToString()); + } + } + }; + + using (StreamExecutor streamExecutor = new StreamExecutor(this.mockFixture.Dependencies, this.mockFixture.Parameters)) + { + await streamExecutor.ExecuteAsync(CancellationToken.None).ConfigureAwait(false); + } + + Assert.IsTrue(expectedBinaryExecuted); + } + + [Test] + public async Task ExecuteCommandAsyncReturnsProcessOutput() + { + this.SetupTest(PlatformID.Unix, Architecture.X64); + + this.mockFixture.ProcessManager.OnProcessCreated = (process) => + { + string cmd = process.FullCommand(); + if (cmd.Contains("echo hello")) + { + if (process.StandardOutput != null) + { + process.StandardOutput.Clear(); + process.StandardOutput.Append("hello\n"); + } + } + }; + + using (StreamExecutor streamExecutor = new StreamExecutor(this.mockFixture.Dependencies, this.mockFixture.Parameters)) + { + var method = typeof(StreamExecutor).GetMethod("ExecuteCommandAsync", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance); + Assert.IsNotNull(method, "Expected private method ExecuteCommandAsync to exist."); + + var telemetryContext = 
global::VirtualClient.Common.Telemetry.EventContext.Persisted(); + var task = (Task)method.Invoke(streamExecutor, new object[] { "bash", "-c \"echo hello\"", telemetryContext, CancellationToken.None, null, null }); + string output = await task.ConfigureAwait(false); + + StringAssert.Contains("hello", output); + } + } + } +} \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Actions.UnitTests/Stream/StreamMetricsParserTests.cs b/src/VirtualClient/VirtualClient.Actions.UnitTests/Stream/StreamMetricsParserTests.cs new file mode 100644 index 0000000000..abeaafb20a --- /dev/null +++ b/src/VirtualClient/VirtualClient.Actions.UnitTests/Stream/StreamMetricsParserTests.cs @@ -0,0 +1,81 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +namespace VirtualClient.Actions +{ + using System.Collections.Generic; + using System.IO; + using System.Reflection; + using VirtualClient.Contracts; + using NUnit.Framework; + using VirtualClient; + + [TestFixture] + [Category("Unit")] + internal class StreamResultsParserUnitTests + { + private string rawText; + private StreamMetricsParser testParser; + + private string ExamplePath + { + get + { + string workingDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location); + return Path.Combine(workingDirectory, "Examples", "Stream"); + } + } + + [Test] + public void StreamResultsParserVerifyMetrics() + { + string outputPath = Path.Combine(this.ExamplePath, "StreamExample.txt"); + this.rawText = File.ReadAllText(outputPath); + this.testParser = new StreamMetricsParser(this.rawText); + IList metrics = this.testParser.Parse(); + + Assert.AreEqual(4, metrics.Count); + MetricAssert.Exists(metrics, "Best Rate Copy", 18514.5, "MBps"); + MetricAssert.Exists(metrics, "Best Rate Scale", 18333.8, "MBps"); + MetricAssert.Exists(metrics, "Best Rate Add", 23043.7, "MBps"); + MetricAssert.Exists(metrics, "Best Rate Triad", 23314, "MBps"); + } + + [Test] + public void 
StreamResultParserThrowsOnInvalidOutputFormat() + { + string invalidOutputPath = Path.Combine(this.ExamplePath, "StreamInvalidExample.txt"); + string rawText = File.ReadAllText(invalidOutputPath); + this.testParser = new StreamMetricsParser(rawText); + + SchemaException exception = Assert.Throws(() => this.testParser.Parse()); + StringAssert.Contains("incorrect format/data for parsing", exception.Message); + } + + [Test] + public void StreamResultParserThrowsOnInvalidMetricsCount() + { + string invalidMetricsCountOutputPath = Path.Combine(this.ExamplePath, "StreamInvalidMetricCountExample.txt"); + string rawText = File.ReadAllText(invalidMetricsCountOutputPath); + this.testParser = new StreamMetricsParser(rawText); + + SchemaException exception = Assert.Throws(() => this.testParser.Parse()); + StringAssert.Contains("incorrect format/data for parsing", exception.Message); + } + + [Test] + public void StreamResultsParserVerifyMetricsForWindowsFormat() + { + string outputPath = Path.Combine(this.ExamplePath, "StreamExampleWindows.txt"); + this.rawText = File.ReadAllText(outputPath); + this.testParser = new StreamMetricsParser(this.rawText); + IList metrics = this.testParser.Parse(); + + Assert.AreEqual(4, metrics.Count); + MetricAssert.Exists(metrics, "Best Rate Copy", 42890.7, "MBps"); + MetricAssert.Exists(metrics, "Best Rate Scale", 42678.9, "MBps"); + MetricAssert.Exists(metrics, "Best Rate Add", 44234.6, "MBps"); + MetricAssert.Exists(metrics, "Best Rate Triad", 44512.3, "MBps"); + } + } +} \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Actions/Stream/StreamExecutor.cs b/src/VirtualClient/VirtualClient.Actions/Stream/StreamExecutor.cs new file mode 100644 index 0000000000..6b87e2d15f --- /dev/null +++ b/src/VirtualClient/VirtualClient.Actions/Stream/StreamExecutor.cs @@ -0,0 +1,492 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +namespace VirtualClient.Actions +{ + using System; + using System.Collections.Generic; + using System.Globalization; + using System.IO.Abstractions; + using System.Runtime.InteropServices; + using System.Text.RegularExpressions; + using System.Threading; + using System.Threading.Tasks; + using Microsoft.Extensions.DependencyInjection; + using VirtualClient; + using VirtualClient.Common; + using VirtualClient.Common.Extensions; + using VirtualClient.Common.Telemetry; + using VirtualClient.Contracts; + using VirtualClient.Contracts.Metadata; + + /// + /// STREAM: Sustainable Memory Bandwidth in High Performance Computers Benchmark. + /// Executor for Stream. + /// + [SupportedPlatforms("linux-x64,linux-arm64,win-x64,win-arm64")] + public class StreamExecutor : VirtualClientComponent + { + private readonly ISystemManagement systemManagement; + private readonly IFileSystem fileSystem; + private readonly IPackageManager packageManager; + + /// + /// Constructor. + /// + /// Provides required dependencies to the component. + /// Parameters defined in the profile or supplied on the command line. + public StreamExecutor(IServiceCollection dependencies, IDictionary parameters = null) + : base(dependencies, parameters) + { + this.systemManagement = dependencies.GetService(); + this.fileSystem = this.systemManagement.FileSystem; + this.packageManager = this.systemManagement.PackageManager; + } + + /// + /// GCC compilation parameters for generating STREAM binary. + /// + public string CompilerParameters + { + get + { + this.Parameters.TryGetValue(nameof(StreamExecutor.CompilerParameters), out IConvertible compilerParameters); + return compilerParameters?.ToString(); + } + } + + /// + /// Command line parameters to run Stream Msft. 
+ /// + public string CommandLineParameters + { + get + { + this.Parameters.TryGetValue(nameof(StreamExecutor.CommandLineParameters), out IConvertible commandLineParameters); + return commandLineParameters?.ToString(); + } + } + + /// + /// Number of threads to run Stream/Stream Triad. + /// + public int? ThreadCount + { + get + { + this.Parameters.TryGetValue(nameof(StreamExecutor.ThreadCount), out IConvertible threadCount); + return threadCount != null ? threadCount.ToInt32(CultureInfo.InvariantCulture) : null; + } + + protected set + { + this.Parameters[nameof(this.ThreadCount)] = value; + } + } + + /// + /// The STREAM toolset to use (e.g. STREAM or STREAMTriad). + /// + public string Toolset + { + get + { + return this.Parameters.GetValue(nameof(StreamExecutor.Toolset), "STREAM"); + } + } + + /// + /// Gets the command line arguments for Windows Stream execution. + /// + public string CommandArgumentsWindows + { + get + { + return this.Parameters.GetValue(nameof(StreamExecutor.CommandArgumentsWindows), "-n 50 -s 320000000"); + } + } + + /// + /// Path to Stream Package. + /// + public string PackagePath { get; set; } + + /// + /// Path to Stream Windows executable. + /// + protected string ExecutablePath { get; set; } + + /// + /// Normalized toolset string (upper-case, trimmed). + /// + private string ToolsetNormalized => + this.Toolset?.Trim().ToUpperInvariant() ?? string.Empty; + + /// + /// Initializes the environment and dependencies for running the Stream workload. + /// + protected override async Task InitializeAsync(EventContext telemetryContext, CancellationToken cancellationToken) + { + await this.EvaluateParametersAsync(cancellationToken); + await this.InitializePlatformAsync(cancellationToken).ConfigureAwait(false); + } + + /// + /// Executes Stream workload. 
+ /// + protected override async Task ExecuteAsync(EventContext telemetryContext, CancellationToken cancellationToken) + { + if (this.Platform == PlatformID.Unix && this.ToolsetNormalized == "STREAM") + { + await this.ExecuteStreamAsync(telemetryContext, cancellationToken).ConfigureAwait(false); + } + else if (this.Platform == PlatformID.Unix && this.ToolsetNormalized == "STREAMTRIAD") + { + await this.ExecuteStreamTriadAsync(telemetryContext, cancellationToken).ConfigureAwait(false); + } + else if (this.Platform == PlatformID.Unix && this.ToolsetNormalized == "STREAMMSFT") + { + await this.ExecuteStreamMsftAsync(telemetryContext, cancellationToken).ConfigureAwait(false); + } + else if (this.Platform == PlatformID.Win32NT && this.ToolsetNormalized == "STREAM") + { + await this.ExecuteStreamAsync(telemetryContext, cancellationToken).ConfigureAwait(false); + } + } + + /// + /// Validates the component for viability before executing the workload. + /// + protected override void Validate() + { + base.Validate(); + + string platformArchitecture = PlatformSpecifics.GetPlatformArchitectureName(this.Platform, this.CpuArchitecture); + string toolset = this.ToolsetNormalized; + string supportedScenarios = "Supported scenarios: linux-x64 STREAM, linux-x64 STREAMTRIAD, linux-x64 STREAMMSFT, linux-arm64 STREAM, linux-arm64 STREAMMSFT, win-x64 STREAM, win-arm64 STREAM."; + + if (this.Platform == PlatformID.Unix && toolset != "STREAM" && toolset != "STREAMTRIAD" && toolset != "STREAMMSFT") + { + throw new WorkloadException( + $"Unsupported toolset '{this.Toolset}'. {supportedScenarios}", + ErrorReason.InvalidProfileDefinition); + } + else if (this.Platform == PlatformID.Unix && toolset == "STREAMTRIAD" && this.CpuArchitecture != Architecture.X64) + { + throw new WorkloadException( + $"The STREAMTRIAD toolset is only supported on linux-x64. Current platform/architecture: {platformArchitecture}. 
{supportedScenarios}", + ErrorReason.PlatformNotSupported); + } + else if (this.Platform == PlatformID.Unix && toolset == "STREAMMSFT" && this.CpuArchitecture != Architecture.Arm64) + { + throw new WorkloadException( + $"The STREAMMSFT toolset is only supported on linux-arm64. Current platform/architecture: {platformArchitecture}. {supportedScenarios}", + ErrorReason.PlatformNotSupported); + } + else if (this.Platform == PlatformID.Win32NT && toolset != "STREAM") + { + throw new WorkloadException( + $"Unsupported toolset '{this.Toolset}' for Windows. {supportedScenarios}", + ErrorReason.InvalidProfileDefinition); + } + else if (this.Platform != PlatformID.Unix && this.Platform != PlatformID.Win32NT) + { + throw new WorkloadException( + $"The Stream workload is not supported on the current platform/architecture {platformArchitecture}. {supportedScenarios}", + ErrorReason.PlatformNotSupported); + } + } + + private async Task InitializePlatformAsync(CancellationToken cancellationToken) + { + DependencyPath workloadPackage = await this.GetPlatformSpecificPackageAsync(this.PackageName, cancellationToken); + this.PackagePath = workloadPackage.Path; + + if (this.Platform == PlatformID.Win32NT) + { + this.ExecutablePath = this.PlatformSpecifics.Combine(this.PackagePath, "stream.exe"); + this.fileSystem.File.ThrowIfFileDoesNotExist(this.ExecutablePath); + } + } + + private async Task ExecuteStreamAsync(EventContext telemetryContext, CancellationToken cancellationToken) + { + switch (this.Platform) + { + case PlatformID.Unix: + await this.ExecuteStreamLinuxAsync(telemetryContext, cancellationToken).ConfigureAwait(false); + break; + + case PlatformID.Win32NT: + await this.ExecuteStreamWindowsAsync(telemetryContext, cancellationToken).ConfigureAwait(false); + break; + } + } + + private async Task ExecuteStreamLinuxAsync(EventContext telemetryContext, CancellationToken cancellationToken) + { + EventContext relatedContext = telemetryContext.Clone(); + + string sourceCodeFile = 
"stream.c"; + string executableName = "streamworkload"; + string sourceCodePath = this.PlatformSpecifics.Combine(this.PackagePath, sourceCodeFile); + string executablePath = this.PlatformSpecifics.Combine(this.PackagePath, executableName); + + string effectiveCompilerParameters = this.CompilerParameters; + + if (this.CpuArchitecture == Architecture.Arm64 + && !string.IsNullOrEmpty(effectiveCompilerParameters) + && Regex.IsMatch(effectiveCompilerParameters, @"-mcmodel=\w+")) + { + // mcmodel command line argument is not supported by arm64 + string removedParameter = Regex.Match(effectiveCompilerParameters, @"-mcmodel=\w+").Value; + + this.Logger.LogTraceMessage( + $"Removed the parameter from compiler's commandline: \"{removedParameter}\" as it is not supported on cpuarchitecture:{this.CpuArchitecture}, New effective commandLine arguments : {effectiveCompilerParameters}", + EventContext.Persisted()); + + effectiveCompilerParameters = Regex.Replace(effectiveCompilerParameters, @"-mcmodel=\w+", string.Empty); + } + + string compileStream = $"gcc {sourceCodePath} -o {executablePath} {effectiveCompilerParameters}"; + + await this.ExecuteCommandAsync("bash", $"-c \"{compileStream}\"", relatedContext, cancellationToken) + .ConfigureAwait(false); + + string ompNumThreads = $"export OMP_NUM_THREADS={this.ThreadCount}"; + string makeExecutable = $"chmod +x {executablePath}"; + string command = $"{ompNumThreads} && {makeExecutable} && {executablePath}"; + + relatedContext.AddContext("command", command); + + DateTime startTime = DateTime.UtcNow; + + string results = await this.ExecuteCommandAsync("bash", $"-c \"{command}\"", relatedContext, cancellationToken, "STREAM") + .ConfigureAwait(false); + + this.MetadataContract.AddForScenario( + "STREAM", + command, + toolVersion: null); + + this.MetadataContract.Apply(telemetryContext); + + DateTime endTime = DateTime.UtcNow; + StreamMetricsParser streamResultsParser = new StreamMetricsParser(results); + IList metrics = 
streamResultsParser.Parse(); + + this.Logger.LogMetrics( + toolName: "STREAM", + scenarioName: "MemoryBandwidth", + startTime, + endTime, + metrics, + null, + scenarioArguments: command, + this.Tags, + telemetryContext); + } + + private async Task ExecuteStreamWindowsAsync(EventContext telemetryContext, CancellationToken cancellationToken) + { + EventContext relatedContext = telemetryContext.Clone(); + + string commandArguments = this.CommandArgumentsWindows; + string streamCommand = $"{this.ExecutablePath} {commandArguments}"; + + if (this.ThreadCount.HasValue && this.ThreadCount.Value > 0) + { + Environment.SetEnvironmentVariable("OMP_NUM_THREADS", this.ThreadCount.Value.ToString(CultureInfo.InvariantCulture)); + } + + relatedContext.AddContext("command", streamCommand); + + DateTime startTime = DateTime.UtcNow; + + string results = await this.ExecuteCommandAsync(this.ExecutablePath, commandArguments, relatedContext, cancellationToken, "STREAM", workingDir: this.PackagePath) + .ConfigureAwait(false); + + this.MetadataContract.AddForScenario( + "STREAM", + streamCommand, + toolVersion: null); + + this.MetadataContract.Apply(telemetryContext); + + DateTime endTime = DateTime.UtcNow; + StreamMetricsParser streamResultsParser = new StreamMetricsParser(results); + IList metrics = streamResultsParser.Parse(); + + this.Logger.LogMetrics( + toolName: "STREAM", + scenarioName: "MemoryBandwidth", + startTime, + endTime, + metrics, + null, + scenarioArguments: streamCommand, + this.Tags, + telemetryContext); + } + + private async Task ExecuteStreamTriadAsync(EventContext telemetryContext, CancellationToken cancellationToken) + { + EventContext relatedContext = telemetryContext.Clone(); + + Tuple executableInfo = await this.GetStreamTriadExecutableAsync(relatedContext, cancellationToken); + string scenario = executableInfo.Item1; + string executableName = executableInfo.Item2; + + CpuInfo cpuInfo = await this.systemManagement.GetCpuInfoAsync(cancellationToken); + string 
executablePath = this.PlatformSpecifics.Combine(this.PackagePath, executableName); + string kmpAffinity; + + if (cpuInfo.IsHyperthreadingEnabled) + { + kmpAffinity = "export KMP_AFFINITY=granularity=fine,compact,1,0"; + } + else + { + kmpAffinity = "export KMP_AFFINITY=compact"; + } + + string ompNumThreads = $"export OMP_NUM_THREADS={this.ThreadCount}"; + string icclibPath = this.PlatformSpecifics.Combine(this.PackagePath, "icclib"); + string ldLibPath = $"export LD_LIBRARY_PATH={icclibPath}"; + string makeExecutable = $"chmod +x {executablePath}"; + string command = $"{kmpAffinity} && {ompNumThreads} && {ldLibPath} && {makeExecutable} && {executablePath}"; + + relatedContext.AddContext("command", command); + + DateTime startTime = DateTime.UtcNow; + string results = await this.ExecuteCommandAsync("bash", $"-c \"{command}\"", relatedContext, cancellationToken, "STREAM"); + DateTime endTime = DateTime.UtcNow; + + this.MetadataContract.AddForScenario( + "STREAM Triad", + command, + toolVersion: null); + + this.MetadataContract.Apply(telemetryContext); + + StreamMetricsParser streamResultsParser = new StreamMetricsParser(results); + IList metrics = streamResultsParser.Parse(); + + this.Logger.LogMetrics( + toolName: "STREAM Triad", + scenarioName: scenario, + startTime, + endTime, + metrics, + null, + scenarioArguments: command, + this.Tags, + telemetryContext); + } + + private async Task ExecuteStreamMsftAsync(EventContext telemetryContext, CancellationToken cancellationToken) + { + EventContext relatedContext = telemetryContext.Clone(); + + string executableName = "perfrunner"; + + await this.ExecuteCommandAsync("bash", "-c \"make\"", relatedContext, cancellationToken, workingDir: this.PackagePath) + .ConfigureAwait(false); + + string executablePath = this.PlatformSpecifics.Combine(this.PackagePath, executableName); + string command = $"{executablePath} --threads {this.ThreadCount} {this.CommandLineParameters}"; + + relatedContext.AddContext("command", command); + 
+ DateTime startTime = DateTime.UtcNow; + string results = await this.ExecuteCommandAsync("bash", $"-c \"{command}\"", relatedContext, cancellationToken, "STREAM") + .ConfigureAwait(false); + DateTime endTime = DateTime.UtcNow; + + this.MetadataContract.AddForScenario( + "STREAM", + command, + toolVersion: null); + + this.MetadataContract.Apply(telemetryContext); + + StreamMsftMetricsParser streammsftResultsParser = new StreamMsftMetricsParser(results); + IList metrics = streammsftResultsParser.Parse(); + + this.Logger.LogMetrics( + toolName: "STREAM MSFT", + scenarioName: "MemoryBandwidth", + startTime, + endTime, + metrics, + null, + scenarioArguments: command, + this.Tags, + telemetryContext); + } + + /// + /// Executes the given command. + /// + /// Output of the command. + private Task ExecuteCommandAsync(string cmd, string cmdArgs, EventContext telemetryContext, CancellationToken cancellationToken, string toolName = null, string workingDir = null) + { + string output = string.Empty; + + return this.Logger.LogMessageAsync($"{nameof(StreamExecutor)}.ExecuteCommand", telemetryContext, async () => + { + ISystemManagement systemManagement = this.Dependencies.GetService(); + using (IProcessProxy process = this.Platform == PlatformID.Win32NT + ? systemManagement.ProcessManager.CreateProcess(cmd, cmdArgs, workingDir) + : systemManagement.ProcessManager.CreateElevatedProcess(this.Platform, cmd, cmdArgs, workingDir: workingDir)) + { + this.CleanupTasks.Add(() => process.SafeKill()); + await process.StartAndWaitAsync(cancellationToken); + + if (!cancellationToken.IsCancellationRequested) + { + await this.LogProcessDetailsAsync(process, telemetryContext, toolName, logToFile: true); + process.ThrowIfWorkloadFailed(); + } + + output = process.StandardOutput.ToString(); + } + + return output; + }); + } + + /// + /// Gets executable name dependent on the AVX support. 
+ /// + private async Task> GetStreamTriadExecutableAsync(EventContext telemetryContext, CancellationToken cancellationToken) + { + string lscpuFlags = await this.ExecuteCommandAsync("bash", "-c \"lscpu | grep 'Flags'\"", telemetryContext, cancellationToken); + string executableName; + string scenarioName; + + if (lscpuFlags.Contains("avx512")) + { + executableName = "StreamTriadAVX512"; + scenarioName = "MemoryBandwidth-TriadAVX512"; + } + else if (lscpuFlags.Contains("avx2")) + { + executableName = "StreamTriadAVX2"; + scenarioName = "MemoryBandwidth-TriadAVX2"; + } + else if (lscpuFlags.Contains("avx")) + { + executableName = "StreamTriadAVX"; + scenarioName = "MemoryBandwidth-TriadAVX"; + } + else + { + executableName = "StreamTriad"; + scenarioName = "MemoryBandwidth-Triad"; + } + + return new Tuple(scenarioName, executableName); + } + } +} \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Actions/Stream/StreamMetricsParser.cs b/src/VirtualClient/VirtualClient.Actions/Stream/StreamMetricsParser.cs new file mode 100644 index 0000000000..aabb2f7976 --- /dev/null +++ b/src/VirtualClient/VirtualClient.Actions/Stream/StreamMetricsParser.cs @@ -0,0 +1,112 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +namespace VirtualClient.Actions +{ + using System; + using System.Collections.Generic; + using System.Globalization; + using System.Text.RegularExpressions; + using VirtualClient; + using VirtualClient.Contracts; + + /// + /// Parser for STREAM results (Linux and Windows formats). + /// + public class StreamMetricsParser : MetricsParser + { + /// + /// To match Linux-style metrics data rows. + /// e.g. Copy: 18514.5 0.242368 0.231979 0.317779 + /// + private static readonly Regex LinuxDataRowRegex = new Regex( + @"^\s*(?[\w\-\+]+):?\s+(?\d+(\.\d+)?)\s+\d+(\.\d+)?\s+\d+(\.\d+)?(\s+\d+(\.\d+)?)?", + RegexOptions.Multiline | RegexOptions.Compiled); + + /// + /// To match Windows pipe-delimited metrics data rows. 
+ /// e.g. | Copy | 42156.3 | 42890.7 | 41234.5 | 0.122345 | 0.119876 | 0.124567 | + /// + private static readonly Regex WindowsTableRowRegex = new Regex( + @"^\|\s*(?[\w\-\+]+):?\s*\|\s*(?\d+(\.\d+)?)\s*\|\s*(?\d+(\.\d+)?)\s*\|\s*(?\d+(\.\d+)?)\s*\|\s*(?\d+(\.\d+)?)\s*\|\s*(?\d+(\.\d+)?)\s*\|\s*(?\d+(\.\d+)?)\s*\|", + RegexOptions.Multiline | RegexOptions.Compiled); + + /// + /// To detect the Windows pipe table header row. + /// + private static readonly Regex WindowsHeaderRegex = new Regex( + @"^\|\s*Function\s*\|", + RegexOptions.Multiline | RegexOptions.Compiled); + + /// + /// Constructor for + /// + /// Raw text to parse. + public StreamMetricsParser(string rawText) + : base(rawText) + { + } + + /// + public override IList Parse() + { + this.Preprocess(); + + Regex dataRowRegex = WindowsHeaderRegex.IsMatch(this.PreprocessedText) + ? WindowsTableRowRegex + : LinuxDataRowRegex; + + IList metrics = this.ExtractMetrics(dataRowRegex); + + if (metrics.Count == 0) + { + throw new SchemaException("The STREAM results have incorrect format/data for parsing."); + } + + return metrics; + } + + /// + protected override void Preprocess() + { + string text = this.RawText ?? string.Empty; + text = Regex.Replace(text, "\r\n", "\n"); + text = Regex.Replace(text, "\n", Environment.NewLine); + this.PreprocessedText = text.Trim(); + } + + /// + /// To extract Best Rate metrics from regex matches. 
+ /// + private IList ExtractMetrics(Regex dataRowRegex) + { + List metrics = new List(); + MatchCollection matches = dataRowRegex.Matches(this.PreprocessedText); + + foreach (Match match in matches) + { + string func = match.Groups["func"].Value; + string bestText = match.Groups["best"].Value; + + if (string.IsNullOrWhiteSpace(func) || string.IsNullOrWhiteSpace(bestText)) + { + continue; + } + + if (!double.TryParse(bestText, NumberStyles.Float, CultureInfo.InvariantCulture, out double best)) + { + continue; + } + + metrics.Add( + new Metric( + name: $"Best Rate {func}", + value: best, + unit: "MBps", + relativity: MetricRelativity.HigherIsBetter)); + } + + return metrics; + } + } +} \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Actions/Stream/StreamMsftMetricsParser.cs b/src/VirtualClient/VirtualClient.Actions/Stream/StreamMsftMetricsParser.cs new file mode 100644 index 0000000000..6a46ea865c --- /dev/null +++ b/src/VirtualClient/VirtualClient.Actions/Stream/StreamMsftMetricsParser.cs @@ -0,0 +1,128 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +namespace VirtualClient.Actions +{ + using System; + using System.Collections.Generic; + using System.Data; + using System.Text.RegularExpressions; + using VirtualClient; + using VirtualClient.Contracts; + using DataTableExtensions = VirtualClient.Contracts.DataTableExtensions; + + /// + /// Parser for Microsoft's Stream results. + /// + public class StreamMsftMetricsParser : MetricsParser + { + /// + /// Sectionize the text by one or more empty lines. + /// + private static readonly Regex StreamSectionDelimiter = new Regex($"({Environment.NewLine})(\\s)*({Environment.NewLine})", RegexOptions.ExplicitCapture); + + /// + /// Separate the column values by 2 or more spaces, so that "N-Body Physics" will not be separated into two cells. 
+ /// + private static readonly Regex StreamDataTableDelimiter = new Regex(@"(\s){2,}", RegexOptions.ExplicitCapture); + + /// + /// Constructor for + /// + /// Raw text to parse. + public StreamMsftMetricsParser(string rawText) + : base(rawText) + { + } + + /// + /// Stream Results. + /// + public DataTable StreamResult { get; set; } + + /// + public override IList Parse() + { + this.Preprocess(); + this.Sections = TextParsingExtensions.Sectionize(this.PreprocessedText, StreamMsftMetricsParser.StreamSectionDelimiter); + + if (this.Sections.Count <= 0 || !this.Sections.ContainsKey("RESULTS TABLE")) + { + throw new SchemaException("The Stream results has incorrect format/data for parsing"); + } + + this.ParseWorkloadResult(); + + List metrics = new List(); + + for (int index = 0; index < this.StreamResult.Columns.Count; index++) + { + string metricName = this.StreamResult.Columns[index].ColumnName; + string unit = "MBps"; + MetricRelativity metricRelativity = MetricRelativity.HigherIsBetter; + + if (metricName.Contains("Rate", StringComparison.OrdinalIgnoreCase)) + { + metricRelativity = MetricRelativity.HigherIsBetter; + unit = "MBps"; + } + else if (metricName.Contains("Latency", StringComparison.OrdinalIgnoreCase)) + { + metricRelativity = MetricRelativity.LowerIsBetter; + unit = "ns"; + } + + metrics.AddRange(this.StreamResult.GetMetrics(nameIndex: 0, valueIndex: index, unit: unit, namePrefix: $"{metricName} ", metricRelativity: metricRelativity)); + } + + if (metrics.Count == 0) + { + throw new SchemaException($"The Stream results has incorrect format/data for parsing. Output is having 0 metrics."); + } + + return metrics; + } + + /// + protected override void Preprocess() + { + // Converting all CRLF(Windows EOL) to LF(Unix EOL). 
+ this.PreprocessedText = Regex.Replace(this.RawText, "\r\n", "\n"); + + // Converting all LF to Environment.NewLine + this.PreprocessedText = Regex.Replace(this.PreprocessedText, "\n", Environment.NewLine); + + // Replacing dash lines with new lines. + this.PreprocessedText = Regex.Replace(this.PreprocessedText, @"(-){3,}", Environment.NewLine); + + // Removing ":" from metric names. + this.PreprocessedText = Regex.Replace(this.PreprocessedText, @":", string.Empty); + + // Creating section for the results table with the name "RESULTS TABLE". + this.PreprocessedText = Regex.Replace(this.PreprocessedText, @"Function", $"{Environment.NewLine}RESULTS TABLE{Environment.NewLine}Function"); + + // Creating columns in case of best rate heading + this.PreprocessedText = Regex.Replace(this.PreprocessedText, @"Function(\s)*Best(\s)*Rate(\s)*MB/s", $"Function Best Rate Avg Rate Min Rate Avg Latency Min Latency Max Latency"); + + // Extra Space for delimeter + this.PreprocessedText = Regex.Replace(this.PreprocessedText, @"LATENCY", $"ns "); + + // Creating columns in case of latency + this.PreprocessedText = Regex.Replace(this.PreprocessedText, @"Function(\s)*Best(\s)*Latency(\s)*ns", $"Function Avg Latency Min Latency Max Latency"); + + // Removing report tile (Not required Section). + Regex reportTitle = new Regex(@"Stream Report"); + this.PreprocessedText = TextParsingExtensions.RemoveRows(this.PreprocessedText, reportTitle); + + // Removing unnecessary starting and ending space. 
+ this.PreprocessedText = this.PreprocessedText.Trim(); + } + + private void ParseWorkloadResult() + { + string sectionName = "RESULTS TABLE"; + this.StreamResult = DataTableExtensions.ConvertToDataTable( + this.Sections[sectionName], StreamMsftMetricsParser.StreamDataTableDelimiter, sectionName); + } + } +} \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAM.json b/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAM.json new file mode 100644 index 0000000000..63f9aebf01 --- /dev/null +++ b/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAM.json @@ -0,0 +1,68 @@ +{ + "Description": "STREAM Performance Workload - Sustainable Memory Bandwidth in High Performance Computers Benchmark", + "Metadata": { + "DocumentationLink": "https://msazure.visualstudio.com/One/_git/CRC-AIR-Workloads?version=GBmaster&path=/docs/workloads", + "RecommendedMinimumExecutionTime": "00:10:00", + "SupportedPlatforms": "linux-x64,linux-arm64,win-x64,win-arm64", + "SupportedOperatingSystems": "Windows,Ubuntu", + "SupportedValuesOfToolset": "STREAM,STREAMTriad,STREAMMsft" + }, + "Parameters": { + "CompilerVersion": "", + "CompilerParameters": "-fopenmp -mcmodel=large -D_OPENMP -DNTIMES=5000 -DSTREAM_ARRAY_SIZE=100000000", + "ThreadCount": "{PhysicalCoreCount}", + "CommandArgumentsWindows": "-n 50 -s 320000000" + }, + "Actions": [ + { + "Type": "StreamExecutor", + "Parameters": { + "Scenario": "STREAM", + "Toolset": "STREAM", + "PackageName": "stream", + "CompilerParameters": "$.Parameters.CompilerParameters", + "ThreadCount": "$.Parameters.ThreadCount", + "CommandArgumentsWindows": "$.Parameters.CommandArgumentsWindows" + } + } + ], + "Dependencies": [ + { + "Type": "CompilerInstallation", + "Parameters": { + "Scenario": "InstallCompiler", + "SupportedPlatforms": "linux-x64,linux-arm64", + "CompilerVersion": "$.Parameters.CompilerVersion" + } + }, + { + "Type": "DependencyPackageInstallation", + "Parameters": { + "Scenario": 
"InstallStreamPackage", + "SupportedPlatforms": "linux-x64,linux-arm64,win-x64,win-arm64", + "BlobContainer": "packages", + "BlobName": "stream.1.0.0-2.zip", + "PackageName": "stream", + "Extract": true + } + }, + { + "Type": "WgetPackageInstallation", + "Parameters": { + "Scenario": "Install_Visual_Cpp_Redistributable_x64", + "SupportedPlatforms": "win-x64", + "PackageName": "visual_c++_redistributable", + "PackageUri": "https://aka.ms/vs/17/release/vc_redist.x64.exe" + } + }, + { + "Type": "WgetPackageInstallation", + "Parameters": { + "Scenario": "Install_Visual_Cpp_Redistributable_arm64", + "SupportedPlatforms": "win-arm64", + "PackageName": "visual_c++_redistributable", + "PackageUri": "https://aka.ms/vs/17/release/vc_redist.arm64.exe" + } + } + ] +} \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMMSFT.json b/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMMSFT.json new file mode 100644 index 0000000000..028c3caa67 --- /dev/null +++ b/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMMSFT.json @@ -0,0 +1,53 @@ +{ + "Description": "Microsoft's Stream Performance Workload", + "Metadata": { + "DocumentationLink": "https://msazure.visualstudio.com/One/_git/CRC-AIR-Workloads?version=GBmaster&path=/docs/workloads", + "RecommendedMinimumExecutionTime": "00:10:00", + "SupportedPlatforms": "linux-arm64", + "SupportedOperatingSystems": "Ubuntu", + "SupportedValuesOfToolset": "STREAM,STREAMTriad,STREAMMsft" + }, + "Parameters": { + "CompilerVersion": "", + "CommandLineParameters": "--internal-iter 1000 --internal-iter-lat 1000", + "ThreadCount": "{PhysicalCoreCount}" + }, + "Actions": [ + { + "Type": "StreamExecutor", + "Parameters": { + "Scenario": "STREAM-MSFT", + "Toolset": "STREAMMSFT", + "PackageName": "streammsft", + "CommandLineParameters": "$.Parameters.CommandLineParameters", + "ThreadCount": "$.Parameters.ThreadCount" + } + } + ], + "Dependencies": [ + { + "Type": 
"CompilerInstallation", + "Parameters": { + "Scenario": "InstallCompiler", + "CompilerVersion": "$.Parameters.CompilerVersion" + } + }, + { + "Type": "LinuxPackageInstallation", + "Parameters": { + "Scenario": "InstallLinuxPackages", + "Packages": "make,libnuma-dev" + } + }, + { + "Type": "DependencyPackageInstallation", + "Parameters": { + "Scenario": "InstallStreamPackage", + "BlobContainer": "packages", + "BlobName": "streammsft.1.0.0.zip", + "PackageName": "streammsft", + "Extract": true + } + } + ] +} \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMTRIAD.json b/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMTRIAD.json new file mode 100644 index 0000000000..a8387ec9ee --- /dev/null +++ b/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMTRIAD.json @@ -0,0 +1,36 @@ +{ + "Description": "Stream Performance Workload", + "Metadata": { + "DocumentationLink": "https://msazure.visualstudio.com/One/_git/CRC-AIR-Workloads?version=GBmaster&path=/docs/workloads", + "RecommendedMinimumExecutionTime": "00:10:00", + "SupportedPlatforms": "linux-x64", + "SupportedOperatingSystems": "Ubuntu", + "SupportedValuesOfToolset": "STREAM,STREAMTriad,STREAMMsft" + }, + "Parameters": { + "ThreadCount": "{PhysicalCoreCount}" + }, + "Actions": [ + { + "Type": "StreamExecutor", + "Parameters": { + "Scenario": "STREAM-Triad", + "Toolset": "STREAMTriad", + "PackageName": "stream", + "ThreadCount": "$.Parameters.ThreadCount" + } + } + ], + "Dependencies": [ + { + "Type": "DependencyPackageInstallation", + "Parameters": { + "Scenario": "InstallStreamPackage", + "BlobContainer": "packages", + "BlobName": "stream.1.0.0-1.zip", + "PackageName": "stream", + "Extract": true + } + } + ] +} \ No newline at end of file diff --git a/website/docs/workloads/stream/msftstreammakefile.txt b/website/docs/workloads/stream/msftstreammakefile.txt new file mode 100644 index 0000000000..19c0996088 --- /dev/null +++ 
b/website/docs/workloads/stream/msftstreammakefile.txt @@ -0,0 +1,19 @@ +DATA = $(shell date '+%Y%m%d') + +all: $(DATA).bwtest $(DATA).bwtest.dbg perfrunner + +perfrunner: + cp $(DATA).bwtest perfrunner + +$(DATA).bwtest: clean Streams.cpp + g++ -DLINUX -o $(DATA).bwtest Streams.cpp read.s readq.s copy.s copyq.s scale.s scaleq.s add.s addq.s triad.s triadq.s write.s writeq.s ptr_lat.s rw.s rrw.s read_sve.s write_sve.s copy_sve.s scale_sve.s add_sve.s triad_sve.s ptr_rmw.s -Wall -O2 -march=armv8-a+sve -lnuma + +$(DATA).bwtest.dbg: Streams.cpp + g++ -DLINUX -o $(DATA).bwtest.dbg Streams.cpp read.s readq.s copy.s copyq.s scale.s scaleq.s add.s addq.s triad.s triadq.s write.s writeq.s ptr_lat.s rw.s rrw.s read_sve.s write_sve.s copy_sve.s scale_sve.s add_sve.s triad_sve.s ptr_rmw.s -Wall -O0 -g -march=armv8-a+sve -lnuma + +$(DATA).bwtest.hlm: Streams.cpp + g++ -DLINUX -DHLM -D__aarch64__ -o $(DATA).bwtest.hlm Streams.cpp read.s readq.s copy.s copyq.s scale.s scaleq.s add.s addq.s triad.s triadq.s write.s writeq.s ptr_lat.s rw.s rrw.s read_sve.s write_sve.s copy_sve.s scale_sve.s add_sve.s triad_sve.s ptr_rmw.s -Wall -O2 -static -march=armv8-a+sve -lnuma + +clean: + rm -f *.bwtest *.dbg *.hlm perfrunner + diff --git a/website/docs/workloads/stream/stream-metrics.md b/website/docs/workloads/stream/stream-metrics.md new file mode 100644 index 0000000000..51d0988c9a --- /dev/null +++ b/website/docs/workloads/stream/stream-metrics.md @@ -0,0 +1,82 @@ +# STREAM Workload Metrics + +The following document illustrates the type of results that are emitted by the STREAM workload and captured by the +Virtual Client for net impact analysis. + +### System Metrics +Different metrics are captured from the system depending upon which monitor profiles are used. If a monitor profile is not +defined, the default MONITORS-DEFAULT.json profile is used. See the following documentation to determine monitor profiles +that are available. 
+ +* [Monitor Profiles](https://github.com/microsoft/VirtualClient/blob/main/website/docs/monitors/monitor-profiles.md) +* [Monitor Profiles (internal only)](../../monitors/monitor-profiles.md) + +### Workload-Specific Metrics + +The following metrics are emitted by the STREAM or STREAMTRIAD (Intel-specialized) workload itself. + +| Metric Name | Example Value (min) | Example Value (max) | Example Value (avg) | Unit | +|------------------------|---------------------|---------------------|---------------------|------| +| Best Rate Add | 8635.5 | 327893.5 | 42849.75067544962 | MBps | +| Best Rate Copy | 6787.4 | 346279.0 | 30720.395126646046 | MBps | +| Best Rate Scale | 6747.1 | 320023.2 | 30578.698884020574 | MBps | +| Best Rate Triad | 10141.2 | 305781.6 | 42735.667183905876 | MBps | + +The following metrics are emitted by the MSFT STREAM + +| MetricName | Example Value(max) | Example Value (avg) | Example Value (min) | MetricUnit | +|-------------------|--------------------|---------------------|---------------------|------------| +| Min Rate Read | 50057 | 49066.33333 | 47600 | MBps | +| Min Rate Copy | 72302 | 71153.5 | 70192 | MBps | +| Avg Rate Read | 50433 | 49814.33333 | 47981 | MBps | +| Min Rate Triad | 55407 | 54307.66667 | 53180 | MBps | +| Min Rate Scale | 72308 | 71063.83333 | 70110 | MBps | +| Avg Rate Scale | 73315 | 72425.5 | 71056 | MBps | +| Best Rate Copy | 74208 | 73171.83333 | 72073 | MBps | +| Min Rate Add | 53658 | 52981.16667 | 52199 | MBps | +| Avg Rate Add | 54074 | 53553.16667 | 52848 | MBps | +| Best Rate Scale | 74990 | 73716 | 72486 | MBps | +| Best Rate Write | 133326 | 116689.6667 | 87466 | MBps | +| Best Rate Add | 54544 | 54011.5 | 53351 | MBps | +| Avg Rate Triad | 55720 | 55032.5 | 53849 | MBps | +| Min Rate Write | 118017 | 96797.83333 | 84744 | MBps | +| Avg Rate Write | 124882 | 105829 | 86252 | MBps | +| Best Rate Triad | 56567 | 55725.5 | 54780 | MBps | +| Avg Rate Copy | 73323 | 72106.66667 | 71016 | MBps | +| Best Rate 
Read | 51087 | 50461 | 48497 | MBps | +| Min Latency Scale | 159 | 145.5 | 137 | ns | +| Avg Latency Write | 377 | 261.1666667 | 196 | ns | +| Min Latency Read | 135 | 128 | 121 | ns | +| Min Latency Add | 145 | 134.5 | 126 | ns | +| Max Latency Read | 181 | 168.5 | 156 | ns | +| Avg Latency Triad | 163 | 151.6666667 | 141 | ns | +| Max Latency Add | 189 | 172.6666667 | 158 | ns | +| Min Latency Copy | 169 | 151.6666667 | 143 | ns | +| Avg Latency Read | 152 | 147.5 | 144 | ns | +| Avg Latency Copy | 183 | 164.8333333 | 155 | ns | +| Max Latency Copy | 203 | 183.8333333 | 173 | ns | +| Avg Latency Add | 161 | 151.1666667 | 144 | ns | +| Max Latency Write | 446 | 299.1666667 | 220 | ns | +| Min Latency Triad | 151 | 135.1666667 | 128 | ns | +| Min Latency Write | 319 | 224 | 175 | ns | +| Max Latency Scale | 205 | 178.6666667 | 169 | ns | +| Max Latency Triad | 193 | 171 | 157 | ns | +| Avg Latency Scale | 179 | 161.8333333 | 155 | ns | + + +Msft Stream output explained: + +Function Best Rate MB/s +Read: 18095 17922 17677 110 110 111 +Copy: 28378 28330 28302 113 107 115 +Scale: 28363 27853 27129 114 112 116 +Add: 19826 19788 19700 113 112 113 +Triad: 20457 20197 19829 113 112 113 +Write: 47899 47881 47872 117 113 120 + +Column 1 : Best Rate +Column 2 : Avg Rate +Column 3 : Min Rate +Column 4 : Avg Latency +Column 5 : Min Latency +Column 6 : Max Latency diff --git a/website/docs/workloads/stream/stream-profiles.md b/website/docs/workloads/stream/stream-profiles.md new file mode 100644 index 0000000000..c559574966 --- /dev/null +++ b/website/docs/workloads/stream/stream-profiles.md @@ -0,0 +1,190 @@ +# STREAM Workload Profiles +The following profiles run customer-representative or benchmarking scenarios using the STREAM workload. 
+ +* [Getting Started](https://microsoft.github.io/VirtualClient/) +* [Workload Details](./stream.md) +* [Workload Profile Metrics](./stream-metrics.md) +* [Workload Packages](https://github.com/microsoft/VirtualClient/blob/main/website/docs/developing/dependency-packages.md) + +----------------------------------------------------------------------- + +### Preliminaries +The profiles below require the ability to download workload packages and dependencies from a package store. In order to download the workload packages, connection +information must be supplied on the command line. See the 'Workload Packages' documentation above for details on how that works. + +----------------------------------------------------------------------- + +### PERF-MEM-STREAM.json +Runs a Memory-intensive workload using the STREAM Benchmark to test the bandwidth of the Memory. This profile compiles the workload using 'gcc'. + +* **OS/Architecture Platforms** + * linux-x64 + * linux-arm64 + +* **Supported Operating Systems** + * Ubuntu 18 + * Ubuntu 20 + * Ubuntu 22 + +* **Supported Compilers** + The following compilers are supported with the workload for this profile. See profile parameters and usage examples below. + + * GCC Compiler Versions = 8, 9, 10, 11 + +* **Dependencies** + The following dependencies must be met to run this workload profile. + + * Workload package must exist in the 'packages' directory or connection information for the package store supplied on the command line (see 'Workload Packages' link above). + +* **Profile Parameters** + The following parameters can be optionally supplied on the command line to modify the behaviors of the workload. See the 'Usage Scenarios/Examples' above for examples on how to supply parameters to + Virtual Client profiles. 
+ + | Parameter | Purpose |Default | + |---------------------------|---------------------------------------------------------------|-----------------------------------------------------------------------------| + | CompilerVersion | Not Required. Compiler's Version to install. | | + | CompilerParameters | Not Required. Parameters used to compile the stream binary. |-fopenmp -mcmodel=large -D_OPENMP -DNTIMES=5000 -DSTREAM_ARRAY_SIZE=100000000| + +* **Component/Action Parameters** + The following parameters are available in the profile components/actions. + + | Parameter | Purpose |Default | + |---------------------------|-------------------------------------------------------------------------------------------------------------------------|-------------| + | CompilerVersion | Not Required. Compiler's Version to install. | | + | CompilerParameters | Not Required. Parameters used to compile the stream binary. |-fopenmp -mcmodel=large -D_OPENMP -DNTIMES=5000 -DSTREAM_ARRAY_SIZE=100000000| + | Toolset | Defines the STREAM toolset to use. Valid values include: STREAM and STREAMTriad. Note that the STREAMTriad toolset can be used on Intel CPU systems only. | STREAM | + + + +* **Compiler Flags** + + | Parameter | Purpose | + |------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------| + | -fopenmp -D_OPENMP | Uses OpenMP to run the workload across multiple processors. | + | -DNTIMES | Flag DNTIMES defines the number of iterations of the workload; each iteration takes around 10-50 milliseconds depending on the VM SKU. | + | -DSTREAM_ARRAY_SIZE=100000000| Array size used by the Stream. | + | -mcmodel=large | Allows the statically allocated arrays to exceed 2 GB (needed for array size 100000000); the large code model makes no assumptions about the addresses and sizes of sections. | + +* **Workload Runtimes** + The following timings represent the length of time required to run a single round of tests. 
These timings can be used to determine + minimum required runtimes for the Virtual Client in order to get results. These are estimates based on the use of prescribed VM SKUs. + It is practical to allow for minimum 1 to 2 hours extra runtime to ensure the tests can complete full test runs. + * Expected Runtime = 10 secs + +* **Usage Examples** + The following section provides a few basic examples of how to use the workload profile. Additional usage examples can be found in the + 'Usage Scenarios/Examples' link at the top. + +
+ + ``` bash + ./VirtualClient --profile=PERF-MEM-STREAM.json --timeout=60 --packageStore="{BlobConnectionString|SAS Uri}" + ``` +
+ +### PERF-MEM-STREAMTRIAD.json +Runs a Memory-intensive workload using the STREAM Benchmark to test memory bandwidth. This profile is designed by the Intel team to +maximize the utilization of Intel processors. + +* **Supported Platform/Architectures** + * linux-x64 + +* **Supported Operating Systems** + * Ubuntu 18 + * Ubuntu 20 + * Ubuntu 22 + +* **Dependencies** + The following dependencies must be met to run this workload profile. + + * Workload package must exist in the 'packages' directory or connection information for the package store supplied on the command line (see 'Workload Packages' link above). + +* **Workload Runtimes** + The following timings represent the length of time required to run a single round of tests ran. These timings can be used to determine + minimum required runtimes for the Virtual Client in order to get results. These are estimates based on the use of prescribed VM SKUs. + It is practical to allow for minimum 1 to 2 hours extra runtime to ensure the tests can complete full test runs. + * Expected Runtime = 10 secs + +* **Usage Examples** + The following section provides a few basic examples of how to use the workload profile. Additional usage examples can be found in the + 'Usage Scenarios/Examples' link at the top. + +
+ + ``` bash + ./VirtualClient --profile=PERF-MEM-STREAMTRIAD.json --timeout=60 --packageStore="{BlobConnectionString|SAS Uri}" + ``` +
+ +### PERF-MEM-STREAMMSFT.json +Runs a Memory-intensive workload using the STREAM Benchmark to test memory bandwidth. This profile is designed by Microsoft team to maximize the performance of 1P programs. + +* **Supported Platform/Architectures** + * linux-arm64 + +* **Supported Operating Systems** + * Ubuntu 18 + * Ubuntu 20 + * Ubuntu 22 + +* **Supported Compilers** + The following compilers are supported with the workload for this profile. See profile parameters and usage examples below. + + * G++ Compiler Versions = 8, 9, 10, 11 + +* **Dependencies** + The following dependencies must be met to run this workload profile. + + * Workload package must exist in the 'packages' directory or connection information for the package store supplied on the command line (see 'Workload Packages' link above). + +* **Workload Runtimes** + The following timings represent the length of time required to run a single round of tests ran. These timings can be used to determine + minimum required runtimes for the Virtual Client in order to get results. These are estimates based on the use of prescribed VM SKUs. + It is practical to allow for minimum 1 to 2 hours extra runtime to ensure the tests can complete full test runs. + * Expected Runtime = 10 secs + +* **Usage Examples** + The following section provides a few basic examples of how to use the workload profile. Additional usage examples can be found in the + 'Usage Scenarios/Examples' link at the top. + +* **Profile Parameters** + The following parameters can be optionally supplied on the command line to modify the behaviors of the workload. See the 'Usage Scenarios/Examples' above for examples on how to supply parameters to + Virtual Client profiles. + + | Parameter | Purpose |Default | + |---------------------------|---------------------------------------------------------------|-----------------------------------------------------------------------------| + | CompilerName | Not Required. Compiler used to compile. 
|gcc | + | CompilerVersion | Not Required. Compiler's Version to install. |10 | + | CommandLineParameters | Not Required. Parameters to be used in MSFT Stream. |--internal-iter 1000 --internal-iter-lat 1000| + | ThreadCount | Not Required. Number of threads used to run the workload | No. of Physical Cores. | + +* **Component/Action Parameters** + The following parameters are available in the profile components/actions. + + | Parameter | Purpose |Default | + |---------------------------|-------------------------------------------------------------------------------------------------------------------------|-------------| + | CompilerName | Not Required. Compiler used to compile. |gcc | + | CompilerVersion | Not Required. Compiler's Version to install. |10 | + | CommandLineParameters | Not Required. Parameters to be used in MSFT Stream. |--internal-iter 1000 --internal-iter-lat 1000| + | ThreadCount | Not Required. Number of threads used to run the workload | No. of Physical Cores. | + | Toolset | Defines the STREAM toolset to use. Valid values include: STREAM, STREAMTriad and STREAMMsft. Note that the STREAMTriad toolset can be used on Intel CPU systems only. | STREAMMSFT | +Note: The default parameters follow the parameters documentation in order to have stable results. +[Msft Stream Parameters](./stream.md) + +* **Makefile for Msft Stream** + +[MakeFile](./msftstreammakefile.txt) + + +
+ + ``` bash + ./VirtualClient --profile=PERF-MEM-STREAMMSFT.json --timeout=60 --packageStore="{BlobConnectionString|SAS Uri}" + ``` +
+----------------------------------------------------------------------- + +### Resources + +* [Azure VM Sizes](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes) +* [Azure Managed Disks](https://azure.microsoft.com/en-us/pricing/details/managed-disks/) \ No newline at end of file diff --git a/website/docs/workloads/stream/stream-supplemental.md b/website/docs/workloads/stream/stream-supplemental.md new file mode 100644 index 0000000000..2d2b497fb3 --- /dev/null +++ b/website/docs/workloads/stream/stream-supplemental.md @@ -0,0 +1,110 @@ +# STREAM Workload Supplemental +The following information is additional/supplemental to the documentation available for the STREAM workload. This information is intended for +use by teams internal to Microsoft and their affiliates. + +## System Recommendations +The following sections provide recommendations to consider when running Virtual Client profiles (workloads, monitors and tests) on +a system. + +### PERF-MEM-STREAM.json +The following configurations are general recommendations for use when running this profile on cloud hardware systems and virtual machines. + +* **Recommended Configurations** + Note that the term "cores" as used below in describing VM specifications should be inferred as synonymous with the term virtual CPU (vCPU). The configurations + below cover those used by the CRC team for running this workload as part of the Virtual Client platform. These come from recommendations and empirical + evidence from running on Azure cloud systems and are designed to mimic "customer-representative" scenarios or to utilize/stress the physical nodes/systems. + These configurations have generally proven to be well-suited for net impact analysis on systems where a change is being applied to the physical hardware + (e.g. a firmware update). 
+ + * Operating System (unless otherwise specified below) + * Linux Scenarios + * Publisher: Canonical + * Offer: UbuntuServer + * Sku: 18.04-LTS + * Version: latest + * AMD Gen6 (Naples) Hardware + * Virtual Machines (per node) + * Firmware/Hardware Validations = 1 x 64-core -> Standard_L64_v2 + * Test/QoS = 1 x 16-core -> Standard_L16_v2 +

+ * AMD Gen7 (Rome) Hardware + * Virtual Machines (per node) + * Firmware/Hardware Validations (ideal) = 1 x 72-core -> Standard_D72a_v4, Standard_E72a_v4 + * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64a_v4, Standard_E64a_v4 + * Test/QoS = 1 x 16-core -> Standard_D16a_v4, Standard_E16a_v4 +

+ * AMD Gen8 (Milan) Hardware + * Virtual Machines (per node) + * Firmware/Hardware Validations (ideal) = 1 x 96-core -> Standard_D96a_v4/v5, Standard_E96a_v4/v5 + * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64a_v4/v5, Standard_E64a_v4/v5 + * Test/QoS = 1 x 16-core -> Standard_D16a_v4/v5, Standard_E16a_v4/v5 +

+ * Intel Gen5 (Broadwell) Hardware + * Virtual Machines (per node) + * Firmware/Hardware Validations = 1 x 48-core -> Standard_D48_v3, Standard_E48_v3, Standard_F48_v2 + * Test/QoS = 1 x 16-core -> Standard_D16_v3, Standard_E16_v3, Standard_F16 +

+ * Intel Gen6 (Coffee Lake) Hardware + * Virtual Machines (per node) + * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v3, Standard_E64_v3, Standard_F64_v2 + * Test/QoS = 1 x 16-core -> Standard_D16_v3, Standard_E16_v3, Standard_F16_v2 +

+ * Intel Gen6 (Skylake) Hardware + * Virtual Machines (per node) + * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v3, Standard_E64_v3, Standard_F64_v2 + * Test/QoS = 1 x 16-core -> Standard_D16_v3, Standard_E16_v3, Standard_F16_v2 +

+ * Intel Gen7 (Cascade Lake) Hardware + * Virtual Machines (per node) + * Firmware/Hardware Validations (ideal) = 1 x 72-core -> Standard_D72_v5, Standard_E72_v5, Standard_F72_v2 + * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v3/v4, Standard_E64_v3/v4, Standard_F64_v2 + * Test/QoS = 1 x 16-core -> Standard_D16_v3/v4, Standard_E16_v3/v4, Standard_F16_v2 +

+ * Intel Gen8 (Icelake) Hardware + * Virtual Machines (per node) + * Firmware/Hardware Validations (ideal) = 1 x 96-core -> Standard_D96_v5, Standard_E96_v5, Standard_F96_v2 + * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v4/v5, Standard_E64_v4/v5, Standard_F64_v2 + * Test/QoS = 1 x 16-core -> Standard_D16_v4/v5, Standard_E16_v4/v5, Standard_F16_v2 + +### PERF-MEM-STREAMTRIAD.json +The following configurations are general recommendations for use when running this profile on cloud hardware systems and virtual machines. + +* **Recommended Configurations** + Note that the term "cores" as used below in describing VM specifications should be inferred as synonymous with the term virtual CPU (vCPU). The configurations + below cover those used by the CRC team for running this workload as part of the Virtual Client platform. These come from recommendations and empirical + evidence from running on Azure cloud systems and are designed to mimic "customer-representative" scenarios or to utilize/stress the physical nodes/systems. + These configurations have generally proven to be well-suited for net impact analysis on systems where a change is being applied to the physical hardware + (e.g. a firmware update). + + * Operating System (unless otherwise specified below) + * Linux Scenarios + * Publisher: Canonical + * Offer: UbuntuServer + * Sku: 18.04-LTS + * Version: latest + * Intel Gen5 (Broadwell) Hardware + * Virtual Machines (per node) + * Firmware/Hardware Validations = 1 x 48-core -> Standard_D48_v3, Standard_E48_v3, Standard_F48_v2 + * Test/QoS = 1 x 16-core -> Standard_D16_v3, Standard_E16_v3, Standard_F16 +

+ * Intel Gen6 (Coffee Lake) Hardware + * Virtual Machines (per node) + * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v3, Standard_E64_v3, Standard_F64_v2 + * Test/QoS = 1 x 16-core -> Standard_D16_v3, Standard_E16_v3, Standard_F16_v2 +

+ * Intel Gen6 (Skylake) Hardware + * Virtual Machines (per node) + * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v3, Standard_E64_v3, Standard_F64_v2 + * Test/QoS = 1 x 16-core -> Standard_D16_v3, Standard_E16_v3, Standard_F16_v2 +

+ * Intel Gen7 (Cascade Lake) Hardware + * Virtual Machines (per node) + * Firmware/Hardware Validations (ideal) = 1 x 72-core -> Standard_D72_v5, Standard_E72_v5, Standard_F72_v2 + * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v3/v4, Standard_E64_v3/v4, Standard_F64_v2 + * Test/QoS = 1 x 16-core -> Standard_D16_v3/v4, Standard_E16_v3/v4, Standard_F16_v2 +

+ * Intel Gen8 (Icelake) Hardware + * Virtual Machines (per node) + * Firmware/Hardware Validations (ideal) = 1 x 96-core -> Standard_D96_v5, Standard_E96_v5, Standard_F96_v2 + * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v4/v5, Standard_E64_v4/v5, Standard_F64_v2 + * Test/QoS = 1 x 16-core -> Standard_D16_v4/v5, Standard_E16_v4/v5, Standard_F16_v2 \ No newline at end of file diff --git a/website/docs/workloads/stream/stream.md b/website/docs/workloads/stream/stream.md new file mode 100644 index 0000000000..0636835984 --- /dev/null +++ b/website/docs/workloads/stream/stream.md @@ -0,0 +1,70 @@ +# STREAM Workload + +STREAM is a benchmark for measuring memory bandwidth performance. + +* [Official STREAM Benchmark Documentation](https://www.cs.virginia.edu/stream/) +* [Intel's STREAM TRIAD Documentation](https://www.intel.com/content/www/us/en/developer/articles/technical/optimizing-memory-bandwidth-on-stream-triad.html) + +--- + +### What is Being Tested? + +The following performance analysis tests are run as part of the STREAM workload. It measures the Memory Bandwidth in MB/s (1 MB=10^6 B, *not* 2^20 B) + + +| Bandwidth Benchmark | Description | +|-----------------------|-----------------------------------------------------------| +| Copy | Measures memory copy operation speeds | +| Scale | Measures memory scale operation speeds | +| Add | Measures memory add operation speeds | +| Triad | Measures memory triad operation speeds | + +--- + +### Supported Platform/Architectures + +* linux-x64 +* linux-arm64 + +### MSFT STREAM Parameters (Can't be used by STREAM and STREAMTriad) : + --rnd-threads RND_THREADS: Number of total threads to randomize the work. + --threads THREADS: Number of threads to split the work. + --lat-threads LAT_THREADS: Number of threads running latency test + --l2tol2-threads L2toL2_THREADS: Number of threads running latency test from L2 to L2 + --bw-array BW_ARRAY_SIZE: Array size in 1KB blocks per thread to measure bandwidth.
+ --lat-array LAT_ARRAY_SIZE: Array size in 1KB blocks to measure latency. + --lat-accesses LAT_ACCESSES: Number of accesses to measure latency. + --lat-seq : Measure latency using sequential address on a 64B basis. + --l2tol2-array L2toL2_ARRAY_SIZE: Array size in 1KB blocks to measure L2 to L2 latency. + --iter ITERATIONS: Number of iterations to measure performance. + --internal-iter INTERNAL_ITER: Internal iteration for bandwidth loop to guarantee stable results. + --internal-iter-lat INTERNAL_ITER_LAT: Internal iteration for latency loop to guarantee stable results. + --pattern PATTERN: Type of pattern, 0 is fixed, 1 is maximum switch and 2 is fully random. + --enable-lat ENABLE_LAT: Measure latency accessing to a vector randomly + --enable-l2tol2 ENABLE_L2toL2: Measure latency accessing remote l2 vector randomly. This test disables all other + --verbose VERBOSE: 0 is no verbose level, 1 is basic verbose level to check synch points and 2 is for debug. + --limit-time SECONDS: Number of seconds to run per iteration + --kernel KERNEL: 0 is READ + 1 is COPY + 2 is SCALE + 3 is ADD + 4 is TRIAD + 5 is WRITE + 6 is ALL (0-5) + 7 is NONE (ENABLE_LAT is required on this mode) + 8 is 1R1W + 9 is 2R1W + --total-size-kb TOTAL_SIZE: Total Array size in KB to measure bandwidth, the tool will split it across all threads. + --total-size-mb TOTAL_SIZE: Total Array size in MB to measure bandwidth, the tool will split it across all threads. + --total-size-gb TOTAL_SIZE: Total Array size in GB to measure bandwidth, the tool will split it across all threads.
+ --use-scalar: This parameters enables standard scalar kernels + --use-sve: This parameters enables SVE kernels + --use-neon: This parameters enables NEON 8.1 kernels (default) + --use-numa: This parameters forces each thread to alloc the memory to be NUMA aware + --numa-sequential: This parameters forces NUMA0 until it's fully used + --numa-inverted: This parameters forces memory allocation on the oposite NUMA node + --numa-alloc node_id: This parameters forces memory allocation on a fix NUMA node + --silent: This parameters disables all the outputs except the performance report + --perCoreReport: This parameters the performance report per core + --streams-seq: This parameters runs the streams kernels in the same order + --help provides this info From 4a25158560e30af03bf194a4f56aed0cbf8fc293 Mon Sep 17 00:00:00 2001 From: saibulusu Date: Thu, 28 May 2026 15:39:46 -0700 Subject: [PATCH 2/8] excluding triad & msft --- .../StreamProfileTests.cs | 166 ------------------ .../profiles/PERF-MEM-STREAMMSFT.json | 53 ------ .../profiles/PERF-MEM-STREAMTRIAD.json | 36 ---- .../docs/workloads/stream/stream-metrics.md | 4 +- .../docs/workloads/stream/stream-profiles.md | 101 ----------- .../workloads/stream/stream-supplemental.md | 43 ----- website/docs/workloads/stream/stream.md | 4 +- 7 files changed, 4 insertions(+), 403 deletions(-) delete mode 100644 src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMMSFT.json delete mode 100644 src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMTRIAD.json diff --git a/src/VirtualClient/VirtualClient.Actions.FunctionalTests/StreamProfileTests.cs b/src/VirtualClient/VirtualClient.Actions.FunctionalTests/StreamProfileTests.cs index ccb8d91e46..3b9104b293 100644 --- a/src/VirtualClient/VirtualClient.Actions.FunctionalTests/StreamProfileTests.cs +++ b/src/VirtualClient/VirtualClient.Actions.FunctionalTests/StreamProfileTests.cs @@ -28,146 +28,6 @@ public void SetupFixture() 
ComponentTypeCache.Instance.LoadComponentTypes(TestDependencies.TestDirectory); } - [Test] - [TestCase("PERF-MEM-STREAMTRIAD.json")] - public void StreamTriadWorkloadProfileParametersAreInlinedCorrectly(string profile) - { - this.mockFixture.Setup(PlatformID.Unix); - using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies)) - { - WorkloadAssert.ParameterReferencesInlined(executor.Profile); - } - } - - [Test] - [TestCase("PERF-MEM-STREAMTRIAD.json")] - public async Task StreamTriadWorkloadProfileExecutesTheExpectedWorkloadsOnUnixPlatform(string profile) - { - IEnumerable expectedCommands = StreamProfileTests.GetStreamTriadProfileExpectedCommands(); - - // Setup the expectations for the workload - // - Workload package is installed and exists. - // - The workload generates valid results. - this.mockFixture.Setup(PlatformID.Unix); - this.mockFixture.SystemManagement.Setup(mgr => mgr.GetCpuInfoAsync(It.IsAny())) - .ReturnsAsync(new CpuInfo("Name", "Description", 1, 2, 1, 1, true)); - - this.mockFixture.SetupPackage("stream", expectedFiles: "linux-x64/stream"); - - this.mockFixture.ProcessManager.OnCreateProcess = (command, arguments, workingDir) => - { - IProcessProxy process = this.mockFixture.CreateProcess(command, arguments, workingDir); - if (arguments.Contains("Stream", StringComparison.OrdinalIgnoreCase)) - { - process.StandardOutput.Append(TestDependencies.GetResourceFileContents("Results_Stream.txt")); - } - else if (arguments.Contains("lscpu | grep 'Flags'")) - { - process.StandardOutput.AppendLine("Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid avx512f avx512dq rdseed adx smap 
avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves avx512vbmi"); - } - - return process; - }; - - using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies)) - { - executor.ExecuteDependencies = false; - await executor.ExecuteAsync(ProfileTiming.OneIteration(), CancellationToken.None).ConfigureAwait(false); - await executor.ExecuteAsync(ProfileTiming.OneIteration(), CancellationToken.None).ConfigureAwait(false); - - WorkloadAssert.CommandsExecuted(this.mockFixture, expectedCommands.ToArray()); - } - } - - [Test] - [Ignore("We need to rethink how to do dependency testing with extension model.")] - [TestCase("PERF-MEM-STREAMTRIAD.json")] - public async Task StreamTriadWorkloadProfileInstallsTheExpectedDependenciesOnUnixPlatform(string profile) - { - // The setup in a typical Azure VM scenario - this.mockFixture.Setup(PlatformID.Unix); - - using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies, dependenciesOnly: true)) - { - executor.ExecuteDependencies = false; - await executor.ExecuteAsync(ProfileTiming.OneIteration(), CancellationToken.None).ConfigureAwait(false); - - // Workload dependency package expectations - // The workload dependency package should have been installed at this point. - WorkloadAssert.WorkloadPackageInstalled(this.mockFixture, "stream"); - } - } - - [Test] - [TestCase("PERF-MEM-STREAMTRIAD.json")] - public void StreamTriadProfileActionsWillNotBeExecutedIfTheWorkloadPackageDoesNotExist(string profile) - { - this.mockFixture.Setup(PlatformID.Unix); - this.mockFixture.SystemManagement.Setup(mgr => mgr.GetCpuInfoAsync(It.IsAny())) - .ReturnsAsync(new CpuInfo("Name", "Description", 1, 2, 1, 1, true)); - - // We ensure the workload package does not exist. 
- this.mockFixture.PackageManager.Clear(); - - using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies)) - { - executor.ExecuteDependencies = false; - - DependencyException error = Assert.ThrowsAsync(() => executor.ExecuteAsync(ProfileTiming.OneIteration(), CancellationToken.None)); - Assert.IsTrue(error.Reason == ErrorReason.WorkloadDependencyMissing); - } - } - - [Test] - [TestCase("PERF-MEM-STREAMMSFT.json")] - public void StreamMsftWorkloadProfileParametersAreInlinedCorrectly(string profile) - { - this.mockFixture.Setup(PlatformID.Unix); - using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies)) - { - WorkloadAssert.ParameterReferencesInlined(executor.Profile); - } - } - - [Test] - [TestCase("PERF-MEM-STREAMMSFT.json")] - public async Task StreamMsftWorkloadProfileExecutesTheExpectedWorkloadsOnUnixPlatform(string profile) - { - IEnumerable expectedCommands = StreamProfileTests.GetStreamMsftProfileExpectedCommands(); - - // Setup the expectations for the workload - // - Workload package is installed and exists. - // - The workload generates valid results. 
- this.mockFixture.Setup(PlatformID.Unix, Architecture.Arm64); - this.mockFixture.SetupPackage("streammsft", expectedFiles: "linux-arm64/stream"); - - this.mockFixture.SystemManagement.Setup(mgr => mgr.GetCpuInfoAsync(It.IsAny())) - .ReturnsAsync(new CpuInfo("Name", "Description", 1, 2, 1, 1, true)); - - this.mockFixture.ProcessManager.OnCreateProcess = (command, arguments, workingDir) => - { - IProcessProxy process = this.mockFixture.CreateProcess(command, arguments, workingDir); - if (arguments.Contains("perfrunner", StringComparison.OrdinalIgnoreCase)) - { - process.StandardOutput.Append(TestDependencies.GetResourceFileContents("Results_StreamMsft.txt")); - } - else if (arguments.Contains("make", StringComparison.OrdinalIgnoreCase)) - { - // Make command should succeed without output - } - - return process; - }; - - using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies)) - { - executor.ExecuteDependencies = false; - await executor.ExecuteAsync(ProfileTiming.OneIteration(), CancellationToken.None).ConfigureAwait(false); - - WorkloadAssert.CommandsExecuted(this.mockFixture, expectedCommands.ToArray()); - } - } - [Test] [TestCase("PERF-MEM-STREAM.json")] public void StreamWorkloadProfileParametersAreInlinedCorrectly(string profile) @@ -247,15 +107,6 @@ public void StreamProfileActionsWillNotBeExecutedIfTheWorkloadPackageDoesNotExis } } - private static IEnumerable GetStreamTriadProfileExpectedCommands() - { - return new List - { - "bash -c \"lscpu \\| grep 'Flags'\"", - "bash -c \"export KMP_AFFINITY=.*&& export OMP_NUM_THREADS=.*&& export LD_LIBRARY_PATH=.*&& chmod \\+x.*&&.*Stream.*\"" - }; - } - private static IEnumerable GetStreamProfileExpectedCommands() { return new List @@ -264,22 +115,5 @@ private static IEnumerable GetStreamProfileExpectedCommands() "bash -c \"export OMP_NUM_THREADS=.*&&.*chmod.*\\+x.*streamworkload.*&&.*streamworkload.*\"", }; } - - private static IEnumerable 
GetStreamMsftProfileExpectedCommands() - { - return new List - { - "bash.*make", - "bash.*perfrunner.*--threads.*--internal-iter", - }; - } - - private static IEnumerable GetStreamWindowsProfileExpectedCommands() - { - return new List - { - "cmd\\.exe.*stream\\.exe.*-n 50.*-s 320000000", - }; - } } } \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMMSFT.json b/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMMSFT.json deleted file mode 100644 index 028c3caa67..0000000000 --- a/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMMSFT.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "Description": "Microsoft's Stream Performance Workload", - "Metadata": { - "DocumentationLink": "https://msazure.visualstudio.com/One/_git/CRC-AIR-Workloads?version=GBmaster&path=/docs/workloads", - "RecommendedMinimumExecutionTime": "00:10:00", - "SupportedPlatforms": "linux-arm64", - "SupportedOperatingSystems": "Ubuntu", - "SupportedValuesOfToolset": "STREAM,STREAMTriad,STREAMMsft" - }, - "Parameters": { - "CompilerVersion": "", - "CommandLineParameters": "--internal-iter 1000 --internal-iter-lat 1000", - "ThreadCount": "{PhysicalCoreCount}" - }, - "Actions": [ - { - "Type": "StreamExecutor", - "Parameters": { - "Scenario": "STREAM-MSFT", - "Toolset": "STREAMMSFT", - "PackageName": "streammsft", - "CommandLineParameters": "$.Parameters.CommandLineParameters", - "ThreadCount": "$.Parameters.ThreadCount" - } - } - ], - "Dependencies": [ - { - "Type": "CompilerInstallation", - "Parameters": { - "Scenario": "InstallCompiler", - "CompilerVersion": "$.Parameters.CompilerVersion" - } - }, - { - "Type": "LinuxPackageInstallation", - "Parameters": { - "Scenario": "InstallLinuxPackages", - "Packages": "make,libnuma-dev" - } - }, - { - "Type": "DependencyPackageInstallation", - "Parameters": { - "Scenario": "InstallStreamPackage", - "BlobContainer": "packages", - "BlobName": "streammsft.1.0.0.zip", - "PackageName": "streammsft", - 
"Extract": true - } - } - ] -} \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMTRIAD.json b/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMTRIAD.json deleted file mode 100644 index a8387ec9ee..0000000000 --- a/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMTRIAD.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "Description": "Stream Performance Workload", - "Metadata": { - "DocumentationLink": "https://msazure.visualstudio.com/One/_git/CRC-AIR-Workloads?version=GBmaster&path=/docs/workloads", - "RecommendedMinimumExecutionTime": "00:10:00", - "SupportedPlatforms": "linux-x64", - "SupportedOperatingSystems": "Ubuntu", - "SupportedValuesOfToolset": "STREAM,STREAMTriad,STREAMMsft" - }, - "Parameters": { - "ThreadCount": "{PhysicalCoreCount}" - }, - "Actions": [ - { - "Type": "StreamExecutor", - "Parameters": { - "Scenario": "STREAM-Triad", - "Toolset": "STREAMTriad", - "PackageName": "stream", - "ThreadCount": "$.Parameters.ThreadCount" - } - } - ], - "Dependencies": [ - { - "Type": "DependencyPackageInstallation", - "Parameters": { - "Scenario": "InstallStreamPackage", - "BlobContainer": "packages", - "BlobName": "stream.1.0.0-1.zip", - "PackageName": "stream", - "Extract": true - } - } - ] -} \ No newline at end of file diff --git a/website/docs/workloads/stream/stream-metrics.md b/website/docs/workloads/stream/stream-metrics.md index 51d0988c9a..e32d85ea86 100644 --- a/website/docs/workloads/stream/stream-metrics.md +++ b/website/docs/workloads/stream/stream-metrics.md @@ -13,7 +13,7 @@ that are available. ### Workload-Specific Metrics -The following metrics are emitted by the STREAM or STREAMTRIAD (Intel-specialized) workload itself. +The following metrics are emitted by the STREAM workload itself. 
| Metric Name | Example Value (min) | Example Value (max) | Example Value (avg) | Unit | |------------------------|---------------------|---------------------|---------------------|------| @@ -79,4 +79,4 @@ Column 2 : Avg Rate Column 3 : Min Rate Column 4 : Avg Latency Column 5 : Min Latency -Column 6 : Max Latency +Column 6 : Max Latency \ No newline at end of file diff --git a/website/docs/workloads/stream/stream-profiles.md b/website/docs/workloads/stream/stream-profiles.md index c559574966..410e13285d 100644 --- a/website/docs/workloads/stream/stream-profiles.md +++ b/website/docs/workloads/stream/stream-profiles.md @@ -82,107 +82,6 @@ Runs a Memory-intensive workload using the STREAM Benchmark to test the bandwidt ``` -### PERF-MEM-STREAMTRIAD.json -Runs a Memory-intensive workload using the STREAM Benchmark to test memory bandwidth. This profile is designed by the Intel team to -maximize the utilization of Intel processors. - -* **Supported Platform/Architectures** - * linux-x64 - -* **Supported Operating Systems** - * Ubuntu 18 - * Ubuntu 20 - * Ubuntu 22 - -* **Dependencies** - The following dependencies must be met to run this workload profile. - - * Workload package must exist in the 'packages' directory or connection information for the package store supplied on the command line (see 'Workload Packages' link above). - -* **Workload Runtimes** - The following timings represent the length of time required to run a single round of tests ran. These timings can be used to determine - minimum required runtimes for the Virtual Client in order to get results. These are estimates based on the use of prescribed VM SKUs. - It is practical to allow for minimum 1 to 2 hours extra runtime to ensure the tests can complete full test runs. - * Expected Runtime = 10 secs - -* **Usage Examples** - The following section provides a few basic examples of how to use the workload profile. 
Additional usage examples can be found in the - 'Usage Scenarios/Examples' link at the top. - -
- - ``` csharp - ./VirtualClient --profile=PERF-MEM-STREAMTRIAD.json --timeout=60 --packageStore="{BlobConnectionString|SAS Uri}" - ``` -
- -### PERF-MEM-STREAMMSFT.json -Runs a Memory-intensive workload using the STREAM Benchmark to test memory bandwidth. This profile is designed by Microsoft team to maximize the performance of 1P programs. - -* **Supported Platform/Architectures** - * linux-arm64 - -* **Supported Operating Systems** - * Ubuntu 18 - * Ubuntu 20 - * Ubuntu 22 - -* **Supported Compilers** - The following compilers are supported with the workload for this profile. See profile parameters and usage examples below. - - * G++ Compiler Versions = 8, 9, 10, 11 - -* **Dependencies** - The following dependencies must be met to run this workload profile. - - * Workload package must exist in the 'packages' directory or connection information for the package store supplied on the command line (see 'Workload Packages' link above). - -* **Workload Runtimes** - The following timings represent the length of time required to run a single round of tests ran. These timings can be used to determine - minimum required runtimes for the Virtual Client in order to get results. These are estimates based on the use of prescribed VM SKUs. - It is practical to allow for minimum 1 to 2 hours extra runtime to ensure the tests can complete full test runs. - * Expected Runtime = 10 secs - -* **Usage Examples** - The following section provides a few basic examples of how to use the workload profile. Additional usage examples can be found in the - 'Usage Scenarios/Examples' link at the top. - -* **Profile Parameters** - The following parameters can be optionally supplied on the command line to modify the behaviors of the workload. See the 'Usage Scenarios/Examples' above for examples on how to supply parameters to - Virtual Client profiles. - - | Parameter | Purpose |Default | - |---------------------------|---------------------------------------------------------------|-----------------------------------------------------------------------------| - | CompilerName | Not Required. Compiler used to compile. 
|gcc | - | CompilerVersion | Not Required. Compiler's Version to install. |10 | - | CommandLineParameters | Not Required. Parameters to be used in MSFT Stream. |--internal-iter 1000 --internal-iter-lat 1000| - | ThreadCount | Not Required. Number of threads use to run the workload | No. of Physical Cores. | - -* **Component/Action Parameters** - The following parameters are available in the profile components/actions. - - | Parameter | Purpose |Default | - |---------------------------|-------------------------------------------------------------------------------------------------------------------------|-------------| - | CompilerName | Not Required. Compiler used to compile. |gcc | - | CompilerVersion | Not Required. Compiler's Version to install. |10 | - | CommandLineParameters | Not Required. Parameters to be used in MSFT Stream. |--internal-iter 1000 --internal-iter-lat 1000| - | ThreadCount | Not Required. Number of threads use to run the workload | No. of Physical Cores. | - | Toolset | Defines the STREAM toolset to use. Valid values include: STREAM , STREAMTriad and STREAMMsft. Note that the STREAMTriad toolset can be used on Intel CPU systems only. | STREAMMSFT | -Note: The default parameters are according to the parameters documentation inorder to have stable results. -[Msft Stream Parameters](./stream.md) - -* **Make file for Msft Stream with** - -[MakeFile](./streammsftmakefile.txt) - - -
- - ``` csharp - ./VirtualClient --profile=PERF-MEM-STREAMMSFT.json --timeout=60 --packageStore="{BlobConnectionString|SAS Uri}" - ``` -
------------------------------------------------------------------------ ### Resources diff --git a/website/docs/workloads/stream/stream-supplemental.md b/website/docs/workloads/stream/stream-supplemental.md index 2d2b497fb3..d63f05fed6 100644 --- a/website/docs/workloads/stream/stream-supplemental.md +++ b/website/docs/workloads/stream/stream-supplemental.md @@ -60,49 +60,6 @@ The following configurations are general recommendations for use when running th * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v3/v4, Standard_E64_v3/v4, Standard_F64_v2 * Test/QoS = 1 x 16-core -> Standard_D16_v3/v4, Standard_E16_v3/v4, Standard_F16_v2

- * Intel Gen8 (Icelake) Hardware - * Virtual Machines (per node) - * Firmware/Hardware Validations (ideal) = 1 x 96-core -> Standard_D96_v5, Standard_E96_v5, Standard_F96_v2 - * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v4/v5, Standard_E64_v4/v5, Standard_F64_v2 - * Test/QoS = 1 x 16-core -> Standard_D16_v4/v5, Standard_E16_v4/v5, Standard_F16_v2 - -### PERF-MEM-STREAMTRIAD.json -The following configurations are general recommendations for use when running this profile on cloud hardware systems and virtual machines. - -* **Recommended Configurations** - Note that the term "cores" as used below in describing VM specifications should be inferred as synonymous with the term virtual CPU (vCPU). The configurations - below cover those used by the CRC team for running this workload as part of the Virtual Client platform. These come from recommendations and empirical - evidence from running on Azure cloud systems and are designed to mimic "customer-representative" scenarios or to utilize/stress the physical nodes/systems. - These configurations have generally proven to be well-suited for net impact analysis on systems where a change is being applied to the physical hardware - (e.g. a firmware update). - - * Operating System (unless otherwise specified below) - * Linux Scenarios - * Publisher: Canonical - * Offer: UbuntuServer - * Sku: 18.04-LTS - * Version: latest - * Intel Gen5 (Broadwell) Hardware - * Virtual Machines (per node) - * Firmware/Hardware Validations = 1 x 48-core -> Standard_D48_v3, Standard_E48_v3, Standard_F48_v2 - * Test/QoS = 1 x 16-core -> Standard_D16_v3, Standard_E16_v3, Standard_F16 -

- * Intel Gen6 (Coffee Lake) Hardware - * Virtual Machines (per node) - * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v3, Standard_E64_v3, Standard_F64_v2 - * Test/QoS = 1 x 16-core -> Standard_D16_v3, Standard_E16_v3, Standard_F16_v2 -

- * Intel Gen6 (Skylake) Hardware - * Virtual Machines (per node) - * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v3, Standard_E64_v3, Standard_F64_v2 - * Test/QoS = 1 x 16-core -> Standard_D16_v3, Standard_E16_v3, Standard_F16_v2 -

- * Intel Gen7 (Cascade Lake) Hardware - * Virtual Machines (per node) - * Firmware/Hardware Validations (ideal) = 1 x 72-core -> Standard_D72_v5, Standard_E72_v5, Standard_F72_v2 - * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v3/v4, Standard_E64_v3/v4, Standard_F64_v2 - * Test/QoS = 1 x 16-core -> Standard_D16_v3/v4, Standard_E16_v3/v4, Standard_F16_v2 -

* Intel Gen8 (Icelake) Hardware * Virtual Machines (per node) * Firmware/Hardware Validations (ideal) = 1 x 96-core -> Standard_D96_v5, Standard_E96_v5, Standard_F96_v2 diff --git a/website/docs/workloads/stream/stream.md b/website/docs/workloads/stream/stream.md index 0636835984..ba5e62aa89 100644 --- a/website/docs/workloads/stream/stream.md +++ b/website/docs/workloads/stream/stream.md @@ -26,7 +26,7 @@ The following performance analysis tests are ran as part of the STREAM workload. * linux-x64 * linux-arm64 -### MSFT STREAM Parameters (Can't be used by STREAM and STREAMTriad) : +### MSFT STREAM Parameters: --rnd-threads RND_THREADS: Number of total threads to randomize the work. --threads THREADS: Number of threads to split the work. --lat-threads LAT_THREADS: Number of threads running latency test @@ -67,4 +67,4 @@ The following performance analysis tests are ran as part of the STREAM workload. --silent: This parameters disables all the outputs except the performance report --perCoreReport: This parameters the performance report per core --streams-seq: This parameters runs the streams kernels in the same order - --help provides this info + --help provides this info \ No newline at end of file From 4f83b4f20a872289a96ece7aaaa67ef116db22c5 Mon Sep 17 00:00:00 2001 From: saibulusu Date: Thu, 28 May 2026 15:57:08 -0700 Subject: [PATCH 3/8] Revert "excluding triad & msft" This reverts commit 4a25158560e30af03bf194a4f56aed0cbf8fc293. 
--- .../StreamProfileTests.cs | 166 ++++++++++++++++++ .../profiles/PERF-MEM-STREAMMSFT.json | 53 ++++++ .../profiles/PERF-MEM-STREAMTRIAD.json | 36 ++++ .../docs/workloads/stream/stream-metrics.md | 4 +- .../docs/workloads/stream/stream-profiles.md | 101 +++++++++++ .../workloads/stream/stream-supplemental.md | 43 +++++ website/docs/workloads/stream/stream.md | 4 +- 7 files changed, 403 insertions(+), 4 deletions(-) create mode 100644 src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMMSFT.json create mode 100644 src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMTRIAD.json diff --git a/src/VirtualClient/VirtualClient.Actions.FunctionalTests/StreamProfileTests.cs b/src/VirtualClient/VirtualClient.Actions.FunctionalTests/StreamProfileTests.cs index 3b9104b293..ccb8d91e46 100644 --- a/src/VirtualClient/VirtualClient.Actions.FunctionalTests/StreamProfileTests.cs +++ b/src/VirtualClient/VirtualClient.Actions.FunctionalTests/StreamProfileTests.cs @@ -28,6 +28,146 @@ public void SetupFixture() ComponentTypeCache.Instance.LoadComponentTypes(TestDependencies.TestDirectory); } + [Test] + [TestCase("PERF-MEM-STREAMTRIAD.json")] + public void StreamTriadWorkloadProfileParametersAreInlinedCorrectly(string profile) + { + this.mockFixture.Setup(PlatformID.Unix); + using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies)) + { + WorkloadAssert.ParameterReferencesInlined(executor.Profile); + } + } + + [Test] + [TestCase("PERF-MEM-STREAMTRIAD.json")] + public async Task StreamTriadWorkloadProfileExecutesTheExpectedWorkloadsOnUnixPlatform(string profile) + { + IEnumerable expectedCommands = StreamProfileTests.GetStreamTriadProfileExpectedCommands(); + + // Setup the expectations for the workload + // - Workload package is installed and exists. + // - The workload generates valid results. 
+ this.mockFixture.Setup(PlatformID.Unix); + this.mockFixture.SystemManagement.Setup(mgr => mgr.GetCpuInfoAsync(It.IsAny())) + .ReturnsAsync(new CpuInfo("Name", "Description", 1, 2, 1, 1, true)); + + this.mockFixture.SetupPackage("stream", expectedFiles: "linux-x64/stream"); + + this.mockFixture.ProcessManager.OnCreateProcess = (command, arguments, workingDir) => + { + IProcessProxy process = this.mockFixture.CreateProcess(command, arguments, workingDir); + if (arguments.Contains("Stream", StringComparison.OrdinalIgnoreCase)) + { + process.StandardOutput.Append(TestDependencies.GetResourceFileContents("Results_Stream.txt")); + } + else if (arguments.Contains("lscpu | grep 'Flags'")) + { + process.StandardOutput.AppendLine("Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves avx512vbmi"); + } + + return process; + }; + + using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies)) + { + executor.ExecuteDependencies = false; + await executor.ExecuteAsync(ProfileTiming.OneIteration(), CancellationToken.None).ConfigureAwait(false); + await executor.ExecuteAsync(ProfileTiming.OneIteration(), CancellationToken.None).ConfigureAwait(false); + + WorkloadAssert.CommandsExecuted(this.mockFixture, expectedCommands.ToArray()); + } + } + + [Test] + [Ignore("We need to rethink how to do dependency testing with extension model.")] + [TestCase("PERF-MEM-STREAMTRIAD.json")] + public async Task StreamTriadWorkloadProfileInstallsTheExpectedDependenciesOnUnixPlatform(string profile) + { + // The 
setup in a typical Azure VM scenario + this.mockFixture.Setup(PlatformID.Unix); + + using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies, dependenciesOnly: true)) + { + executor.ExecuteDependencies = false; + await executor.ExecuteAsync(ProfileTiming.OneIteration(), CancellationToken.None).ConfigureAwait(false); + + // Workload dependency package expectations + // The workload dependency package should have been installed at this point. + WorkloadAssert.WorkloadPackageInstalled(this.mockFixture, "stream"); + } + } + + [Test] + [TestCase("PERF-MEM-STREAMTRIAD.json")] + public void StreamTriadProfileActionsWillNotBeExecutedIfTheWorkloadPackageDoesNotExist(string profile) + { + this.mockFixture.Setup(PlatformID.Unix); + this.mockFixture.SystemManagement.Setup(mgr => mgr.GetCpuInfoAsync(It.IsAny())) + .ReturnsAsync(new CpuInfo("Name", "Description", 1, 2, 1, 1, true)); + + // We ensure the workload package does not exist. + this.mockFixture.PackageManager.Clear(); + + using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies)) + { + executor.ExecuteDependencies = false; + + DependencyException error = Assert.ThrowsAsync(() => executor.ExecuteAsync(ProfileTiming.OneIteration(), CancellationToken.None)); + Assert.IsTrue(error.Reason == ErrorReason.WorkloadDependencyMissing); + } + } + + [Test] + [TestCase("PERF-MEM-STREAMMSFT.json")] + public void StreamMsftWorkloadProfileParametersAreInlinedCorrectly(string profile) + { + this.mockFixture.Setup(PlatformID.Unix); + using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies)) + { + WorkloadAssert.ParameterReferencesInlined(executor.Profile); + } + } + + [Test] + [TestCase("PERF-MEM-STREAMMSFT.json")] + public async Task StreamMsftWorkloadProfileExecutesTheExpectedWorkloadsOnUnixPlatform(string profile) + { + IEnumerable expectedCommands = 
StreamProfileTests.GetStreamMsftProfileExpectedCommands(); + + // Setup the expectations for the workload + // - Workload package is installed and exists. + // - The workload generates valid results. + this.mockFixture.Setup(PlatformID.Unix, Architecture.Arm64); + this.mockFixture.SetupPackage("streammsft", expectedFiles: "linux-arm64/stream"); + + this.mockFixture.SystemManagement.Setup(mgr => mgr.GetCpuInfoAsync(It.IsAny())) + .ReturnsAsync(new CpuInfo("Name", "Description", 1, 2, 1, 1, true)); + + this.mockFixture.ProcessManager.OnCreateProcess = (command, arguments, workingDir) => + { + IProcessProxy process = this.mockFixture.CreateProcess(command, arguments, workingDir); + if (arguments.Contains("perfrunner", StringComparison.OrdinalIgnoreCase)) + { + process.StandardOutput.Append(TestDependencies.GetResourceFileContents("Results_StreamMsft.txt")); + } + else if (arguments.Contains("make", StringComparison.OrdinalIgnoreCase)) + { + // Make command should succeed without output + } + + return process; + }; + + using (ProfileExecutor executor = TestDependencies.CreateProfileExecutor(profile, this.mockFixture.Dependencies)) + { + executor.ExecuteDependencies = false; + await executor.ExecuteAsync(ProfileTiming.OneIteration(), CancellationToken.None).ConfigureAwait(false); + + WorkloadAssert.CommandsExecuted(this.mockFixture, expectedCommands.ToArray()); + } + } + [Test] [TestCase("PERF-MEM-STREAM.json")] public void StreamWorkloadProfileParametersAreInlinedCorrectly(string profile) @@ -107,6 +247,15 @@ public void StreamProfileActionsWillNotBeExecutedIfTheWorkloadPackageDoesNotExis } } + private static IEnumerable GetStreamTriadProfileExpectedCommands() + { + return new List + { + "bash -c \"lscpu \\| grep 'Flags'\"", + "bash -c \"export KMP_AFFINITY=.*&& export OMP_NUM_THREADS=.*&& export LD_LIBRARY_PATH=.*&& chmod \\+x.*&&.*Stream.*\"" + }; + } + private static IEnumerable GetStreamProfileExpectedCommands() { return new List @@ -115,5 +264,22 @@ private static 
IEnumerable GetStreamProfileExpectedCommands() "bash -c \"export OMP_NUM_THREADS=.*&&.*chmod.*\\+x.*streamworkload.*&&.*streamworkload.*\"", }; } + + private static IEnumerable GetStreamMsftProfileExpectedCommands() + { + return new List + { + "bash.*make", + "bash.*perfrunner.*--threads.*--internal-iter", + }; + } + + private static IEnumerable GetStreamWindowsProfileExpectedCommands() + { + return new List + { + "cmd\\.exe.*stream\\.exe.*-n 50.*-s 320000000", + }; + } } } \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMMSFT.json b/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMMSFT.json new file mode 100644 index 0000000000..028c3caa67 --- /dev/null +++ b/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMMSFT.json @@ -0,0 +1,53 @@ +{ + "Description": "Microsoft's Stream Performance Workload", + "Metadata": { + "DocumentationLink": "https://msazure.visualstudio.com/One/_git/CRC-AIR-Workloads?version=GBmaster&path=/docs/workloads", + "RecommendedMinimumExecutionTime": "00:10:00", + "SupportedPlatforms": "linux-arm64", + "SupportedOperatingSystems": "Ubuntu", + "SupportedValuesOfToolset": "STREAM,STREAMTriad,STREAMMsft" + }, + "Parameters": { + "CompilerVersion": "", + "CommandLineParameters": "--internal-iter 1000 --internal-iter-lat 1000", + "ThreadCount": "{PhysicalCoreCount}" + }, + "Actions": [ + { + "Type": "StreamExecutor", + "Parameters": { + "Scenario": "STREAM-MSFT", + "Toolset": "STREAMMSFT", + "PackageName": "streammsft", + "CommandLineParameters": "$.Parameters.CommandLineParameters", + "ThreadCount": "$.Parameters.ThreadCount" + } + } + ], + "Dependencies": [ + { + "Type": "CompilerInstallation", + "Parameters": { + "Scenario": "InstallCompiler", + "CompilerVersion": "$.Parameters.CompilerVersion" + } + }, + { + "Type": "LinuxPackageInstallation", + "Parameters": { + "Scenario": "InstallLinuxPackages", + "Packages": "make,libnuma-dev" + } + }, + { + "Type": 
"DependencyPackageInstallation", + "Parameters": { + "Scenario": "InstallStreamPackage", + "BlobContainer": "packages", + "BlobName": "streammsft.1.0.0.zip", + "PackageName": "streammsft", + "Extract": true + } + } + ] +} \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMTRIAD.json b/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMTRIAD.json new file mode 100644 index 0000000000..a8387ec9ee --- /dev/null +++ b/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMTRIAD.json @@ -0,0 +1,36 @@ +{ + "Description": "Stream Performance Workload", + "Metadata": { + "DocumentationLink": "https://msazure.visualstudio.com/One/_git/CRC-AIR-Workloads?version=GBmaster&path=/docs/workloads", + "RecommendedMinimumExecutionTime": "00:10:00", + "SupportedPlatforms": "linux-x64", + "SupportedOperatingSystems": "Ubuntu", + "SupportedValuesOfToolset": "STREAM,STREAMTriad,STREAMMsft" + }, + "Parameters": { + "ThreadCount": "{PhysicalCoreCount}" + }, + "Actions": [ + { + "Type": "StreamExecutor", + "Parameters": { + "Scenario": "STREAM-Triad", + "Toolset": "STREAMTriad", + "PackageName": "stream", + "ThreadCount": "$.Parameters.ThreadCount" + } + } + ], + "Dependencies": [ + { + "Type": "DependencyPackageInstallation", + "Parameters": { + "Scenario": "InstallStreamPackage", + "BlobContainer": "packages", + "BlobName": "stream.1.0.0-1.zip", + "PackageName": "stream", + "Extract": true + } + } + ] +} \ No newline at end of file diff --git a/website/docs/workloads/stream/stream-metrics.md b/website/docs/workloads/stream/stream-metrics.md index e32d85ea86..51d0988c9a 100644 --- a/website/docs/workloads/stream/stream-metrics.md +++ b/website/docs/workloads/stream/stream-metrics.md @@ -13,7 +13,7 @@ that are available. ### Workload-Specific Metrics -The following metrics are emitted by the STREAM workload itself. +The following metrics are emitted by the STREAM or STREAMTRIAD (Intel-specialized) workload itself. 
| Metric Name | Example Value (min) | Example Value (max) | Example Value (avg) | Unit | |------------------------|---------------------|---------------------|---------------------|------| @@ -79,4 +79,4 @@ Column 2 : Avg Rate Column 3 : Min Rate Column 4 : Avg Latency Column 5 : Min Latency -Column 6 : Max Latency \ No newline at end of file +Column 6 : Max Latency diff --git a/website/docs/workloads/stream/stream-profiles.md b/website/docs/workloads/stream/stream-profiles.md index 410e13285d..c559574966 100644 --- a/website/docs/workloads/stream/stream-profiles.md +++ b/website/docs/workloads/stream/stream-profiles.md @@ -82,6 +82,107 @@ Runs a Memory-intensive workload using the STREAM Benchmark to test the bandwidt ``` +### PERF-MEM-STREAMTRIAD.json +Runs a Memory-intensive workload using the STREAM Benchmark to test memory bandwidth. This profile is designed by the Intel team to +maximize the utilization of Intel processors. + +* **Supported Platform/Architectures** + * linux-x64 + +* **Supported Operating Systems** + * Ubuntu 18 + * Ubuntu 20 + * Ubuntu 22 + +* **Dependencies** + The following dependencies must be met to run this workload profile. + + * Workload package must exist in the 'packages' directory or connection information for the package store supplied on the command line (see 'Workload Packages' link above). + +* **Workload Runtimes** + The following timings represent the length of time required to run a single round of tests ran. These timings can be used to determine + minimum required runtimes for the Virtual Client in order to get results. These are estimates based on the use of prescribed VM SKUs. + It is practical to allow for minimum 1 to 2 hours extra runtime to ensure the tests can complete full test runs. + * Expected Runtime = 10 secs + +* **Usage Examples** + The following section provides a few basic examples of how to use the workload profile. 
Additional usage examples can be found in the + 'Usage Scenarios/Examples' link at the top. + +
+ + ``` csharp + ./VirtualClient --profile=PERF-MEM-STREAMTRIAD.json --timeout=60 --packageStore="{BlobConnectionString|SAS Uri}" + ``` +
+ +### PERF-MEM-STREAMMSFT.json +Runs a Memory-intensive workload using the STREAM Benchmark to test memory bandwidth. This profile is designed by the Microsoft team to maximize the performance of 1P programs. + +* **Supported Platform/Architectures** + * linux-arm64 + +* **Supported Operating Systems** + * Ubuntu 18 + * Ubuntu 20 + * Ubuntu 22 + +* **Supported Compilers** + The following compilers are supported with the workload for this profile. See profile parameters and usage examples below. + + * G++ Compiler Versions = 8, 9, 10, 11 + +* **Dependencies** + The following dependencies must be met to run this workload profile. + + * Workload package must exist in the 'packages' directory or connection information for the package store supplied on the command line (see 'Workload Packages' link above). + +* **Workload Runtimes** + The following timings represent the length of time required to run a single round of tests. These timings can be used to determine + minimum required runtimes for the Virtual Client in order to get results. These are estimates based on the use of prescribed VM SKUs. + It is practical to allow for minimum 1 to 2 hours extra runtime to ensure the tests can complete full test runs. + * Expected Runtime = 10 secs + +* **Usage Examples** + The following section provides a few basic examples of how to use the workload profile. Additional usage examples can be found in the + 'Usage Scenarios/Examples' link at the top. + +* **Profile Parameters** + The following parameters can be optionally supplied on the command line to modify the behaviors of the workload. See the 'Usage Scenarios/Examples' above for examples on how to supply parameters to + Virtual Client profiles. + + | Parameter | Purpose |Default | + |---------------------------|---------------------------------------------------------------|-----------------------------------------------------------------------------| + | CompilerName | Not Required. Compiler used to compile. 
|gcc | + | CompilerVersion | Not Required. Compiler's Version to install. |10 | + | CommandLineParameters | Not Required. Parameters to be used in MSFT Stream. |--internal-iter 1000 --internal-iter-lat 1000| + | ThreadCount | Not Required. Number of threads used to run the workload | No. of Physical Cores. | + +* **Component/Action Parameters** + The following parameters are available in the profile components/actions. + + | Parameter | Purpose |Default | + |---------------------------|-------------------------------------------------------------------------------------------------------------------------|-------------| + | CompilerName | Not Required. Compiler used to compile. |gcc | + | CompilerVersion | Not Required. Compiler's Version to install. |10 | + | CommandLineParameters | Not Required. Parameters to be used in MSFT Stream. |--internal-iter 1000 --internal-iter-lat 1000| + | ThreadCount | Not Required. Number of threads used to run the workload | No. of Physical Cores. | + | Toolset | Defines the STREAM toolset to use. Valid values include: STREAM, STREAMTriad and STREAMMsft. Note that the STREAMTriad toolset can be used on Intel CPU systems only. | STREAMMSFT | +Note: The default parameters follow the parameters documentation in order to produce stable results. +[Msft Stream Parameters](./stream.md) + +* **Makefile for Msft Stream** + +[MakeFile](./msftstreammakefile.txt) + + +
+ + ``` csharp + ./VirtualClient --profile=PERF-MEM-STREAMMSFT.json --timeout=60 --packageStore="{BlobConnectionString|SAS Uri}" + ``` +
+----------------------------------------------------------------------- ### Resources diff --git a/website/docs/workloads/stream/stream-supplemental.md b/website/docs/workloads/stream/stream-supplemental.md index d63f05fed6..2d2b497fb3 100644 --- a/website/docs/workloads/stream/stream-supplemental.md +++ b/website/docs/workloads/stream/stream-supplemental.md @@ -60,6 +60,49 @@ The following configurations are general recommendations for use when running th * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v3/v4, Standard_E64_v3/v4, Standard_F64_v2 * Test/QoS = 1 x 16-core -> Standard_D16_v3/v4, Standard_E16_v3/v4, Standard_F16_v2

+ * Intel Gen8 (Icelake) Hardware + * Virtual Machines (per node) + * Firmware/Hardware Validations (ideal) = 1 x 96-core -> Standard_D96_v5, Standard_E96_v5, Standard_F96_v2 + * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v4/v5, Standard_E64_v4/v5, Standard_F64_v2 + * Test/QoS = 1 x 16-core -> Standard_D16_v4/v5, Standard_E16_v4/v5, Standard_F16_v2 + +### PERF-MEM-STREAMTRIAD.json +The following configurations are general recommendations for use when running this profile on cloud hardware systems and virtual machines. + +* **Recommended Configurations** + Note that the term "cores" as used below in describing VM specifications should be inferred as synonymous with the term virtual CPU (vCPU). The configurations + below cover those used by the CRC team for running this workload as part of the Virtual Client platform. These come from recommendations and empirical + evidence from running on Azure cloud systems and are designed to mimic "customer-representative" scenarios or to utilize/stress the physical nodes/systems. + These configurations have generally proven to be well-suited for net impact analysis on systems where a change is being applied to the physical hardware + (e.g. a firmware update). + + * Operating System (unless otherwise specified below) + * Linux Scenarios + * Publisher: Canonical + * Offer: UbuntuServer + * Sku: 18.04-LTS + * Version: latest + * Intel Gen5 (Broadwell) Hardware + * Virtual Machines (per node) + * Firmware/Hardware Validations = 1 x 48-core -> Standard_D48_v3, Standard_E48_v3, Standard_F48_v2 + * Test/QoS = 1 x 16-core -> Standard_D16_v3, Standard_E16_v3, Standard_F16 +

+ * Intel Gen6 (Coffee Lake) Hardware + * Virtual Machines (per node) + * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v3, Standard_E64_v3, Standard_F64_v2 + * Test/QoS = 1 x 16-core -> Standard_D16_v3, Standard_E16_v3, Standard_F16_v2 +

+ * Intel Gen6 (Skylake) Hardware + * Virtual Machines (per node) + * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v3, Standard_E64_v3, Standard_F64_v2 + * Test/QoS = 1 x 16-core -> Standard_D16_v3, Standard_E16_v3, Standard_F16_v2 +

+ * Intel Gen7 (Cascade Lake) Hardware + * Virtual Machines (per node) + * Firmware/Hardware Validations (ideal) = 1 x 72-core -> Standard_D72_v5, Standard_E72_v5, Standard_F72_v2 + * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v3/v4, Standard_E64_v3/v4, Standard_F64_v2 + * Test/QoS = 1 x 16-core -> Standard_D16_v3/v4, Standard_E16_v3/v4, Standard_F16_v2 +

* Intel Gen8 (Icelake) Hardware * Virtual Machines (per node) * Firmware/Hardware Validations (ideal) = 1 x 96-core -> Standard_D96_v5, Standard_E96_v5, Standard_F96_v2 diff --git a/website/docs/workloads/stream/stream.md b/website/docs/workloads/stream/stream.md index ba5e62aa89..0636835984 100644 --- a/website/docs/workloads/stream/stream.md +++ b/website/docs/workloads/stream/stream.md @@ -26,7 +26,7 @@ The following performance analysis tests are ran as part of the STREAM workload. * linux-x64 * linux-arm64 -### MSFT STREAM Parameters: +### MSFT STREAM Parameters (Can't be used by STREAM and STREAMTriad) : --rnd-threads RND_THREADS: Number of total threads to randomize the work. --threads THREADS: Number of threads to split the work. --lat-threads LAT_THREADS: Number of threads running latency test @@ -67,4 +67,4 @@ The following performance analysis tests are ran as part of the STREAM workload. --silent: This parameters disables all the outputs except the performance report --perCoreReport: This parameters the performance report per core --streams-seq: This parameters runs the streams kernels in the same order - --help provides this info \ No newline at end of file + --help provides this info From e2080b65583307c7793013e347bd71b4b49ba3e3 Mon Sep 17 00:00:00 2001 From: saibulusu Date: Fri, 29 May 2026 13:42:28 -0700 Subject: [PATCH 4/8] Documentation updates. 
--- .../VirtualClient.Documentation.csproj | 1 + .../profiles/PERF-MEM-STREAM.json | 118 ++++++++-------- .../docs/workloads/stream/stream-metrics.md | 19 +-- .../docs/workloads/stream/stream-profiles.md | 2 - .../workloads/stream/stream-supplemental.md | 46 +++++- website/docs/workloads/stream/stream.md | 133 ++++++++++-------- 6 files changed, 181 insertions(+), 138 deletions(-) diff --git a/src/VirtualClient/VirtualClient.Documentation/VirtualClient.Documentation.csproj b/src/VirtualClient/VirtualClient.Documentation/VirtualClient.Documentation.csproj index c3a2817621..5ed012791e 100644 --- a/src/VirtualClient/VirtualClient.Documentation/VirtualClient.Documentation.csproj +++ b/src/VirtualClient/VirtualClient.Documentation/VirtualClient.Documentation.csproj @@ -19,6 +19,7 @@ + diff --git a/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAM.json b/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAM.json index 63f9aebf01..c8bf76c8dc 100644 --- a/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAM.json +++ b/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAM.json @@ -1,68 +1,68 @@ { - "Description": "STREAM Performance Workload - Sustainable Memory Bandwidth in High Performance Computers Benchmark", - "Metadata": { + "Description": "STREAM Performance Workload - Sustainable Memory Bandwidth in High Performance Computers Benchmark", + "Metadata": { "DocumentationLink": "https://msazure.visualstudio.com/One/_git/CRC-AIR-Workloads?version=GBmaster&path=/docs/workloads", "RecommendedMinimumExecutionTime": "00:10:00", "SupportedPlatforms": "linux-x64,linux-arm64,win-x64,win-arm64", "SupportedOperatingSystems": "Windows,Ubuntu", "SupportedValuesOfToolset": "STREAM,STREAMTriad,STREAMMsft" - }, - "Parameters": { - "CompilerVersion": "", - "CompilerParameters": "-fopenmp -mcmodel=large -D_OPENMP -DNTIMES=5000 -DSTREAM_ARRAY_SIZE=100000000", - "ThreadCount": "{PhysicalCoreCount}", - "CommandArgumentsWindows": "-n 50 -s 320000000" - 
}, - "Actions": [ - { - "Type": "StreamExecutor", - "Parameters": { - "Scenario": "STREAM", - "Toolset": "STREAM", - "PackageName": "stream", - "CompilerParameters": "$.Parameters.CompilerParameters", - "ThreadCount": "$.Parameters.ThreadCount", - "CommandArgumentsWindows": "$.Parameters.CommandArgumentsWindows" - } - } - ], - "Dependencies": [ - { - "Type": "CompilerInstallation", - "Parameters": { - "Scenario": "InstallCompiler", - "SupportedPlatforms": "linux-x64,linux-arm64", - "CompilerVersion": "$.Parameters.CompilerVersion" - } }, - { - "Type": "DependencyPackageInstallation", - "Parameters": { - "Scenario": "InstallStreamPackage", - "SupportedPlatforms": "linux-x64,linux-arm64,win-x64,win-arm64", - "BlobContainer": "packages", - "BlobName": "stream.1.0.0-2.zip", - "PackageName": "stream", - "Extract": true - } - }, - { - "Type": "WgetPackageInstallation", - "Parameters": { - "Scenario": "Install_Visual_Cpp_Redistributable_x64", - "SupportedPlatforms": "win-x64", - "PackageName": "visual_c++_redistributable", - "PackageUri": "https://aka.ms/vs/17/release/vc_redist.x64.exe" - } + "Parameters": { + "CompilerVersion": "", + "CompilerParameters": "-fopenmp -mcmodel=large -D_OPENMP -DNTIMES=5000 -DSTREAM_ARRAY_SIZE=100000000", + "ThreadCount": "{PhysicalCoreCount}", + "CommandArgumentsWindows": "-n 50 -s 320000000" }, - { - "Type": "WgetPackageInstallation", - "Parameters": { - "Scenario": "Install_Visual_Cpp_Redistributable_arm64", - "SupportedPlatforms": "win-arm64", - "PackageName": "visual_c++_redistributable", - "PackageUri": "https://aka.ms/vs/17/release/vc_redist.arm64.exe" - } - } - ] + "Actions": [ + { + "Type": "StreamExecutor", + "Parameters": { + "Scenario": "STREAM", + "Toolset": "STREAM", + "PackageName": "stream", + "CompilerParameters": "$.Parameters.CompilerParameters", + "ThreadCount": "$.Parameters.ThreadCount", + "CommandArgumentsWindows": "$.Parameters.CommandArgumentsWindows" + } + } + ], + "Dependencies": [ + { + "Type": 
"CompilerInstallation", + "Parameters": { + "Scenario": "InstallCompiler", + "SupportedPlatforms": "linux-x64,linux-arm64", + "CompilerVersion": "$.Parameters.CompilerVersion" + } + }, + { + "Type": "DependencyPackageInstallation", + "Parameters": { + "Scenario": "InstallStreamPackage", + "SupportedPlatforms": "linux-x64,linux-arm64,win-x64,win-arm64", + "BlobContainer": "packages", + "BlobName": "stream.1.0.0-2.zip", + "PackageName": "stream", + "Extract": true + } + }, + { + "Type": "WgetPackageInstallation", + "Parameters": { + "Scenario": "Install_Visual_Cpp_Redistributable_x64", + "SupportedPlatforms": "win-x64", + "PackageName": "visual_c++_redistributable", + "PackageUri": "https://aka.ms/vs/17/release/vc_redist.x64.exe" + } + }, + { + "Type": "WgetPackageInstallation", + "Parameters": { + "Scenario": "Install_Visual_Cpp_Redistributable_arm64", + "SupportedPlatforms": "win-arm64", + "PackageName": "visual_c++_redistributable", + "PackageUri": "https://aka.ms/vs/17/release/vc_redist.arm64.exe" + } + } + ] } \ No newline at end of file diff --git a/website/docs/workloads/stream/stream-metrics.md b/website/docs/workloads/stream/stream-metrics.md index 51d0988c9a..eed410d4cd 100644 --- a/website/docs/workloads/stream/stream-metrics.md +++ b/website/docs/workloads/stream/stream-metrics.md @@ -1,19 +1,4 @@ -# STREAM Workload Metrics - -The following document illustrates the type of results that are emitted by the STREAM workload and captured by the -Virtual Client for net impact analysis. - -### System Metrics -Different metrics are captured from the system depending upon which monitor profiles are used. If a monitor profile is not -defined, the default MONITORS-DEFAULT.json profile is used. See the following documentation to determine monitor profiles -that are available. 
- -* [Monitor Profiles](https://github.com/microsoft/VirtualClient/blob/main/website/docs/monitors/monitor-profiles.md) -* [Monitor Profiles (internal only)](../../monitors/monitor-profiles.md) - -### Workload-Specific Metrics - -The following metrics are emitted by the STREAM or STREAMTRIAD (Intel-specialized) workload itself. +The following metrics are emitted by the STREAM or STREAMTRIAD (Intel-specialized) workload itself. | Metric Name | Example Value (min) | Example Value (max) | Example Value (avg) | Unit | |------------------------|---------------------|---------------------|---------------------|------| @@ -79,4 +64,4 @@ Column 2 : Avg Rate Column 3 : Min Rate Column 4 : Avg Latency Column 5 : Min Latency -Column 6 : Max Latency +Column 6 : Max Latency \ No newline at end of file diff --git a/website/docs/workloads/stream/stream-profiles.md b/website/docs/workloads/stream/stream-profiles.md index c559574966..9c63441977 100644 --- a/website/docs/workloads/stream/stream-profiles.md +++ b/website/docs/workloads/stream/stream-profiles.md @@ -54,8 +54,6 @@ Runs a Memory-intensive workload using the STREAM Benchmark to test the bandwidt | CompilerParameters | Not Required. Parameters use to compile the stream binary. |-fopenmp -mcmodel=large -D_OPENMP -DNTIMES=5000 -DSTREAM_ARRAY_SIZE=100000000| | Toolset | Defines the STREAM toolset to use. Valid values include: STREAM and STREAMTriad. Note that the STREAMTriad toolset can be used on Intel CPU systems only. 
| STREAM | - - * **Compiler Flags** | Parameter | Purpose | diff --git a/website/docs/workloads/stream/stream-supplemental.md b/website/docs/workloads/stream/stream-supplemental.md index 2d2b497fb3..74bfe4b9a6 100644 --- a/website/docs/workloads/stream/stream-supplemental.md +++ b/website/docs/workloads/stream/stream-supplemental.md @@ -107,4 +107,48 @@ The following configurations are general recommendations for use when running th * Virtual Machines (per node) * Firmware/Hardware Validations (ideal) = 1 x 96-core -> Standard_D96_v5, Standard_E96_v5, Standard_F96_v2 * Firmware/Hardware Validations = 1 x 64-core -> Standard_D64_v4/v5, Standard_E64_v4/v5, Standard_F64_v2 - * Test/QoS = 1 x 16-core -> Standard_D16_v4/v5, Standard_E16_v4/v5, Standard_F16_v2 \ No newline at end of file + * Test/QoS = 1 x 16-core -> Standard_D16_v4/v5, Standard_E16_v4/v5, Standard_F16_v2 + +### PERF-MEM-STREAMMSFT.json +MSFT STREAM Parameters (Can't be used by STREAM and STREAMTriad) : + --rnd-threads RND_THREADS: Number of total threads to randomize the work. + --threads THREADS: Number of threads to split the work. + --lat-threads LAT_THREADS: Number of threads running latency test + --l2tol2-threads L2toL2_THREADS: Number of threads running latency test from L2 to L2 + --bw-array BW_ARRAY_SIZE: Array size in 1KB blocks per thread to measure bandwidth. + --lat-array LAT_ARRAY_SIZE: Array size in 1KB blocks to measure latency. + --lat-accesses LAT_ACCESSES: Number of accesses to measure latency. + --lat-seq : Measure latency using sequential address on a 64B basis. + --l2tol2-array L2toL2_ARRAY_SIZE: Array size in 1KB blocks to measure L2 to L2 latency. + --iter ITERATIONS: Number of iterations to measure performance. + --internal-iter INTERNAL_ITER: Internal iteration for bandwidth loop to guarantee stable results. + --internal-iter-lat INTERNAL_ITER_LAT: Internal iteration for latency loop to guarantee stable results. 
+ --pattern PATTERN: Type of pattern, 0 is fixed, 1 is maximum switch and 2 is fully random. + --enable-lat ENABLE_LAT: Measure latency accessing a vector randomly + --enable-l2tol2 ENABLE_L2toL2: Measure latency accessing remote l2 vector randomly. This test disables all other + --verbose VERBOSE: 0 is no verbose level, 1 is basic verbose level to check synch points and 2 is for debug. + --limit-time SECONDS: Number of seconds to run per iteration + --kernel KERNEL: 0 is READ + 1 is COPY + 2 is SCALE + 3 is ADD + 4 is TRIAD + 5 is WRITE + 6 is ALL (0-5) + 7 is NONE (ENABLE_LAT is required on this mode) + 8 is 1R1W + 9 is 2R1W + --total-size-kb TOTAL_SIZE: Total Array size in KB to measure bandwidth, the tool will split it across all threads. + --total-size-mb TOTAL_SIZE: Total Array size in MB to measure bandwidth, the tool will split it across all threads. + --total-size-gb TOTAL_SIZE: Total Array size in GB to measure bandwidth, the tool will split it across all threads. + --use-scalar: This parameter enables standard scalar kernels + --use-sve: This parameter enables SVE kernels + --use-neon: This parameter enables NEON 8.1 kernels (default) + --use-numa: This parameter forces each thread to alloc the memory to be NUMA aware + --numa-sequential: This parameter forces NUMA0 until it's fully used + --numa-inverted: This parameter forces memory allocation on the opposite NUMA node + --numa-alloc node_id: This parameter forces memory allocation on a fixed NUMA node + --silent: This parameter disables all the outputs except the performance report + --perCoreReport: This parameter enables the performance report per core + --streams-seq: This parameter runs the streams kernels in the same order + --help provides this info \ No newline at end of file diff --git a/website/docs/workloads/stream/stream.md b/website/docs/workloads/stream/stream.md index 0636835984..3368a18fb7 100644 --- a/website/docs/workloads/stream/stream.md +++
b/website/docs/workloads/stream/stream.md @@ -1,70 +1,85 @@ -# STREAM Workload +# STREAM +STREAM is a synthetic benchmark designed to measure sustainable memory bandwidth and the corresponding computation rate for simple vector kernels. +It is intended to provide a measure of memory performance independent of any particular computing platform's cache hierarchy, and has become a +de-facto industry standard for measuring memory bandwidth. -STREAM is a benchmark for measuring memory bandwidth performance . +* [STREAM Official Site](https://www.cs.virginia.edu/stream/) +* [Intel's STREAM TRIAD Documentation](https://www.intel.com/content/www/us/en/developer/articles/technical/optimizing-memory-bandwidth-on-stream-triad.html) -* [Official STREAM Benchmark Documentation](https://www.cs.virginia.edu/stream/) -* [Intels STREAM TRIAD Documentaion](https://www.intel.com/content/www/us/en/developer/articles/technical/optimizing-memory-bandwidth-on-stream-triad.html) +## What is Being Measured? +The STREAM benchmark measures sustainable memory bandwidth (in MB/s where 1 MB = 10^6 bytes, not 2^20 bytes) using four simple vector kernels. +These kernels are designed to be simple enough to avoid introducing computational bottlenecks while being complex enough to be representative of +real application behavior. Each kernel represents a common pattern found in scientific and engineering applications. ---- +The STREAM benchmark runs the following memory bandwidth tests: -### What is Being Tested? +| Bandwidth Benchmark | Description | +|-----------------------|-----------------------------------------------------------| +| Copy | Measures memory copy operation speeds (a(i) = b(i)) | +| Scale | Measures memory scale operation speeds (a(i) = q*b(i)) | +| Add | Measures memory add operation speeds (a(i) = b(i) + c(i)) | +| Triad | Measures memory triad operation speeds (a(i) = b(i) + q*c(i)) | -The following performance analysis tests are ran as part of the STREAM workload. 
It measures the Memory Bandwidth in MB/s (1 MB=10^6 B, *not* 2^20 B) +## Workload Metrics +The following metrics are examples of those captured by the Virtual Client when running the STREAM workload. Virtual Client supports both the +standard STREAM benchmark and the Microsoft-optimized STREAM implementation which provides additional metrics including latency measurements +and the Write operation. +### Standard STREAM Metrics -| Bandwidth Benchmark | Description | -|-----------------------|-----------------------------------------------------------| -| Copy | Measures memory copy operation speeds | -| Scale | Measures memory scale operation speeds | -| Add | Measures memory add operation speeds | -| Triad | Measures memory triad operation speeds | +| Metric Name | Example Value (min) | Example Value (max) | Example Value (avg) | Unit | +|-------------|---------------------|---------------------|---------------------|------| +| Best Rate Add | 8635.5 | 327893.5 | 42849.75 | MB/s | +| Best Rate Copy | 6787.4 | 346279.0 | 30720.40 | MB/s | +| Best Rate Scale | 6747.1 | 320023.2 | 30578.70 | MB/s | +| Best Rate Triad | 10141.2 | 305781.6 | 42735.67 | MB/s | + +### Microsoft STREAM Metrics +The Microsoft STREAM implementation provides additional detailed metrics including minimum, average, and best rates for all operations, +as well as latency measurements. It also includes a Write operation in addition to the standard STREAM operations. 
---- +#### Bandwidth Metrics -### Supported Platform/Architectures +| Metric Name | Example Value (min) | Example Value (max) | Example Value (avg) | Unit | +|-------------|---------------------|---------------------|---------------------|------| +| Best Rate Add | 53351.0 | 54544.0 | 54011.50 | MB/s | +| Best Rate Copy | 72073.0 | 74208.0 | 73171.83 | MB/s | +| Best Rate Read | 48497.0 | 51087.0 | 50461.00 | MB/s | +| Best Rate Scale | 72486.0 | 74990.0 | 73716.00 | MB/s | +| Best Rate Triad | 54780.0 | 56567.0 | 55725.50 | MB/s | +| Best Rate Write | 87466.0 | 133326.0 | 116689.67 | MB/s | +| Avg Rate Add | 52848.0 | 54074.0 | 53553.17 | MB/s | +| Avg Rate Copy | 71016.0 | 73323.0 | 72106.67 | MB/s | +| Avg Rate Read | 47981.0 | 50433.0 | 49814.33 | MB/s | +| Avg Rate Scale | 71056.0 | 73315.0 | 72425.50 | MB/s | +| Avg Rate Triad | 53849.0 | 55720.0 | 55032.50 | MB/s | +| Avg Rate Write | 86252.0 | 124882.0 | 105829.00 | MB/s | +| Min Rate Add | 52199.0 | 53658.0 | 52981.17 | MB/s | +| Min Rate Copy | 70192.0 | 72302.0 | 71153.50 | MB/s | +| Min Rate Read | 47600.0 | 50057.0 | 49066.33 | MB/s | +| Min Rate Scale | 70110.0 | 72308.0 | 71063.83 | MB/s | +| Min Rate Triad | 53180.0 | 55407.0 | 54307.67 | MB/s | +| Min Rate Write | 84744.0 | 118017.0 | 96797.83 | MB/s | -* linux-x64 -* linux-arm64 +#### Latency Metrics -### MSFT STREAM Parameters (Can't be used by STREAM and STREAMTriad) : - --rnd-threads RND_THREADS: Number of total threads to randomize the work. - --threads THREADS: Number of threads to split the work. - --lat-threads LAT_THREADS: Number of threads running latency test - --l2tol2-threads L2toL@_THREADS: Number of threads running latency test from L2 to L2 - --bw-array BW_ARRAY_SIZE: Array size in 1KB blocks per thread to measure bandwidth. - --lat-array LAT_ARRAY_SIZE: Array size in 1KB blocks to measure latency. - --lat-accesses LAT_ACCESSES: Number of accesses to measure latency. 
- --lat-seq : Measure latency using sequential address on a 64B basic. - --l2tol2-array L2toL2_ARRAY_SIZE: Array size in 1KB blocks to measure L2 to L2 latency. - --iter ITERATIONS: Number of iterations to measure performance. - --internal-iter INTERNAL_ITER: Internal iteration for bandwidth loop to guarantee stable results. - --internal-iter-lat INTERNAL_ITER_LAT: Internal iteration for latency loop to guarantee stable results. - --pattern PATTERN: Type of pattern, 0 is fixed, 1 is maximum switch and 2 is fully random. - --enable-lat ENABLE_LAT: Measure latency accessing to a vector randomly - --enable-l2tol2 ENABLE_L2toL2: Measure latency accessing remote l2 vector randomly. This test disables all other - --verbose VERBOSE: 0 to no verborse level, 1 is basic verbose level to check synch points and 2 is for debug. - --limit-time SECONDS: Number of seconds to run per iteration - --kernel KERNEL: 0 is READ - 1 is COPY - 2 is SCALE - 3 is ADD - 4 is TRIAD - 5 is WRITE - 6 is ALL (0-5) - 7 is NONE (ENABLE_LAT is required on this mode) - 8 is 1R1W - 9 is 2R1W - --total-size-kb TOTAL_SIZE: Total Array size in KB to measure bandwidth, the tool will split it across all threads. - --total-size-mb TOTAL_SIZE: Total Array size in MB to measure bandwidth, the tool will split it across all threads. - --total-size-gb TOTAL_SIZE: Total Array size in GB to measure bandwidth, the tool will split it across all threads. 
- --use-scalar: This parameters enables standard scalar kernels - --use-sve: This parameters enables SVE kernels - --use-neon: This parameters enables NEON 8.1 kernels (default) - --use-numa: This parameters forces each thread to alloc the memory to be NUMA aware - --numa-sequential: This parameters forces NUMA0 until it's fully used - --numa-inverted: This parameters forces memory allocation on the oposite NUMA node - --numa-alloc node_id: This parameters forces memory allocation on a fix NUMA node - --silent: This parameters disables all the outputs except the performance report - --perCoreReport: This parameters the performance report per core - --streams-seq: This parameters runs the streams kernels in the same order - --help provides this info +| Metric Name | Example Value (min) | Example Value (max) | Example Value (avg) | Unit | +|-------------|---------------------|---------------------|---------------------|------| +| Avg Latency Add | 144.0 | 161.0 | 151.17 | nanoseconds | +| Avg Latency Copy | 155.0 | 183.0 | 164.83 | nanoseconds | +| Avg Latency Read | 144.0 | 152.0 | 147.50 | nanoseconds | +| Avg Latency Scale | 155.0 | 179.0 | 161.83 | nanoseconds | +| Avg Latency Triad | 141.0 | 163.0 | 151.67 | nanoseconds | +| Avg Latency Write | 196.0 | 377.0 | 261.17 | nanoseconds | +| Max Latency Add | 158.0 | 189.0 | 172.67 | nanoseconds | +| Max Latency Copy | 173.0 | 203.0 | 183.83 | nanoseconds | +| Max Latency Read | 156.0 | 181.0 | 168.50 | nanoseconds | +| Max Latency Scale | 169.0 | 205.0 | 178.67 | nanoseconds | +| Max Latency Triad | 157.0 | 193.0 | 171.00 | nanoseconds | +| Max Latency Write | 220.0 | 446.0 | 299.17 | nanoseconds | +| Min Latency Add | 126.0 | 145.0 | 134.50 | nanoseconds | +| Min Latency Copy | 143.0 | 169.0 | 151.67 | nanoseconds | +| Min Latency Read | 121.0 | 135.0 | 128.00 | nanoseconds | +| Min Latency Scale | 137.0 | 159.0 | 145.50 | nanoseconds | +| Min Latency Triad | 128.0 | 151.0 | 135.17 | nanoseconds | +| Min Latency 
Write | 175.0 | 319.0 | 224.00 | nanoseconds | \ No newline at end of file From e4f23155f77a3750b92b2c8d9bf99bf0ae8a926b Mon Sep 17 00:00:00 2001 From: saibulusu Date: Fri, 29 May 2026 13:48:26 -0700 Subject: [PATCH 5/8] removing metrics file --- .../VirtualClient.Documentation.csproj | 1 - .../docs/workloads/stream/stream-metrics.md | 67 ------------------- 2 files changed, 68 deletions(-) delete mode 100644 website/docs/workloads/stream/stream-metrics.md diff --git a/src/VirtualClient/VirtualClient.Documentation/VirtualClient.Documentation.csproj b/src/VirtualClient/VirtualClient.Documentation/VirtualClient.Documentation.csproj index 5ed012791e..c3a2817621 100644 --- a/src/VirtualClient/VirtualClient.Documentation/VirtualClient.Documentation.csproj +++ b/src/VirtualClient/VirtualClient.Documentation/VirtualClient.Documentation.csproj @@ -19,7 +19,6 @@ - diff --git a/website/docs/workloads/stream/stream-metrics.md b/website/docs/workloads/stream/stream-metrics.md deleted file mode 100644 index eed410d4cd..0000000000 --- a/website/docs/workloads/stream/stream-metrics.md +++ /dev/null @@ -1,67 +0,0 @@ -The following metrics are emitted by the STREAM or STREAMTRIAD (Intel-specialized) workload itself. 
- -| Metric Name | Example Value (min) | Example Value (max) | Example Value (avg) | Unit | -|------------------------|---------------------|---------------------|---------------------|------| -| Best Rate Add | 8635.5 | 327893.5 | 42849.75067544962 | MBps | -| Best Rate Copy | 6787.4 | 346279.0 | 30720.395126646046 | MBps | -| Best Rate Scale | 6747.1 | 320023.2 | 30578.698884020574 | MBps | -| Best Rate Triad | 10141.2 | 305781.6 | 42735.667183905876 | MBps | - -The following metrics are emitted by the MSFT STREAM - -| MetricName | Example Value(max) | Example Value (avg) | Example Value (min) | MetricUnit | -|-------------------|--------------------|---------------------|---------------------|------------| -| Min Rate Read | 50057 | 49066.33333 | 47600 | MBps | -| Min Rate Copy | 72302 | 71153.5 | 70192 | MBps | -| Avg Rate Read | 50433 | 49814.33333 | 47981 | MBps | -| Min Rate Triad | 55407 | 54307.66667 | 53180 | MBps | -| Min Rate Scale | 72308 | 71063.83333 | 70110 | MBps | -| Avg Rate Scale | 73315 | 72425.5 | 71056 | MBps | -| Best Rate Copy | 74208 | 73171.83333 | 72073 | MBps | -| Min Rate Add | 53658 | 52981.16667 | 52199 | MBps | -| Avg Rate Add | 54074 | 53553.16667 | 52848 | MBps | -| Best Rate Scale | 74990 | 73716 | 72486 | MBps | -| Best Rate Write | 133326 | 116689.6667 | 87466 | MBps | -| Best Rate Add | 54544 | 54011.5 | 53351 | MBps | -| Avg Rate Triad | 55720 | 55032.5 | 53849 | MBps | -| Min Rate Write | 118017 | 96797.83333 | 84744 | MBps | -| Avg Rate Write | 124882 | 105829 | 86252 | MBps | -| Best Rate Triad | 56567 | 55725.5 | 54780 | MBps | -| Avg Rate Copy | 73323 | 72106.66667 | 71016 | MBps | -| Best Rate Read | 51087 | 50461 | 48497 | MBps | -| Min Latency Scale | 159 | 145.5 | 137 | ns | -| Avg Latency Write | 377 | 261.1666667 | 196 | ns | -| Min Latency Read | 135 | 128 | 121 | ns | -| Min Latency Add | 145 | 134.5 | 126 | ns | -| Max Latency Read | 181 | 168.5 | 156 | ns | -| Avg Latency Triad | 163 | 151.6666667 | 141 | ns | 
-| Max Latency Add | 189 | 172.6666667 | 158 | ns | -| Min Latency Copy | 169 | 151.6666667 | 143 | ns | -| Avg Latency Read | 152 | 147.5 | 144 | ns | -| Avg Latency Copy | 183 | 164.8333333 | 155 | ns | -| Max Latency Copy | 203 | 183.8333333 | 173 | ns | -| Avg Latency Add | 161 | 151.1666667 | 144 | ns | -| Max Latency Write | 446 | 299.1666667 | 220 | ns | -| Min Latency Triad | 151 | 135.1666667 | 128 | ns | -| Min Latency Write | 319 | 224 | 175 | ns | -| Max Latency Scale | 205 | 178.6666667 | 169 | ns | -| Max Latency Triad | 193 | 171 | 157 | ns | -| Avg Latency Scale | 179 | 161.8333333 | 155 | ns | - - -Msft Stream output explained: - -Function Best Rate MB/s -Read: 18095 17922 17677 110 110 111 -Copy: 28378 28330 28302 113 107 115 -Scale: 28363 27853 27129 114 112 116 -Add: 19826 19788 19700 113 112 113 -Triad: 20457 20197 19829 113 112 113 -Write: 47899 47881 47872 117 113 120 - -Column 1 : Best Rate -Column 2 : Avg Rate -Column 3 : Min Rate -Column 4 : Avg Latency -Column 5 : Min Latency -Column 6 : Max Latency \ No newline at end of file From dfd105f63df64aca39d17b58d4be7bf21f7b3506 Mon Sep 17 00:00:00 2001 From: saibulusu Date: Fri, 29 May 2026 13:53:13 -0700 Subject: [PATCH 6/8] minor documentation fix --- website/docs/workloads/stream/stream-profiles.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/website/docs/workloads/stream/stream-profiles.md b/website/docs/workloads/stream/stream-profiles.md index 9c63441977..515fc072a1 100644 --- a/website/docs/workloads/stream/stream-profiles.md +++ b/website/docs/workloads/stream/stream-profiles.md @@ -2,8 +2,7 @@ The following profiles run customer-representative or benchmarking scenarios using the STREAM workload. 
* [Getting Started](https://microsoft.github.io/VirtualClient/) -* [Workload Details](./stream.md) -* [Workload Profile Metrics](./stream-metrics.md) +* [Workload Details](./stream.md) * [Workload Packages](https://github.com/microsoft/VirtualClient/blob/main/website/docs/developing/dependency-packages.md) ----------------------------------------------------------------------- From 0b1b3862c88e492ae0ff8c81a88b2393dbfa0abb Mon Sep 17 00:00:00 2001 From: saibulusu Date: Fri, 29 May 2026 14:08:14 -0700 Subject: [PATCH 7/8] using copilot suggestions for stream-profiles.md --- website/docs/workloads/stream/stream-profiles.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/docs/workloads/stream/stream-profiles.md b/website/docs/workloads/stream/stream-profiles.md index 515fc072a1..9ebd249ebd 100644 --- a/website/docs/workloads/stream/stream-profiles.md +++ b/website/docs/workloads/stream/stream-profiles.md @@ -72,7 +72,7 @@ Runs a Memory-intensive workload using the STREAM Benchmark to test the bandwidt The following section provides a few basic examples of how to use the workload profile. Additional usage examples can be found in the 'Usage Scenarios/Examples' link at the top. -
+
``` csharp ./VirtualClient --profile=PERF-MEM-STREAM.json --timeout=60 --packageStore="{BlobConnectionString|SAS Uri}" @@ -106,7 +106,7 @@ maximize the utilization of Intel processors. The following section provides a few basic examples of how to use the workload profile. Additional usage examples can be found in the 'Usage Scenarios/Examples' link at the top. -
+
``` csharp ./VirtualClient --profile=PERF-MEM-STREAMTRIAD.json --timeout=60 --packageStore="{BlobConnectionString|SAS Uri}" @@ -173,7 +173,7 @@ Note: The default parameters are according to the parameters documentation inord [MakeFile](./streammsftmakefile.txt) -
+
``` csharp ./VirtualClient --profile=PERF-MEM-STREAMMSFT.json --timeout=60 --packageStore="{BlobConnectionString|SAS Uri}" From af02d619ab3404503f486a1fd6b145d87eb68967 Mon Sep 17 00:00:00 2001 From: saibulusu Date: Fri, 29 May 2026 14:23:44 -0700 Subject: [PATCH 8/8] updating stream-profiles.md formatting --- .../docs/workloads/stream/stream-profiles.md | 248 ++++++++---------- 1 file changed, 111 insertions(+), 137 deletions(-) diff --git a/website/docs/workloads/stream/stream-profiles.md b/website/docs/workloads/stream/stream-profiles.md index 9ebd249ebd..520fd18607 100644 --- a/website/docs/workloads/stream/stream-profiles.md +++ b/website/docs/workloads/stream/stream-profiles.md @@ -1,187 +1,161 @@ -# STREAM Workload Profiles +# STREAM Workload Profiles The following profiles run customer-representative or benchmarking scenarios using the STREAM workload. -* [Getting Started](https://microsoft.github.io/VirtualClient/) -* [Workload Details](./stream.md) -* [Workload Packages](https://github.com/microsoft/VirtualClient/blob/main/website/docs/developing/dependency-packages.md) +* [Workload Details](./stream.md) ------------------------------------------------------------------------ +## PERF-MEM-STREAM.json +Runs a memory-intensive workload using the STREAM benchmark to test the sustainable memory bandwidth of the system. STREAM measures memory bandwidth +using four simple vector kernels (Copy, Scale, Add, and Triad) designed to stress the memory subsystem with minimal dependency on cache. -### Preliminaries -The profiles below require the ability to download workload packages and dependencies from a package store. In order to download the workload packages, connection -information must be supplied on the command line. See the 'Workload Packages' documentation above for details on how that works. 
+* [Workload Profile](https://github.com/microsoft/VirtualClient/blob/main/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAM.json) ------------------------------------------------------------------------ - -### PERF-MEM-STREAM.json -Runs a Memory-intensive workload using the STREAM Benchmark to test the bandwidth of the Memory. This profile compiles the workload using 'gcc'. - -* **OS/Architecture Platforms** +* **Supported Platform/Architectures** * linux-x64 * linux-arm64 + * win-x64 + * win-arm64 -* **Supported Operating Systems** - * Ubuntu 18 - * Ubuntu 20 - * Ubuntu 22 - -* **Supported Compilers** - The following compilers are supported with the workload for this profile. See profile parameters and usage examples below. - - * GCC Compiler Versions = 8, 9, 10, 11 +* **Supports Disconnected Scenarios** + * No. Internet connection required. * **Dependencies** - The following dependencies must be met to run this workload profile. + The dependencies defined in the 'Dependencies' section of the profile itself are required in order to run the workload operations effectively. + * Internet connection. + * Blob storage account from which the required dependencies package can be downloaded. + * https://github.com/microsoft/VirtualClient/blob/main/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAM.json - * Workload package must exist in the 'packages' directory or connection information for the package store supplied on the command line (see 'Workload Packages' link above). + Additional information on components that exist within the 'Dependencies' section of the profile can be found in the following locations: + * [Installing Dependencies](https://microsoft.github.io/VirtualClient/docs/category/dependencies/) * **Profile Parameters** - The following parameters can be optionally supplied on the command line to modify the behaviors of the workload. 
See the 'Usage Scenarios/Examples' above for examples on how to supply parameters to - Virtual Client profiles. - - | Parameter | Purpose |Default | - |---------------------------|---------------------------------------------------------------|-----------------------------------------------------------------------------| - | CompilerVersion | Not Required. Compiler's Version to install. | | - | CompilerParameters | Not Required. Parameters use to compile the stream binary. |-fopenmp -mcmodel=large -D_OPENMP -DNTIMES=5000 -DSTREAM_ARRAY_SIZE=100000000| - -* **Component/Action Parameters** - The following parameters are available in the profile components/actions. - - | Parameter | Purpose |Default | - |---------------------------|-------------------------------------------------------------------------------------------------------------------------|-------------| - | CompilerVersion | Not Required. Compiler's Version to install. | | - | CompilerParameters | Not Required. Parameters use to compile the stream binary. |-fopenmp -mcmodel=large -D_OPENMP -DNTIMES=5000 -DSTREAM_ARRAY_SIZE=100000000| - | Toolset | Defines the STREAM toolset to use. Valid values include: STREAM and STREAMTriad. Note that the STREAMTriad toolset can be used on Intel CPU systems only. | STREAM | - -* **Compiler Flags** - - | Parameter | Purpose | - |------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------| - | -fopenmp -D_OPENMP | Using OpenMP for multiple processors. | - | -DNTIMES | Flag DNTIMES is for Stream which defines number of iterations of the workload each iteration takes around 10-50 milliseconds depending on VMSKU. | - | -DSTREAM_ARRAY_SIZE=100000000| Array size used by the Stream. 
| - | -mcmodel=large | It avoids integer overflow while providing array size 100000000.As it uses 64 bit integer instead of default 32 bit integer | - -* **Workload Runtimes** - The following timings represent the length of time required to run a single round of tests ran. These timings can be used to determine - minimum required runtimes for the Virtual Client in order to get results. These are estimates based on the use of prescribed VM SKUs. - It is practical to allow for minimum 1 to 2 hours extra runtime to ensure the tests can complete full test runs. - * Expected Runtime = 10 secs + The following parameters can be optionally supplied on the command line to modify the behaviors of the workload. + + | Parameter | Purpose | Default value | + |---------------------------|---------------------------------------------------------------------------------|---------------| + | CompilerVersion | Optional. The version of the compiler to use. | The default version for the OS/distro. | + | CompilerParameters | Optional. Compiler flags used to compile the STREAM binary. | -fopenmp -mcmodel=large -D_OPENMP -DNTIMES=5000 -DSTREAM_ARRAY_SIZE=100000000 | + | ThreadCount | Optional. The number of threads to use for running the benchmark. | # of physical cores | + | CommandArgumentsWindows | Optional. Command-line arguments for the Windows version of STREAM. | -n 50 -s 320000000 | + +* **Profile Runtimes** + See the 'Metadata' section of the profile for estimated runtimes. These timings represent the length of time required to run a single round of profile + actions. These timings can be used to determine minimum required runtimes for the Virtual Client in order to get results. These are often estimates based on the + number of system cores. + + * Recommended Minimum Execution Time = 10 minutes * **Usage Examples** - The following section provides a few basic examples of how to use the workload profile. 
Additional usage examples can be found in the - 'Usage Scenarios/Examples' link at the top. + The following section provides a few basic examples of how to use the workload profile. + + ``` bash + # Execute the workload profile + ./VirtualClient --profile=PERF-MEM-STREAM.json --system=Azure --timeout=60 --packageStore="{BlobConnectionString|SAS Uri}" -
+ # Override the compiler version + ./VirtualClient --profile=PERF-MEM-STREAM.json --system=Azure --timeout=60 --parameters="CompilerVersion=11" --packageStore="{BlobConnectionString|SAS Uri}" - ``` csharp - ./VirtualClient --profile=PERF-MEM-STREAM.json --timeout=60 --packageStore="{BlobConnectionString|SAS Uri}" + # Override the thread count + ./VirtualClient --profile=PERF-MEM-STREAM.json --system=Azure --timeout=60 --parameters="ThreadCount=8" --packageStore="{BlobConnectionString|SAS Uri}" ``` -
-### PERF-MEM-STREAMTRIAD.json -Runs a Memory-intensive workload using the STREAM Benchmark to test memory bandwidth. This profile is designed by the Intel team to -maximize the utilization of Intel processors. +## PERF-MEM-STREAMTRIAD.json +Runs a memory-intensive workload using the Intel-optimized STREAM Triad benchmark to test memory bandwidth. This profile is specifically designed +by the Intel team to maximize the utilization of Intel processors. The STREAMTriad toolset focuses on the Triad kernel, which is often considered +the most representative of real-world memory access patterns. + +* [Workload Profile](https://github.com/microsoft/VirtualClient/blob/main/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMTRIAD.json) * **Supported Platform/Architectures** * linux-x64 -* **Supported Operating Systems** - * Ubuntu 18 - * Ubuntu 20 - * Ubuntu 22 +* **Supports Disconnected Scenarios** + * No. Internet connection required. * **Dependencies** - The following dependencies must be met to run this workload profile. + The dependencies defined in the 'Dependencies' section of the profile itself are required in order to run the workload operations effectively. + * Internet connection. + * Blob storage account from which the required dependencies package can be downloaded. + * https://github.com/microsoft/VirtualClient/blob/main/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMTRIAD.json + + Additional information on components that exist within the 'Dependencies' section of the profile can be found in the following locations: + * [Installing Dependencies](https://microsoft.github.io/VirtualClient/docs/category/dependencies/) - * Workload package must exist in the 'packages' directory or connection information for the package store supplied on the command line (see 'Workload Packages' link above). +* **Profile Parameters** + The following parameters can be optionally supplied on the command line to modify the behaviors of the workload. 
-* **Workload Runtimes** - The following timings represent the length of time required to run a single round of tests ran. These timings can be used to determine - minimum required runtimes for the Virtual Client in order to get results. These are estimates based on the use of prescribed VM SKUs. - It is practical to allow for minimum 1 to 2 hours extra runtime to ensure the tests can complete full test runs. - * Expected Runtime = 10 secs + | Parameter | Purpose | Default value | + |---------------------------|---------------------------------------------------------------------------------|---------------| + | ThreadCount | Optional. The number of threads to use for running the benchmark. | # of physical cores | + +* **Profile Runtimes** + See the 'Metadata' section of the profile for estimated runtimes. These timings represent the length of time required to run a single round of profile + actions. These timings can be used to determine minimum required runtimes for the Virtual Client in order to get results. These are often estimates based on the + number of system cores. + + * Recommended Minimum Execution Time = 10 minutes * **Usage Examples** - The following section provides a few basic examples of how to use the workload profile. Additional usage examples can be found in the - 'Usage Scenarios/Examples' link at the top. + The following section provides a few basic examples of how to use the workload profile. -
+ ``` bash + # Execute the workload profile + ./VirtualClient --profile=PERF-MEM-STREAMTRIAD.json --system=Azure --timeout=60 --packageStore="{BlobConnectionString|SAS Uri}" - ``` csharp - ./VirtualClient --profile=PERF-MEM-STREAMTRIAD.json --timeout=60 --packageStore="{BlobConnectionString|SAS Uri}" + # Override the thread count + ./VirtualClient --profile=PERF-MEM-STREAMTRIAD.json --system=Azure --timeout=60 --parameters="ThreadCount=16" --packageStore="{BlobConnectionString|SAS Uri}" ``` -
-### PERF-MEM-STREAMMSFT.json -Runs a Memory-intensive workload using the STREAM Benchmark to test memory bandwidth. This profile is designed by Microsoft team to maximize the performance of 1P programs. +## PERF-MEM-STREAMMSFT.json +Runs a memory-intensive workload using Microsoft's optimized STREAM implementation to test memory bandwidth. This profile is specifically designed +by the Microsoft team to provide additional metrics including detailed latency measurements and support for ARM64 architectures. The implementation +includes additional memory operations (Read and Write) beyond the standard STREAM kernels. + +* [Workload Profile](https://github.com/microsoft/VirtualClient/blob/main/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMMSFT.json) * **Supported Platform/Architectures** * linux-arm64 -* **Supported Operating Systems** - * Ubuntu 18 - * Ubuntu 20 - * Ubuntu 22 - -* **Supported Compilers** - The following compilers are supported with the workload for this profile. See profile parameters and usage examples below. - - * G++ Compiler Versions = 8, 9, 10, 11 +* **Supports Disconnected Scenarios** + * No. Internet connection required. * **Dependencies** - The following dependencies must be met to run this workload profile. - - * Workload package must exist in the 'packages' directory or connection information for the package store supplied on the command line (see 'Workload Packages' link above). + The dependencies defined in the 'Dependencies' section of the profile itself are required in order to run the workload operations effectively. + * Internet connection. + * Blob storage account from which the required dependencies package can be downloaded. + * https://github.com/microsoft/VirtualClient/blob/main/src/VirtualClient/VirtualClient.Main/profiles/PERF-MEM-STREAMMSFT.json -* **Workload Runtimes** - The following timings represent the length of time required to run a single round of tests ran. 
These timings can be used to determine - minimum required runtimes for the Virtual Client in order to get results. These are estimates based on the use of prescribed VM SKUs. - It is practical to allow for minimum 1 to 2 hours extra runtime to ensure the tests can complete full test runs. - * Expected Runtime = 10 secs + Additional information on components that exist within the 'Dependencies' section of the profile can be found in the following locations: + * [Installing Dependencies](https://microsoft.github.io/VirtualClient/docs/category/dependencies/) -* **Usage Examples** - The following section provides a few basic examples of how to use the workload profile. Additional usage examples can be found in the - 'Usage Scenarios/Examples' link at the top. - * **Profile Parameters** - The following parameters can be optionally supplied on the command line to modify the behaviors of the workload. See the 'Usage Scenarios/Examples' above for examples on how to supply parameters to - Virtual Client profiles. - - | Parameter | Purpose |Default | - |---------------------------|---------------------------------------------------------------|-----------------------------------------------------------------------------| - | CompilerName | Not Required. Compiler used to compile. |gcc | - | CompilerVersion | Not Required. Compiler's Version to install. |10 | - | CommandLineParameters | Not Required. Parameters to be used in MSFT Stream. |--internal-iter 1000 --internal-iter-lat 1000| - | ThreadCount | Not Required. Number of threads use to run the workload | No. of Physical Cores. | - -* **Component/Action Parameters** - The following parameters are available in the profile components/actions. + The following parameters can be optionally supplied on the command line to modify the behaviors of the workload. 
- | Parameter | Purpose |Default | - |---------------------------|-------------------------------------------------------------------------------------------------------------------------|-------------| - | CompilerName | Not Required. Compiler used to compile. |gcc | - | CompilerVersion | Not Required. Compiler's Version to install. |10 | - | CommandLineParameters | Not Required. Parameters to be used in MSFT Stream. |--internal-iter 1000 --internal-iter-lat 1000| - | ThreadCount | Not Required. Number of threads use to run the workload | No. of Physical Cores. | - | Toolset | Defines the STREAM toolset to use. Valid values include: STREAM , STREAMTriad and STREAMMsft. Note that the STREAMTriad toolset can be used on Intel CPU systems only. | STREAMMSFT | -Note: The default parameters are according to the parameters documentation inorder to have stable results. -[Msft Stream Parameters](./stream.md) + | Parameter | Purpose | Default value | + |---------------------------|---------------------------------------------------------------------------------|---------------| + | CompilerVersion | Optional. The version of the compiler to use. | The default version for the OS/distro. | + | CommandLineParameters | Optional. Command-line arguments for the STREAMMSFT benchmark. | --internal-iter 1000 --internal-iter-lat 1000 | + | ThreadCount | Optional. The number of threads to use for running the benchmark. | # of physical cores | -* **Make file for Msft Stream with** +* **Profile Runtimes** + See the 'Metadata' section of the profile for estimated runtimes. These timings represent the length of time required to run a single round of profile + actions. These timings can be used to determine minimum required runtimes for the Virtual Client in order to get results. These are often estimates based on the + number of system cores. 
-[MakeFile](./streammsftmakefile.txt) + * Recommended Minimum Execution Time = 10 minutes +* **Usage Examples** + The following section provides a few basic examples of how to use the workload profile. -
+ ``` bash + # Execute the workload profile + ./VirtualClient --profile=PERF-MEM-STREAMMSFT.json --system=Azure --timeout=60 --packageStore="{BlobConnectionString|SAS Uri}" - ``` csharp - ./VirtualClient --profile=PERF-MEM-STREAMMSFT.json --timeout=60 --packageStore="{BlobConnectionString|SAS Uri}" - ``` -
------------------------------------------------------------------------ + # Override the compiler version + ./VirtualClient --profile=PERF-MEM-STREAMMSFT.json --system=Azure --timeout=60 --parameters="CompilerVersion=11" --packageStore="{BlobConnectionString|SAS Uri}" -### Resources + # Override command-line parameters + ./VirtualClient --profile=PERF-MEM-STREAMMSFT.json --system=Azure --timeout=60 --parameters="CommandLineParameters='--internal-iter 2000 --internal-iter-lat 2000'" --packageStore="{BlobConnectionString|SAS Uri}" -* [Azure VM Sizes](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes) -* [Azure Managed Disks](https://azure.microsoft.com/en-us/pricing/details/managed-disks/) \ No newline at end of file + # Override the thread count + ./VirtualClient --profile=PERF-MEM-STREAMMSFT.json --system=Azure --timeout=60 --parameters="ThreadCount=32" --packageStore="{BlobConnectionString|SAS Uri}" + ``` \ No newline at end of file