Skip to content

Commit

Permalink
[GenAI] Add LLaMA support (#7220)
Browse files Browse the repository at this point in the history
* add llama

* add test for tokenizer

* make llama 3.1 working

* update

* add shape test for 70b and 405b

* clean up

* add tests

* update

* fix error

* calculate rotary embedding in model layer

* remove rotary_emb from attention

* update feed

* update .csproj

* Update NuGet.config

* fix test

* pass device

* fix test

* update constructor

* disable 405b test

* update

* disable 70b test

* use windows only fact

* revert change

* rename test to LLaMA3_1
  • Loading branch information
LittleLittleCloud authored Aug 28, 2024
1 parent fa8c822 commit 70e5ab1
Show file tree
Hide file tree
Showing 57 changed files with 3,932 additions and 211 deletions.
24 changes: 23 additions & 1 deletion Microsoft.ML.sln
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,11 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.GenAI.Phi.Test
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.GenAI.Samples", "docs\samples\Microsoft.ML.GenAI.Samples\Microsoft.ML.GenAI.Samples.csproj", "{1D4AD9A3-19AF-432B-889D-A63FE6D7BD47}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.ML.GenAI.Core.Tests", "test\Microsoft.ML.GenAI.Core.Tests\Microsoft.ML.GenAI.Core.Tests.csproj", "{14AB0804-D4CE-4634-B544-5A8587620783}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.GenAI.Core.Tests", "test\Microsoft.ML.GenAI.Core.Tests\Microsoft.ML.GenAI.Core.Tests.csproj", "{14AB0804-D4CE-4634-B544-5A8587620783}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.GenAI.LLaMA", "src\Microsoft.ML.GenAI.LLaMA\Microsoft.ML.GenAI.LLaMA.csproj", "{0AA6D5CB-195F-457A-8792-4221E76E6C44}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.ML.GenAI.LLaMA.Tests", "test\Microsoft.ML.GenAI.LLaMA.Tests\Microsoft.ML.GenAI.LLaMA.Tests.csproj", "{D202353D-6FAF-4263-9A01-BDCFBC92391F}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down Expand Up @@ -878,6 +882,22 @@ Global
{14AB0804-D4CE-4634-B544-5A8587620783}.Release|Any CPU.Build.0 = Release|Any CPU
{14AB0804-D4CE-4634-B544-5A8587620783}.Release|x64.ActiveCfg = Release|Any CPU
{14AB0804-D4CE-4634-B544-5A8587620783}.Release|x64.Build.0 = Release|Any CPU
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Debug|Any CPU.Build.0 = Debug|Any CPU
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Debug|x64.ActiveCfg = Debug|Any CPU
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Debug|x64.Build.0 = Debug|Any CPU
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Release|Any CPU.ActiveCfg = Release|Any CPU
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Release|Any CPU.Build.0 = Release|Any CPU
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Release|x64.ActiveCfg = Release|Any CPU
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Release|x64.Build.0 = Release|Any CPU
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Debug|x64.ActiveCfg = Debug|Any CPU
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Debug|x64.Build.0 = Debug|Any CPU
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Release|Any CPU.Build.0 = Release|Any CPU
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Release|x64.ActiveCfg = Release|Any CPU
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Release|x64.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -969,6 +989,8 @@ Global
{867FFC34-DFA7-400F-B9BB-85158326CE08} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
{1D4AD9A3-19AF-432B-889D-A63FE6D7BD47} = {DA452A53-2E94-4433-B08C-041EDEC729E6}
{14AB0804-D4CE-4634-B544-5A8587620783} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
{0AA6D5CB-195F-457A-8792-4221E76E6C44} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{D202353D-6FAF-4263-9A01-BDCFBC92391F} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D}
Expand Down
4 changes: 4 additions & 0 deletions NuGet.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
<add key="dotnet5-roslyn" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json" />
<add key="mlnet-daily" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/MachineLearning/nuget/v3/index.json" />
<add key="mlnet-assets" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/machinelearning-assets/nuget/v3/index.json" />
<add key="dotnet-libraries-transport" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-libraries-transport/nuget/v3/index.json" />
<add key="dotnet8" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet8/nuget/v3/index.json" />
</packageSources>
<packageSourceMapping>
Expand Down Expand Up @@ -40,6 +41,9 @@
<packageSource key="mlnet-assets">
<package pattern="*" />
</packageSource>
<packageSource key="dotnet-libraries-transport">
<package pattern="*" />
</packageSource>
<packageSource key="dotnet8">
<package pattern="*" />
</packageSource>
Expand Down
51 changes: 51 additions & 0 deletions docs/samples/Microsoft.ML.GenAI.Samples/Llama/LLaMA3_1.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
using AutoGen.Core;
using Microsoft.ML.GenAI.Core;
using Microsoft.ML.GenAI.Core.Extension;
using Microsoft.ML.GenAI.LLaMA;
using Microsoft.ML.Tokenizers;
using TorchSharp;
using static TorchSharp.torch;

namespace Microsoft.ML.GenAI.Samples.Llama;

/// <summary>
/// Sample showing how to load a LLaMA 3.1 model from a Hugging Face weight folder,
/// build a <c>CausalLMPipeline</c> over it, and chat with it through an AutoGen agent.
/// </summary>
internal class LlamaSample
{
    /// <summary>
    /// Loads the model and sends a single coding task to the agent.
    /// </summary>
    /// <returns>A task that completes when the agent has finished responding.</returns>
    // NOTE: returns Task instead of `async void` so callers can await completion and
    // observe exceptions; existing `Run();` call sites still compile (CS4014 warning only).
    public static async Task Run()
    {
        var device = "cuda";
        if (device == "cuda")
        {
            torch.InitializeDeviceType(DeviceType.CUDA);
        }

        // Half precision keeps the 8B model within typical consumer GPU memory.
        var defaultType = ScalarType.Float16;
        torch.manual_seed(1); // deterministic sampling for reproducible demo output
        torch.set_default_dtype(defaultType);
        var weightFolder = @"C:\Users\xiaoyuz\source\repos\Meta-Llama-3.1-8B-Instruct";
        var configName = "config.json";
        // The tiktoken tokenizer assets live under the "original" subfolder of the HF checkout.
        var originalWeightFolder = Path.Combine(weightFolder, "original");

        Console.WriteLine("Loading Llama from huggingface model weight folder");
        // Stopwatch.StartNew() returns an already-running stopwatch; no extra Start() needed.
        var stopWatch = System.Diagnostics.Stopwatch.StartNew();
        var tokenizer = LlamaTokenizerHelper.FromPretrained(originalWeightFolder);
        // layersOnTargetDevice: -1 places all layers on the target device.
        var model = LlamaForCausalLM.FromPretrained(weightFolder, configName, layersOnTargetDevice: -1);

        var pipeline = new CausalLMPipeline<TiktokenTokenizer, LlamaForCausalLM>(tokenizer, model, device);

        var agent = new LlamaCausalLMAgent(pipeline, "assistant")
            .RegisterPrintMessage();

        var task = """
        Write a C# program to print the sum of two numbers. Use top-level statement, put code between ```csharp and ```.
        """;

        await agent.SendAsync(task);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

<ItemGroup>
<ProjectReference Include="..\..\..\src\Microsoft.ML.GenAI.Core\Microsoft.ML.GenAI.Core.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.GenAI.LLaMA\Microsoft.ML.GenAI.LLaMA.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.GenAI.Phi\Microsoft.ML.GenAI.Phi.csproj" />
</ItemGroup>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public static async Task RunAsync()
torch.manual_seed(1);
torch.set_default_dtype(defaultType);
var weightFolder = @"C:\Users\xiaoyuz\source\repos\Phi-3-mini-4k-instruct";
var pipeline = Utils.LoadPhi3Mini4KFromFolder(weightFolder, device: device);
var pipeline = Utils.LoadPhi3Mini4KFromFolder(weightFolder, device: device, quantizeToInt8: false);

// agent
var agent = new Phi3Agent(pipeline, "assistant")
Expand Down
2 changes: 1 addition & 1 deletion docs/samples/Microsoft.ML.GenAI.Samples/Phi3Mini/Utils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public static ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> LoadPhi3Mini4KFromFo
string weightFolder,
string configName = "config.json",
string device = "cuda",
int modelSizeOnCudaInGB = 16,
int modelSizeOnCudaInGB = 55,
int modelSizeOnMemoryInGB = 64,
int modelSizeOnDiskInGB = 200,
bool quantizeToInt8 = false,
Expand Down
2 changes: 1 addition & 1 deletion eng/Versions.props
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
<MicrosoftMLTensorFlowTestModelsVersion>0.0.13-test</MicrosoftMLTensorFlowTestModelsVersion>
<MicrosoftMLTestDatabasesVersion>0.0.6-test</MicrosoftMLTestDatabasesVersion>
<MicrosoftMLTestModelsVersion>0.0.7-test</MicrosoftMLTestModelsVersion>
<MicrosoftMLTestTokenizersVersion>2.0.0-beta.24219.1</MicrosoftMLTestTokenizersVersion>
<MicrosoftMLTestTokenizersVersion>2.0.0-beta.24415.1</MicrosoftMLTestTokenizersVersion>
<SystemDataSqlClientVersion>4.8.6</SystemDataSqlClientVersion>
<SystemDataSQLiteCoreVersion>1.0.118</SystemDataSQLiteCoreVersion>
<XunitCombinatorialVersion>1.6.24</XunitCombinatorialVersion>
Expand Down
51 changes: 51 additions & 0 deletions src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,57 @@ public static Dictionary<string, string> InferDeviceMapForEachLayer(
return deviceMap;
}

/// <summary>
/// Infer the device map for each layer in the model.
/// The returned map is a dictionary where the key is the layer name and the value is the
/// device id (e.g. "cuda:0") the layer should be placed on.
/// Layers are ordered by descending size and placed on the devices in the order of the
/// <paramref name="numberOfLayerToBePlaced"/> list.
/// </summary>
/// <param name="model">the model whose dynamic layers are to be mapped to devices.</param>
/// <param name="numberOfLayerToBePlaced">a list of key-value pairs where the key is the device id (e.g. "cuda:0") and the value is the number of layers to be placed on the device.
/// If you want to place all remaining layers on the device, set that value to -1.
/// e.g. [{"cuda:0", 2}, {"cpu", -1}], the first 2 layers will be placed on "cuda:0" and the rest will be placed on "cpu".
/// </param>
/// <returns>a dictionary mapping layer name to device id.</returns>
/// <exception cref="ArgumentException">thrown when the requested layer counts do not cover every layer in the model.</exception>
public static Dictionary<string, string> InferDeviceMapForEachLayer(
    this nn.Module model,
    IEnumerable<KeyValuePair<string, int>> numberOfLayerToBePlaced)
{
    // Largest layers first so the earliest (typically fastest) devices get the biggest layers.
    var layerSizeMap = model.GetSizeForEachDynamicLayerInBytes()
        .OrderByDescending(x => x.Value)
        .ToList();

    var deviceMap = new Dictionary<string, string>();
    foreach (var (device, count) in numberOfLayerToBePlaced)
    {
        if (count != -1)
        {
            // Assign the `count` largest remaining layers to this device.
            var topK = layerSizeMap.Take(count).ToList();
            layerSizeMap = layerSizeMap.Skip(count).ToList();
            foreach (var (key, _) in topK)
            {
                deviceMap[key] = device;
            }
        }
        else
        {
            // -1 means "everything left goes here"; later entries are ignored.
            foreach (var (key, _) in layerSizeMap)
            {
                deviceMap[key] = device;
            }

            layerSizeMap.Clear();
            break;
        }
    }

    if (layerSizeMap.Count > 0)
    {
        throw new ArgumentException("The layer count is not enough to cover all layers, did you forget to set the last layer count to -1?");
    }

    return deviceMap;
}

internal static string Peek(this nn.Module model)
{
var sb = new StringBuilder();
Expand Down
11 changes: 4 additions & 7 deletions src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,20 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="AutoGen.Core" Version="$(AutoGenVersion)" />
<PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="$(SemanticKernelVersion)" />
<PackageReference Include="System.Memory" Version="$(SystemMemoryVersion)" />
<PackageReference Include="TorchSharp" Version="$(TorchSharpVersion)" />
</ItemGroup>
<!--
<ItemGroup Condition="'$(Configuration)' == 'Debug'">
<PackageReference Include="libtorch-cpu-win-x64" Version="$(LibTorchVersion)" Condition="$([MSBuild]::IsOSPlatform('Windows'))" PrivateAssets="all" />
<PackageReference Include="libtorch-cpu-linux-x64" Version="$(LibTorchVersion)" Condition="$([MSBuild]::IsOSPlatform('Linux'))" PrivateAssets="all" />
<PackageReference Include="libtorch-cpu-osx-x64" Version="$(LibTorchVersion)" Condition="$([MSBuild]::IsOSPlatform('OSX'))" PrivateAssets="all" />
</ItemGroup> -->

<ItemGroup>
<ProjectReference Include="..\Microsoft.ML.Tokenizers\Microsoft.ML.Tokenizers.csproj" />
</ItemGroup>

<ItemGroup>
<InternalsVisibleTo Include="Microsoft.ML.GenAI.Phi" />
<InternalsVisibleTo Include="Microsoft.ML.GenAI.LLaMA" />
<InternalsVisibleTo Include="Microsoft.ML.GenAI.LLaMA.Tests" />
<InternalsVisibleTo Include="Microsoft.ML.GenAI.Phi.Tests" />
<InternalsVisibleTo Include="Microsoft.ML.GenAI.Core.Tests" />
</ItemGroup>
Expand Down
Loading

0 comments on commit 70e5ab1

Please sign in to comment.