This article shows how you can use Azure Computer Vision in Azure Cognitive Services to perform Optical Character Recognition (OCR).
The Computer Vision feature becomes available by adding a Computer Vision resource in the Azure Portal.
I have made a .NET MAUI Blazor app, and the GitHub repo for it is available here:
https://github.com/toreaurstadboss/Ocr.Handwriting.Azure.AI.Models
The Lib project in this repo references the Microsoft.Azure.CognitiveServices.Vision.ComputerVision NuGet package in its .csproj file; see the repo for the full project file.
The following class generates ComputerVisionClient instances that can be used to extract different kinds of information from streams and files containing video and images. We are going to focus on
images and on extracting text via OCR. Azure Computer Vision can extract handwritten text in addition to regular typed or printed text inside images. Azure Computer Vision can also
detect shapes in images and classify objects. This demo only focuses on text extraction from images.
ComputerVisionClientFactory
using Microsoft.Azure.CognitiveServices.Vision.ComputerVision;

namespace Ocr.Handwriting.Azure.AI.Lib
{

    public interface IComputerVisionClientFactory
    {
        ComputerVisionClient CreateClient();
    }

    /// <summary>
    /// Client factory for Azure Cognitive Services - Computer Vision.
    /// </summary>
    public class ComputerVisionClientFactory : IComputerVisionClientFactory
    {
        // Add your Computer Vision key and endpoint
        static string? _key = Environment.GetEnvironmentVariable("AZURE_COGNITIVE_SERVICES_VISION_KEY");
        static string? _endpoint = Environment.GetEnvironmentVariable("AZURE_COGNITIVE_SERVICES_VISION_ENDPOINT");

        public ComputerVisionClientFactory() : this(_key, _endpoint)
        {
        }

        public ComputerVisionClientFactory(string? key, string? endpoint)
        {
            _key = key;
            _endpoint = endpoint;
        }

        public ComputerVisionClient CreateClient()
        {
            if (_key == null)
            {
                throw new ArgumentNullException(nameof(_key), "The AZURE_COGNITIVE_SERVICES_VISION_KEY is not set. Set a system-level environment variable or provide this value by calling the overloaded constructor of this class.");
            }
            if (_endpoint == null)
            {
                throw new ArgumentNullException(nameof(_endpoint), "The AZURE_COGNITIVE_SERVICES_VISION_ENDPOINT is not set. Set a system-level environment variable or provide this value by calling the overloaded constructor of this class.");
            }
            var client = Authenticate(_key!, _endpoint!);
            return client;
        }

        public static ComputerVisionClient Authenticate(string key, string endpoint) =>
            new ComputerVisionClient(new ApiKeyServiceClientCredentials(key))
            {
                Endpoint = endpoint
            };

    }
}
The endpoint and key of the Computer Vision resource are set up via system-level environment variables.
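For reference, here is a minimal usage sketch of the factory above (the key and endpoint values are placeholders):

// Option 1: rely on the AZURE_COGNITIVE_SERVICES_VISION_KEY / AZURE_COGNITIVE_SERVICES_VISION_ENDPOINT environment variables
var factory = new ComputerVisionClientFactory();

// Option 2: pass the key and endpoint explicitly (placeholder values shown here)
var factoryWithExplicitConfig = new ComputerVisionClientFactory(
    "<your-computer-vision-key>",
    "https://<your-resource-name>.cognitiveservices.azure.com/");

ComputerVisionClient client = factory.CreateClient();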
Next up, let's look at retrieving OCR text from images. Here we use the ComputerVisionClient. We open a stream for an image file using File.OpenRead and pass it to the
ReadInStreamAsync method of the Computer Vision client. The image loaded in the app is selected by the user, and it is previewed and saved using the MAUI storage APIs (inside the AppData folder).
OcrImageService.cs
using Microsoft.Azure.CognitiveServices.Vision.ComputerVision;
using Microsoft.Azure.CognitiveServices.Vision.ComputerVision.Models;
using Microsoft.Extensions.Logging;
using System.Diagnostics;
using ReadResult = Microsoft.Azure.CognitiveServices.Vision.ComputerVision.Models.ReadResult;

namespace Ocr.Handwriting.Azure.AI.Lib
{

    public interface IOcrImageService
    {
        Task<IList<ReadResult?>?> GetReadResults(string imageFilePath);
        Task<string> GetReadResultsText(string imageFilePath);
    }

    public class OcrImageService : IOcrImageService
    {
        private readonly IComputerVisionClientFactory _computerVisionClientFactory;
        private readonly ILogger<OcrImageService> _logger;

        public OcrImageService(IComputerVisionClientFactory computerVisionClientFactory, ILogger<OcrImageService> logger)
        {
            _computerVisionClientFactory = computerVisionClientFactory;
            _logger = logger;
        }

        private ComputerVisionClient CreateClient() => _computerVisionClientFactory.CreateClient();

        public async Task<string> GetReadResultsText(string imageFilePath)
        {
            var readResults = await GetReadResults(imageFilePath);
            var ocrText = ExtractText(readResults?.FirstOrDefault());
            return ocrText;
        }

        public async Task<IList<ReadResult?>?> GetReadResults(string imageFilePath)
        {
            if (string.IsNullOrWhiteSpace(imageFilePath))
            {
                return null;
            }
            try
            {
                var client = CreateClient();

                // Retrieve OCR results
                using (FileStream stream = File.OpenRead(imageFilePath))
                {
                    var textHeaders = await client.ReadInStreamAsync(stream);
                    string operationLocation = textHeaders.OperationLocation;
                    string operationId = operationLocation[^36..]; // hat operator of C# 8.0: this slices out the last 36 chars, which contain the operation guid (32 hexadecimal chars + four hyphens)

                    ReadOperationResult results;
                    do
                    {
                        results = await client.GetReadResultAsync(Guid.Parse(operationId));
                        _logger.LogInformation($"Retrieving OCR results for operationId {operationId} for image {imageFilePath}");
                    }
                    while (results.Status == OperationStatusCodes.Running || results.Status == OperationStatusCodes.NotStarted);

                    IList<ReadResult?> result = results.AnalyzeResult.ReadResults;
                    return result;
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
                return null;
            }
        }

        private static string ExtractText(ReadResult? readResult) =>
            string.Join(Environment.NewLine, readResult?.Lines?.Select(l => l.Text) ?? new List<string>());

    }
}
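Before moving on, here is a rough usage sketch of the service. In the app it is resolved through dependency injection and called from the Index page's Submit handler; the direct construction and the file path below are just placeholders for illustration:

using Microsoft.Extensions.Logging.Abstractions;

// Direct construction shown for illustration - the app registers these in DI instead (see MauiProgram.cs below)
var ocrService = new OcrImageService(new ComputerVisionClientFactory(), NullLogger<OcrImageService>.Instance);
string ocrText = await ocrService.GetReadResultsText(@"C:\path\to\saved-image.png"); // placeholder path
Console.WriteLine(ocrText);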
Let's look at the MAUI Blazor project in the app.
The MauiProgram.cs looks like this.
MauiProgram.cs
using Ocr.Handwriting.Azure.AI.Data;
using Ocr.Handwriting.Azure.AI.Lib;
using Ocr.Handwriting.Azure.AI.Services;
using TextCopy;

namespace Ocr.Handwriting.Azure.AI;

public static class MauiProgram
{
    public static MauiApp CreateMauiApp()
    {
        var builder = MauiApp.CreateBuilder();
        builder
            .UseMauiApp<App>()
            .ConfigureFonts(fonts =>
            {
                fonts.AddFont("OpenSans-Regular.ttf", "OpenSansRegular");
            });

        builder.Services.AddMauiBlazorWebView();

#if DEBUG
        builder.Services.AddBlazorWebViewDeveloperTools();
        builder.Services.AddLogging();
#endif

        builder.Services.AddSingleton<WeatherForecastService>();
        builder.Services.AddScoped<IComputerVisionClientFactory, ComputerVisionClientFactory>();
        builder.Services.AddScoped<IOcrImageService, OcrImageService>();
        builder.Services.AddScoped<IImageSaveService, ImageSaveService>();
        builder.Services.InjectClipboard();

        return builder.Build();
    }
}
We also need some code to preview and save the image an end user chooses. The ImageSaveService looks like this.
ImageSaveService
using Microsoft.AspNetCore.Components.Forms;
using Ocr.Handwriting.Azure.AI.Models;

namespace Ocr.Handwriting.Azure.AI.Services
{

    public class ImageSaveService : IImageSaveService
    {

        public async Task<ImageSaveModel> SaveImage(IBrowserFile browserFile)
        {
            var buffers = new byte[browserFile.Size];
            var bytes = await browserFile.OpenReadStream(maxAllowedSize: 30 * 1024 * 1024).ReadAsync(buffers);
            string imageType = browserFile.ContentType;
            var basePath = FileSystem.Current.AppDataDirectory;
            var imageSaveModel = new ImageSaveModel
            {
                SavedFilePath = Path.Combine(basePath, $"{Guid.NewGuid().ToString("N")}-{browserFile.Name}"),
                PreviewImageUrl = $"data:{imageType};base64,{Convert.ToBase64String(buffers)}",
                FilePath = browserFile.Name,
                FileSize = bytes / 1024,
            };
            await File.WriteAllBytesAsync(imageSaveModel.SavedFilePath, buffers);
            return imageSaveModel;
        }
    }
}
Note the use of the maxAllowedSize parameter of the IBrowserFile.OpenReadStream method. Setting it is good practice, since IBrowserFile only supports 512 kB per default; I set it to 30 MB in the app to support some high-resolution images too.
We preview the image as base-64 and also save it. Note the use of FileSystem.Current.AppDataDirectory as the base path; this comes from MAUI's storage APIs (the Microsoft.Maui.Storage namespace).
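The IImageSaveService interface and the ImageSaveModel class are not reproduced in this article; based on how they are used above and in the page below, they look roughly like this (a sketch, see the repo for the actual files):

using Microsoft.AspNetCore.Components.Forms;

namespace Ocr.Handwriting.Azure.AI.Services
{
    // Sketch of the interface implemented by ImageSaveService above
    public interface IImageSaveService
    {
        Task<ImageSaveModel> SaveImage(IBrowserFile browserFile);
    }
}

namespace Ocr.Handwriting.Azure.AI.Models
{
    // Sketch of the model returned by SaveImage, inferred from the properties used in this article
    public class ImageSaveModel
    {
        public string? SavedFilePath { get; set; }    // full path of the copy saved under AppData
        public string? PreviewImageUrl { get; set; }  // base-64 data URL used for the preview
        public string? FilePath { get; set; }         // original file name of the uploaded image
        public long FileSize { get; set; }            // size in kB
    }
}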
The packages used for the MAUI Blazor project of the app are listed in Ocr.Handwriting.Azure.AI.csproj in the repo (the project file is not reproduced here). The main page of the app contains the UI and the code-behind logic:
Index.razor
@page "/"
@using Ocr.Handwriting.Azure.AI.Models;
@using Microsoft.Azure.CognitiveServices.Vision.ComputerVision;
@using Microsoft.Azure.CognitiveServices.Vision.ComputerVision.Models;
@using Ocr.Handwriting.Azure.AI.Lib;
@using Ocr.Handwriting.Azure.AI.Services;
@using TextCopy;
@inject IImageSaveService ImageSaveService
@inject IOcrImageService OcrImageService
@inject IClipboard Clipboard
<h1>Azure AI OCR Text recognition</h1>
<EditForm Model="Model" OnValidSubmit="@Submit" style="background-color:aliceblue">
<DataAnnotationsValidator />
<label><b>Select a picture to run OCR</b></label><br />
<InputFile OnChange="@OnInputFile" accept=".jpeg,.jpg,.png" />
<br />
<code class="alert-secondary">Supported file formats: .jpeg, .jpg and .png</code>
<br />
@if (Model.PreviewImageUrl != null) {
<label class="alert-info">Preview of the selected image</label>
<div style="overflow:auto;max-height:300px;max-width:500px">
<img class="flagIcon" src="@Model.PreviewImageUrl" /><br />
</div>
<code class="alert-light">File Size (kB): @Model.FileSize</code>
<br />
<code class="alert-light">File saved location: @Model.SavedFilePath</code>
<br />
<label class="alert-info">Click the button below to start running OCR using Azure AI</label><br />
<br />
<button type="submit">Submit</button> <button style="margin-left:200px" type="button"class="btn-outline-info" @onclick="@CopyTextToClipboard">Copy to clipboard</button>
<br />
<br />
<InputTextArea style="width:1000px;height:300px" readonly="readonly" placeholder="Detected text in the image uploaded" @bind-Value="Model!.OcrOutputText" rows="5"></InputTextArea>
}
</EditForm>
@code {
private IndexModel Model = new();
private async Task OnInputFile(InputFileChangeEventArgs args)
{
    var imageSaveModel = await ImageSaveService.SaveImage(args.File);
    Model = new IndexModel(imageSaveModel);
    await Application.Current.MainPage.DisplayAlert($"MAUI Blazor OCR App", $"Wrote file to location: {Model.SavedFilePath}. Size is: {Model.FileSize} kB", "Ok", "Cancel");
}
public async Task CopyTextToClipboard()
{
await Clipboard.SetTextAsync(Model.OcrOutputText);
await Application.Current.MainPage.DisplayAlert($"MAUI Blazor OCR App", $"The copied text was put into the clipboard. Character length: {Model.OcrOutputText?.Length}", "Ok", "Cancel");
}
private async Task Submit()
{
if (Model.PreviewImageUrl == null || Model.SavedFilePath == null)
{
await Application.Current.MainPage.DisplayAlert($"MAUI Blazor OCR App", $"You must select an image first before running OCR. Supported formats are .jpeg, .jpg and .png", "Ok", "Cancel");
return;
}
Model.OcrOutputText = await OcrImageService.GetReadResultsText(Model.SavedFilePath);
StateHasChanged(); //visual refresh here
}
}
The UI works like this: the user selects an image. As the 'accept' HTML attribute shows, the .jpeg, .jpg and .png extensions are allowed in the file input dialog. When the user selects an image, the image is saved and
previewed in the UI.
When the user hits the Submit button, the OCR service in Azure is contacted and the text is retrieved and displayed in the text area below, if any text is present in the image. A button allows copying the text to the clipboard.
Here are some screenshots of the app.
This article shows how to build a universal translator using Azure AI Cognitive Services. This includes using Azure AI Text Analytics to detect the language of the input text, and
Azure AI Translation services to translate it.
The GitHub repo is here:
https://github.com/toreaurstadboss/MultiLingual.Translator
The Lib project's csproj file references the NuGet packages used for language detection and translation, Azure.AI.TextAnalytics and Azure.AI.Translation.Text (see the repo for the full project file).
We are going to build a .NET 6 cross-platform MAUI Blazor app. First off, we focus on the Razor class library project called 'Lib'. This
project contains the library util code to detect language and translate text into other languages.
Let us first look at creating the clients needed to detect language and to translate text.
TextAnalyticsFactory.cs
using Azure;
using Azure.AI.TextAnalytics;
using Azure.AI.Translation.Text;
using System;

namespace MultiLingual.Translator.Lib
{
    public static class TextAnalyticsClientFactory
    {

        public static TextAnalyticsClient CreateClient()
        {
            string? uri = Environment.GetEnvironmentVariable("AZURE_COGNITIVE_SERVICE_ENDPOINT", EnvironmentVariableTarget.Machine);
            string? key = Environment.GetEnvironmentVariable("AZURE_COGNITIVE_SERVICE_KEY", EnvironmentVariableTarget.Machine);
            if (uri == null)
            {
                throw new ArgumentNullException(nameof(uri), "Could not get system environment variable named 'AZURE_COGNITIVE_SERVICE_ENDPOINT'. Set this variable first.");
            }
            if (key == null)
            {
                throw new ArgumentNullException(nameof(key), "Could not get system environment variable named 'AZURE_COGNITIVE_SERVICE_KEY'. Set this variable first.");
            }
            var client = new TextAnalyticsClient(new Uri(uri!), new AzureKeyCredential(key!));
            return client;
        }

        public static TextTranslationClient CreateTranslateClient()
        {
            string? keyTranslate = Environment.GetEnvironmentVariable("AZURE_TRANSLATION_SERVICE_KEY", EnvironmentVariableTarget.Machine);
            string? regionForTranslationService = Environment.GetEnvironmentVariable("AZURE_TRANSLATION_SERVICE_REGION", EnvironmentVariableTarget.Machine);
            if (keyTranslate == null)
            {
                throw new ArgumentNullException(nameof(keyTranslate), "Could not get system environment variable named 'AZURE_TRANSLATION_SERVICE_KEY'. Set this variable first.");
            }
            if (regionForTranslationService == null)
            {
                throw new ArgumentNullException(nameof(regionForTranslationService), "Could not get system environment variable named 'AZURE_TRANSLATION_SERVICE_REGION'. Set this variable first.");
            }
            var client = new TextTranslationClient(new AzureKeyCredential(keyTranslate!), region: regionForTranslationService);
            return client;
        }

    }
}
The code assumes that these four environment variables are set at the SYSTEM level of your OS.
Further on, let us look at the code to detect language. It uses a TextAnalyticsClient, created via the factory above, to detect which language an input text is written in.
IDetectLanguageUtil.cs
using Azure.AI.TextAnalytics;

namespace MultiLingual.Translator.Lib
{
    public class DetectLanguageUtil : IDetectLanguageUtil
    {

        private TextAnalyticsClient _client;

        public DetectLanguageUtil()
        {
            _client = TextAnalyticsClientFactory.CreateClient();
        }

        /// <summary>
        /// Detects language of the <paramref name="inputText"/>.
        /// </summary>
        /// <param name="inputText"></param>
        /// <remarks><see cref="Models.LanguageCode" /> contains the language code list of languages supported</remarks>
        public async Task<DetectedLanguage> DetectLanguage(string inputText)
        {
            DetectedLanguage detectedLanguage = await _client.DetectLanguageAsync(inputText);
            return detectedLanguage;
        }

        /// <summary>
        /// Detects language of the <paramref name="inputText"/>. Returns the language name.
        /// </summary>
        /// <param name="inputText"></param>
        /// <remarks><see cref="Models.LanguageCode" /> contains the language code list of languages supported</remarks>
        public async Task<string> DetectLanguageName(string inputText)
        {
            DetectedLanguage detectedLanguage = await DetectLanguage(inputText);
            return detectedLanguage.Name;
        }

        /// <summary>
        /// Detects language of the <paramref name="inputText"/>. Returns the language code.
        /// </summary>
        /// <param name="inputText"></param>
        /// <remarks><see cref="Models.LanguageCode" /> contains the language code list of languages supported</remarks>
        public async Task<string> DetectLanguageIso6391(string inputText)
        {
            DetectedLanguage detectedLanguage = await DetectLanguage(inputText);
            return detectedLanguage.Iso6391Name;
        }

        /// <summary>
        /// Detects language of the <paramref name="inputText"/>. Returns the confidence score.
        /// </summary>
        /// <param name="inputText"></param>
        /// <remarks><see cref="Models.LanguageCode" /> contains the language code list of languages supported</remarks>
        public async Task<double> DetectLanguageConfidenceScore(string inputText)
        {
            DetectedLanguage detectedLanguage = await DetectLanguage(inputText);
            return detectedLanguage.ConfidenceScore;
        }
    }
}
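A quick usage sketch of the util (assuming the environment variables above are set; the sample outputs are only indicative):

var detectLanguageUtil = new DetectLanguageUtil();

string name = await detectLanguageUtil.DetectLanguageName("Hvor er nærmeste togstasjon?");       // e.g. "Norwegian"
string isoCode = await detectLanguageUtil.DetectLanguageIso6391("Hvor er nærmeste togstasjon?"); // e.g. "no"
double score = await detectLanguageUtil.DetectLanguageConfidenceScore("Hvor er nærmeste togstasjon?"); // e.g. 0.99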
The Iso6391 code is important when it comes to translation, which will be shown soon. But first let us look at the supported languages of Azure AI Translation services.
LanguageCode.cs
As there are roughly 5,000-10,000 languages in the world, the list in LanguageCode.cs shows that Azure AI Translation services supports about 130 of them, which is only 1-2 % of the total number of languages. The supported languages do, of course, include the most widely spoken languages in the world.
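The full LanguageCode.cs listing is in the repo; in short, it is a static class of string constants mapping language names to translation language codes, along the lines of this trimmed-down sketch (only a few of the ~130 entries shown, values illustrative):

namespace MultiLingual.Translator.Lib.Models
{
    // Trimmed-down sketch of LanguageCode - the repo contains the full list of supported languages
    public static class LanguageCode
    {
        public const string English = "en";
        public const string German = "de";
        public const string French = "fr";
        public const string Norwegian = "nb";  // Bokmål
        public const string Klingon = "tlh";   // yes, Klingon is a supported translation language
    }
}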
Let us look at the translation util code next.
ITranslateUtil.cs
using Azure.AI.Translation.Text;
using MultiLingual.Translator.Lib.Models;

namespace MultiLingual.Translator.Lib
{

    public class TranslateUtil : ITranslateUtil
    {

        private TextTranslationClient _client;

        public TranslateUtil()
        {
            _client = TextAnalyticsClientFactory.CreateTranslateClient();
        }

        /// <summary>
        /// Translates text using Azure AI Translate services.
        /// </summary>
        /// <param name="targetLanguage">See <see cref="LanguageCode"/> for a list of supported languages</param>
        /// <param name="inputText"></param>
        /// <param name="sourceLanguage">Pass in null here to auto detect the source language</param>
        /// <returns></returns>
        public async Task<string?> Translate(string targetLanguage, string inputText, string? sourceLanguage = null)
        {
            var translationOfText = await _client.TranslateAsync(targetLanguage, inputText, sourceLanguage);
            if (translationOfText?.Value == null)
            {
                return null;
            }
            var translation = translationOfText.Value.SelectMany(l => l.Translations).Select(l => l.Text)?.ToList();
            string? translationText = translation?.FlattenString();
            return translationText;
        }

    }
}
We use a little helper extension method here too:
StringExtensions.cs
using System.Text;

namespace MultiLingual.Translator.Lib
{
    public static class StringExtensions
    {

        /// <summary>
        /// Merges a collection of lines into a flattened string, separating each line by a specified line separator.
        /// Newline is default.
        /// </summary>
        /// <param name="inputLines"></param>
        /// <param name="lineSeparator"></param>
        /// <returns></returns>
        public static string? FlattenString(this IEnumerable<string>? inputLines, string lineSeparator = "\n")
        {
            if (inputLines == null || !inputLines.Any())
            {
                return null;
            }
            var flattenedString = inputLines?.Aggregate(new StringBuilder(),
                (sb, l) => sb.AppendLine(l + lineSeparator),
                sb => sb.ToString().Trim());
            return flattenedString;
        }

    }
}
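A small usage example of the extension method (hypothetical input lines):

// Merges the lines into one string, separated by the line separator (newline by default)
string? merged = new[] { "First translated line", "Second translated line" }.FlattenString();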
Here are some tests for detecting language:
DetectLanguageUtilTests.cs
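The DetectLanguageUtilTests.cs listing is not reproduced here; a minimal sketch of what such a test can look like, modeled on the translation tests below (the language names and codes in the test data are indicative), could be:

using FluentAssertions;
using Xunit;

namespace MultiLingual.Translator.Lib.Test
{
    public class DetectLanguageUtilTests
    {
        private DetectLanguageUtil _detectLanguageUtil;

        public DetectLanguageUtilTests()
        {
            _detectLanguageUtil = new DetectLanguageUtil();
        }

        [Theory]
        [InlineData("Dette er en fin dag i Norge", "Norwegian", "no")]
        [InlineData("Guten Tag, wie geht es dir?", "German", "de")]
        public async Task DetectLanguageReturnsExpected(string input, string expectedName, string expectedIso6391)
        {
            string detectedName = await _detectLanguageUtil.DetectLanguageName(input);
            string detectedIso6391 = await _detectLanguageUtil.DetectLanguageIso6391(input);

            detectedName.Should().Be(expectedName);
            detectedIso6391.Should().Be(expectedIso6391);
        }
    }
}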
And here are some translation util tests:
TranslateUtilTests.cs
using FluentAssertions;
using MultiLingual.Translator.Lib.Models;
using Xunit;

namespace MultiLingual.Translator.Lib.Test
{
    public class TranslateUtilTests
    {

        private TranslateUtil _translateUtil;

        public TranslateUtilTests()
        {
            _translateUtil = new TranslateUtil();
        }

        [Theory]
        [InlineData("Jeg er fra Norge og jeg liker brunost", "i'm from norway and i like brown cheese", LanguageCode.Norwegian, LanguageCode.English)]
        [InlineData("Jeg er fra Norge og jeg liker brunost", "i'm from norway and i like brown cheese", null, LanguageCode.English)] //auto detect source language is tested here
        [InlineData("Ich bin aus Hamburg und ich liebe bier", "i'm from hamburg and i love beer", LanguageCode.German, LanguageCode.English)]
        [InlineData("Ich bin aus Hamburg und ich liebe bier", "i'm from hamburg and i love beer", null, LanguageCode.English)] //auto detect source language is tested here
        [InlineData("tlhIngan maH", "we are klingons", LanguageCode.Klingon, LanguageCode.English)] //Klingon force!
        public async Task TranslationReturnsExpected(string input, string expectedTranslation, string sourceLanguage, string targetLanguage)
        {
            string? translation = await _translateUtil.Translate(targetLanguage, input, sourceLanguage);
            translation.Should().NotBeNull();
            translation.Should().BeEquivalentTo(expectedTranslation);
        }

    }
}
Over to the UI. The app is made with MAUI Blazor.
Here are some models for the app. First, the NameValue helper used to populate the target-language dropdown:
NameValue.cs
namespace MultiLingual.Translator.Models
{
    public class NameValue
    {
        public string Name { get; set; }
        public string Value { get; set; }
    }
}
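The Index page below also binds to a LanguageInputModel. Its listing is not reproduced here, but based on how it is used in Index.razor it holds roughly these properties (a sketch, not the exact file from the repo):

namespace MultiLingual.Translator.Models
{
    // Sketch of the model bound to the EditForm in Index.razor, inferred from its usage there
    public class LanguageInputModel
    {
        public string InputText { get; set; } = string.Empty;  // text to translate
        public string? DetectedLanguageInfo { get; set; }      // ISO 639-1 code + name of the detected language
        public string? DetectedLanguageIso6391 { get; set; }   // ISO 639-1 code used for the flag icon
        public string? TargetLanguage { get; set; }            // language code to translate into
        public string? TranslatedText { get; set; }            // translation result
    }
}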
The UI consists of the following Razor code, written for the MAUI Blazor app.
Index.razor
@page "/"
@inject ITranslateUtil TransUtil
@inject IDetectLanguageUtil DetectLangUtil
@inject IJSRuntime JS
@using MultiLingual.Translator.Lib;
@using MultiLingual.Translator.Lib.Models;
@using MultiLingual.Translator.Models;
<h1>Azure AI Text Translation</h1>
<EditForm Model="@Model" OnValidSubmit="@Submit"class="form-group" style="background-color:aliceblue;">
<DataAnnotationsValidator />
<ValidationSummary />
<div class="form-group row">
<label for="Model.InputText">Text to translate</label>
<InputTextArea @bind-Value="Model!.InputText" placeholder="Enter text to translate" @ref="inputTextRef" id="textToTranslate" rows="5" />
</div>
<div class="form-group row">
<span>Detected language of text to translate</span>
<InputText class="languageLabelText" readonly="readonly" placeholder="The detected language of the text to translate" @bind-Value="Model!.DetectedLanguageInfo"></InputText>
@if (Model.DetectedLanguageInfo != null) {
    <img src="@FlagIcon" class="flagIcon" />
}
</div>
<br />
<div class="form-group row">
<span>Translate into language</span>
<InputSelect placeholder="Choose the target language" @bind-Value="Model!.TargetLanguage">
@foreach (var item in LanguageCodes) {
<option value="@item.Value">@item.Name</option>
}
</InputSelect>
<br />
@if (Model.TargetLanguage != null) {
    <img src="@TargetFlagIcon" class="flagIcon" />
}
</div>
<br />
<div class="form-group row">
<span>Translation</span>
<InputTextArea readonly="readonly" placeholder="The translated text target language" @bind-Value="Model!.TranslatedText" rows="5"></InputTextArea>
</div>
<button type="submit"class="submitButton">Submit</button>
</EditForm>
@code {
private Azure.AI.TextAnalytics.TextAnalyticsClient _client;
private InputTextArea inputTextRef;
public LanguageInputModel Model { get; set; } = new();
private string FlagIcon {
    get
    {
        return $"images/flags/png100px/{Model.DetectedLanguageIso6391}.png";
    }
}
private string TargetFlagIcon {
    get
    {
        return $"images/flags/png100px/{Model.TargetLanguage}.png";
    }
}
private List<NameValue> LanguageCodes = typeof(LanguageCode).GetFields().Select(f => new NameValue {
Name = f.Name,
Value = f.GetValue(f)?.ToString(),
}).OrderBy(f => f.Name).ToList();
private async Task Submit()
{
var detectedLanguage = await DetectLangUtil.DetectLanguage(Model.InputText);
Model.DetectedLanguageInfo = $"{detectedLanguage.Iso6391Name} {detectedLanguage.Name}";
Model.DetectedLanguageIso6391 = detectedLanguage.Iso6391Name;
if (_client == null)
{
_client = TextAnalyticsClientFactory.CreateClient();
}
Model.TranslatedText = await TransUtil.Translate(Model.TargetLanguage, Model.InputText, detectedLanguage.Iso6391Name);
StateHasChanged();
}
protected override async Task OnAfterRenderAsync(bool firstRender)
{
if (firstRender)
{
Model.TargetLanguage = LanguageCode.English;
await JS.InvokeVoidAsync("exampleJsFunctions.focusElement", inputTextRef?.AdditionalAttributes.FirstOrDefault(a => a.Key?.ToLower() == "id").Value);
StateHasChanged();
}
}
}
Finally, a screenshot of how the app looks:
You enter the text to translate, and the detected language is shown after you hit Submit. You can then select the target language to translate the text into; English is selected by default. Both the detected source language and the selected target language are shown as flag icons, provided there is a 1:1 mapping between the ISO 639-1 code and
the flag icons available in the app.