Using the Browser Speech Recognition API from Delphi with WebView4Delphi (WebView2)

Japanese

Using the Browser Speech Recognition API (SpeechRecognition) from Delphi with WebView4Delphi (WebView2)

This article explains how to use the browser’s Speech Recognition API (SpeechRecognition) from Delphi XE4 and later by integrating WebView4Delphi (WebView2).

（1）Introduction

On Windows 10, install the Microsoft WebView2 Runtime from https://developer.microsoft.com/microsoft-edge/webview2/.

This guide assumes that WebView4Delphi has already been downloaded (https://github.com/salvadordf/WebView4Delphi) and installed.
The file WebView2Loader.dll is also required (included with WebView4Delphi).

（2）Creating and Saving the Project

Launch the Delphi IDE and select "File" → "New" → "Windows VCL Application – Delphi".
Then choose "File" → "Save All (Ctrl+Shift+S)" to create a folder for the project and save both the unit (Unit1) and the project (Project1).
Next, select "Project" → "Build Project1 (Shift+F9)" to compile it once in advance (this generates the necessary output folders).
Inside the project folder, create a directory named "files" under Win32\Debug.
In Win32\Debug\files, create an HTML file named index.html with the following content and save it in UTF‑8 encoding.

<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
</head>
<body>
<script>
  // Kept as a global on purpose: the host application starts recognition by
  // evaluating "SRecognition.start()" in this page.
  var SRecognition = null;

  window.addEventListener("load", function () {
    // Use the webkit-prefixed constructor when present, otherwise the
    // unprefixed one; leave SRecognition null when neither exists.
    var Recognizer = window.webkitSpeechRecognition || window.SpeechRecognition;
    if (Recognizer) {
      SRecognition = new Recognizer();
    }
    if (SRecognition == null) {
      return; // Speech recognition is unavailable: nothing is logged.
    }

    // Every status change is reported through console.log so that a host
    // listening to console messages can react to it.
    console.log("event:load");

    // One final result per start() call, in the browser's language.
    SRecognition.continuous = false;
    SRecognition.interimResults = false;
    SRecognition.lang = navigator.language || "en-US";

    // Success: report the first alternative of the first result, then stop.
    SRecognition.addEventListener("result", function (event) {
      var transcript = event.results[0][0].transcript;
      console.log("message:" + transcript);
      SRecognition.stop();
    });

    // The remaining events carry no payload; each is logged as "event:<name>".
    ["error", "end", "start", "audiostart"].forEach(function (eventName) {
      SRecognition.addEventListener(eventName, function () {
        console.log("event:" + eventName);
      });
    });
  });
</script>
</body>
</html>

（3）Designing the Form

Place the following components on the form by dragging and dropping them from the Tool Palette:
one TButton, one TMemo, one TWVBrowser, and one TWVWindowParent.

（4）Writing the Source Code

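Double‑click Button1 on the form to create its OnClick event handler. In the Object Inspector, also create the form's OnCreate handler and WVBrowser1's OnAfterCreated and OnDevToolsProtocolEventReceived handlers, and then enter the source code below.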

unit Unit1;

interface

uses
  Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants, System.Classes, Vcl.Graphics,
  Vcl.Controls, Vcl.Forms, Vcl.Dialogs, Vcl.StdCtrls, System.JSON.Serializers,
  uWVWinControl, uWVWindowParent, uWVBrowserBase, uWVBrowser,
  uWVTypes, uWVConstants, uWVTypeLibrary, uWVLoader, uWVInterfaces,
  uWVCoreWebView2Args, uWVLibFunctions, uWVCoreWebView2CookieList,
  uWVCoreWebView2Cookie, uWVCoreWebView2HttpRequestHeaders,
  uWVCoreWebView2, System.Json, System.IOUtils;

type
  // Main form of the sample: embeds a WebView2 browser and shows the
  // speech-recognition status and results reported by the loaded page.
  TForm1 = class(TForm)
    // Browser component; created in FormCreate and driven from the handlers below.
    WVBrowser1: TWVBrowser;
    // Window control that hosts the browser (FormCreate assigns WVBrowser1 to it).
    WVWindowParent1: TWVWindowParent;
    // Starts speech recognition; disabled while a recognition pass is running.
    Button1: TButton;
    // Receives status lines and the recognized text.
    Memo1: TMemo;
    // Prepares the UI and creates the browser once the WebView2 loader is initialized.
    procedure FormCreate(Sender: TObject);
    // Maps the local "files" folder, subscribes to console messages and loads index.html.
    procedure WVBrowser1AfterCreated(Sender: TObject);
    // Handles the subscribed DevTools protocol event (event ID 1) and updates the UI
    // according to the console message text.
    procedure WVBrowser1DevToolsProtocolEventReceived(Sender: TObject;
      const aWebView: ICoreWebView2;
      const aArgs: ICoreWebView2DevToolsProtocolEventReceivedEventArgs;
      const aEventName: wvstring; aEventID: Integer);
    // Runs "SRecognition.start()" in the page.
    procedure Button1Click(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
  end;

var
  // Global form reference; presumably assigned by the project file's
  // Application.CreateForm call, which is not shown here.
  Form1: TForm1;

implementation

{$R *.dfm}

// Starts one speech-recognition pass in the loaded page.
procedure TForm1.Button1Click(Sender: TObject);
const
  StartRecognitionScript = 'SRecognition.start()';
  StartExecutionID = 1;
begin
  // The button stays disabled while recognition runs; the console-message
  // handler re-enables it when the page reports "event:end".
  Button1.Enabled := False;

  // Call start() on the page's global SpeechRecognition object.
  WVBrowser1.ExecuteScript(StartRecognitionScript, StartExecutionID);
end;

// Prepares the UI and creates the browser once the WebView2 loader is ready.
procedure TForm1.FormCreate(Sender: TObject);
const
  MaxPolls    = 20;   // number of times the loader state is polled
  PollDelayMs = 500;  // pause between polls; 20 * 500 ms = 10 s at most
var
  Polls: Integer;
begin
  // The button is enabled later, when the page reports "event:load".
  Button1.Enabled := False;
  Button1.Caption := 'Start Speech Recognition';
  WVWindowParent1.Browser := WVBrowser1;

  // The loader already failed: show its message and give up.
  if GlobalWebView2Loader.InitializationError then
  begin
    ShowMessage(GlobalWebView2Loader.ErrorMessage);
    Exit;
  end;

  // Poll until the loader is initialized or the attempts are used up,
  // pumping the message queue between polls.
  Polls := 0;
  while not ((Polls >= MaxPolls) or GlobalWebView2Loader.Initialized) do
  begin
    Sleep(PollDelayMs);
    Application.ProcessMessages;
    Inc(Polls);
  end;

  if not GlobalWebView2Loader.Initialized then
  begin
    ShowMessage('WebView2 initialization failed');
    Exit;
  end;

  WVBrowser1.CreateBrowser(WVWindowParent1.Handle);

  // Note:
  // Changing the User-Agent to a non-WebView value allows Google login.
  // NOTE(review): this runs right after CreateBrowser; confirm that the
  // assignment takes effect before WVBrowser1AfterCreated has fired.
  WVBrowser1.UserAgent :=
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64)' +
    ' AppleWebKit/537.36 (KHTML, like Gecko)' +
    ' Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.53';
end;

// Configures the freshly created browser and loads the local test page.
procedure TForm1.WVBrowser1AfterCreated(Sender: TObject);
const
  VirtualHost    = 'demo';
  ConsoleEventID = 1;  // ID checked in WVBrowser1DevToolsProtocolEventReceived
var
  FilesFolder: string;
begin
  // Required
  WVWindowParent1.UpdateSize;

  // Serve the "files" folder next to the executable as "https://demo".
  FilesFolder := ExtractFilePath(Application.ExeName) + 'files';
  WVBrowser1.CoreWebView2.SetVirtualHostNameToFolderMapping(
    VirtualHost,
    PWideChar(FilesFolder),
    COREWEBVIEW2_HOST_RESOURCE_ACCESS_KIND_ALLOW);

  // Turn on the DevTools "Console" domain ...
  WVBrowser1.CallDevToolsProtocolMethod('Console.enable', '{}', 0);
  // ... and have every console message raise
  // OnDevToolsProtocolEventReceived with EventID = ConsoleEventID.
  WVBrowser1.SubscribeToDevToolsProtocolEvent('Console.messageAdded', ConsoleEventID);

  // Show the local ".\files\index.html" through the virtual host.
  WVBrowser1.Navigate('https://demo/index.html');
end;


// Handles the subscribed "Console.messageAdded" DevTools event (event ID 1).
//
// The page reports its state through console.log; this handler decodes the
// JSON payload of the event and reacts to the message text:
//   'event:load'       -> enable the button, log "Loaded"
//   'event:end'        -> enable the button, log "Speech recognition finished"
//   'event:audiostart' -> prompt the user to speak
//   'message:<text>'   -> show the recognized <text>
// Any other console text is ignored. Events with a different ID are ignored.
// An exception raised while parsing the payload propagates to the caller.
procedure TForm1.WVBrowser1DevToolsProtocolEventReceived(Sender: TObject;
  const aWebView: ICoreWebView2;
  const aArgs: ICoreWebView2DevToolsProtocolEventReceivedEventArgs;
  const aEventName: wvstring; aEventID: Integer);
const
  ConsoleEventID = 1;
  MessagePrefix  = 'message:';
type
  // Shape of the "message" object inside the event payload.
  TJsonMsg=record
    column:Integer;
    level:String;
    line:Integer;
    source:String;
    text:String;
    url:String;
  end;
  // Top-level payload: { "message": { ... } }
  TJsonMessage=record
    message:TJsonMsg;
  end;
var
  EventArgs: TCoreWebView2DevToolsProtocolEventReceivedEventArgs;
  Serializer: TJsonSerializer;
  Payload: string;
  Parsed: TJsonMessage;
  ConsoleText: string;
begin
  if aEventID <> ConsoleEventID then
    Exit;

  // Read the payload through the WebView4Delphi wrapper instead of calling
  // Get_ParameterObjectAsJson directly: the raw call hands back a buffer the
  // caller must release, and the original code never released it.
  EventArgs := TCoreWebView2DevToolsProtocolEventReceivedEventArgs.Create(aArgs);
  try
    Payload := EventArgs.ParameterObjectAsJson;
  finally
    EventArgs.Free;
  end;

  // Nothing to parse (e.g. the payload could not be retrieved).
  if Payload = '' then
    Exit;

  // try/finally so the serializer is released even when parsing raises.
  Serializer := TJsonSerializer.Create;
  try
    Parsed := Serializer.Deserialize<TJsonMessage>(Payload);
  finally
    Serializer.Free;
  end;
  ConsoleText := Parsed.message.text;

  if ConsoleText = 'event:load' then
  begin
    Button1.Enabled := True;
    Memo1.Lines.Add('Loaded');
  end
  else if ConsoleText = 'event:end' then
  begin
    Button1.Enabled := True;
    Memo1.Lines.Add('Speech recognition finished');
  end
  else if ConsoleText = 'event:audiostart' then
  begin
    Memo1.Lines.Add('Please speak into the microphone');
  end
  // Only a leading "message:" marks a recognition result; matching it
  // anywhere in the text would cut unrelated console output at the wrong place.
  else if ConsoleText.StartsWith(MessagePrefix) then
  begin
    Memo1.Lines.Add(
      '「' + ConsoleText.Substring(Length(MessagePrefix)) + '」'
    );
  end;
end;


// Unit start-up: creates and starts the global WebView2 loader with a fresh
// user-data ("cache") folder located next to the executable.
initialization
var cachepath:string;
begin
  cachepath:=ExtractFilePath(Application.ExeName) + 'cache';
  //Delete cache (best effort)
  // Deleting can fail, for example while files in the folder are still in
  // use. An exception escaping from a unit initialization section would
  // abort application start-up, so a failed delete is only reported to the
  // debugger and the existing folder is reused.
  try
    if DirectoryExists(cachepath) then TDirectory.Delete(cachepath,true);
  except
    on E: Exception do
      OutputDebugString(PChar('Could not delete WebView2 cache folder: ' + E.Message));
  end;
  //Load and initialize GlobalWebView2Loader
  GlobalWebView2Loader := TWVLoader.Create(nil);
  //Specify the folder for cache, cookies, etc.
  GlobalWebView2Loader.UserDataFolder := cachepath;
  GlobalWebView2Loader.StartWebView2;
end;

end.

（5）Copying "WebView2Loader.dll" to the Executable Folder

Copy the file:
C:\Program Files (x86)\Embarcadero\Studio\22.0\Redist\win32\WebView2Loader.dll
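into the same folder as your application’s executable (for a 32‑bit Debug build: Project Folder → Win32\Debug). The output folder depends on the build configuration and target platform, as listed below; for a 64‑bit target, copy the 64‑bit version of WebView2Loader.dll instead.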

(A) "Debug" build "Windows 32‑bit"	ProjectFolder\Win32\Debug
(B) "Debug" build "Windows 64‑bit"	ProjectFolder\Win64\Debug
(C) "Release" build "Windows 32‑bit"	ProjectFolder\Win32\Release
(D) "Release" build "Windows 64‑bit"	ProjectFolder\Win64\Release

（6）Running the Application

Connect a microphone to your PC. (If you are using a laptop or another device with a built‑in microphone, that microphone can be used as well.)

Select “Run” → “Run” to start the application.
After a short moment, the “Start Speech Recognition (Button1)” button will become enabled.

Click the “Start Speech Recognition (Button1)” button. When the message “Please speak into the microphone” appears in Memo1, speak something into the microphone.
If a dialog appears asking for microphone permission, click “Allow”.
When speech recognition succeeds, the recognized text will be displayed in Memo1.