Using the Browser Speech Recognition API (SpeechRecognition) from Delphi with WebView4Delphi (WebView2)
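This article explains how to use the browser’s Speech Recognition API (SpeechRecognition) from Delphi XE4 and later by integrating WebView4Delphi (WebView2). Note that the sample unit shown below uses an inline variable declaration in its initialization section, a language feature that requires Delphi 10.3 Rio or later; on older Delphi versions that section must be adapted.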
(1)Introduction
On Windows 10, install the Microsoft WebView2 Runtime from
https://developer.microsoft.com/microsoft-edge/webview2/.
This guide assumes that WebView4Delphi has already been downloaded
(https://github.com/salvadordf/WebView4Delphi) and installed.
The file WebView2Loader.dll is also required (included with WebView4Delphi).
(2)Creating and Saving the Project
Launch the Delphi IDE and select "File" → "New" → "Windows VCL Application – Delphi".
Then choose "File" → "Save All (Ctrl+Shift+S)" to create a folder for the project and
save both the unit (Unit1) and the project (Project1).
Next, select "Project" → "Build Project1 (Shift+F9)" to compile it once in advance
(this generates the necessary output folders).
Inside the project folder, create a files directory under
win32\debug.
In win32\debug\files, create an HTML file named index.html with the following content
and save it in UTF‑8 encoding.
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
</head>
<body>
<script>
// Kept global on purpose: the recognizer is started from outside this page
// by evaluating "SRecognition.start()".
var SRecognition = null;

window.addEventListener("load", () => {
  // Look up the prefixed constructor first, then the unprefixed one.
  const Recognizer = window.webkitSpeechRecognition || window.SpeechRecognition;
  if (!Recognizer) {
    return; // Speech recognition is unavailable: nothing is set up or logged.
  }
  SRecognition = new Recognizer();

  // Every console.log below is a notification that can be observed
  // through the console; the exact texts are part of that contract.
  console.log("event:load");

  // Single-shot recognition that delivers final results only.
  SRecognition.continuous = false;
  SRecognition.interimResults = false;
  SRecognition.lang = navigator.language || "en-US";

  // Success: report the first alternative of the first result, then stop.
  SRecognition.addEventListener("result", (event) => {
    const transcript = event.results[0][0].transcript;
    console.log("message:" + transcript);
    SRecognition.stop();
  });

  // The remaining events are only reported, with a fixed text each.
  const logOn = (eventName, text) => {
    SRecognition.addEventListener(eventName, () => console.log(text));
  };
  logOn("error", "event:error");
  logOn("end", "event:end");
  logOn("start", "event:start");
  logOn("audiostart", "event:audiostart");
});
</script>
</body>
</html>
(3)Designing the Form
Place the following components on the form by dragging and dropping them from the Tool Palette:
one TButton, one TMemo, one TWVBrowser, and one TWVWindowParent.
(4)Writing the Source Code
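Double‑click Button1 on the form to create its OnClick event handler. In the Object Inspector, also create the OnCreate event of Form1 and the OnAfterCreated and OnDevToolsProtocolEventReceived events of WVBrowser1, then complete the unit so that it matches the following source code.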
unit Unit1;
interface
uses
Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants, System.Classes, Vcl.Graphics,
Vcl.Controls, Vcl.Forms, Vcl.Dialogs, Vcl.StdCtrls, System.JSON.Serializers,
uWVWinControl, uWVWindowParent, uWVBrowserBase, uWVBrowser,
uWVTypes, uWVConstants, uWVTypeLibrary, uWVLoader, uWVInterfaces,
uWVCoreWebView2Args, uWVLibFunctions, uWVCoreWebView2CookieList,
uWVCoreWebView2Cookie, uWVCoreWebView2HttpRequestHeaders,
uWVCoreWebView2, System.Json, System.IOUtils;
type
// Main form of the sample: hosts the WebView2 browser that runs index.html
// and shows the speech recognition progress and results.
TForm1 = class(TForm)
// Browser component used to load the page and execute script in it.
WVBrowser1: TWVBrowser;
// Window control whose handle is passed to CreateBrowser as the browser's parent.
WVWindowParent1: TWVWindowParent;
// Starts speech recognition; disabled while the page is loading or recognizing.
Button1: TButton;
// Receives status lines and the recognized text.
Memo1: TMemo;
// Sets up the controls and creates the browser once GlobalWebView2Loader
// reports that it is initialized.
procedure FormCreate(Sender: TObject);
// Maps the local "files" folder to the virtual host "demo", subscribes to
// console messages and navigates to https://demo/index.html.
procedure WVBrowser1AfterCreated(Sender: TObject);
// Handles subscribed DevTools protocol events; console messages from the
// page (event ID 1) are turned into button state changes and memo lines.
procedure WVBrowser1DevToolsProtocolEventReceived(Sender: TObject;
const aWebView: ICoreWebView2;
const aArgs: ICoreWebView2DevToolsProtocolEventReceivedEventArgs;
const aEventName: wvstring; aEventID: Integer);
// Disables the button and runs "SRecognition.start()" in the page.
procedure Button1Click(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
end;
var
// Form instance variable for TForm1.
Form1: TForm1;
implementation
// Links the form's .dfm resource into this unit.
{$R *.dfm}
// Starts speech recognition in the loaded page.
procedure TForm1.Button1Click(Sender: TObject);
const
  // Script evaluated in the page; SRecognition is the global recognizer
  // object created by index.html.
  START_SCRIPT = 'SRecognition.start()';
  // Execution ID passed along with the script.
  START_EXECUTION_ID = 1;
begin
  // Block further clicks; the button is enabled again when the page
  // reports 'event:end'.
  Button1.Enabled := False;
  WVBrowser1.ExecuteScript(START_SCRIPT, START_EXECUTION_ID);
end;
// Prepares the controls and creates the browser once the global WebView2
// loader is initialized. Waits up to MAX_WAIT_ROUNDS * WAIT_INTERVAL_MS
// milliseconds for the loader and reports a failure with a message box.
procedure TForm1.FormCreate(Sender: TObject);
const
  MAX_WAIT_ROUNDS = 20;
  WAIT_INTERVAL_MS = 500;
  // Non-WebView user agent string (changing it allows Google login).
  BROWSER_USER_AGENT =
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64)' +
    ' AppleWebKit/537.36 (KHTML, like Gecko)' +
    ' Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.53';
var
  attempt: Integer;
begin
  // The button stays disabled until the page reports that it has loaded.
  Button1.Caption := 'Start Speech Recognition';
  Button1.Enabled := False;
  WVWindowParent1.Browser := WVBrowser1;

  // The loader already failed: show its error and give up.
  if GlobalWebView2Loader.InitializationError then
  begin
    ShowMessage(GlobalWebView2Loader.ErrorMessage);
    Exit;
  end;

  // Poll the loader, keeping the message queue pumped between checks.
  for attempt := 1 to MAX_WAIT_ROUNDS do
  begin
    if GlobalWebView2Loader.Initialized then
      Break;
    Sleep(WAIT_INTERVAL_MS);
    Application.ProcessMessages;
  end;

  if not GlobalWebView2Loader.Initialized then
  begin
    ShowMessage('WebView2 initialization failed');
    Exit;
  end;

  WVBrowser1.CreateBrowser(WVWindowParent1.Handle);
  // NOTE(review): this assignment runs before OnAfterCreated has fired;
  // confirm that setting UserAgent at this point actually takes effect.
  WVBrowser1.UserAgent := BROWSER_USER_AGENT;
end;
// Called after the browser has been created: sizes the browser window,
// exposes the local "files" folder as https://demo, subscribes to console
// messages and loads the page.
procedure TForm1.WVBrowser1AfterCreated(Sender: TObject);
const
  VIRTUAL_HOST = 'demo';
  START_PAGE = 'https://demo/index.html';
  // Event ID reported to OnDevToolsProtocolEventReceived for console messages.
  CONSOLE_EVENT_ID = 1;
var
  contentDir: string;
begin
  // Required
  WVWindowParent1.UpdateSize;

  // Serve ".\files" (next to the executable) under the virtual host name.
  contentDir := ExtractFilePath(Application.ExeName) + 'files';
  WVBrowser1.CoreWebView2.SetVirtualHostNameToFolderMapping(
    VIRTUAL_HOST,
    PWideChar(contentDir),
    COREWEBVIEW2_HOST_RESOURCE_ACCESS_KIND_ALLOW);

  // Turn on the DevTools console domain.
  WVBrowser1.CallDevToolsProtocolMethod('Console.enable', '{}', 0);

  // Each console message now triggers OnDevToolsProtocolEventReceived
  // with EventID = CONSOLE_EVENT_ID.
  WVBrowser1.SubscribeToDevToolsProtocolEvent('Console.messageAdded', CONSOLE_EVENT_ID);

  // Show the local ".\files\index.html".
  WVBrowser1.Navigate(START_PAGE);
end;
// Handles DevTools protocol events the browser was subscribed to.
// Only event ID 1 (Console.messageAdded) is processed: the console text
// written by index.html is translated into button state and memo lines.
//   aArgs    - event arguments carrying the event parameters as JSON
//   aEventID - ID given when subscribing; other IDs are ignored
procedure TForm1.WVBrowser1DevToolsProtocolEventReceived(Sender: TObject;
  const aWebView: ICoreWebView2;
  const aArgs: ICoreWebView2DevToolsProtocolEventReceivedEventArgs;
  const aEventName: wvstring; aEventID: Integer);
type
  // Field names mirror the keys of the JSON payload.
  TJsonMsg = record
    column: Integer;
    level: String;
    line: Integer;
    source: String;
    text: String;
    url: String;
  end;
  TJsonMessage = record
    message: TJsonMsg;
  end;
const
  MESSAGE_PREFIX = 'message:';
var
  eventArgs: TCoreWebView2DevToolsProtocolEventReceivedEventArgs;
  serializer: TJsonSerializer;
  payload: string;
  res: TJsonMessage;
  msgText: string;
begin
  if aEventID <> 1 then
    Exit;

  // Read the payload through the WebView4Delphi wrapper so that the string
  // buffer handed out by WebView2 is released; calling
  // Get_ParameterObjectAsJson directly and never freeing the returned
  // pointer leaked it on every console message.
  eventArgs := TCoreWebView2DevToolsProtocolEventReceivedEventArgs.Create(aArgs);
  try
    payload := eventArgs.ParameterObjectAsJson;
  finally
    eventArgs.Free;
  end;

  // try/finally: the serializer must be freed even if the JSON is malformed.
  serializer := TJsonSerializer.Create;
  try
    res := serializer.Deserialize<TJsonMessage>(payload);
  finally
    serializer.Free;
  end;

  msgText := res.message.text;
  if msgText = 'event:load' then
  begin
    Button1.Enabled := True;
    Memo1.Lines.Add('Loaded');
  end
  else if msgText = 'event:end' then
  begin
    Button1.Enabled := True;
    Memo1.Lines.Add('Speech recognition finished');
  end
  else if msgText = 'event:audiostart' then
  begin
    Memo1.Lines.Add('Please speak into the microphone');
  end
  else if msgText = 'event:error' then
  begin
    // Previously ignored, so a failed recognition looked like a normal finish.
    Memo1.Lines.Add('Speech recognition error');
  end
  else if msgText.StartsWith(MESSAGE_PREFIX) then
  begin
    // Only a leading prefix marks a result; strip exactly that prefix.
    Memo1.Lines.Add(
      '「' + msgText.Substring(Length(MESSAGE_PREFIX)) + '」'
    );
  end;
end;
initialization
// Unit initialization: clears the previous browser cache (best effort) and
// starts the global WebView2 loader with its user data stored in ".\cache"
// next to the executable.
var cachepath:string;
begin
  cachepath:=ExtractFilePath(Application.ExeName) + 'cache';
  //Delete cache. This is only a cleanup step: a failure (for example a file
  //in the folder that is still locked) must not raise out of the
  //initialization section and prevent the application from starting.
  try
    if DirectoryExists(cachepath) then TDirectory.Delete(cachepath,true);
  except
    on E: Exception do
      OutputDebugString(PChar('Could not delete cache folder: ' + E.Message));
  end;
  //Load and initialize GlobalWebView2Loader
  GlobalWebView2Loader := TWVLoader.Create(nil);
  //Specify the folder for cache, cookies, etc.
  GlobalWebView2Loader.UserDataFolder := cachepath;
  GlobalWebView2Loader.StartWebView2;
end;
end.
(5)Copying "WebView2Loader.dll" to the Executable Folder
Copy the file:
C:\Program Files (x86)\Embarcadero\Studio\22.0\Redist\win32\WebView2Loader.dll
into the same folder as your application’s executable
(Project Folder → Win32\Debug).
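The executable folder depends on the build configuration and the target platform:

| Build configuration and target platform | Executable folder |
| --- | --- |
| (A) "Debug" build "Windows 32‑bit" | ProjectFolder\Win32\Debug |
| (B) "Debug" build "Windows 64‑bit" | ProjectFolder\Win64\Debug |
| (C) "Release" build "Windows 32‑bit" | ProjectFolder\Win32\Release |
| (D) "Release" build "Windows 64‑bit" | ProjectFolder\Win64\Release |

For the 64‑bit targets (B) and (D), copy the 64‑bit build of WebView2Loader.dll instead of the 32‑bit one, because a 64‑bit executable cannot load a 32‑bit DLL.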
(6)Running the Application
Connect a microphone to your PC.
(If you are using a laptop or another device with a built‑in microphone, that microphone can be used as well.)
Select “Run” → “Run” to start the application.
After a short moment, the “Start Speech Recognition (Button1)” button will become enabled.
Click the “Start Speech Recognition (Button1)” button.
When the message “Please speak into the microphone” appears in Memo1,
speak something into the microphone.
If a dialog appears asking for microphone permission, click “Allow”.
When speech recognition succeeds, the recognized text will be displayed in Memo1.
