Thread safe TStack (TThreadStack)
This post was migrated from my old blog delphi-snippets.blogspot.com, for explanation about this switch see my introduction post.
Recently i've been doing a lot of multithreading, and luckily Delphi provides with a handy bunch of classes to make the developers life easier. You've got some basic synchronization classes (TMutex, TEvent, TCriticalSection, TMultiReadExclusiveWriteSynchronizer...) and an basic data container (TThreadList). But for one program I needed a thread safe stack. In this post I will describe how I created my own.
There is not thread safe stack in Borland Turbo Delphi 2006, so off course I started with a Google for delphi TThreadStack, which at this time gives zero results. Searching the newsgroups I found an interesting group called comp.programming.threads, searching that group I found one Pascal Lock-Free stack (which basically means not using a critical section, or any other mechanism, to lock the data). But after testing it, FastMM pointed out an memory leak. I provided the author with a test case to cause the memory leak, but after 5 months no reply.
But I'm a hard person to please, and although it is indeed lock free, I was wondering if in my situation this lock-free solution wasn't to complicated and perhaps slower. So I created an simple TThreadStack from looking at the principle of the TThreadList (Very simple!), and compared it with the lock-free solution.
The source demonstrates is the test project.
program Project2;
{$APPTYPE CONSOLE}
uses
FastMM4,
unThreadStack,
FreeStack, Math,
Windows, SysUtils, Classes;
const
NumberOfAllocations = 1000;
PopTimeOut = 1;
PushTimeOut = 1;
NumberOfThreads = 8; // must be an multiple of 4
NumberOfTest = 10;
type
TTestRec = packed record
BigField: array[0..254] of Char;
SmallerField: Extended;
SmallField: Byte;
end;
PTestRec = ^TTestRec;
TFreeStackThread = class(TThread)
FPopper: Boolean;
FDestination: TFreeStack;
FFinished: THandle;
protected
procedure Execute; override;
public
constructor Create(ADestination: TFreeStack; APopper: Boolean; AFinished: THandle);
end;
TThreadStackThread = class(TThread)
FPopper: Boolean;
FDestination: TThreadStack;
FFinished: THandle;
protected
procedure Execute; override;
public
constructor Create(ADestination: TThreadStack; APopper: Boolean; AFinished: THandle);
end;
{ TThreadStackThread }
constructor TThreadStackThread.Create(ADestination: TThreadStack;
APopper: Boolean; AFinished: THandle);
begin
FDestination := ADestination;
FPopper := APopper;
FFinished := AFinished;
inherited Create(False);
end;
procedure TThreadStackThread.Execute;
var
TempStack: TStack;
p: PTestRec;
counter: Int64;
temp: LongWord;
begin
counter := 0;
while (not Terminated) and (counter < NumberOfAllocations) do
begin
TempStack := FDestination.LockStack;
try
if FPopper then
begin
if TempStack.Count > 0 then
begin
Dispose(PTestRec(TempStack.Pop));
inc(counter);
end;
end
else
begin
New(p);
TempStack.Push(p);
inc(counter);
end;
finally
FDestination.UnlockStack;
end;
if FPopper then
Sleep(PopTimeOut)
else
Sleep(PushTimeOut);
end;
ReleaseSemaphore(FFinished, 1, @temp);
end;
{ TFreeStackThread }
constructor TFreeStackThread.Create(ADestination: TFreeStack; APopper: Boolean; AFinished: THandle);
begin
FDestination := ADestination;
FPopper := APopper;
FFinished := AFinished;
inherited Create(False);
end;
procedure TFreeStackThread.Execute;
var
p: PTestRec;
counter: Int64;
temp: LongWord;
begin
counter := 0;
p := nil;
while (not Terminated) and (counter < NumberOfAllocations) do
begin
if FPopper then
begin
if FDestination.Count > 0 then
begin
if FDestination.Pop(TObject(p)) then
begin
Dispose(p);
inc(counter);
end;
end;
end
else
begin
if p = nil then
New(p);
if FDestination.Push(TObject(p)) then
begin
p := nil;
inc(counter);
end;
end;
if FPopper then
Sleep(PopTimeOut)
else
Sleep(PushTimeOut);
end;
ReleaseSemaphore(FFinished, 1, @temp);
end;
var
AThreadStacksTests: array[0..NumberOfThreads - 1] of TThreadStackThread;
AFreeStacksTests: array[0..NumberOfThreads - 1] of TFreeStackThread;
i, j: Integer;
Start, Stop, Freq: Int64;
ThreadStacks: array[0..1] of TThreadStack;
FreeStacks: array[0..1] of TFreeStack;
ResultThread: array[0..NumberOfTest - 1] of Double;
ResultFree: array[0..NumberOfTest - 1] of Double;
Finished: THandle;
Mean, StdDev: Extended;
begin
ThreadStacks[0] := TThreadStack.Create;
ThreadStacks[1] := TThreadStack.Create;
FreeStacks[0] := TFreeStack.Create;
FreeStacks[1] := TFreeStack.Create;
Finished := CreateSemaphore(nil, 0, NumberOfThreads, 'Thread runners');
Writeln('Starting ThreadStack threads');
for j := 0 to NumberOfTest - 1 do
begin
QueryPerformanceCounter(Start);
for I := 0 to NumberOfThreads - 1 do
AThreadStacksTests[i] := TThreadStackThread.Create(ThreadStacks[i mod 2], (i mod 4) >= 2, Finished);
for I := 0 to NumberOfThreads - 1 do
WaitForSingleObject(Finished, INFINITE);
QueryPerformanceCounter(Stop);
for I := 0 to NumberOfThreads - 1 do
AThreadStacksTests[i].Free;
ResultThread[j] := Stop - Start;
end;
Writeln('ThreadStack done.');
Writeln('Starting TFreeStack threads');
for j := 0 to NumberOfTest - 1 do
begin
QueryPerformanceCounter(Start);
for I := 0 to NumberOfThreads - 1 do
AFreeStacksTests[i] := TFreeStackThread.Create(FreeStacks[i mod 2], (i mod 4) >= 2, Finished);
for I := 0 to NumberOfThreads - 1 do
WaitForSingleObject(Finished, INFINITE);
QueryPerformanceCounter(Stop);
for I := 0 to NumberOfThreads - 1 do
AFreeStacksTests[i].Free;
ResultFree[j] := Stop - Start;
end;
Writeln('TFreeStack done.');
Writeln(Format('Calculating the mean and the standard deviation out of %d runs.', [NumberOfTest]));
QueryPerformanceFrequency(Freq);
MeanAndStdDev(ResultThread, Mean, StdDev);
Writeln(Format('TThreadStack: %f (%f)', [Mean, StdDev]));
Writeln(Format('TThreadStack: %fms (%fms)', [Mean / (Freq / 1000), StdDev / (Freq / 1000)]));
MeanAndStdDev(ResultFree, Mean, StdDev);
Writeln(Format('TFreeStack: %f (%f)', [Mean, StdDev]));
Writeln(Format('TFreeStack: %fms (%fms)', [Mean / (Freq / 1000), StdDev / (Freq / 1000)]));
if DebugHook <> 0 then
Readln;
{ freeing everything}
ThreadStacks[0].Free;
ThreadStacks[1].Free;
FreeStacks[0].Free;
FreeStacks[1].Free;
CloseHandle(Finished);
end.
This one also creates the memory leak. Running the code on a P4 2.26 returns this result:
Starting ThreadStack threads
ThreadStack done.
Starting TFreeStack threads
TFreeStack done.
Calculating the mean and the standard deviation out of 10 runs.
TThreadStack: 7152374,20 (26574,65)
TThreadStack: 1998,12ms (7,42ms)
TFreeStack: 7339945,80 (310570,01)
TFreeStack: 2050,52ms (86,76ms)
So the TFreestack is all most the same speed as the simple critical section based TThreadStack, although I know now the test isn't what you'd call regular, because it's constantly trying to push and pop. So perhaps a more normal situation would result differently. But in my program this situation was expected.
I also liked the simplicity of the TThreadStack above the complexity of the FreeStack, were you had to compile a piece TASM (containing the CAS) for it to work. Another show stopper was the memory leak. But actually I just wanted to post my simple TThreadStack so that next time I have to use it, I can just wander to my own blog. So without further ado, I bring you the following source.
unit unThreadStack;
interface
uses
Windows, Contnrs;
type
TStack = Contnrs.TStack;
TThreadStack = class
private
FStack: TStack;
FLock :TRTLCriticalSection;
public
constructor Create();
destructor Destroy; override;
function LockStack : TStack;
procedure UnlockStack;
function Count: Integer;
function Push(AItem: Pointer): Pointer;
function Pop: Pointer;
function Peek: Pointer;
end;
implementation
{ TThreadStack }
function TThreadStack.LockStack: TStack;
begin
EnterCriticalSection(FLock);
Result := FStack;
end;
function TThreadStack.Count: Integer;
begin
EnterCriticalSection(FLock);
try
Result := FStack.Count;
finally
LeaveCriticalSection(FLock);
end;
end;
constructor TThreadStack.Create();
begin
inherited Create();
InitializeCriticalSection(FLock);
FStack := Contnrs.TStack.Create;
end;
destructor TThreadStack.Destroy;
begin
DeleteCriticalSection(FLock);
FStack.Free;
inherited;
end;
function TThreadStack.Peek: Pointer;
begin
EnterCriticalSection(FLock);
try
Result := FStack.Peek;
finally
LeaveCriticalSection(FLock);
end;
end;
function TThreadStack.Pop: Pointer;
begin
EnterCriticalSection(FLock);
try
Result := FStack.Pop;
finally
LeaveCriticalSection(FLock);
end;
end;
function TThreadStack.Push(AItem: Pointer): Pointer;
begin
EnterCriticalSection(FLock);
try
Result := FStack.Push(AItem);
finally
LeaveCriticalSection(FLock);
end;
end;
procedure TThreadStack.UnlockStack;
begin
LeaveCriticalSection(FLock);
end;
end.
If you use this source, I would like it if you just left a comment on this blog.
Tags: delphi, multithreading, stack
Fast reading of files using Memory Mapping
This post was migrated from my old blog delphi-snippets.blogspot.com, for explanation about this switch see my introduction post. It has been six months since I last posted something. Lets just say things got a little busy :). And posting source code on Blogspot seemed to be a bitch because blogspot would filter out the enters. I solved that in the previous post by using an <br /> as an enter. But when copying and pasting from the page the newlines were lost (offcourse DelForExp fixes that.. but still it sucked).
Now I have just a little bit of time, and a few articles I wanted to post. So after some testing I found out blogspot fixed the enter removal and now I’ll try to post more frequently.
Now let’s get ontopic, Memory Mapped Files can be very helpful for reading large files. Looking through the internet you can find many advantages and disadvantages. The important thing is, think about what your doing, MMF can be very fast in one application. But slow in an other, it all depends on the situation, there are enough articles about the subject (for instance this one by the Delphi Compiler Team)
I like MMF a lot when using binary files of a certain format. Let’s assume we have the following file format:
TCustomerStruct = packed record
CustomerID: Longword;
CustomerName: array[0..254] of Char;
CustomerBirthDay: TDateTime;
CustomerRate: Double;
AccountManagerID: Longword;
end;
You could read this using BlockRead:
var
CustomerFile: file of TCustomerStruct;
Customers: array of TCustomerStruct;
i : integer;
begin
AssignFile(CustomerFile,'c:\customers.cus');
try
Reset(CustomerFile); // open the file for reading
SetLength(Customers, FileSize(CustomerFile)); // create the array
BlockRead(CustomerFile, Customers, Length(Customers)); // Read the hole party in to the array
for i := 0 to High(Customers) do
// List all the customers in a memo
memCustomerList.Lines.Add('Name: '+ Customers[i].CustomerName);
finally
CloseFile(CustomerFile);
end;
And now using MemoryMapping:
type
TCustomerStructArray = array[0..MaxInt div SizeOf(TCustomerStruct) - 1] of TCustomerStruct;
PCustomerStructArray = ^TCustomerStructArray;
var
CustomerFile : TMappedFile;
Customers: PCustomerStructArray;
i : integer;
begin
CustomerFile := TMappedFile.Create;
try
CustomerFile.MapFile('c:\customers.cus');
Customers := PCustomerStructArray(CustomerFile.Content); // not needed, but handy
for i := 0 to CustomerFile.Size div SizeOf(TCustomerStruct) -1 do
memCustomerList.Lines.Add('Name: '+ Customers[i].CustomerName);
finally
CustomerFile.Free;
end;
The MaxInt div SizeOf(TCustomerStruct) – 1 is the maximum amount of records (thus memory) loaded at once.
The TMappedFile class is something I created myself so I can be lazy. Off course I will share that piece of code too.
unit unFileMapping;
{
Copyright (c) 2005-2006 by Davy Landman
See the file COPYING.FPC, included in this distribution,
for details about the copyright. Alternately, you may use this source under the provisions of MPL v1.x or later
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
}
interface
uses
Windows, SysUtils;
type
TMappedFile = class
private
FMapping: THandle;
FContent: Pointer;
FSize: Integer;
procedure MapFile(const AFileName: WideString);
public
constructor Create(const AFileName: WideString);
destructor Destroy; override;
property Content: Pointer read FContent;
property Size: Integer read FSize;
end;
implementation
function FileExistsLongFileNames(const FileName: WideString): Boolean;
begin
if Length(FileName) < 2 then
begin
Result := False;
Exit;
end;
if CompareMem(@FileName[1], @WideString('\\')[1], 2) then
Result := (GetFileAttributesW(PWideChar(FileName)) and FILE_ATTRIBUTE_DIRECTORY = 0)
else
Result := (GetFileAttributesW(PWideChar(WideString('\\?\' + FileName))) and FILE_ATTRIBUTE_DIRECTORY = 0)
end;
{ TMappedFile }
constructor TMappedFile.Create(const AFileName: WideString);
begin
inherited Create;
if FileExistsLongFileNames(AFileName) then
MapFile(AFileName)
else
raise Exception.Create('File "' + AFileName + '" does not exists.');
end;
destructor TMappedFile.Destroy;
begin
if Assigned(FContent) then
begin
UnmapViewOfFile(FContent);
CloseHandle(FMapping);
end;
inherited;
end;
procedure TMappedFile.MapFile(const AFileName: WideString);
var
FileHandle: THandle;
begin
if CompareMem(@(AFileName[1]), @('\\'[1]), 2) then
{ Allready an UNC path }
FileHandle := CreateFileW(PWideChar(AFileName), GENERIC_READ, FILE_SHARE_READ or
FILE_SHARE_WRITE, nil, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0)
else
FileHandle := CreateFileW(PWideChar(WideString('\\?\' + AFileName)), GENERIC_READ, FILE_SHARE_READ or
FILE_SHARE_WRITE, nil, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
if FileHandle <> 0 then
try
FSize := GetFileSize(FileHandle, nil);
if FSize <> 0 then
begin
FMapping := CreateFileMappingW(FileHandle, nil, PAGE_READONLY, 0, 0, nil);
//Win32Check(FMapping <> 0);
end;
finally
CloseHandle(FileHandle);
end;
if FSize = 0 then
FContent := nil
else
FContent := MapViewOfFile(FMapping, FILE_MAP_READ, 0, 0, 0);
//Win32Check(FContent <> nil);
end;
end.
The big advantage is, that with BlockRead you can either read the whole content of the file in the array, or buffering the file in blocks. With MMF there is no need to worry about it (unless you get very big files), Windows automatically arranges the memory when requested.
Getting (possibly) 500% speed gain on divisions
This post was migrated from My old blog delphi-snippets.blogspot.com, for explanation about this switch see my introduction post. One of my fellow students once made a remark about the old days when a division was much faster if it was written as multiplication.
This means, instead of devising with a 100, you multiply with 0.01 (1/100). Because some programs of mine have a lot of divisions in their core loops, I investigated the difference.
I created the following test program.
program DivVsMult;
{$APPTYPE CONSOLE}
uses
SysUtils,
Windows;
var
Start,Stop, Start2, Stop2, Freq:int64;
i : integer;
t : real;
CpuSpeed : integer;
begin
{ TODO -oUser -cConsole Main : Insert code here }
{ Cpu Speed fastes cpu = 1 slower => 10
it's just to determin the number of time to do the loop
Maxint div CpuSpeed is calculated }
if ParamCount = 1 then
CpuSpeed := StrToIntDef(ParamStr(1),1)
else
CpuSpeed := 10;
Writeln('Simple Number division:');
Writeln('Calculating');
QueryPerformanceFrequency(freq);
QueryPerformanceCounter(Start);
for i:=0 to MaxInt div CpuSpeed do
t := i / 100;
QueryPerformanceCounter(Stop);
Writeln(Format('First Pass Result: %f',[t]));
{ This is needed because the compiler would optimize,
and would notice the result of the loop isn't used at all,
so therefor the result is useless.. so depending on the compiler, it will
choose what to do with it, this disables that optimization }
QueryPerformanceCounter(Start2);
for i:=0 to MaxInt div CpuSpeed do
t := i * (1/100);
QueryPerformanceCounter(Stop2);
Writeln(Format('Second Pass Result: %15.6f',[t]));
{ This is needed because the compiler would optimize,
and would notice the result of the loop isn't used at all,
so therefor the result is useless.. so depending on the compiler, it will
choose what to do with it, this disables that optimization }
Writeln('Done, Results:');
Writeln(Format('/ 100 Time: %6.4f seconds'+#13#10+
'/ 100 Clock: %d ticks'+#13#10+
'* 0.01 Time: %6.4f seconds'+#13#10+
'* 0.01 Clock: %d ticks',[(Stop-Start) / freq, (Stop-Start), (Stop2-Start2) / freq, (Stop2-Start2)]));
Writeln;
Writeln('Odd Number division:');
QueryPerformanceCounter(Start);
for i:=0 to high(i) div CpuSpeed do
t := i / 556;
QueryPerformanceCounter(Stop);
Writeln(Format('First Pass Result: %15.6f',[t]));
{ This is needed because the compiler would optimize,
and would notice the result of the loop isn't used at all,
so therefor the result is useless.. so depending on the compiler, it will
choose what to do with it, this disables that optimization }
QueryPerformanceCounter(Start2);
for i:=0 to high(i) div CpuSpeed do
t := i * (1/556);
QueryPerformanceCounter(Stop2);
Writeln(Format('Second Pass Result: %15.6f',[t]));
Writeln(Format('/ 556 Time: %6.4f seconds'+#13#10+
'/ 556 Clock: %d ticks'+#13#10+
'* (1/556) Time: %6.4f seconds'+#13#10+
'* (1/556) Clock: %d ticks',[(Stop-Start) / freq, (Stop-Start), (Stop2-Start2) / freq, (Stop2-Start2)]));
Writeln(Format(' (1/556) = %15.14f (approximate)',[1/556]));
Readln;
end.
On an old P3 900Mhz:
Simple Number division:
Calculating
First Pass Result: 2147483,64
Second Pass Result: 2147483,64
Done, Results:
/ 100 Time: 10,3319 seconds
/ 100 Clock: 36983482 ticks
* 0.01 Time: 2,0378 seconds
* 0.01 Clock: 7294251 ticks
Odd Number division:
First Pass Result: 386238,0647482014610000
Second Pass Result: 386238,0647482014610000
Done, Results:
/ 556 Time: 10,0735 seconds
/ 556 Clock: 36058581 ticks
* (1/556) Time: 2,0446 seconds
* (1/556) Clock: 7318775 ticks
(1/556) = 0,0017985611510791 (approximate)
On a new P4 2.3 Ghz:
Simple Number division:
Calculating
First Pass Result: 2147483.64
Second Pass Result: 2147483.64
Done, Results:
/ 100 Time: 4.6227 seconds
/ 100 Clock: 16547055 ticks
* 0.01 Time: 1.0782 seconds
* 0.01 Clock: 3859508 ticks
Odd Number division:
First Pass Result: 386238.064748
Second Pass Result: 386238.064748
Done, Results:
/ 556 Time: 4.5820 seconds
/ 556 Clock: 16401425 ticks
* (1/556) Time: 12.1746 seconds
* (1/556) Clock: 43579366 ticks
(1/556) = 0.00179856115108 (approximate)
The results are variating, on simple numbers like 0.01 the speedup is allways working, but somehow the very complex numbers tend to be slower sometimes.
I use this tip a lot when working with percentage.