 {
 ****************************************************************************
 CRW32 project
 Copyright (C) by Kuryakin Alexey, Sarov, Russia, 2011, <kouriakine@mail.ru>
 Int32, Int64 Interlocked Functions, uses for non-blocking synchronization.
 Uses ideas from JCL (Jedi Code Library) for Int32 and _INTLCK.PAS unit
 created by Will DeWitt Jr. <edge@icehouse.net>, 2005, for Int64.
 Modifications:
  20110913 - created from JCL and Will DeWitt's unit.
  20110914 - modified by Alex Kuryakin for Delphi 5, emit CMPXCHG8B via DB.
             Pure Pascal codes uses for all Int64-related functions except of
             LockedCompareExchange, which is basic interlocked primitive.
             Pure Pascal maybe is not fastest, but much more clear way.
 ****************************************************************************
 }

unit _intlcks;

interface

uses sysutils,windows,classes;

 {
 ********************************************************************************
 Purpose:
 --------
  Locked Int32 and Int64 manipulation.
  Routines to manipulate simple typed variables in a thread safe manner.
  It's simplest and fastest way for thread & process non-blocking synchronization.
   |----------------------|-----------------------|------------------|
   |Operation             | new value of Target   | Result           |
   |----------------------|-----------------------|------------------|
   |LockedGet             | Target                | Target           |
   |LockedInc             | Target + 1            | Target + 1       |
   |LockedDec             | Target - 1            | Target - 1       |
   |LockedAdd             | Target + Value        | Target + Value   |
   |LockedSub             | Target - Value        | Target - Value   |
   |LockedAnd             | Target and Value      | Target and Value |
   |LockedOr              | Target or  Value      | Target or  Value |
   |LockedXor             | Target xor Value      | Target xor Value |
   |LockedAdd6432         | Target + Value        | Target + Value   |
   |LockedSub6432         | Target - Value        | Target - Value   |
   |LockedExchange        | Value                 | Target           |
   |LockedExchangeInc     | Target + 1            | Target           |
   |LockedExchangeDec     | Target - 1            | Target           |
   |LockedExchangeAdd     | Target + Value        | Target           |
   |LockedExchangeSub     | Target - Value        | Target           |
   |LockedExchangeAnd     | Target and Value      | Target           |
   |LockedExchangeOr      | Target or  Value      | Target           |
   |LockedExchangeXor     | Target xor Value      | Target           |
   |LockedExchangeAdd6432 | Target + Value        | Target           |
   |LockedExchangeSub6432 | Target - Value        | Target           |
   |LockedCompareExchange | if Target=Comp        | Target           |
   |                      | then Exch else Target | Target           |
   |----------------------|-----------------------|------------------|
 Notes:
 ------
  1) Target variable address must be 32-bit aligned to avoid CPU exceptions.
  2) Int32 functions based on i486 and higher instructions, not available on i386.
  3) Int64 functions based on i586 and higher instructions, not available on i486.
  4) Locked/Add/Sub/Inc/Dec return result of operation, i.e. new value of Target.
  5) LockedExchange/Add/Sub/Inc/Dec return original Target, i.e. before operation.
 Benchmarks:
 -----------
  Tested on CPU: Intel(R) Core(TM)2 Duo P8700 2.53GHz
  -------------------------|--------------------------|-------------------
  Function group           |  Execution time          | Acceptable for ...
  -------------------------|--------------------------|-------------------
  Int32 LockedGet/Inc/Dec  |  ~ 15      nSec per call | IPC + thread sync
  Int64 LockedGet/Inc/Dec  |  ~ 30..350 nSec per call | IPC + thread sync
  CritSections-Enter/Leave |  ~ 30      nSec per call | only  thread sync
  Mutex-WaitFor/Release    |  ~ 2000    nSec per call | IPC + thread sync
  -------------------------|--------------------------|-------------------
  Int32 LockedX 2 times faster compare to critical sections lock/unlock pair,
  Int64 LockedX 70 times faster compare to mutex WaitForSingleObject/Release.
  Note that Int64 locked operations performance is strongly depends on address
  alignment. Data aligned to 8 byte is 10 times faster then not aligned data(?)
  Each system call like WaitForSingleObject,ReleaseMutex takes ~1 mks per call.
  Don't forget that critical sections uses only for synchronization with single
  process threads. While mutexes, semaphores and LockedXX functions can be used
  also for inter process communications. Locked operations can be used for IPC,
  to synchronize data located in shared memory buffers, in non-blocking manner.
 *******************************************************************************
 }
function LockedGet(var Target: Int64): Int64; register; overload;
function LockedInc(var Target: Int64): Int64; register; overload;
function LockedDec(var Target: Int64): Int64; register; overload;
function LockedAdd(var Target: Int64; Value: Int64): Int64; register; overload;
function LockedSub(var Target: Int64; Value: Int64): Int64; register; overload;
function LockedAnd(var Target: Int64; Value: Int64): Int64; register; overload;
function LockedOr(var Target: Int64; Value: Int64): Int64; register; overload;
function LockedXor(var Target: Int64; Value: Int64): Int64; register; overload;
function LockedAdd6432(var Target: Int64; Value: Integer): Int64; register;
function LockedSub6432(var Target: Int64; Value: Integer): Int64; register;
function LockedExchange(var Target: Int64; Value: Int64): Int64; register; overload;
function LockedExchangeInc(var Target: Int64): Int64; register; overload;
function LockedExchangeDec(var Target: Int64): Int64; register; overload;
function LockedExchangeAdd(var Target: Int64; Value: Int64): Int64; register; overload;
function LockedExchangeSub(var Target: Int64; Value: Int64): Int64; register; overload;
function LockedExchangeAnd(var Target: Int64; Value: Int64): Int64; register; overload;
function LockedExchangeOr(var Target: Int64; Value: Int64): Int64; register; overload;
function LockedExchangeXor(var Target: Int64; Value: Int64): Int64; register; overload;
function LockedExchangeAdd6432(var Target: Int64; Value: Integer): Int64; register;
function LockedExchangeSub6432(var Target: Int64; Value: Integer): Int64; register;
function LockedCompareExchange(var Target: Int64; Exch, Comp: Int64): Int64; register; overload;

function LockedGet(var Target: Integer): Integer; register; overload;
function LockedInc(var Target: Integer): Integer; register; overload;
function LockedDec(var Target: Integer): Integer; register; overload;
function LockedAdd(var Target: Integer; Value: Integer): Integer; register; overload;
function LockedSub(var Target: Integer; Value: Integer): Integer; register; overload;
function LockedAnd(var Target:Integer; Value:Integer):Integer; register; overload;
function LockedOr(var Target: Integer; Value: Integer): Integer; register; overload;
function LockedXor(var Target: Integer; Value: Integer): Integer; register; overload;
function LockedExchange(var Target: Integer; Value: Integer): Integer; register; overload;
function LockedExchangeInc(var Target: Integer): Integer; register; overload;
function LockedExchangeDec(var Target: Integer): Integer; register; overload;
function LockedExchangeAdd(var Target: Integer; Value: Integer): Integer; register; overload;
function LockedExchangeSub(var Target: Integer; Value: Integer): Integer; register; overload;
function LockedExchangeAnd(var Target:Integer; Value:Integer):Integer; register; overload;
function LockedExchangeOr(var Target: Integer; Value: Integer): Integer; register; overload;
function LockedExchangeXor(var Target: Integer; Value: Integer): Integer; register; overload;
function LockedCompareExchange(var Target: Integer; Exch, Comp: Integer): Integer; register; overload;
function LockedCompareExchange(var Target: Pointer; Exch, Comp: Pointer): Pointer; register; overload;

function Test_LockedOperations:String;

implementation

 {
 *************************
 Locked Int32 manipulation
 *************************
 }
function LockedGet(var Target: Integer): Integer; register;
asm
 MOV     ECX, EAX
 MOV     EAX, 0
 LOCK XADD [ECX], EAX
end;

function LockedInc(var Target: Integer): Integer; register;
asm
 MOV     ECX, EAX
 MOV     EAX, 1
 LOCK XADD [ECX], EAX
 INC     EAX
end;

function LockedDec(var Target: Integer): Integer; register;
asm
 MOV     ECX, EAX
 MOV     EAX, -1
 LOCK XADD [ECX], EAX
 DEC     EAX
end;

function LockedAdd(var Target: Integer; Value: Integer): Integer; register;
asm
 MOV     ECX, EAX
 MOV     EAX, EDX
 LOCK XADD [ECX], EAX
 ADD     EAX, EDX
end;

function LockedSub(var Target: Integer; Value: Integer): Integer; register;
asm
 MOV     ECX, EAX
 NEG     EDX
 MOV     EAX, EDX
 LOCK XADD [ECX], EAX
 ADD     EAX, EDX
end;

function LockedAnd(var Target:Integer; Value:Integer):Integer; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result and Value, Result) = Result);
 Result := Result and Value;
end;

function LockedOr(var Target: Integer; Value: Integer): Integer; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result or Value, Result) = Result);
 Result := Result or Value;
end;

function LockedXor(var Target: Integer; Value: Integer): Integer; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result xor Value, Result) = Result);
 Result := Result xor Value;
end;

function LockedExchange(var Target: Integer; Value: Integer): Integer; register;
asm
 MOV     ECX, EAX
 MOV     EAX, EDX
 LOCK XCHG [ECX], EAX
end;

function LockedExchangeInc(var Target: Integer): Integer; register;
asm
 MOV     ECX, EAX
 MOV     EAX, 1
 LOCK XADD [ECX], EAX
end;

function LockedExchangeDec(var Target: Integer): Integer; register;
asm
 MOV     ECX, EAX
 MOV     EAX, -1
 LOCK XADD [ECX], EAX
end;

function LockedExchangeAdd(var Target: Integer; Value: Integer): Integer; register;
asm
 MOV     ECX, EAX
 MOV     EAX, EDX
 LOCK XADD [ECX], EAX
end;

function LockedExchangeSub(var Target: Integer; Value: Integer): Integer; register;
asm
 MOV     ECX, EAX
 NEG     EDX
 MOV     EAX, EDX
 LOCK XADD [ECX], EAX
end;

function  LockedExchangeAnd(var Target:Integer; Value:Integer):Integer; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result and Value, Result) = Result);
end;

function  LockedExchangeOr(var Target: Integer; Value: Integer): Integer; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result or Value, Result) = Result);
end;

function  LockedExchangeXor(var Target: Integer; Value: Integer): Integer; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result xor Value, Result) = Result);
end;

function LockedCompareExchange(var Target: Integer; Exch, Comp: Integer): Integer; register;
asm
 XCHG    EAX, ECX
 LOCK CMPXCHG [ECX], EDX
end;

function LockedCompareExchange(var Target: Pointer; Exch, Comp: Pointer): Pointer; register;
asm
 XCHG    EAX, ECX
 LOCK CMPXCHG [ECX], EDX
end;

 {
 *************************
 Locked Int64 manipulation
 *************************
 }
function  LockedInc(var Target: Int64): Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result + 1, Result) = Result);
 Result := Result + 1;
end;

function  LockedDec(var Target: Int64): Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result - 1, Result) = Result);
 Result := Result - 1;
end;

function  LockedAdd(var Target: Int64; Value: Int64): Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result + Value, Result) = Result);
 Result := Result + Value;
end;

function  LockedSub(var Target: Int64; Value: Int64): Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result - Value, Result) = Result);
 Result := Result - Value;
end;

function  LockedAnd(var Target:Int64; Value:Int64):Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result and Value, Result) = Result);
 Result := Result and Value;
end;

function  LockedOr(var Target: Int64; Value: Int64): Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result or Value, Result) = Result);
 Result := Result or Value;
end;

function  LockedXor(var Target: Int64; Value: Int64): Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result xor Value, Result) = Result);
 Result := Result xor Value;
end;

function  LockedAdd6432(var Target: Int64; Value: Integer): Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result + Value, Result) = Result);
 Result := Result + Value;
end;

function  LockedSub6432(var Target: Int64; Value: Integer): Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result - Value, Result) = Result);
 Result := Result - Value;
end;

function  LockedExchange(var Target: Int64; Value: Int64): Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Value, Result) = Result);
end;

function  LockedExchangeInc(var Target: Int64): Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result + 1, Result) = Result);
end;

function  LockedExchangeDec(var Target: Int64): Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result - 1, Result) = Result);
end;

function  LockedExchangeAdd(var Target: Int64; Value: Int64): Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result + Value, Result) = Result);
end;

function  LockedExchangeSub(var Target: Int64; Value: Int64): Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result - Value, Result) = Result);
end;

function  LockedExchangeAnd(var Target:Int64; Value:Int64):Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result and Value, Result) = Result);
end;

function  LockedExchangeOr(var Target: Int64; Value: Int64): Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result or Value, Result) = Result);
end;

function  LockedExchangeXor(var Target: Int64; Value: Int64): Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result xor Value, Result) = Result);
end;

function  LockedExchangeAdd6432(var Target: Int64; Value: Integer): Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result + Value, Result) = Result);
end;

function  LockedExchangeSub6432(var Target: Int64; Value: Integer): Int64; register;
begin
 repeat
  Result := Target;
 until (LockedCompareExchange(Target, Result - Value, Result) = Result);
end;

{$IFDEF VER130}           // if Delphi 5
 {$DEFINE EMIT_CMPXCHG8B} // then should emit
{$ENDIF}                  // CMPXCHG8B opcode

 // if EMIT_CMPXCHG8B then emit opcode via DB:
 // LOCK CMPXCHG8B [EDI] -> DB $F0,$0F,$C7,$0F

function  LockedCompareExchange(var Target:Int64; Exch,Comp:Int64):Int64; register;
asm // -> EAX=Target; ESP+4=Exch; ESP+12=Comp; <- EDX:EAX=Result
 PUSH    EBX
 PUSH    EDI
 MOV     EDI, EAX
 MOV     EBX, DWORD PTR [Exch]
 MOV     ECX, DWORD PTR [Exch+4]
 MOV     EAX, DWORD PTR [Comp]
 MOV     EDX, DWORD PTR [Comp+4]
{$IFDEF EMIT_CMPXCHG8B}
 DB      $F0,$0F,$C7,$0F
{$ELSE}
 LOCK    CMPXCHG8B [EDI]
{$ENDIF}
 POP     EDI
 POP     EBX
end;

function  LockedGet(var Target:Int64):Int64; register;
asm // -> EAX=Target; <- EDX:EAX=Result
 PUSH    EBX
 PUSH    EDI
 MOV     EDI, EAX
 MOV     EBX, 0
 MOV     ECX, 0
 MOV     EAX, 0
 MOV     EDX, 0
{$IFDEF EMIT_CMPXCHG8B}
 DB      $F0,$0F,$C7,$0F
{$ELSE}
 LOCK    CMPXCHG8B [EDI]
{$ENDIF}
 POP     EDI
 POP     EBX
end;
{
function LockedGet(var Target: Int64): Int64;
begin
 Result:=LockedCompareExchange(Target, 0, 0);
end;
}

function Test_LockedOperations:String;
const DateTimeToMs = 1000*60*60*24;
var i,i1,n:Integer; t1,t2:Double; List:TStringList;
    crit:TRtlCriticalSection; mutex:THandle; d1:Int64;
begin
 Result:='Locked operations test and benchmark.';
 List:=TStringList.Create;
 try
  List.Add(Result);
  // CriticalSections
  n:=1000*1000*10;
  InitializeCriticalSection(crit);
  t1:=SysUtils.Now*DateTimeToMs;
  for i:=1 to n do begin EnterCriticalSection(crit); LeaveCriticalSection(crit); end;
  t2:=SysUtils.Now*DateTimeToMs;
  DeleteCriticalSection(crit);
  List.Add(Format('CritSections: %g ns/op',[(t2-t1)*1000*1000/n]));
  // Mutex
  n:=1000*1000*1;
  mutex:=CreateMutex(nil,true,'mca_test');;
  t1:=SysUtils.Now*DateTimeToMs;
  for i:=1 to n do begin WaitForSingleObject(mutex,0); ReleaseMutex(mutex); end;
  t2:=SysUtils.Now*DateTimeToMs;
  CloseHandle(mutex);
  List.Add(Format('Mutex: %g ns/op',[(t2-t1)*1000*1000/n]));
  // LockedGet32
  n:=1000*1000*10;
  t1:=SysUtils.Now*DateTimeToMs;
  for i:=1 to n do begin i1:=i; if LockedGet(i1)<>i then Exit; if i1<>i then Exit; end;
  t2:=SysUtils.Now*DateTimeToMs;
  List.Add(Format('LockedGet32: %g ns/op',[(t2-t1)*1000*1000/n]));
  // LockedInc32
  n:=1000*1000*10;
  t1:=SysUtils.Now*DateTimeToMs;
  for i:=1 to n do begin i1:=i; if LockedInc(i1)<>i+1 then Exit; if i1<>i+1 then Exit; end;
  t2:=SysUtils.Now*DateTimeToMs;
  List.Add(Format('LockedInc32: %g ns/op',[(t2-t1)*1000*1000/n]));
  // LockedDec32
  n:=1000*1000*10;
  t1:=SysUtils.Now*DateTimeToMs;
  for i:=1 to n do begin i1:=i; if LockedDec(i1)<>i-1 then Exit; if i1<>i-1 then Exit; end;
  t2:=SysUtils.Now*DateTimeToMs;
  List.Add(Format('LockedDec32: %g ns/op',[(t2-t1)*1000*1000/n]));
  // LockedCompareExchange
  n:=1000*1000*10;
  t1:=SysUtils.Now*DateTimeToMs;
  for i:=1 to n do begin i1:=i; if LockedCompareExchange(i1,i,i)<>i then Exit; if i1<>i then Exit; end;
  t2:=SysUtils.Now*DateTimeToMs;
  List.Add(Format('LockedCompareExchange32: %g ns/op',[(t2-t1)*1000*1000/n]));
  // LockedGet64
  n:=1000*1000*10;
  t1:=SysUtils.Now*DateTimeToMs;
  for i:=1 to n do begin d1:=i; if LockedGet(d1)<>i then Exit; if d1<>i then Exit; end;
  t2:=SysUtils.Now*DateTimeToMs;
  List.Add(Format('LockedGet64: %g ns/op',[(t2-t1)*1000*1000/n]));
  // LockedInc64
  n:=1000*1000*10;
  t1:=SysUtils.Now*DateTimeToMs;
  for i:=1 to n do begin d1:=i; if LockedInc(d1)<>i+1 then Exit; if d1<>i+1 then Exit; end;
  t2:=SysUtils.Now*DateTimeToMs;
  List.Add(Format('LockedInc64: %g ns/op',[(t2-t1)*1000*1000/n]));
  // LockedDec64
  n:=1000*1000*10;
  t1:=SysUtils.Now*DateTimeToMs;
  for i:=1 to n do begin d1:=i; if LockedDec(d1)<>i-1 then Exit; if d1<>i-1 then Exit; end;
  t2:=SysUtils.Now*DateTimeToMs;
  List.Add(Format('LockedDec64: %g ns/op',[(t2-t1)*1000*1000/n]));
  // LockedCompareExchange64
  n:=1000*1000*10;
  t1:=SysUtils.Now*DateTimeToMs;
  for i:=1 to n do begin d1:=i; if LockedCompareExchange(d1,i,i)<>i then Exit; if d1<>i then Exit; end;
  t2:=SysUtils.Now*DateTimeToMs;
  List.Add(Format('LockedCompareExchange64: %g ns/op',[(t2-t1)*1000*1000/n]));
 finally
  List.Add('Ok!');
  Result:=List.Text;
  FreeAndNil(List);
 end;
end;

initialization

finalization

end.

