//
// System calls for AMD64, Linux
//
// func Syscall(trap int64, a1, a2, a3 uintptr) (r1, r2, err uintptr);
// Trap # in AX, args in DI SI DX R10 R8 R9, return in AX DX
// Note that this differs from "standard" ABI convention, which
// would pass 4th arg in CX, not R10.
我们只实现System V AMD64 ABI规范的版本。在System V版本中,最多可以通过寄存器传递六个整数参数,依次对应DI、SI、DX、CX、R8和R9六个寄存器(浮点数参数则需要通过XMM寄存器传递),返回值依然通过AX返回。对比系统调用的规范可以发现,系统调用的第四个参数通过R10寄存器传递,而C语言函数的第四个参数通过CX传递。
// SyscallWrite_Darwin is declared without a body, so its implementation is
// supplied outside this declaration. It takes a file descriptor and a message
// and returns an int.
// NOTE(review): presumably issues the Darwin write system call and returns
// its result — the implementation is not visible here; confirm.
func SyscallWrite_Darwin(fd int, msg string) int
// main writes a greeting through SyscallWrite_Darwin (file descriptor 1) when
// the program is running on darwin; on every other OS it does nothing.
func main() {
	if runtime.GOOS != "darwin" {
		return
	}
	SyscallWrite_Darwin(1, "hello syscall!\n")
}
#include <stdint.h>
/* myadd returns the sum of its two 64-bit signed arguments. */
int64_t myadd(int64_t a, int64_t b) {
    int64_t sum = a;
    sum += b;
    return sum;
}
// asmCallCAdd is the Go prototype for a routine implemented in assembly
// (declared without a body). It receives the address of a function in cfun
// together with two int64 operands and returns an int64.
// NOTE(review): cfun is expected to point at a function that takes two
// integer arguments under the System V AMD64 calling convention — confirm
// at the call site.
func asmCallCAdd(cfun uintptr, a, b int64) int64
// asmCallCAdd calls the function at address cfun, passing a and b in the
// first two integer argument registers of the System V AMD64 ABI (DI, SI),
// and returns whatever the callee leaves in AX.
//
// func asmCallCAdd(cfun uintptr, a, b int64) int64
//
// NOTE(review): the frame is declared as $0 with no argument size, and the
// CALL below runs on the current stack without any stack switch — confirm
// the callee's stack and alignment requirements are met.
TEXT ·asmCallCAdd(SB), NOSPLIT, $0
MOVQ cfun+0(FP), AX // AX = cfun, the address to call
MOVQ a+8(FP), DI // DI = a (first integer argument)
MOVQ b+16(FP), SI // SI = b (second integer argument)
CALL AX
// Store the callee's AX into the Go result slot.
MOVQ AX, ret+24(FP)
RET
package cpu
// X86 is the package-level value of type x86 that callers read to query
// feature flags (for example X86.HasAVX2).
// NOTE(review): nothing visible here assigns its fields — presumably they are
// populated during package initialization; confirm.
var X86 x86
// x86 is a set of feature flags, one boolean per CPU capability. Each field
// holds the cpuid feature bit of the same name. HasAVX and HasAVX2 are
// stricter: they are only set when the OS supports the XMM and YMM registers
// in addition to the cpuid feature bit being set.
//
// NOTE(review): the previous comment stated that the struct is padded to
// avoid false sharing, but no padding fields are present in this
// declaration — confirm whether padding is intended.
type x86 struct {
	HasAES, HasADX bool

	// AVX family; see the type comment for the extra OS-support condition.
	HasAVX, HasAVX2 bool

	HasBMI1, HasBMI2 bool

	HasERMS, HasFMA bool

	HasOSXSAVE bool

	HasPCLMULQDQ, HasPOPCNT bool

	// SSE family.
	HasSSE2, HasSSE3, HasSSSE3, HasSSE41, HasSSE42 bool
}
import (
cpu "path/to/cpu"
)
// main checks the AVX2 feature flag exposed by the cpu package. The
// AVX2-specific work is left as a placeholder.
func main() {
	if !cpu.X86.HasAVX2 {
		return
	}
	// AVX2 is supported.
}
// func CopySlice_AVX2(dst, src []byte, len int)
//
// CopySlice_AVX2 copies the first len bytes of src into dst. Whole 32-byte
// blocks are moved through Y0 with unaligned AVX loads/stores; any remaining
// 0..31 bytes are copied one at a time, so exactly len bytes are written.
// A len that is zero or negative copies nothing.
//
// The caller must ensure both dst and src hold at least len bytes and that
// the CPU supports AVX2; neither is checked here.
//
// Argument layout (56 bytes): dst slice header at 0..23, src slice header at
// 24..47, len at 48. Offset 32 is src's length word, not the len parameter.
TEXT ·CopySlice_AVX2(SB), NOSPLIT, $0-56
	MOVQ dst_base+0(FP), DI  // DI = &dst[0]
	MOVQ src_base+24(FP), SI // SI = &src[0]
	MOVQ len+48(FP), BX      // BX = len (the third parameter)
	XORQ AX, AX              // AX = number of bytes copied so far
	MOVQ BX, CX
	ANDQ $-32, CX            // CX = len rounded down to a multiple of 32
	JMP  vec_check

vec_loop:
	VMOVDQU (SI)(AX*1), Y0
	VMOVDQU Y0, (DI)(AX*1)
	ADDQ    $32, AX

vec_check:
	CMPQ AX, CX
	JL   vec_loop // signed compare: taken while AX < CX

	// Clear the upper YMM halves so later SSE code avoids transition stalls.
	VZEROUPPER
	JMP tail_check

tail_loop:
	MOVB (SI)(AX*1), DX
	MOVB DX, (DI)(AX*1)
	INCQ AX

tail_check:
	CMPQ AX, BX
	JL   tail_loop // copy the remaining len%32 bytes
	RET