Repo created

This commit is contained in:
Fr4nz D13trich 2025-11-22 14:04:28 +01:00
parent 81b91f4139
commit f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions

View file

@ -0,0 +1,135 @@
;
; jcolsamp.inc - private declarations for color conversion & up/downsampling
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
; --------------------------------------------------------------------------
; pseudo-registers to make ordering of RGB configurable
;
; Component index 0 -> pseudo-register pair A/B (MMX, SSE and AVX views).
; Whichever of RGB_RED / RGB_GREEN / RGB_BLUE equals 0 selects the physical
; pair: red -> 0/1, green -> 2/3, blue -> 4/5; if none matches, 6/7 is used.
%if RGB_RED == 0
  %define mmA   mm0
  %define xmmA  xmm0
  %define ymmA  ymm0
  %define mmB   mm1
  %define xmmB  xmm1
  %define ymmB  ymm1
%elif RGB_GREEN == 0
  %define mmA   mm2
  %define xmmA  xmm2
  %define ymmA  ymm2
  %define mmB   mm3
  %define xmmB  xmm3
  %define ymmB  ymm3
%elif RGB_BLUE == 0
  %define mmA   mm4
  %define xmmA  xmm4
  %define ymmA  ymm4
  %define mmB   mm5
  %define xmmB  xmm5
  %define ymmB  ymm5
%else
  %define mmA   mm6
  %define xmmA  xmm6
  %define ymmA  ymm6
  %define mmB   mm7
  %define xmmB  xmm7
  %define ymmB  ymm7
%endif
; Component index 1 -> pseudo-register pair C/D (MMX, SSE and AVX views).
; Whichever of RGB_RED / RGB_GREEN / RGB_BLUE equals 1 selects the physical
; pair: red -> 0/1, green -> 2/3, blue -> 4/5; if none matches, 6/7 is used.
%if RGB_RED == 1
  %define mmC   mm0
  %define xmmC  xmm0
  %define ymmC  ymm0
  %define mmD   mm1
  %define xmmD  xmm1
  %define ymmD  ymm1
%elif RGB_GREEN == 1
  %define mmC   mm2
  %define xmmC  xmm2
  %define ymmC  ymm2
  %define mmD   mm3
  %define xmmD  xmm3
  %define ymmD  ymm3
%elif RGB_BLUE == 1
  %define mmC   mm4
  %define xmmC  xmm4
  %define ymmC  ymm4
  %define mmD   mm5
  %define xmmD  xmm5
  %define ymmD  ymm5
%else
  %define mmC   mm6
  %define xmmC  xmm6
  %define ymmC  ymm6
  %define mmD   mm7
  %define xmmD  xmm7
  %define ymmD  ymm7
%endif
; Component index 2 -> pseudo-register pair E/F (MMX, SSE and AVX views).
; Whichever of RGB_RED / RGB_GREEN / RGB_BLUE equals 2 selects the physical
; pair: red -> 0/1, green -> 2/3, blue -> 4/5; if none matches, 6/7 is used.
%if RGB_RED == 2
  %define mmE   mm0
  %define xmmE  xmm0
  %define ymmE  ymm0
  %define mmF   mm1
  %define xmmF  xmm1
  %define ymmF  ymm1
%elif RGB_GREEN == 2
  %define mmE   mm2
  %define xmmE  xmm2
  %define ymmE  ymm2
  %define mmF   mm3
  %define xmmF  xmm3
  %define ymmF  ymm3
%elif RGB_BLUE == 2
  %define mmE   mm4
  %define xmmE  xmm4
  %define ymmE  ymm4
  %define mmF   mm5
  %define xmmF  xmm5
  %define ymmF  ymm5
%else
  %define mmE   mm6
  %define xmmE  xmm6
  %define ymmE  ymm6
  %define mmF   mm7
  %define xmmF  xmm7
  %define ymmF  ymm7
%endif
; Component index 3 -> pseudo-register pair G/H (MMX, SSE and AVX views).
; Whichever of RGB_RED / RGB_GREEN / RGB_BLUE equals 3 selects the physical
; pair: red -> 0/1, green -> 2/3, blue -> 4/5; if none matches, 6/7 is used.
%if RGB_RED == 3
  %define mmG   mm0
  %define xmmG  xmm0
  %define ymmG  ymm0
  %define mmH   mm1
  %define xmmH  xmm1
  %define ymmH  ymm1
%elif RGB_GREEN == 3
  %define mmG   mm2
  %define xmmG  xmm2
  %define ymmG  ymm2
  %define mmH   mm3
  %define xmmH  xmm3
  %define ymmH  ymm3
%elif RGB_BLUE == 3
  %define mmG   mm4
  %define xmmG  xmm4
  %define ymmG  ymm4
  %define mmH   mm5
  %define xmmH  xmm5
  %define ymmH  ymm5
%else
  %define mmG   mm6
  %define xmmG  xmm6
  %define ymmG  ymm6
  %define mmH   mm7
  %define xmmH  xmm7
  %define ymmH  ymm7
%endif
; --------------------------------------------------------------------------

View file

@ -0,0 +1,31 @@
;
; jdct.inc - private declarations for forward & reverse DCT subsystems
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2018, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
; Each IDCT routine is responsible for range-limiting its results and
; converting them to unsigned form (0..MAXJSAMPLE). The raw outputs could
; be quite far out of range if the input data is corrupt, so a bulletproof
; range-limiting step is required. We use a mask-and-table-lookup method
; to do the combined operations quickly.
;
; Mask applied before the range-limit table lookup: 2 bits wider than a
; legal sample value.
%define RANGE_MASK  (MAXJSAMPLE * 4 + 3)

; Address-expression helpers.  In every macro `blk` is the base operand and
; `esz` the element size in bytes; the result is meant to be used inside an
; effective address.
;   ROW(i, blk, esz)  = blk + i * esz
;   COL(i, blk, esz)  = blk + i * esz * DCTSIZE
%define ROW(i, blk, esz)  ((blk) + (i) * (esz))
%define COL(i, blk, esz)  ((blk) + (i) * (esz) * DCTSIZE)

;   xxBLOCK(r, c, blk, esz) = blk + r * DCTSIZE * esz + c * SIZEOF_<word>
; i.e. step `r` times over DCTSIZE elements, then `c` times over one
; dword / MMX / XMM / YMM word.
%define DWBLOCK(r, c, blk, esz)   ((blk) + (r) * DCTSIZE * (esz) + (c) * SIZEOF_DWORD)
%define MMBLOCK(r, c, blk, esz)   ((blk) + (r) * DCTSIZE * (esz) + (c) * SIZEOF_MMWORD)
%define XMMBLOCK(r, c, blk, esz)  ((blk) + (r) * DCTSIZE * (esz) + (c) * SIZEOF_XMMWORD)
%define YMMBLOCK(r, c, blk, esz)  ((blk) + (r) * DCTSIZE * (esz) + (c) * SIZEOF_YMMWORD)
; --------------------------------------------------------------------------

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,93 @@
;
; Automatically generated include file from jsimdcfg.inc.h
;
;
; -- jpeglib.h
;
%define DCTSIZE   8
%define DCTSIZE2  64                    ; DCTSIZE * DCTSIZE
;
; -- jmorecfg.h
;
; For each pixel layout: the offset of the red, green and blue component
; within a pixel, followed by the pixel size.

; Default layout
%define RGB_RED             0
%define RGB_GREEN           1
%define RGB_BLUE            2
%define RGB_PIXELSIZE       3

; EXT_RGB
%define EXT_RGB_RED         0
%define EXT_RGB_GREEN       1
%define EXT_RGB_BLUE        2
%define EXT_RGB_PIXELSIZE   3

; EXT_RGBX
%define EXT_RGBX_RED        0
%define EXT_RGBX_GREEN      1
%define EXT_RGBX_BLUE       2
%define EXT_RGBX_PIXELSIZE  4

; EXT_BGR
%define EXT_BGR_RED         2
%define EXT_BGR_GREEN       1
%define EXT_BGR_BLUE        0
%define EXT_BGR_PIXELSIZE   3

; EXT_BGRX
%define EXT_BGRX_RED        2
%define EXT_BGRX_GREEN      1
%define EXT_BGRX_BLUE       0
%define EXT_BGRX_PIXELSIZE  4

; EXT_XBGR
%define EXT_XBGR_RED        3
%define EXT_XBGR_GREEN      2
%define EXT_XBGR_BLUE       1
%define EXT_XBGR_PIXELSIZE  4

; EXT_XRGB
%define EXT_XRGB_RED        1
%define EXT_XRGB_GREEN      2
%define EXT_XRGB_BLUE       3
%define EXT_XRGB_PIXELSIZE  4

%define RGBX_FILLER_0XFF    1
; Representation of a single sample (pixel element value).
; On this SIMD implementation, this must be 'unsigned char'.
;
%define JSAMPLE            byte            ; unsigned char
%define SIZEOF_JSAMPLE     SIZEOF_BYTE     ; sizeof(JSAMPLE)
%define CENTERJSAMPLE      128

; DCT frequency coefficient; this SIMD implementation requires 'short'.
%define JCOEF              word            ; short
%define SIZEOF_JCOEF       SIZEOF_WORD     ; sizeof(JCOEF)

; Image dimension type; this SIMD implementation requires 'unsigned int'.
%define JDIMENSION         dword           ; unsigned int
%define SIZEOF_JDIMENSION  SIZEOF_DWORD    ; sizeof(JDIMENSION)

; Pointer types from jpeglib.h, each followed by its size.
%define JSAMPROW           POINTER         ; JSAMPLE *
%define SIZEOF_JSAMPROW    SIZEOF_POINTER  ; sizeof(JSAMPROW)
%define JSAMPARRAY         POINTER         ; JSAMPROW *
%define SIZEOF_JSAMPARRAY  SIZEOF_POINTER  ; sizeof(JSAMPARRAY)
%define JSAMPIMAGE         POINTER         ; JSAMPARRAY *
%define SIZEOF_JSAMPIMAGE  SIZEOF_POINTER  ; sizeof(JSAMPIMAGE)
%define JCOEFPTR           POINTER         ; JCOEF *
%define SIZEOF_JCOEFPTR    SIZEOF_POINTER  ; sizeof(JCOEFPTR)
;
; -- jdct.h
;
; A forward DCT routine is given a pointer to a work area of type DCTELEM[];
; the DCT is to be performed in-place in that buffer.
; To maximize parallelism, Type DCTELEM is changed to short (originally, int).
;
%define DCTELEM                 word         ; short
%define SIZEOF_DCTELEM          SIZEOF_WORD  ; sizeof(DCTELEM)
; The generator template names this macro FAST_FLOAT, but the C preprocessor
; expanded that name while producing this file, which yielded a macro called
; `float`.  That left FAST_FLOAT undefined (although SIZEOF_FAST_FLOAT exists)
; and turned the word `float` itself into a macro.  The intended name is
; restored here.
%define FAST_FLOAT              FP32         ; float
%define SIZEOF_FAST_FLOAT       SIZEOF_FP32  ; sizeof(FAST_FLOAT)
; To maximize parallelism, Type MULTIPLIER is changed to short.
;
%define ISLOW_MULT_TYPE         word         ; must be short
%define SIZEOF_ISLOW_MULT_TYPE  SIZEOF_WORD  ; sizeof(ISLOW_MULT_TYPE)
%define IFAST_MULT_TYPE         word         ; must be short
%define SIZEOF_IFAST_MULT_TYPE  SIZEOF_WORD  ; sizeof(IFAST_MULT_TYPE)
%define IFAST_SCALE_BITS        2            ; fractional bits in scale factors
%define FLOAT_MULT_TYPE         FP32         ; must be float
%define SIZEOF_FLOAT_MULT_TYPE  SIZEOF_FP32  ; sizeof(FLOAT_MULT_TYPE)
;
; -- jsimd.h
;
; SIMD capability flags: every non-zero value occupies its own single bit.
%define JSIMD_NONE   0x00
%define JSIMD_MMX    0x01
%define JSIMD_3DNOW  0x02
%define JSIMD_SSE    0x04
%define JSIMD_SSE2   0x08
%define JSIMD_AVX2   0x80

View file

@ -0,0 +1,131 @@
// This file generates the include file for the assembly
// implementations by abusing the C preprocessor.
//
// Note: Some things are manually defined as they need to
// be mapped to NASM types.
;
; Automatically generated include file from jsimdcfg.inc.h
;
#define JPEG_INTERNALS
#include "../jpeglib.h"
#include "../jconfig.h"
#include "../jmorecfg.h"
#include "jsimd.h"
;
; -- jpeglib.h
;
%define _cpp_protection_DCTSIZE             DCTSIZE
%define _cpp_protection_DCTSIZE2            DCTSIZE2
;
; -- jmorecfg.h
;
// One group per pixel layout.  Each line stays flush-left and begins with
// a percent sign, exactly like the surrounding entries.
%define _cpp_protection_RGB_RED             RGB_RED
%define _cpp_protection_RGB_GREEN           RGB_GREEN
%define _cpp_protection_RGB_BLUE            RGB_BLUE
%define _cpp_protection_RGB_PIXELSIZE       RGB_PIXELSIZE

%define _cpp_protection_EXT_RGB_RED         EXT_RGB_RED
%define _cpp_protection_EXT_RGB_GREEN       EXT_RGB_GREEN
%define _cpp_protection_EXT_RGB_BLUE        EXT_RGB_BLUE
%define _cpp_protection_EXT_RGB_PIXELSIZE   EXT_RGB_PIXELSIZE

%define _cpp_protection_EXT_RGBX_RED        EXT_RGBX_RED
%define _cpp_protection_EXT_RGBX_GREEN      EXT_RGBX_GREEN
%define _cpp_protection_EXT_RGBX_BLUE       EXT_RGBX_BLUE
%define _cpp_protection_EXT_RGBX_PIXELSIZE  EXT_RGBX_PIXELSIZE

%define _cpp_protection_EXT_BGR_RED         EXT_BGR_RED
%define _cpp_protection_EXT_BGR_GREEN       EXT_BGR_GREEN
%define _cpp_protection_EXT_BGR_BLUE        EXT_BGR_BLUE
%define _cpp_protection_EXT_BGR_PIXELSIZE   EXT_BGR_PIXELSIZE

%define _cpp_protection_EXT_BGRX_RED        EXT_BGRX_RED
%define _cpp_protection_EXT_BGRX_GREEN      EXT_BGRX_GREEN
%define _cpp_protection_EXT_BGRX_BLUE       EXT_BGRX_BLUE
%define _cpp_protection_EXT_BGRX_PIXELSIZE  EXT_BGRX_PIXELSIZE

%define _cpp_protection_EXT_XBGR_RED        EXT_XBGR_RED
%define _cpp_protection_EXT_XBGR_GREEN      EXT_XBGR_GREEN
%define _cpp_protection_EXT_XBGR_BLUE       EXT_XBGR_BLUE
%define _cpp_protection_EXT_XBGR_PIXELSIZE  EXT_XBGR_PIXELSIZE

%define _cpp_protection_EXT_XRGB_RED        EXT_XRGB_RED
%define _cpp_protection_EXT_XRGB_GREEN      EXT_XRGB_GREEN
%define _cpp_protection_EXT_XRGB_BLUE       EXT_XRGB_BLUE
%define _cpp_protection_EXT_XRGB_PIXELSIZE  EXT_XRGB_PIXELSIZE

%define RGBX_FILLER_0XFF                    1
; Representation of a single sample (pixel element value).
; On this SIMD implementation, this must be 'unsigned char'.
;
%define JSAMPLE                        byte            ; unsigned char
%define SIZEOF_JSAMPLE                 SIZEOF_BYTE     ; sizeof(JSAMPLE)
%define _cpp_protection_CENTERJSAMPLE  CENTERJSAMPLE
; Representation of a DCT frequency coefficient.
; On this SIMD implementation, this must be 'short'.
;
%define JCOEF                          word            ; short
%define SIZEOF_JCOEF                   SIZEOF_WORD     ; sizeof(JCOEF)
; Datatype used for image dimensions.
; On this SIMD implementation, this must be 'unsigned int'.
;
%define JDIMENSION                     dword           ; unsigned int
%define SIZEOF_JDIMENSION              SIZEOF_DWORD    ; sizeof(JDIMENSION)
%define JSAMPROW                       POINTER         ; JSAMPLE * (jpeglib.h)
%define JSAMPARRAY                     POINTER         ; JSAMPROW * (jpeglib.h)
%define JSAMPIMAGE                     POINTER         ; JSAMPARRAY * (jpeglib.h)
%define JCOEFPTR                       POINTER         ; JCOEF * (jpeglib.h)
%define SIZEOF_JSAMPROW                SIZEOF_POINTER  ; sizeof(JSAMPROW)
%define SIZEOF_JSAMPARRAY              SIZEOF_POINTER  ; sizeof(JSAMPARRAY)
%define SIZEOF_JSAMPIMAGE              SIZEOF_POINTER  ; sizeof(JSAMPIMAGE)
%define SIZEOF_JCOEFPTR                SIZEOF_POINTER  ; sizeof(JCOEFPTR)
;
; -- jdct.h
;
; A forward DCT routine is given a pointer to a work area of type DCTELEM[];
; the DCT is to be performed in-place in that buffer.
; To maximize parallelism, Type DCTELEM is changed to short (originally, int).
;
%define DCTELEM word ; short
%define SIZEOF_DCTELEM SIZEOF_WORD ; sizeof(DCTELEM)
// FAST_FLOAT is expanded by the C preprocessor, so the unprotected form
// produced a generated macro named "float" instead of FAST_FLOAT.  Give it
// the same protection prefix as the other names that must survive.
%define _cpp_protection_FAST_FLOAT FP32 ; float
%define SIZEOF_FAST_FLOAT SIZEOF_FP32 ; sizeof(FAST_FLOAT)
// The type name is written in lower case below because the upper-case
// spelling is also expanded, which garbled the generated comment.
; To maximize parallelism, the multiplier type is changed to short.
;
%define ISLOW_MULT_TYPE word ; must be short
%define SIZEOF_ISLOW_MULT_TYPE SIZEOF_WORD ; sizeof(ISLOW_MULT_TYPE)
%define IFAST_MULT_TYPE word ; must be short
%define SIZEOF_IFAST_MULT_TYPE SIZEOF_WORD ; sizeof(IFAST_MULT_TYPE)
%define IFAST_SCALE_BITS 2 ; fractional bits in scale factors
%define FLOAT_MULT_TYPE FP32 ; must be float
%define SIZEOF_FLOAT_MULT_TYPE SIZEOF_FP32 ; sizeof(FLOAT_MULT_TYPE)
;
; -- jsimd.h
;
%define _cpp_protection_JSIMD_NONE   JSIMD_NONE
%define _cpp_protection_JSIMD_MMX    JSIMD_MMX
%define _cpp_protection_JSIMD_3DNOW  JSIMD_3DNOW
%define _cpp_protection_JSIMD_SSE    JSIMD_SSE
%define _cpp_protection_JSIMD_SSE2   JSIMD_SSE2
%define _cpp_protection_JSIMD_AVX2   JSIMD_AVX2

View file

@ -0,0 +1,479 @@
;
; jsimdext.inc - common declarations
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2010, 2016, 2019, D. R. Commander.
; Copyright (C) 2018, Matthieu Darbois.
;
; Based on the x86 SIMD extension for IJG JPEG library - version 1.02
;
; Copyright (C) 1999-2006, MIYASAKA Masaru.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; ==========================================================================
; System-dependent configurations
; Select section names/attributes (SEG_TEXT, SEG_CONST) and, where needed,
; the symbol-name mapping (EXTN) and the GOT symbol for the object format
; named on the command line.  The first of WIN32, WIN64, OBJ32, ELF, AOUT,
; MACHO that is defined wins; otherwise plain .text/.data is used.
%ifdef WIN32                            ; ---- nasm -fwin32 -DWIN32 ...
  ; Microsoft Visual C++, MinGW (Minimalist GNU for Windows), CygWin,
  ; LCC-Win32
  %ifdef __YASM_VER__
    %define SEG_TEXT   .text align=32
    %define SEG_CONST  .rdata align=32
  %else
    %define SEG_TEXT   .text align=32 public use32 class=CODE
    %define SEG_CONST  .rdata align=32 public use32 class=CONST
  %endif

%elifdef WIN64                          ; ---- nasm -fwin64 -DWIN64 ...
  ; Microsoft Visual C++
  %ifdef __YASM_VER__
    %define SEG_TEXT   .text align=32
    %define SEG_CONST  .rdata align=32
  %else
    %define SEG_TEXT   .text align=32 public use64 class=CODE
    %define SEG_CONST  .rdata align=32 public use64 class=CONST
  %endif
  %define EXTN(name)  name              ; foo() -> foo

%elifdef OBJ32                          ; ---- nasm -fobj -DOBJ32 ...
  ; Borland C++ (Win32)
  %define SEG_TEXT   _text align=32 public use32 class=CODE
  %define SEG_CONST  _data align=32 public use32 class=DATA

%elifdef ELF                            ; ---- nasm -felf[64] -DELF ...
  ; Linux, *BSD family Unix using elf format, Unix System V (including
  ; Solaris x86, UnixWare and SCO Unix)

  ; mark stack as non-executable
  section .note.GNU-stack noalloc noexec nowrite progbits

  %ifdef __x86_64__
    %define SEG_TEXT   .text progbits align=32
    %define SEG_CONST  .rodata progbits align=32
  %else
    %define SEG_TEXT   .text progbits alloc exec nowrite align=32
    %define SEG_CONST  .rodata progbits alloc noexec nowrite align=32
  %endif

  ; To make the code position-independent, append -DPIC to the commandline
  %define GOT_SYMBOL  _GLOBAL_OFFSET_TABLE_     ; ELF supports PIC
  %define EXTN(name)  name                      ; foo() -> foo

%elifdef AOUT                           ; ---- nasm -faoutb/aout -DAOUT ...
  ; Older Linux using a.out format        (nasm -f aout -DAOUT ...)
  ; *BSD family Unix using a.out format   (nasm -f aoutb -DAOUT ...)
  %define SEG_TEXT   .text
  %define SEG_CONST  .data

  ; To make the code position-independent, append -DPIC to the commandline
  %define GOT_SYMBOL  __GLOBAL_OFFSET_TABLE_    ; BSD-style a.out supports PIC

%elifdef MACHO                          ; ---- nasm -fmacho -DMACHO ...
  ; NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)
  %define SEG_TEXT   .text              ;align=32 ; nasm doesn't accept align=32. why?
  %define SEG_CONST  .rodata align=32

  ; The generation of position-independent code (PIC) is the default on Darwin.
  %define PIC
  %define GOT_SYMBOL  _MACHO_PIC_       ; Mach-O style code-relative addressing

%else                                   ; ---- any other object format
  %define SEG_TEXT   .text
  %define SEG_CONST  .data
%endif
; ==========================================================================
; --------------------------------------------------------------------------
; Common types
;
; Sizes (in bytes) and widths (in bits) of the primitive NASM data words.
; Everything further down is expressed in terms of these; since %define
; bodies are expanded at the point of use, definition order is irrelevant.
%define SIZEOF_BYTE   1                 ; sizeof(byte)
%define SIZEOF_WORD   2                 ; sizeof(word)
%define SIZEOF_DWORD  4                 ; sizeof(dword)
%define SIZEOF_QWORD  8                 ; sizeof(qword)
%define SIZEOF_OWORD  16                ; sizeof(oword)
%define SIZEOF_YWORD  32                ; sizeof(yword)

%define BYTE_BIT      8                 ; CHAR_BIT in C
%define WORD_BIT      16                ; sizeof(word)*BYTE_BIT
%define DWORD_BIT     32                ; sizeof(dword)*BYTE_BIT
%define QWORD_BIT     64                ; sizeof(qword)*BYTE_BIT
%define OWORD_BIT     128               ; sizeof(oword)*BYTE_BIT
%define YWORD_BIT     256               ; sizeof(yword)*BYTE_BIT

; General pointer type: qword when __x86_64__ is defined, dword otherwise.
%ifdef __x86_64__
  %define POINTER         qword
  %define SIZEOF_POINTER  SIZEOF_QWORD  ; sizeof(POINTER)
  %define POINTER_BIT     QWORD_BIT     ; sizeof(POINTER)*BYTE_BIT
%else
  %define POINTER         dword
  %define SIZEOF_POINTER  SIZEOF_DWORD  ; sizeof(POINTER)
  %define POINTER_BIT     DWORD_BIT     ; sizeof(POINTER)*BYTE_BIT
%endif

%define INT             dword           ; signed integer type
%define SIZEOF_INT      SIZEOF_DWORD    ; sizeof(INT)
%define INT_BIT         DWORD_BIT       ; sizeof(INT)*BYTE_BIT

%define FP32            dword           ; IEEE754 single
%define SIZEOF_FP32     SIZEOF_DWORD    ; sizeof(FP32)
%define FP32_BIT        DWORD_BIT       ; sizeof(FP32)*BYTE_BIT

%define MMWORD          qword           ; int64 (MMX register)
%define SIZEOF_MMWORD   SIZEOF_QWORD    ; sizeof(MMWORD)
%define MMWORD_BIT      QWORD_BIT       ; sizeof(MMWORD)*BYTE_BIT

; NASM is buggy and doesn't properly handle operand sizes for SSE
; instructions, so for now XMMWORD and YMMWORD expand to nothing.
%define XMMWORD                         ; int128 (SSE register)
%define SIZEOF_XMMWORD  SIZEOF_OWORD    ; sizeof(XMMWORD)
%define XMMWORD_BIT     OWORD_BIT       ; sizeof(XMMWORD)*BYTE_BIT

%define YMMWORD                         ; int256 (AVX register)
%define SIZEOF_YMMWORD  SIZEOF_YWORD    ; sizeof(YMMWORD)
%define YMMWORD_BIT     YWORD_BIT       ; sizeof(YMMWORD)*BYTE_BIT

; Similar hacks for when we load a dword or MMWORD into an xmm# register
%define XMM_DWORD
%define XMM_MMWORD
; --------------------------------------------------------------------------
; External Symbol Name
;
; Fallback used when nothing has defined EXTN yet: prepend an underscore to
; the symbol name.
%ifndef EXTN
  %define EXTN(name)  _ %+ name         ; foo() -> _foo
%endif
; --------------------------------------------------------------------------
; Hidden symbols
;
; GLOBAL_FUNCTION(name) / GLOBAL_DATA(name) export EXTN(name).  With ELF the
; export carries the `hidden` attribute, with Mach-O `private_extern` (under
; YASM, or NASM with __NASM_VERSION_ID__ >= 0x020E0000); every other case
; falls back to a plain `global`.
%ifdef ELF                              ; ---- nasm -felf[64] -DELF ...
  %define GLOBAL_FUNCTION(name)  global EXTN(name):function hidden
  %define GLOBAL_DATA(name)      global EXTN(name):data hidden
%elifdef MACHO                          ; ---- nasm -fmacho -DMACHO ...
  %ifdef __YASM_VER__
    %define GLOBAL_FUNCTION(name)  global EXTN(name):private_extern
    %define GLOBAL_DATA(name)      global EXTN(name):private_extern
  %else
    %if __NASM_VERSION_ID__ >= 0x020E0000
      %define GLOBAL_FUNCTION(name)  global EXTN(name):private_extern
      %define GLOBAL_DATA(name)      global EXTN(name):private_extern
    %endif
  %endif
%endif

; Plain export for every configuration not handled above.
%ifndef GLOBAL_FUNCTION
  %define GLOBAL_FUNCTION(name)  global EXTN(name)
%endif
%ifndef GLOBAL_DATA
  %define GLOBAL_DATA(name)      global EXTN(name)
%endif
; --------------------------------------------------------------------------
; Macros for position-independent code (PIC) support
;
; PIC is only honoured when a GOT symbol has been configured; without
; GOT_SYMBOL any PIC definition is dropped and the non-PIC stubs are used.
%ifndef GOT_SYMBOL
%undef PIC
%endif
; Interface provided by both variants below:
;   GOTOFF(got, sym)  address expression for `sym` relative to register `got`
;   get_GOT reg       load the base for GOTOFF into `reg`
;   pushpic / poppic / movpic   push / pop / mov that are only emitted when
;                               PIC is enabled (they expand to nothing
;                               otherwise)
%ifdef PIC ; -------------------------------------------
%ifidn GOT_SYMBOL, _MACHO_PIC_ ; --------------------
; At present, nasm doesn't seem to support PIC generation for Mach-O.
; The PIC support code below is a little tricky.
; Symbols are addressed relative to the label const_base, which is placed
; in SEG_CONST at the point where this file is included.
SECTION SEG_CONST
const_base:
%define GOTOFF(got, sym) (got) + (sym) - const_base
%imacro get_GOT 1
; NOTE: this macro destroys the ecx register.
; The call/ret thunk at %%geteip copies the return address into ecx; the
; following add turns that into the run-time address of %%ref.
call %%geteip
add ecx, byte (%%ref - $)
jmp short %%adjust
%%geteip:
mov ecx, POINTER [esp]
ret
%%adjust:
; ebp is saved, zeroed for use as the index of the hand-encoded lea, and
; restored at the end of the macro.
push ebp
xor ebp, ebp ; ebp = 0
%ifidni %1, ebx ; (%1 == ebx)
; db 0x8D,0x9C + jmp near const_base =
; lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
; The two data bytes plus the bytes of the jmp are meant to be decoded as
; that single lea, so the jmp itself is never executed as a jump.
db 0x8D, 0x9C ; 8D,9C
jmp near const_base ; E9,(const_base-%%ref)
%%ref:
%else ; (%1 != ebx)
; db 0x8D,0x8C + jmp near const_base =
; lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
; Same trick with ecx as the destination; the result is then copied to %1.
db 0x8D, 0x8C ; 8D,8C
jmp near const_base ; E9,(const_base-%%ref)
%%ref:
mov %1, ecx
%endif ; (%1 == ebx)
pop ebp
%endmacro
%else ; GOT_SYMBOL != _MACHO_PIC_ ----------------
; Variant using the assembler's own relocation support (wrt ..gotoff and
; wrt ..gotpc).
%define GOTOFF(got, sym) (got) + (sym) wrt ..gotoff
%imacro get_GOT 1
; The call/ret thunk at %%geteip copies the return address into %1; the add
; then applies the ..gotpc-relative offset of GOT_SYMBOL.
extern GOT_SYMBOL
call %%geteip
add %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
jmp short %%done
%%geteip:
mov %1, POINTER [esp]
ret
%%done:
%endmacro
%endif ; GOT_SYMBOL == _MACHO_PIC_ ----------------
; With PIC enabled these are plain push / pop / mov.
%imacro pushpic 1.nolist
push %1
%endmacro
%imacro poppic 1.nolist
pop %1
%endmacro
%imacro movpic 2.nolist
mov %1, %2
%endmacro
%else ; !PIC -----------------------------------------
; Without PIC, GOTOFF is just the symbol and every helper expands to nothing.
%define GOTOFF(got, sym) (sym)
%imacro get_GOT 1.nolist
%endmacro
%imacro pushpic 1.nolist
%endmacro
%imacro poppic 1.nolist
%endmacro
%imacro movpic 2.nolist
%endmacro
%endif ; PIC -----------------------------------------
; --------------------------------------------------------------------------
; Align the next instruction on {2,4,8,16,..}-byte boundary.
; ".balign n,,m" in GNU as
;
; MSKLE(x, y): comparison mask built from the low 16 bits of both operands.
; When x > y the subtraction is negative, its complement is a small
; non-negative number and the shift by 16 leaves 0.  When x <= y the
; complement has its upper bits set, so the shifted result is a mask with
; the low bits set that lets a byte count pass through `&` unchanged.
; NOTE(review): relies on the assembler evaluating in more than 32 bits;
; confirm for every assembler in use.
%define MSKLE(x, y) (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
; FILLB(b, n): number of bytes from position b up to the next multiple of n
; within the current section (n must be a power of two, as the mask n-1
; requires).
%define FILLB(b, n) (($$-(b)) & ((n)-1))
; alignx <boundary> [, <max_fill> = 0xFFFF]
; Pads the code with filler instructions up to <boundary>.  Every `times`
; count is masked with MSKLE(FILLB(%%bs, %1), %2), so nothing at all is
; emitted when the padding needed at the macro's start exceeds <max_fill>.
; The first `times` emits single-byte nops for the whole gap when at least
; 16 bytes are missing; the remaining lines emit progressively shorter
; fillers, each as often as the divisor fits into the bytes still missing.
; NOTE(review): the first multi-byte filler is 7 bytes long but its count is
; divided by 9, unlike the later entries whose divisor equals their length
; -- confirm this is intended.
; The label and the first `times` statement are joined by line
; continuations; do not insert anything between those lines.
%imacro alignx 1-2.nolist 0xFFFF
%%bs: \
times MSKLE(FILLB(%%bs, %1), %2) & MSKLE(16, FILLB($, %1)) & FILLB($, %1) \
db 0x90 ; nop
times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 9 \
db 0x8D, 0x9C, 0x23, 0x00, 0x00, 0x00, 0x00 ; lea ebx,[ebx+0x00000000]
times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 7 \
db 0x8D, 0xAC, 0x25, 0x00, 0x00, 0x00, 0x00 ; lea ebp,[ebp+0x00000000]
times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 6 \
db 0x8D, 0xAD, 0x00, 0x00, 0x00, 0x00 ; lea ebp,[ebp+0x00000000]
times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 4 \
db 0x8D, 0x6C, 0x25, 0x00 ; lea ebp,[ebp+0x00]
times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 3 \
db 0x8D, 0x6D, 0x00 ; lea ebp,[ebp+0x00]
times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 2 \
db 0x8B, 0xED ; mov ebp,ebp
times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 1 \
db 0x90 ; nop
%endmacro
; Align the next data on {2,4,8,16,..}-byte boundary.
;
; alignz <boundary>: pad with zero bytes up to the next <boundary>-byte
; multiple.
%imacro alignz 1.nolist
    align       %1, db 0                ; filling zeros
%endmacro
%ifdef __x86_64__
%ifdef WIN64
; collect_args <argc>   (Win64 variant)
; Saves xmm6 and xmm7 on the stack, then moves the first <argc> integer
; arguments into r10, r11, r12, r13, r14, r15 (in that order).  r12-r15 are
; pushed before being overwritten; rsi and rdi are pushed last.
; Arguments 1-4 come from rcx, rdx, r8, r9; arguments 5 and 6 are read from
; [rax+48] and [rax+56].
; NOTE(review): this presumably requires the caller to have copied rsp into
; rax right after its own `push rbp`, and rsp to be 16-byte aligned for the
; movaps stores -- confirm against the call sites.
; Must be paired with `uncollect_args <argc>` using the same count.
%imacro collect_args 1
    sub         rsp, SIZEOF_XMMWORD
    movaps      XMMWORD [rsp], xmm6
    sub         rsp, SIZEOF_XMMWORD
    movaps      XMMWORD [rsp], xmm7
    mov         r10, rcx
  %if %1 >= 2
    mov         r11, rdx
  %endif
  %if %1 >= 3
    push        r12
    mov         r12, r8
  %endif
  %if %1 >= 4
    push        r13
    mov         r13, r9
  %endif
  %if %1 >= 5
    push        r14
    mov         r14, [rax+48]
  %endif
  %if %1 >= 6
    push        r15
    mov         r15, [rax+56]
  %endif
    push        rsi
    push        rdi
%endmacro
; uncollect_args <argc>   (Win64 variant)
; Restores, in reverse order, what the Win64 `collect_args <argc>` saved:
; rdi, rsi, then r15 ... r12 as far as <argc> required, and finally xmm7
; and xmm6.  <argc> must match the value given to collect_args.
%imacro uncollect_args 1
    pop         rdi
    pop         rsi
  %if %1 >= 6
    pop         r15
  %endif
  %if %1 >= 5
    pop         r14
  %endif
  %if %1 >= 4
    pop         r13
  %endif
  %if %1 >= 3
    pop         r12
  %endif
    movaps      xmm7, XMMWORD [rsp]
    add         rsp, SIZEOF_XMMWORD
    movaps      xmm6, XMMWORD [rsp]
    add         rsp, SIZEOF_XMMWORD
%endmacro
; push_xmm <count>   (Win64 variant)
; Reserves <count> XMM-sized stack slots and stores xmm8 upwards into them.
; xmm8 is always stored; xmm9, xmm10 and xmm11 follow when <count> is at
; least 2, 3 and 4.  Counts above 4 reserve more space but store no further
; registers.  Uses movaps, so rsp has to be 16-byte aligned.
%imacro push_xmm 1
    sub         rsp, %1 * SIZEOF_XMMWORD
    movaps      XMMWORD [rsp+0*SIZEOF_XMMWORD], xmm8
  %if %1 >= 2
    movaps      XMMWORD [rsp+1*SIZEOF_XMMWORD], xmm9
  %endif
  %if %1 >= 3
    movaps      XMMWORD [rsp+2*SIZEOF_XMMWORD], xmm10
  %endif
  %if %1 >= 4
    movaps      XMMWORD [rsp+3*SIZEOF_XMMWORD], xmm11
  %endif
%endmacro
; pop_xmm <count>   (Win64 variant)
; Reloads xmm8 (always) and xmm9, xmm10, xmm11 (when <count> is at least
; 2, 3, 4) from the slots at the top of the stack, then releases <count>
; XMM-sized slots.  <count> must match the preceding `push_xmm`.
%imacro pop_xmm 1
    movaps      xmm8, XMMWORD [rsp+0*SIZEOF_XMMWORD]
  %if %1 >= 2
    movaps      xmm9, XMMWORD [rsp+1*SIZEOF_XMMWORD]
  %endif
  %if %1 >= 3
    movaps      xmm10, XMMWORD [rsp+2*SIZEOF_XMMWORD]
  %endif
  %if %1 >= 4
    movaps      xmm11, XMMWORD [rsp+3*SIZEOF_XMMWORD]
  %endif
    add         rsp, %1 * SIZEOF_XMMWORD
%endmacro
%else
; collect_args <argc>   (non-Windows x86-64 variant)
; Copies the first <argc> integer arguments from rdi, rsi, rdx, rcx, r8, r9
; into r10, r11, r12, r13, r14, r15, pushing each destination register
; before it is overwritten.  Must be paired with `uncollect_args <argc>`
; using the same count.
%imacro collect_args 1
    push        r10
    mov         r10, rdi
  %if %1 >= 2
    push        r11
    mov         r11, rsi
  %endif
  %if %1 >= 3
    push        r12
    mov         r12, rdx
  %endif
  %if %1 >= 4
    push        r13
    mov         r13, rcx
  %endif
  %if %1 >= 5
    push        r14
    mov         r14, r8
  %endif
  %if %1 >= 6
    push        r15
    mov         r15, r9
  %endif
%endmacro
; uncollect_args <argc>   (non-Windows x86-64 variant)
; Pops r15 down to r10 in the reverse of the order in which
; `collect_args <argc>` pushed them.  <argc> must match the value given to
; collect_args.
%imacro uncollect_args 1
  %if %1 >= 6
    pop         r15
  %endif
  %if %1 >= 5
    pop         r14
  %endif
  %if %1 >= 4
    pop         r13
  %endif
  %if %1 >= 3
    pop         r12
  %endif
  %if %1 >= 2
    pop         r11
  %endif
    pop         r10
%endmacro
; In this (non-WIN64) branch push_xmm and pop_xmm accept their argument but
; expand to nothing, so code can invoke them unconditionally.
%imacro push_xmm 1
%endmacro
%imacro pop_xmm 1
%endmacro
%endif
%endif
; --------------------------------------------------------------------------
; Defines picked up from the C headers
;
%include "jsimdcfg.inc"
; --------------------------------------------------------------------------