Repo created
This commit is contained in:
parent
81b91f4139
commit
f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions
135
TMessagesProj/jni/mozjpeg/simd/nasm/jcolsamp.inc
Normal file
135
TMessagesProj/jni/mozjpeg/simd/nasm/jcolsamp.inc
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
;
|
||||
; jcolsamp.inc - private declarations for color conversion & up/downsampling
|
||||
;
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2015, Intel Corporation.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
||||
|
||||
; --------------------------------------------------------------------------
|
||||
|
||||
; pseudo-registers to make ordering of RGB configurable
|
||||
;
|
||||
%if RGB_RED == 0
|
||||
%define mmA mm0
|
||||
%define mmB mm1
|
||||
%define xmmA xmm0
|
||||
%define xmmB xmm1
|
||||
%define ymmA ymm0
|
||||
%define ymmB ymm1
|
||||
%elif RGB_GREEN == 0
|
||||
%define mmA mm2
|
||||
%define mmB mm3
|
||||
%define xmmA xmm2
|
||||
%define xmmB xmm3
|
||||
%define ymmA ymm2
|
||||
%define ymmB ymm3
|
||||
%elif RGB_BLUE == 0
|
||||
%define mmA mm4
|
||||
%define mmB mm5
|
||||
%define xmmA xmm4
|
||||
%define xmmB xmm5
|
||||
%define ymmA ymm4
|
||||
%define ymmB ymm5
|
||||
%else
|
||||
%define mmA mm6
|
||||
%define mmB mm7
|
||||
%define xmmA xmm6
|
||||
%define xmmB xmm7
|
||||
%define ymmA ymm6
|
||||
%define ymmB ymm7
|
||||
%endif
|
||||
|
||||
%if RGB_RED == 1
|
||||
%define mmC mm0
|
||||
%define mmD mm1
|
||||
%define xmmC xmm0
|
||||
%define xmmD xmm1
|
||||
%define ymmC ymm0
|
||||
%define ymmD ymm1
|
||||
%elif RGB_GREEN == 1
|
||||
%define mmC mm2
|
||||
%define mmD mm3
|
||||
%define xmmC xmm2
|
||||
%define xmmD xmm3
|
||||
%define ymmC ymm2
|
||||
%define ymmD ymm3
|
||||
%elif RGB_BLUE == 1
|
||||
%define mmC mm4
|
||||
%define mmD mm5
|
||||
%define xmmC xmm4
|
||||
%define xmmD xmm5
|
||||
%define ymmC ymm4
|
||||
%define ymmD ymm5
|
||||
%else
|
||||
%define mmC mm6
|
||||
%define mmD mm7
|
||||
%define xmmC xmm6
|
||||
%define xmmD xmm7
|
||||
%define ymmC ymm6
|
||||
%define ymmD ymm7
|
||||
%endif
|
||||
|
||||
%if RGB_RED == 2
|
||||
%define mmE mm0
|
||||
%define mmF mm1
|
||||
%define xmmE xmm0
|
||||
%define xmmF xmm1
|
||||
%define ymmE ymm0
|
||||
%define ymmF ymm1
|
||||
%elif RGB_GREEN == 2
|
||||
%define mmE mm2
|
||||
%define mmF mm3
|
||||
%define xmmE xmm2
|
||||
%define xmmF xmm3
|
||||
%define ymmE ymm2
|
||||
%define ymmF ymm3
|
||||
%elif RGB_BLUE == 2
|
||||
%define mmE mm4
|
||||
%define mmF mm5
|
||||
%define xmmE xmm4
|
||||
%define xmmF xmm5
|
||||
%define ymmE ymm4
|
||||
%define ymmF ymm5
|
||||
%else
|
||||
%define mmE mm6
|
||||
%define mmF mm7
|
||||
%define xmmE xmm6
|
||||
%define xmmF xmm7
|
||||
%define ymmE ymm6
|
||||
%define ymmF ymm7
|
||||
%endif
|
||||
|
||||
%if RGB_RED == 3
|
||||
%define mmG mm0
|
||||
%define mmH mm1
|
||||
%define xmmG xmm0
|
||||
%define xmmH xmm1
|
||||
%define ymmG ymm0
|
||||
%define ymmH ymm1
|
||||
%elif RGB_GREEN == 3
|
||||
%define mmG mm2
|
||||
%define mmH mm3
|
||||
%define xmmG xmm2
|
||||
%define xmmH xmm3
|
||||
%define ymmG ymm2
|
||||
%define ymmH ymm3
|
||||
%elif RGB_BLUE == 3
|
||||
%define mmG mm4
|
||||
%define mmH mm5
|
||||
%define xmmG xmm4
|
||||
%define xmmH xmm5
|
||||
%define ymmG ymm4
|
||||
%define ymmH ymm5
|
||||
%else
|
||||
%define mmG mm6
|
||||
%define mmH mm7
|
||||
%define xmmG xmm6
|
||||
%define xmmH xmm7
|
||||
%define ymmG ymm6
|
||||
%define ymmH ymm7
|
||||
%endif
|
||||
|
||||
; --------------------------------------------------------------------------
|
||||
31
TMessagesProj/jni/mozjpeg/simd/nasm/jdct.inc
Normal file
31
TMessagesProj/jni/mozjpeg/simd/nasm/jdct.inc
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
;
|
||||
; jdct.inc - private declarations for forward & reverse DCT subsystems
|
||||
;
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2018, D. R. Commander.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
||||
|
||||
; Each IDCT routine is responsible for range-limiting its results and
|
||||
; converting them to unsigned form (0..MAXJSAMPLE). The raw outputs could
|
||||
; be quite far out of range if the input data is corrupt, so a bulletproof
|
||||
; range-limiting step is required. We use a mask-and-table-lookup method
|
||||
; to do the combined operations quickly.
|
||||
;
|
||||
%define RANGE_MASK (MAXJSAMPLE * 4 + 3) ; 2 bits wider than legal samples
|
||||
|
||||
%define ROW(n, b, s) ((b) + (n) * (s))
|
||||
%define COL(n, b, s) ((b) + (n) * (s) * DCTSIZE)
|
||||
|
||||
%define DWBLOCK(m, n, b, s) \
|
||||
((b) + (m) * DCTSIZE * (s) + (n) * SIZEOF_DWORD)
|
||||
%define MMBLOCK(m, n, b, s) \
|
||||
((b) + (m) * DCTSIZE * (s) + (n) * SIZEOF_MMWORD)
|
||||
%define XMMBLOCK(m, n, b, s) \
|
||||
((b) + (m) * DCTSIZE * (s) + (n) * SIZEOF_XMMWORD)
|
||||
%define YMMBLOCK(m, n, b, s) \
|
||||
((b) + (m) * DCTSIZE * (s) + (n) * SIZEOF_YMMWORD)
|
||||
|
||||
; --------------------------------------------------------------------------
|
||||
4097
TMessagesProj/jni/mozjpeg/simd/nasm/jpeg_nbits_table.inc
Normal file
4097
TMessagesProj/jni/mozjpeg/simd/nasm/jpeg_nbits_table.inc
Normal file
File diff suppressed because it is too large
Load diff
93
TMessagesProj/jni/mozjpeg/simd/nasm/jsimdcfg.inc
Normal file
93
TMessagesProj/jni/mozjpeg/simd/nasm/jsimdcfg.inc
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
;
|
||||
; Automatically generated include file from jsimdcfg.inc.h
|
||||
;
|
||||
;
|
||||
; -- jpeglib.h
|
||||
;
|
||||
%define DCTSIZE 8
|
||||
%define DCTSIZE2 64
|
||||
;
|
||||
; -- jmorecfg.h
|
||||
;
|
||||
%define RGB_RED 0
|
||||
%define RGB_GREEN 1
|
||||
%define RGB_BLUE 2
|
||||
%define RGB_PIXELSIZE 3
|
||||
%define EXT_RGB_RED 0
|
||||
%define EXT_RGB_GREEN 1
|
||||
%define EXT_RGB_BLUE 2
|
||||
%define EXT_RGB_PIXELSIZE 3
|
||||
%define EXT_RGBX_RED 0
|
||||
%define EXT_RGBX_GREEN 1
|
||||
%define EXT_RGBX_BLUE 2
|
||||
%define EXT_RGBX_PIXELSIZE 4
|
||||
%define EXT_BGR_RED 2
|
||||
%define EXT_BGR_GREEN 1
|
||||
%define EXT_BGR_BLUE 0
|
||||
%define EXT_BGR_PIXELSIZE 3
|
||||
%define EXT_BGRX_RED 2
|
||||
%define EXT_BGRX_GREEN 1
|
||||
%define EXT_BGRX_BLUE 0
|
||||
%define EXT_BGRX_PIXELSIZE 4
|
||||
%define EXT_XBGR_RED 3
|
||||
%define EXT_XBGR_GREEN 2
|
||||
%define EXT_XBGR_BLUE 1
|
||||
%define EXT_XBGR_PIXELSIZE 4
|
||||
%define EXT_XRGB_RED 1
|
||||
%define EXT_XRGB_GREEN 2
|
||||
%define EXT_XRGB_BLUE 3
|
||||
%define EXT_XRGB_PIXELSIZE 4
|
||||
%define RGBX_FILLER_0XFF 1
|
||||
; Representation of a single sample (pixel element value).
|
||||
; On this SIMD implementation, this must be 'unsigned char'.
|
||||
;
|
||||
%define JSAMPLE byte ; unsigned char
|
||||
%define SIZEOF_JSAMPLE SIZEOF_BYTE ; sizeof(JSAMPLE)
|
||||
%define CENTERJSAMPLE 128
|
||||
; Representation of a DCT frequency coefficient.
|
||||
; On this SIMD implementation, this must be 'short'.
|
||||
;
|
||||
%define JCOEF word ; short
|
||||
%define SIZEOF_JCOEF SIZEOF_WORD ; sizeof(JCOEF)
|
||||
; Datatype used for image dimensions.
|
||||
; On this SIMD implementation, this must be 'unsigned int'.
|
||||
;
|
||||
%define JDIMENSION dword ; unsigned int
|
||||
%define SIZEOF_JDIMENSION SIZEOF_DWORD ; sizeof(JDIMENSION)
|
||||
%define JSAMPROW POINTER ; JSAMPLE * (jpeglib.h)
|
||||
%define JSAMPARRAY POINTER ; JSAMPROW * (jpeglib.h)
|
||||
%define JSAMPIMAGE POINTER ; JSAMPARRAY * (jpeglib.h)
|
||||
%define JCOEFPTR POINTER ; JCOEF * (jpeglib.h)
|
||||
%define SIZEOF_JSAMPROW SIZEOF_POINTER ; sizeof(JSAMPROW)
|
||||
%define SIZEOF_JSAMPARRAY SIZEOF_POINTER ; sizeof(JSAMPARRAY)
|
||||
%define SIZEOF_JSAMPIMAGE SIZEOF_POINTER ; sizeof(JSAMPIMAGE)
|
||||
%define SIZEOF_JCOEFPTR SIZEOF_POINTER ; sizeof(JCOEFPTR)
|
||||
;
|
||||
; -- jdct.h
|
||||
;
|
||||
; A forward DCT routine is given a pointer to a work area of type DCTELEM[];
|
||||
; the DCT is to be performed in-place in that buffer.
|
||||
; To maximize parallelism, Type DCTELEM is changed to short (originally, int).
|
||||
;
|
||||
%define DCTELEM word ; short
|
||||
%define SIZEOF_DCTELEM SIZEOF_WORD ; sizeof(DCTELEM)
|
||||
; FAST_FLOAT is the operand-size name used for C 'float' values (FP32).
; The macro must be named FAST_FLOAT, matching SIZEOF_FAST_FLOAT below;
; defining a macro called 'float' leaves FAST_FLOAT undefined.
%define FAST_FLOAT FP32 ; float
|
||||
%define SIZEOF_FAST_FLOAT SIZEOF_FP32 ; sizeof(float)
|
||||
; To maximize parallelism, Type MULTIPLIER is changed to short.
|
||||
;
|
||||
%define ISLOW_MULT_TYPE word ; must be short
|
||||
%define SIZEOF_ISLOW_MULT_TYPE SIZEOF_WORD ; sizeof(ISLOW_MULT_TYPE)
|
||||
%define IFAST_MULT_TYPE word ; must be short
|
||||
%define SIZEOF_IFAST_MULT_TYPE SIZEOF_WORD ; sizeof(IFAST_MULT_TYPE)
|
||||
%define IFAST_SCALE_BITS 2 ; fractional bits in scale factors
|
||||
%define FLOAT_MULT_TYPE FP32 ; must be float
|
||||
%define SIZEOF_FLOAT_MULT_TYPE SIZEOF_FP32 ; sizeof(FLOAT_MULT_TYPE)
|
||||
;
|
||||
; -- jsimd.h
|
||||
;
|
||||
%define JSIMD_NONE 0x00
|
||||
%define JSIMD_MMX 0x01
|
||||
%define JSIMD_3DNOW 0x02
|
||||
%define JSIMD_SSE 0x04
|
||||
%define JSIMD_SSE2 0x08
|
||||
%define JSIMD_AVX2 0x80
|
||||
131
TMessagesProj/jni/mozjpeg/simd/nasm/jsimdcfg.inc.h
Normal file
131
TMessagesProj/jni/mozjpeg/simd/nasm/jsimdcfg.inc.h
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
// This file generates the include file for the assembly
|
||||
// implementations by abusing the C preprocessor.
|
||||
//
|
||||
// Note: Some things are manually defined as they need to
|
||||
// be mapped to NASM types.
|
||||
|
||||
;
|
||||
; Automatically generated include file from jsimdcfg.inc.h
|
||||
;
|
||||
|
||||
#define JPEG_INTERNALS
|
||||
|
||||
#include "../jpeglib.h"
|
||||
#include "../jconfig.h"
|
||||
#include "../jmorecfg.h"
|
||||
#include "jsimd.h"
|
||||
|
||||
;
|
||||
; -- jpeglib.h
|
||||
;
|
||||
|
||||
%define _cpp_protection_DCTSIZE DCTSIZE
|
||||
%define _cpp_protection_DCTSIZE2 DCTSIZE2
|
||||
|
||||
;
|
||||
; -- jmorecfg.h
|
||||
;
|
||||
|
||||
%define _cpp_protection_RGB_RED RGB_RED
|
||||
%define _cpp_protection_RGB_GREEN RGB_GREEN
|
||||
%define _cpp_protection_RGB_BLUE RGB_BLUE
|
||||
%define _cpp_protection_RGB_PIXELSIZE RGB_PIXELSIZE
|
||||
|
||||
%define _cpp_protection_EXT_RGB_RED EXT_RGB_RED
|
||||
%define _cpp_protection_EXT_RGB_GREEN EXT_RGB_GREEN
|
||||
%define _cpp_protection_EXT_RGB_BLUE EXT_RGB_BLUE
|
||||
%define _cpp_protection_EXT_RGB_PIXELSIZE EXT_RGB_PIXELSIZE
|
||||
|
||||
%define _cpp_protection_EXT_RGBX_RED EXT_RGBX_RED
|
||||
%define _cpp_protection_EXT_RGBX_GREEN EXT_RGBX_GREEN
|
||||
%define _cpp_protection_EXT_RGBX_BLUE EXT_RGBX_BLUE
|
||||
%define _cpp_protection_EXT_RGBX_PIXELSIZE EXT_RGBX_PIXELSIZE
|
||||
|
||||
%define _cpp_protection_EXT_BGR_RED EXT_BGR_RED
|
||||
%define _cpp_protection_EXT_BGR_GREEN EXT_BGR_GREEN
|
||||
%define _cpp_protection_EXT_BGR_BLUE EXT_BGR_BLUE
|
||||
%define _cpp_protection_EXT_BGR_PIXELSIZE EXT_BGR_PIXELSIZE
|
||||
|
||||
%define _cpp_protection_EXT_BGRX_RED EXT_BGRX_RED
|
||||
%define _cpp_protection_EXT_BGRX_GREEN EXT_BGRX_GREEN
|
||||
%define _cpp_protection_EXT_BGRX_BLUE EXT_BGRX_BLUE
|
||||
%define _cpp_protection_EXT_BGRX_PIXELSIZE EXT_BGRX_PIXELSIZE
|
||||
|
||||
%define _cpp_protection_EXT_XBGR_RED EXT_XBGR_RED
|
||||
%define _cpp_protection_EXT_XBGR_GREEN EXT_XBGR_GREEN
|
||||
%define _cpp_protection_EXT_XBGR_BLUE EXT_XBGR_BLUE
|
||||
%define _cpp_protection_EXT_XBGR_PIXELSIZE EXT_XBGR_PIXELSIZE
|
||||
|
||||
%define _cpp_protection_EXT_XRGB_RED EXT_XRGB_RED
|
||||
%define _cpp_protection_EXT_XRGB_GREEN EXT_XRGB_GREEN
|
||||
%define _cpp_protection_EXT_XRGB_BLUE EXT_XRGB_BLUE
|
||||
%define _cpp_protection_EXT_XRGB_PIXELSIZE EXT_XRGB_PIXELSIZE
|
||||
|
||||
%define RGBX_FILLER_0XFF 1
|
||||
|
||||
; Representation of a single sample (pixel element value).
|
||||
; On this SIMD implementation, this must be 'unsigned char'.
|
||||
;
|
||||
|
||||
%define JSAMPLE byte ; unsigned char
|
||||
%define SIZEOF_JSAMPLE SIZEOF_BYTE ; sizeof(JSAMPLE)
|
||||
|
||||
%define _cpp_protection_CENTERJSAMPLE CENTERJSAMPLE
|
||||
|
||||
; Representation of a DCT frequency coefficient.
|
||||
; On this SIMD implementation, this must be 'short'.
|
||||
;
|
||||
%define JCOEF word ; short
|
||||
%define SIZEOF_JCOEF SIZEOF_WORD ; sizeof(JCOEF)
|
||||
|
||||
; Datatype used for image dimensions.
|
||||
; On this SIMD implementation, this must be 'unsigned int'.
|
||||
;
|
||||
%define JDIMENSION dword ; unsigned int
|
||||
%define SIZEOF_JDIMENSION SIZEOF_DWORD ; sizeof(JDIMENSION)
|
||||
|
||||
%define JSAMPROW POINTER ; JSAMPLE * (jpeglib.h)
|
||||
%define JSAMPARRAY POINTER ; JSAMPROW * (jpeglib.h)
|
||||
%define JSAMPIMAGE POINTER ; JSAMPARRAY * (jpeglib.h)
|
||||
%define JCOEFPTR POINTER ; JCOEF * (jpeglib.h)
|
||||
%define SIZEOF_JSAMPROW SIZEOF_POINTER ; sizeof(JSAMPROW)
|
||||
%define SIZEOF_JSAMPARRAY SIZEOF_POINTER ; sizeof(JSAMPARRAY)
|
||||
%define SIZEOF_JSAMPIMAGE SIZEOF_POINTER ; sizeof(JSAMPIMAGE)
|
||||
%define SIZEOF_JCOEFPTR SIZEOF_POINTER ; sizeof(JCOEFPTR)
|
||||
|
||||
;
|
||||
; -- jdct.h
|
||||
;
|
||||
|
||||
; A forward DCT routine is given a pointer to a work area of type DCTELEM[];
|
||||
; the DCT is to be performed in-place in that buffer.
|
||||
; To maximize parallelism, Type DCTELEM is changed to short (originally, int).
|
||||
;
|
||||
%define DCTELEM word ; short
|
||||
%define SIZEOF_DCTELEM SIZEOF_WORD ; sizeof(DCTELEM)
|
||||
|
||||
// FAST_FLOAT is rewritten by the C preprocessor (it comes out as 'float'),
// so the name being defined carries the _cpp_protection_ prefix like the
// other header-derived names in this file.
%define _cpp_protection_FAST_FLOAT FP32 ; float
|
||||
%define SIZEOF_FAST_FLOAT SIZEOF_FP32 ; sizeof(FAST_FLOAT)
|
||||
|
||||
; To maximize parallelism, Type MULTIPLIER is changed to short.
|
||||
;
|
||||
%define ISLOW_MULT_TYPE word ; must be short
|
||||
%define SIZEOF_ISLOW_MULT_TYPE SIZEOF_WORD ; sizeof(ISLOW_MULT_TYPE)
|
||||
|
||||
%define IFAST_MULT_TYPE word ; must be short
|
||||
%define SIZEOF_IFAST_MULT_TYPE SIZEOF_WORD ; sizeof(IFAST_MULT_TYPE)
|
||||
%define IFAST_SCALE_BITS 2 ; fractional bits in scale factors
|
||||
|
||||
%define FLOAT_MULT_TYPE FP32 ; must be float
|
||||
%define SIZEOF_FLOAT_MULT_TYPE SIZEOF_FP32 ; sizeof(FLOAT_MULT_TYPE)
|
||||
|
||||
;
|
||||
; -- jsimd.h
|
||||
;
|
||||
|
||||
%define _cpp_protection_JSIMD_NONE JSIMD_NONE
|
||||
%define _cpp_protection_JSIMD_MMX JSIMD_MMX
|
||||
%define _cpp_protection_JSIMD_3DNOW JSIMD_3DNOW
|
||||
%define _cpp_protection_JSIMD_SSE JSIMD_SSE
|
||||
%define _cpp_protection_JSIMD_SSE2 JSIMD_SSE2
|
||||
%define _cpp_protection_JSIMD_AVX2 JSIMD_AVX2
|
||||
479
TMessagesProj/jni/mozjpeg/simd/nasm/jsimdext.inc
Normal file
479
TMessagesProj/jni/mozjpeg/simd/nasm/jsimdext.inc
Normal file
|
|
@ -0,0 +1,479 @@
|
|||
;
|
||||
; jsimdext.inc - common declarations
|
||||
;
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2010, 2016, 2019, D. R. Commander.
|
||||
; Copyright (C) 2018, Matthieu Darbois.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library - version 1.02
|
||||
;
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
;
|
||||
; This software is provided 'as-is', without any express or implied
|
||||
; warranty. In no event will the authors be held liable for any damages
|
||||
; arising from the use of this software.
|
||||
;
|
||||
; Permission is granted to anyone to use this software for any purpose,
|
||||
; including commercial applications, and to alter it and redistribute it
|
||||
; freely, subject to the following restrictions:
|
||||
;
|
||||
; 1. The origin of this software must not be misrepresented; you must not
|
||||
; claim that you wrote the original software. If you use this software
|
||||
; in a product, an acknowledgment in the product documentation would be
|
||||
; appreciated but is not required.
|
||||
; 2. Altered source versions must be plainly marked as such, and must not be
|
||||
; misrepresented as being the original software.
|
||||
; 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
; ==========================================================================
|
||||
; System-dependent configurations
|
||||
|
||||
%ifdef WIN32 ; ----(nasm -fwin32 -DWIN32 ...)--------
|
||||
; * Microsoft Visual C++
|
||||
; * MinGW (Minimalist GNU for Windows)
|
||||
; * CygWin
|
||||
; * LCC-Win32
|
||||
|
||||
; -- segment definition --
|
||||
;
|
||||
%ifdef __YASM_VER__
|
||||
%define SEG_TEXT .text align=32
|
||||
%define SEG_CONST .rdata align=32
|
||||
%else
|
||||
%define SEG_TEXT .text align=32 public use32 class=CODE
|
||||
%define SEG_CONST .rdata align=32 public use32 class=CONST
|
||||
%endif
|
||||
|
||||
%elifdef WIN64 ; ----(nasm -fwin64 -DWIN64 ...)--------
|
||||
; * Microsoft Visual C++
|
||||
|
||||
; -- segment definition --
|
||||
;
|
||||
%ifdef __YASM_VER__
|
||||
%define SEG_TEXT .text align=32
|
||||
%define SEG_CONST .rdata align=32
|
||||
%else
|
||||
%define SEG_TEXT .text align=32 public use64 class=CODE
|
||||
%define SEG_CONST .rdata align=32 public use64 class=CONST
|
||||
%endif
|
||||
%define EXTN(name) name ; foo() -> foo
|
||||
|
||||
%elifdef OBJ32 ; ----(nasm -fobj -DOBJ32 ...)----------
|
||||
; * Borland C++ (Win32)
|
||||
|
||||
; -- segment definition --
|
||||
;
|
||||
%define SEG_TEXT _text align=32 public use32 class=CODE
|
||||
%define SEG_CONST _data align=32 public use32 class=DATA
|
||||
|
||||
%elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------
|
||||
; * Linux
|
||||
; * *BSD family Unix using elf format
|
||||
; * Unix System V, including Solaris x86, UnixWare and SCO Unix
|
||||
|
||||
; mark stack as non-executable
|
||||
section .note.GNU-stack noalloc noexec nowrite progbits
|
||||
|
||||
; -- segment definition --
|
||||
;
|
||||
%ifdef __x86_64__
|
||||
%define SEG_TEXT .text progbits align=32
|
||||
%define SEG_CONST .rodata progbits align=32
|
||||
%else
|
||||
%define SEG_TEXT .text progbits alloc exec nowrite align=32
|
||||
%define SEG_CONST .rodata progbits alloc noexec nowrite align=32
|
||||
%endif
|
||||
|
||||
; To make the code position-independent, append -DPIC to the commandline
|
||||
;
|
||||
%define GOT_SYMBOL _GLOBAL_OFFSET_TABLE_ ; ELF supports PIC
|
||||
%define EXTN(name) name ; foo() -> foo
|
||||
|
||||
%elifdef AOUT ; ----(nasm -faoutb/aout -DAOUT ...)----
|
||||
; * Older Linux using a.out format (nasm -f aout -DAOUT ...)
|
||||
; * *BSD family Unix using a.out format (nasm -f aoutb -DAOUT ...)
|
||||
|
||||
; -- segment definition --
|
||||
;
|
||||
%define SEG_TEXT .text
|
||||
%define SEG_CONST .data
|
||||
|
||||
; To make the code position-independent, append -DPIC to the commandline
|
||||
;
|
||||
%define GOT_SYMBOL __GLOBAL_OFFSET_TABLE_ ; BSD-style a.out supports PIC
|
||||
|
||||
%elifdef MACHO ; ----(nasm -fmacho -DMACHO ...)--------
|
||||
; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)
|
||||
|
||||
; -- segment definition --
|
||||
;
|
||||
%define SEG_TEXT .text ;align=32 ; nasm doesn't accept align=32. why?
|
||||
%define SEG_CONST .rodata align=32
|
||||
|
||||
; The generation of position-independent code (PIC) is the default on Darwin.
|
||||
;
|
||||
%define PIC
|
||||
%define GOT_SYMBOL _MACHO_PIC_ ; Mach-O style code-relative addressing
|
||||
|
||||
%else ; ----(Other case)----------------------
|
||||
|
||||
; -- segment definition --
|
||||
;
|
||||
%define SEG_TEXT .text
|
||||
%define SEG_CONST .data
|
||||
|
||||
%endif ; ----------------------------------------------
|
||||
|
||||
; ==========================================================================
|
||||
|
||||
; --------------------------------------------------------------------------
|
||||
; Common types
|
||||
;
|
||||
%ifdef __x86_64__
|
||||
%define POINTER qword ; general pointer type
|
||||
%define SIZEOF_POINTER SIZEOF_QWORD ; sizeof(POINTER)
|
||||
%define POINTER_BIT QWORD_BIT ; sizeof(POINTER)*BYTE_BIT
|
||||
%else
|
||||
%define POINTER dword ; general pointer type
|
||||
%define SIZEOF_POINTER SIZEOF_DWORD ; sizeof(POINTER)
|
||||
%define POINTER_BIT DWORD_BIT ; sizeof(POINTER)*BYTE_BIT
|
||||
%endif
|
||||
|
||||
%define INT dword ; signed integer type
|
||||
%define SIZEOF_INT SIZEOF_DWORD ; sizeof(INT)
|
||||
%define INT_BIT DWORD_BIT ; sizeof(INT)*BYTE_BIT
|
||||
|
||||
%define FP32 dword ; IEEE754 single
|
||||
%define SIZEOF_FP32 SIZEOF_DWORD ; sizeof(FP32)
|
||||
%define FP32_BIT DWORD_BIT ; sizeof(FP32)*BYTE_BIT
|
||||
|
||||
%define MMWORD qword ; int64 (MMX register)
|
||||
%define SIZEOF_MMWORD SIZEOF_QWORD ; sizeof(MMWORD)
|
||||
%define MMWORD_BIT QWORD_BIT ; sizeof(MMWORD)*BYTE_BIT
|
||||
|
||||
; NASM is buggy and doesn't properly handle operand sizes for SSE
|
||||
; instructions, so for now we have to define XMMWORD as blank.
|
||||
%define XMMWORD ; int128 (SSE register)
|
||||
%define SIZEOF_XMMWORD SIZEOF_OWORD ; sizeof(XMMWORD)
|
||||
%define XMMWORD_BIT OWORD_BIT ; sizeof(XMMWORD)*BYTE_BIT
|
||||
|
||||
%define YMMWORD ; int256 (AVX register)
|
||||
%define SIZEOF_YMMWORD SIZEOF_YWORD ; sizeof(YMMWORD)
|
||||
%define YMMWORD_BIT YWORD_BIT ; sizeof(YMMWORD)*BYTE_BIT
|
||||
|
||||
; Similar hacks for when we load a dword or MMWORD into an xmm# register
|
||||
%define XMM_DWORD
|
||||
%define XMM_MMWORD
|
||||
|
||||
%define SIZEOF_BYTE 1 ; sizeof(byte)
|
||||
%define SIZEOF_WORD 2 ; sizeof(word)
|
||||
%define SIZEOF_DWORD 4 ; sizeof(dword)
|
||||
%define SIZEOF_QWORD 8 ; sizeof(qword)
|
||||
%define SIZEOF_OWORD 16 ; sizeof(oword)
|
||||
%define SIZEOF_YWORD 32 ; sizeof(yword)
|
||||
|
||||
%define BYTE_BIT 8 ; CHAR_BIT in C
|
||||
%define WORD_BIT 16 ; sizeof(word)*BYTE_BIT
|
||||
%define DWORD_BIT 32 ; sizeof(dword)*BYTE_BIT
|
||||
%define QWORD_BIT 64 ; sizeof(qword)*BYTE_BIT
|
||||
%define OWORD_BIT 128 ; sizeof(oword)*BYTE_BIT
|
||||
%define YWORD_BIT 256 ; sizeof(yword)*BYTE_BIT
|
||||
|
||||
; --------------------------------------------------------------------------
|
||||
; External Symbol Name
|
||||
;
|
||||
%ifndef EXTN
|
||||
%define EXTN(name) _ %+ name ; foo() -> _foo
|
||||
%endif
|
||||
|
||||
; --------------------------------------------------------------------------
|
||||
; Hidden symbols
|
||||
;
|
||||
%ifdef ELF ; ----(nasm -felf[64] -DELF ...)--------
|
||||
%define GLOBAL_FUNCTION(name) global EXTN(name):function hidden
|
||||
%define GLOBAL_DATA(name) global EXTN(name):data hidden
|
||||
%elifdef MACHO ; ----(nasm -fmacho -DMACHO ...)--------
|
||||
%ifdef __YASM_VER__
|
||||
%define GLOBAL_FUNCTION(name) global EXTN(name):private_extern
|
||||
%define GLOBAL_DATA(name) global EXTN(name):private_extern
|
||||
%else
|
||||
%if __NASM_VERSION_ID__ >= 0x020E0000
|
||||
%define GLOBAL_FUNCTION(name) global EXTN(name):private_extern
|
||||
%define GLOBAL_DATA(name) global EXTN(name):private_extern
|
||||
%endif
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%ifndef GLOBAL_FUNCTION
|
||||
%define GLOBAL_FUNCTION(name) global EXTN(name)
|
||||
%endif
|
||||
%ifndef GLOBAL_DATA
|
||||
%define GLOBAL_DATA(name) global EXTN(name)
|
||||
%endif
|
||||
|
||||
; --------------------------------------------------------------------------
|
||||
; Macros for position-independent code (PIC) support
|
||||
;
|
||||
%ifndef GOT_SYMBOL
|
||||
%undef PIC
|
||||
%endif
|
||||
|
||||
%ifdef PIC ; -------------------------------------------
|
||||
|
||||
%ifidn GOT_SYMBOL, _MACHO_PIC_ ; --------------------
|
||||
|
||||
; At present, nasm doesn't seem to support PIC generation for Mach-O.
|
||||
; The PIC support code below is a little tricky.
|
||||
|
||||
SECTION SEG_CONST
|
||||
const_base:
|
||||
|
||||
%define GOTOFF(got, sym) (got) + (sym) - const_base
|
||||
|
||||
%imacro get_GOT 1
|
||||
; NOTE: this macro destroys ecx register.
|
||||
call %%geteip
|
||||
add ecx, byte (%%ref - $)
|
||||
jmp short %%adjust
|
||||
%%geteip:
|
||||
mov ecx, POINTER [esp]
|
||||
ret
|
||||
%%adjust:
|
||||
push ebp
|
||||
xor ebp, ebp ; ebp = 0
|
||||
%ifidni %1, ebx ; (%1 == ebx)
|
||||
; db 0x8D,0x9C + jmp near const_base =
|
||||
; lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
|
||||
db 0x8D, 0x9C ; 8D,9C
|
||||
jmp near const_base ; E9,(const_base-%%ref)
|
||||
%%ref:
|
||||
%else ; (%1 != ebx)
|
||||
; db 0x8D,0x8C + jmp near const_base =
|
||||
; lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
|
||||
db 0x8D, 0x8C ; 8D,8C
|
||||
jmp near const_base ; E9,(const_base-%%ref)
|
||||
%%ref:
|
||||
mov %1, ecx
|
||||
%endif ; (%1 == ebx)
|
||||
pop ebp
|
||||
%endmacro
|
||||
|
||||
%else ; GOT_SYMBOL != _MACHO_PIC_ ----------------
|
||||
|
||||
%define GOTOFF(got, sym) (got) + (sym) wrt ..gotoff
|
||||
|
||||
; get_GOT <reg>  (PIC variant used when GOT_SYMBOL is not _MACHO_PIC_)
; Loads the address of GOT_SYMBOL into <reg>.
; The call/ret pair exists only to read the instruction pointer: the helper
; at the local geteip label copies its own return address from the top of the
; stack into <reg> and returns; the "add" right after the call then adjusts
; that address by the ..gotpc-relative offset of GOT_SYMBOL.
; Uses esp, so this sequence is written for 32-bit code.
; NOTE(review): flags are modified by the add -- confirm no caller relies on
; flags surviving this macro.
%imacro get_GOT 1
|
||||
extern GOT_SYMBOL
|
||||
call %%geteip ; pushes the address of the following add
|
||||
add %1, GOT_SYMBOL + $$ - $ wrt ..gotpc ; return address -> GOT address
|
||||
jmp short %%done ; skip over the helper below
|
||||
%%geteip:
|
||||
mov %1, POINTER [esp] ; <reg> = return address of the call above
|
||||
ret
|
||||
|
||||
%%done:
|
||||
%endmacro
|
||||
|
||||
%endif ; GOT_SYMBOL == _MACHO_PIC_ ----------------
|
||||
|
||||
%imacro pushpic 1.nolist
|
||||
push %1
|
||||
%endmacro
|
||||
%imacro poppic 1.nolist
|
||||
pop %1
|
||||
%endmacro
|
||||
%imacro movpic 2.nolist
|
||||
mov %1, %2
|
||||
%endmacro
|
||||
|
||||
%else ; !PIC -----------------------------------------
|
||||
|
||||
%define GOTOFF(got, sym) (sym)
|
||||
|
||||
%imacro get_GOT 1.nolist
|
||||
%endmacro
|
||||
%imacro pushpic 1.nolist
|
||||
%endmacro
|
||||
%imacro poppic 1.nolist
|
||||
%endmacro
|
||||
%imacro movpic 2.nolist
|
||||
%endmacro
|
||||
|
||||
%endif ; PIC -----------------------------------------
|
||||
|
||||
; --------------------------------------------------------------------------
|
||||
; Align the next instruction on {2,4,8,16,..}-byte boundary.
|
||||
; ".balign n,,m" in GNU as
|
||||
;
|
||||
%define MSKLE(x, y) (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
|
||||
%define FILLB(b, n) (($$-(b)) & ((n)-1))
|
||||
|
||||
%imacro alignx 1-2.nolist 0xFFFF
|
||||
%%bs: \
|
||||
times MSKLE(FILLB(%%bs, %1), %2) & MSKLE(16, FILLB($, %1)) & FILLB($, %1) \
|
||||
db 0x90 ; nop
|
||||
times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 9 \
|
||||
db 0x8D, 0x9C, 0x23, 0x00, 0x00, 0x00, 0x00 ; lea ebx,[ebx+0x00000000]
|
||||
times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 7 \
|
||||
db 0x8D, 0xAC, 0x25, 0x00, 0x00, 0x00, 0x00 ; lea ebp,[ebp+0x00000000]
|
||||
times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 6 \
|
||||
db 0x8D, 0xAD, 0x00, 0x00, 0x00, 0x00 ; lea ebp,[ebp+0x00000000]
|
||||
times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 4 \
|
||||
db 0x8D, 0x6C, 0x25, 0x00 ; lea ebp,[ebp+0x00]
|
||||
times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 3 \
|
||||
db 0x8D, 0x6D, 0x00 ; lea ebp,[ebp+0x00]
|
||||
times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 2 \
|
||||
db 0x8B, 0xED ; mov ebp,ebp
|
||||
times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 1 \
|
||||
db 0x90 ; nop
|
||||
%endmacro
|
||||
|
||||
; Align the next data on {2,4,8,16,..}-byte boundary.
|
||||
;
|
||||
%imacro alignz 1.nolist
|
||||
align %1, db 0 ; filling zeros
|
||||
%endmacro
|
||||
|
||||
%ifdef __x86_64__
|
||||
|
||||
%ifdef WIN64
|
||||
|
||||
; collect_args <nargs>  (Win64 variant)
; Saves xmm6 and xmm7 on the stack, then copies the first <nargs> arguments
; into r10, r11, r12, r13, r14, r15 (in that order) -- the same registers the
; non-Windows variant of this macro fills -- and finally saves rsi and rdi.
; r12..r15 are pushed before being overwritten; r10 and r11 are not saved.
; Everything saved here is restored, in reverse order, by
; "uncollect_args <nargs>", which must be given the same count.
; NOTE(review): arguments 5 and 6 are read relative to rax, so rax presumably
; has to hold the frame pointer set up by the function prologue before this
; macro is invoked -- confirm against the functions that use it.
; NOTE(review): movaps needs a 16-byte aligned address, so rsp is assumed to
; be 16-byte aligned on entry to the macro -- confirm.
%imacro collect_args 1
|
||||
sub rsp, SIZEOF_XMMWORD
|
||||
movaps XMMWORD [rsp], xmm6 ; save xmm6
|
||||
sub rsp, SIZEOF_XMMWORD
|
||||
movaps XMMWORD [rsp], xmm7 ; save xmm7
|
||||
mov r10, rcx ; argument 1
|
||||
%if %1 > 1
|
||||
mov r11, rdx ; argument 2
|
||||
%endif
|
||||
%if %1 > 2
|
||||
push r12
|
||||
mov r12, r8 ; argument 3
|
||||
%endif
|
||||
%if %1 > 3
|
||||
push r13
|
||||
mov r13, r9 ; argument 4
|
||||
%endif
|
||||
%if %1 > 4
|
||||
push r14
|
||||
mov r14, [rax+48] ; argument 5, read from memory relative to rax
|
||||
%endif
|
||||
%if %1 > 5
|
||||
push r15
|
||||
mov r15, [rax+56] ; argument 6, read from memory relative to rax
|
||||
%endif
|
||||
push rsi
|
||||
push rdi
|
||||
%endmacro
|
||||
|
||||
%imacro uncollect_args 1
|
||||
pop rdi
|
||||
pop rsi
|
||||
%if %1 > 5
|
||||
pop r15
|
||||
%endif
|
||||
%if %1 > 4
|
||||
pop r14
|
||||
%endif
|
||||
%if %1 > 3
|
||||
pop r13
|
||||
%endif
|
||||
%if %1 > 2
|
||||
pop r12
|
||||
%endif
|
||||
movaps xmm7, XMMWORD [rsp]
|
||||
add rsp, SIZEOF_XMMWORD
|
||||
movaps xmm6, XMMWORD [rsp]
|
||||
add rsp, SIZEOF_XMMWORD
|
||||
%endmacro
|
||||
|
||||
; push_xmm <count>  (Win64 variant)
; Reserves <count> * SIZEOF_XMMWORD bytes of stack and stores xmm8 upward
; into consecutive slots: xmm8 is always stored, xmm9..xmm11 only when
; <count> is greater than 1, 2 and 3 respectively. Counts above 4 reserve
; more stack but store no additional registers.
; "pop_xmm <count>" with the same count reloads the registers and releases
; the stack space.
; NOTE(review): movaps needs a 16-byte aligned address, so rsp is assumed to
; be 16-byte aligned when this macro runs -- confirm.
%imacro push_xmm 1
|
||||
sub rsp, %1 * SIZEOF_XMMWORD
|
||||
movaps XMMWORD [rsp+0*SIZEOF_XMMWORD], xmm8
|
||||
%if %1 > 1
|
||||
movaps XMMWORD [rsp+1*SIZEOF_XMMWORD], xmm9
|
||||
%endif
|
||||
%if %1 > 2
|
||||
movaps XMMWORD [rsp+2*SIZEOF_XMMWORD], xmm10
|
||||
%endif
|
||||
%if %1 > 3
|
||||
movaps XMMWORD [rsp+3*SIZEOF_XMMWORD], xmm11
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
%imacro pop_xmm 1
|
||||
movaps xmm8, XMMWORD [rsp+0*SIZEOF_XMMWORD]
|
||||
%if %1 > 1
|
||||
movaps xmm9, XMMWORD [rsp+1*SIZEOF_XMMWORD]
|
||||
%endif
|
||||
%if %1 > 2
|
||||
movaps xmm10, XMMWORD [rsp+2*SIZEOF_XMMWORD]
|
||||
%endif
|
||||
%if %1 > 3
|
||||
movaps xmm11, XMMWORD [rsp+3*SIZEOF_XMMWORD]
|
||||
%endif
|
||||
add rsp, %1 * SIZEOF_XMMWORD
|
||||
%endmacro
|
||||
|
||||
%else
|
||||
|
||||
%imacro collect_args 1
|
||||
push r10
|
||||
mov r10, rdi
|
||||
%if %1 > 1
|
||||
push r11
|
||||
mov r11, rsi
|
||||
%endif
|
||||
%if %1 > 2
|
||||
push r12
|
||||
mov r12, rdx
|
||||
%endif
|
||||
%if %1 > 3
|
||||
push r13
|
||||
mov r13, rcx
|
||||
%endif
|
||||
%if %1 > 4
|
||||
push r14
|
||||
mov r14, r8
|
||||
%endif
|
||||
%if %1 > 5
|
||||
push r15
|
||||
mov r15, r9
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
%imacro uncollect_args 1
|
||||
%if %1 > 5
|
||||
pop r15
|
||||
%endif
|
||||
%if %1 > 4
|
||||
pop r14
|
||||
%endif
|
||||
%if %1 > 3
|
||||
pop r13
|
||||
%endif
|
||||
%if %1 > 2
|
||||
pop r12
|
||||
%endif
|
||||
%if %1 > 1
|
||||
pop r11
|
||||
%endif
|
||||
pop r10
|
||||
%endmacro
|
||||
|
||||
%imacro push_xmm 1
|
||||
%endmacro
|
||||
|
||||
%imacro pop_xmm 1
|
||||
%endmacro
|
||||
|
||||
%endif
|
||||
|
||||
%endif
|
||||
|
||||
; --------------------------------------------------------------------------
|
||||
; Defines picked up from the C headers
|
||||
;
|
||||
%include "jsimdcfg.inc"
|
||||
|
||||
; --------------------------------------------------------------------------
|
||||
Loading…
Add table
Add a link
Reference in a new issue