diff options
Diffstat (limited to 'src/small3dlib.h')
-rw-r--r-- | src/small3dlib.h | 2743 |
1 files changed, 0 insertions, 2743 deletions
diff --git a/src/small3dlib.h b/src/small3dlib.h deleted file mode 100644 index 3ba5589..0000000 --- a/src/small3dlib.h +++ /dev/null @@ -1,2743 +0,0 @@ -#ifndef SMALL3DLIB_H -#define SMALL3DLIB_H - -/* - Simple realtime 3D software rasterization renderer. It is fast, focused on - resource-limited computers, located in a single C header file, with no - dependencies, using only 32bit integer arithmetics. - - author: Miloslav Ciz - license: CC0 1.0 (public domain) - found at https://creativecommons.org/publicdomain/zero/1.0/ - + additional waiver of all IP - version: 0.901d - - Before including the library, define S3L_PIXEL_FUNCTION to the name of the - function you'll be using to draw single pixels (this function will be called - by the library to render the frames). Also either init S3L_resolutionX and - S3L_resolutionY or define S3L_RESOLUTION_X and S3L_RESOLUTION_Y. - - You'll also need to decide what rendering strategy and other settings you - want to use, depending on your specific usecase. You may want to use a - z-buffer (full or reduced, S3L_Z_BUFFER), sorted-drawing (S3L_SORT), or even - none of these. See the description of the options in this file. - - The rendering itself is done with S3L_drawScene, usually preceded by - S3L_newFrame (for clearing zBuffer etc.). - - The library is meant to be used in not so huge programs that use single - translation unit and so includes both declarations and implementation at once. - If you for some reason use multiple translation units (which include the - library), you'll have to handle this yourself (e.g. create a wrapper, manually - split the library into .c and .h etc.). - - -------------------- - - This work's goal is to never be encumbered by any exclusive intellectual - property rights. The work is therefore provided under CC0 1.0 + additional - WAIVER OF ALL INTELLECTUAL PROPERTY RIGHTS that waives the rest of - intellectual property rights not already waived by CC0 1.0. 
The WAIVER OF ALL - INTELLECTUAL PROPERTY RIGHTS is as follows: - - Each contributor to this work agrees that they waive any exclusive rights, - including but not limited to copyright, patents, trademark, trade dress, - industrial design, plant varieties and trade secrets, to any and all ideas, - concepts, processes, discoveries, improvements and inventions conceived, - discovered, made, designed, researched or developed by the contributor either - solely or jointly with others, which relate to this work or result from this - work. Should any waiver of such right be judged legally invalid or - ineffective under applicable law, the contributor hereby grants to each - affected person a royalty-free, non transferable, non sublicensable, non - exclusive, irrevocable and unconditional license to this right. - - -------------------- - - CONVENTIONS: - - This library should never draw pixels outside the specified screen - boundaries, so you don't have to check this (that would cost CPU time)! - - You can safely assume that triangles are rasterized one by one and from top - down, left to right (so you can utilize e.g. various caches), and if sorting - is disabled the order of rasterization will be that specified in the scene - structure and model arrays (of course, some triangles and models may be - skipped due to culling etc.). - - Angles are in S3L_Units, a full angle (2 pi) is S3L_FRACTIONS_PER_UNITs. - - We use row vectors. - - In 3D space, a left-handed coord. system is used. One spatial unit is split - into S3L_FRACTIONS_PER_UNIT fractions (fixed point arithmetic). - - y ^ - | _ - | /| z - | / - | / - [0,0,0]-------> x - - Untransformed camera is placed at [0,0,0], looking forward along +z axis. The - projection plane is centered at [0,0,0], stretching from - -S3L_FRACTIONS_PER_UNIT to S3L_FRACTIONS_PER_UNIT horizontally (x), - vertical size (y) depends on the aspect ratio (S3L_RESOLUTION_X and - S3L_RESOLUTION_Y). Camera FOV is defined by focal length in S3L_Units. 
- - y ^ - | _ - | /| z - ____|_/__ - | |/ | - -----[0,0,0]-|-----> x - |____|____| - | - | - - Rotations use Euler angles and are generally in the extrinsic Euler angles in - ZXY order (by Z, then by X, then by Y). Positive rotation about an axis - rotates CW (clock-wise) when looking in the direction of the axis. - - Coordinates of pixels on the screen start at the top left, from [0,0]. - - There is NO subpixel accuracy (screen coordinates are only integer). - - Triangle rasterization rules are these (mostly same as OpenGL, D3D etc.): - - - Let's define: - - left side: - - not exactly horizontal, and on the left side of triangle - - exactly horizontal and above the topmost - (in other words: its normal points at least a little to the left or - completely up) - - right side: not left side - - Pixel centers are at integer coordinates and triangle for drawing are - specified with integer coordinates of pixel centers. - - A pixel is rasterized: - - if its center is inside the triangle OR - - if its center is exactly on the triangle side which is left and at the - same time is not on the side that's right (case of a triangle that's on - a single line) OR - - if its center is exactly on the triangle corner of sides neither of which - is right. - - These rules imply among others: - - - Adjacent triangles don't have any overlapping pixels, nor gaps between. - - Triangles of points that lie on a single line are NOT rasterized. - - A single "long" triangle CAN be rasterized as isolated islands of pixels. - - Transforming (e.g. mirroring, rotating by 90 degrees etc.) a result of - rasterizing triangle A is NOT generally equal to applying the same - transformation to triangle A first and then rasterizing it. Even the number - of rasterized pixels is usually different. - - If specifying a triangle with integer coordinates (which we are), then: - - The bottom-most corner (or side) of a triangle is never rasterized - (because it is connected to a right side). 
- - The top-most corner can only be rasterized on completely horizontal side - (otherwise it is connected to a right side). - - Vertically middle corner is rasterized if and only if it is on the left - of the triangle and at the same time is also not the bottom-most corner. -*/ - -#include <stdint.h> - -#ifdef S3L_RESOLUTION_X -#ifdef S3L_RESOLUTION_Y -#define S3L_MAX_PIXELS (S3L_RESOLUTION_X * S3L_RESOLUTION_Y) -#endif -#endif - -#ifndef S3L_RESOLUTION_X -#ifndef S3L_MAX_PIXELS -#error Dynamic resolution set (S3L_RESOLUTION_X not defined), but\ - S3L_MAX_PIXELS not defined! -#endif - -uint16_t S3L_resolutionX = 512; /**< If a static resolution is not set with - S3L_RESOLUTION_X, this variable can be - used to change X resolution at runtime, - in which case S3L_MAX_PIXELS has to be - defined (to allocate zBuffer etc.)! */ -#define S3L_RESOLUTION_X S3L_resolutionX -#endif - -#ifndef S3L_RESOLUTION_Y -#ifndef S3L_MAX_PIXELS -#error Dynamic resolution set (S3L_RESOLUTION_Y not defined), but\ - S3L_MAX_PIXELS not defined! -#endif - -uint16_t S3L_resolutionY = 512; /**< Same as S3L_resolutionX, but for Y - resolution. */ -#define S3L_RESOLUTION_Y S3L_resolutionY -#endif - -#ifndef S3L_USE_WIDER_TYPES -/** If true, the library will use wider data types which will largely suppress -many rendering bugs and imprecisions happening due to overflows, but this will -also consume more RAM and may potentially be slower on computers with smaller -native integer. */ - -#define S3L_USE_WIDER_TYPES 0 -#endif - -/** Units of measurement in 3D space. There is S3L_FRACTIONS_PER_UNIT in one -spatial unit. By dividing the unit into fractions we effectively achieve a -fixed point arithmetic. The number of fractions is a constant that serves as -1.0 in floating point arithmetic (normalization etc.). */ - -typedef -#if S3L_USE_WIDER_TYPES - int64_t -#else - int32_t -#endif - S3L_Unit; - -/** How many fractions a spatial unit is split into. 
This is NOT SUPPOSED TO -BE REDEFINED, so rather don't do it (otherwise things may overflow etc.). */ - -#define S3L_FRACTIONS_PER_UNIT 512 - -typedef -#if S3L_USE_WIDER_TYPES - int32_t -#else - int16_t -#endif - S3L_ScreenCoord; - -typedef -#if S3L_USE_WIDER_TYPES - uint32_t -#else - uint16_t -#endif - S3L_Index; - -#ifndef S3L_NEAR_CROSS_STRATEGY -/** Specifies how the library will handle triangles that partially cross the -near plane. These are problematic and require special handling. Possible -values: - - 0: Strictly cull any triangle crossing the near plane. This will make such - triangles disappear. This is good for performance or models viewed only - from at least small distance. - 1: Forcefully push the vertices crossing near plane in front of it. This is - a cheap technique that can be good enough for displaying simple - environments on slow devices, but texturing and geometric artifacts/warps - will appear. - 2: Geometrically correct the triangles crossing the near plane. This may - result in some triangles being subdivided into two and is a little more - expensive, but the results will be geometrically correct, even though - barycentric correction is not performed so texturing artifacts will - appear. Can be ideal with S3L_FLAT. - 3: Perform both geometrical and barycentric correction of triangle crossing - the near plane. This is significantly more expensive but results in - correct rendering. */ - -#define S3L_NEAR_CROSS_STRATEGY 0 -#endif - -#ifndef S3L_FLAT -/** If on, disables computation of per-pixel values such as barycentric -coordinates and depth -- these will still be available but will be the same -for the whole triangle. This can be used to create flat-shaded renders and -will be a lot faster. With this option on you will probably want to use -sorting instead of z-buffer. 
*/ - -#define S3L_FLAT 0 -#endif - -#if S3L_FLAT -#define S3L_COMPUTE_DEPTH 0 -#define S3L_PERSPECTIVE_CORRECTION 0 -// don't disable z-buffer, it makes sense to use it with no sorting -#endif - -#ifndef S3L_PERSPECTIVE_CORRECTION -/** Specifies what type of perspective correction (PC) to use. Remember this -is an expensive operation! Possible values: - -0: No perspective correction. Fastest, inaccurate from most angles. -1: Per-pixel perspective correction, accurate but very expensive. -2: Approximation (computing only at every S3L_PC_APPROX_LENGTHth pixel). - Quake-style approximation is used, which only computes the PC after - S3L_PC_APPROX_LENGTH pixels. This is reasonably accurate and fast. */ - -#define S3L_PERSPECTIVE_CORRECTION 0 -#endif - -#ifndef S3L_PC_APPROX_LENGTH -/** For S3L_PERSPECTIVE_CORRECTION == 2, this specifies after how many pixels -PC is recomputed. Should be a power of two to keep up the performance. -Smaller is nicer but slower. */ - -#define S3L_PC_APPROX_LENGTH 32 -#endif - -#if S3L_PERSPECTIVE_CORRECTION -#define S3L_COMPUTE_DEPTH 1 // PC inevitably computes depth, so enable it -#endif - -#ifndef S3L_COMPUTE_DEPTH -/** Whether to compute depth for each pixel (fragment). Some other options -may turn this on automatically. If you don't need depth information, turning -this off can save performance. Depth will still be accessible in -S3L_PixelInfo, but will be constant -- equal to center point depth -- over -the whole triangle. */ -#define S3L_COMPUTE_DEPTH 1 -#endif - -#ifndef S3L_Z_BUFFER -/** What type of z-buffer (depth buffer) to use for visibility determination. -Possible values: - -0: Don't use z-buffer. This saves a lot of memory, but visibility checking - won't be pixel-accurate and has to mostly be done by other means (typically - sorting). -1: Use full z-buffer (of S3L_Units) for visibility determination. This is the - most accurate option (and also a fast one), but requires a big amount of - memory. 
-2: Use reduced-size z-buffer (of bytes). This is fast and somewhat accurate, - but inaccuracies can occur and a considerable amount of memory is - needed. */ - -#define S3L_Z_BUFFER 0 -#endif - -#ifndef S3L_REDUCED_Z_BUFFER_GRANULARITY -/** For S3L_Z_BUFFER == 2 this sets the reduced z-buffer granularity. */ - -#define S3L_REDUCED_Z_BUFFER_GRANULARITY 5 -#endif - -#ifndef S3L_STENCIL_BUFFER -/** Whether to use stencil buffer for drawing -- with this a pixel that would -be rasterized over an already rasterized pixel (within a frame) will be -discarded. This is mostly for front-to-back sorted drawing. */ - -#define S3L_STENCIL_BUFFER 0 -#endif - -#ifndef S3L_SORT -/** Defines how to sort triangles before drawing a frame. This can be used to -solve visibility in case z-buffer is not used, to prevent overwriting already -rasterized pixels, implement transparency etc. Note that for simplicity and -performance a relatively simple sorting is used which doesn't work completely -correctly, so mistakes can occur (even the best sorting wouldn't be able to -solve e.g. intersecting triangles). Note that sorting requires a bit of extra -memory -- an array of the triangles to sort -- the size of this array limits -the maximum number of triangles that can be drawn in a single frame -(S3L_MAX_TRIANGES_DRAWN). Possible values: - -0: Don't sort triangles. This is fastest and doesn't use extra memory. -1: Sort triangles from back to front. This can in most cases solve visibility - without requiring almost any extra memory compared to z-buffer. -2: Sort triangles from front to back. This can be faster than back to front - because we prevent computing pixels that will be overwritten by nearer - ones, but we need a 1b stencil buffer for this (enable S3L_STENCIL_BUFFER), - so a bit more memory is needed. */ - -#define S3L_SORT 0 -#endif - -#ifndef S3L_MAX_TRIANGES_DRAWN -/** Maximum number of triangles that can be drawn in sorted modes. 
This -affects the size of the cache used for triangle sorting. */ - -#define S3L_MAX_TRIANGES_DRAWN 128 -#endif - -#ifndef S3L_NEAR -/** Distance of the near clipping plane. Points in front or EXACTLY ON this -plane are considered outside the frustum. This must be > 0. */ - -#define S3L_NEAR (S3L_FRACTIONS_PER_UNIT / 4) -#endif - -#if S3L_NEAR <= 0 -#define S3L_NEAR 1 // Can't be <= 0. -#endif - -#ifndef S3L_NORMAL_COMPUTE_MAXIMUM_AVERAGE -/** Affects the S3L_computeModelNormals function. See its description for -details. */ - -#define S3L_NORMAL_COMPUTE_MAXIMUM_AVERAGE 6 -#endif - -#ifndef S3L_FAST_LERP_QUALITY -/** Quality (scaling) of SOME (stepped) linear interpolations. 0 will most -likely be a tiny bit faster, but artifacts can occur for bigger tris, while -higher values can fix this -- in theory all higher values will have the same -speed (it is a shift value), but it mustn't be too high to prevent -overflow. */ - -#define S3L_FAST_LERP_QUALITY 11 -#endif - -/** Vector that consists of four scalars and can represent homogeneous - coordinates, but is generally also used as Vec3 and Vec2 for various - purposes. */ -typedef struct { - S3L_Unit x; - S3L_Unit y; - S3L_Unit z; - S3L_Unit w; -} S3L_Vec4; - -#define S3L_logVec4(v) \ - printf("Vec4: %d %d %d %d\n", ((v).x), ((v).y), ((v).z), ((v).w)) - -static inline void S3L_vec4Init(S3L_Vec4* v); -static inline void S3L_vec4Set(S3L_Vec4* v, - S3L_Unit x, - S3L_Unit y, - S3L_Unit z, - S3L_Unit w); -static inline void S3L_vec3Add(S3L_Vec4* result, S3L_Vec4 added); -static inline void S3L_vec3Sub(S3L_Vec4* result, S3L_Vec4 substracted); -S3L_Unit S3L_vec3Length(S3L_Vec4 v); - -/** Normalizes Vec3. Note that this function tries to normalize correctly - rather than quickly! If you need to normalize quickly, do it yourself in a - way that best fits your case. 
*/ -void S3L_vec3Normalize(S3L_Vec4* v); - -/** Like S3L_vec3Normalize, but doesn't perform any checks on the input vector, - which is faster, but can be very inaccurate or overflowing. You are supposed - to provide a "nice" vector (not too big or small). */ -static inline void S3L_vec3NormalizeFast(S3L_Vec4* v); - -S3L_Unit S3L_vec2Length(S3L_Vec4 v); -void S3L_vec3Cross(S3L_Vec4 a, S3L_Vec4 b, S3L_Vec4* result); -static inline S3L_Unit S3L_vec3Dot(S3L_Vec4 a, S3L_Vec4 b); - -/** Computes a reflection direction (typically used e.g. for specular component - in Phong illumination). The input vectors must be normalized. The result will - be normalized as well. */ -void S3L_reflect(S3L_Vec4 toLight, S3L_Vec4 normal, S3L_Vec4* result); - -/** Determines the winding of a triangle, returns 1 (CW, clockwise), -1 (CCW, - counterclockwise) or 0 (points lie on a single line). */ -static inline int8_t S3L_triangleWinding(S3L_ScreenCoord x0, - S3L_ScreenCoord y0, - S3L_ScreenCoord x1, - S3L_ScreenCoord y1, - S3L_ScreenCoord x2, - S3L_ScreenCoord y2); - -typedef struct { - S3L_Vec4 translation; - S3L_Vec4 rotation; /**< Euler angles. Rotation is applied in this order: - 1. z = by z (roll) CW looking along z+ - 2. x = by x (pitch) CW looking along x+ - 3. 
y = by y (yaw) CW looking along y+ */ - S3L_Vec4 scale; -} S3L_Transform3D; - -#define S3L_logTransform3D(t) \ - printf("Transform3D: T = [%d %d %d], R = [%d %d %d], S = [%d %d %d]\n", \ - (t).translation.x, (t).translation.y, (t).translation.z, \ - (t).rotation.x, (t).rotation.y, (t).rotation.z, (t).scale.x, \ - (t).scale.y, (t).scale.z) - -static inline void S3L_transform3DInit(S3L_Transform3D* t); - -void S3L_lookAt(S3L_Vec4 pointTo, S3L_Transform3D* t); - -void S3L_transform3DSet(S3L_Unit tx, - S3L_Unit ty, - S3L_Unit tz, - S3L_Unit rx, - S3L_Unit ry, - S3L_Unit rz, - S3L_Unit sx, - S3L_Unit sy, - S3L_Unit sz, - S3L_Transform3D* t); - -/** Converts rotation transformation to three direction vectors of given length - (any one can be NULL, in which case it won't be computed). */ -void S3L_rotationToDirections(S3L_Vec4 rotation, - S3L_Unit length, - S3L_Vec4* forw, - S3L_Vec4* right, - S3L_Vec4* up); - -/** 4x4 matrix, used mostly for 3D transforms. The indexing is this: - matrix[column][row]. */ -typedef S3L_Unit S3L_Mat4[4][4]; - -#define S3L_logMat4(m) \ - printf( \ - "Mat4:\n %d %d %d %d\n %d %d %d %d\n %d %d %d %d\n %d %d %d %d\n", \ - (m)[0][0], (m)[1][0], (m)[2][0], (m)[3][0], (m)[0][1], (m)[1][1], \ - (m)[2][1], (m)[3][1], (m)[0][2], (m)[1][2], (m)[2][2], (m)[3][2], \ - (m)[0][3], (m)[1][3], (m)[2][3], (m)[3][3]) - -/** Initializes a 4x4 matrix to identity. */ -static inline void S3L_mat4Init(S3L_Mat4 m); - -void S3L_mat4Copy(S3L_Mat4 src, S3L_Mat4 dst); - -void S3L_mat4Transpose(S3L_Mat4 m); - -void S3L_makeTranslationMat(S3L_Unit offsetX, - S3L_Unit offsetY, - S3L_Unit offsetZ, - S3L_Mat4 m); - -/** Makes a scaling matrix. DON'T FORGET: scale of 1.0 is set with - S3L_FRACTIONS_PER_UNIT! */ -void S3L_makeScaleMatrix(S3L_Unit scaleX, - S3L_Unit scaleY, - S3L_Unit scaleZ, - S3L_Mat4 m); - -/** Makes a matrix for rotation in the ZXY order. 
*/ -void S3L_makeRotationMatrixZXY(S3L_Unit byX, - S3L_Unit byY, - S3L_Unit byZ, - S3L_Mat4 m); - -void S3L_makeWorldMatrix(S3L_Transform3D worldTransform, S3L_Mat4 m); -void S3L_makeCameraMatrix(S3L_Transform3D cameraTransform, S3L_Mat4 m); - -/** Multiplies a vector by a matrix with normalization by - S3L_FRACTIONS_PER_UNIT. Result is stored in the input vector. */ -void S3L_vec4Xmat4(S3L_Vec4* v, S3L_Mat4 m); - -/** Same as S3L_vec4Xmat4 but faster, because this version doesn't compute the - W component of the result, which is usually not needed. */ -void S3L_vec3Xmat4(S3L_Vec4* v, S3L_Mat4 m); - -/** Multiplies two matrices with normalization by S3L_FRACTIONS_PER_UNIT. - Result is stored in the first matrix. The result represents a transformation - that has the same effect as applying the transformation represented by m1 and - then m2 (in that order). */ -void S3L_mat4Xmat4(S3L_Mat4 m1, S3L_Mat4 m2); - -typedef struct { - S3L_Unit focalLength; ///< Defines the field of view (FOV). - S3L_Transform3D transform; -} S3L_Camera; - -void S3L_cameraInit(S3L_Camera* camera); - -typedef struct { - uint8_t backfaceCulling; /**< What backface culling to use. Possible - values: - - 0 none - - 1 clock-wise - - 2 counter clock-wise */ - int8_t visible; /**< Can be used to easily hide the model. */ -} S3L_DrawConfig; - -void S3L_drawConfigInit(S3L_DrawConfig* config); - -typedef struct { - const S3L_Unit* vertices; - S3L_Index vertexCount; - const S3L_Index* triangles; - S3L_Index triangleCount; - S3L_Transform3D transform; - S3L_Mat4* customTransformMatrix; /**< This can be used to override the - transform (if != 0) with a custom - transform matrix, which is more - general. */ - S3L_DrawConfig config; -} S3L_Model3D; ///< Represents a 3D model. 
- -void S3L_model3DInit(const S3L_Unit* vertices, - S3L_Index vertexCount, - const S3L_Index* triangles, - S3L_Index triangleCount, - S3L_Model3D* model); - -typedef struct { - S3L_Model3D* models; - S3L_Index modelCount; - S3L_Camera camera; -} S3L_Scene; ///< Represents the 3D scene to be rendered. - -void S3L_sceneInit(S3L_Model3D* models, S3L_Index modelCount, S3L_Scene* scene); - -typedef struct { - S3L_ScreenCoord x; ///< Screen X coordinate. - S3L_ScreenCoord y; ///< Screen Y coordinate. - - S3L_Unit barycentric[3]; /**< Barycentric coords correspond to the three - vertices. These serve to locate the pixel on a - triangle and interpolate values between its - three points. Each one goes from 0 to - S3L_FRACTIONS_PER_UNIT (including), but due to - rounding error may fall outside this range (you - can use S3L_correctBarycentricCoords to fix this - for the price of some performance). The sum of - the three coordinates will always be exactly - S3L_FRACTIONS_PER_UNIT. */ - S3L_Index modelIndex; ///< Model index within the scene. - S3L_Index triangleIndex; ///< Triangle index within the model. - uint32_t triangleID; /**< Unique ID of the triangle within the whole - scene. This can be used e.g. by a cache to - quickly find out if a triangle has changed. */ - S3L_Unit depth; ///< Depth (only if depth is turned on). - S3L_Unit previousZ; /**< Z-buffer value (not necessarily world depth in - S3L_Units!) that was in the z-buffer on the - pixel's position before this pixel was - rasterized. This can be used to set the value - back, e.g. for transparency. */ - S3L_ScreenCoord triangleSize[2]; /**< Rasterized triangle width and height, - can be used e.g. for MIP mapping. */ -} S3L_PixelInfo; /**< Used to pass the info about a rasterized pixel - (fragment) to the user-defined drawing func. 
*/ - -static inline void S3L_pixelInfoInit(S3L_PixelInfo* p); - -/** Corrects barycentric coordinates so that they exactly meet the defined - conditions (each fall into <0,S3L_FRACTIONS_PER_UNIT>, sum = - S3L_FRACTIONS_PER_UNIT). Note that doing this per-pixel can slow the program - down significantly. */ -static inline void S3L_correctBarycentricCoords(S3L_Unit barycentric[3]); - -// general helper functions -static inline S3L_Unit S3L_abs(S3L_Unit value); -static inline S3L_Unit S3L_min(S3L_Unit v1, S3L_Unit v2); -static inline S3L_Unit S3L_max(S3L_Unit v1, S3L_Unit v2); -static inline S3L_Unit S3L_clamp(S3L_Unit v, S3L_Unit v1, S3L_Unit v2); -static inline S3L_Unit S3L_wrap(S3L_Unit value, S3L_Unit mod); -static inline S3L_Unit S3L_nonZero(S3L_Unit value); -static inline S3L_Unit S3L_zeroClamp(S3L_Unit value); - -S3L_Unit S3L_sin(S3L_Unit x); -S3L_Unit S3L_asin(S3L_Unit x); -static inline S3L_Unit S3L_cos(S3L_Unit x); - -S3L_Unit S3L_vec3Length(S3L_Vec4 v); -S3L_Unit S3L_sqrt(S3L_Unit value); - -/** Projects a single point from 3D space to the screen space (pixels), which - can be useful e.g. for drawing sprites. The w component of input and result - holds the point size. If this size is 0 in the result, the sprite is outside - the view. */ -void project3DPointToScreen(S3L_Vec4 point, - S3L_Camera camera, - S3L_Vec4* result); - -/** Computes a normalized normal of given triangle. */ -void S3L_triangleNormal(S3L_Vec4 t0, S3L_Vec4 t1, S3L_Vec4 t2, S3L_Vec4* n); - -/** Helper function for retrieving per-vertex indexed values from an array, - e.g. texturing (UV) coordinates. The 'indices' array contains three indices - for each triangle, each index pointing into 'values' array, which contains - the values, each one consisting of 'numComponents' components (e.g. 2 for - UV coordinates). The three values are retrieved into 'v0', 'v1' and 'v2' - vectors (into x, y, z and w, depending on 'numComponents'). 
This function is - meant to be used per-triangle (typically from a cache), NOT per-pixel, as it - is not as fast as possible! */ -void S3L_getIndexedTriangleValues(S3L_Index triangleIndex, - const S3L_Index* indices, - const S3L_Unit* values, - uint8_t numComponents, - S3L_Vec4* v0, - S3L_Vec4* v1, - S3L_Vec4* v2); - -/** Computes a normalized normal for every vertex of given model (this is - relatively slow and SHOULDN'T be done each frame). The dst array must have a - sufficient size preallocated! The size is: number of model vertices * 3 * - sizeof(S3L_Unit). Note that for advanced allowing sharp edges it is not - sufficient to have per-vertex normals, but must be per-triangle. This - function doesn't support this. - - The function computes a normal for each vertex by averaging normals of - the triangles containing the vertex. The maximum number of these triangle - normals that will be averaged is set with - S3L_NORMAL_COMPUTE_MAXIMUM_AVERAGE. */ -void S3L_computeModelNormals(S3L_Model3D model, - S3L_Unit* dst, - int8_t transformNormals); - -/** Interpolates between two values, v1 and v2, in the same ratio as t is to - tMax. Does NOT prevent zero division. */ -static inline S3L_Unit S3L_interpolate(S3L_Unit v1, - S3L_Unit v2, - S3L_Unit t, - S3L_Unit tMax); - -/** Same as S3L_interpolate but with v1 == 0. Should be faster. */ -static inline S3L_Unit S3L_interpolateFrom0(S3L_Unit v2, - S3L_Unit t, - S3L_Unit tMax); - -/** Like S3L_interpolate, but uses a parameter that goes from 0 to - S3L_FRACTIONS_PER_UNIT - 1, which can be faster. */ -static inline S3L_Unit S3L_interpolateByUnit(S3L_Unit v1, - S3L_Unit v2, - S3L_Unit t); - -/** Same as S3L_interpolateByUnit but with v1 == 0. Should be faster. */ -static inline S3L_Unit S3L_interpolateByUnitFrom0(S3L_Unit v2, S3L_Unit t); - -static inline S3L_Unit S3L_distanceManhattan(S3L_Vec4 a, S3L_Vec4 b); - -/** Returns a value interpolated between the three triangle vertices based on - barycentric coordinates. 
*/ -static inline S3L_Unit S3L_interpolateBarycentric(S3L_Unit value0, - S3L_Unit value1, - S3L_Unit value2, - S3L_Unit barycentric[3]); - -static inline void S3L_mapProjectionPlaneToScreen(S3L_Vec4 point, - S3L_ScreenCoord* screenX, - S3L_ScreenCoord* screenY); - -/** Draws a triangle according to given config. The vertices are specified in - screen space (pixels). If perspective correction is enabled, each - vertex has to have a depth (Z position in camera space) specified in the Z - component. */ -void S3L_drawTriangle(S3L_Vec4 point0, - S3L_Vec4 point1, - S3L_Vec4 point2, - S3L_Index modelIndex, - S3L_Index triangleIndex); - -/** This should be called before rendering each frame. The function clears - buffers and does potentially other things needed for the frame. */ -void S3L_newFrame(void); - -void S3L_zBufferClear(void); -void S3L_stencilBufferClear(void); - -/** Writes a value (not necessarily depth! depends on the format of z-buffer) - to z-buffer (if enabled). Does NOT check boundaries! */ -void S3L_zBufferWrite(S3L_ScreenCoord x, S3L_ScreenCoord y, S3L_Unit value); - -/** Reads a value (not necessarily depth! depends on the format of z-buffer) - from z-buffer (if enabled). Does NOT check boundaries! */ -S3L_Unit S3L_zBufferRead(S3L_ScreenCoord x, S3L_ScreenCoord y); - -static inline void S3L_rotate2DPoint(S3L_Unit* x, S3L_Unit* y, S3L_Unit angle); - -/** Predefined vertices of a cube to simply insert in an array. These come with - S3L_CUBE_TRIANGLES and S3L_CUBE_TEXCOORDS. 
*/ -#define S3L_CUBE_VERTICES(m) \ - /* 0 front, bottom, right */ \ - m / 2, -m / 2, -m / 2, /* 1 front, bottom, left */ \ - -m / 2, -m / 2, -m / 2, /* 2 front, top, right */ \ - m / 2, m / 2, -m / 2, /* 3 front, top, left */ \ - -m / 2, m / 2, -m / 2, /* 4 back, bottom, right */ \ - m / 2, -m / 2, m / 2, /* 5 back, bottom, left */ \ - -m / 2, -m / 2, m / 2, /* 6 back, top, right */ \ - m / 2, m / 2, m / 2, /* 7 back, top, left */ \ - -m / 2, m / 2, m / 2 - -#define S3L_CUBE_VERTEX_COUNT 8 - -/** Predefined triangle indices of a cube, to be used with S3L_CUBE_VERTICES - and S3L_CUBE_TEXCOORDS. */ -#define S3L_CUBE_TRIANGLES \ - 3, 0, 2, /* front */ \ - 1, 0, 3, 0, 4, 2, /* right */ \ - 2, 4, 6, 4, 5, 6, /* back */ \ - 7, 6, 5, 3, 7, 1, /* left */ \ - 1, 7, 5, 6, 3, 2, /* top */ \ - 7, 3, 6, 1, 4, 0, /* bottom */ \ - 5, 4, 1 - -#define S3L_CUBE_TRIANGLE_COUNT 12 - -/** Predefined texture coordinates of a cube, corresponding to triangles (NOT - vertices), to be used with S3L_CUBE_VERTICES and S3L_CUBE_TRIANGLES. 
*/ -#define S3L_CUBE_TEXCOORDS(m) \ - 0, 0, m, m, m, 0, 0, m, m, m, 0, 0, m, m, m, 0, 0, m, 0, m, m, 0, 0, 0, m, \ - 0, 0, 0, m, m, 0, m, m, m, 0, 0, 0, 0, 0, m, m, 0, m, 0, 0, m, m, m, \ - 0, 0, m, m, m, 0, 0, m, m, m, 0, 0, m, 0, 0, m, m, m, 0, 0, 0, m, m, 0 - -//============================================================================= -// privates - -#define S3L_UNUSED(what) (void)(what) ///< helper macro for unused vars - -#define S3L_HALF_RESOLUTION_X (S3L_RESOLUTION_X >> 1) -#define S3L_HALF_RESOLUTION_Y (S3L_RESOLUTION_Y >> 1) - -#define S3L_PROJECTION_PLANE_HEIGHT \ - ((S3L_RESOLUTION_Y * S3L_FRACTIONS_PER_UNIT * 2) / S3L_RESOLUTION_X) - -#if S3L_Z_BUFFER == 1 -#define S3L_MAX_DEPTH 2147483647 -S3L_Unit S3L_zBuffer[S3L_MAX_PIXELS]; -#define S3L_zBufferFormat(depth) (depth) -#elif S3L_Z_BUFFER == 2 -#define S3L_MAX_DEPTH 255 -uint8_t S3L_zBuffer[S3L_MAX_PIXELS]; -#define S3L_zBufferFormat(depth) \ - S3L_min(255, (depth) >> S3L_REDUCED_Z_BUFFER_GRANULARITY) -#endif - -#if S3L_Z_BUFFER -static inline int8_t S3L_zTest(S3L_ScreenCoord x, - S3L_ScreenCoord y, - S3L_Unit depth) { - uint32_t index = y * S3L_RESOLUTION_X + x; - - depth = S3L_zBufferFormat(depth); - -#if S3L_Z_BUFFER == 2 -#define cmp \ - <= /* For reduced z-buffer we need equality test, because \ - otherwise pixels at the maximum depth (255) would never be \ - drawn over the background (which also has the depth of \ - 255). */ -#else -#define cmp \ - < /* For normal z-buffer we leave out equality test to not waste \ - time by drawing over already drawn pixels. 
*/ -#endif - - if (depth cmp S3L_zBuffer[index]) { - S3L_zBuffer[index] = depth; - return 1; - } - -#undef cmp - - return 0; -} -#endif - -S3L_Unit S3L_zBufferRead(S3L_ScreenCoord x, S3L_ScreenCoord y) { -#if S3L_Z_BUFFER - return S3L_zBuffer[y * S3L_RESOLUTION_X + x]; -#else - S3L_UNUSED(x); - S3L_UNUSED(y); - - return 0; -#endif -} - -void S3L_zBufferWrite(S3L_ScreenCoord x, S3L_ScreenCoord y, S3L_Unit value) { -#if S3L_Z_BUFFER - S3L_zBuffer[y * S3L_RESOLUTION_X + x] = value; -#else - S3L_UNUSED(x); - S3L_UNUSED(y); - S3L_UNUSED(value); -#endif -} - -#if S3L_STENCIL_BUFFER -#define S3L_STENCIL_BUFFER_SIZE \ - ((S3L_RESOLUTION_X * S3L_RESOLUTION_Y - 1) / 8 + 1) - -uint8_t S3L_stencilBuffer[S3L_STENCIL_BUFFER_SIZE]; - -static inline int8_t S3L_stencilTest(S3L_ScreenCoord x, S3L_ScreenCoord y) { - uint32_t index = y * S3L_RESOLUTION_X + x; - uint32_t bit = (index & 0x00000007); - index = index >> 3; - - uint8_t val = S3L_stencilBuffer[index]; - - if ((val >> bit) & 0x1) - return 0; - - S3L_stencilBuffer[index] = val | (0x1 << bit); - - return 1; -} -#endif - -#define S3L_COMPUTE_LERP_DEPTH \ - (S3L_COMPUTE_DEPTH && (S3L_PERSPECTIVE_CORRECTION == 0)) - -#define S3L_SIN_TABLE_LENGTH 128 - -static const S3L_Unit S3L_sinTable[S3L_SIN_TABLE_LENGTH] = { - /* 511 was chosen here as a highest number that doesn't overflow during - compilation for S3L_FRACTIONS_PER_UNIT == 1024 */ - - (0 * S3L_FRACTIONS_PER_UNIT) / 511, (6 * S3L_FRACTIONS_PER_UNIT) / 511, - (12 * S3L_FRACTIONS_PER_UNIT) / 511, (18 * S3L_FRACTIONS_PER_UNIT) / 511, - (25 * S3L_FRACTIONS_PER_UNIT) / 511, (31 * S3L_FRACTIONS_PER_UNIT) / 511, - (37 * S3L_FRACTIONS_PER_UNIT) / 511, (43 * S3L_FRACTIONS_PER_UNIT) / 511, - (50 * S3L_FRACTIONS_PER_UNIT) / 511, (56 * S3L_FRACTIONS_PER_UNIT) / 511, - (62 * S3L_FRACTIONS_PER_UNIT) / 511, (68 * S3L_FRACTIONS_PER_UNIT) / 511, - (74 * S3L_FRACTIONS_PER_UNIT) / 511, (81 * S3L_FRACTIONS_PER_UNIT) / 511, - (87 * S3L_FRACTIONS_PER_UNIT) / 511, (93 * S3L_FRACTIONS_PER_UNIT) / 
511, - (99 * S3L_FRACTIONS_PER_UNIT) / 511, (105 * S3L_FRACTIONS_PER_UNIT) / 511, - (111 * S3L_FRACTIONS_PER_UNIT) / 511, (118 * S3L_FRACTIONS_PER_UNIT) / 511, - (124 * S3L_FRACTIONS_PER_UNIT) / 511, (130 * S3L_FRACTIONS_PER_UNIT) / 511, - (136 * S3L_FRACTIONS_PER_UNIT) / 511, (142 * S3L_FRACTIONS_PER_UNIT) / 511, - (148 * S3L_FRACTIONS_PER_UNIT) / 511, (154 * S3L_FRACTIONS_PER_UNIT) / 511, - (160 * S3L_FRACTIONS_PER_UNIT) / 511, (166 * S3L_FRACTIONS_PER_UNIT) / 511, - (172 * S3L_FRACTIONS_PER_UNIT) / 511, (178 * S3L_FRACTIONS_PER_UNIT) / 511, - (183 * S3L_FRACTIONS_PER_UNIT) / 511, (189 * S3L_FRACTIONS_PER_UNIT) / 511, - (195 * S3L_FRACTIONS_PER_UNIT) / 511, (201 * S3L_FRACTIONS_PER_UNIT) / 511, - (207 * S3L_FRACTIONS_PER_UNIT) / 511, (212 * S3L_FRACTIONS_PER_UNIT) / 511, - (218 * S3L_FRACTIONS_PER_UNIT) / 511, (224 * S3L_FRACTIONS_PER_UNIT) / 511, - (229 * S3L_FRACTIONS_PER_UNIT) / 511, (235 * S3L_FRACTIONS_PER_UNIT) / 511, - (240 * S3L_FRACTIONS_PER_UNIT) / 511, (246 * S3L_FRACTIONS_PER_UNIT) / 511, - (251 * S3L_FRACTIONS_PER_UNIT) / 511, (257 * S3L_FRACTIONS_PER_UNIT) / 511, - (262 * S3L_FRACTIONS_PER_UNIT) / 511, (268 * S3L_FRACTIONS_PER_UNIT) / 511, - (273 * S3L_FRACTIONS_PER_UNIT) / 511, (278 * S3L_FRACTIONS_PER_UNIT) / 511, - (283 * S3L_FRACTIONS_PER_UNIT) / 511, (289 * S3L_FRACTIONS_PER_UNIT) / 511, - (294 * S3L_FRACTIONS_PER_UNIT) / 511, (299 * S3L_FRACTIONS_PER_UNIT) / 511, - (304 * S3L_FRACTIONS_PER_UNIT) / 511, (309 * S3L_FRACTIONS_PER_UNIT) / 511, - (314 * S3L_FRACTIONS_PER_UNIT) / 511, (319 * S3L_FRACTIONS_PER_UNIT) / 511, - (324 * S3L_FRACTIONS_PER_UNIT) / 511, (328 * S3L_FRACTIONS_PER_UNIT) / 511, - (333 * S3L_FRACTIONS_PER_UNIT) / 511, (338 * S3L_FRACTIONS_PER_UNIT) / 511, - (343 * S3L_FRACTIONS_PER_UNIT) / 511, (347 * S3L_FRACTIONS_PER_UNIT) / 511, - (352 * S3L_FRACTIONS_PER_UNIT) / 511, (356 * S3L_FRACTIONS_PER_UNIT) / 511, - (361 * S3L_FRACTIONS_PER_UNIT) / 511, (365 * S3L_FRACTIONS_PER_UNIT) / 511, - (370 * S3L_FRACTIONS_PER_UNIT) / 511, (374 
* S3L_FRACTIONS_PER_UNIT) / 511, - (378 * S3L_FRACTIONS_PER_UNIT) / 511, (382 * S3L_FRACTIONS_PER_UNIT) / 511, - (386 * S3L_FRACTIONS_PER_UNIT) / 511, (391 * S3L_FRACTIONS_PER_UNIT) / 511, - (395 * S3L_FRACTIONS_PER_UNIT) / 511, (398 * S3L_FRACTIONS_PER_UNIT) / 511, - (402 * S3L_FRACTIONS_PER_UNIT) / 511, (406 * S3L_FRACTIONS_PER_UNIT) / 511, - (410 * S3L_FRACTIONS_PER_UNIT) / 511, (414 * S3L_FRACTIONS_PER_UNIT) / 511, - (417 * S3L_FRACTIONS_PER_UNIT) / 511, (421 * S3L_FRACTIONS_PER_UNIT) / 511, - (424 * S3L_FRACTIONS_PER_UNIT) / 511, (428 * S3L_FRACTIONS_PER_UNIT) / 511, - (431 * S3L_FRACTIONS_PER_UNIT) / 511, (435 * S3L_FRACTIONS_PER_UNIT) / 511, - (438 * S3L_FRACTIONS_PER_UNIT) / 511, (441 * S3L_FRACTIONS_PER_UNIT) / 511, - (444 * S3L_FRACTIONS_PER_UNIT) / 511, (447 * S3L_FRACTIONS_PER_UNIT) / 511, - (450 * S3L_FRACTIONS_PER_UNIT) / 511, (453 * S3L_FRACTIONS_PER_UNIT) / 511, - (456 * S3L_FRACTIONS_PER_UNIT) / 511, (459 * S3L_FRACTIONS_PER_UNIT) / 511, - (461 * S3L_FRACTIONS_PER_UNIT) / 511, (464 * S3L_FRACTIONS_PER_UNIT) / 511, - (467 * S3L_FRACTIONS_PER_UNIT) / 511, (469 * S3L_FRACTIONS_PER_UNIT) / 511, - (472 * S3L_FRACTIONS_PER_UNIT) / 511, (474 * S3L_FRACTIONS_PER_UNIT) / 511, - (476 * S3L_FRACTIONS_PER_UNIT) / 511, (478 * S3L_FRACTIONS_PER_UNIT) / 511, - (481 * S3L_FRACTIONS_PER_UNIT) / 511, (483 * S3L_FRACTIONS_PER_UNIT) / 511, - (485 * S3L_FRACTIONS_PER_UNIT) / 511, (487 * S3L_FRACTIONS_PER_UNIT) / 511, - (488 * S3L_FRACTIONS_PER_UNIT) / 511, (490 * S3L_FRACTIONS_PER_UNIT) / 511, - (492 * S3L_FRACTIONS_PER_UNIT) / 511, (494 * S3L_FRACTIONS_PER_UNIT) / 511, - (495 * S3L_FRACTIONS_PER_UNIT) / 511, (497 * S3L_FRACTIONS_PER_UNIT) / 511, - (498 * S3L_FRACTIONS_PER_UNIT) / 511, (499 * S3L_FRACTIONS_PER_UNIT) / 511, - (501 * S3L_FRACTIONS_PER_UNIT) / 511, (502 * S3L_FRACTIONS_PER_UNIT) / 511, - (503 * S3L_FRACTIONS_PER_UNIT) / 511, (504 * S3L_FRACTIONS_PER_UNIT) / 511, - (505 * S3L_FRACTIONS_PER_UNIT) / 511, (506 * S3L_FRACTIONS_PER_UNIT) / 511, - (507 * 
S3L_FRACTIONS_PER_UNIT) / 511, (507 * S3L_FRACTIONS_PER_UNIT) / 511, - (508 * S3L_FRACTIONS_PER_UNIT) / 511, (509 * S3L_FRACTIONS_PER_UNIT) / 511, - (509 * S3L_FRACTIONS_PER_UNIT) / 511, (510 * S3L_FRACTIONS_PER_UNIT) / 511, - (510 * S3L_FRACTIONS_PER_UNIT) / 511, (510 * S3L_FRACTIONS_PER_UNIT) / 511, - (510 * S3L_FRACTIONS_PER_UNIT) / 511, (510 * S3L_FRACTIONS_PER_UNIT) / 511}; - -#define S3L_SIN_TABLE_UNIT_STEP \ - (S3L_FRACTIONS_PER_UNIT / (S3L_SIN_TABLE_LENGTH * 4)) - -void S3L_vec4Init(S3L_Vec4* v) { - v->x = 0; - v->y = 0; - v->z = 0; - v->w = S3L_FRACTIONS_PER_UNIT; -} - -void S3L_vec4Set(S3L_Vec4* v, S3L_Unit x, S3L_Unit y, S3L_Unit z, S3L_Unit w) { - v->x = x; - v->y = y; - v->z = z; - v->w = w; -} - -void S3L_vec3Add(S3L_Vec4* result, S3L_Vec4 added) { - result->x += added.x; - result->y += added.y; - result->z += added.z; -} - -void S3L_vec3Sub(S3L_Vec4* result, S3L_Vec4 substracted) { - result->x -= substracted.x; - result->y -= substracted.y; - result->z -= substracted.z; -} - -void S3L_mat4Init(S3L_Mat4 m) { -#define M(x, y) m[x][y] -#define S S3L_FRACTIONS_PER_UNIT - - M(0, 0) = S; - M(1, 0) = 0; - M(2, 0) = 0; - M(3, 0) = 0; - M(0, 1) = 0; - M(1, 1) = S; - M(2, 1) = 0; - M(3, 1) = 0; - M(0, 2) = 0; - M(1, 2) = 0; - M(2, 2) = S; - M(3, 2) = 0; - M(0, 3) = 0; - M(1, 3) = 0; - M(2, 3) = 0; - M(3, 3) = S; - -#undef M -#undef S -} - -void S3L_mat4Copy(S3L_Mat4 src, S3L_Mat4 dst) { - for (uint8_t j = 0; j < 4; ++j) - for (uint8_t i = 0; i < 4; ++i) - dst[i][j] = src[i][j]; -} - -S3L_Unit S3L_vec3Dot(S3L_Vec4 a, S3L_Vec4 b) { - return (a.x * b.x + a.y * b.y + a.z * b.z) / S3L_FRACTIONS_PER_UNIT; -} - -void S3L_reflect(S3L_Vec4 toLight, S3L_Vec4 normal, S3L_Vec4* result) { - S3L_Unit d = 2 * S3L_vec3Dot(toLight, normal); - - result->x = (normal.x * d) / S3L_FRACTIONS_PER_UNIT - toLight.x; - result->y = (normal.y * d) / S3L_FRACTIONS_PER_UNIT - toLight.y; - result->z = (normal.z * d) / S3L_FRACTIONS_PER_UNIT - toLight.z; -} - -void S3L_vec3Cross(S3L_Vec4 a, 
S3L_Vec4 b, S3L_Vec4* result) { - result->x = a.y * b.z - a.z * b.y; - result->y = a.z * b.x - a.x * b.z; - result->z = a.x * b.y - a.y * b.x; -} - -void S3L_triangleNormal(S3L_Vec4 t0, S3L_Vec4 t1, S3L_Vec4 t2, S3L_Vec4* n) { -#define ANTI_OVERFLOW 32 - - t1.x = (t1.x - t0.x) / ANTI_OVERFLOW; - t1.y = (t1.y - t0.y) / ANTI_OVERFLOW; - t1.z = (t1.z - t0.z) / ANTI_OVERFLOW; - - t2.x = (t2.x - t0.x) / ANTI_OVERFLOW; - t2.y = (t2.y - t0.y) / ANTI_OVERFLOW; - t2.z = (t2.z - t0.z) / ANTI_OVERFLOW; - -#undef ANTI_OVERFLOW - - S3L_vec3Cross(t1, t2, n); - - S3L_vec3Normalize(n); -} - -void S3L_getIndexedTriangleValues(S3L_Index triangleIndex, - const S3L_Index* indices, - const S3L_Unit* values, - uint8_t numComponents, - S3L_Vec4* v0, - S3L_Vec4* v1, - S3L_Vec4* v2) { - uint32_t i0, i1; - S3L_Unit* value; - - i0 = triangleIndex * 3; - i1 = indices[i0] * numComponents; - value = (S3L_Unit*)v0; - - if (numComponents > 4) - numComponents = 4; - - for (uint8_t j = 0; j < numComponents; ++j) { - *value = values[i1]; - i1++; - value++; - } - - i0++; - i1 = indices[i0] * numComponents; - value = (S3L_Unit*)v1; - - for (uint8_t j = 0; j < numComponents; ++j) { - *value = values[i1]; - i1++; - value++; - } - - i0++; - i1 = indices[i0] * numComponents; - value = (S3L_Unit*)v2; - - for (uint8_t j = 0; j < numComponents; ++j) { - *value = values[i1]; - i1++; - value++; - } -} - -void S3L_computeModelNormals(S3L_Model3D model, - S3L_Unit* dst, - int8_t transformNormals) { - S3L_Index vPos = 0; - - S3L_Vec4 n; - - n.w = 0; - - S3L_Vec4 ns[S3L_NORMAL_COMPUTE_MAXIMUM_AVERAGE]; - S3L_Index normalCount; - - for (uint32_t i = 0; i < model.vertexCount; ++i) { - normalCount = 0; - - for (uint32_t j = 0; j < model.triangleCount * 3; j += 3) { - if ((model.triangles[j] == i) || (model.triangles[j + 1] == i) || - (model.triangles[j + 2] == i)) { - S3L_Vec4 t0, t1, t2; - uint32_t vIndex; - -#define getVertex(n) \ - vIndex = model.triangles[j + n] * 3; \ - t##n.x = model.vertices[vIndex]; \ - 
vIndex++; \ - t##n.y = model.vertices[vIndex]; \ - vIndex++; \ - t##n.z = model.vertices[vIndex]; - - getVertex(0) getVertex(1) getVertex(2) - -#undef getVertex - - S3L_triangleNormal(t0, t1, t2, &(ns[normalCount])); - - normalCount++; - - if (normalCount >= S3L_NORMAL_COMPUTE_MAXIMUM_AVERAGE) - break; - } - } - - n.x = S3L_FRACTIONS_PER_UNIT; - n.y = 0; - n.z = 0; - - if (normalCount != 0) { - // compute average - - n.x = 0; - - for (uint8_t i = 0; i < normalCount; ++i) { - n.x += ns[i].x; - n.y += ns[i].y; - n.z += ns[i].z; - } - - n.x /= normalCount; - n.y /= normalCount; - n.z /= normalCount; - - S3L_vec3Normalize(&n); - } - - dst[vPos] = n.x; - vPos++; - - dst[vPos] = n.y; - vPos++; - - dst[vPos] = n.z; - vPos++; - } - - S3L_Mat4 m; - - S3L_makeWorldMatrix(model.transform, m); - - if (transformNormals) - for (S3L_Index i = 0; i < model.vertexCount * 3; i += 3) { - n.x = dst[i]; - n.y = dst[i + 1]; - n.z = dst[i + 2]; - - S3L_vec4Xmat4(&n, m); - - dst[i] = n.x; - dst[i + 1] = n.y; - dst[i + 2] = n.z; - } -} - -void S3L_vec4Xmat4(S3L_Vec4* v, S3L_Mat4 m) { - S3L_Vec4 vBackup; - - vBackup.x = v->x; - vBackup.y = v->y; - vBackup.z = v->z; - vBackup.w = v->w; - -#define dotCol(col) \ - ((vBackup.x * m[col][0]) + (vBackup.y * m[col][1]) + \ - (vBackup.z * m[col][2]) + (vBackup.w * m[col][3])) / \ - S3L_FRACTIONS_PER_UNIT - - v->x = dotCol(0); - v->y = dotCol(1); - v->z = dotCol(2); - v->w = dotCol(3); -} - -void S3L_vec3Xmat4(S3L_Vec4* v, S3L_Mat4 m) { - S3L_Vec4 vBackup; - -#undef dotCol -#define dotCol(col) \ - (vBackup.x * m[col][0]) / S3L_FRACTIONS_PER_UNIT + \ - (vBackup.y * m[col][1]) / S3L_FRACTIONS_PER_UNIT + \ - (vBackup.z * m[col][2]) / S3L_FRACTIONS_PER_UNIT + m[col][3] - - vBackup.x = v->x; - vBackup.y = v->y; - vBackup.z = v->z; - vBackup.w = v->w; - - v->x = dotCol(0); - v->y = dotCol(1); - v->z = dotCol(2); - v->w = S3L_FRACTIONS_PER_UNIT; -} - -#undef dotCol - -S3L_Unit S3L_abs(S3L_Unit value) { - return value * (((value >= 0) << 1) - 1); -} - 
-S3L_Unit S3L_min(S3L_Unit v1, S3L_Unit v2) { - return v1 >= v2 ? v2 : v1; -} - -S3L_Unit S3L_max(S3L_Unit v1, S3L_Unit v2) { - return v1 >= v2 ? v1 : v2; -} - -S3L_Unit S3L_clamp(S3L_Unit v, S3L_Unit v1, S3L_Unit v2) { - return v >= v1 ? (v <= v2 ? v : v2) : v1; -} - -S3L_Unit S3L_zeroClamp(S3L_Unit value) { - return (value * (value >= 0)); -} - -S3L_Unit S3L_wrap(S3L_Unit value, S3L_Unit mod) { - return value >= 0 ? (value % mod) : (mod + (value % mod) - 1); -} - -S3L_Unit S3L_nonZero(S3L_Unit value) { - return (value + (value == 0)); -} - -S3L_Unit S3L_interpolate(S3L_Unit v1, S3L_Unit v2, S3L_Unit t, S3L_Unit tMax) { - return v1 + ((v2 - v1) * t) / tMax; -} - -S3L_Unit S3L_interpolateByUnit(S3L_Unit v1, S3L_Unit v2, S3L_Unit t) { - return v1 + ((v2 - v1) * t) / S3L_FRACTIONS_PER_UNIT; -} - -S3L_Unit S3L_interpolateByUnitFrom0(S3L_Unit v2, S3L_Unit t) { - return (v2 * t) / S3L_FRACTIONS_PER_UNIT; -} - -S3L_Unit S3L_interpolateFrom0(S3L_Unit v2, S3L_Unit t, S3L_Unit tMax) { - return (v2 * t) / tMax; -} - -S3L_Unit S3L_distanceManhattan(S3L_Vec4 a, S3L_Vec4 b) { - return S3L_abs(a.x - b.x) + S3L_abs(a.y - b.y) + S3L_abs(a.z - b.z); -} - -void S3L_mat4Xmat4(S3L_Mat4 m1, S3L_Mat4 m2) { - S3L_Mat4 mat1; - - for (uint16_t row = 0; row < 4; ++row) - for (uint16_t col = 0; col < 4; ++col) - mat1[col][row] = m1[col][row]; - - for (uint16_t row = 0; row < 4; ++row) - for (uint16_t col = 0; col < 4; ++col) { - m1[col][row] = 0; - - for (uint16_t i = 0; i < 4; ++i) - m1[col][row] += - (mat1[i][row] * m2[col][i]) / S3L_FRACTIONS_PER_UNIT; - } -} - -S3L_Unit S3L_sin(S3L_Unit x) { - x = S3L_wrap(x / S3L_SIN_TABLE_UNIT_STEP, S3L_SIN_TABLE_LENGTH * 4); - int8_t positive = 1; - - if (x < S3L_SIN_TABLE_LENGTH) { - } else if (x < S3L_SIN_TABLE_LENGTH * 2) { - x = S3L_SIN_TABLE_LENGTH * 2 - x - 1; - } else if (x < S3L_SIN_TABLE_LENGTH * 3) { - x = x - S3L_SIN_TABLE_LENGTH * 2; - positive = 0; - } else { - x = S3L_SIN_TABLE_LENGTH - (x - S3L_SIN_TABLE_LENGTH * 3) - 1; - positive = 0; 
- } - - return positive ? S3L_sinTable[x] : -1 * S3L_sinTable[x]; -} - -S3L_Unit S3L_asin(S3L_Unit x) { - x = S3L_clamp(x, -S3L_FRACTIONS_PER_UNIT, S3L_FRACTIONS_PER_UNIT); - - int8_t sign = 1; - - if (x < 0) { - sign = -1; - x *= -1; - } - - int16_t low = 0; - int16_t high = S3L_SIN_TABLE_LENGTH - 1; - int16_t middle; - - while (low <= high) // binary search - { - middle = (low + high) / 2; - - S3L_Unit v = S3L_sinTable[middle]; - - if (v > x) - high = middle - 1; - else if (v < x) - low = middle + 1; - else - break; - } - - middle *= S3L_SIN_TABLE_UNIT_STEP; - - return sign * middle; -} - -S3L_Unit S3L_cos(S3L_Unit x) { - return S3L_sin(x + S3L_FRACTIONS_PER_UNIT / 4); -} - -void S3L_correctBarycentricCoords(S3L_Unit barycentric[3]) { - barycentric[0] = S3L_clamp(barycentric[0], 0, S3L_FRACTIONS_PER_UNIT); - barycentric[1] = S3L_clamp(barycentric[1], 0, S3L_FRACTIONS_PER_UNIT); - - S3L_Unit d = S3L_FRACTIONS_PER_UNIT - barycentric[0] - barycentric[1]; - - if (d < 0) { - barycentric[0] += d; - barycentric[2] = 0; - } else - barycentric[2] = d; -} - -void S3L_makeTranslationMat(S3L_Unit offsetX, - S3L_Unit offsetY, - S3L_Unit offsetZ, - S3L_Mat4 m) { -#define M(x, y) m[x][y] -#define S S3L_FRACTIONS_PER_UNIT - - M(0, 0) = S; - M(1, 0) = 0; - M(2, 0) = 0; - M(3, 0) = 0; - M(0, 1) = 0; - M(1, 1) = S; - M(2, 1) = 0; - M(3, 1) = 0; - M(0, 2) = 0; - M(1, 2) = 0; - M(2, 2) = S; - M(3, 2) = 0; - M(0, 3) = offsetX; - M(1, 3) = offsetY; - M(2, 3) = offsetZ; - M(3, 3) = S; - -#undef M -#undef S -} - -void S3L_makeScaleMatrix(S3L_Unit scaleX, - S3L_Unit scaleY, - S3L_Unit scaleZ, - S3L_Mat4 m) { -#define M(x, y) m[x][y] - - M(0, 0) = scaleX; - M(1, 0) = 0; - M(2, 0) = 0; - M(3, 0) = 0; - M(0, 1) = 0; - M(1, 1) = scaleY; - M(2, 1) = 0; - M(3, 1) = 0; - M(0, 2) = 0; - M(1, 2) = 0; - M(2, 2) = scaleZ; - M(3, 2) = 0; - M(0, 3) = 0; - M(1, 3) = 0; - M(2, 3) = 0; - M(3, 3) = S3L_FRACTIONS_PER_UNIT; - -#undef M -} - -void S3L_makeRotationMatrixZXY(S3L_Unit byX, - S3L_Unit byY, - 
S3L_Unit byZ, - S3L_Mat4 m) { - byX *= -1; - byY *= -1; - byZ *= -1; - - S3L_Unit sx = S3L_sin(byX); - S3L_Unit sy = S3L_sin(byY); - S3L_Unit sz = S3L_sin(byZ); - - S3L_Unit cx = S3L_cos(byX); - S3L_Unit cy = S3L_cos(byY); - S3L_Unit cz = S3L_cos(byZ); - -#define M(x, y) m[x][y] -#define S S3L_FRACTIONS_PER_UNIT - - M(0, 0) = (cy * cz) / S + (sy * sx * sz) / (S * S); - M(1, 0) = (cx * sz) / S; - M(2, 0) = (cy * sx * sz) / (S * S) - (cz * sy) / S; - M(3, 0) = 0; - - M(0, 1) = (cz * sy * sx) / (S * S) - (cy * sz) / S; - M(1, 1) = (cx * cz) / S; - M(2, 1) = (cy * cz * sx) / (S * S) + (sy * sz) / S; - M(3, 1) = 0; - - M(0, 2) = (cx * sy) / S; - M(1, 2) = -1 * sx; - M(2, 2) = (cy * cx) / S; - M(3, 2) = 0; - - M(0, 3) = 0; - M(1, 3) = 0; - M(2, 3) = 0; - M(3, 3) = S3L_FRACTIONS_PER_UNIT; - -#undef M -#undef S -} - -S3L_Unit S3L_sqrt(S3L_Unit value) { - int8_t sign = 1; - - if (value < 0) { - sign = -1; - value *= -1; - } - - uint32_t result = 0; - uint32_t a = value; - uint32_t b = 1u << 30; - - while (b > a) - b >>= 2; - - while (b != 0) { - if (a >= result + b) { - a -= result + b; - result = result + 2 * b; - } - - b >>= 2; - result >>= 1; - } - - return result * sign; -} - -S3L_Unit S3L_vec3Length(S3L_Vec4 v) { - return S3L_sqrt(v.x * v.x + v.y * v.y + v.z * v.z); -} - -S3L_Unit S3L_vec2Length(S3L_Vec4 v) { - return S3L_sqrt(v.x * v.x + v.y * v.y); -} - -void S3L_vec3Normalize(S3L_Vec4* v) { -#define SCALE 16 -#define BOTTOM_LIMIT 16 -#define UPPER_LIMIT 900 - - /* Here we try to decide if the vector is too small and would cause - inaccurate result due to very its inaccurate length. If so, we scale - it up. We can't scale up everything as big vectors overflow in length - calculations. 
*/ - - if (S3L_abs(v->x) <= BOTTOM_LIMIT && S3L_abs(v->y) <= BOTTOM_LIMIT && - S3L_abs(v->z) <= BOTTOM_LIMIT) { - v->x *= SCALE; - v->y *= SCALE; - v->z *= SCALE; - } else if (S3L_abs(v->x) > UPPER_LIMIT || S3L_abs(v->y) > UPPER_LIMIT || - S3L_abs(v->z) > UPPER_LIMIT) { - v->x /= SCALE; - v->y /= SCALE; - v->z /= SCALE; - } - -#undef SCALE -#undef BOTTOM_LIMIT -#undef UPPER_LIMIT - - S3L_Unit l = S3L_vec3Length(*v); - - if (l == 0) - return; - - v->x = (v->x * S3L_FRACTIONS_PER_UNIT) / l; - v->y = (v->y * S3L_FRACTIONS_PER_UNIT) / l; - v->z = (v->z * S3L_FRACTIONS_PER_UNIT) / l; -} - -void S3L_vec3NormalizeFast(S3L_Vec4* v) { - S3L_Unit l = S3L_vec3Length(*v); - - if (l == 0) - return; - - v->x = (v->x * S3L_FRACTIONS_PER_UNIT) / l; - v->y = (v->y * S3L_FRACTIONS_PER_UNIT) / l; - v->z = (v->z * S3L_FRACTIONS_PER_UNIT) / l; -} - -void S3L_transform3DInit(S3L_Transform3D* t) { - S3L_vec4Init(&(t->translation)); - S3L_vec4Init(&(t->rotation)); - t->scale.x = S3L_FRACTIONS_PER_UNIT; - t->scale.y = S3L_FRACTIONS_PER_UNIT; - t->scale.z = S3L_FRACTIONS_PER_UNIT; - t->scale.w = 0; -} - -/** Performs perspecive division (z-divide). Does NOT check for division by - zero. */ -static inline void S3L_perspectiveDivide(S3L_Vec4* vector, - S3L_Unit focalLength) { - vector->x = (vector->x * focalLength) / vector->z; - vector->y = (vector->y * focalLength) / vector->z; -} - -void project3DPointToScreen(S3L_Vec4 point, - S3L_Camera camera, - S3L_Vec4* result) { - S3L_Mat4 m; - S3L_makeCameraMatrix(camera.transform, m); - - S3L_Unit s = point.w; - - point.w = S3L_FRACTIONS_PER_UNIT; - - S3L_vec3Xmat4(&point, m); - - point.z = S3L_nonZero(point.z); - - S3L_perspectiveDivide(&point, camera.focalLength); - - S3L_ScreenCoord x, y; - - S3L_mapProjectionPlaneToScreen(point, &x, &y); - - result->x = x; - result->y = y; - result->z = point.z; - - result->w = (point.z <= 0) ? 
0 - : ((s * camera.focalLength * S3L_RESOLUTION_X) / - (point.z * S3L_FRACTIONS_PER_UNIT)); -} - -void S3L_lookAt(S3L_Vec4 pointTo, S3L_Transform3D* t) { - S3L_Vec4 v; - - v.x = pointTo.x - t->translation.x; - v.y = pointTo.z - t->translation.z; - - S3L_Unit dx = v.x; - S3L_Unit l = S3L_vec2Length(v); - - dx = (v.x * S3L_FRACTIONS_PER_UNIT) / S3L_nonZero(l); // normalize - - t->rotation.y = -1 * S3L_asin(dx); - - if (v.y < 0) - t->rotation.y = S3L_FRACTIONS_PER_UNIT / 2 - t->rotation.y; - - v.x = pointTo.y - t->translation.y; - v.y = l; - - l = S3L_vec2Length(v); - - dx = (v.x * S3L_FRACTIONS_PER_UNIT) / S3L_nonZero(l); - - t->rotation.x = S3L_asin(dx); -} - -void S3L_transform3DSet(S3L_Unit tx, - S3L_Unit ty, - S3L_Unit tz, - S3L_Unit rx, - S3L_Unit ry, - S3L_Unit rz, - S3L_Unit sx, - S3L_Unit sy, - S3L_Unit sz, - S3L_Transform3D* t) { - t->translation.x = tx; - t->translation.y = ty; - t->translation.z = tz; - - t->rotation.x = rx; - t->rotation.y = ry; - t->rotation.z = rz; - - t->scale.x = sx; - t->scale.y = sy; - t->scale.z = sz; -} - -void S3L_cameraInit(S3L_Camera* camera) { - camera->focalLength = S3L_FRACTIONS_PER_UNIT; - S3L_transform3DInit(&(camera->transform)); -} - -void S3L_rotationToDirections(S3L_Vec4 rotation, - S3L_Unit length, - S3L_Vec4* forw, - S3L_Vec4* right, - S3L_Vec4* up) { - S3L_Mat4 m; - - S3L_makeRotationMatrixZXY(rotation.x, rotation.y, rotation.z, m); - - if (forw != 0) { - forw->x = 0; - forw->y = 0; - forw->z = length; - S3L_vec3Xmat4(forw, m); - } - - if (right != 0) { - right->x = length; - right->y = 0; - right->z = 0; - S3L_vec3Xmat4(right, m); - } - - if (up != 0) { - up->x = 0; - up->y = length; - up->z = 0; - S3L_vec3Xmat4(up, m); - } -} - -void S3L_pixelInfoInit(S3L_PixelInfo* p) { - p->x = 0; - p->y = 0; - p->barycentric[0] = S3L_FRACTIONS_PER_UNIT; - p->barycentric[1] = 0; - p->barycentric[2] = 0; - p->modelIndex = 0; - p->triangleIndex = 0; - p->triangleID = 0; - p->depth = 0; - p->previousZ = 0; -} - -void 
S3L_model3DInit(const S3L_Unit* vertices, - S3L_Index vertexCount, - const S3L_Index* triangles, - S3L_Index triangleCount, - S3L_Model3D* model) { - model->vertices = vertices; - model->vertexCount = vertexCount; - model->triangles = triangles; - model->triangleCount = triangleCount; - model->customTransformMatrix = 0; - - S3L_transform3DInit(&(model->transform)); - S3L_drawConfigInit(&(model->config)); -} - -void S3L_sceneInit(S3L_Model3D* models, - S3L_Index modelCount, - S3L_Scene* scene) { - scene->models = models; - scene->modelCount = modelCount; - S3L_cameraInit(&(scene->camera)); -} - -void S3L_drawConfigInit(S3L_DrawConfig* config) { - config->backfaceCulling = 2; - config->visible = 1; -} - -#ifndef S3L_PIXEL_FUNCTION -#error Pixel rendering function (S3L_PIXEL_FUNCTION) not specified! -#endif - -static inline void S3L_PIXEL_FUNCTION(S3L_PixelInfo* pixel); // forward decl - -/** Serves to accelerate linear interpolation for performance-critical - code. Functions such as S3L_interpolate require division to compute each - interpolated value, while S3L_FastLerpState only requires a division for - the initiation and a shift for retrieving each interpolated value. - - S3L_FastLerpState stores a value and a step, both scaled (shifted by - S3L_FAST_LERP_QUALITY) to increase precision. The step is being added to the - value, which achieves the interpolation. This will only be useful for - interpolations in which we need to get the interpolated value in every step. - - BEWARE! Shifting a negative value is undefined, so handling shifting of - negative values has to be done cleverly. 
*/ -typedef struct { - S3L_Unit valueScaled; - S3L_Unit stepScaled; -} S3L_FastLerpState; - -#define S3L_getFastLerpValue(state) (state.valueScaled >> S3L_FAST_LERP_QUALITY) - -#define S3L_stepFastLerp(state) state.valueScaled += state.stepScaled - -static inline S3L_Unit S3L_interpolateBarycentric(S3L_Unit value0, - S3L_Unit value1, - S3L_Unit value2, - S3L_Unit barycentric[3]) { - return ((value0 * barycentric[0]) + (value1 * barycentric[1]) + - (value2 * barycentric[2])) / - S3L_FRACTIONS_PER_UNIT; -} - -void S3L_mapProjectionPlaneToScreen(S3L_Vec4 point, - S3L_ScreenCoord* screenX, - S3L_ScreenCoord* screenY) { - *screenX = S3L_HALF_RESOLUTION_X + - (point.x * S3L_HALF_RESOLUTION_X) / S3L_FRACTIONS_PER_UNIT; - - *screenY = S3L_HALF_RESOLUTION_Y - - (point.y * S3L_HALF_RESOLUTION_X) / S3L_FRACTIONS_PER_UNIT; -} - -void S3L_zBufferClear(void) { -#if S3L_Z_BUFFER - for (uint32_t i = 0; i < S3L_RESOLUTION_X * S3L_RESOLUTION_Y; ++i) - S3L_zBuffer[i] = S3L_MAX_DEPTH; -#endif -} - -void S3L_stencilBufferClear(void) { -#if S3L_STENCIL_BUFFER - for (uint32_t i = 0; i < S3L_STENCIL_BUFFER_SIZE; ++i) - S3L_stencilBuffer[i] = 0; -#endif -} - -void S3L_newFrame(void) { - S3L_zBufferClear(); - S3L_stencilBufferClear(); -} - -/* - the following serves to communicate info about if the triangle has been split - and how the barycentrics should be remapped. -*/ -uint8_t _S3L_projectedTriangleState = 0; // 0 = normal, 1 = cut, 2 = split - -#if S3L_NEAR_CROSS_STRATEGY == 3 -S3L_Vec4 _S3L_triangleRemapBarycentrics[6]; -#endif - -void S3L_drawTriangle(S3L_Vec4 point0, - S3L_Vec4 point1, - S3L_Vec4 point2, - S3L_Index modelIndex, - S3L_Index triangleIndex) { - S3L_PixelInfo p; - S3L_pixelInfoInit(&p); - p.modelIndex = modelIndex; - p.triangleIndex = triangleIndex; - p.triangleID = (modelIndex << 16) | triangleIndex; - - S3L_Vec4 *tPointSS, *lPointSS, *rPointSS; /* points in Screen Space (in - S3L_Units, normalized by - S3L_FRACTIONS_PER_UNIT) */ - - S3L_Unit* barycentric0; // bar. 
coord that gets higher from L to R - S3L_Unit* barycentric1; // bar. coord that gets higher from R to L - S3L_Unit* barycentric2; // bar. coord that gets higher from bottom up - - // sort the vertices: - -#define assignPoints(t, a, b) \ - { \ - tPointSS = &point##t; \ - barycentric2 = &(p.barycentric[t]); \ - if (S3L_triangleWinding(point##t.x, point##t.y, point##a.x, \ - point##a.y, point##b.x, point##b.y) >= 0) { \ - lPointSS = &point##a; \ - rPointSS = &point##b; \ - barycentric0 = &(p.barycentric[b]); \ - barycentric1 = &(p.barycentric[a]); \ - } else { \ - lPointSS = &point##b; \ - rPointSS = &point##a; \ - barycentric0 = &(p.barycentric[a]); \ - barycentric1 = &(p.barycentric[b]); \ - } \ - } - - if (point0.y <= point1.y) { - if (point0.y <= point2.y) - assignPoints(0, 1, 2) else assignPoints(2, 0, 1) - } else { - if (point1.y <= point2.y) - assignPoints(1, 0, 2) else assignPoints(2, 0, 1) - } - -#undef assignPoints - -#if S3L_FLAT - *barycentric0 = S3L_FRACTIONS_PER_UNIT / 3; - *barycentric1 = S3L_FRACTIONS_PER_UNIT / 3; - *barycentric2 = S3L_FRACTIONS_PER_UNIT - 2 * (S3L_FRACTIONS_PER_UNIT / 3); -#endif - - p.triangleSize[0] = rPointSS->x - lPointSS->x; - p.triangleSize[1] = - (rPointSS->y > lPointSS->y ? rPointSS->y : lPointSS->y) - tPointSS->y; - - // now draw the triangle line by line: - - S3L_ScreenCoord splitY; // Y of the vertically middle point of the triangle - S3L_ScreenCoord endY; // bottom Y of the whole triangle - int splitOnLeft; /* whether splitY is the y coord. of left or right - point */ - - if (rPointSS->y <= lPointSS->y) { - splitY = rPointSS->y; - splitOnLeft = 0; - endY = lPointSS->y; - } else { - splitY = lPointSS->y; - splitOnLeft = 1; - endY = rPointSS->y; - } - - S3L_ScreenCoord currentY = tPointSS->y; - - /* We'll be using an algorithm similar to Bresenham line algorithm. 
The - specifics of this algorithm are among others: - - - drawing possibly NON-CONTINUOUS line - - NOT tracing the line exactly, but rather rasterizing one the right - side of it, according to the pixel CENTERS, INCLUDING the pixel - centers - - The principle is this: - - - Move vertically by pixels and accumulate the error (abs(dx/dy)). - - If the error is greater than one (crossed the next pixel center), keep - moving horizontally and substracting 1 from the error until it is less - than 1 again. - - To make this INTEGER ONLY, scale the case so that distance between - pixels is equal to dy (instead of 1). This way the error becomes - dx/dy * dy == dx, and we're comparing the error to (and potentially - substracting) 1 * dy == dy. */ - - int16_t - /* triangle side: - left right */ - lX, - rX, // current x position on the screen - lDx, rDx, // dx (end point - start point) - lDy, rDy, // dy (end point - start point) - lInc, rInc, // direction in which to increment (1 or -1) - lErr, rErr, // current error (Bresenham) - lErrCmp, rErrCmp, // helper for deciding comparison (> vs >=) - lErrAdd, rErrAdd, // error value to add in each Bresenham cycle - lErrSub, - rErrSub; // error value to substract when moving in x direction - - S3L_FastLerpState lSideFLS, rSideFLS; - -#if S3L_COMPUTE_LERP_DEPTH - S3L_FastLerpState lDepthFLS, rDepthFLS; - -#define initDepthFLS(s, p1, p2) \ - s##DepthFLS.valueScaled = p1##PointSS->z << S3L_FAST_LERP_QUALITY; \ - s##DepthFLS.stepScaled = ((p2##PointSS->z << S3L_FAST_LERP_QUALITY) - \ - s##DepthFLS.valueScaled) / \ - (s##Dy != 0 ? 
s##Dy : 1); -#else -#define initDepthFLS(s, p1, p2) ; -#endif - -/* init side for the algorithm, params: - s - which side (l or r) - p1 - point from (t, l or r) - p2 - point to (t, l or r) - down - whether the side coordinate goes top-down or vice versa */ -#define initSide(s, p1, p2, down) \ - s##X = p1##PointSS->x; \ - s##Dx = p2##PointSS->x - p1##PointSS->x; \ - s##Dy = p2##PointSS->y - p1##PointSS->y; \ - initDepthFLS(s, p1, p2) s##SideFLS.stepScaled = \ - (S3L_FRACTIONS_PER_UNIT << S3L_FAST_LERP_QUALITY) / \ - (s##Dy != 0 ? s##Dy : 1); \ - s##SideFLS.valueScaled = 0; \ - if (!down) { \ - s##SideFLS.valueScaled = S3L_FRACTIONS_PER_UNIT \ - << S3L_FAST_LERP_QUALITY; \ - s##SideFLS.stepScaled *= -1; \ - } \ - s##Inc = s##Dx >= 0 ? 1 : -1; \ - if (s##Dx < 0) { \ - s##Err = 0; \ - s##ErrCmp = 0; \ - } else { \ - s##Err = s##Dy; \ - s##ErrCmp = 1; \ - } \ - s##ErrAdd = S3L_abs(s##Dx); \ - s##ErrSub = s##Dy != 0 ? s##Dy : 1; /* don't allow 0, could lead to an \ - infinite substracting loop */ - -#define stepSide(s) \ - while (s##Err - s##Dy >= s##ErrCmp) { \ - s##X += s##Inc; \ - s##Err -= s##ErrSub; \ - } \ - s##Err += s##ErrAdd; - - initSide(r, t, r, 1) initSide(l, t, l, 1) - -#if S3L_PERSPECTIVE_CORRECTION - /* PC is done by linearly interpolating reciprocals from which the corrected - velues can be computed. See - http://www.lysator.liu.se/~mikaelk/doc/perspectivetexture/ */ - -#if S3L_PERSPECTIVE_CORRECTION == 1 -#define Z_RECIP_NUMERATOR \ - (S3L_FRACTIONS_PER_UNIT * S3L_FRACTIONS_PER_UNIT * S3L_FRACTIONS_PER_UNIT) -#elif S3L_PERSPECTIVE_CORRECTION == 2 -#define Z_RECIP_NUMERATOR (S3L_FRACTIONS_PER_UNIT * S3L_FRACTIONS_PER_UNIT) -#endif - /* ^ This numerator is a number by which we divide values for the - reciprocals. For PC == 2 it has to be lower because linear - interpolation scaling would make it overflow -- this results in lower - depth precision in bigger distance for PC == 2. 
*/ - - S3L_Unit tPointRecipZ, - lPointRecipZ, rPointRecipZ, /* Reciprocals of the depth of - each triangle point. */ - lRecip0, lRecip1, rRecip0, rRecip1; /* Helper variables for swapping - the above after split. */ - - tPointRecipZ = Z_RECIP_NUMERATOR / S3L_nonZero(tPointSS->z); - lPointRecipZ = Z_RECIP_NUMERATOR / S3L_nonZero(lPointSS->z); - rPointRecipZ = Z_RECIP_NUMERATOR / S3L_nonZero(rPointSS->z); - - lRecip0 = tPointRecipZ; - lRecip1 = lPointRecipZ; - rRecip0 = tPointRecipZ; - rRecip1 = rPointRecipZ; - -#define manageSplitPerspective(b0, b1) \ - b1##Recip0 = b0##PointRecipZ; \ - b1##Recip1 = b1##PointRecipZ; \ - b0##Recip0 = b0##PointRecipZ; \ - b0##Recip1 = tPointRecipZ; -#else -#define manageSplitPerspective(b0, b1) ; -#endif - - // clip to the screen in y dimension: - - endY = S3L_min(endY, S3L_RESOLUTION_Y); - - /* Clipping above the screen (y < 0) can't be easily done here, will be - handled inside the loop. */ - - while (currentY < endY) /* draw the triangle from top to bottom -- the - bottom-most row is left out because, following - from the rasterization rules (see start of the - file), it is to never be rasterized. */ - { - if (currentY == splitY) // reached a vertical split of the triangle? 
- { -#define manageSplit(b0, b1, s0, s1) \ - S3L_Unit* tmp = barycentric##b0; \ - barycentric##b0 = barycentric##b1; \ - barycentric##b1 = tmp; \ - s0##SideFLS.valueScaled = \ - (S3L_FRACTIONS_PER_UNIT << S3L_FAST_LERP_QUALITY) - \ - s0##SideFLS.valueScaled; \ - s0##SideFLS.stepScaled *= -1; \ - manageSplitPerspective(s0, s1) - - if (splitOnLeft) { - initSide(l, l, r, 0); - manageSplit(0, 2, r, l) - } else { - initSide(r, r, l, 0); - manageSplit(1, 2, l, r) - } - } - - stepSide(r) stepSide(l) - - if (currentY >= 0) /* clipping of pixels whose y < 0 (can't be - easily done outside the loop because of the - Bresenham-like algorithm steps) */ - { - p.y = currentY; - - // draw the horizontal line - -#if !S3L_FLAT - S3L_Unit rowLength = S3L_nonZero(rX - lX - 1); // prevent zero div - -#if S3L_PERSPECTIVE_CORRECTION - S3L_Unit lOverZ, lRecipZ, rOverZ, rRecipZ, lT, rT; - - lT = S3L_getFastLerpValue(lSideFLS); - rT = S3L_getFastLerpValue(rSideFLS); - - lOverZ = S3L_interpolateByUnitFrom0(lRecip1, lT); - lRecipZ = S3L_interpolateByUnit(lRecip0, lRecip1, lT); - - rOverZ = S3L_interpolateByUnitFrom0(rRecip1, rT); - rRecipZ = S3L_interpolateByUnit(rRecip0, rRecip1, rT); -#else - S3L_FastLerpState b0FLS, b1FLS; - -#if S3L_COMPUTE_LERP_DEPTH - S3L_FastLerpState depthFLS; - - depthFLS.valueScaled = lDepthFLS.valueScaled; - depthFLS.stepScaled = - (rDepthFLS.valueScaled - lDepthFLS.valueScaled) / rowLength; -#endif - - b0FLS.valueScaled = 0; - b1FLS.valueScaled = lSideFLS.valueScaled; - - b0FLS.stepScaled = rSideFLS.valueScaled / rowLength; - b1FLS.stepScaled = -1 * lSideFLS.valueScaled / rowLength; -#endif -#endif - - // clip to the screen in x dimension: - - S3L_ScreenCoord rXClipped = S3L_min(rX, S3L_RESOLUTION_X), - lXClipped = lX; - - if (lXClipped < 0) { - lXClipped = 0; - -#if !S3L_PERSPECTIVE_CORRECTION && !S3L_FLAT - b0FLS.valueScaled -= lX * b0FLS.stepScaled; - b1FLS.valueScaled -= lX * b1FLS.stepScaled; - -#if S3L_COMPUTE_LERP_DEPTH - depthFLS.valueScaled -= lX * 
depthFLS.stepScaled; -#endif -#endif - } - -#if S3L_PERSPECTIVE_CORRECTION - S3L_ScreenCoord i = lXClipped - lX; /* helper var to save one - substraction in the inner - loop */ -#endif - -#if S3L_PERSPECTIVE_CORRECTION == 2 - S3L_FastLerpState - depthPC, // interpolates depth between row segments - b0PC, // interpolates barycentric0 between row segments - b1PC; // interpolates barycentric1 between row segments - - /* ^ These interpolate values between row segments (lines of pixels - of S3L_PC_APPROX_LENGTH length). After each row segment - perspective correction is recomputed. */ - - depthPC.valueScaled = - (Z_RECIP_NUMERATOR / - S3L_nonZero(S3L_interpolate(lRecipZ, rRecipZ, i, rowLength))) - << S3L_FAST_LERP_QUALITY; - - b0PC.valueScaled = (S3L_interpolateFrom0(rOverZ, i, rowLength) * - depthPC.valueScaled) / - (Z_RECIP_NUMERATOR / S3L_FRACTIONS_PER_UNIT); - - b1PC.valueScaled = - ((lOverZ - S3L_interpolateFrom0(lOverZ, i, rowLength)) * - depthPC.valueScaled) / - (Z_RECIP_NUMERATOR / S3L_FRACTIONS_PER_UNIT); - - int8_t rowCount = S3L_PC_APPROX_LENGTH; -#endif - -#if S3L_Z_BUFFER - uint32_t zBufferIndex = p.y * S3L_RESOLUTION_X + lXClipped; -#endif - - // draw the row -- inner loop: - - for (S3L_ScreenCoord x = lXClipped; x < rXClipped; ++x) { - int8_t testsPassed = 1; - -#if S3L_STENCIL_BUFFER - if (!S3L_stencilTest(x, p.y)) - testsPassed = 0; -#endif - p.x = x; - -#if S3L_COMPUTE_DEPTH -#if S3L_PERSPECTIVE_CORRECTION == 1 - p.depth = - Z_RECIP_NUMERATOR / S3L_nonZero(S3L_interpolate( - lRecipZ, rRecipZ, i, rowLength)); -#elif S3L_PERSPECTIVE_CORRECTION == 2 - if (rowCount >= S3L_PC_APPROX_LENGTH) { - // init the linear interpolation to the next PC correct - // value - - rowCount = 0; - - S3L_Unit nextI = i + S3L_PC_APPROX_LENGTH; - - if (nextI < rowLength) { - S3L_Unit nextDepthScaled = - (Z_RECIP_NUMERATOR / - S3L_nonZero(S3L_interpolate(lRecipZ, rRecipZ, - nextI, rowLength))) - << S3L_FAST_LERP_QUALITY; - - depthPC.stepScaled = - (nextDepthScaled - 
depthPC.valueScaled) / - S3L_PC_APPROX_LENGTH; - - S3L_Unit nextValue = - (S3L_interpolateFrom0(rOverZ, nextI, rowLength) * - nextDepthScaled) / - (Z_RECIP_NUMERATOR / S3L_FRACTIONS_PER_UNIT); - - b0PC.stepScaled = (nextValue - b0PC.valueScaled) / - S3L_PC_APPROX_LENGTH; - - nextValue = - ((lOverZ - - S3L_interpolateFrom0(lOverZ, nextI, rowLength)) * - nextDepthScaled) / - (Z_RECIP_NUMERATOR / S3L_FRACTIONS_PER_UNIT); - - b1PC.stepScaled = (nextValue - b1PC.valueScaled) / - S3L_PC_APPROX_LENGTH; - } else { - /* A special case where we'd be interpolating outside - the triangle. It seems like a valid approach at - first, but it creates a bug in a case when the - rasaterized triangle is near screen 0 and can - actually never reach the extrapolated screen - position. So we have to clamp to the actual end of - the triangle here. */ - - S3L_Unit maxI = S3L_nonZero(rowLength - i); - - S3L_Unit nextDepthScaled = - (Z_RECIP_NUMERATOR / S3L_nonZero(rRecipZ)) - << S3L_FAST_LERP_QUALITY; - - depthPC.stepScaled = - (nextDepthScaled - depthPC.valueScaled) / maxI; - - S3L_Unit nextValue = - (rOverZ * nextDepthScaled) / - (Z_RECIP_NUMERATOR / S3L_FRACTIONS_PER_UNIT); - - b0PC.stepScaled = (nextValue - b0PC.valueScaled) / maxI; - - b1PC.stepScaled = -1 * b1PC.valueScaled / maxI; - } - } - - p.depth = S3L_getFastLerpValue(depthPC); -#else - p.depth = S3L_getFastLerpValue(depthFLS); - S3L_stepFastLerp(depthFLS); -#endif -#else // !S3L_COMPUTE_DEPTH - p.depth = (tPointSS->z + lPointSS->z + rPointSS->z) / 3; -#endif - -#if S3L_Z_BUFFER - p.previousZ = S3L_zBuffer[zBufferIndex]; - - zBufferIndex++; - - if (!S3L_zTest(p.x, p.y, p.depth)) - testsPassed = 0; -#endif - - if (testsPassed) { -#if !S3L_FLAT -#if S3L_PERSPECTIVE_CORRECTION == 0 - *barycentric0 = S3L_getFastLerpValue(b0FLS); - *barycentric1 = S3L_getFastLerpValue(b1FLS); -#elif S3L_PERSPECTIVE_CORRECTION == 1 - *barycentric0 = - (S3L_interpolateFrom0(rOverZ, i, rowLength) * p.depth) / - (Z_RECIP_NUMERATOR / 
S3L_FRACTIONS_PER_UNIT); - - *barycentric1 = - ((lOverZ - S3L_interpolateFrom0(lOverZ, i, rowLength)) * - p.depth) / - (Z_RECIP_NUMERATOR / S3L_FRACTIONS_PER_UNIT); -#elif S3L_PERSPECTIVE_CORRECTION == 2 - *barycentric0 = S3L_getFastLerpValue(b0PC); - *barycentric1 = S3L_getFastLerpValue(b1PC); -#endif - - *barycentric2 = - S3L_FRACTIONS_PER_UNIT - *barycentric0 - *barycentric1; -#endif - -#if S3L_NEAR_CROSS_STRATEGY == 3 - - if (_S3L_projectedTriangleState != 0) { - S3L_Unit newBarycentric[3]; - - newBarycentric[0] = S3L_interpolateBarycentric( - _S3L_triangleRemapBarycentrics[0].x, - _S3L_triangleRemapBarycentrics[1].x, - _S3L_triangleRemapBarycentrics[2].x, p.barycentric); - - newBarycentric[1] = S3L_interpolateBarycentric( - _S3L_triangleRemapBarycentrics[0].y, - _S3L_triangleRemapBarycentrics[1].y, - _S3L_triangleRemapBarycentrics[2].y, p.barycentric); - - newBarycentric[2] = S3L_interpolateBarycentric( - _S3L_triangleRemapBarycentrics[0].z, - _S3L_triangleRemapBarycentrics[1].z, - _S3L_triangleRemapBarycentrics[2].z, p.barycentric); - - p.barycentric[0] = newBarycentric[0]; - p.barycentric[1] = newBarycentric[1]; - p.barycentric[2] = newBarycentric[2]; - } -#endif - - S3L_PIXEL_FUNCTION(&p); - } // tests passed - -#if !S3L_FLAT -#if S3L_PERSPECTIVE_CORRECTION - i++; -#if S3L_PERSPECTIVE_CORRECTION == 2 - rowCount++; - - S3L_stepFastLerp(depthPC); - S3L_stepFastLerp(b0PC); - S3L_stepFastLerp(b1PC); -#endif -#else - S3L_stepFastLerp(b0FLS); - S3L_stepFastLerp(b1FLS); -#endif -#endif - } // inner loop - } // y clipping - -#if !S3L_FLAT - S3L_stepFastLerp(lSideFLS); - S3L_stepFastLerp(rSideFLS); - -#if S3L_COMPUTE_LERP_DEPTH - S3L_stepFastLerp(lDepthFLS); - S3L_stepFastLerp(rDepthFLS); -#endif -#endif - - ++currentY; - } // row drawing - -#undef manageSplit -#undef initPC -#undef initSide -#undef stepSide -#undef Z_RECIP_NUMERATOR -} - -void S3L_rotate2DPoint(S3L_Unit* x, S3L_Unit* y, S3L_Unit angle) { - if (angle < S3L_SIN_TABLE_UNIT_STEP) - return; // no 
visible rotation - - S3L_Unit angleSin = S3L_sin(angle); - S3L_Unit angleCos = S3L_cos(angle); - - S3L_Unit xBackup = *x; - - *x = (angleCos * (*x)) / S3L_FRACTIONS_PER_UNIT - - (angleSin * (*y)) / S3L_FRACTIONS_PER_UNIT; - - *y = (angleSin * xBackup) / S3L_FRACTIONS_PER_UNIT + - (angleCos * (*y)) / S3L_FRACTIONS_PER_UNIT; -} - -void S3L_makeWorldMatrix(S3L_Transform3D worldTransform, S3L_Mat4 m) { - S3L_makeScaleMatrix(worldTransform.scale.x, worldTransform.scale.y, - worldTransform.scale.z, m); - - S3L_Mat4 t; - - S3L_makeRotationMatrixZXY(worldTransform.rotation.x, - worldTransform.rotation.y, - worldTransform.rotation.z, t); - - S3L_mat4Xmat4(m, t); - - S3L_makeTranslationMat(worldTransform.translation.x, - worldTransform.translation.y, - worldTransform.translation.z, t); - - S3L_mat4Xmat4(m, t); -} - -void S3L_mat4Transpose(S3L_Mat4 m) { - S3L_Unit tmp; - - for (uint8_t y = 0; y < 3; ++y) - for (uint8_t x = 1 + y; x < 4; ++x) { - tmp = m[x][y]; - m[x][y] = m[y][x]; - m[y][x] = tmp; - } -} - -void S3L_makeCameraMatrix(S3L_Transform3D cameraTransform, S3L_Mat4 m) { - S3L_makeTranslationMat(-1 * cameraTransform.translation.x, - -1 * cameraTransform.translation.y, - -1 * cameraTransform.translation.z, m); - - S3L_Mat4 r; - - S3L_makeRotationMatrixZXY(cameraTransform.rotation.x, - cameraTransform.rotation.y, - cameraTransform.rotation.z, r); - - S3L_mat4Transpose(r); // transposing creates an inverse transform - - S3L_mat4Xmat4(m, r); -} - -int8_t S3L_triangleWinding(S3L_ScreenCoord x0, - S3L_ScreenCoord y0, - S3L_ScreenCoord x1, - S3L_ScreenCoord y1, - S3L_ScreenCoord x2, - S3L_ScreenCoord y2) { - int32_t winding = (y1 - y0) * (x2 - x1) - (x1 - x0) * (y2 - y1); - // ^ cross product for points with z == 0 - - return winding > 0 ? 1 : (winding < 0 ? -1 : 0); -} - -/** - Checks if given triangle (in Screen Space) is at least partially visible, - i.e. 
returns false if the triangle is either completely outside the frustum - (left, right, top, bottom, near) or is invisible due to backface culling. -*/ -static inline int8_t S3L_triangleIsVisible(S3L_Vec4 p0, - S3L_Vec4 p1, - S3L_Vec4 p2, - uint8_t backfaceCulling) { -#define clipTest(c, cmp, v) (p0.c cmp(v) && p1.c cmp(v) && p2.c cmp(v)) - - if ( // outside frustum? -#if S3L_NEAR_CROSS_STRATEGY == 0 - p0.z <= S3L_NEAR || p1.z <= S3L_NEAR || p2.z <= S3L_NEAR || - // ^ partially in front of NEAR? -#else - clipTest(z, <=, S3L_NEAR) || // completely in front of NEAR? -#endif - clipTest(x, <, 0) || clipTest(x, >=, S3L_RESOLUTION_X) || - clipTest(y, <, 0) || clipTest(y, >, S3L_RESOLUTION_Y)) - return 0; - -#undef clipTest - - if (backfaceCulling != 0) { - int8_t winding = - S3L_triangleWinding(p0.x, p0.y, p1.x, p1.y, p2.x, p2.y); - - if ((backfaceCulling == 1 && winding > 0) || - (backfaceCulling == 2 && winding < 0)) - return 0; - } - - return 1; -} - -#if S3L_SORT != 0 -typedef struct { - uint8_t modelIndex; - S3L_Index triangleIndex; - uint16_t sortValue; -} _S3L_TriangleToSort; - -_S3L_TriangleToSort S3L_sortArray[S3L_MAX_TRIANGES_DRAWN]; -uint16_t S3L_sortArrayLength; -#endif - -void _S3L_projectVertex(const S3L_Model3D* model, - S3L_Index triangleIndex, - uint8_t vertex, - S3L_Mat4 projectionMatrix, - S3L_Vec4* result) { - uint32_t vertexIndex = model->triangles[triangleIndex * 3 + vertex] * 3; - - result->x = model->vertices[vertexIndex]; - result->y = model->vertices[vertexIndex + 1]; - result->z = model->vertices[vertexIndex + 2]; - result->w = S3L_FRACTIONS_PER_UNIT; // needed for translation - - S3L_vec3Xmat4(result, projectionMatrix); - - result->w = result->z; - /* We'll keep the non-clamped z in w for sorting. */ -} - -void _S3L_mapProjectedVertexToScreen(S3L_Vec4* vertex, S3L_Unit focalLength) { - vertex->z = vertex->z >= S3L_NEAR ? 
vertex->z : S3L_NEAR; - /* ^ This firstly prevents zero division in the follwoing z-divide and - secondly "pushes" vertices that are in front of near a little bit forward, - which makes them behave a bit better. If all three vertices end up exactly - on NEAR, the triangle will be culled. */ - - S3L_perspectiveDivide(vertex, focalLength); - - S3L_ScreenCoord sX, sY; - - S3L_mapProjectionPlaneToScreen(*vertex, &sX, &sY); - - vertex->x = sX; - vertex->y = sY; -} - -/** - Projects a triangle to the screen. If enabled, a triangle can be potentially - subdivided into two if it crosses the near plane, in which case two projected - triangles are returned (the info about splitting or cutting the triangle is - passed in global variables, see above). -*/ -void _S3L_projectTriangle(const S3L_Model3D* model, - S3L_Index triangleIndex, - S3L_Mat4 matrix, - uint32_t focalLength, - S3L_Vec4 transformed[6]) { - _S3L_projectVertex(model, triangleIndex, 0, matrix, &(transformed[0])); - _S3L_projectVertex(model, triangleIndex, 1, matrix, &(transformed[1])); - _S3L_projectVertex(model, triangleIndex, 2, matrix, &(transformed[2])); - - _S3L_projectedTriangleState = 0; - -#if S3L_NEAR_CROSS_STRATEGY == 2 || S3L_NEAR_CROSS_STRATEGY == 3 - uint8_t infront = 0; - uint8_t behind = 0; - uint8_t infrontI[3]; - uint8_t behindI[3]; - - for (uint8_t i = 0; i < 3; ++i) - if (transformed[i].z < S3L_NEAR) { - infrontI[infront] = i; - infront++; - } else { - behindI[behind] = i; - behind++; - } - -#if S3L_NEAR_CROSS_STRATEGY == 3 - for (int i = 0; i < 3; ++i) - S3L_vec4Init(&(_S3L_triangleRemapBarycentrics[i])); - - _S3L_triangleRemapBarycentrics[0].x = S3L_FRACTIONS_PER_UNIT; - _S3L_triangleRemapBarycentrics[1].y = S3L_FRACTIONS_PER_UNIT; - _S3L_triangleRemapBarycentrics[2].z = S3L_FRACTIONS_PER_UNIT; -#endif - -#define interpolateVertex \ - S3L_Unit ratio = \ - ((transformed[be].z - S3L_NEAR) * S3L_FRACTIONS_PER_UNIT) / \ - (transformed[be].z - transformed[in].z); \ - transformed[in].x = 
transformed[be].x - \ - ((transformed[be].x - transformed[in].x) * ratio) / \ - S3L_FRACTIONS_PER_UNIT; \ - transformed[in].y = transformed[be].y - \ - ((transformed[be].y - transformed[in].y) * ratio) / \ - S3L_FRACTIONS_PER_UNIT; \ - transformed[in].z = S3L_NEAR; \ - if (beI != 0) { \ - beI->x = (beI->x * ratio) / S3L_FRACTIONS_PER_UNIT; \ - beI->y = (beI->y * ratio) / S3L_FRACTIONS_PER_UNIT; \ - beI->z = (beI->z * ratio) / S3L_FRACTIONS_PER_UNIT; \ - ratio = S3L_FRACTIONS_PER_UNIT - ratio; \ - beI->x += (beB->x * ratio) / S3L_FRACTIONS_PER_UNIT; \ - beI->y += (beB->y * ratio) / S3L_FRACTIONS_PER_UNIT; \ - beI->z += (beB->z * ratio) / S3L_FRACTIONS_PER_UNIT; \ - } - - if (infront == 2) { - // shift the two vertices forward along the edge - for (uint8_t i = 0; i < 2; ++i) { - uint8_t be = behindI[0], in = infrontI[i]; - -#if S3L_NEAR_CROSS_STRATEGY == 3 - S3L_Vec4 *beI = &(_S3L_triangleRemapBarycentrics[in]), - *beB = &(_S3L_triangleRemapBarycentrics[be]); -#else - S3L_Vec4 *beI = 0, *beB = 0; -#endif - - interpolateVertex - - _S3L_projectedTriangleState = 1; - } - } else if (infront == 1) { - // create another triangle and do the shifts - transformed[3] = transformed[behindI[1]]; - transformed[4] = transformed[infrontI[0]]; - transformed[5] = transformed[infrontI[0]]; - -#if S3L_NEAR_CROSS_STRATEGY == 3 - _S3L_triangleRemapBarycentrics[3] = - _S3L_triangleRemapBarycentrics[behindI[1]]; - _S3L_triangleRemapBarycentrics[4] = - _S3L_triangleRemapBarycentrics[infrontI[0]]; - _S3L_triangleRemapBarycentrics[5] = - _S3L_triangleRemapBarycentrics[infrontI[0]]; -#endif - - for (uint8_t i = 0; i < 2; ++i) { - uint8_t be = behindI[i], in = i + 4; - -#if S3L_NEAR_CROSS_STRATEGY == 3 - S3L_Vec4 *beI = &(_S3L_triangleRemapBarycentrics[in]), - *beB = &(_S3L_triangleRemapBarycentrics[be]); -#else - S3L_Vec4 *beI = 0, *beB = 0; -#endif - - interpolateVertex - } - -#if S3L_NEAR_CROSS_STRATEGY == 3 - _S3L_triangleRemapBarycentrics[infrontI[0]] = - _S3L_triangleRemapBarycentrics[4]; 
-#endif - - transformed[infrontI[0]] = transformed[4]; - - _S3L_mapProjectedVertexToScreen(&transformed[3], focalLength); - _S3L_mapProjectedVertexToScreen(&transformed[4], focalLength); - _S3L_mapProjectedVertexToScreen(&transformed[5], focalLength); - - _S3L_projectedTriangleState = 2; - } - -#undef interpolateVertex -#endif // S3L_NEAR_CROSS_STRATEGY == 2 - - _S3L_mapProjectedVertexToScreen(&transformed[0], focalLength); - _S3L_mapProjectedVertexToScreen(&transformed[1], focalLength); - _S3L_mapProjectedVertexToScreen(&transformed[2], focalLength); -} - -void S3L_drawScene(S3L_Scene scene) { - S3L_Mat4 matFinal, matCamera; - S3L_Vec4 transformed[6]; // transformed triangle coords, for 2 triangles - - const S3L_Model3D* model; - S3L_Index modelIndex, triangleIndex; - - S3L_makeCameraMatrix(scene.camera.transform, matCamera); - -#if S3L_SORT != 0 - uint16_t previousModel = 0; - S3L_sortArrayLength = 0; -#endif - - for (modelIndex = 0; modelIndex < scene.modelCount; ++modelIndex) { - if (!scene.models[modelIndex].config.visible) - continue; - -#if S3L_SORT != 0 - if (S3L_sortArrayLength >= S3L_MAX_TRIANGES_DRAWN) - break; - - previousModel = modelIndex; -#endif - - if (scene.models[modelIndex].customTransformMatrix == 0) - S3L_makeWorldMatrix(scene.models[modelIndex].transform, matFinal); - else { - S3L_Mat4* m = scene.models[modelIndex].customTransformMatrix; - - for (int8_t j = 0; j < 4; ++j) - for (int8_t i = 0; i < 4; ++i) - matFinal[i][j] = (*m)[i][j]; - } - - S3L_mat4Xmat4(matFinal, matCamera); - - S3L_Index triangleCount = scene.models[modelIndex].triangleCount; - - triangleIndex = 0; - - model = &(scene.models[modelIndex]); - - while (triangleIndex < triangleCount) { - /* Some kind of cache could be used in theory to not project - perviously already projected vertices, but after some testing - this was abandoned, no gain was seen. 
*/ - - _S3L_projectTriangle(model, triangleIndex, matFinal, - scene.camera.focalLength, transformed); - - if (S3L_triangleIsVisible(transformed[0], transformed[1], - transformed[2], - model->config.backfaceCulling)) { -#if S3L_SORT == 0 - // without sorting draw right away - S3L_drawTriangle(transformed[0], transformed[1], transformed[2], - modelIndex, triangleIndex); - - if (_S3L_projectedTriangleState == - 2) // draw potential subtriangle - { -#if S3L_NEAR_CROSS_STRATEGY == 3 - _S3L_triangleRemapBarycentrics[0] = - _S3L_triangleRemapBarycentrics[3]; - _S3L_triangleRemapBarycentrics[1] = - _S3L_triangleRemapBarycentrics[4]; - _S3L_triangleRemapBarycentrics[2] = - _S3L_triangleRemapBarycentrics[5]; -#endif - - S3L_drawTriangle(transformed[3], transformed[4], - transformed[5], modelIndex, triangleIndex); - } -#else - - if (S3L_sortArrayLength >= S3L_MAX_TRIANGES_DRAWN) - break; - - // with sorting add to a sort list - S3L_sortArray[S3L_sortArrayLength].modelIndex = modelIndex; - S3L_sortArray[S3L_sortArrayLength].triangleIndex = - triangleIndex; - S3L_sortArray[S3L_sortArrayLength].sortValue = - S3L_zeroClamp(transformed[0].w + transformed[1].w + - transformed[2].w) >> - 2; - /* ^ - The w component here stores non-clamped z. - - As a simple approximation we sort by the triangle center - point, which is a mean coordinate -- we don't actually have - to divide by 3 (or anything), that is unnecessary for - sorting! We shift by 2 just as a fast operation to prevent - overflow of the sum over uint_16t. */ - - S3L_sortArrayLength++; -#endif - } - - triangleIndex++; - } - } - -#if S3L_SORT != 0 - -#if S3L_SORT == 1 -#define cmp < -#else -#define cmp > -#endif - - /* Sort the triangles. We use insertion sort, because it has many - advantages, especially for smaller arrays (better than bubble sort, - in-place, stable, simple, ...). 
*/ - - for (int16_t i = 1; i < S3L_sortArrayLength; ++i) { - _S3L_TriangleToSort tmp = S3L_sortArray[i]; - - int16_t j = i - 1; - - while (j >= 0 && S3L_sortArray[j].sortValue cmp tmp.sortValue) { - S3L_sortArray[j + 1] = S3L_sortArray[j]; - j--; - } - - S3L_sortArray[j + 1] = tmp; - } - -#undef cmp - - for (S3L_Index i = 0; i < S3L_sortArrayLength; - ++i) // draw sorted triangles - { - modelIndex = S3L_sortArray[i].modelIndex; - triangleIndex = S3L_sortArray[i].triangleIndex; - - model = &(scene.models[modelIndex]); - - if (modelIndex != previousModel) { - // only recompute the matrix when the model has changed - S3L_makeWorldMatrix(model->transform, matFinal); - S3L_mat4Xmat4(matFinal, matCamera); - previousModel = modelIndex; - } - - /* Here we project the points again, which is redundant and slow as - they've already been projected above, but saving the projected points - would require a lot of memory, which for small resolutions could be - even worse than z-bufer. So this seems to be the best way - memory-wise. */ - - _S3L_projectTriangle(model, triangleIndex, matFinal, - scene.camera.focalLength, transformed); - - S3L_drawTriangle(transformed[0], transformed[1], transformed[2], - modelIndex, triangleIndex); - - if (_S3L_projectedTriangleState == 2) { -#if S3L_NEAR_CROSS_STRATEGY == 3 - _S3L_triangleRemapBarycentrics[0] = - _S3L_triangleRemapBarycentrics[3]; - _S3L_triangleRemapBarycentrics[1] = - _S3L_triangleRemapBarycentrics[4]; - _S3L_triangleRemapBarycentrics[2] = - _S3L_triangleRemapBarycentrics[5]; -#endif - - S3L_drawTriangle(transformed[3], transformed[4], transformed[5], - modelIndex, triangleIndex); - } - } -#endif -} - -#endif // guard |