@@ -487,8 +487,8 @@ typedef ptrdiff_t FT_PtrDist;
487
487
PCell cell_free ; /* cell allocation next free slot */
488
488
PCell cell_limit ; /* cell allocation limit */
489
489
490
- PCell * ycells ; /* array of cell linked-lists, one per */
491
- /* vertical coordinate in the current band. */
490
+ PCell * ycells ; /* array of cell linked-lists; one per */
491
+ /* vertical coordinate in the current band */
492
492
493
493
PCell cells ; /* cell storage area */
494
494
FT_PtrDist max_cells ; /* cell storage capacity */
@@ -513,19 +513,21 @@ typedef ptrdiff_t FT_PtrDist;
513
513
static gray_TWorker ras ;
514
514
#endif
515
515
516
- /* Return a pointer to the "null cell", used as a sentinel at the end */
517
- /* of all ycells[] linked lists. Its x coordinate should be maximal */
518
- /* to ensure no NULL checks are necessary when looking for an insertion */
519
- /* point in gray_set_cell(). Other loops should check the cell pointer */
520
- /* with CELL_IS_NULL() to detect the end of the list. */
521
- #define NULL_CELL_PTR (ras ) (ras).cells
516
+ /*
517
+ * Return a pointer to the 'null cell', used as a sentinel at the end of
518
+ * all `ycells` linked lists. Its x coordinate should be maximal to
519
+ * ensure no NULL checks are necessary when looking for an insertion point
520
+ * in `gray_set_cell`. Other loops should check the cell pointer with
521
+ * CELL_IS_NULL() to detect the end of the list.
522
+ */
523
+ #define NULL_CELL_PTR ( ras ) (ras).cells
522
524
523
- /* The |x| value of the null cell. Must be the largest possible */
524
- /* integer value stored in a TCell.x field. */
525
+ /* The |x| value of the null cell. Must be the largest possible */
526
+ /* integer value stored in a `TCell.x` field. */
525
527
#define CELL_MAX_X_VALUE INT_MAX
526
528
527
- /* Return true iff |cell| points to the null cell. */
528
- #define CELL_IS_NULL (cell ) ((cell)->x == CELL_MAX_X_VALUE)
529
+ /* Return true iff |cell| points to the null cell. */
530
+ #define CELL_IS_NULL ( cell ) ( (cell)->x == CELL_MAX_X_VALUE )
529
531
530
532
531
533
#define FT_INTEGRATE ( ras , a , b ) \
@@ -556,7 +558,7 @@ typedef ptrdiff_t FT_PtrDist;
556
558
557
559
printf ( "%3d:" , y );
558
560
559
- for ( ; !CELL_IS_NULL (cell ); cell = cell -> next )
561
+ for ( ; !CELL_IS_NULL ( cell ); cell = cell -> next )
560
562
printf ( " (%3d, c:%4d, a:%6d)" ,
561
563
cell -> x , cell -> cover , cell -> area );
562
564
printf ( "\n" );
@@ -584,9 +586,11 @@ typedef ptrdiff_t FT_PtrDist;
584
586
/* Note that if a cell is to the left of the clipping region, it is */
585
587
/* actually set to the (min_ex-1) horizontal position. */
586
588
587
- TCoord ey_index = ey - ras .min_ey ;
589
+ TCoord ey_index = ey - ras .min_ey ;
590
+
591
+
588
592
if ( ey_index < 0 || ey_index >= ras .count_ey || ex >= ras .max_ex )
589
- ras .cell = NULL_CELL_PTR (ras );
593
+ ras .cell = NULL_CELL_PTR ( ras );
590
594
else
591
595
{
592
596
PCell * pcell = ras .ycells + ey_index ;
@@ -610,7 +614,7 @@ typedef ptrdiff_t FT_PtrDist;
610
614
611
615
/* insert new cell */
612
616
cell = ras .cell_free ++ ;
613
- if (cell >= ras .cell_limit )
617
+ if ( cell >= ras .cell_limit )
614
618
ft_longjmp ( ras .jump_buffer , 1 );
615
619
616
620
cell -> x = ex ;
@@ -978,6 +982,7 @@ typedef ptrdiff_t FT_PtrDist;
978
982
}
979
983
980
984
gray_set_cell ( RAS_VAR_ ex1 , ey1 );
985
+
981
986
} while ( ex1 != ex2 || ey1 != ey2 );
982
987
}
983
988
@@ -987,30 +992,37 @@ typedef ptrdiff_t FT_PtrDist;
987
992
FT_INTEGRATE ( ras , fy2 - fy1 , fx1 + fx2 );
988
993
989
994
End :
990
- ras .x = to_x ;
991
- ras .y = to_y ;
995
+ ras .x = to_x ;
996
+ ras .y = to_y ;
992
997
}
993
998
994
999
#endif
995
1000
996
- /* Benchmarking shows that using DDA to flatten the quadratic bezier
997
- * arcs is slightly faster in the following cases:
998
- *
999
- * - When the host CPU is 64-bit.
1000
- * - When SSE2 SIMD registers and instructions are available (even on x86).
1001
- *
1002
- * For other cases, using binary splits is actually slightly faster.
1003
- */
1004
- #if defined(__SSE2__ ) || defined(__x86_64__ ) || defined(__aarch64__ ) || defined(_M_AMD64 ) || defined(_M_ARM64 )
1005
- #define BEZIER_USE_DDA 1
1001
+ /*
1002
+ * Benchmarking shows that using DDA to flatten the quadratic Bézier arcs
1003
+ * is slightly faster in the following cases:
1004
+ *
1005
+ * - When the host CPU is 64-bit.
1006
+ * - When SSE2 SIMD registers and instructions are available (even on
1007
+ * x86).
1008
+ *
1009
+ * For other cases, using binary splits is actually slightly faster.
1010
+ */
1011
+ #if defined( __SSE2__ ) || \
1012
+ defined( __x86_64__ ) || \
1013
+ defined( __aarch64__ ) || \
1014
+ defined( _M_AMD64 ) || \
1015
+ defined( _M_ARM64 )
1016
+ # define BEZIER_USE_DDA 1
1006
1017
#else
1007
- #define BEZIER_USE_DDA 0
1018
+ # define BEZIER_USE_DDA 0
1008
1019
#endif
1009
1020
1021
+
1010
1022
#if BEZIER_USE_DDA
1011
1023
1012
1024
#ifdef __SSE2__
1013
- #include <emmintrin.h>
1025
+ # include <emmintrin.h>
1014
1026
#endif
1015
1027
1016
1028
static void
@@ -1058,8 +1070,8 @@ typedef ptrdiff_t FT_PtrDist;
1058
1070
{
1059
1071
dx >>= 2 ;
1060
1072
shift += 1 ;
1061
- }
1062
- while (dx > ONE_PIXEL / 4 );
1073
+
1074
+ } while ( dx > ONE_PIXEL / 4 );
1063
1075
1064
1076
/*
1065
1077
* The (P0,P1,P2) arc equation, for t in [0,1] range:
@@ -1102,12 +1114,17 @@ typedef ptrdiff_t FT_PtrDist;
1102
1114
* Q << 32 = (2 * B << (32 - N)) + (A << (32 - N - N))
1103
1115
* = (B << (33 - N)) + (A << (32 - N - N))
1104
1116
*/
1117
+
1105
1118
#ifdef __SSE2__
1106
- /* Experience shows that for small shift values, SSE2 is actually slower. */
1107
- if (shift > 2 ) {
1108
- union {
1109
- struct { FT_Int64 ax , ay , bx , by ; } i ;
1110
- struct { __m128i a , b ; } vec ;
1119
+ /* Experience shows that for small shift values, */
1120
+ /* SSE2 is actually slower. */
1121
+ if ( shift > 2 )
1122
+ {
1123
+ union
1124
+ {
1125
+ struct { FT_Int64 ax , ay , bx , by ; } i ;
1126
+ struct { __m128i a , b ; } vec ;
1127
+
1111
1128
} u ;
1112
1129
1113
1130
u .i .ax = p0 .x + p2 .x - 2 * p1 .x ;
@@ -1138,10 +1155,11 @@ typedef ptrdiff_t FT_PtrDist;
1138
1155
p = _mm_add_epi64 (p , q );
1139
1156
q = _mm_add_epi64 (q , r );
1140
1157
1141
- _mm_store_si128 (& v .vec , p );
1158
+ _mm_store_si128 ( & v .vec , p );
1142
1159
1143
- gray_render_line ( RAS_VAR_ v .i .px_hi , v .i .py_hi );
1160
+ gray_render_line ( RAS_VAR_ v .i .px_hi , v .i .py_hi );
1144
1161
}
1162
+
1145
1163
return ;
1146
1164
}
1147
1165
#endif /* __SSE2__ */
@@ -1167,13 +1185,15 @@ typedef ptrdiff_t FT_PtrDist;
1167
1185
qx += rx ;
1168
1186
qy += ry ;
1169
1187
1170
- gray_render_line ( RAS_VAR_ (FT_Pos )(px >> 32 ), (FT_Pos )(py >> 32 ));
1188
+ gray_render_line ( RAS_VAR_ (FT_Pos )( px >> 32 ),
1189
+ (FT_Pos )( py >> 32 ) );
1171
1190
}
1172
1191
}
1173
1192
1174
1193
#else /* !BEZIER_USE_DDA */
1175
1194
1176
- /* Note that multiple attempts to speed up the function below
1195
+ /*
1196
+ * Note that multiple attempts to speed up the function below
1177
1197
* with SSE2 intrinsics, using various data layouts, have turned
1178
1198
* out to be slower than the non-SIMD code below.
1179
1199
*/
@@ -1264,12 +1284,14 @@ typedef ptrdiff_t FT_PtrDist;
1264
1284
1265
1285
#endif /* !BEZIER_USE_DDA */
1266
1286
1267
- /* For cubic bezier, binary splits are still faster than DDA
1287
+
1288
+ /*
1289
+ * For cubic Bézier, binary splits are still faster than DDA
1268
1290
* because the splits are adaptive to how quickly each sub-arc
1269
1291
* approaches its chord trisection points.
1270
1292
*
1271
1293
* It might be useful to experiment with SSE2 to speed up
1272
- * gray_split_cubic() though.
1294
+ * `gray_split_cubic`, though.
1273
1295
*/
1274
1296
static void
1275
1297
gray_split_cubic ( FT_Vector * base )
@@ -1361,6 +1383,7 @@ typedef ptrdiff_t FT_PtrDist;
1361
1383
}
1362
1384
}
1363
1385
1386
+
1364
1387
static int
1365
1388
gray_move_to ( const FT_Vector * to ,
1366
1389
gray_PWorker worker )
@@ -1428,7 +1451,7 @@ typedef ptrdiff_t FT_PtrDist;
1428
1451
unsigned char * line = ras .target .origin - ras .target .pitch * y ;
1429
1452
1430
1453
1431
- for ( ; !CELL_IS_NULL (cell ); cell = cell -> next )
1454
+ for ( ; !CELL_IS_NULL ( cell ); cell = cell -> next )
1432
1455
{
1433
1456
if ( cover != 0 && cell -> x > x )
1434
1457
{
@@ -1476,7 +1499,7 @@ typedef ptrdiff_t FT_PtrDist;
1476
1499
TArea area ;
1477
1500
1478
1501
1479
- for ( ; !CELL_IS_NULL (cell ); cell = cell -> next )
1502
+ for ( ; !CELL_IS_NULL ( cell ); cell = cell -> next )
1480
1503
{
1481
1504
if ( cover != 0 && cell -> x > x )
1482
1505
{
@@ -1898,19 +1921,19 @@ typedef ptrdiff_t FT_PtrDist;
1898
1921
/* memory management */
1899
1922
n = ( height * sizeof ( PCell ) + sizeof ( TCell ) - 1 ) / sizeof ( TCell );
1900
1923
1901
- ras .cells = buffer + n ;
1902
- ras .max_cells = (FT_PtrDist )( FT_MAX_GRAY_POOL - n );
1924
+ ras .cells = buffer + n ;
1925
+ ras .max_cells = (FT_PtrDist )( FT_MAX_GRAY_POOL - n );
1903
1926
ras .cell_limit = ras .cells + ras .max_cells ;
1904
- ras .ycells = (PCell * )buffer ;
1927
+ ras .ycells = (PCell * )buffer ;
1905
1928
1906
- /* Initialize the null cell is at the start of the ' cells' array. */
1907
- /* Note that this requires ras.cell_free initialization to skip */
1908
- /* over the first entry in the array. */
1909
- PCell null_cell = NULL_CELL_PTR (ras );
1910
- null_cell -> x = CELL_MAX_X_VALUE ;
1911
- null_cell -> area = 0 ;
1912
- null_cell -> cover = 0 ;
1913
- null_cell -> next = NULL ;;
1929
+ /* Initialize the null cell at the start of the `cells` array. */
1930
+ /* Note that this requires `ras.cell_free` initialization to skip */
1931
+ /* over the first entry in the array. */
1932
+ PCell null_cell = NULL_CELL_PTR ( ras );
1933
+ null_cell -> x = CELL_MAX_X_VALUE ;
1934
+ null_cell -> area = 0 ;
1935
+ null_cell -> cover = 0 ;
1936
+ null_cell -> next = NULL ;;
1914
1937
1915
1938
for ( y = yMin ; y < yMax ; )
1916
1939
{
@@ -1928,7 +1951,8 @@ typedef ptrdiff_t FT_PtrDist;
1928
1951
TCoord w ;
1929
1952
int error ;
1930
1953
1931
- for (w = 0 ; w < width ; ++ w )
1954
+
1955
+ for ( w = 0 ; w < width ; ++ w )
1932
1956
ras .ycells [w ] = null_cell ;
1933
1957
1934
1958
ras .cell_free = ras .cells + 1 ; /* NOTE: Skip over the null cell. */
0 commit comments