diff --git a/manual/luatex-nodes.tex b/manual/luatex-nodes.tex
index ad0a74b30ec1a9d19041e8f1ae64f719d1906f98..8ffa9507b2de211eebfc5c1240c8a3d8d4059314 100644
--- a/manual/luatex-nodes.tex
+++ b/manual/luatex-nodes.tex
@@ -384,8 +384,9 @@ Id: \showid{kern}
 \starttabulate[|lT|l|p|]
 \NC \ssbf field \NC \bf type        \NC \bf explanation \NC \NR
 \NC subtype     \NC number          \NC \type {0} = from font,
-                                        \type {1} = from \type {\kern} or \type {\/},
-                                        \type {2} = from \type {\accent} \NC \NR
+                                        \type {1} = from \type {\kern},
+                                        \type {2} = from \type {\accent},
+                                        \type {3} = from \type {\/} \NC \NR
 \NC attr        \NC \syntax{<node>} \NC \NC \NR
 \NC kern        \NC number          \NC \NC \NR
 \stoptabulate
diff --git a/manual/luatex.pdf b/manual/luatex.pdf
index 7a28cb730606ef5fc396299d5a666b1dd6b6e250..ce176f03154b6ba6af4ccaa60a3c5d7259856eaa 100644
Binary files a/manual/luatex.pdf and b/manual/luatex.pdf differ
diff --git a/source/build-aux/texinfo.tex b/source/build-aux/texinfo.tex
index 936c32dc5f431c715fa290b8a9098a6d9629da4b..58021b2c1065aa4e2bf33b8e8b8759220cd434d5 100644
--- a/source/build-aux/texinfo.tex
+++ b/source/build-aux/texinfo.tex
@@ -3,7 +3,7 @@
 % Load plain if necessary, i.e., if running under initex.
 \expandafter\ifx\csname fmtname\endcsname\relax\input plain\fi
 %
-\def\texinfoversion{2016-01-11.19}
+\def\texinfoversion{2016-02-02.07}
 %
 % Copyright 1985, 1986, 1988, 1990, 1991, 1992, 1993, 1994, 1995,
 % 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
@@ -4737,11 +4737,10 @@ end
   \def\_{\normalunderscore}%
   \def\-{}% @- shouldn't affect sorting
   %
-  \def\lbracechar{{\indexlbrace}}%
-  \def\rbracechar{{\indexrbrace}}%
-  \let\{=\lbracechar
-  \let\}=\rbracechar
-  %
+  \uccode`\1=`\{ \uppercase{\def\{{1}}%
+  \uccode`\1=`\} \uppercase{\def\}{1}}%
+  \let\lbracechar\{%
+  \let\rbracechar\}%
   %
   % Non-English letters.
   \def\AA{AA}%
@@ -4901,9 +4900,15 @@ end
   \indexdummies % Must do this here, since \bf, etc expand at this stage
   \useindexbackslash % \indexbackslash isn't defined now so it will be output 
                      % as is; and it will print as backslash.
+  % The braces around \indexbrace are recognized by texindex.
+  %
   % Get the string to sort by, by processing the index entry with all
   % font commands turned off.
   {\indexnofonts
+   \def\lbracechar{{\indexlbrace}}%
+   \def\rbracechar{{\indexrbrace}}%
+   \let\{=\lbracechar
+   \let\}=\rbracechar
    \indexnonalnumdisappear
    \xdef\indexsortkey{}%
    \let\sortas=\indexwritesortas
@@ -8526,10 +8531,6 @@ end
     }%
     \setcolor{\linkcolor}%
   \fi
-  %
-  % Float references are printed completely differently: "Figure 1.2"
-  % instead of "[somenode], p.3".  We distinguish them by the
-  % LABEL-title being set to a magic string.
   {%
     % Have to otherify everything special to allow the \csname to
     % include an _ in the xref name, etc.
@@ -8538,6 +8539,10 @@ end
     \expandafter\global\expandafter\let\expandafter\Xthisreftitle
       \csname XR#1-title\endcsname
   }%
+  %
+  % Float references are printed completely differently: "Figure 1.2"
+  % instead of "[somenode], p.3".  \iffloat distinguishes them by
+  % \Xthisreftitle being set to a magic string.
   \iffloat\Xthisreftitle
     % If the user specified the print name (third arg) to the ref,
     % print it instead of our usual "Figure 1.2".
@@ -8596,12 +8601,9 @@ end
       %
       % output the `page 3'.
       \turnoffactive \putwordpage\tie\refx{#1-pg}{}%
-           \ifx,\tokenafterxref
-      \else\ifx.\tokenafterxref
-      \else\ifx;\tokenafterxref
-      \else\ifx)\tokenafterxref
-      \else,% add a , if xref not followed by punctuation
-      \fi\fi\fi\fi
+      \if\noexpand\tokenafterxref\space
+        ,% add a , if xref not followed by punctuation
+      \fi
     \fi\fi
   \fi
   \endlink
@@ -9948,7 +9950,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{00AD}{\-}
   \DeclareUnicodeCharacter{00AE}{\registeredsymbol}
   \DeclareUnicodeCharacter{00AF}{\={ }}
-
+  %
   \DeclareUnicodeCharacter{00B0}{\ringaccent{ }}
   \DeclareUnicodeCharacter{00B1}{\ensuremath\pm}
   \DeclareUnicodeCharacter{00B2}{$^2$}
@@ -9965,7 +9967,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{00BD}{$1\over2$}
   \DeclareUnicodeCharacter{00BE}{$3\over4$}
   \DeclareUnicodeCharacter{00BF}{\questiondown}
-
+  %
   \DeclareUnicodeCharacter{00C0}{\`A}
   \DeclareUnicodeCharacter{00C1}{\'A}
   \DeclareUnicodeCharacter{00C2}{\^A}
@@ -9982,7 +9984,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{00CD}{\'I}
   \DeclareUnicodeCharacter{00CE}{\^I}
   \DeclareUnicodeCharacter{00CF}{\"I}
-
+  %
   \DeclareUnicodeCharacter{00D0}{\DH}
   \DeclareUnicodeCharacter{00D1}{\~N}
   \DeclareUnicodeCharacter{00D2}{\`O}
@@ -9999,7 +10001,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{00DD}{\'Y}
   \DeclareUnicodeCharacter{00DE}{\TH}
   \DeclareUnicodeCharacter{00DF}{\ss}
-
+  %
   \DeclareUnicodeCharacter{00E0}{\`a}
   \DeclareUnicodeCharacter{00E1}{\'a}
   \DeclareUnicodeCharacter{00E2}{\^a}
@@ -10016,7 +10018,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{00ED}{\'{\dotless{i}}}
   \DeclareUnicodeCharacter{00EE}{\^{\dotless{i}}}
   \DeclareUnicodeCharacter{00EF}{\"{\dotless{i}}}
-
+  %
   \DeclareUnicodeCharacter{00F0}{\dh}
   \DeclareUnicodeCharacter{00F1}{\~n}
   \DeclareUnicodeCharacter{00F2}{\`o}
@@ -10033,7 +10035,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{00FD}{\'y}
   \DeclareUnicodeCharacter{00FE}{\th}
   \DeclareUnicodeCharacter{00FF}{\"y}
-
+  %
   \DeclareUnicodeCharacter{0100}{\=A}
   \DeclareUnicodeCharacter{0101}{\=a}
   \DeclareUnicodeCharacter{0102}{\u{A}}
@@ -10050,7 +10052,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{010D}{\v{c}}
   \DeclareUnicodeCharacter{010E}{\v{D}}
   \DeclareUnicodeCharacter{010F}{d'}
-
+  %
   \DeclareUnicodeCharacter{0110}{\DH}
   \DeclareUnicodeCharacter{0111}{\dh}
   \DeclareUnicodeCharacter{0112}{\=E}
@@ -10067,7 +10069,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{011D}{\^g}
   \DeclareUnicodeCharacter{011E}{\u{G}}
   \DeclareUnicodeCharacter{011F}{\u{g}}
-
+  %
   \DeclareUnicodeCharacter{0120}{\dotaccent{G}}
   \DeclareUnicodeCharacter{0121}{\dotaccent{g}}
   \DeclareUnicodeCharacter{0122}{\cedilla{G}}
@@ -10084,7 +10086,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{012D}{\u{\dotless{i}}}
   \DeclareUnicodeCharacter{012E}{\ogonek{I}}
   \DeclareUnicodeCharacter{012F}{\ogonek{i}}
-
+  %
   \DeclareUnicodeCharacter{0130}{\dotaccent{I}}
   \DeclareUnicodeCharacter{0131}{\dotless{i}}
   \DeclareUnicodeCharacter{0132}{IJ}
@@ -10101,7 +10103,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{013D}{L'}% should kern
   \DeclareUnicodeCharacter{013E}{l'}% should kern
   \DeclareUnicodeCharacter{013F}{L\U{00B7}}
-
+  %
   \DeclareUnicodeCharacter{0140}{l\U{00B7}}
   \DeclareUnicodeCharacter{0141}{\L}
   \DeclareUnicodeCharacter{0142}{\l}
@@ -10118,7 +10120,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{014D}{\=o}
   \DeclareUnicodeCharacter{014E}{\u{O}}
   \DeclareUnicodeCharacter{014F}{\u{o}}
-
+  %
   \DeclareUnicodeCharacter{0150}{\H{O}}
   \DeclareUnicodeCharacter{0151}{\H{o}}
   \DeclareUnicodeCharacter{0152}{\OE}
@@ -10135,7 +10137,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{015D}{\^s}
   \DeclareUnicodeCharacter{015E}{\cedilla{S}}
   \DeclareUnicodeCharacter{015F}{\cedilla{s}}
-
+  %
   \DeclareUnicodeCharacter{0160}{\v{S}}
   \DeclareUnicodeCharacter{0161}{\v{s}}
   \DeclareUnicodeCharacter{0162}{\cedilla{T}}
@@ -10152,7 +10154,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{016D}{\u{u}}
   \DeclareUnicodeCharacter{016E}{\ringaccent{U}}
   \DeclareUnicodeCharacter{016F}{\ringaccent{u}}
-
+  %
   \DeclareUnicodeCharacter{0170}{\H{U}}
   \DeclareUnicodeCharacter{0171}{\H{u}}
   \DeclareUnicodeCharacter{0172}{\ogonek{U}}
@@ -10169,7 +10171,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{017D}{\v{Z}}
   \DeclareUnicodeCharacter{017E}{\v{z}}
   \DeclareUnicodeCharacter{017F}{\missingcharmsg{LONG S}}
-
+  %
   \DeclareUnicodeCharacter{01C4}{D\v{Z}}
   \DeclareUnicodeCharacter{01C5}{D\v{z}}
   \DeclareUnicodeCharacter{01C6}{d\v{z}}
@@ -10182,20 +10184,20 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{01CD}{\v{A}}
   \DeclareUnicodeCharacter{01CE}{\v{a}}
   \DeclareUnicodeCharacter{01CF}{\v{I}}
-
+  %
   \DeclareUnicodeCharacter{01D0}{\v{\dotless{i}}}
   \DeclareUnicodeCharacter{01D1}{\v{O}}
   \DeclareUnicodeCharacter{01D2}{\v{o}}
   \DeclareUnicodeCharacter{01D3}{\v{U}}
   \DeclareUnicodeCharacter{01D4}{\v{u}}
-
+  %
   \DeclareUnicodeCharacter{01E2}{\={\AE}}
   \DeclareUnicodeCharacter{01E3}{\={\ae}}
   \DeclareUnicodeCharacter{01E6}{\v{G}}
   \DeclareUnicodeCharacter{01E7}{\v{g}}
   \DeclareUnicodeCharacter{01E8}{\v{K}}
   \DeclareUnicodeCharacter{01E9}{\v{k}}
-
+  %
   \DeclareUnicodeCharacter{01F0}{\v{\dotless{j}}}
   \DeclareUnicodeCharacter{01F1}{DZ}
   \DeclareUnicodeCharacter{01F2}{Dz}
@@ -10208,23 +10210,23 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{01FD}{\'{\ae}}
   \DeclareUnicodeCharacter{01FE}{\'{\O}}
   \DeclareUnicodeCharacter{01FF}{\'{\o}}
-
+  %
   \DeclareUnicodeCharacter{021E}{\v{H}}
   \DeclareUnicodeCharacter{021F}{\v{h}}
-
+  %
   \DeclareUnicodeCharacter{0226}{\dotaccent{A}}
   \DeclareUnicodeCharacter{0227}{\dotaccent{a}}
   \DeclareUnicodeCharacter{0228}{\cedilla{E}}
   \DeclareUnicodeCharacter{0229}{\cedilla{e}}
   \DeclareUnicodeCharacter{022E}{\dotaccent{O}}
   \DeclareUnicodeCharacter{022F}{\dotaccent{o}}
-
+  %
   \DeclareUnicodeCharacter{0232}{\=Y}
   \DeclareUnicodeCharacter{0233}{\=y}
   \DeclareUnicodeCharacter{0237}{\dotless{j}}
-
+  %
   \DeclareUnicodeCharacter{02DB}{\ogonek{ }}
-
+  %
   % Greek letters upper case
   \DeclareUnicodeCharacter{0391}{{\it A}}
   \DeclareUnicodeCharacter{0392}{{\it B}}
@@ -10251,7 +10253,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{03A7}{{\it X}}
   \DeclareUnicodeCharacter{03A8}{\ensuremath{\mit\Psi}}
   \DeclareUnicodeCharacter{03A9}{\ensuremath{\mit\Omega}}
-
+  %
   % Vowels with accents
   \DeclareUnicodeCharacter{0390}{\ensuremath{\ddot{\acute\iota}}}
   \DeclareUnicodeCharacter{03AC}{\ensuremath{\acute\alpha}}
@@ -10259,10 +10261,10 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{03AE}{\ensuremath{\acute\eta}}
   \DeclareUnicodeCharacter{03AF}{\ensuremath{\acute\iota}}
   \DeclareUnicodeCharacter{03B0}{\ensuremath{\acute{\ddot\upsilon}}}
-
+  %
   % Standalone accent
   \DeclareUnicodeCharacter{0384}{\ensuremath{\acute{\ }}}
-
+  %
   % Greek letters lower case
   \DeclareUnicodeCharacter{03B1}{\ensuremath\alpha}
   \DeclareUnicodeCharacter{03B2}{\ensuremath\beta}
@@ -10289,19 +10291,19 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{03C7}{\ensuremath\chi}
   \DeclareUnicodeCharacter{03C8}{\ensuremath\psi}
   \DeclareUnicodeCharacter{03C9}{\ensuremath\omega}
-
+  %
   % More Greek vowels with accents
   \DeclareUnicodeCharacter{03CA}{\ensuremath{\ddot\iota}}
   \DeclareUnicodeCharacter{03CB}{\ensuremath{\ddot\upsilon}}
   \DeclareUnicodeCharacter{03CC}{\ensuremath{\acute o}}
   \DeclareUnicodeCharacter{03CD}{\ensuremath{\acute\upsilon}}
   \DeclareUnicodeCharacter{03CE}{\ensuremath{\acute\omega}}
-
+  %
   % Variant Greek letters
   \DeclareUnicodeCharacter{03D1}{\ensuremath\vartheta}
   \DeclareUnicodeCharacter{03D6}{\ensuremath\varpi}
   \DeclareUnicodeCharacter{03F1}{\ensuremath\varrho}
-
+  %
   \DeclareUnicodeCharacter{1E02}{\dotaccent{B}}
   \DeclareUnicodeCharacter{1E03}{\dotaccent{b}}
   \DeclareUnicodeCharacter{1E04}{\udotaccent{B}}
@@ -10314,10 +10316,10 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{1E0D}{\udotaccent{d}}
   \DeclareUnicodeCharacter{1E0E}{\ubaraccent{D}}
   \DeclareUnicodeCharacter{1E0F}{\ubaraccent{d}}
-
+  %
   \DeclareUnicodeCharacter{1E1E}{\dotaccent{F}}
   \DeclareUnicodeCharacter{1E1F}{\dotaccent{f}}
-
+  %
   \DeclareUnicodeCharacter{1E20}{\=G}
   \DeclareUnicodeCharacter{1E21}{\=g}
   \DeclareUnicodeCharacter{1E22}{\dotaccent{H}}
@@ -10326,7 +10328,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{1E25}{\udotaccent{h}}
   \DeclareUnicodeCharacter{1E26}{\"H}
   \DeclareUnicodeCharacter{1E27}{\"h}
-
+  %
   \DeclareUnicodeCharacter{1E30}{\'K}
   \DeclareUnicodeCharacter{1E31}{\'k}
   \DeclareUnicodeCharacter{1E32}{\udotaccent{K}}
@@ -10339,7 +10341,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{1E3B}{\ubaraccent{l}}
   \DeclareUnicodeCharacter{1E3E}{\'M}
   \DeclareUnicodeCharacter{1E3F}{\'m}
-
+  %
   \DeclareUnicodeCharacter{1E40}{\dotaccent{M}}
   \DeclareUnicodeCharacter{1E41}{\dotaccent{m}}
   \DeclareUnicodeCharacter{1E42}{\udotaccent{M}}
@@ -10350,7 +10352,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{1E47}{\udotaccent{n}}
   \DeclareUnicodeCharacter{1E48}{\ubaraccent{N}}
   \DeclareUnicodeCharacter{1E49}{\ubaraccent{n}}
-
+  %
   \DeclareUnicodeCharacter{1E54}{\'P}
   \DeclareUnicodeCharacter{1E55}{\'p}
   \DeclareUnicodeCharacter{1E56}{\dotaccent{P}}
@@ -10361,7 +10363,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{1E5B}{\udotaccent{r}}
   \DeclareUnicodeCharacter{1E5E}{\ubaraccent{R}}
   \DeclareUnicodeCharacter{1E5F}{\ubaraccent{r}}
-
+  %
   \DeclareUnicodeCharacter{1E60}{\dotaccent{S}}
   \DeclareUnicodeCharacter{1E61}{\dotaccent{s}}
   \DeclareUnicodeCharacter{1E62}{\udotaccent{S}}
@@ -10372,12 +10374,12 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{1E6D}{\udotaccent{t}}
   \DeclareUnicodeCharacter{1E6E}{\ubaraccent{T}}
   \DeclareUnicodeCharacter{1E6F}{\ubaraccent{t}}
-
+  %
   \DeclareUnicodeCharacter{1E7C}{\~V}
   \DeclareUnicodeCharacter{1E7D}{\~v}
   \DeclareUnicodeCharacter{1E7E}{\udotaccent{V}}
   \DeclareUnicodeCharacter{1E7F}{\udotaccent{v}}
-
+  %
   \DeclareUnicodeCharacter{1E80}{\`W}
   \DeclareUnicodeCharacter{1E81}{\`w}
   \DeclareUnicodeCharacter{1E82}{\'W}
@@ -10394,7 +10396,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{1E8D}{\"x}
   \DeclareUnicodeCharacter{1E8E}{\dotaccent{Y}}
   \DeclareUnicodeCharacter{1E8F}{\dotaccent{y}}
-
+  %
   \DeclareUnicodeCharacter{1E90}{\^Z}
   \DeclareUnicodeCharacter{1E91}{\^z}
   \DeclareUnicodeCharacter{1E92}{\udotaccent{Z}}
@@ -10405,30 +10407,30 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{1E97}{\"t}
   \DeclareUnicodeCharacter{1E98}{\ringaccent{w}}
   \DeclareUnicodeCharacter{1E99}{\ringaccent{y}}
-
+  %
   \DeclareUnicodeCharacter{1EA0}{\udotaccent{A}}
   \DeclareUnicodeCharacter{1EA1}{\udotaccent{a}}
-
+  %
   \DeclareUnicodeCharacter{1EB8}{\udotaccent{E}}
   \DeclareUnicodeCharacter{1EB9}{\udotaccent{e}}
   \DeclareUnicodeCharacter{1EBC}{\~E}
   \DeclareUnicodeCharacter{1EBD}{\~e}
-
+  %
   \DeclareUnicodeCharacter{1ECA}{\udotaccent{I}}
   \DeclareUnicodeCharacter{1ECB}{\udotaccent{i}}
   \DeclareUnicodeCharacter{1ECC}{\udotaccent{O}}
   \DeclareUnicodeCharacter{1ECD}{\udotaccent{o}}
-
+  %
   \DeclareUnicodeCharacter{1EE4}{\udotaccent{U}}
   \DeclareUnicodeCharacter{1EE5}{\udotaccent{u}}
-
+  %
   \DeclareUnicodeCharacter{1EF2}{\`Y}
   \DeclareUnicodeCharacter{1EF3}{\`y}
   \DeclareUnicodeCharacter{1EF4}{\udotaccent{Y}}
-
+  %
   \DeclareUnicodeCharacter{1EF8}{\~Y}
   \DeclareUnicodeCharacter{1EF9}{\~y}
-
+  %
   % Punctuation
   \DeclareUnicodeCharacter{2013}{--}
   \DeclareUnicodeCharacter{2014}{---}
@@ -10445,12 +10447,12 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{2026}{\dots}
   \DeclareUnicodeCharacter{2039}{\guilsinglleft}
   \DeclareUnicodeCharacter{203A}{\guilsinglright}
-
+  %
   \DeclareUnicodeCharacter{20AC}{\euro}
-
+  %
   \DeclareUnicodeCharacter{2192}{\expansion}
   \DeclareUnicodeCharacter{21D2}{\result}
-
+  %
   % Mathematical symbols
   \DeclareUnicodeCharacter{2200}{\ensuremath\forall}
   \DeclareUnicodeCharacter{2203}{\ensuremath\exists}
@@ -10466,7 +10468,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{2265}{\ensuremath\geq}
   \DeclareUnicodeCharacter{2282}{\ensuremath\subset}
   \DeclareUnicodeCharacter{2287}{\ensuremath\supseteq}
-
+  %
   \DeclareUnicodeCharacter{2016}{\ensuremath\Vert}
   \DeclareUnicodeCharacter{2032}{\ensuremath\prime}
   \DeclareUnicodeCharacter{210F}{\ensuremath\hbar}
@@ -10566,7 +10568,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{230B}{\ensuremath\rfloor}
   \DeclareUnicodeCharacter{2322}{\ensuremath\frown}
   \DeclareUnicodeCharacter{2323}{\ensuremath\smile}
-
+  %
   \DeclareUnicodeCharacter{25A1}{\ensuremath\Box}
   \DeclareUnicodeCharacter{25B3}{\ensuremath\triangle}
   \DeclareUnicodeCharacter{25B7}{\ensuremath\triangleright}
@@ -10598,7 +10600,7 @@ callback.register("process_output_buffer", convert_line_out)
   \DeclareUnicodeCharacter{2A3F}{\ensuremath\amalg}
   \DeclareUnicodeCharacter{2AAF}{\ensuremath\preceq}
   \DeclareUnicodeCharacter{2AB0}{\ensuremath\succeq}
-
+  %
   \global\mathchardef\checkmark="1370 % actually the square root sign
   \DeclareUnicodeCharacter{2713}{\ensuremath\checkmark}
 }% end of \utfeightchardefs
diff --git a/source/libs/README b/source/libs/README
index f44914f56dd91683c77198b80cc6ef314cfeeea5..bf39eca377f7cf7fee8831ed1334151013819c6e 100644
--- a/source/libs/README
+++ b/source/libs/README
@@ -1,4 +1,4 @@
-$Id: README 39398 2016-01-15 23:53:35Z kakuto $
+$Id: README 39575 2016-02-04 08:42:42Z kakuto $
 Public domain.  Originally created by Karl Berry, 2005.
 
 Libraries we compile for TeX Live.
@@ -21,7 +21,7 @@ gd 2.1.1 - checked 14jan15
 gmp 6.1.0 - checked 28dec15
   http://ftp.gnu.org/gnu/gmp/
 
-graphite2 1.3.3 - checked 27sep15
+graphite2 1.3.5 - checked 20jan16
   http://sourceforge.net/projects/silgraphite/files/graphite2/
 
 harfbuzz 1.1.3 - checked 12jan16
@@ -46,7 +46,7 @@ luajit 2.1.0-beta1 - checked 7sep15
 mpfr 3.1.3 - checked 20jun15
   http://ftp.gnu.org/gnu/mpfr/
 
-pixman 0.32.8 - checked 27sep15
+pixman 0.34.0 - checked 04feb16
   http://cairographics.org/releases/
 
 poppler 0.40.0 - checked 14jan16
diff --git a/source/libs/libpng/ChangeLog b/source/libs/libpng/ChangeLog
index 466208698fbd645ad3b07705b285fcf71e2d235d..a2d86e82d611b8b472da165d95e33b42909006b9 100644
--- a/source/libs/libpng/ChangeLog
+++ b/source/libs/libpng/ChangeLog
@@ -1,3 +1,8 @@
+2016-01-20  Karl Berry  <karl@tug.org>
+
+	* Makefile.am (AM_CPPFLAGS): add LIBPNG_USER_CPPFLAGS for Mojca.
+	http://tug.org/pipermail/tlbuild/2016q1/003327.html
+
 2016-01-15 Akira Kakuto  <kakuto@fuk.kindai.ac.jp>
 
 	Import libpng-1.6.21.
diff --git a/source/libs/libpng/Makefile.am b/source/libs/libpng/Makefile.am
index 6bb6c1726b785e9d59a52be93f610702d8d95690..805a6977de193f5e256c45474c380ac40e7a388c 100644
--- a/source/libs/libpng/Makefile.am
+++ b/source/libs/libpng/Makefile.am
@@ -1,6 +1,6 @@
 ## Proxy Makefile.am to build libpng for TeX Live.
 ##
-##   Copyright (C) 2009-2015 Peter Breitenlohner <tex-live@tug.org>
+##   Copyright (C) 2009-2016 Peter Breitenlohner <tex-live@tug.org>
 ##
 ##   This file is free software; the copyright holder
 ##   gives unlimited permission to copy and/or distribute it,
@@ -20,7 +20,8 @@ NEVER_NAMES += $(NEVER_NAMES_SUB)
 
 SUBDIRS = . include
 
-AM_CPPFLAGS = -I$(top_srcdir)/$(LIBPNG_TREE) $(ZLIB_INCLUDES) $(LIBPNG_DEFINES)
+AM_CPPFLAGS = -I$(top_srcdir)/$(LIBPNG_TREE) $(ZLIB_INCLUDES) \
+              $(LIBPNG_DEFINES) $(LIBPNG_USER_CPPFLAGS)
 AM_CFLAGS = $(VISIBILITY_CFLAGS) $(WARNING_CFLAGS)
 
 noinst_LIBRARIES=libpng.a
diff --git a/source/libs/libpng/Makefile.in b/source/libs/libpng/Makefile.in
index cc01ba72aca745972c3c65ba571eaa278c36e318..7d66dedf22c7d5bc916e3e09088e7668a79061a4 100644
--- a/source/libs/libpng/Makefile.in
+++ b/source/libs/libpng/Makefile.in
@@ -569,7 +569,9 @@ NEVER_NAMES = -name .svn $(NEVER_NAMES_SUB)
 NEVER_NAMES_SUB = -o -name .deps -o -name .dirstamp -o -name '*.$(OBJEXT)'
 NEVER_NAMES_LT = -o -name .libs -o -name '*.lo'
 SUBDIRS = . include
-AM_CPPFLAGS = -I$(top_srcdir)/$(LIBPNG_TREE) $(ZLIB_INCLUDES) $(LIBPNG_DEFINES)
+AM_CPPFLAGS = -I$(top_srcdir)/$(LIBPNG_TREE) $(ZLIB_INCLUDES) \
+              $(LIBPNG_DEFINES) $(LIBPNG_USER_CPPFLAGS)
+
 AM_CFLAGS = $(VISIBILITY_CFLAGS) $(WARNING_CFLAGS)
 noinst_LIBRARIES = libpng.a
 nodist_libpng_a_SOURCES = \
diff --git a/source/libs/pixman/ChangeLog b/source/libs/pixman/ChangeLog
index c4c881f57633ab78725f769a735c5837ec6c4e0f..40bddee164f04299ee1c5da2ecabc714562c2de0 100644
--- a/source/libs/pixman/ChangeLog
+++ b/source/libs/pixman/ChangeLog
@@ -1,3 +1,9 @@
+2016-02-04  Akira Kakuto  <kakuto@fuk.kindai.ac.jp>
+
+	Import pixman-0.34.0.
+	* version.ac: Adapted.
+	* configure.ac: New source tree convention.
+
 2015-09-26  Peter Breitenlohner  <peb@mppmu.mpg.de>
 
 	Import pixman-0.32.8.
diff --git a/source/libs/pixman/Makefile.in b/source/libs/pixman/Makefile.in
index b1ba8a5df916057fd077ae411ab642205fa637c7..b0a5a656416d66f7110c5b3e865ea2dc4cd3ccb7 100644
--- a/source/libs/pixman/Makefile.in
+++ b/source/libs/pixman/Makefile.in
@@ -440,7 +440,7 @@ am__DIST_COMMON = $(srcdir)/../../am/dist_hook.am \
 	$(top_srcdir)/../../build-aux/install-sh \
 	$(top_srcdir)/../../build-aux/missing \
 	$(top_srcdir)/../../build-aux/test-driver \
-	$(top_srcdir)/pixman-0.32.8/pixman/pixman-version.h.in \
+	$(top_srcdir)/pixman-src/pixman/pixman-version.h.in \
 	../../build-aux/compile ../../build-aux/config.guess \
 	../../build-aux/config.sub ../../build-aux/depcomp \
 	../../build-aux/install-sh ../../build-aux/ltmain.sh \
@@ -702,7 +702,7 @@ $(srcdir)/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
 
 distclean-hdr:
 	-rm -f config.h stamp-h1
-pixman-version.h: $(top_builddir)/config.status $(top_srcdir)/pixman-0.32.8/pixman/pixman-version.h.in
+pixman-version.h: $(top_builddir)/config.status $(top_srcdir)/pixman-src/pixman/pixman-version.h.in
 	cd $(top_builddir) && $(SHELL) ./config.status $@
 
 clean-noinstLIBRARIES:
diff --git a/source/libs/pixman/README b/source/libs/pixman/README
index 99988a75e746561127c281eb6347847afdc2fc45..7fb53afe2116f5732c37a45f844d0c3fe67d258e 100644
--- a/source/libs/pixman/README
+++ b/source/libs/pixman/README
@@ -1,14 +1,15 @@
-	Building pixman-0.28.0 as part of the TL tree
+	Building pixman-0.34.0 as part of the TL tree
 	=============================================
 
 This directory libs/pixman/ uses a proxy Makefile.am to build the pixman
 library 'libpixman' from the unmodified source tree in
-libs/pixman/pixman-x.y.z/, bypassing the original build system.
+libs/pixman/pixman-src/, bypassing the original build system.
 
-As far as applicable, the tests in libs/pixman/pixman-x.y.z/configure have
+As far as applicable, the tests in libs/pixman/pixman-src/configure have
 been translated into equivalent test in libs/pixman/configure.ac.
 
 =============================
 
 2012-11-10	Taco Hoekwater <taco@metatex.org>
 2012-11-15	Peter Breitenlohner <peb@mppmu.mpg.de>
+2016-02-04	Akira Kakuto <kakuto@fuk.kindai.ac.jp>
diff --git a/source/libs/pixman/configure b/source/libs/pixman/configure
index 57e14f3dcf03c4de973aedd024fef3488c0ee9af..92f9ad086238385f14fb8fbaacb9c5f8e58c0c7c 100755
--- a/source/libs/pixman/configure
+++ b/source/libs/pixman/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for pixman (TeX Live) 0.32.8.
+# Generated by GNU Autoconf 2.69 for pixman (TeX Live) 0.34.0.
 #
 # Report bugs to <tex-k@tug.org>.
 #
@@ -580,12 +580,12 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='pixman (TeX Live)'
 PACKAGE_TARNAME='pixman--tex-live-'
-PACKAGE_VERSION='0.32.8'
-PACKAGE_STRING='pixman (TeX Live) 0.32.8'
+PACKAGE_VERSION='0.34.0'
+PACKAGE_STRING='pixman (TeX Live) 0.34.0'
 PACKAGE_BUGREPORT='tex-k@tug.org'
 PACKAGE_URL=''
 
-ac_unique_file="pixman-0.32.8/pixman/pixman.h"
+ac_unique_file="pixman-src/pixman/pixman.h"
 # Factoring default headers for most tests.
 ac_includes_default="\
 #include <stdio.h>
@@ -1281,7 +1281,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures pixman (TeX Live) 0.32.8 to adapt to many kinds of systems.
+\`configure' configures pixman (TeX Live) 0.34.0 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1348,7 +1348,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of pixman (TeX Live) 0.32.8:";;
+     short | recursive ) echo "Configuration of pixman (TeX Live) 0.34.0:";;
    esac
   cat <<\_ACEOF
 
@@ -1445,7 +1445,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-pixman (TeX Live) configure 0.32.8
+pixman (TeX Live) configure 0.34.0
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1952,7 +1952,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by pixman (TeX Live) $as_me 0.32.8, which was
+It was created by pixman (TeX Live) $as_me 0.34.0, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -3875,7 +3875,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='pixman--tex-live-'
- VERSION='0.32.8'
+ VERSION='0.34.0'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -4077,8 +4077,8 @@ WARNING_CFLAGS=$kpse_cv_warning_cflags
 
 
 PIXMAN_VERSION_MAJOR=0
-PIXMAN_VERSION_MINOR=32
-PIXMAN_VERSION_MICRO=8
+PIXMAN_VERSION_MINOR=34
+PIXMAN_VERSION_MICRO=0
 
 test_CFLAGS=${CFLAGS+set} # We may override autoconf default CFLAGS.
 
@@ -6032,14 +6032,14 @@ rm -f core conftest.err conftest.$ac_objext \
 $as_echo "$_yesno" >&6; }
 
 
-PIXMAN_TREE=pixman-0.32.8
+PIXMAN_TREE=pixman-src
 
 
 if test -f $srcdir/$PIXMAN_TREE/pixman/pixman-version.h; then
   as_fn_error $? "Sorry, you must remove the file $PIXMAN_TREE/pixman/pixman-version.h" "$LINENO" 5
 fi
 
-ac_config_files="$ac_config_files Makefile include/Makefile pixman-version.h:pixman-0.32.8/pixman/pixman-version.h.in"
+ac_config_files="$ac_config_files Makefile include/Makefile pixman-version.h:pixman-src/pixman/pixman-version.h.in"
 
 
 cat >confcache <<\_ACEOF
@@ -6585,7 +6585,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by pixman (TeX Live) $as_me 0.32.8, which was
+This file was extended by pixman (TeX Live) $as_me 0.34.0, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -6651,7 +6651,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-pixman (TeX Live) config.status 0.32.8
+pixman (TeX Live) config.status 0.34.0
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
@@ -6784,7 +6784,7 @@ do
     "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;;
     "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
     "include/Makefile") CONFIG_FILES="$CONFIG_FILES include/Makefile" ;;
-    "pixman-version.h") CONFIG_FILES="$CONFIG_FILES pixman-version.h:pixman-0.32.8/pixman/pixman-version.h.in" ;;
+    "pixman-version.h") CONFIG_FILES="$CONFIG_FILES pixman-version.h:pixman-src/pixman/pixman-version.h.in" ;;
 
   *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
   esac
diff --git a/source/libs/pixman/configure.ac b/source/libs/pixman/configure.ac
index 7291f5045304cafe6ee670a5ec763671f9e85d68..5100a54a8e4581fe95ff8eaf693666dc819f304e 100644
--- a/source/libs/pixman/configure.ac
+++ b/source/libs/pixman/configure.ac
@@ -9,7 +9,7 @@ dnl
 m4_include([version.ac])[] dnl define pixman_version
 AC_INIT([pixman (TeX Live)], pixman_version, [tex-k@tug.org])
 AC_PREREQ([2.65])
-AC_CONFIG_SRCDIR([pixman-]pixman_version[/pixman/pixman.h])
+AC_CONFIG_SRCDIR([pixman-src/pixman/pixman.h])
 AC_CONFIG_AUX_DIR([../../build-aux])
 AC_CONFIG_MACRO_DIR([../../m4])
 
@@ -151,7 +151,7 @@ fi
 PIXMAN_CHECK_CFLAG([-Wall])
 PIXMAN_CHECK_CFLAG([-fno-strict-aliasing])
 
-AC_SUBST([PIXMAN_TREE], [pixman-]pixman_version)
+AC_SUBST([PIXMAN_TREE], [pixman-src])
 
 if test -f $srcdir/$PIXMAN_TREE/pixman/pixman-version.h; then
   AC_MSG_ERROR([Sorry, you must remove the file $PIXMAN_TREE/pixman/pixman-version.h])
@@ -159,6 +159,6 @@ fi
 
 AC_CONFIG_FILES([Makefile
 		 include/Makefile
-	         pixman-version.h:pixman-]pixman_version[/pixman/pixman-version.h.in])
+	         pixman-version.h:pixman-src/pixman/pixman-version.h.in])
 
 AC_OUTPUT
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-combine32.c b/source/libs/pixman/pixman-0.32.8/pixman/pixman-combine32.c
deleted file mode 100755
index 450114a52c40613db2f2e0eff7f2ada4d59a973f..0000000000000000000000000000000000000000
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-combine32.c
+++ /dev/null
@@ -1,2581 +0,0 @@
-/*
- * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
- *             2005 Lars Knoll & Zack Rusin, Trolltech
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Keith Packard makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-#include <string.h>
-
-#include "pixman-private.h"
-#include "pixman-combine32.h"
-
-/* component alpha helper functions */
-
-static void
-combine_mask_ca (uint32_t *src, uint32_t *mask)
-{
-    uint32_t a = *mask;
-
-    uint32_t x;
-    uint16_t xa;
-
-    if (!a)
-    {
-	*(src) = 0;
-	return;
-    }
-
-    x = *(src);
-    if (a == ~0)
-    {
-	x = x >> A_SHIFT;
-	x |= x << G_SHIFT;
-	x |= x << R_SHIFT;
-	*(mask) = x;
-	return;
-    }
-
-    xa = x >> A_SHIFT;
-    UN8x4_MUL_UN8x4 (x, a);
-    *(src) = x;
-    
-    UN8x4_MUL_UN8 (a, xa);
-    *(mask) = a;
-}
-
-static void
-combine_mask_value_ca (uint32_t *src, const uint32_t *mask)
-{
-    uint32_t a = *mask;
-    uint32_t x;
-
-    if (!a)
-    {
-	*(src) = 0;
-	return;
-    }
-
-    if (a == ~0)
-	return;
-
-    x = *(src);
-    UN8x4_MUL_UN8x4 (x, a);
-    *(src) = x;
-}
-
-static void
-combine_mask_alpha_ca (const uint32_t *src, uint32_t *mask)
-{
-    uint32_t a = *(mask);
-    uint32_t x;
-
-    if (!a)
-	return;
-
-    x = *(src) >> A_SHIFT;
-    if (x == MASK)
-	return;
-
-    if (a == ~0)
-    {
-	x |= x << G_SHIFT;
-	x |= x << R_SHIFT;
-	*(mask) = x;
-	return;
-    }
-
-    UN8x4_MUL_UN8 (a, x);
-    *(mask) = a;
-}
-
-/*
- * There are two ways of handling alpha -- either as a single unified value or
- * a separate value for each component, hence each macro must have two
- * versions.  The unified alpha version has a 'u' at the end of the name,
- * the component version has a 'ca'.  Similarly, functions which deal with
- * this difference will have two versions using the same convention.
- */
-
-static force_inline uint32_t
-combine_mask (const uint32_t *src, const uint32_t *mask, int i)
-{
-    uint32_t s, m;
-
-    if (mask)
-    {
-	m = *(mask + i) >> A_SHIFT;
-
-	if (!m)
-	    return 0;
-    }
-
-    s = *(src + i);
-
-    if (mask)
-	UN8x4_MUL_UN8 (s, m);
-
-    return s;
-}
-
-static void
-combine_clear (pixman_implementation_t *imp,
-               pixman_op_t              op,
-               uint32_t *               dest,
-               const uint32_t *         src,
-               const uint32_t *         mask,
-               int                      width)
-{
-    memset (dest, 0, width * sizeof (uint32_t));
-}
-
-static void
-combine_dst (pixman_implementation_t *imp,
-	     pixman_op_t	      op,
-	     uint32_t *		      dest,
-	     const uint32_t *	      src,
-	     const uint32_t *         mask,
-	     int		      width)
-{
-    return;
-}
-
-static void
-combine_src_u (pixman_implementation_t *imp,
-               pixman_op_t              op,
-               uint32_t *               dest,
-               const uint32_t *         src,
-               const uint32_t *         mask,
-               int                      width)
-{
-    int i;
-
-    if (!mask)
-    {
-	memcpy (dest, src, width * sizeof (uint32_t));
-    }
-    else
-    {
-	for (i = 0; i < width; ++i)
-	{
-	    uint32_t s = combine_mask (src, mask, i);
-
-	    *(dest + i) = s;
-	}
-    }
-}
-
-static void
-combine_over_u (pixman_implementation_t *imp,
-                pixman_op_t              op,
-                uint32_t *               dest,
-                const uint32_t *         src,
-                const uint32_t *         mask,
-                int                      width)
-{
-    int i;
-
-    if (!mask)
-    {
-	for (i = 0; i < width; ++i)
-	{
-	    uint32_t s = *(src + i);
-	    uint32_t a = ALPHA_8 (s);
-	    if (a == 0xFF)
-	    {
-		*(dest + i) = s;
-	    }
-	    else if (s)
-	    {
-		uint32_t d = *(dest + i);
-		uint32_t ia = a ^ 0xFF;
-		UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
-		*(dest + i) = d;
-	    }
-	}
-    }
-    else
-    {
-	for (i = 0; i < width; ++i)
-	{
-	    uint32_t m = ALPHA_8 (*(mask + i));
-	    if (m == 0xFF)
-	    {
-		uint32_t s = *(src + i);
-		uint32_t a = ALPHA_8 (s);
-		if (a == 0xFF)
-		{
-		    *(dest + i) = s;
-		}
-		else if (s)
-		{
-		    uint32_t d = *(dest + i);
-		    uint32_t ia = a ^ 0xFF;
-		    UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
-		    *(dest + i) = d;
-		}
-	    }
-	    else if (m)
-	    {
-		uint32_t s = *(src + i);
-		if (s)
-		{
-		    uint32_t d = *(dest + i);
-		    UN8x4_MUL_UN8 (s, m);
-		    UN8x4_MUL_UN8_ADD_UN8x4 (d, ALPHA_8 (~s), s);
-		    *(dest + i) = d;
-		}
-	    }
-	}
-    }
-}
-
-static void
-combine_over_reverse_u (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        uint32_t *               dest,
-                        const uint32_t *         src,
-                        const uint32_t *         mask,
-                        int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	uint32_t ia = ALPHA_8 (~*(dest + i));
-	UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_in_u (pixman_implementation_t *imp,
-              pixman_op_t              op,
-              uint32_t *               dest,
-              const uint32_t *         src,
-              const uint32_t *         mask,
-              int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t a = ALPHA_8 (*(dest + i));
-	UN8x4_MUL_UN8 (s, a);
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_in_reverse_u (pixman_implementation_t *imp,
-                      pixman_op_t              op,
-                      uint32_t *               dest,
-                      const uint32_t *         src,
-                      const uint32_t *         mask,
-                      int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	uint32_t a = ALPHA_8 (s);
-	UN8x4_MUL_UN8 (d, a);
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_out_u (pixman_implementation_t *imp,
-               pixman_op_t              op,
-               uint32_t *               dest,
-               const uint32_t *         src,
-               const uint32_t *         mask,
-               int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t a = ALPHA_8 (~*(dest + i));
-	UN8x4_MUL_UN8 (s, a);
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_out_reverse_u (pixman_implementation_t *imp,
-                       pixman_op_t              op,
-                       uint32_t *               dest,
-                       const uint32_t *         src,
-                       const uint32_t *         mask,
-                       int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	uint32_t a = ALPHA_8 (~s);
-	UN8x4_MUL_UN8 (d, a);
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_atop_u (pixman_implementation_t *imp,
-                pixman_op_t              op,
-                uint32_t *               dest,
-                const uint32_t *         src,
-                const uint32_t *         mask,
-                int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	uint32_t dest_a = ALPHA_8 (d);
-	uint32_t src_ia = ALPHA_8 (~s);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_atop_reverse_u (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        uint32_t *               dest,
-                        const uint32_t *         src,
-                        const uint32_t *         mask,
-                        int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	uint32_t src_a = ALPHA_8 (s);
-	uint32_t dest_ia = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_xor_u (pixman_implementation_t *imp,
-               pixman_op_t              op,
-               uint32_t *               dest,
-               const uint32_t *         src,
-               const uint32_t *         mask,
-               int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	uint32_t src_ia = ALPHA_8 (~s);
-	uint32_t dest_ia = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_add_u (pixman_implementation_t *imp,
-               pixman_op_t              op,
-               uint32_t *               dest,
-               const uint32_t *         src,
-               const uint32_t *         mask,
-               int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	UN8x4_ADD_UN8x4 (d, s);
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_saturate_u (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	uint16_t sa, da;
-
-	sa = s >> A_SHIFT;
-	da = ~d >> A_SHIFT;
-	if (sa > da)
-	{
-	    sa = DIV_UN8 (da, sa);
-	    UN8x4_MUL_UN8 (s, sa);
-	}
-	;
-	UN8x4_ADD_UN8x4 (d, s);
-	*(dest + i) = d;
-    }
-}
-
-
-/*
- * PDF blend modes:
- *
- * The following blend modes have been taken from the PDF ISO 32000
- * specification, which at this point in time is available from
- *
- *     http://www.adobe.com/devnet/pdf/pdf_reference.html
- *
- * The specific documents of interest are the PDF spec itself:
- *
- *     http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf
- *
- * chapters 11.3.5 and 11.3.6 and a later supplement for Adobe Acrobat
- * 9.1 and Reader 9.1:
- *
- *     http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000_1.pdf
- *
- * that clarifies the specifications for blend modes ColorDodge and
- * ColorBurn.
- *
- * The formula for computing the final pixel color given in 11.3.6 is:
- *
- *     αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
- *
- * with B() is the blend function. When B(Cb, Cs) = Cs, this formula
- * reduces to the regular OVER operator.
- *
- * Cs and Cb are not premultiplied, so in our implementation we instead
- * use:
- *
- *     cr = (1 – αs) × cb  +  (1 – αb) × cs  +  αb × αs × B (cb/αb, cs/αs)
- *
- * where cr, cs, and cb are premultiplied colors, and where the
- *
- *     αb × αs × B(cb/αb, cs/αs)
- *
- * part is first arithmetically simplified under the assumption that αb
- * and αs are not 0, and then updated to produce a meaningful result when
- * they are.
- *
- * For all the blend mode operators, the alpha channel is given by
- *
- *     αr = αs + αb + αb × αs
- */
-
-/*
- * Multiply
- *
- *      ad * as * B(d / ad, s / as)
- *    = ad * as * d/ad * s/as
- *    = d * s
- *
- */
-static void
-combine_multiply_u (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	uint32_t ss = s;
-	uint32_t src_ia = ALPHA_8 (~s);
-	uint32_t dest_ia = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (ss, dest_ia, d, src_ia);
-	UN8x4_MUL_UN8x4 (d, s);
-	UN8x4_ADD_UN8x4 (d, ss);
-
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_multiply_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               dest,
-                     const uint32_t *         src,
-                     const uint32_t *         mask,
-                     int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t m = *(mask + i);
-	uint32_t s = *(src + i);
-	uint32_t d = *(dest + i);
-	uint32_t r = d;
-	uint32_t dest_ia = ALPHA_8 (~d);
-
-	combine_mask_ca (&s, &m);
-
-	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (r, ~m, s, dest_ia);
-	UN8x4_MUL_UN8x4 (d, s);
-	UN8x4_ADD_UN8x4 (r, d);
-
-	*(dest + i) = r;
-    }
-}
-
-#define PDF_SEPARABLE_BLEND_MODE(name)					\
-    static void								\
-    combine_ ## name ## _u (pixman_implementation_t *imp,		\
-			    pixman_op_t              op,		\
-                            uint32_t *               dest,		\
-			    const uint32_t *         src,		\
-			    const uint32_t *         mask,		\
-			    int                      width)		\
-    {									\
-	int i;								\
-	for (i = 0; i < width; ++i)					\
-	{								\
-	    uint32_t s = combine_mask (src, mask, i);			\
-	    uint32_t d = *(dest + i);					\
-	    uint8_t sa = ALPHA_8 (s);					\
-	    uint8_t isa = ~sa;						\
-	    uint8_t da = ALPHA_8 (d);					\
-	    uint8_t ida = ~da;						\
-	    uint32_t result;						\
-									\
-	    result = d;							\
-	    UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida);	\
-	    								\
-	    *(dest + i) = result +					\
-		(DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) +		\
-		(blend_ ## name (RED_8 (d), da, RED_8 (s), sa) << R_SHIFT) + \
-		(blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), sa) << G_SHIFT) + \
-		(blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), sa));	\
-	}								\
-    }									\
-    									\
-    static void								\
-    combine_ ## name ## _ca (pixman_implementation_t *imp,		\
-			     pixman_op_t              op,		\
-                             uint32_t *               dest,		\
-			     const uint32_t *         src,		\
-			     const uint32_t *         mask,		\
-			     int                      width)		\
-    {									\
-	int i;								\
-	for (i = 0; i < width; ++i)					\
-	{								\
-	    uint32_t m = *(mask + i);					\
-	    uint32_t s = *(src + i);					\
-	    uint32_t d = *(dest + i);					\
-	    uint8_t da = ALPHA_8 (d);					\
-	    uint8_t ida = ~da;						\
-	    uint32_t result;						\
-            								\
-	    combine_mask_ca (&s, &m);					\
-            								\
-	    result = d;							\
-	    UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (result, ~m, s, ida);     \
-            								\
-	    result +=							\
-	        (DIV_ONE_UN8 (ALPHA_8 (m) * (uint32_t)da) << A_SHIFT) +	\
-	        (blend_ ## name (RED_8 (d), da, RED_8 (s), RED_8 (m)) << R_SHIFT) + \
-	        (blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), GREEN_8 (m)) << G_SHIFT) + \
-	        (blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), BLUE_8 (m))); \
-	    								\
-	    *(dest + i) = result;					\
-	}								\
-    }
-
-/*
- * Screen
- *
- *      ad * as * B(d/ad, s/as)
- *    = ad * as * (d/ad + s/as - s/as * d/ad)
- *    = ad * s + as * d - s * d
- */
-static inline uint32_t
-blend_screen (uint32_t d, uint32_t ad, uint32_t s, uint32_t as)
-{
-    return DIV_ONE_UN8 (s * ad + d * as - s * d);
-}
-
-PDF_SEPARABLE_BLEND_MODE (screen)
-
-/*
- * Overlay
- *
- *     ad * as * B(d/ad, s/as)
- *   = ad * as * Hardlight (s, d)
- *   = if (d / ad < 0.5)
- *         as * ad * Multiply (s/as, 2 * d/ad)
- *     else
- *         as * ad * Screen (s/as, 2 * d / ad - 1)
- *   = if (d < 0.5 * ad)
- *         as * ad * s/as * 2 * d /ad
- *     else
- *         as * ad * (s/as + 2 * d / ad - 1 - s / as * (2 * d / ad - 1))
- *   = if (2 * d < ad)
- *         2 * s * d
- *     else
- *         ad * s + 2 * as * d - as * ad - ad * s * (2 * d / ad - 1)
- *   = if (2 * d < ad)
- *         2 * s * d
- *     else
- *         as * ad - 2 * (ad - d) * (as - s)
- */
-static inline uint32_t
-blend_overlay (uint32_t d, uint32_t ad, uint32_t s, uint32_t as)
-{
-    uint32_t r;
-
-    if (2 * d < ad)
-	r = 2 * s * d;
-    else
-	r = as * ad - 2 * (ad - d) * (as - s);
-
-    return DIV_ONE_UN8 (r);
-}
-
-PDF_SEPARABLE_BLEND_MODE (overlay)
-
-/*
- * Darken
- *
- *     ad * as * B(d/ad, s/as)
- *   = ad * as * MIN(d/ad, s/as)
- *   = MIN (as * d, ad * s)
- */
-static inline uint32_t
-blend_darken (uint32_t d, uint32_t ad, uint32_t s, uint32_t as)
-{
-    s = ad * s;
-    d = as * d;
-
-    return DIV_ONE_UN8 (s > d ? d : s);
-}
-
-PDF_SEPARABLE_BLEND_MODE (darken)
-
-/*
- * Lighten
- *
- *     ad * as * B(d/ad, s/as)
- *   = ad * as * MAX(d/ad, s/as)
- *   = MAX (as * d, ad * s)
- */
-static inline uint32_t
-blend_lighten (uint32_t d, uint32_t ad, uint32_t s, uint32_t as)
-{
-    s = ad * s;
-    d = as * d;
-    
-    return DIV_ONE_UN8 (s > d ? s : d);
-}
-
-PDF_SEPARABLE_BLEND_MODE (lighten)
-
-/*
- * Color dodge
- *
- *     ad * as * B(d/ad, s/as)
- *   = if d/ad = 0
- *         ad * as * 0
- *     else if (d/ad >= (1 - s/as)
- *         ad * as * 1
- *     else
- *         ad * as * ((d/ad) / (1 - s/as))
- *   = if d = 0
- *         0
- *     elif as * d >= ad * (as - s)
- *         ad * as
- *     else
- *         as * (as * d / (as - s))
- *
- */
-static inline uint32_t
-blend_color_dodge (uint32_t d, uint32_t ad, uint32_t s, uint32_t as)
-{
-    if (d == 0)
-        return 0;
-    else if (as * d >= ad * (as - s))
-	return DIV_ONE_UN8 (as * ad);
-    else if (as - s == 0)
-        return DIV_ONE_UN8 (as * ad);
-    else
-        return DIV_ONE_UN8 (as * ((d * as) / ((as - s))));
-}
-
-PDF_SEPARABLE_BLEND_MODE (color_dodge)
-
-/*
- * Color burn
- *
- * We modify the first clause "if d = 1" to "if d >= 1" since with
- * premultiplied colors d > 1 can actually happen.
- *
- *     ad * as * B(d/ad, s/as)
- *   = if d/ad >= 1
- *         ad * as * 1
- *     elif (1 - d/ad) >= s/as
- *         ad * as * 0
- *     else
- *         ad * as * (1 - ((1 - d/ad) / (s/as)))
- *   = if d >= ad
- *         ad * as
- *     elif as * ad - as * d >= ad * s
- *         0
- *     else
- *         ad * as  - as * as * (ad - d) / s
- */
-static inline uint32_t
-blend_color_burn (uint32_t d, uint32_t ad, uint32_t s, uint32_t as)
-{
-    if (d >= ad)
-	return DIV_ONE_UN8 (ad * as);
-    else if (as * ad - as * d >= ad * s)
-	return 0;
-    else if (s == 0)
-	return 0;
-    else
-	return DIV_ONE_UN8 (ad * as - (as * as * (ad - d)) / s);
-}
-
-PDF_SEPARABLE_BLEND_MODE (color_burn)
-
-/*
- * Hard light
- *
- *     ad * as * B(d/ad, s/as)
- *   = if (s/as <= 0.5)
- *         ad * as * Multiply (d/ad, 2 * s/as)
- *     else
- *         ad * as * Screen (d/ad, 2 * s/as - 1)
- *   = if 2 * s <= as
- *         ad * as * d/ad * 2 * s / as
- *     else
- *         ad * as * (d/ad + (2 * s/as - 1) + d/ad * (2 * s/as - 1))
- *   = if 2 * s <= as
- *         2 * s * d
- *     else
- *         as * ad - 2 * (ad - d) * (as - s)
- */
-static inline uint32_t
-blend_hard_light (uint32_t d, uint32_t ad, uint32_t s, uint32_t as)
-{
-    if (2 * s < as)
-	return DIV_ONE_UN8 (2 * s * d);
-    else
-	return DIV_ONE_UN8 (as * ad - 2 * (ad - d) * (as - s));
-}
-
-PDF_SEPARABLE_BLEND_MODE (hard_light)
-
-/*
- * Soft light
- *
- *     ad * as * B(d/ad, s/as)
- *   = if (s/as <= 0.5)
- *         ad * as * (d/ad - (1 - 2 * s/as) * d/ad * (1 - d/ad))
- *     else if (d/ad <= 0.25)
- *         ad * as * (d/ad + (2 * s/as - 1) * ((((16 * d/ad - 12) * d/ad + 4) * d/ad) - d/ad))
- *     else
- *         ad * as * (d/ad + (2 * s/as - 1) * sqrt (d/ad))
- *   = if (2 * s <= as)
- *         d * as - d * (ad - d) * (as - 2 * s) / ad;
- *     else if (4 * d <= ad)
- *         (2 * s - as) * d * ((16 * d / ad - 12) * d / ad + 3);
- *     else
- *         d * as + (sqrt (d * ad) - d) * (2 * s - as);
- */
-static inline uint32_t
-blend_soft_light (uint32_t d_org,
-		  uint32_t ad_org,
-		  uint32_t s_org,
-		  uint32_t as_org)
-{
-    double d = d_org * (1.0 / MASK);
-    double ad = ad_org * (1.0 / MASK);
-    double s = s_org * (1.0 / MASK);
-    double as = as_org * (1.0 / MASK);
-    double r;
-
-    if (2 * s < as)
-    {
-	if (ad == 0)
-	    r = d * as;
-	else
-	    r = d * as - d * (ad - d) * (as - 2 * s) / ad;
-    }
-    else if (ad == 0)
-    {
-	r = 0;
-    }
-    else if (4 * d <= ad)
-    {
-	r = d * as +
-	    (2 * s - as) * d * ((16 * d / ad - 12) * d / ad + 3);
-    }
-    else
-    {
-	r = d * as + (sqrt (d * ad) - d) * (2 * s - as);
-    }
-    return r * MASK + 0.5;
-}
-
-PDF_SEPARABLE_BLEND_MODE (soft_light)
-
-/*
- * Difference
- *
- *     ad * as * B(s/as, d/ad)
- *   = ad * as * abs (s/as - d/ad)
- *   = if (s/as <= d/ad)
- *         ad * as * (d/ad - s/as)
- *     else
- *         ad * as * (s/as - d/ad)
- *   = if (ad * s <= as * d)
- *        as * d - ad * s
- *     else
- *        ad * s - as * d
- */
-static inline uint32_t
-blend_difference (uint32_t d, uint32_t ad, uint32_t s, uint32_t as)
-{
-    uint32_t das = d * as;
-    uint32_t sad = s * ad;
-
-    if (sad < das)
-	return DIV_ONE_UN8 (das - sad);
-    else
-	return DIV_ONE_UN8 (sad - das);
-}
-
-PDF_SEPARABLE_BLEND_MODE (difference)
-
-/*
- * Exclusion
- *
- *     ad * as * B(s/as, d/ad)
- *   = ad * as * (d/ad + s/as - 2 * d/ad * s/as)
- *   = as * d + ad * s - 2 * s * d
- */
-
-/* This can be made faster by writing it directly and not using
- * PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */
-
-static inline uint32_t
-blend_exclusion (uint32_t d, uint32_t ad, uint32_t s, uint32_t as)
-{
-    return DIV_ONE_UN8 (s * ad + d * as - 2 * d * s);
-}
-
-PDF_SEPARABLE_BLEND_MODE (exclusion)
-
-#undef PDF_SEPARABLE_BLEND_MODE
-
-/*
- * PDF nonseperable blend modes are implemented using the following functions
- * to operate in Hsl space, with Cmax, Cmid, Cmin referring to the max, mid
- * and min value of the red, green and blue components.
- *
- * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
- *
- * clip_color (C):
- *     l = LUM (C)
- *     min = Cmin
- *     max = Cmax
- *     if n < 0.0
- *         C = l + (((C – l) × l) ⁄ (l – min))
- *     if x > 1.0
- *         C = l + (((C – l) × (1 – l) ) ⁄ (max – l))
- *     return C
- *
- * set_lum (C, l):
- *     d = l – LUM (C)
- *     C += d
- *     return clip_color (C)
- *
- * SAT (C) = CH_MAX (C) - CH_MIN (C)
- *
- * set_sat (C, s):
- *     if Cmax > Cmin
- *         Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
- *         Cmax = s
- *     else
- *         Cmid = Cmax = 0.0
- *         Cmin = 0.0
- *     return C
- */
-
-/* For premultiplied colors, we need to know what happens when C is
- * multiplied by a real number. LUM and SAT are linear:
- *
- *     LUM (r × C) = r × LUM (C)	SAT (r * C) = r * SAT (C)
- *
- * If we extend clip_color with an extra argument a and change
- *
- *     if x >= 1.0
- *
- * into
- *
- *     if x >= a
- *
- * then clip_color is also linear:
- *
- *     r * clip_color (C, a) = clip_color (r * C, r * a);
- *
- * for positive r.
- *
- * Similarly, we can extend set_lum with an extra argument that is just passed
- * on to clip_color:
- *
- *       r * set_lum (C, l, a)
- *
- *     = r × clip_color (C + l - LUM (C), a)
- *
- *     = clip_color (r * C + r × l - r * LUM (C), r * a)
- *
- *     = set_lum (r * C, r * l, r * a)
- *
- * Finally, set_sat:
- *
- *       r * set_sat (C, s) = set_sat (x * C, r * s)
- *
- * The above holds for all non-zero x, because the x'es in the fraction for
- * C_mid cancel out. Specifically, it holds for x = r:
- *
- *       r * set_sat (C, s) = set_sat (r * C, r * s)
- *
- */
-
-#define CH_MIN(c) (c[0] < c[1] ? (c[0] < c[2] ? c[0] : c[2]) : (c[1] < c[2] ? c[1] : c[2]))
-#define CH_MAX(c) (c[0] > c[1] ? (c[0] > c[2] ? c[0] : c[2]) : (c[1] > c[2] ? c[1] : c[2]))
-#define LUM(c) ((c[0] * 30 + c[1] * 59 + c[2] * 11) / 100)
-#define SAT(c) (CH_MAX (c) - CH_MIN (c))
-
-#define PDF_NON_SEPARABLE_BLEND_MODE(name)				\
-    static void								\
-    combine_ ## name ## _u (pixman_implementation_t *imp,		\
-			    pixman_op_t		     op,		\
-                            uint32_t *               dest,		\
-			    const uint32_t *         src,		\
-			    const uint32_t *         mask,		\
-			    int                      width)		\
-    {									\
-	int i;								\
-	for (i = 0; i < width; ++i)					\
-	{								\
-	    uint32_t s = combine_mask (src, mask, i);			\
-	    uint32_t d = *(dest + i);					\
-	    uint8_t sa = ALPHA_8 (s);					\
-	    uint8_t isa = ~sa;						\
-	    uint8_t da = ALPHA_8 (d);					\
-	    uint8_t ida = ~da;						\
-	    uint32_t result;						\
-	    uint32_t sc[3], dc[3], c[3];				\
-            								\
-	    result = d;							\
-	    UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida);	\
-	    dc[0] = RED_8 (d);						\
-	    sc[0] = RED_8 (s);						\
-	    dc[1] = GREEN_8 (d);					\
-	    sc[1] = GREEN_8 (s);					\
-	    dc[2] = BLUE_8 (d);						\
-	    sc[2] = BLUE_8 (s);						\
-	    blend_ ## name (c, dc, da, sc, sa);				\
-            								\
-	    *(dest + i) = result +					\
-		(DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) +		\
-		(DIV_ONE_UN8 (c[0]) << R_SHIFT) +			\
-		(DIV_ONE_UN8 (c[1]) << G_SHIFT) +			\
-		(DIV_ONE_UN8 (c[2]));					\
-	}								\
-    }
-
-static void
-set_lum (uint32_t dest[3], uint32_t src[3], uint32_t sa, uint32_t lum)
-{
-    double a, l, min, max;
-    double tmp[3];
-
-    a = sa * (1.0 / MASK);
-
-    l = lum * (1.0 / MASK);
-    tmp[0] = src[0] * (1.0 / MASK);
-    tmp[1] = src[1] * (1.0 / MASK);
-    tmp[2] = src[2] * (1.0 / MASK);
-
-    l = l - LUM (tmp);
-    tmp[0] += l;
-    tmp[1] += l;
-    tmp[2] += l;
-
-    /* clip_color */
-    l = LUM (tmp);
-    min = CH_MIN (tmp);
-    max = CH_MAX (tmp);
-
-    if (min < 0)
-    {
-	if (l - min == 0.0)
-	{
-	    tmp[0] = 0;
-	    tmp[1] = 0;
-	    tmp[2] = 0;
-	}
-	else
-	{
-	    tmp[0] = l + (tmp[0] - l) * l / (l - min);
-	    tmp[1] = l + (tmp[1] - l) * l / (l - min);
-	    tmp[2] = l + (tmp[2] - l) * l / (l - min);
-	}
-    }
-    if (max > a)
-    {
-	if (max - l == 0.0)
-	{
-	    tmp[0] = a;
-	    tmp[1] = a;
-	    tmp[2] = a;
-	}
-	else
-	{
-	    tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l);
-	    tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l);
-	    tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l);
-	}
-    }
-
-    dest[0] = tmp[0] * MASK + 0.5;
-    dest[1] = tmp[1] * MASK + 0.5;
-    dest[2] = tmp[2] * MASK + 0.5;
-}
-
-static void
-set_sat (uint32_t dest[3], uint32_t src[3], uint32_t sat)
-{
-    int id[3];
-    uint32_t min, max;
-
-    if (src[0] > src[1])
-    {
-	if (src[0] > src[2])
-	{
-	    id[0] = 0;
-	    if (src[1] > src[2])
-	    {
-		id[1] = 1;
-		id[2] = 2;
-	    }
-	    else
-	    {
-		id[1] = 2;
-		id[2] = 1;
-	    }
-	}
-	else
-	{
-	    id[0] = 2;
-	    id[1] = 0;
-	    id[2] = 1;
-	}
-    }
-    else
-    {
-	if (src[0] > src[2])
-	{
-	    id[0] = 1;
-	    id[1] = 0;
-	    id[2] = 2;
-	}
-	else
-	{
-	    id[2] = 0;
-	    if (src[1] > src[2])
-	    {
-		id[0] = 1;
-		id[1] = 2;
-	    }
-	    else
-	    {
-		id[0] = 2;
-		id[1] = 1;
-	    }
-	}
-    }
-
-    max = dest[id[0]];
-    min = dest[id[2]];
-    if (max > min)
-    {
-	dest[id[1]] = (dest[id[1]] - min) * sat / (max - min);
-	dest[id[0]] = sat;
-	dest[id[2]] = 0;
-    }
-    else
-    {
-	dest[0] = dest[1] = dest[2] = 0;
-    }
-}
-
-/* Hue:
- *
- *       as * ad * B(s/as, d/as)
- *     = as * ad * set_lum (set_sat (s/as, SAT (d/ad)), LUM (d/ad), 1)
- *     = set_lum (set_sat (ad * s, as * SAT (d)), as * LUM (d), as * ad)
- *
- */
-static inline void
-blend_hsl_hue (uint32_t r[3],
-               uint32_t d[3],
-               uint32_t ad,
-               uint32_t s[3],
-               uint32_t as)
-{
-    r[0] = s[0] * ad;
-    r[1] = s[1] * ad;
-    r[2] = s[2] * ad;
-    set_sat (r, r, SAT (d) * as);
-    set_lum (r, r, as * ad, LUM (d) * as);
-}
-
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_hue)
-
-/* 
- * Saturation
- *
- *     as * ad * B(s/as, d/ad)
- *   = as * ad * set_lum (set_sat (d/ad, SAT (s/as)), LUM (d/ad), 1)
- *   = set_lum (as * ad * set_sat (d/ad, SAT (s/as)),
- *                                       as * LUM (d), as * ad)
- *   = set_lum (set_sat (as * d, ad * SAT (s), as * LUM (d), as * ad))
- */
-static inline void
-blend_hsl_saturation (uint32_t r[3],
-                      uint32_t d[3],
-                      uint32_t ad,
-                      uint32_t s[3],
-                      uint32_t as)
-{
-    r[0] = d[0] * as;
-    r[1] = d[1] * as;
-    r[2] = d[2] * as;
-    set_sat (r, r, SAT (s) * ad);
-    set_lum (r, r, as * ad, LUM (d) * as);
-}
-
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_saturation)
-
-/* 
- * Color
- *
- *     as * ad * B(s/as, d/as)
- *   = as * ad * set_lum (s/as, LUM (d/ad), 1)
- *   = set_lum (s * ad, as * LUM (d), as * ad)
- */
-static inline void
-blend_hsl_color (uint32_t r[3],
-                 uint32_t d[3],
-                 uint32_t ad,
-                 uint32_t s[3],
-                 uint32_t as)
-{
-    r[0] = s[0] * ad;
-    r[1] = s[1] * ad;
-    r[2] = s[2] * ad;
-    set_lum (r, r, as * ad, LUM (d) * as);
-}
-
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_color)
-
-/*
- * Luminosity
- *
- *     as * ad * B(s/as, d/ad)
- *   = as * ad * set_lum (d/ad, LUM (s/as), 1)
- *   = set_lum (as * d, ad * LUM (s), as * ad)
- */
-static inline void
-blend_hsl_luminosity (uint32_t r[3],
-                      uint32_t d[3],
-                      uint32_t ad,
-                      uint32_t s[3],
-                      uint32_t as)
-{
-    r[0] = d[0] * as;
-    r[1] = d[1] * as;
-    r[2] = d[2] * as;
-    set_lum (r, r, as * ad, LUM (s) * ad);
-}
-
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity)
-
-#undef SAT
-#undef LUM
-#undef CH_MAX
-#undef CH_MIN
-#undef PDF_NON_SEPARABLE_BLEND_MODE
-
-/* All of the disjoint/conjoint composing functions
- *
- * The four entries in the first column indicate what source contributions
- * come from each of the four areas of the picture -- areas covered by neither
- * A nor B, areas covered only by A, areas covered only by B and finally
- * areas covered by both A and B.
- *
- * Disjoint			Conjoint
- * Fa		Fb		Fa		Fb
- * (0,0,0,0)	0		0		0		0
- * (0,A,0,A)	1		0		1		0
- * (0,0,B,B)	0		1		0		1
- * (0,A,B,A)	1		min((1-a)/b,1)	1		max(1-a/b,0)
- * (0,A,B,B)	min((1-b)/a,1)	1		max(1-b/a,0)	1
- * (0,0,0,A)	max(1-(1-b)/a,0) 0		min(1,b/a)	0
- * (0,0,0,B)	0		max(1-(1-a)/b,0) 0		min(a/b,1)
- * (0,A,0,0)	min(1,(1-b)/a)	0		max(1-b/a,0)	0
- * (0,0,B,0)	0		min(1,(1-a)/b)	0		max(1-a/b,0)
- * (0,0,B,A)	max(1-(1-b)/a,0) min(1,(1-a)/b)	 min(1,b/a)	max(1-a/b,0)
- * (0,A,0,B)	min(1,(1-b)/a)	max(1-(1-a)/b,0) max(1-b/a,0)	min(1,a/b)
- * (0,A,B,0)	min(1,(1-b)/a)	min(1,(1-a)/b)	max(1-b/a,0)	max(1-a/b,0)
- *
- * See  http://marc.info/?l=xfree-render&m=99792000027857&w=2  for more
- * information about these operators.
- */
-
-#define COMBINE_A_OUT 1
-#define COMBINE_A_IN  2
-#define COMBINE_B_OUT 4
-#define COMBINE_B_IN  8
-
-#define COMBINE_CLEAR   0
-#define COMBINE_A       (COMBINE_A_OUT | COMBINE_A_IN)
-#define COMBINE_B       (COMBINE_B_OUT | COMBINE_B_IN)
-#define COMBINE_A_OVER  (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_A_IN)
-#define COMBINE_B_OVER  (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_B_IN)
-#define COMBINE_A_ATOP  (COMBINE_B_OUT | COMBINE_A_IN)
-#define COMBINE_B_ATOP  (COMBINE_A_OUT | COMBINE_B_IN)
-#define COMBINE_XOR     (COMBINE_A_OUT | COMBINE_B_OUT)
-
-/* portion covered by a but not b */
-static uint8_t
-combine_disjoint_out_part (uint8_t a, uint8_t b)
-{
-    /* min (1, (1-b) / a) */
-
-    b = ~b;                 /* 1 - b */
-    if (b >= a)             /* 1 - b >= a -> (1-b)/a >= 1 */
-	return MASK;        /* 1 */
-    return DIV_UN8 (b, a);     /* (1-b) / a */
-}
-
-/* portion covered by both a and b */
-static uint8_t
-combine_disjoint_in_part (uint8_t a, uint8_t b)
-{
-    /* max (1-(1-b)/a,0) */
-    /*  = - min ((1-b)/a - 1, 0) */
-    /*  = 1 - min (1, (1-b)/a) */
-
-    b = ~b;                 /* 1 - b */
-    if (b >= a)             /* 1 - b >= a -> (1-b)/a >= 1 */
-	return 0;           /* 1 - 1 */
-    return ~DIV_UN8(b, a);    /* 1 - (1-b) / a */
-}
-
-/* portion covered by a but not b */
-static uint8_t
-combine_conjoint_out_part (uint8_t a, uint8_t b)
-{
-    /* max (1-b/a,0) */
-    /* = 1-min(b/a,1) */
-
-    /* min (1, (1-b) / a) */
-
-    if (b >= a)             /* b >= a -> b/a >= 1 */
-	return 0x00;        /* 0 */
-    return ~DIV_UN8(b, a);    /* 1 - b/a */
-}
-
-/* portion covered by both a and b */
-static uint8_t
-combine_conjoint_in_part (uint8_t a, uint8_t b)
-{
-    /* min (1,b/a) */
-
-    if (b >= a)             /* b >= a -> b/a >= 1 */
-	return MASK;        /* 1 */
-    return DIV_UN8 (b, a);     /* b/a */
-}
-
-#define GET_COMP(v, i)   ((uint16_t) (uint8_t) ((v) >> i))
-
-#define ADD(x, y, i, t)							\
-    ((t) = GET_COMP (x, i) + GET_COMP (y, i),				\
-     (uint32_t) ((uint8_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i))
-
-#define GENERIC(x, y, i, ax, ay, t, u, v)				\
-    ((t) = (MUL_UN8 (GET_COMP (y, i), ay, (u)) +			\
-            MUL_UN8 (GET_COMP (x, i), ax, (v))),			\
-     (uint32_t) ((uint8_t) ((t) |					\
-                           (0 - ((t) >> G_SHIFT)))) << (i))
-
-static void
-combine_disjoint_general_u (uint32_t *      dest,
-                            const uint32_t *src,
-                            const uint32_t *mask,
-                            int            width,
-                            uint8_t        combine)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	uint32_t m, n, o, p;
-	uint16_t Fa, Fb, t, u, v;
-	uint8_t sa = s >> A_SHIFT;
-	uint8_t da = d >> A_SHIFT;
-
-	switch (combine & COMBINE_A)
-	{
-	default:
-	    Fa = 0;
-	    break;
-
-	case COMBINE_A_OUT:
-	    Fa = combine_disjoint_out_part (sa, da);
-	    break;
-
-	case COMBINE_A_IN:
-	    Fa = combine_disjoint_in_part (sa, da);
-	    break;
-
-	case COMBINE_A:
-	    Fa = MASK;
-	    break;
-	}
-
-	switch (combine & COMBINE_B)
-	{
-	default:
-	    Fb = 0;
-	    break;
-
-	case COMBINE_B_OUT:
-	    Fb = combine_disjoint_out_part (da, sa);
-	    break;
-
-	case COMBINE_B_IN:
-	    Fb = combine_disjoint_in_part (da, sa);
-	    break;
-
-	case COMBINE_B:
-	    Fb = MASK;
-	    break;
-	}
-	m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
-	n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
-	o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
-	p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
-	s = m | n | o | p;
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_disjoint_over_u (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         uint32_t *                dest,
-                         const uint32_t *          src,
-                         const uint32_t *          mask,
-                         int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint16_t a = s >> A_SHIFT;
-
-	if (s != 0x00)
-	{
-	    uint32_t d = *(dest + i);
-	    a = combine_disjoint_out_part (d >> A_SHIFT, a);
-	    UN8x4_MUL_UN8_ADD_UN8x4 (d, a, s);
-
-	    *(dest + i) = d;
-	}
-    }
-}
-
-static void
-combine_disjoint_in_u (pixman_implementation_t *imp,
-                       pixman_op_t              op,
-                       uint32_t *                dest,
-                       const uint32_t *          src,
-                       const uint32_t *          mask,
-                       int                      width)
-{
-    combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
-}
-
-static void
-combine_disjoint_in_reverse_u (pixman_implementation_t *imp,
-                               pixman_op_t              op,
-                               uint32_t *                dest,
-                               const uint32_t *          src,
-                               const uint32_t *          mask,
-                               int                      width)
-{
-    combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
-}
-
-static void
-combine_disjoint_out_u (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        uint32_t *                dest,
-                        const uint32_t *          src,
-                        const uint32_t *          mask,
-                        int                      width)
-{
-    combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-static void
-combine_disjoint_out_reverse_u (pixman_implementation_t *imp,
-                                pixman_op_t              op,
-                                uint32_t *                dest,
-                                const uint32_t *          src,
-                                const uint32_t *          mask,
-                                int                      width)
-{
-    combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-static void
-combine_disjoint_atop_u (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         uint32_t *                dest,
-                         const uint32_t *          src,
-                         const uint32_t *          mask,
-                         int                      width)
-{
-    combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-static void
-combine_disjoint_atop_reverse_u (pixman_implementation_t *imp,
-                                 pixman_op_t              op,
-                                 uint32_t *                dest,
-                                 const uint32_t *          src,
-                                 const uint32_t *          mask,
-                                 int                      width)
-{
-    combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-static void
-combine_disjoint_xor_u (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        uint32_t *                dest,
-                        const uint32_t *          src,
-                        const uint32_t *          mask,
-                        int                      width)
-{
-    combine_disjoint_general_u (dest, src, mask, width, COMBINE_XOR);
-}
-
-static void
-combine_conjoint_general_u (uint32_t *      dest,
-                            const uint32_t *src,
-                            const uint32_t *mask,
-                            int            width,
-                            uint8_t        combine)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	uint32_t m, n, o, p;
-	uint16_t Fa, Fb, t, u, v;
-	uint8_t sa = s >> A_SHIFT;
-	uint8_t da = d >> A_SHIFT;
-
-	switch (combine & COMBINE_A)
-	{
-	default:
-	    Fa = 0;
-	    break;
-
-	case COMBINE_A_OUT:
-	    Fa = combine_conjoint_out_part (sa, da);
-	    break;
-
-	case COMBINE_A_IN:
-	    Fa = combine_conjoint_in_part (sa, da);
-	    break;
-
-	case COMBINE_A:
-	    Fa = MASK;
-	    break;
-	}
-
-	switch (combine & COMBINE_B)
-	{
-	default:
-	    Fb = 0;
-	    break;
-
-	case COMBINE_B_OUT:
-	    Fb = combine_conjoint_out_part (da, sa);
-	    break;
-
-	case COMBINE_B_IN:
-	    Fb = combine_conjoint_in_part (da, sa);
-	    break;
-
-	case COMBINE_B:
-	    Fb = MASK;
-	    break;
-	}
-
-	m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
-	n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
-	o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
-	p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
-
-	s = m | n | o | p;
-
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_conjoint_over_u (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         uint32_t *                dest,
-                         const uint32_t *          src,
-                         const uint32_t *          mask,
-                         int                      width)
-{
-    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OVER);
-}
-
-static void
-combine_conjoint_over_reverse_u (pixman_implementation_t *imp,
-                                 pixman_op_t              op,
-                                 uint32_t *                dest,
-                                 const uint32_t *          src,
-                                 const uint32_t *          mask,
-                                 int                      width)
-{
-    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OVER);
-}
-
-static void
-combine_conjoint_in_u (pixman_implementation_t *imp,
-                       pixman_op_t              op,
-                       uint32_t *                dest,
-                       const uint32_t *          src,
-                       const uint32_t *          mask,
-                       int                      width)
-{
-    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
-}
-
-static void
-combine_conjoint_in_reverse_u (pixman_implementation_t *imp,
-                               pixman_op_t              op,
-                               uint32_t *                dest,
-                               const uint32_t *          src,
-                               const uint32_t *          mask,
-                               int                      width)
-{
-    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
-}
-
-static void
-combine_conjoint_out_u (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        uint32_t *                dest,
-                        const uint32_t *          src,
-                        const uint32_t *          mask,
-                        int                      width)
-{
-    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-static void
-combine_conjoint_out_reverse_u (pixman_implementation_t *imp,
-                                pixman_op_t              op,
-                                uint32_t *                dest,
-                                const uint32_t *          src,
-                                const uint32_t *          mask,
-                                int                      width)
-{
-    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-static void
-combine_conjoint_atop_u (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         uint32_t *                dest,
-                         const uint32_t *          src,
-                         const uint32_t *          mask,
-                         int                      width)
-{
-    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-static void
-combine_conjoint_atop_reverse_u (pixman_implementation_t *imp,
-                                 pixman_op_t              op,
-                                 uint32_t *                dest,
-                                 const uint32_t *          src,
-                                 const uint32_t *          mask,
-                                 int                      width)
-{
-    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-static void
-combine_conjoint_xor_u (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        uint32_t *                dest,
-                        const uint32_t *          src,
-                        const uint32_t *          mask,
-                        int                      width)
-{
-    combine_conjoint_general_u (dest, src, mask, width, COMBINE_XOR);
-}
-
-
-/* Component alpha combiners */
-
-static void
-combine_clear_ca (pixman_implementation_t *imp,
-                  pixman_op_t              op,
-                  uint32_t *                dest,
-                  const uint32_t *          src,
-                  const uint32_t *          mask,
-                  int                      width)
-{
-    memset (dest, 0, width * sizeof(uint32_t));
-}
-
-static void
-combine_src_ca (pixman_implementation_t *imp,
-                pixman_op_t              op,
-                uint32_t *                dest,
-                const uint32_t *          src,
-                const uint32_t *          mask,
-                int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-
-	combine_mask_value_ca (&s, &m);
-
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_over_ca (pixman_implementation_t *imp,
-                 pixman_op_t              op,
-                 uint32_t *                dest,
-                 const uint32_t *          src,
-                 const uint32_t *          mask,
-                 int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t a;
-
-	combine_mask_ca (&s, &m);
-
-	a = ~m;
-	if (a)
-	{
-	    uint32_t d = *(dest + i);
-	    UN8x4_MUL_UN8x4_ADD_UN8x4 (d, a, s);
-	    s = d;
-	}
-
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_over_reverse_ca (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         uint32_t *                dest,
-                         const uint32_t *          src,
-                         const uint32_t *          mask,
-                         int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t d = *(dest + i);
-	uint32_t a = ~d >> A_SHIFT;
-
-	if (a)
-	{
-	    uint32_t s = *(src + i);
-	    uint32_t m = *(mask + i);
-
-	    UN8x4_MUL_UN8x4 (s, m);
-	    UN8x4_MUL_UN8_ADD_UN8x4 (s, a, d);
-
-	    *(dest + i) = s;
-	}
-    }
-}
-
-static void
-combine_in_ca (pixman_implementation_t *imp,
-               pixman_op_t              op,
-               uint32_t *                dest,
-               const uint32_t *          src,
-               const uint32_t *          mask,
-               int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t d = *(dest + i);
-	uint16_t a = d >> A_SHIFT;
-	uint32_t s = 0;
-
-	if (a)
-	{
-	    uint32_t m = *(mask + i);
-
-	    s = *(src + i);
-	    combine_mask_value_ca (&s, &m);
-
-	    if (a != MASK)
-		UN8x4_MUL_UN8 (s, a);
-	}
-
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_in_reverse_ca (pixman_implementation_t *imp,
-                       pixman_op_t              op,
-                       uint32_t *                dest,
-                       const uint32_t *          src,
-                       const uint32_t *          mask,
-                       int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t a;
-
-	combine_mask_alpha_ca (&s, &m);
-
-	a = m;
-	if (a != ~0)
-	{
-	    uint32_t d = 0;
-
-	    if (a)
-	    {
-		d = *(dest + i);
-		UN8x4_MUL_UN8x4 (d, a);
-	    }
-
-	    *(dest + i) = d;
-	}
-    }
-}
-
-static void
-combine_out_ca (pixman_implementation_t *imp,
-                pixman_op_t              op,
-                uint32_t *                dest,
-                const uint32_t *          src,
-                const uint32_t *          mask,
-                int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t d = *(dest + i);
-	uint16_t a = ~d >> A_SHIFT;
-	uint32_t s = 0;
-
-	if (a)
-	{
-	    uint32_t m = *(mask + i);
-
-	    s = *(src + i);
-	    combine_mask_value_ca (&s, &m);
-
-	    if (a != MASK)
-		UN8x4_MUL_UN8 (s, a);
-	}
-
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_out_reverse_ca (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        uint32_t *                dest,
-                        const uint32_t *          src,
-                        const uint32_t *          mask,
-                        int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t a;
-
-	combine_mask_alpha_ca (&s, &m);
-
-	a = ~m;
-	if (a != ~0)
-	{
-	    uint32_t d = 0;
-
-	    if (a)
-	    {
-		d = *(dest + i);
-		UN8x4_MUL_UN8x4 (d, a);
-	    }
-
-	    *(dest + i) = d;
-	}
-    }
-}
-
-static void
-combine_atop_ca (pixman_implementation_t *imp,
-                 pixman_op_t              op,
-                 uint32_t *                dest,
-                 const uint32_t *          src,
-                 const uint32_t *          mask,
-                 int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t d = *(dest + i);
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t ad;
-	uint16_t as = d >> A_SHIFT;
-
-	combine_mask_ca (&s, &m);
-
-	ad = ~m;
-
-	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as);
-
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_atop_reverse_ca (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         uint32_t *                dest,
-                         const uint32_t *          src,
-                         const uint32_t *          mask,
-                         int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t d = *(dest + i);
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t ad;
-	uint16_t as = ~d >> A_SHIFT;
-
-	combine_mask_ca (&s, &m);
-
-	ad = m;
-
-	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as);
-
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_xor_ca (pixman_implementation_t *imp,
-                pixman_op_t              op,
-                uint32_t *                dest,
-                const uint32_t *          src,
-                const uint32_t *          mask,
-                int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t d = *(dest + i);
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t ad;
-	uint16_t as = ~d >> A_SHIFT;
-
-	combine_mask_ca (&s, &m);
-
-	ad = ~m;
-
-	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as);
-
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_add_ca (pixman_implementation_t *imp,
-                pixman_op_t              op,
-                uint32_t *                dest,
-                const uint32_t *          src,
-                const uint32_t *          mask,
-                int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t d = *(dest + i);
-
-	combine_mask_value_ca (&s, &m);
-
-	UN8x4_ADD_UN8x4 (d, s);
-
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_saturate_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *                dest,
-                     const uint32_t *          src,
-                     const uint32_t *          mask,
-                     int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s, d;
-	uint16_t sa, sr, sg, sb, da;
-	uint16_t t, u, v;
-	uint32_t m, n, o, p;
-
-	d = *(dest + i);
-	s = *(src + i);
-	m = *(mask + i);
-
-	combine_mask_ca (&s, &m);
-
-	sa = (m >> A_SHIFT);
-	sr = (m >> R_SHIFT) & MASK;
-	sg = (m >> G_SHIFT) & MASK;
-	sb =  m             & MASK;
-	da = ~d >> A_SHIFT;
-
-	if (sb <= da)
-	    m = ADD (s, d, 0, t);
-	else
-	    m = GENERIC (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v);
-
-	if (sg <= da)
-	    n = ADD (s, d, G_SHIFT, t);
-	else
-	    n = GENERIC (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v);
-
-	if (sr <= da)
-	    o = ADD (s, d, R_SHIFT, t);
-	else
-	    o = GENERIC (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v);
-
-	if (sa <= da)
-	    p = ADD (s, d, A_SHIFT, t);
-	else
-	    p = GENERIC (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v);
-
-	*(dest + i) = m | n | o | p;
-    }
-}
-
-static void
-combine_disjoint_general_ca (uint32_t *      dest,
-                             const uint32_t *src,
-                             const uint32_t *mask,
-                             int            width,
-                             uint8_t        combine)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s, d;
-	uint32_t m, n, o, p;
-	uint32_t Fa, Fb;
-	uint16_t t, u, v;
-	uint32_t sa;
-	uint8_t da;
-
-	s = *(src + i);
-	m = *(mask + i);
-	d = *(dest + i);
-	da = d >> A_SHIFT;
-
-	combine_mask_ca (&s, &m);
-
-	sa = m;
-
-	switch (combine & COMBINE_A)
-	{
-	default:
-	    Fa = 0;
-	    break;
-
-	case COMBINE_A_OUT:
-	    m = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> 0), da);
-	    n = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
-	    o = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
-	    p = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
-	    Fa = m | n | o | p;
-	    break;
-
-	case COMBINE_A_IN:
-	    m = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> 0), da);
-	    n = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
-	    o = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
-	    p = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
-	    Fa = m | n | o | p;
-	    break;
-
-	case COMBINE_A:
-	    Fa = ~0;
-	    break;
-	}
-
-	switch (combine & COMBINE_B)
-	{
-	default:
-	    Fb = 0;
-	    break;
-
-	case COMBINE_B_OUT:
-	    m = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> 0));
-	    n = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
-	    o = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
-	    p = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
-	    Fb = m | n | o | p;
-	    break;
-
-	case COMBINE_B_IN:
-	    m = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> 0));
-	    n = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
-	    o = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
-	    p = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
-	    Fb = m | n | o | p;
-	    break;
-
-	case COMBINE_B:
-	    Fb = ~0;
-	    break;
-	}
-	m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v);
-	n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
-	o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
-	p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
-
-	s = m | n | o | p;
-
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_disjoint_over_ca (pixman_implementation_t *imp,
-                          pixman_op_t              op,
-                          uint32_t *                dest,
-                          const uint32_t *          src,
-                          const uint32_t *          mask,
-                          int                      width)
-{
-    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
-}
-
-static void
-combine_disjoint_in_ca (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        uint32_t *                dest,
-                        const uint32_t *          src,
-                        const uint32_t *          mask,
-                        int                      width)
-{
-    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
-}
-
-static void
-combine_disjoint_in_reverse_ca (pixman_implementation_t *imp,
-                                pixman_op_t              op,
-                                uint32_t *                dest,
-                                const uint32_t *          src,
-                                const uint32_t *          mask,
-                                int                      width)
-{
-    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
-}
-
-static void
-combine_disjoint_out_ca (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         uint32_t *                dest,
-                         const uint32_t *          src,
-                         const uint32_t *          mask,
-                         int                      width)
-{
-    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-static void
-combine_disjoint_out_reverse_ca (pixman_implementation_t *imp,
-                                 pixman_op_t              op,
-                                 uint32_t *                dest,
-                                 const uint32_t *          src,
-                                 const uint32_t *          mask,
-                                 int                      width)
-{
-    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-static void
-combine_disjoint_atop_ca (pixman_implementation_t *imp,
-                          pixman_op_t              op,
-                          uint32_t *                dest,
-                          const uint32_t *          src,
-                          const uint32_t *          mask,
-                          int                      width)
-{
-    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-static void
-combine_disjoint_atop_reverse_ca (pixman_implementation_t *imp,
-                                  pixman_op_t              op,
-                                  uint32_t *                dest,
-                                  const uint32_t *          src,
-                                  const uint32_t *          mask,
-                                  int                      width)
-{
-    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-static void
-combine_disjoint_xor_ca (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         uint32_t *                dest,
-                         const uint32_t *          src,
-                         const uint32_t *          mask,
-                         int                      width)
-{
-    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
-}
-
-static void
-combine_conjoint_general_ca (uint32_t *      dest,
-                             const uint32_t *src,
-                             const uint32_t *mask,
-                             int            width,
-                             uint8_t        combine)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s, d;
-	uint32_t m, n, o, p;
-	uint32_t Fa, Fb;
-	uint16_t t, u, v;
-	uint32_t sa;
-	uint8_t da;
-
-	s = *(src + i);
-	m = *(mask + i);
-	d = *(dest + i);
-	da = d >> A_SHIFT;
-
-	combine_mask_ca (&s, &m);
-
-	sa = m;
-
-	switch (combine & COMBINE_A)
-	{
-	default:
-	    Fa = 0;
-	    break;
-
-	case COMBINE_A_OUT:
-	    m = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> 0), da);
-	    n = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
-	    o = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
-	    p = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
-	    Fa = m | n | o | p;
-	    break;
-
-	case COMBINE_A_IN:
-	    m = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> 0), da);
-	    n = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
-	    o = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
-	    p = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
-	    Fa = m | n | o | p;
-	    break;
-
-	case COMBINE_A:
-	    Fa = ~0;
-	    break;
-	}
-
-	switch (combine & COMBINE_B)
-	{
-	default:
-	    Fb = 0;
-	    break;
-
-	case COMBINE_B_OUT:
-	    m = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> 0));
-	    n = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
-	    o = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
-	    p = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
-	    Fb = m | n | o | p;
-	    break;
-
-	case COMBINE_B_IN:
-	    m = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> 0));
-	    n = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
-	    o = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
-	    p = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
-	    Fb = m | n | o | p;
-	    break;
-
-	case COMBINE_B:
-	    Fb = ~0;
-	    break;
-	}
-	m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v);
-	n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
-	o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
-	p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
-
-	s = m | n | o | p;
-
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_conjoint_over_ca (pixman_implementation_t *imp,
-                          pixman_op_t              op,
-                          uint32_t *                dest,
-                          const uint32_t *          src,
-                          const uint32_t *          mask,
-                          int                      width)
-{
-    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
-}
-
-static void
-combine_conjoint_over_reverse_ca (pixman_implementation_t *imp,
-                                  pixman_op_t              op,
-                                  uint32_t *                dest,
-                                  const uint32_t *          src,
-                                  const uint32_t *          mask,
-                                  int                      width)
-{
-    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OVER);
-}
-
-static void
-combine_conjoint_in_ca (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        uint32_t *                dest,
-                        const uint32_t *          src,
-                        const uint32_t *          mask,
-                        int                      width)
-{
-    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
-}
-
-static void
-combine_conjoint_in_reverse_ca (pixman_implementation_t *imp,
-                                pixman_op_t              op,
-                                uint32_t *                dest,
-                                const uint32_t *          src,
-                                const uint32_t *          mask,
-                                int                      width)
-{
-    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
-}
-
-static void
-combine_conjoint_out_ca (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         uint32_t *                dest,
-                         const uint32_t *          src,
-                         const uint32_t *          mask,
-                         int                      width)
-{
-    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-static void
-combine_conjoint_out_reverse_ca (pixman_implementation_t *imp,
-                                 pixman_op_t              op,
-                                 uint32_t *                dest,
-                                 const uint32_t *          src,
-                                 const uint32_t *          mask,
-                                 int                      width)
-{
-    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-static void
-combine_conjoint_atop_ca (pixman_implementation_t *imp,
-                          pixman_op_t              op,
-                          uint32_t *                dest,
-                          const uint32_t *          src,
-                          const uint32_t *          mask,
-                          int                      width)
-{
-    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-static void
-combine_conjoint_atop_reverse_ca (pixman_implementation_t *imp,
-                                  pixman_op_t              op,
-                                  uint32_t *                dest,
-                                  const uint32_t *          src,
-                                  const uint32_t *          mask,
-                                  int                      width)
-{
-    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-static void
-combine_conjoint_xor_ca (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         uint32_t *                dest,
-                         const uint32_t *          src,
-                         const uint32_t *          mask,
-                         int                      width)
-{
-    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
-}
-
-void
-_pixman_setup_combiner_functions_32 (pixman_implementation_t *imp)
-{
-    /* Unified alpha */
-    imp->combine_32[PIXMAN_OP_CLEAR] = combine_clear;
-    imp->combine_32[PIXMAN_OP_SRC] = combine_src_u;
-    imp->combine_32[PIXMAN_OP_DST] = combine_dst;
-    imp->combine_32[PIXMAN_OP_OVER] = combine_over_u;
-    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u;
-    imp->combine_32[PIXMAN_OP_IN] = combine_in_u;
-    imp->combine_32[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u;
-    imp->combine_32[PIXMAN_OP_OUT] = combine_out_u;
-    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u;
-    imp->combine_32[PIXMAN_OP_ATOP] = combine_atop_u;
-    imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u;
-    imp->combine_32[PIXMAN_OP_XOR] = combine_xor_u;
-    imp->combine_32[PIXMAN_OP_ADD] = combine_add_u;
-    imp->combine_32[PIXMAN_OP_SATURATE] = combine_saturate_u;
-
-    /* Disjoint, unified */
-    imp->combine_32[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear;
-    imp->combine_32[PIXMAN_OP_DISJOINT_SRC] = combine_src_u;
-    imp->combine_32[PIXMAN_OP_DISJOINT_DST] = combine_dst;
-    imp->combine_32[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u;
-    imp->combine_32[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u;
-    imp->combine_32[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u;
-    imp->combine_32[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u;
-    imp->combine_32[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u;
-    imp->combine_32[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u;
-    imp->combine_32[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u;
-    imp->combine_32[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u;
-    imp->combine_32[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u;
-
-    /* Conjoint, unified */
-    imp->combine_32[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear;
-    imp->combine_32[PIXMAN_OP_CONJOINT_SRC] = combine_src_u;
-    imp->combine_32[PIXMAN_OP_CONJOINT_DST] = combine_dst;
-    imp->combine_32[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u;
-    imp->combine_32[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u;
-    imp->combine_32[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u;
-    imp->combine_32[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u;
-    imp->combine_32[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u;
-    imp->combine_32[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u;
-    imp->combine_32[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u;
-    imp->combine_32[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u;
-    imp->combine_32[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u;
-
-    imp->combine_32[PIXMAN_OP_MULTIPLY] = combine_multiply_u;
-    imp->combine_32[PIXMAN_OP_SCREEN] = combine_screen_u;
-    imp->combine_32[PIXMAN_OP_OVERLAY] = combine_overlay_u;
-    imp->combine_32[PIXMAN_OP_DARKEN] = combine_darken_u;
-    imp->combine_32[PIXMAN_OP_LIGHTEN] = combine_lighten_u;
-    imp->combine_32[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u;
-    imp->combine_32[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u;
-    imp->combine_32[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u;
-    imp->combine_32[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u;
-    imp->combine_32[PIXMAN_OP_DIFFERENCE] = combine_difference_u;
-    imp->combine_32[PIXMAN_OP_EXCLUSION] = combine_exclusion_u;
-    imp->combine_32[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u;
-    imp->combine_32[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u;
-    imp->combine_32[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u;
-    imp->combine_32[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u;
-
-    /* Component alpha combiners */
-    imp->combine_32_ca[PIXMAN_OP_CLEAR] = combine_clear_ca;
-    imp->combine_32_ca[PIXMAN_OP_SRC] = combine_src_ca;
-    /* dest */
-    imp->combine_32_ca[PIXMAN_OP_OVER] = combine_over_ca;
-    imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_IN] = combine_in_ca;
-    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_OUT] = combine_out_ca;
-    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_ATOP] = combine_atop_ca;
-    imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_XOR] = combine_xor_ca;
-    imp->combine_32_ca[PIXMAN_OP_ADD] = combine_add_ca;
-    imp->combine_32_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca;
-
-    /* Disjoint CA */
-    imp->combine_32_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca;
-    imp->combine_32_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca;
-    imp->combine_32_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst;
-    imp->combine_32_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca;
-    imp->combine_32_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca;
-    imp->combine_32_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca;
-    imp->combine_32_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca;
-    imp->combine_32_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca;
-    imp->combine_32_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca;
-
-    /* Conjoint CA */
-    imp->combine_32_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca;
-    imp->combine_32_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca;
-    imp->combine_32_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst;
-    imp->combine_32_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca;
-    imp->combine_32_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca;
-    imp->combine_32_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca;
-    imp->combine_32_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca;
-    imp->combine_32_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca;
-
-    imp->combine_32_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca;
-    imp->combine_32_ca[PIXMAN_OP_SCREEN] = combine_screen_ca;
-    imp->combine_32_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca;
-    imp->combine_32_ca[PIXMAN_OP_DARKEN] = combine_darken_ca;
-    imp->combine_32_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca;
-    imp->combine_32_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca;
-    imp->combine_32_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca;
-    imp->combine_32_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca;
-    imp->combine_32_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca;
-    imp->combine_32_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca;
-    imp->combine_32_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca;
-
-    /* It is not clear that these make sense, so make them noops for now */
-    imp->combine_32_ca[PIXMAN_OP_HSL_HUE] = combine_dst;
-    imp->combine_32_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst;
-    imp->combine_32_ca[PIXMAN_OP_HSL_COLOR] = combine_dst;
-    imp->combine_32_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst;
-}
diff --git a/source/libs/pixman/pixman-0.32.8-PATCHES/ChangeLog b/source/libs/pixman/pixman-src-PATCHES/ChangeLog
old mode 100755
new mode 100644
similarity index 87%
rename from source/libs/pixman/pixman-0.32.8-PATCHES/ChangeLog
rename to source/libs/pixman/pixman-src-PATCHES/ChangeLog
index d77828604cbb5618af8241b235df1bc50c2f33cb..39dcd76ca07e967fa98ace227a487e698db648cf
--- a/source/libs/pixman/pixman-0.32.8-PATCHES/ChangeLog
+++ b/source/libs/pixman/pixman-src-PATCHES/ChangeLog
@@ -1,3 +1,8 @@
+2016-02-04  Akira Kakuto  <kakuto@fuk.kindai.ac.jp>
+
+	Import pixman-0.34.0.
+	* patch-01-hide-symbols: Adapted.
+
 2015-09-26  Peter Breitenlohner  <peb@mppmu.mpg.de>
 
 	Import pixman-0.32.8.
diff --git a/source/libs/pixman/pixman-0.32.8-PATCHES/TL-Changes b/source/libs/pixman/pixman-src-PATCHES/TL-Changes
old mode 100755
new mode 100644
similarity index 77%
rename from source/libs/pixman/pixman-0.32.8-PATCHES/TL-Changes
rename to source/libs/pixman/pixman-src-PATCHES/TL-Changes
index 9207296fcc0276beed222ea3bd2c9cd77090697c..7df6c0ac66a777d46da9082cc0ff7a6e93e8baa9
--- a/source/libs/pixman/pixman-0.32.8-PATCHES/TL-Changes
+++ b/source/libs/pixman/pixman-src-PATCHES/TL-Changes
@@ -1,4 +1,4 @@
-Changes applied to the pixman-0.32.6/ tree as obtained from:
+Changes applied to the pixman-0.34.0/ tree as obtained from:
 	http://cairographics.org/releases/
 
 Removed:
@@ -7,6 +7,7 @@ Removed:
 	config.guess
 	config.sub
 	configure
+	compile
 	depcomp
 	install-sh
 	ltmain.sh
diff --git a/source/libs/pixman/pixman-0.32.8-PATCHES/patch-01-hide-symbols b/source/libs/pixman/pixman-src-PATCHES/patch-01-hide-symbols
old mode 100755
new mode 100644
similarity index 55%
rename from source/libs/pixman/pixman-0.32.8-PATCHES/patch-01-hide-symbols
rename to source/libs/pixman/pixman-src-PATCHES/patch-01-hide-symbols
index 088231f47ef355a09868d4098495f236ef2f3ea8..62cc5f039f789a9009afa30b3afb596aff5b8c51
--- a/source/libs/pixman/pixman-0.32.8-PATCHES/patch-01-hide-symbols
+++ b/source/libs/pixman/pixman-src-PATCHES/patch-01-hide-symbols
@@ -1,6 +1,6 @@
-diff -ur pixman-0.32.8.orig/pixman/pixman-compiler.h pixman-0.32.8/pixman/pixman-compiler.h
---- pixman-0.32.8.orig/pixman/pixman-compiler.h	2015-06-30 11:48:31.000000000 +0200
-+++ pixman-0.32.8/pixman/pixman-compiler.h	2015-09-26 18:58:53.409947954 +0200
+diff -ur pixman-0.34.0.orig/pixman/pixman-compiler.h pixman-0.34.0/pixman/pixman-compiler.h
+--- pixman-0.34.0.orig/pixman/pixman-compiler.h	Tue Jun 30 18:48:31 2015
++++ pixman-0.34.0/pixman/pixman-compiler.h	Thu Feb 04 16:56:27 2016
 @@ -91,7 +91,7 @@
  
  /* GCC visibility */
diff --git a/source/libs/pixman/pixman-0.32.8/AUTHORS b/source/libs/pixman/pixman-src/AUTHORS
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/AUTHORS
rename to source/libs/pixman/pixman-src/AUTHORS
diff --git a/source/libs/pixman/pixman-0.32.8/COPYING b/source/libs/pixman/pixman-src/COPYING
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/COPYING
rename to source/libs/pixman/pixman-src/COPYING
diff --git a/source/libs/pixman/pixman-0.32.8/ChangeLog b/source/libs/pixman/pixman-src/ChangeLog
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/ChangeLog
rename to source/libs/pixman/pixman-src/ChangeLog
diff --git a/source/libs/pixman/pixman-0.32.8/INSTALL b/source/libs/pixman/pixman-src/INSTALL
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/INSTALL
rename to source/libs/pixman/pixman-src/INSTALL
diff --git a/source/libs/pixman/pixman-0.32.8/Makefile.am b/source/libs/pixman/pixman-src/Makefile.am
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/Makefile.am
rename to source/libs/pixman/pixman-src/Makefile.am
diff --git a/source/libs/pixman/pixman-0.32.8/Makefile.win32 b/source/libs/pixman/pixman-src/Makefile.win32
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/Makefile.win32
rename to source/libs/pixman/pixman-src/Makefile.win32
diff --git a/source/libs/pixman/pixman-0.32.8/Makefile.win32.common b/source/libs/pixman/pixman-src/Makefile.win32.common
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/Makefile.win32.common
rename to source/libs/pixman/pixman-src/Makefile.win32.common
diff --git a/source/libs/pixman/pixman-0.32.8/NEWS b/source/libs/pixman/pixman-src/NEWS
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/NEWS
rename to source/libs/pixman/pixman-src/NEWS
diff --git a/source/libs/pixman/pixman-0.32.8/README b/source/libs/pixman/pixman-src/README
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/README
rename to source/libs/pixman/pixman-src/README
diff --git a/source/libs/pixman/pixman-0.32.8/config.h.in b/source/libs/pixman/pixman-src/config.h.in
old mode 100755
new mode 100644
similarity index 96%
rename from source/libs/pixman/pixman-0.32.8/config.h.in
rename to source/libs/pixman/pixman-src/config.h.in
index 17d8250349c057ed5d5f7095881bcf3874bc51cb..c707dd52c10a48decd56d6d52756c71e2ec2470a
--- a/source/libs/pixman/pixman-0.32.8/config.h.in
+++ b/source/libs/pixman/pixman-src/config.h.in
@@ -12,6 +12,9 @@
 /* Define to 1 if you have the <dlfcn.h> header file. */
 #undef HAVE_DLFCN_H
 
+/* Whether we have FE_DIVBYZERO */
+#undef HAVE_FEDIVBYZERO
+
 /* Whether we have feenableexcept() */
 #undef HAVE_FEENABLEEXCEPT
 
@@ -84,8 +87,7 @@
 /* Define to 1 if you have the <unistd.h> header file. */
 #undef HAVE_UNISTD_H
 
-/* Define to the sub-directory in which libtool stores uninstalled libraries.
-   */
+/* Define to the sub-directory where libtool stores uninstalled libraries. */
 #undef LT_OBJDIR
 
 /* Name of package */
diff --git a/source/libs/pixman/pixman-0.32.8/configure.ac b/source/libs/pixman/pixman-src/configure.ac
old mode 100755
new mode 100644
similarity index 97%
rename from source/libs/pixman/pixman-0.32.8/configure.ac
rename to source/libs/pixman/pixman-src/configure.ac
index 97494e693f8275f35f4fa2023c9013b9e3795263..156edfbf3f905a1d303f9b1ca109d0fb5b5386a1
--- a/source/libs/pixman/pixman-0.32.8/configure.ac
+++ b/source/libs/pixman/pixman-src/configure.ac
@@ -53,8 +53,8 @@ AC_PREREQ([2.57])
 #
 
 m4_define([pixman_major], 0)
-m4_define([pixman_minor], 32)
-m4_define([pixman_micro], 8)
+m4_define([pixman_minor], 34)
+m4_define([pixman_micro], 0)
 
 m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
 
@@ -184,6 +184,7 @@ AC_SUBST(LT_VERSION_INFO)
 
 PIXMAN_CHECK_CFLAG([-Wall])
 PIXMAN_CHECK_CFLAG([-Wdeclaration-after-statement])
+PIXMAN_CHECK_CFLAG([-Wno-unused-local-typedefs])
 PIXMAN_CHECK_CFLAG([-fno-strict-aliasing])
 
 dnl =========================================================================
@@ -346,15 +347,27 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
 #error "Need GCC >= 3.4 for MMX intrinsics"
 #endif
 #include <mmintrin.h>
+#include <stdint.h>
+
+/* Check support for block expressions */
+#define _mm_shuffle_pi16(A, N)						\
+    ({									\
+	__m64 ret;							\
+									\
+	/* Some versions of clang will choke on K */ 			\
+	asm ("pshufw %2, %1, %0\n\t"					\
+	     : "=y" (ret)						\
+	     : "y" (A), "K" ((const int8_t)N)				\
+	);								\
+									\
+	ret;								\
+    })
+
 int main () {
     __m64 v = _mm_cvtsi32_si64 (1);
     __m64 w;
 
-    /* Some versions of clang will choke on K */
-    asm ("pshufw %2, %1, %0\n\t"
-        : "=y" (w)
-        : "y" (v), "K" (5)
-    );
+    w = _mm_shuffle_pi16(v, 5);
 
     /* Some versions of clang will choke on this */
     asm ("pmulhuw %1, %0\n\t"
@@ -416,10 +429,11 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
 #include <mmintrin.h>
 #include <xmmintrin.h>
 #include <emmintrin.h>
+int param;
 int main () {
-    __m128i a = _mm_set1_epi32 (0), b = _mm_set1_epi32 (0), c;
+    __m128i a = _mm_set1_epi32 (param), b = _mm_set1_epi32 (param + 1), c;
 	c = _mm_xor_si128 (a, b);
-    return 0;
+    return _mm_cvtsi128_si32(c);
 }]])], have_sse2_intrinsics=yes)
 CFLAGS=$xserver_save_CFLAGS
 
@@ -460,10 +474,11 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
 #include <xmmintrin.h>
 #include <emmintrin.h>
 #include <tmmintrin.h>
+int param;
 int main () {
-    __m128i a = _mm_set1_epi32 (0), b = _mm_set1_epi32 (0), c;
+    __m128i a = _mm_set1_epi32 (param), b = _mm_set1_epi32 (param + 1), c;
     c = _mm_maddubs_epi16 (a, b);
-    return 0;
+    return _mm_cvtsi128_si32(c);
 }]])], have_ssse3_intrinsics=yes)
 CFLAGS=$xserver_save_CFLAGS
 
@@ -890,6 +905,11 @@ if test x$have_feenableexcept = xyes; then
    AC_DEFINE(HAVE_FEENABLEEXCEPT, 1, [Whether we have feenableexcept()])
 fi
 
+AC_CHECK_DECL([FE_DIVBYZERO],
+	[AC_DEFINE(HAVE_FEDIVBYZERO, 1, [Whether we have FE_DIVBYZERO])],
+	[],
+	[[#include <fenv.h>]])
+
 AC_CHECK_FUNC(gettimeofday, have_gettimeofday=yes, have_gettimeofday=no)
 AC_CHECK_HEADER(sys/time.h, have_sys_time_h=yes, have_sys_time_h=no)
 if test x$have_gettimeofday = xyes && test x$have_sys_time_h = xyes; then
diff --git a/source/libs/pixman/pixman-0.32.8/pixman-1-uninstalled.pc.in b/source/libs/pixman/pixman-src/pixman-1-uninstalled.pc.in
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman-1-uninstalled.pc.in
rename to source/libs/pixman/pixman-src/pixman-1-uninstalled.pc.in
diff --git a/source/libs/pixman/pixman-0.32.8/pixman-1.pc.in b/source/libs/pixman/pixman-src/pixman-1.pc.in
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman-1.pc.in
rename to source/libs/pixman/pixman-src/pixman-1.pc.in
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/Makefile.am b/source/libs/pixman/pixman-src/pixman/Makefile.am
old mode 100755
new mode 100644
similarity index 98%
rename from source/libs/pixman/pixman-0.32.8/pixman/Makefile.am
rename to source/libs/pixman/pixman-src/pixman/Makefile.am
index b376d9aeb3992e16d91cedbebd3ab2af5fc8d713..581b6f61e2e525530b22e7e16b395303ab7402f0
--- a/source/libs/pixman/pixman-0.32.8/pixman/Makefile.am
+++ b/source/libs/pixman/pixman-src/pixman/Makefile.am
@@ -72,6 +72,7 @@ libpixman_arm_simd_la_SOURCES = \
 	pixman-arm-common.h	\
 	pixman-arm-simd-asm.S   \
 	pixman-arm-simd-asm-scaled.S \
+	pixman-arm-asm.h	\
 	pixman-arm-simd-asm.h
 libpixman_1_la_LIBADD += libpixman-arm-simd.la
 
@@ -86,6 +87,7 @@ libpixman_arm_neon_la_SOURCES = \
         pixman-arm-common.h	\
         pixman-arm-neon-asm.S	\
 		pixman-arm-neon-asm-bilinear.S \
+        pixman-arm-asm.h	\
         pixman-arm-neon-asm.h
 libpixman_1_la_LIBADD += libpixman-arm-neon.la
 
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/Makefile.sources b/source/libs/pixman/pixman-src/pixman/Makefile.sources
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/Makefile.sources
rename to source/libs/pixman/pixman-src/pixman/Makefile.sources
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/Makefile.win32 b/source/libs/pixman/pixman-src/pixman/Makefile.win32
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/Makefile.win32
rename to source/libs/pixman/pixman-src/pixman/Makefile.win32
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/loongson-mmintrin.h b/source/libs/pixman/pixman-src/pixman/loongson-mmintrin.h
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/loongson-mmintrin.h
rename to source/libs/pixman/pixman-src/pixman/loongson-mmintrin.h
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-access-accessors.c b/source/libs/pixman/pixman-src/pixman/pixman-access-accessors.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-access-accessors.c
rename to source/libs/pixman/pixman-src/pixman/pixman-access-accessors.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-access.c b/source/libs/pixman/pixman-src/pixman/pixman-access.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-access.c
rename to source/libs/pixman/pixman-src/pixman/pixman-access.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-accessor.h b/source/libs/pixman/pixman-src/pixman/pixman-accessor.h
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-accessor.h
rename to source/libs/pixman/pixman-src/pixman/pixman-accessor.h
diff --git a/source/libs/pixman/pixman-src/pixman/pixman-arm-asm.h b/source/libs/pixman/pixman-src/pixman/pixman-arm-asm.h
new file mode 100644
index 0000000000000000000000000000000000000000..ee78541087377907073c02f15ac546e308c39a3f
--- /dev/null
+++ b/source/libs/pixman/pixman-src/pixman/pixman-arm-asm.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2008 Mozilla Corporation
+ * Copyright © 2010 Nokia Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Mozilla Corporation not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Mozilla Corporation makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Jeff Muizelaar (jeff@infidigm.net)
+ *
+ */
+
+/* Supplementary macro for setting function attributes */
+.macro pixman_asm_function fname
+	.func fname
+	.global fname
+#ifdef __ELF__
+	.hidden fname
+	.type fname, %function
+#endif
+fname:
+.endm
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-common.h b/source/libs/pixman/pixman-src/pixman/pixman-arm-common.h
old mode 100755
new mode 100644
similarity index 97%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-common.h
rename to source/libs/pixman/pixman-src/pixman/pixman-arm-common.h
index 3a7cb2bef1fde60df5029d7c104dfa1fe0448d54..9537688306e6ba2521ab19e13663f596f5cefe95
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-common.h
+++ b/source/libs/pixman/pixman-src/pixman/pixman-arm-common.h
@@ -266,13 +266,6 @@ FAST_NEAREST_MAINLOOP (cputype##_##name##_normal_##op,                        \
                        scaled_nearest_scanline_##cputype##_##name##_##op,     \
                        src_type, dst_type, NORMAL)
 
-/* Provide entries for the fast path table */
-#define PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH(op,s,d,func)                      \
-    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),                             \
-    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),                              \
-    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func),                               \
-    SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
-
 #define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op,   \
                                                   src_type, dst_type)         \
 void                                                                          \
@@ -318,9 +311,7 @@ FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_normal_##op,                 \
 
 /* Provide entries for the fast path table */
 #define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func)              \
-    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func),                     \
-    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func),                      \
-    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func),                       \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH (op,s,d,func),                           \
     SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
 
 /*****************************************************************************/
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-neon-asm-bilinear.S b/source/libs/pixman/pixman-src/pixman/pixman-arm-neon-asm-bilinear.S
old mode 100755
new mode 100644
similarity index 99%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-neon-asm-bilinear.S
rename to source/libs/pixman/pixman-src/pixman/pixman-arm-neon-asm-bilinear.S
index e37b5c298e4da33ff0a7ba82321967ccfb06fed3..0fd92d61c58204048d5753b0566ae5aa1436f08c
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-neon-asm-bilinear.S
+++ b/source/libs/pixman/pixman-src/pixman/pixman-arm-neon-asm-bilinear.S
@@ -65,23 +65,13 @@
 .p2align 2
 
 #include "pixman-private.h"
+#include "pixman-arm-asm.h"
 #include "pixman-arm-neon-asm.h"
 
 /*
  * Bilinear macros from pixman-arm-neon-asm.S
  */
 
-/* Supplementary macro for setting function attributes */
-.macro pixman_asm_function fname
-    .func fname
-    .global fname
-#ifdef __ELF__
-    .hidden fname
-    .type fname, %function
-#endif
-fname:
-.endm
-
 /*
  * Bilinear scaling support code which tries to provide pixel fetching, color
  * format conversion, and interpolation as separate macros which can be used
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-neon-asm.S b/source/libs/pixman/pixman-src/pixman/pixman-arm-neon-asm.S
old mode 100755
new mode 100644
similarity index 99%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-neon-asm.S
rename to source/libs/pixman/pixman-src/pixman/pixman-arm-neon-asm.S
index 187197dc31d6a91004bef6f4d62c37ae038e5b70..7e949a38fd748fe7f0d834b89409b7c0d69a2de8
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-neon-asm.S
+++ b/source/libs/pixman/pixman-src/pixman/pixman-arm-neon-asm.S
@@ -50,6 +50,7 @@
     .p2align 2
 
 #include "pixman-private.h"
+#include "pixman-arm-asm.h"
 #include "pixman-arm-neon-asm.h"
 
 /* Global configuration options and preferences */
@@ -2830,17 +2831,6 @@ generate_composite_function_nearest_scanline \
 
 /******************************************************************************/
 
-/* Supplementary macro for setting function attributes */
-.macro pixman_asm_function fname
-    .func fname
-    .global fname
-#ifdef __ELF__
-    .hidden fname
-    .type fname, %function
-#endif
-fname:
-.endm
-
 /*
  * Bilinear scaling support code which tries to provide pixel fetching, color
  * format conversion, and interpolation as separate macros which can be used
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-neon-asm.h b/source/libs/pixman/pixman-src/pixman/pixman-arm-neon-asm.h
old mode 100755
new mode 100644
similarity index 99%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-neon-asm.h
rename to source/libs/pixman/pixman-src/pixman/pixman-arm-neon-asm.h
index d0d92d74cb8198b0ff4dacd90acaf6e85f20c621..bdcf6a9d47f30a93d03267dfe9a4b466f98350b1
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-neon-asm.h
+++ b/source/libs/pixman/pixman-src/pixman/pixman-arm-neon-asm.h
@@ -631,14 +631,8 @@ local skip1
                                    src_basereg_   = 0, \
                                    mask_basereg_  = 24
 
-    .func fname
-    .global fname
-    /* For ELF format also set function visibility to hidden */
-#ifdef __ELF__
-    .hidden fname
-    .type fname, %function
-#endif
-fname:
+    pixman_asm_function fname
+
     push        {r4-r12, lr}        /* save all registers */
 
 /*
@@ -945,14 +939,8 @@ fname:
                                                    src_basereg_   = 0, \
                                                    mask_basereg_  = 24
 
-    .func fname
-    .global fname
-    /* For ELF format also set function visibility to hidden */
-#ifdef __ELF__
-    .hidden fname
-    .type fname, %function
-#endif
-fname:
+    pixman_asm_function fname
+
     .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
 /*
  * Make some macro arguments globally visible and accessible
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-neon.c b/source/libs/pixman/pixman-src/pixman/pixman-arm-neon.c
old mode 100755
new mode 100644
similarity index 96%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-neon.c
rename to source/libs/pixman/pixman-src/pixman/pixman-arm-neon.c
index 60e9c78d29a57acc88b2814285611409b954b413..be761c96529e999a582b6c1c2b8559a0aaeff8a7
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-neon.c
+++ b/source/libs/pixman/pixman-src/pixman/pixman-arm-neon.c
@@ -362,21 +362,21 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, a8r8g8b8, neon_composite_out_reverse_8_8888),
     PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, a8b8g8r8, neon_composite_out_reverse_8_8888),
 
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888),
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888),
+    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
+    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888),
+    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
+    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888),
 
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565),
+    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
+    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565),
 
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565),
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565),
+    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
+    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
+    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565),
+    SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565),
 
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888),
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888),
+    SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888),
+    SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888),
     /* Note: NONE repeat is not supported yet */
     SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
     SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-simd-asm-scaled.S b/source/libs/pixman/pixman-src/pixman/pixman-arm-simd-asm-scaled.S
old mode 100755
new mode 100644
similarity index 95%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-simd-asm-scaled.S
rename to source/libs/pixman/pixman-src/pixman/pixman-arm-simd-asm-scaled.S
index 71109954885567537ebb47d52e64da93e82172c2..e050292e05e2c459b08fc40fc840a05dfb92aace
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-simd-asm-scaled.S
+++ b/source/libs/pixman/pixman-src/pixman/pixman-arm-simd-asm-scaled.S
@@ -37,16 +37,7 @@
 	.altmacro
 	.p2align 2
 
-/* Supplementary macro for setting function attributes */
-.macro pixman_asm_function fname
-	.func fname
-	.global fname
-#ifdef __ELF__
-	.hidden fname
-	.type fname, %function
-#endif
-fname:
-.endm
+#include "pixman-arm-asm.h"
 
 /*
  * Note: This code is only using armv5te instructions (not even armv6),
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-simd-asm.S b/source/libs/pixman/pixman-src/pixman/pixman-arm-simd-asm.S
old mode 100755
new mode 100644
similarity index 53%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-simd-asm.S
rename to source/libs/pixman/pixman-src/pixman/pixman-arm-simd-asm.S
index c2096887907612b3ffb8b0991bb89b7965e0d693..a74a0a8f3460762f15b78f2f8bced177b9d9e395
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-simd-asm.S
+++ b/source/libs/pixman/pixman-src/pixman/pixman-arm-simd-asm.S
@@ -37,6 +37,7 @@
 	.altmacro
 	.p2align 2
 
+#include "pixman-arm-asm.h"
 #include "pixman-arm-simd-asm.h"
 
 /* A head macro should do all processing which results in an output of up to
@@ -303,6 +304,83 @@ generate_composite_function \
 
 /******************************************************************************/
 
+.macro src_x888_0565_init /* one-time setup for the x888 -> 0565 conversion macros below */
+        /* Hold loop invariant in MASK: 0x1F in each halfword isolates the 5-bit red and blue fields */
+        ldr     MASK, =0x001F001F
+        line_saved_regs  STRIDE_S, ORIG_W /* both get re-aliased as WK4/WK7 by src_x888_0565_process_head */
+.endm
+
+.macro src_x888_0565_1pixel  s, d /* pack the x888 pixel in WK<s> into 0565 in the low half of WK<d>; STRIDE_S is the temporary */
+        and     WK&d, MASK, WK&s, lsr #3           @ 00000000000rrrrr00000000000bbbbb
+        and     STRIDE_S, WK&s, #0xFC00            @ 0000000000000000gggggg0000000000
+        orr     WK&d, WK&d, WK&d, lsr #5           @ 00000000000-----rrrrr000000bbbbb
+        orr     WK&d, WK&d, STRIDE_S, lsr #5       @ 00000000000-----rrrrrggggggbbbbb
+        /* Top 16 bits (including the don't-care bits shown as '-') are discarded during the following STRH */
+.endm
+
+.macro src_x888_0565_2pixels  slo, shi, d, tmp /* pack WK<slo> into the low half and WK<shi> into the high half of WK<d>; clobbers SCRATCH, WK<shi>, WK<tmp> */
+        and     SCRATCH, WK&shi, #0xFC00           @ 0000000000000000GGGGGG0000000000
+        and     WK&tmp, MASK, WK&shi, lsr #3       @ 00000000000RRRRR00000000000BBBBB
+        and     WK&shi, MASK, WK&slo, lsr #3       @ 00000000000rrrrr00000000000bbbbb
+        orr     WK&tmp, WK&tmp, WK&tmp, lsr #5     @ 00000000000-----RRRRR000000BBBBB
+        orr     WK&tmp, WK&tmp, SCRATCH, lsr #5    @ 00000000000-----RRRRRGGGGGGBBBBB
+        and     SCRATCH, WK&slo, #0xFC00           @ 0000000000000000gggggg0000000000
+        orr     WK&shi, WK&shi, WK&shi, lsr #5     @ 00000000000-----rrrrr000000bbbbb
+        orr     WK&shi, WK&shi, SCRATCH, lsr #5    @ 00000000000-----rrrrrggggggbbbbb
+        pkhbt   WK&d, WK&shi, WK&tmp, lsl #16      @ RRRRRGGGGGGBBBBBrrrrrggggggbbbbb
+.endm
+
+.macro src_x888_0565_process_head   cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload /* load source pixels into WK4.. and, for 16 output bytes, convert the first half */
+        WK4     .req    STRIDE_S /* four extra work-register aliases; released again in src_x888_0565_process_tail */
+        WK5     .req    STRIDE_M
+        WK6     .req    WK3
+        WK7     .req    ORIG_W
+ .if numbytes == 16 /* 16 output bytes = 8 pixels: interleave loads with conversion of the first four */
+        pixld   , 16, 4, SRC, 0 /* pixld is defined elsewhere; presumably fills WK4-WK7 with 16 source bytes */
+        src_x888_0565_2pixels  4, 5, 0, 0
+        pixld   , 8, 4, SRC, 0
+        src_x888_0565_2pixels  6, 7, 1, 1
+        pixld   , 8, 6, SRC, 0
+ .else /* source is twice as wide as the destination, hence numbytes*2 */
+        pixld   , numbytes*2, 4, SRC, 0
+ .endif
+.endm
+
+.macro src_x888_0565_process_tail   cond, numbytes, firstreg /* convert the pixels still waiting in WK4-WK7 and store the packed result */
+ .if numbytes == 16 /* remaining four pixels go to WK2/WK3; WK0/WK1 were produced in process_head */
+        src_x888_0565_2pixels  4, 5, 2, 2
+        src_x888_0565_2pixels  6, 7, 3, 4
+ .elseif numbytes == 8 /* four pixels -> WK1/WK2 */
+        src_x888_0565_2pixels  4, 5, 1, 1
+        src_x888_0565_2pixels  6, 7, 2, 2
+ .elseif numbytes == 4 /* two pixels -> WK1 */
+        src_x888_0565_2pixels  4, 5, 1, 1
+ .else /* single pixel -> low half of WK1 */
+        src_x888_0565_1pixel  4, 1
+ .endif
+ .if numbytes == 16 /* only the 16-byte case has its output starting at WK0 */
+        pixst   , numbytes, 0, DST
+ .else
+        pixst   , numbytes, 1, DST
+ .endif
+        .unreq  WK4 /* drop the aliases created in src_x888_0565_process_head */
+        .unreq  WK5
+        .unreq  WK6
+        .unreq  WK7
+.endm
+
+generate_composite_function \
+    pixman_composite_src_x888_0565_asm_armv6, 32, 0, 16, /* presumably src, mask, dest bits per pixel - TODO confirm against generate_composite_function */ \
+    FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \
+    3, /* prefetch distance */ \
+    src_x888_0565_init, /* init */ \
+    nop_macro, /* newline */ \
+    nop_macro, /* cleanup */ \
+    src_x888_0565_process_head, \
+    src_x888_0565_process_tail
+
+/******************************************************************************/
+
 .macro add_8_8_8pixels  cond, dst1, dst2
         uqadd8&cond  WK&dst1, WK&dst1, MASK
         uqadd8&cond  WK&dst2, WK&dst2, STRIDE_M
@@ -611,3 +689,491 @@ generate_composite_function \
 
 /******************************************************************************/
 
+.macro over_reverse_n_8888_init /* one-time setup: fetch the source pixel and pre-split it into channel pairs */
+        ldr     SRC, [sp, #ARGS_STACK_OFFSET] /* source pixel value is read from the stacked arguments */
+        ldr     MASK, =0x00800080 /* 0x80 per halfword: addend for the MLAs in over_reverse_n_8888_1pixel */
+        /* Split source pixel into RB/AG parts */
+        uxtb16  STRIDE_S, SRC /* bytes 0 and 2 */
+        uxtb16  STRIDE_M, SRC, ror #8 /* bytes 1 and 3 */
+        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+        uadd8   SCRATCH, MASK, MASK /* 0x80+0x80 carries out of bytes 0 and 2 only */
+        line_saved_regs  STRIDE_D, ORIG_W
+.endm
+
+.macro over_reverse_n_8888_newline /* passed as the newline hook of generate_composite_function */
+        mov     STRIDE_D, #0xFF /* constant the BICS in over_reverse_n_8888_1pixel uses to form 255 - dest alpha */
+.endm
+
+.macro over_reverse_n_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+        pixld   , numbytes, firstreg, DST, 0 /* fetch numbytes of destination; pixld is defined elsewhere, presumably filling WK<firstreg> onwards */
+.endm
+
+.macro over_reverse_n_8888_1pixel  d, is_only /* WK<d> = dest + source * (255 - dest alpha) / 255; is_only=1 when WK<d> is the only pixel of the store */
+        teq     WK&d, #0 /* whole destination pixel zero? */
+        beq     8f       /* replace with source */
+        bics    ORIG_W, STRIDE_D, WK&d, lsr #24 /* ORIG_W = 0xFF & ~alpha = 255 - dest alpha; Z set when dest is opaque */
+ .if is_only == 1
+        beq     49f      /* skip store (label 49 is at the end of over_reverse_n_8888_tail) */
+ .else
+        beq     9f       /* write same value back */
+ .endif
+        mla     SCRATCH, STRIDE_S, ORIG_W, MASK /* red/blue */
+        mla     ORIG_W, STRIDE_M, ORIG_W, MASK  /* alpha/green */
+        uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8 /* add each halfword's high byte to it; the scaled channel ends up in the high byte */
+        uxtab16 ORIG_W, ORIG_W, ORIG_W, ror #8
+        mov     SCRATCH, SCRATCH, ror #8 /* bring the red/blue results down to bytes 0 and 2 */
+        sel     ORIG_W, SCRATCH, ORIG_W /* GE=0101: bytes 0,2 from SCRATCH, bytes 1,3 kept from ORIG_W */
+        uqadd8  WK&d, WK&d, ORIG_W /* per-byte saturating add onto the destination */
+        b       9f
+8:      mov     WK&d, SRC
+9:
+.endm
+
+.macro over_reverse_n_8888_tail  numbytes, reg1, reg2, reg3, reg4 /* blend the 1, 2 or 4 destination pixels loaded by process_head and store them */
+ .if numbytes == 4
+        over_reverse_n_8888_1pixel  reg1, 1
+ .else
+        and     SCRATCH, WK&reg1, WK&reg2 /* AND the pixels together: top byte stays 0xFF only if every alpha is 0xFF */
+  .if numbytes == 16
+        and     SCRATCH, SCRATCH, WK&reg3
+        and     SCRATCH, SCRATCH, WK&reg4
+  .endif
+        mvns    SCRATCH, SCRATCH, asr #24 /* result is zero exactly when the combined top byte was 0xFF */
+        beq     49f /* skip store if all opaque */
+        over_reverse_n_8888_1pixel  reg1, 0
+        over_reverse_n_8888_1pixel  reg2, 0
+  .if numbytes == 16
+        over_reverse_n_8888_1pixel  reg3, 0
+        over_reverse_n_8888_1pixel  reg4, 0
+  .endif
+ .endif
+        pixst   , numbytes, reg1, DST
+49: /* target of the skip-store branches above and in over_reverse_n_8888_1pixel */
+.endm
+
+.macro over_reverse_n_8888_process_tail  cond, numbytes, firstreg /* adapter: evaluates firstreg..firstreg+3 with %() and forwards to over_reverse_n_8888_tail */
+        over_reverse_n_8888_tail  numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3)
+.endm
+
+generate_composite_function \
+    pixman_composite_over_reverse_n_8888_asm_armv6, 0, 0, 32 /* presumably src, mask, dest bits per pixel - TODO confirm */ \
+    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \
+    3, /* prefetch distance */ \
+    over_reverse_n_8888_init, /* init */ \
+    over_reverse_n_8888_newline, /* newline */ \
+    nop_macro, /* cleanup */ \
+    over_reverse_n_8888_process_head, \
+    over_reverse_n_8888_process_tail
+
+/******************************************************************************/
+
+.macro over_white_8888_8888_ca_init /* one-time setup; undone by over_white_8888_8888_ca_cleanup */
+        HALF    .req    SRC /* register aliases used by the over_white_8888_8888_ca_* macros */
+        TMP0    .req    STRIDE_D
+        TMP1    .req    STRIDE_S
+        TMP2    .req    STRIDE_M
+        TMP3    .req    ORIG_W
+        WK4     .req    SCRATCH
+        line_saved_regs STRIDE_D, STRIDE_M, ORIG_W
+        ldr     SCRATCH, =0x800080 /* only needed as the operand of the UADD8 below */
+        mov     HALF, #0x80 /* addend for the SMLAxy multiplies in over_white_8888_8888_ca_combine */
+        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+        uadd8   SCRATCH, SCRATCH, SCRATCH
+        .set DST_PRELOAD_BIAS, 8 /* put back to 0 by the cleanup macro */
+.endm
+
+.macro over_white_8888_8888_ca_cleanup /* reverses the assembler state changed by over_white_8888_8888_ca_init */
+        .set DST_PRELOAD_BIAS, 0
+        .unreq  HALF /* release every alias the init macro created */
+        .unreq  TMP0
+        .unreq  TMP1
+        .unreq  TMP2
+        .unreq  TMP3
+        .unreq  WK4
+.endm
+
+.macro over_white_8888_8888_ca_combine  m, d /* d = d * TMP0 / 255 per channel, then saturating + m; callers load TMP0 with ~m first; m and d are register names */
+        uxtb16  TMP1, TMP0                /* rb_notmask */
+        uxtb16  TMP2, d                   /* rb_dest; 1 stall follows */
+        smlatt  TMP3, TMP2, TMP1, HALF    /* red */
+        smlabb  TMP2, TMP2, TMP1, HALF    /* blue */
+        uxtb16  TMP0, TMP0, ror #8        /* ag_notmask */
+        uxtb16  TMP1, d, ror #8           /* ag_dest; 1 stall follows */
+        smlatt  d, TMP1, TMP0, HALF       /* alpha */
+        smlabb  TMP1, TMP1, TMP0, HALF    /* green */
+        pkhbt   TMP0, TMP2, TMP3, lsl #16 /* rb; 1 stall follows */
+        pkhbt   TMP1, TMP1, d, lsl #16    /* ag */
+        uxtab16 TMP0, TMP0, TMP0, ror #8 /* add each halfword's high byte to it; scaled channel lands in the high byte */
+        uxtab16 TMP1, TMP1, TMP1, ror #8
+        mov     TMP0, TMP0, ror #8 /* bring the red/blue results down to bytes 0 and 2 */
+        sel     d, TMP0, TMP1 /* GE=0101: bytes 0,2 from TMP0, bytes 1,3 from TMP1 */
+        uqadd8  d, d, m                   /* d is a late result */
+.endm
+
+.macro over_white_8888_8888_ca_1pixel_head /* fetch one mask pixel and one destination pixel */
+        pixld   , 4, 1, MASK, 0 /* mask -> WK1 (read as WK1 by the matching tail) */
+        pixld   , 4, 3, DST, 0 /* destination -> WK3 */
+.endm
+
+.macro over_white_8888_8888_ca_1pixel_tail /* blend one pixel (mask WK1, dest WK3) and store it unless the mask is zero */
+        mvn     TMP0, WK1 /* inverted mask, consumed by over_white_8888_8888_ca_combine */
+        teq     WK1, WK1, asr #32 /* Z if mask is all zeros or all ones; C (mask bit 31) tells which */
+        bne     01f /* mixed mask: general case */
+        bcc     03f /* mask zero: destination unchanged, skip the store */
+        mov     WK3, WK1 /* mask all ones: result is the mask value itself */
+        b       02f
+01:     over_white_8888_8888_ca_combine WK1, WK3
+02:     pixst   , 4, 3, DST
+03:
+.endm
+
+.macro over_white_8888_8888_ca_2pixels_head /* fetch two mask pixels; the destination is loaded later, in the matching tail */
+        pixld   , 8, 1, MASK, 0 /* masks -> WK1, WK2 (as read by the tail) */
+.endm
+
+.macro over_white_8888_8888_ca_2pixels_tail /* blend two pixels (masks WK1/WK2, dest WK3/WK4) and store unless both masks are zero */
+        pixld   , 8, 3, DST /* destination -> WK3, WK4 (WK4 is an alias of SCRATCH, see the init macro) */
+        mvn     TMP0, WK1 /* inverted first mask for the combine macro */
+        teq     WK1, WK1, asr #32 /* Z if first mask is all zeros or all ones; C = its bit 31 */
+        bne     01f
+        movcs   WK3, WK1 /* first mask all ones: result is the mask value */
+        bcs     02f
+        teq     WK2, #0 /* first mask zero: is the second zero as well? */
+        beq     05f /* both zero: nothing changes, skip the store */
+        b       02f
+01:     over_white_8888_8888_ca_combine WK1, WK3
+02:     mvn     TMP0, WK2 /* second pixel, classified the same way */
+        teq     WK2, WK2, asr #32
+        bne     03f
+        movcs   WK4, WK2 /* all ones: take the mask; all zeros: leave the loaded destination */
+        b       04f
+03:     over_white_8888_8888_ca_combine WK2, WK4
+04:     pixst   , 8, 3, DST
+05:
+.endm
+
+.macro over_white_8888_8888_ca_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload /* pixels are handled at most two at a time */
+ .if numbytes == 4
+        over_white_8888_8888_ca_1pixel_head
+ .else
+  .if numbytes == 16 /* four pixels: finish the first pair completely here */
+        over_white_8888_8888_ca_2pixels_head
+        over_white_8888_8888_ca_2pixels_tail
+  .endif
+        over_white_8888_8888_ca_2pixels_head /* last pair is completed by process_tail */
+ .endif
+.endm
+
+.macro over_white_8888_8888_ca_process_tail  cond, numbytes, firstreg /* completes whatever over_white_8888_8888_ca_process_head left pending */
+ .if numbytes == 4
+        over_white_8888_8888_ca_1pixel_tail
+ .else /* 8 and 16 bytes both end with one outstanding pair */
+        over_white_8888_8888_ca_2pixels_tail
+ .endif
+.endm
+
+generate_composite_function \
+    pixman_composite_over_white_8888_8888_ca_asm_armv6, 0, 32, 32 /* presumably src, mask, dest bits per pixel - TODO confirm */ \
+    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH \
+    2, /* prefetch distance */ \
+    over_white_8888_8888_ca_init, /* init */ \
+    nop_macro, /* newline */ \
+    over_white_8888_8888_ca_cleanup, /* cleanup */ \
+    over_white_8888_8888_ca_process_head, \
+    over_white_8888_8888_ca_process_tail
+
+
+.macro over_n_8888_8888_ca_init
+        /* Set up constants. RB_SRC and AG_SRC are in registers;
+         * RB_FLDS, A_SRC, and the two HALF values need to go on the
+         * stack (and the full SRC value is already there) */
+        ldr     SCRATCH, [sp, #ARGS_STACK_OFFSET] /* source pixel value from the stacked arguments */
+        mov     WK0, #0x00FF0000
+        orr     WK0, WK0, #0xFF        /* RB_FLDS (0x00FF00FF) */
+        mov     WK1, #0x80             /* HALF default value */
+        mov     WK2, SCRATCH, lsr #24  /* A_SRC */
+        orr     WK3, WK1, WK1, lsl #16 /* HALF alternate value (0x00800080) */
+        push    {WK0-WK3} /* four words, reloaded in pairs by the LDRDs in over_n_8888_8888_ca_1pixel_tail */
+ .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET+16 /* keep argument offsets valid after the 16-byte push */
+        uxtb16  SRC, SCRATCH /* source bytes 0 and 2 (aliased RB_SRC below) */
+        uxtb16  STRIDE_S, SCRATCH, ror #8 /* source bytes 1 and 3 (aliased AG_SRC below) */
+
+        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+        uadd8   SCRATCH, WK3, WK3
+
+        .unreq  WK0 /* re-map the WK names for the macros of this function; restored by the cleanup macro */
+        .unreq  WK1
+        .unreq  WK2
+        .unreq  WK3
+        WK0     .req    Y
+        WK1     .req    STRIDE_D
+        RB_SRC  .req    SRC
+        AG_SRC  .req    STRIDE_S
+        WK2     .req    STRIDE_M
+        RB_FLDS .req    r8       /* the reloaded constants have to be at consecutive registers starting at an even one */
+        A_SRC   .req    r8 /* shares r8 with RB_FLDS; each is loaded by a different LDRD in the 1pixel_tail macro */
+        HALF    .req    r9
+        WK3     .req    r10
+        WK4     .req    r11
+        WK5     .req    SCRATCH
+        WK6     .req    ORIG_W
+
+        line_saved_regs Y, STRIDE_D, STRIDE_M, ORIG_W
+.endm
+
+.macro over_n_8888_8888_ca_cleanup /* reverses over_n_8888_8888_ca_init */
+        add     sp, sp, #16 /* discard the four constants pushed by the init macro */
+ .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET-16
+
+        .unreq  WK0 /* drop every alias defined by the init macro ... */
+        .unreq  WK1
+        .unreq  RB_SRC
+        .unreq  AG_SRC
+        .unreq  WK2
+        .unreq  RB_FLDS
+        .unreq  A_SRC
+        .unreq  HALF
+        .unreq  WK3
+        .unreq  WK4
+        .unreq  WK5
+        .unreq  WK6
+        WK0     .req    r8 /* ... and map WK0-WK3 back onto r8-r11 */
+        WK1     .req    r9
+        WK2     .req    r10
+        WK3     .req    r11
+.endm
+
+.macro over_n_8888_8888_ca_1pixel_head /* fetch one mask pixel and one destination pixel */
+        pixld   , 4, 6, MASK, 0 /* mask -> WK6 (read as WK6 by the matching tail) */
+        pixld   , 4, 0, DST, 0 /* destination -> WK0 */
+.endm
+
+.macro over_n_8888_8888_ca_1pixel_tail /* blend one pixel (mask WK6, dest WK0) and store it unless the mask is zero */
+        ldrd    A_SRC, HALF, [sp, #LOCALS_STACK_OFFSET+8] /* third and fourth pushed words: A_SRC and the 0x00800080 HALF */
+        uxtb16  WK1, WK6                 /* rb_mask (first step of hard case placed in what would otherwise be a stall) */
+        teq     WK6, WK6, asr #32        /* Zc if transparent, ZC if opaque */
+        bne     20f
+        bcc     40f
+        /* Mask is fully opaque (all channels) */
+        ldr     WK6, [sp, #ARGS_STACK_OFFSET] /* get SRC back */
+        eors    A_SRC, A_SRC, #0xFF /* A_SRC = 255 - source alpha; Z set when the source is opaque */
+        bne     10f
+        /* Source is also opaque - same as src_8888_8888 */
+        mov     WK0, WK6
+        b       30f
+10:     /* Same as over_8888_8888 */
+        mul_8888_8 WK0, A_SRC, WK5, HALF
+        uqadd8  WK0, WK0, WK6
+        b       30f
+20:     /* No simplifications possible - do it the hard way */
+        uxtb16  WK2, WK6, ror #8         /* ag_mask */
+        mla     WK3, WK1, A_SRC, HALF    /* rb_mul; 2 cycles */
+        mla     WK4, WK2, A_SRC, HALF    /* ag_mul; 2 cycles */
+        ldrd    RB_FLDS, HALF, [sp, #LOCALS_STACK_OFFSET] /* first two pushed words: 0x00FF00FF and the 0x80 HALF; overwrites A_SRC (same register) */
+        uxtb16  WK5, WK0                 /* rb_dest */
+        uxtab16 WK3, WK3, WK3, ror #8
+        uxtb16  WK6, WK0, ror #8         /* ag_dest */
+        uxtab16 WK4, WK4, WK4, ror #8
+        smlatt  WK0, RB_SRC, WK1, HALF   /* red1 */
+        smlabb  WK1, RB_SRC, WK1, HALF   /* blue1 */
+        bic     WK3, RB_FLDS, WK3, lsr #8 /* 255 - (mask * source alpha / 255) for red and blue */
+        bic     WK4, RB_FLDS, WK4, lsr #8 /* likewise for alpha and green */
+        pkhbt   WK1, WK1, WK0, lsl #16   /* rb1 */
+        smlatt  WK0, WK5, WK3, HALF      /* red2 */
+        smlabb  WK3, WK5, WK3, HALF      /* blue2 */
+        uxtab16 WK1, WK1, WK1, ror #8
+        smlatt  WK5, AG_SRC, WK2, HALF   /* alpha1 */
+        pkhbt   WK3, WK3, WK0, lsl #16   /* rb2 */
+        smlabb  WK0, AG_SRC, WK2, HALF   /* green1 */
+        smlatt  WK2, WK6, WK4, HALF      /* alpha2 */
+        smlabb  WK4, WK6, WK4, HALF      /* green2 */
+        pkhbt   WK0, WK0, WK5, lsl #16   /* ag1 */
+        uxtab16 WK3, WK3, WK3, ror #8
+        pkhbt   WK4, WK4, WK2, lsl #16   /* ag2 */
+        uxtab16 WK0, WK0, WK0, ror #8
+        uxtab16 WK4, WK4, WK4, ror #8
+        mov     WK1, WK1, ror #8 /* bring the red/blue results down to bytes 0 and 2 for SEL */
+        mov     WK3, WK3, ror #8
+        sel     WK2, WK1, WK0            /* recombine source*mask */
+        sel     WK1, WK3, WK4            /* recombine dest*(1-source_alpha*mask) */
+        uqadd8  WK0, WK1, WK2            /* followed by 1 stall */
+30:     /* The destination buffer is already in the L1 cache, so
+         * there's little point in amalgamating writes */
+        pixst   , 4, 0, DST
+40:
+.endm
+
+.macro over_n_8888_8888_ca_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload /* pixels are handled strictly one at a time */
+ .rept (numbytes / 4) - 1 /* every pixel except the last is loaded, blended and stored here */
+        over_n_8888_8888_ca_1pixel_head
+        over_n_8888_8888_ca_1pixel_tail
+ .endr
+        over_n_8888_8888_ca_1pixel_head /* last pixel: load only; process_tail finishes it */
+.endm
+
+.macro over_n_8888_8888_ca_process_tail  cond, numbytes, firstreg /* finishes the one pixel left pending by over_n_8888_8888_ca_process_head */
+        over_n_8888_8888_ca_1pixel_tail
+.endm
+
+pixman_asm_function pixman_composite_over_n_8888_8888_ca_asm_armv6
+        ldr     ip, [sp] /* first stacked argument; presumably the solid source colour - TODO confirm against the C prototype */
+        cmp     ip, #-1 /* all bits set? */
+        beq     pixman_composite_over_white_8888_8888_ca_asm_armv6 /* hand over to the specialised routine */
+        /* else drop through... */
+ .endfunc
+generate_composite_function \
+    pixman_composite_over_n_8888_8888_ca_asm_armv6_helper, 0, 32, 32 /* emitted directly after the stub so the drop-through lands here */ \
+    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH | FLAG_PROCESS_CORRUPTS_WK0 \
+    2, /* prefetch distance */ \
+    over_n_8888_8888_ca_init, /* init */ \
+    nop_macro, /* newline */ \
+    over_n_8888_8888_ca_cleanup, /* cleanup */ \
+    over_n_8888_8888_ca_process_head, \
+    over_n_8888_8888_ca_process_tail
+
+/******************************************************************************/
+
+.macro in_reverse_8888_8888_init /* one-time setup for the in_reverse_8888_8888 macros */
+        /* Hold loop invariant in MASK */
+        ldr     MASK, =0x00800080 /* 0x80 per halfword: addend for the MLAs in in_reverse_8888_8888_1pixel */
+        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+        uadd8   SCRATCH, MASK, MASK
+        /* Offset the source pointer: we only need the alpha bytes */
+        add     SRC, SRC, #3 /* the head macro then reads one byte per pixel with LDRB */
+        line_saved_regs  ORIG_W
+.endm
+
+.macro in_reverse_8888_8888_head  numbytes, reg1, reg2, reg3 /* fetch one source alpha byte per pixel; destination pixels are loaded later by the tail */
+        ldrb    ORIG_W, [SRC], #4 /* first pixel's alpha; post-increment steps to the next pixel */
+ .if numbytes >= 8
+        ldrb    WK&reg1, [SRC], #4
+  .if numbytes == 16
+        ldrb    WK&reg2, [SRC], #4
+        ldrb    WK&reg3, [SRC], #4
+  .endif
+ .endif
+        add     DST, DST, #numbytes /* tail macro addresses the destination with negative offsets / decrementing multiples */
+.endm
+
+.macro in_reverse_8888_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload /* adapter: evaluates register numbers with %() and forwards to in_reverse_8888_8888_head */
+        in_reverse_8888_8888_head  numbytes, firstreg, %(firstreg+1), %(firstreg+2)
+.endm
+
+.macro in_reverse_8888_8888_1pixel  s, d, offset, is_only /* d = d * s / 255 per channel; s and d are register names; is_only=1 skips the shortcut tests */
+ .if is_only != 1
+        movs    s, ORIG_W /* take the alpha fetched earlier; Z set if it is zero */
+  .if offset != 0
+        ldrb    ORIG_W, [SRC, #offset] /* fetch the alpha byte for the following pixel (LDRB leaves the flags alone) */
+  .endif
+        beq     01f /* alpha 0: result is 0 */
+        teq     STRIDE_M, #0xFF /* NOTE(review): tests STRIDE_M rather than s; every is_only != 1 call in in_reverse_8888_8888_tail passes STRIDE_M as s */
+        beq     02f /* alpha 255: destination pixel unchanged */
+ .endif
+        uxtb16  SCRATCH, d                 /* rb_dest */
+        uxtb16  d, d, ror #8               /* ag_dest */
+        mla     SCRATCH, SCRATCH, s, MASK
+        mla     d, d, s, MASK
+        uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8 /* add each halfword's high byte to it; scaled channel lands in the high byte */
+        uxtab16 d, d, d, ror #8
+        mov     SCRATCH, SCRATCH, ror #8 /* bring the red/blue results down to bytes 0 and 2 */
+        sel     d, SCRATCH, d /* GE=0101: bytes 0,2 from SCRATCH, bytes 1,3 kept from d */
+        b       02f
+ .if offset == 0
+48:     /* Last mov d,#0 of the set - used as part of shortcut for
+         * source values all 0 */
+ .endif
+01:     mov     d, #0
+02:
+.endm
+
+.macro in_reverse_8888_8888_tail  numbytes, reg1, reg2, reg3, reg4 /* classify the fetched alphas, load the destination only if needed, scale and store */
+ .if numbytes == 4
+        teq     ORIG_W, ORIG_W, asr #32
+        ldrne   WK&reg1, [DST, #-4] /* DST was already advanced by in_reverse_8888_8888_head */
+ .elseif numbytes == 8
+        teq     ORIG_W, WK&reg1 /* chain of conditional compares: stays EQ only while every alpha matches */
+        teqeq   ORIG_W, ORIG_W, asr #32  /* all 0 or all -1? */
+        ldmnedb DST, {WK&reg1-WK&reg2} /* overwrites the alphas held in these registers with destination pixels */
+ .else
+        teq     ORIG_W, WK&reg1
+        teqeq   ORIG_W, WK&reg2
+        teqeq   ORIG_W, WK&reg3
+        teqeq   ORIG_W, ORIG_W, asr #32  /* all 0 or all -1? */
+        ldmnedb DST, {WK&reg1-WK&reg4}
+ .endif
+        cmnne   DST, #0   /* clear C if NE */
+        bcs     49f       /* no writes to dest if source all -1 */
+        beq     48f       /* set dest to all 0 if source all 0 */
+ .if numbytes == 4
+        in_reverse_8888_8888_1pixel  ORIG_W, WK&reg1, 0, 1
+        str     WK&reg1, [DST, #-4]
+ .elseif numbytes == 8
+        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg1, -4, 0
+        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg2, 0, 0
+        stmdb   DST, {WK&reg1-WK&reg2}
+ .else
+        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg1, -12, 0
+        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg2, -8, 0
+        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg3, -4, 0
+        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg4, 0, 0
+        stmdb   DST, {WK&reg1-WK&reg4}
+ .endif
+49: /* target of the no-write branch above */
+.endm
+
+.macro in_reverse_8888_8888_process_tail  cond, numbytes, firstreg /* adapter: evaluates register numbers with %() and forwards to in_reverse_8888_8888_tail */
+        in_reverse_8888_8888_tail  numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3)
+.endm
+
+generate_composite_function \
+    pixman_composite_in_reverse_8888_8888_asm_armv6, 32, 0, 32 /* presumably src, mask, dest bits per pixel - TODO confirm */ \
+    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH | FLAG_NO_PRELOAD_DST \
+    2, /* prefetch distance */ \
+    in_reverse_8888_8888_init, /* init */ \
+    nop_macro, /* newline */ \
+    nop_macro, /* cleanup */ \
+    in_reverse_8888_8888_process_head, \
+    in_reverse_8888_8888_process_tail
+
+/******************************************************************************/
+
+.macro over_n_8888_init /* one-time setup for the over_n_8888 macros */
+        ldr     SRC, [sp, #ARGS_STACK_OFFSET] /* source pixel value is read from the stacked arguments */
+        /* Hold loop invariant in MASK */
+        ldr     MASK, =0x00800080
+        /* Hold multiplier for destination in STRIDE_M */
+        mov     STRIDE_M, #255
+        sub     STRIDE_M, STRIDE_M, SRC, lsr #24 /* 255 - source alpha */
+        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+        uadd8   SCRATCH, MASK, MASK
+.endm
+
+.macro over_n_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+        pixld   , numbytes, firstreg, DST, 0 /* fetch numbytes of destination; pixld is defined elsewhere, presumably filling WK<firstreg> onwards */
+.endm
+
+.macro over_n_8888_1pixel dst /* blend the source in SRC over the destination pixel in WK<dst> */
+        mul_8888_8  WK&dst, STRIDE_M, SCRATCH, MASK /* helper defined elsewhere; presumably scales each channel by STRIDE_M (255 - source alpha) */
+        uqadd8  WK&dst, WK&dst, SRC /* per-byte saturating add of the source */
+.endm
+
+.macro over_n_8888_process_tail  cond, numbytes, firstreg /* blend every loaded pixel, then store the group */
+ .set PROCESS_REG, firstreg /* assembly-time counter naming the register being processed */
+ .rept numbytes / 4 /* one expansion per 32-bit pixel */
+        over_n_8888_1pixel %(PROCESS_REG)
+  .set PROCESS_REG, PROCESS_REG+1
+ .endr
+        pixst   , numbytes, firstreg, DST
+.endm
+
+generate_composite_function \
+    pixman_composite_over_n_8888_asm_armv6, 0, 0, 32 /* presumably src, mask, dest bits per pixel - TODO confirm */ \
+    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE \
+    2, /* prefetch distance */ \
+    over_n_8888_init, /* init */ \
+    nop_macro, /* newline */ \
+    nop_macro, /* cleanup */ \
+    over_n_8888_process_head, \
+    over_n_8888_process_tail
+
+/******************************************************************************/
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-simd-asm.h b/source/libs/pixman/pixman-src/pixman/pixman-arm-simd-asm.h
old mode 100755
new mode 100644
similarity index 90%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-simd-asm.h
rename to source/libs/pixman/pixman-src/pixman/pixman-arm-simd-asm.h
index 65436062b01e90ba271dea3436402220011af895..da153c3f58571d06f787cfa9af70e035f3e9c1d7
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-simd-asm.h
+++ b/source/libs/pixman/pixman-src/pixman/pixman-arm-simd-asm.h
@@ -76,6 +76,16 @@
 .set FLAG_SPILL_LINE_VARS,           48
 .set FLAG_PROCESS_CORRUPTS_SCRATCH,  0
 .set FLAG_PROCESS_PRESERVES_SCRATCH, 64
+.set FLAG_PROCESS_PRESERVES_WK0,     0
+.set FLAG_PROCESS_CORRUPTS_WK0,      128 /* if possible, use the specified register(s) instead so WK0 can hold number of leading pixels */
+.set FLAG_PRELOAD_DST,               0
+.set FLAG_NO_PRELOAD_DST,            256
+
+/*
+ * Number of bytes by which to adjust preload offset of destination
+ * buffer (allows preload instruction to be moved before the load(s))
+ */
+.set DST_PRELOAD_BIAS, 0
 
 /*
  * Offset into stack where mask and source pointer/stride can be accessed.
@@ -86,6 +96,11 @@
 .set ARGS_STACK_OFFSET,        (9*4)
 #endif
 
+/*
+ * Offset into stack where space allocated during init macro can be accessed.
+ */
+.set LOCALS_STACK_OFFSET,     0
+
 /*
  * Constants for selecting preferable prefetch type.
  */
@@ -196,8 +211,8 @@
         PF  add,    SCRATCH, base, WK0, lsl #bpp_shift-dst_bpp_shift
         PF  and,    SCRATCH, SCRATCH, #31
         PF  rsb,    SCRATCH, SCRATCH, WK0, lsl #bpp_shift-dst_bpp_shift
-        PF  sub,    SCRATCH, SCRATCH, #1    /* so now ranges are -16..-1 / 0..31 / 32..63 */
-        PF  movs,   SCRATCH, SCRATCH, #32-6 /* so this sets         NC   /  nc   /   Nc   */
+        PF  sub,    SCRATCH, SCRATCH, #1        /* so now ranges are -16..-1 / 0..31 / 32..63 */
+        PF  movs,   SCRATCH, SCRATCH, lsl #32-6 /* so this sets         NC   /  nc   /   Nc   */
         PF  bcs,    61f
         PF  bpl,    60f
         PF  pld,    [ptr, #32*(prefetch_distance+2)]
@@ -359,23 +374,41 @@
 
 
 .macro test_bits_1_0_ptr
+ .if (flags) & FLAG_PROCESS_CORRUPTS_WK0 /* leading_15bytes moves the leading byte count from WK0 into X for this flag */
+        movs    SCRATCH, X, lsl #32-1  /* C,N = bits 1,0 of the leading byte count held in X */
+ .else
         movs    SCRATCH, WK0, lsl #32-1  /* C,N = bits 1,0 of DST */
+ .endif
 .endm
 
 .macro test_bits_3_2_ptr
+ .if (flags) & FLAG_PROCESS_CORRUPTS_WK0 /* leading_15bytes moves the leading byte count from WK0 into X for this flag */
+        movs    SCRATCH, X, lsl #32-3  /* C,N = bits 3, 2 of the leading byte count held in X */
+ .else
         movs    SCRATCH, WK0, lsl #32-3  /* C,N = bits 3, 2 of DST */
+ .endif
 .endm
 
 .macro leading_15bytes  process_head, process_tail
         /* On entry, WK0 bits 0-3 = number of bytes until destination is 16-byte aligned */
+ .set DECREMENT_X, 1 /* handed as the last argument to conditional_process1/2 below */
+ .if (flags) & FLAG_PROCESS_CORRUPTS_WK0 /* process macros may overwrite WK0, so the count is kept in X instead */
+  .set DECREMENT_X, 0 /* X is adjusted once, up front, by the SUB below */
+        sub     X, X, WK0, lsr #dst_bpp_shift /* take the leading pixels (WK0 bytes converted to pixels) off X */
+        str     X, [sp, #LINE_SAVED_REG_COUNT*4] /* park X on the stack; presumably the word reserved after init */
+        mov     X, WK0 /* X now carries the leading byte count tested by test_bits_*_ptr */
+ .endif
         /* Use unaligned loads in all cases for simplicity */
  .if dst_w_bpp == 8
-        conditional_process2  test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, 1
+        conditional_process2  test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X
  .elseif dst_w_bpp == 16
         test_bits_1_0_ptr
-        conditional_process1  cs, process_head, process_tail, 2, 2, 1, 1, 1
+        conditional_process1  cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X
+ .endif
+        conditional_process2  test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X
+ .if (flags) & FLAG_PROCESS_CORRUPTS_WK0
+        ldr     X, [sp, #LINE_SAVED_REG_COUNT*4] /* restore the pixel count saved above */
  .endif
-        conditional_process2  test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, 1
 .endm
 
 .macro test_bits_3_2_pix
@@ -414,7 +447,7 @@
         preload_middle  src_bpp, SRC, 0
         preload_middle  mask_bpp, MASK, 0
   .endif
-  .if (dst_r_bpp > 0) && ((SUBBLOCK % 2) == 0)
+  .if (dst_r_bpp > 0) && ((SUBBLOCK % 2) == 0) && (((flags) & FLAG_NO_PRELOAD_DST) == 0)
         /* Because we know that writes are 16-byte aligned, it's relatively easy to ensure that
          * destination prefetches are 32-byte aligned. It's also the easiest channel to offset
          * preloads for, to achieve staggered prefetches for multiple channels, because there are
@@ -437,11 +470,11 @@
  .if dst_r_bpp > 0
         tst     DST, #16
         bne     111f
-        process_inner_loop  process_head, process_tail, unaligned_src, unaligned_mask, 16
+        process_inner_loop  process_head, process_tail, unaligned_src, unaligned_mask, 16 + DST_PRELOAD_BIAS
         b       112f
 111:
  .endif
-        process_inner_loop  process_head, process_tail, unaligned_src, unaligned_mask, 0
+        process_inner_loop  process_head, process_tail, unaligned_src, unaligned_mask, 0 + DST_PRELOAD_BIAS
 112:
         /* Just before the final (prefetch_distance+1) 32-byte blocks, deal with final preloads */
  .if (src_bpp*pix_per_block > 256) || (mask_bpp*pix_per_block > 256) || (dst_r_bpp*pix_per_block > 256)
@@ -449,7 +482,9 @@
  .endif
         preload_trailing  src_bpp, src_bpp_shift, SRC
         preload_trailing  mask_bpp, mask_bpp_shift, MASK
+ .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
         preload_trailing  dst_r_bpp, dst_bpp_shift, DST
+ .endif
         add     X, X, #(prefetch_distance+2)*pix_per_block - 128/dst_w_bpp
         /* The remainder of the line is handled identically to the medium case */
         medium_case_inner_loop_and_trailing_pixels  process_head, process_tail,, exit_label, unaligned_src, unaligned_mask
@@ -561,13 +596,7 @@
                                    process_tail, \
                                    process_inner_loop
 
- .func fname
- .global fname
- /* For ELF format also set function visibility to hidden */
-#ifdef __ELF__
- .hidden fname
- .type fname, %function
-#endif
+    pixman_asm_function fname
 
 /*
  * Make some macro arguments globally visible and accessible
@@ -679,7 +708,6 @@
     SCRATCH     .req    r12
     ORIG_W      .req    r14 /* width (pixels) */
 
-fname:
         push    {r4-r11, lr}        /* save all registers */
 
         subs    Y, Y, #1
@@ -705,6 +733,13 @@ fname:
 #endif
 
         init
+
+ .if (flags) & FLAG_PROCESS_CORRUPTS_WK0
+        /* Reserve a word in which to store X during leading pixels */
+        sub     sp, sp, #4
+  .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET+4
+  .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET+4
+ .endif
         
         lsl     STRIDE_D, #dst_bpp_shift /* stride in bytes */
         sub     STRIDE_D, STRIDE_D, X, lsl #dst_bpp_shift
@@ -734,42 +769,49 @@ fname:
   .if (flags) & FLAG_SPILL_LINE_VARS_WIDE
         /* This is stmdb sp!,{} */
         .word   0xE92D0000 | LINE_SAVED_REGS
+   .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
+   .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
   .endif
 151:    /* New line */
         newline
         preload_leading_step1  src_bpp, WK1, SRC
         preload_leading_step1  mask_bpp, WK2, MASK
+  .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
         preload_leading_step1  dst_r_bpp, WK3, DST
+  .endif
         
-        tst     DST, #15
+        ands    WK0, DST, #15
         beq     154f
-        rsb     WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */
-  .if (src_bpp != 0 && src_bpp != 2*dst_w_bpp) || (mask_bpp != 0 && mask_bpp != 2*dst_w_bpp)
-        PF  and,    WK0, WK0, #15
-  .endif
+        rsb     WK0, WK0, #16 /* number of leading bytes until destination aligned */
 
         preload_leading_step2  src_bpp, src_bpp_shift, WK1, SRC
         preload_leading_step2  mask_bpp, mask_bpp_shift, WK2, MASK
+  .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
         preload_leading_step2  dst_r_bpp, dst_bpp_shift, WK3, DST
+  .endif
 
         leading_15bytes  process_head, process_tail
         
 154:    /* Destination now 16-byte aligned; we have at least one prefetch on each channel as well as at least one 16-byte output block */
- .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
+  .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
         and     SCRATCH, SRC, #31
         rsb     SCRATCH, SCRATCH, #32*prefetch_distance
- .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
+  .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
         and     SCRATCH, MASK, #31
         rsb     SCRATCH, SCRATCH, #32*prefetch_distance
- .endif
- .ifc "process_inner_loop",""
+  .endif
+  .ifc "process_inner_loop",""
         switch_on_alignment  wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, wide_case_inner_loop, 157f
- .else
+  .else
         switch_on_alignment  wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, process_inner_loop, 157f
- .endif
+  .endif
 
 157:    /* Check for another line */
         end_of_line 1, %((flags) & FLAG_SPILL_LINE_VARS_WIDE), 151b
+  .if (flags) & FLAG_SPILL_LINE_VARS_WIDE
+   .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
+   .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
+  .endif
  .endif
 
  .ltorg
@@ -779,17 +821,21 @@ fname:
  .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE
         /* This is stmdb sp!,{} */
         .word   0xE92D0000 | LINE_SAVED_REGS
+  .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
+  .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
  .endif
 161:    /* New line */
         newline
         preload_line 0, src_bpp, src_bpp_shift, SRC  /* in: X, corrupts: WK0-WK1 */
         preload_line 0, mask_bpp, mask_bpp_shift, MASK
+ .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
         preload_line 0, dst_r_bpp, dst_bpp_shift, DST
+ .endif
         
         sub     X, X, #128/dst_w_bpp     /* simplifies inner loop termination */
-        tst     DST, #15
+        ands    WK0, DST, #15
         beq     164f
-        rsb     WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */
+        rsb     WK0, WK0, #16 /* number of leading bytes until destination aligned */
         
         leading_15bytes  process_head, process_tail
         
@@ -813,7 +859,9 @@ fname:
         newline
         preload_line 1, src_bpp, src_bpp_shift, SRC  /* in: X, corrupts: WK0-WK1 */
         preload_line 1, mask_bpp, mask_bpp_shift, MASK
+ .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
         preload_line 1, dst_r_bpp, dst_bpp_shift, DST
+ .endif
         
  .if dst_w_bpp == 8
         tst     DST, #3
@@ -844,12 +892,22 @@ fname:
 
 177:    /* Check for another line */
         end_of_line %(dst_w_bpp < 32), %((flags) & FLAG_SPILL_LINE_VARS_NON_WIDE), 171b, last_one
+ .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE
+  .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
+  .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
+ .endif
 
 197:
  .if (flags) & FLAG_SPILL_LINE_VARS
         add     sp, sp, #LINE_SAVED_REG_COUNT*4
  .endif
 198:
+ .if (flags) & FLAG_PROCESS_CORRUPTS_WK0
+  .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET-4
+  .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET-4
+        add     sp, sp, #4
+ .endif
+
         cleanup
 
 #ifdef DEBUG_PARAMS
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-simd.c b/source/libs/pixman/pixman-src/pixman/pixman-arm-simd.c
old mode 100755
new mode 100644
similarity index 79%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-simd.c
rename to source/libs/pixman/pixman-src/pixman/pixman-arm-simd.c
index af062e19dcb3711167cfefe57d9dcca447f60bc3..f0d14540bcf098faa520acf27358709d6c46f7e0
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm-simd.c
+++ b/source/libs/pixman/pixman-src/pixman/pixman-arm-simd.c
@@ -41,11 +41,20 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_8_8,
                                    uint8_t, 1, uint8_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_0565_8888,
                                    uint16_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_x888_0565,
+                                   uint32_t, 1, uint16_t, 1)
 
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
                                    uint8_t, 1, uint8_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
                                    uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, in_reverse_8888_8888,
+                                   uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, armv6, over_n_8888,
+                                 uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, armv6, over_reverse_n_8888,
+                                 uint32_t, 1)
 
 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
                                      uint32_t, 1, uint32_t, 1)
@@ -53,6 +62,9 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888,
                                       uint8_t, 1, uint32_t, 1)
 
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8888_8888_ca,
+                                      uint32_t, 1, uint32_t, 1)
+
 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC,
                                         uint16_t, uint16_t)
 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC,
@@ -216,6 +228,11 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
     PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, armv6_composite_src_0565_8888),
     PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, armv6_composite_src_0565_8888),
 
+    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, armv6_composite_src_x888_0565),
+    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, armv6_composite_src_x888_0565),
+    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, armv6_composite_src_x888_0565),
+    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, armv6_composite_src_x888_0565),
+
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888),
@@ -225,6 +242,13 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
 
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, armv6_composite_over_reverse_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, armv6_composite_over_reverse_n_8888),
+
     PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8),
 
     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
@@ -232,15 +256,25 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888),
 
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565),
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565),
+    PIXMAN_STD_FAST_PATH (IN_REVERSE, a8r8g8b8, null, a8r8g8b8, armv6_composite_in_reverse_8888_8888),
+    PIXMAN_STD_FAST_PATH (IN_REVERSE, a8r8g8b8, null, x8r8g8b8, armv6_composite_in_reverse_8888_8888),
+    PIXMAN_STD_FAST_PATH (IN_REVERSE, a8b8g8r8, null, a8b8g8r8, armv6_composite_in_reverse_8888_8888),
+    PIXMAN_STD_FAST_PATH (IN_REVERSE, a8b8g8r8, null, x8b8g8r8, armv6_composite_in_reverse_8888_8888),
+
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, armv6_composite_over_n_8888_8888_ca),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, armv6_composite_over_n_8888_8888_ca),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, armv6_composite_over_n_8888_8888_ca),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, armv6_composite_over_n_8888_8888_ca),
+
+    SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565),
+    SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565),
 
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888),
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888),
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888),
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888),
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888),
-    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888),
+    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888),
+    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888),
+    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888),
+    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888),
+    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888),
+    SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888),
 
     { PIXMAN_OP_NONE },
 };
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-arm.c b/source/libs/pixman/pixman-src/pixman/pixman-arm.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-arm.c
rename to source/libs/pixman/pixman-src/pixman/pixman-arm.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-bits-image.c b/source/libs/pixman/pixman-src/pixman/pixman-bits-image.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-bits-image.c
rename to source/libs/pixman/pixman-src/pixman/pixman-bits-image.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-combine-float.c b/source/libs/pixman/pixman-src/pixman/pixman-combine-float.c
old mode 100755
new mode 100644
similarity index 80%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-combine-float.c
rename to source/libs/pixman/pixman-src/pixman/pixman-combine-float.c
index 5ea739f766caf5703e19ffe2c163d7f4e7e5d288..f5145bc9d78383f34ed2e0576b579c9598e196d9
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-combine-float.c
+++ b/source/libs/pixman/pixman-src/pixman/pixman-combine-float.c
@@ -319,23 +319,44 @@ MAKE_PD_COMBINERS (conjoint_xor,		ONE_MINUS_DA_OVER_SA,		ONE_MINUS_SA_OVER_DA)
  *
  * The following blend modes have been taken from the PDF ISO 32000
  * specification, which at this point in time is available from
- * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf
- * The relevant chapters are 11.3.5 and 11.3.6.
+ *
+ *     http://www.adobe.com/devnet/pdf/pdf_reference.html
+ *
+ * The specific documents of interest are the PDF spec itself:
+ *
+ *     http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf
+ *
+ * chapters 11.3.5 and 11.3.6 and a later supplement for Adobe Acrobat
+ * 9.1 and Reader 9.1:
+ *
+ *     http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000_1.pdf
+ *
+ * that clarifies the specifications for blend modes ColorDodge and
+ * ColorBurn.
+ *
  * The formula for computing the final pixel color given in 11.3.6 is:
- * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
- * with B() being the blend function.
- * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs
- *
- * These blend modes should match the SVG filter draft specification, as
- * it has been designed to mirror ISO 32000. Note that at the current point
- * no released draft exists that shows this, as the formulas have not been
- * updated yet after the release of ISO 32000.
- *
- * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and
- * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an
- * argument. Note that this implementation operates on premultiplied colors,
- * while the PDF specification does not. Therefore the code uses the formula
- * ar.Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
+ *
+ *     αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
+ *
+ * where B() is the blend function. When B(Cb, Cs) = Cs, this formula
+ * reduces to the regular OVER operator.
+ *
+ * Cs and Cb are not premultiplied, so in our implementation we instead
+ * use:
+ *
+ *     cr = (1 – αs) × cb  +  (1 – αb) × cs  +  αb × αs × B (cb/αb, cs/αs)
+ *
+ * where cr, cs, and cb are premultiplied colors, and where the
+ *
+ *     αb × αs × B(cb/αb, cs/αs)
+ *
+ * part is first arithmetically simplified under the assumption that αb
+ * and αs are not 0, and then updated to produce a meaningful result when
+ * they are.
+ *
+ * For all the blend mode operators, the alpha channel is given by
+ *
+ *     αr = αs + αb – αb × αs
  */
 
 #define MAKE_SEPARABLE_PDF_COMBINERS(name)				\
@@ -355,18 +376,55 @@ MAKE_PD_COMBINERS (conjoint_xor,		ONE_MINUS_DA_OVER_SA,		ONE_MINUS_SA_OVER_DA)
     									\
     MAKE_COMBINERS (name, combine_ ## name ## _a, combine_ ## name ## _c)
 
+/*
+ * Multiply
+ *
+ *      ad * as * B(d / ad, s / as)
+ *    = ad * as * d/ad * s/as
+ *    = d * s
+ *
+ */
 static force_inline float
 blend_multiply (float sa, float s, float da, float d)
 {
     return d * s;
 }
 
+/*
+ * Screen
+ *
+ *      ad * as * B(d/ad, s/as)
+ *    = ad * as * (d/ad + s/as - s/as * d/ad)
+ *    = ad * s + as * d - s * d
+ */
 static force_inline float
 blend_screen (float sa, float s, float da, float d)
 {
     return d * sa + s * da - s * d;
 }
 
+/*
+ * Overlay
+ *
+ *     ad * as * B(d/ad, s/as)
+ *   = ad * as * Hardlight (s, d)
+ *   = if (d / ad < 0.5)
+ *         as * ad * Multiply (s/as, 2 * d/ad)
+ *     else
+ *         as * ad * Screen (s/as, 2 * d / ad - 1)
+ *   = if (d < 0.5 * ad)
+ *         as * ad * s/as * 2 * d /ad
+ *     else
+ *         as * ad * (s/as + 2 * d / ad - 1 - s / as * (2 * d / ad - 1))
+ *   = if (2 * d < ad)
+ *         2 * s * d
+ *     else
+ *         ad * s + 2 * as * d - as * ad - ad * s * (2 * d / ad - 1)
+ *   = if (2 * d < ad)
+ *         2 * s * d
+ *     else
+ *         as * ad - 2 * (ad - d) * (as - s)
+ */
 static force_inline float
 blend_overlay (float sa, float s, float da, float d)
 {
@@ -376,6 +434,13 @@ blend_overlay (float sa, float s, float da, float d)
 	return sa * da - 2 * (da - d) * (sa - s);
 }
 
+/*
+ * Darken
+ *
+ *     ad * as * B(d/ad, s/as)
+ *   = ad * as * MIN(d/ad, s/as)
+ *   = MIN (as * d, ad * s)
+ */
 static force_inline float
 blend_darken (float sa, float s, float da, float d)
 {
@@ -388,6 +453,13 @@ blend_darken (float sa, float s, float da, float d)
 	return s;
 }
 
+/*
+ * Lighten
+ *
+ *     ad * as * B(d/ad, s/as)
+ *   = ad * as * MAX(d/ad, s/as)
+ *   = MAX (as * d, ad * s)
+ */
 static force_inline float
 blend_lighten (float sa, float s, float da, float d)
 {
@@ -400,6 +472,24 @@ blend_lighten (float sa, float s, float da, float d)
 	return d;
 }
 
+/*
+ * Color dodge
+ *
+ *     ad * as * B(d/ad, s/as)
+ *   = if d/ad = 0
+ *         ad * as * 0
+ *     else if (d/ad >= (1 - s/as))
+ *         ad * as * 1
+ *     else
+ *         ad * as * ((d/ad) / (1 - s/as))
+ *   = if d = 0
+ *         0
+ *     elif as * d >= ad * (as - s)
+ *         ad * as
+ *     else
+ *         as * (as * d / (as - s))
+ *
+ */
 static force_inline float
 blend_color_dodge (float sa, float s, float da, float d)
 {
@@ -413,6 +503,26 @@ blend_color_dodge (float sa, float s, float da, float d)
 	return sa * sa * d / (sa - s);
 }
 
+/*
+ * Color burn
+ *
+ * We modify the first clause "if d = 1" to "if d >= 1" since with
+ * premultiplied colors d > 1 can actually happen.
+ *
+ *     ad * as * B(d/ad, s/as)
+ *   = if d/ad >= 1
+ *         ad * as * 1
+ *     elif (1 - d/ad) >= s/as
+ *         ad * as * 0
+ *     else
+ *         ad * as * (1 - ((1 - d/ad) / (s/as)))
+ *   = if d >= ad
+ *         ad * as
+ *     elif as * ad - as * d >= ad * s
+ *         0
+ *     else
+ *         ad * as  - as * as * (ad - d) / s
+ */
 static force_inline float
 blend_color_burn (float sa, float s, float da, float d)
 {
@@ -426,6 +536,23 @@ blend_color_burn (float sa, float s, float da, float d)
 	return sa * (da - sa * (da - d) / s);
 }
 
+/*
+ * Hard light
+ *
+ *     ad * as * B(d/ad, s/as)
+ *   = if (s/as <= 0.5)
+ *         ad * as * Multiply (d/ad, 2 * s/as)
+ *     else
+ *         ad * as * Screen (d/ad, 2 * s/as - 1)
+ *   = if 2 * s <= as
+ *         ad * as * d/ad * 2 * s / as
+ *     else
+ *         ad * as * (d/ad + (2 * s/as - 1) - d/ad * (2 * s/as - 1))
+ *   = if 2 * s <= as
+ *         2 * s * d
+ *     else
+ *         as * ad - 2 * (ad - d) * (as - s)
+ */
 static force_inline float
 blend_hard_light (float sa, float s, float da, float d)
 {
@@ -435,10 +562,27 @@ blend_hard_light (float sa, float s, float da, float d)
 	return sa * da - 2 * (da - d) * (sa - s);
 }
 
+/*
+ * Soft light
+ *
+ *     ad * as * B(d/ad, s/as)
+ *   = if (s/as <= 0.5)
+ *         ad * as * (d/ad - (1 - 2 * s/as) * d/ad * (1 - d/ad))
+ *     else if (d/ad <= 0.25)
+ *         ad * as * (d/ad + (2 * s/as - 1) * ((((16 * d/ad - 12) * d/ad + 4) * d/ad) - d/ad))
+ *     else
+ *         ad * as * (d/ad + (2 * s/as - 1) * (sqrt (d/ad) - d/ad))
+ *   = if (2 * s <= as)
+ *         d * as - d * (ad - d) * (as - 2 * s) / ad;
+ *     else if (4 * d <= ad)
+ *         d * as + (2 * s - as) * d * ((16 * d / ad - 12) * d / ad + 3);
+ *     else
+ *         d * as + (sqrt (d * ad) - d) * (2 * s - as);
+ */
 static force_inline float
 blend_soft_light (float sa, float s, float da, float d)
 {
-    if (2 * s < sa)
+    if (2 * s <= sa)
     {
 	if (FLOAT_IS_ZERO (da))
 	    return d * sa;
@@ -449,7 +593,7 @@ blend_soft_light (float sa, float s, float da, float d)
     {
 	if (FLOAT_IS_ZERO (da))
 	{
-	    return 0.0f;
+	    return d * sa;
 	}
 	else
 	{
@@ -461,6 +605,20 @@ blend_soft_light (float sa, float s, float da, float d)
     }
 }
 
+/*
+ * Difference
+ *
+ *     ad * as * B(s/as, d/ad)
+ *   = ad * as * abs (s/as - d/ad)
+ *   = if (s/as <= d/ad)
+ *         ad * as * (d/ad - s/as)
+ *     else
+ *         ad * as * (s/as - d/ad)
+ *   = if (ad * s <= as * d)
+ *        as * d - ad * s
+ *     else
+ *        ad * s - as * d
+ */
 static force_inline float
 blend_difference (float sa, float s, float da, float d)
 {
@@ -473,6 +631,13 @@ blend_difference (float sa, float s, float da, float d)
 	return sda - dsa;
 }
 
+/*
+ * Exclusion
+ *
+ *     ad * as * B(s/as, d/ad)
+ *   = ad * as * (d/ad + s/as - 2 * d/ad * s/as)
+ *   = as * d + ad * s - 2 * s * d
+ */
 static force_inline float
 blend_exclusion (float sa, float s, float da, float d)
 {
@@ -492,116 +657,79 @@ MAKE_SEPARABLE_PDF_COMBINERS (difference)
 MAKE_SEPARABLE_PDF_COMBINERS (exclusion)
 
 /*
- * PDF nonseperable blend modes.
- *
- * These are implemented using the following functions to operate in Hsl
- * space, with Cmax, Cmid, Cmin referring to the max, mid and min value
- * of the red, green and blue components.
+ * PDF non-separable blend modes are implemented using the following functions
+ * to operate in Hsl space, with Cmax, Cmid, Cmin referring to the max, mid
+ * and min value of the red, green and blue components.
  *
  * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
  *
  * clip_color (C):
- *   l = LUM (C)
- *   min = Cmin
- *   max = Cmax
- *   if n < 0.0
- *     C = l + (((C – l) × l) ⁄     (l – min))
- *   if x > 1.0
- *     C = l + (((C – l) × (1 – l)) (max – l))
- *   return C
+ *     l = LUM (C)
+ *     min = Cmin
+ *     max = Cmax
+ *     if n < 0.0
+ *         C = l + (((C – l) × l) ⁄ (l – min))
+ *     if x > 1.0
+ *         C = l + (((C – l) × (1 – l) ) ⁄ (max – l))
+ *     return C
  *
  * set_lum (C, l):
- *   d = l – LUM (C)
- *   C += d
- *   return clip_color (C)
+ *     d = l – LUM (C)
+ *     C += d
+ *     return clip_color (C)
  *
  * SAT (C) = CH_MAX (C) - CH_MIN (C)
  *
  * set_sat (C, s):
- *  if Cmax > Cmin
- *    Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
- *    Cmax = s
- *  else
- *    Cmid = Cmax = 0.0
- *  Cmin = 0.0
- *  return C
+ *     if Cmax > Cmin
+ *         Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
+ *         Cmax = s
+ *     else
+ *         Cmid = Cmax = 0.0
+ *         Cmin = 0.0
+ *     return C
  */
 
 /* For premultiplied colors, we need to know what happens when C is
  * multiplied by a real number. LUM and SAT are linear:
  *
- *    LUM (r × C) = r × LUM (C)		SAT (r × C) = r × SAT (C)
+ *     LUM (r × C) = r × LUM (C)	SAT (r * C) = r * SAT (C)
  *
  * If we extend clip_color with an extra argument a and change
  *
- *        if x >= 1.0
+ *     if x >= 1.0
  *
  * into
  *
- *        if x >= a
+ *     if x >= a
  *
  * then clip_color is also linear:
  *
- *     r * clip_color (C, a) = clip_color (r_c, ra);
+ *     r * clip_color (C, a) = clip_color (r * C, r * a);
  *
  * for positive r.
  *
  * Similarly, we can extend set_lum with an extra argument that is just passed
  * on to clip_color:
  *
- *     r × set_lum ( C, l, a)
+ *       r * set_lum (C, l, a)
  *
- *   = r × clip_color ( C + l - LUM (C), a)
+ *     = r × clip_color (C + l - LUM (C), a)
  *
- *   = clip_color ( r * C + r × l - LUM (r × C), r * a)
+ *     = clip_color (r * C + r × l - r * LUM (C), r * a)
  *
- *   = set_lum ( r * C, r * l, r * a)
+ *     = set_lum (r * C, r * l, r * a)
  *
  * Finally, set_sat:
  *
- *     r * set_sat (C, s) = set_sat (x * C, r * s)
+ *       r * set_sat (C, s) = set_sat (x * C, r * s)
  *
- * The above holds for all non-zero x because they x'es in the fraction for
+ * The above holds for all non-zero x, because the x'es in the fraction for
  * C_mid cancel out. Specifically, it holds for x = r:
  *
- *     r * set_sat (C, s) = set_sat (r_c, rs)
- *
- *
- *
- *
- * So, for the non-separable PDF blend modes, we have (using s, d for
- * non-premultiplied colors, and S, D for premultiplied:
- *
- *   Color:
- *
- *     a_s * a_d * B(s, d)
- *   = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1)
- *   = set_lum (S * a_d, a_s * LUM (D), a_s * a_d)
- *
- *
- *   Luminosity:
- *
- *     a_s * a_d * B(s, d)
- *   = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1)
- *   = set_lum (a_s * D, a_d * LUM(S), a_s * a_d)
- *
- *
- *   Saturation:
- *
- *     a_s * a_d * B(s, d)
- *   = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1)
- *   = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)),
- *                                        a_s * LUM (D), a_s * a_d)
- *   = set_lum (set_sat (a_s * D, a_d * SAT (S), a_s * LUM (D), a_s * a_d))
- *
- *   Hue:
- *
- *     a_s * a_d * B(s, d)
- *   = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1)
- *   = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d)
+ *       r * set_sat (C, s) = set_sat (r * C, r * s)
  *
  */
-
 typedef struct
 {
     float	r;
@@ -769,9 +897,12 @@ set_sat (rgb_t *src, float sat)
     *min = 0.0f;
 }
 
-/*
- * Hue:
- * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb))
+/* Hue:
+ *
+ *       as * ad * B(s/as, d/ad)
+ *     = as * ad * set_lum (set_sat (s/as, SAT (d/ad)), LUM (d/ad), 1)
+ *     = set_lum (set_sat (ad * s, as * SAT (d)), as * LUM (d), as * ad)
+ *
  */
 static force_inline void
 blend_hsl_hue (rgb_t *res,
@@ -786,9 +917,14 @@ blend_hsl_hue (rgb_t *res,
     set_lum (res, sa * da, get_lum (dest) * sa);
 }
 
-/*
- * Saturation:
- * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb))
+/* 
+ * Saturation
+ *
+ *     as * ad * B(s/as, d/ad)
+ *   = as * ad * set_lum (set_sat (d/ad, SAT (s/as)), LUM (d/ad), 1)
+ *   = set_lum (as * ad * set_sat (d/ad, SAT (s/as)),
+ *                                       as * LUM (d), as * ad)
+ *   = set_lum (set_sat (as * d, ad * SAT (s)), as * LUM (d), as * ad)
  */
 static force_inline void
 blend_hsl_saturation (rgb_t *res,
@@ -803,9 +939,12 @@ blend_hsl_saturation (rgb_t *res,
     set_lum (res, sa * da, get_lum (dest) * sa);
 }
 
-/*
- * Color:
- * B(Cb, Cs) = set_lum (Cs, LUM (Cb))
+/* 
+ * Color
+ *
+ *     as * ad * B(s/as, d/ad)
+ *   = as * ad * set_lum (s/as, LUM (d/ad), 1)
+ *   = set_lum (s * ad, as * LUM (d), as * ad)
  */
 static force_inline void
 blend_hsl_color (rgb_t *res,
@@ -820,8 +959,11 @@ blend_hsl_color (rgb_t *res,
 }
 
 /*
- * Luminosity:
- * B(Cb, Cs) = set_lum (Cb, LUM (Cs))
+ * Luminosity
+ *
+ *     as * ad * B(s/as, d/ad)
+ *   = as * ad * set_lum (d/ad, LUM (s/as), 1)
+ *   = set_lum (as * d, ad * LUM (s), as * ad)
  */
 static force_inline void
 blend_hsl_luminosity (rgb_t *res,
diff --git a/source/libs/pixman/pixman-src/pixman/pixman-combine32.c b/source/libs/pixman/pixman-src/pixman/pixman-combine32.c
new file mode 100644
index 0000000000000000000000000000000000000000..4c484d3e38cc1dedbc62cc10c6ec3389626d14a4
--- /dev/null
+++ b/source/libs/pixman/pixman-src/pixman/pixman-combine32.c
@@ -0,0 +1,1189 @@
+/*
+ * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
+ *             2005 Lars Knoll & Zack Rusin, Trolltech
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Keith Packard makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+#include <string.h>
+
+#include "pixman-private.h"
+#include "pixman-combine32.h"
+
+/* component alpha helper functions */
+
+static void
+combine_mask_ca (uint32_t *src, uint32_t *mask)
+{
+    uint32_t a = *mask;
+
+    uint32_t x;
+    uint16_t xa;
+
+    if (!a)
+    {
+	*(src) = 0;
+	return;
+    }
+
+    x = *(src);
+    if (a == ~0)
+    {
+	x = x >> A_SHIFT;
+	x |= x << G_SHIFT;
+	x |= x << R_SHIFT;
+	*(mask) = x;
+	return;
+    }
+
+    xa = x >> A_SHIFT;
+    UN8x4_MUL_UN8x4 (x, a);
+    *(src) = x;
+    
+    UN8x4_MUL_UN8 (a, xa);
+    *(mask) = a;
+}
+
+static void
+combine_mask_value_ca (uint32_t *src, const uint32_t *mask)
+{
+    uint32_t a = *mask;
+    uint32_t x;
+
+    if (!a)
+    {
+	*(src) = 0;
+	return;
+    }
+
+    if (a == ~0)
+	return;
+
+    x = *(src);
+    UN8x4_MUL_UN8x4 (x, a);
+    *(src) = x;
+}
+
+static void
+combine_mask_alpha_ca (const uint32_t *src, uint32_t *mask)
+{
+    uint32_t a = *(mask);
+    uint32_t x;
+
+    if (!a)
+	return;
+
+    x = *(src) >> A_SHIFT;
+    if (x == MASK)
+	return;
+
+    if (a == ~0)
+    {
+	x |= x << G_SHIFT;
+	x |= x << R_SHIFT;
+	*(mask) = x;
+	return;
+    }
+
+    UN8x4_MUL_UN8 (a, x);
+    *(mask) = a;
+}
+
+/*
+ * There are two ways of handling alpha -- either as a single unified value or
+ * a separate value for each component, hence each macro must have two
+ * versions.  The unified alpha version has a 'u' at the end of the name,
+ * the component version has a 'ca'.  Similarly, functions which deal with
+ * this difference will have two versions using the same convention.
+ */
+
+static force_inline uint32_t
+combine_mask (const uint32_t *src, const uint32_t *mask, int i)
+{
+    uint32_t s, m;
+
+    if (mask)
+    {
+	m = *(mask + i) >> A_SHIFT;
+
+	if (!m)
+	    return 0;
+    }
+
+    s = *(src + i);
+
+    if (mask)
+	UN8x4_MUL_UN8 (s, m);
+
+    return s;
+}
+
+static void
+combine_clear (pixman_implementation_t *imp,
+               pixman_op_t              op,
+               uint32_t *               dest,
+               const uint32_t *         src,
+               const uint32_t *         mask,
+               int                      width)
+{
+    memset (dest, 0, width * sizeof (uint32_t));
+}
+
+static void
+combine_dst (pixman_implementation_t *imp,
+	     pixman_op_t	      op,
+	     uint32_t *		      dest,
+	     const uint32_t *	      src,
+	     const uint32_t *         mask,
+	     int		      width)
+{
+    return;
+}
+
+static void
+combine_src_u (pixman_implementation_t *imp,
+               pixman_op_t              op,
+               uint32_t *               dest,
+               const uint32_t *         src,
+               const uint32_t *         mask,
+               int                      width)
+{
+    int i;
+
+    if (!mask)
+    {
+	memcpy (dest, src, width * sizeof (uint32_t));
+    }
+    else
+    {
+	for (i = 0; i < width; ++i)
+	{
+	    uint32_t s = combine_mask (src, mask, i);
+
+	    *(dest + i) = s;
+	}
+    }
+}
+
+/* OVER combiner (unified alpha).  A written pixel is produced by
+ * UN8x4_MUL_UN8_ADD_UN8x4 (dest, inverse source alpha, src).  With a
+ * mask, src is first multiplied by the mask pixel's alpha.  Pixels
+ * with src == 0, or with a zero mask alpha, are not written. */
+static void
+combine_over_u (pixman_implementation_t *imp, pixman_op_t op,
+                uint32_t *dest, const uint32_t *src,
+                const uint32_t *mask, int width)
+{
+    int k;
+
+    if (!mask)
+    {
+	for (k = 0; k < width; k++)
+	{
+	    uint32_t spix = src[k];
+	    uint32_t salpha = ALPHA_8 (spix);
+	    if (salpha == 0xFF)
+	    {
+		dest[k] = spix;
+	    }
+	    else if (spix != 0)
+	    {
+		uint32_t dpix = dest[k];
+		uint32_t inv_salpha = salpha ^ 0xFF;
+		UN8x4_MUL_UN8_ADD_UN8x4 (dpix, inv_salpha, spix);
+		dest[k] = dpix;
+	    }
+	}
+	return;
+    }
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t cover = ALPHA_8 (mask[k]);
+	uint32_t spix, dpix, scale;
+
+	if (cover == 0)
+	    continue;
+
+	spix = src[k];
+	if (cover == 0xFF && ALPHA_8 (spix) == 0xFF)
+	{
+	    dest[k] = spix;
+	    continue;
+	}
+	if (spix == 0)
+	    continue;
+
+	if (cover == 0xFF)
+	{
+	    scale = ALPHA_8 (spix) ^ 0xFF;
+	}
+	else
+	{
+	    UN8x4_MUL_UN8 (spix, cover);
+	    scale = ALPHA_8 (~spix);
+	}
+
+	dpix = dest[k];
+	UN8x4_MUL_UN8_ADD_UN8x4 (dpix, scale, spix);
+	dest[k] = dpix;
+    }
+}
+
+/* OVER_REVERSE combiner (unified alpha): each dest pixel is replaced
+ * by UN8x4_MUL_UN8_ADD_UN8x4 (s, ALPHA_8 (~dest), dest), where s is
+ * the masked source pixel returned by combine_mask. */
+static void
+combine_over_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
+                        uint32_t *dest, const uint32_t *src,
+                        const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t result = combine_mask (src, mask, k);
+	uint32_t under = dest[k];
+	uint32_t inv_dalpha = ALPHA_8 (~under);
+	UN8x4_MUL_UN8_ADD_UN8x4 (result, inv_dalpha, under);
+	dest[k] = result;
+    }
+}
+
+/* IN combiner (unified alpha): every dest pixel is replaced by the
+ * masked source pixel from combine_mask, multiplied by the previous
+ * dest pixel's alpha with UN8x4_MUL_UN8. */
+static void
+combine_in_u (pixman_implementation_t *imp, pixman_op_t op,
+              uint32_t *dest, const uint32_t *src,
+              const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t spix = combine_mask (src, mask, k);
+	uint32_t dalpha = ALPHA_8 (dest[k]);
+	UN8x4_MUL_UN8 (spix, dalpha);
+	dest[k] = spix;
+    }
+}
+
+/* IN_REVERSE combiner (unified alpha): every dest pixel is
+ * multiplied, with UN8x4_MUL_UN8, by the alpha of the masked source
+ * pixel returned by combine_mask. */
+static void
+combine_in_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
+                      uint32_t *dest, const uint32_t *src,
+                      const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t spix = combine_mask (src, mask, k);
+	uint32_t dpix = dest[k];
+	uint32_t salpha = ALPHA_8 (spix);
+	UN8x4_MUL_UN8 (dpix, salpha);
+	dest[k] = dpix;
+    }
+}
+
+/* OUT combiner (unified alpha): every dest pixel is replaced by the
+ * masked source pixel from combine_mask, multiplied by
+ * ALPHA_8 (~dest) with UN8x4_MUL_UN8. */
+static void
+combine_out_u (pixman_implementation_t *imp, pixman_op_t op,
+               uint32_t *dest, const uint32_t *src,
+               const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t spix = combine_mask (src, mask, k);
+	uint32_t inv_dalpha = ALPHA_8 (~dest[k]);
+	UN8x4_MUL_UN8 (spix, inv_dalpha);
+	dest[k] = spix;
+    }
+}
+
+/* OUT_REVERSE combiner (unified alpha): every dest pixel is
+ * multiplied, with UN8x4_MUL_UN8, by ALPHA_8 (~s), where s is the
+ * masked source pixel returned by combine_mask. */
+static void
+combine_out_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
+                       uint32_t *dest, const uint32_t *src,
+                       const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t spix = combine_mask (src, mask, k);
+	uint32_t dpix = dest[k];
+	uint32_t inv_salpha = ALPHA_8 (~spix);
+	UN8x4_MUL_UN8 (dpix, inv_salpha);
+	dest[k] = dpix;
+    }
+}
+
+/* ATOP combiner (unified alpha): each dest pixel is replaced by
+ * UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest alpha, dest, ALPHA_8 (~s)),
+ * where s is the masked source pixel returned by combine_mask. */
+static void
+combine_atop_u (pixman_implementation_t *imp, pixman_op_t op,
+                uint32_t *dest, const uint32_t *src,
+                const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t spix = combine_mask (src, mask, k);
+	uint32_t dpix = dest[k];
+	uint32_t dalpha = ALPHA_8 (dpix);
+	uint32_t inv_salpha = ALPHA_8 (~spix);
+
+	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (spix, dalpha, dpix, inv_salpha);
+	dest[k] = spix;
+    }
+}
+
+/* ATOP_REVERSE combiner (unified alpha): each dest pixel is replaced
+ * by UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, ALPHA_8 (~dest), dest,
+ * ALPHA_8 (s)), where s is the masked source pixel (combine_mask). */
+static void
+combine_atop_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
+                        uint32_t *dest, const uint32_t *src,
+                        const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t spix = combine_mask (src, mask, k);
+	uint32_t dpix = dest[k];
+	uint32_t salpha = ALPHA_8 (spix);
+	uint32_t inv_dalpha = ALPHA_8 (~dpix);
+
+	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (spix, inv_dalpha, dpix, salpha);
+	dest[k] = spix;
+    }
+}
+
+/* XOR combiner (unified alpha): each dest pixel is replaced by
+ * UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, ALPHA_8 (~dest), dest,
+ * ALPHA_8 (~s)), where s is the masked source pixel (combine_mask). */
+static void
+combine_xor_u (pixman_implementation_t *imp, pixman_op_t op,
+               uint32_t *dest, const uint32_t *src,
+               const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t spix = combine_mask (src, mask, k);
+	uint32_t dpix = dest[k];
+	uint32_t inv_salpha = ALPHA_8 (~spix);
+	uint32_t inv_dalpha = ALPHA_8 (~dpix);
+
+	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (spix, inv_dalpha, dpix, inv_salpha);
+	dest[k] = spix;
+    }
+}
+
+/* ADD combiner (unified alpha): the masked source pixel returned by
+ * combine_mask is added to each dest pixel with UN8x4_ADD_UN8x4 and
+ * the sum is stored back in dest. */
+static void
+combine_add_u (pixman_implementation_t *imp, pixman_op_t op,
+               uint32_t *dest, const uint32_t *src,
+               const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t spix = combine_mask (src, mask, k);
+	uint32_t dpix = dest[k];
+	UN8x4_ADD_UN8x4 (dpix, spix);
+	dest[k] = dpix;
+    }
+}
+
+/*
+ * PDF blend modes:
+ *
+ * The following blend modes have been taken from the PDF ISO 32000
+ * specification, which at this point in time is available from
+ *
+ *     http://www.adobe.com/devnet/pdf/pdf_reference.html
+ *
+ * The specific documents of interest are the PDF spec itself:
+ *
+ *     http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf
+ *
+ * chapters 11.3.5 and 11.3.6 and a later supplement for Adobe Acrobat
+ * 9.1 and Reader 9.1:
+ *
+ *     http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000_1.pdf
+ *
+ * that clarifies the specifications for blend modes ColorDodge and
+ * ColorBurn.
+ *
+ * The formula for computing the final pixel color given in 11.3.6 is:
+ *
+ *     αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
+ *
+ * where B() is the blend function. When B(Cb, Cs) = Cs, this formula
+ * reduces to the regular OVER operator.
+ *
+ * Cs and Cb are not premultiplied, so in our implementation we instead
+ * use:
+ *
+ *     cr = (1 – αs) × cb  +  (1 – αb) × cs  +  αb × αs × B (cb/αb, cs/αs)
+ *
+ * where cr, cs, and cb are premultiplied colors, and where the
+ *
+ *     αb × αs × B(cb/αb, cs/αs)
+ *
+ * part is first arithmetically simplified under the assumption that αb
+ * and αs are not 0, and then updated to produce a meaningful result when
+ * they are.
+ *
+ * For all the blend mode operators, the alpha channel is given by
+ *
+ *     αr = αs + αb – αb × αs
+ */
+
+/*
+ * Multiply
+ *
+ *      ad * as * B(d / ad, s / as)
+ *    = ad * as * d/ad * s/as
+ *    = d * s
+ *
+ */
+/* MULTIPLY combiner (unified alpha).  With s the masked source pixel
+ * (combine_mask) and d the dest pixel, three UN8x4 macros build
+ * s * ALPHA_8 (~d) + d * ALPHA_8 (~s) + d * s, stored back in dest. */
+static void
+combine_multiply_u (pixman_implementation_t *imp, pixman_op_t op,
+                    uint32_t *dest, const uint32_t *src,
+                    const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t spix = combine_mask (src, mask, k);
+	uint32_t dpix = dest[k];
+	uint32_t outside = spix;
+	uint32_t inv_salpha = ALPHA_8 (~spix);
+	uint32_t inv_dalpha = ALPHA_8 (~dpix);
+
+	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (outside, inv_dalpha, dpix, inv_salpha);
+	UN8x4_MUL_UN8x4 (dpix, spix);
+	UN8x4_ADD_UN8x4 (dpix, outside);
+
+	dest[k] = dpix;
+    }
+}
+
+/* MULTIPLY combiner (component alpha).  After combine_mask_ca has
+ * processed the source/mask pair, the result is formed from
+ * d * ~m + s * ALPHA_8 (~d), plus d * s, using the UN8x4 macros. */
+static void
+combine_multiply_ca (pixman_implementation_t *imp, pixman_op_t op,
+                     uint32_t *dest, const uint32_t *src,
+                     const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t mpix = mask[k];
+	uint32_t spix = src[k];
+	uint32_t dpix = dest[k];
+	uint32_t result = dpix;
+	uint32_t inv_dalpha = ALPHA_8 (~dpix);
+
+	combine_mask_ca (&spix, &mpix);
+
+	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (result, ~mpix, spix, inv_dalpha);
+	UN8x4_MUL_UN8x4 (dpix, spix);
+	UN8x4_ADD_UN8x4 (result, dpix);
+
+	dest[k] = result;
+    }
+}
+
+/* Clamp the lvalue v into the closed range [low, high], in place. */
+#define CLAMP(v, low, high)						\
+    do {								\
+	if ((v) < (low))						\
+	    (v) = (low);						\
+	if ((v) > (high))						\
+	    (v) = (high);						\
+    } while (0)
+
+/*
+ * Expands to combine_<name>_u and combine_<name>_ca for one separable
+ * PDF blend mode.  blend_<name> (d, da, s, sa) must be declared before
+ * use; it supplies the term added to each colour channel.  Sums are
+ * clamped to [0, 255 * 255] and then reduced with DIV_ONE_UN8.
+ */
+#define PDF_SEPARABLE_BLEND_MODE(name)					\
+    static void								\
+    combine_ ## name ## _u (pixman_implementation_t *imp, pixman_op_t op, \
+	uint32_t *dest, const uint32_t *src,				\
+	const uint32_t *mask, int width)				\
+    {									\
+	int i;								\
+	for (i = 0; i < width; ++i)					\
+	{								\
+	    uint32_t s = combine_mask (src, mask, i);			\
+	    uint32_t d = *(dest + i);					\
+	    uint8_t sa = ALPHA_8 (s);					\
+	    uint8_t isa = ~sa;						\
+	    uint8_t da = ALPHA_8 (d);					\
+	    uint8_t ida = ~da;						\
+	    int32_t ra, rr, rg, rb;					\
+	    /* alpha is da + sa - sa * da, at 255 * 255 scale */	\
+	    ra = da * 0xff + sa * 0xff - sa * da;			\
+	    rr = isa * RED_8 (d) + ida * RED_8 (s);			\
+	    rg = isa * GREEN_8 (d) + ida * GREEN_8 (s);			\
+	    rb = isa * BLUE_8 (d) + ida * BLUE_8 (s);			\
+	    /* add the mode-specific blend term */			\
+	    rr += blend_ ## name (RED_8 (d), da, RED_8 (s), sa);	\
+	    rg += blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), sa);	\
+	    rb += blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), sa);	\
+	    /* keep every sum inside [0, 255 * 255] */			\
+	    CLAMP (ra, 0, 255 * 255);					\
+	    CLAMP (rr, 0, 255 * 255);					\
+	    CLAMP (rg, 0, 255 * 255);					\
+	    CLAMP (rb, 0, 255 * 255);					\
+	    /* reduce each sum with DIV_ONE_UN8 */			\
+	    ra = DIV_ONE_UN8 (ra);					\
+	    rr = DIV_ONE_UN8 (rr);					\
+	    rg = DIV_ONE_UN8 (rg);					\
+	    rb = DIV_ONE_UN8 (rb);					\
+	    /* uint32_t cast: a signed ra << 24 could overflow int */	\
+	    *(dest + i) = (uint32_t) ra << 24 | rr << 16 | rg << 8 | rb; \
+	}								\
+    }									\
+									\
+    static void								\
+    combine_ ## name ## _ca (pixman_implementation_t *imp, pixman_op_t op, \
+	uint32_t *dest, const uint32_t *src,				\
+	const uint32_t *mask, int width)				\
+    {									\
+	int i;								\
+	for (i = 0; i < width; ++i)					\
+	{								\
+	    uint32_t m = *(mask + i);					\
+	    uint32_t s = *(src + i);					\
+	    uint32_t d = *(dest + i);					\
+	    uint8_t da = ALPHA_8 (d);					\
+	    uint8_t ida = ~da;						\
+	    int32_t ra, rr, rg, rb;					\
+	    uint8_t ira, iga, iba;					\
+	    /* combine_mask_ca updates s and m before they are used */	\
+	    combine_mask_ca (&s, &m);					\
+	    /* inverted mask channels weight the dest channels */	\
+	    ira = ~RED_8 (m);						\
+	    iga = ~GREEN_8 (m);						\
+	    iba = ~BLUE_8 (m);						\
+	    /* alpha uses ALPHA_8 (s) as the source alpha */		\
+	    ra = da * 0xff + ALPHA_8 (s) * 0xff - ALPHA_8 (s) * da;	\
+	    rr = ira * RED_8 (d) + ida * RED_8 (s);			\
+	    rg = iga * GREEN_8 (d) + ida * GREEN_8 (s);			\
+	    rb = iba * BLUE_8 (d) + ida * BLUE_8 (s);			\
+	    /* the mask channel is passed as the blend's source alpha */ \
+	    rr += blend_ ## name (RED_8 (d), da, RED_8 (s), RED_8 (m));	\
+	    rg += blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), GREEN_8 (m)); \
+	    rb += blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), BLUE_8 (m)); \
+	    /* keep every sum inside [0, 255 * 255] */			\
+	    CLAMP (ra, 0, 255 * 255);					\
+	    CLAMP (rr, 0, 255 * 255);					\
+	    CLAMP (rg, 0, 255 * 255);					\
+	    CLAMP (rb, 0, 255 * 255);					\
+	    /* reduce each sum with DIV_ONE_UN8 */			\
+	    ra = DIV_ONE_UN8 (ra);					\
+	    rr = DIV_ONE_UN8 (rr);					\
+	    rg = DIV_ONE_UN8 (rg);					\
+	    rb = DIV_ONE_UN8 (rb);					\
+	    /* uint32_t cast: a signed ra << 24 could overflow int */	\
+	    *(dest + i) = (uint32_t) ra << 24 | rr << 16 | rg << 8 | rb; \
+	}								\
+    }
+
+/*
+ * Screen blend term, returned as s * ad + d * as - s * d:
+ *
+ *      ad * as * B(d/ad, s/as)
+ *    = ad * as * (d/ad + s/as - s/as * d/ad)
+ *    = ad * s + as * d - s * d
+ */
+static inline int32_t
+blend_screen (int32_t d, int32_t ad, int32_t s, int32_t as)
+{
+    return (s * ad) + (d * as) - (s * d);
+}
+
+PDF_SEPARABLE_BLEND_MODE (screen)
+
+/*
+ * Overlay
+ *
+ *     ad * as * B(d/ad, s/as)
+ *   = ad * as * Hardlight (s, d)
+ *   = if (d / ad < 0.5)
+ *         as * ad * Multiply (s/as, 2 * d/ad)
+ *     else
+ *         as * ad * Screen (s/as, 2 * d / ad - 1)
+ *   = if (d < 0.5 * ad)
+ *         as * ad * s/as * 2 * d /ad
+ *     else
+ *         as * ad * (s/as + 2 * d / ad - 1 - s / as * (2 * d / ad - 1))
+ *   = if (2 * d < ad)
+ *         2 * s * d
+ *     else
+ *         ad * s + 2 * as * d - as * ad - ad * s * (2 * d / ad - 1)
+ *   = if (2 * d < ad)
+ *         2 * s * d
+ *     else
+ *         as * ad - 2 * (ad - d) * (as - s)
+ */
+/* Overlay blend term: 2 * s * d when 2 * d < ad, otherwise
+ * as * ad - 2 * (ad - d) * (as - s).  The value passes through a
+ * uint32_t temporary before being returned as int32_t. */
+static inline int32_t
+blend_overlay (int32_t d, int32_t ad, int32_t s, int32_t as)
+{
+    uint32_t term = (2 * d < ad)
+	? 2 * s * d
+	: as * ad - 2 * (ad - d) * (as - s);
+
+    return term;
+}
+
+PDF_SEPARABLE_BLEND_MODE (overlay)
+
+/*
+ * Darken
+ *
+ *     ad * as * B(d/ad, s/as)
+ *   = ad * as * MIN(d/ad, s/as)
+ *   = MIN (as * d, ad * s)
+ */
+/* Darken blend term: the smaller of ad * s and as * d. */
+static inline int32_t
+blend_darken (int32_t d, int32_t ad, int32_t s, int32_t as)
+{
+    int32_t scaled_s = ad * s, scaled_d = as * d;
+
+    return (scaled_s > scaled_d) ? scaled_d : scaled_s;
+}
+
+PDF_SEPARABLE_BLEND_MODE (darken)
+
+/*
+ * Lighten
+ *
+ *     ad * as * B(d/ad, s/as)
+ *   = ad * as * MAX(d/ad, s/as)
+ *   = MAX (as * d, ad * s)
+ */
+/* Lighten blend term: the larger of ad * s and as * d. */
+static inline int32_t
+blend_lighten (int32_t d, int32_t ad, int32_t s, int32_t as)
+{
+    int32_t scaled_s = ad * s, scaled_d = as * d;
+
+    return (scaled_s > scaled_d) ? scaled_s : scaled_d;
+}
+
+PDF_SEPARABLE_BLEND_MODE (lighten)
+
+/*
+ * Hard light
+ *
+ *     ad * as * B(d/ad, s/as)
+ *   = if (s/as <= 0.5)
+ *         ad * as * Multiply (d/ad, 2 * s/as)
+ *     else
+ *         ad * as * Screen (d/ad, 2 * s/as - 1)
+ *   = if 2 * s <= as
+ *         ad * as * d/ad * 2 * s / as
+ *     else
+ *         ad * as * (d/ad + (2 * s/as - 1) - d/ad * (2 * s/as - 1))
+ *   = if 2 * s <= as
+ *         2 * s * d
+ *     else
+ *         as * ad - 2 * (ad - d) * (as - s)
+ */
+/* Hard-light blend term: 2 * s * d when 2 * s < as, otherwise
+ * as * ad - 2 * (ad - d) * (as - s). */
+static inline int32_t
+blend_hard_light (int32_t d, int32_t ad, int32_t s, int32_t as)
+{
+    return (2 * s < as) ? 2 * s * d
+			: as * ad - 2 * (ad - d) * (as - s);
+}
+
+PDF_SEPARABLE_BLEND_MODE (hard_light)
+
+/*
+ * Difference
+ *
+ *     ad * as * B(s/as, d/ad)
+ *   = ad * as * abs (s/as - d/ad)
+ *   = if (s/as <= d/ad)
+ *         ad * as * (d/ad - s/as)
+ *     else
+ *         ad * as * (s/as - d/ad)
+ *   = if (ad * s <= as * d)
+ *        as * d - ad * s
+ *     else
+ *        ad * s - as * d
+ */
+/* Difference blend term: the absolute difference between d * as
+ * and s * ad, obtained by subtracting the smaller from the larger. */
+static inline int32_t
+blend_difference (int32_t d, int32_t ad, int32_t s, int32_t as)
+{
+    int32_t from_dest = d * as;
+    int32_t from_src = s * ad;
+
+    return (from_src < from_dest) ? from_dest - from_src
+				  : from_src - from_dest;
+}
+
+PDF_SEPARABLE_BLEND_MODE (difference)
+
+/*
+ * Exclusion
+ *
+ *     ad * as * B(s/as, d/ad)
+ *   = ad * as * (d/ad + s/as - 2 * d/ad * s/as)
+ *   = as * d + ad * s - 2 * s * d
+ */
+
+/* Exclusion blend term: s * ad + d * as - 2 * d * s.  Writing the
+ * combiner by hand instead of through PDF_SEPARABLE_BLEND_MODE would
+ * be faster, but that is only a performance optimization. */
+static inline int32_t
+blend_exclusion (int32_t d, int32_t ad, int32_t s, int32_t as)
+{
+    return (s * ad) + (d * as) - (2 * d * s);
+}
+
+PDF_SEPARABLE_BLEND_MODE (exclusion)
+
+#undef PDF_SEPARABLE_BLEND_MODE
+
+/* Component alpha combiners */
+
+/* CLEAR combiner (component alpha): overwrites width 32-bit pixels
+ * starting at dest with zero bytes.  imp, op, src and mask are
+ * accepted but are not used by this function. */
+static void
+combine_clear_ca (pixman_implementation_t *imp, pixman_op_t op,
+                  uint32_t *dest, const uint32_t *src,
+                  const uint32_t *mask, int width)
+{
+    memset (dest, 0, sizeof (*dest) * width);
+}
+
+/* SRC combiner (component alpha): each dest pixel is set to the
+ * source pixel as left by combine_mask_value_ca (&s, &m) for the
+ * matching mask pixel. */
+static void
+combine_src_ca (pixman_implementation_t *imp, pixman_op_t op,
+                uint32_t *dest, const uint32_t *src,
+                const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t spix = src[k];
+	uint32_t mpix = mask[k];
+
+	combine_mask_value_ca (&spix, &mpix);
+
+	dest[k] = spix;
+    }
+}
+
+/* OVER combiner (component alpha).  After combine_mask_ca, dest is
+ * set to UN8x4_MUL_UN8x4_ADD_UN8x4 (dest, ~m, s); when ~m is zero
+ * the processed source pixel is stored directly. */
+static void
+combine_over_ca (pixman_implementation_t *imp, pixman_op_t op,
+                 uint32_t *dest, const uint32_t *src,
+                 const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t spix = src[k];
+	uint32_t mpix = mask[k];
+	uint32_t inv_m;
+
+	combine_mask_ca (&spix, &mpix);
+	inv_m = ~mpix;
+
+	if (inv_m != 0)
+	{
+	    uint32_t dpix = dest[k];
+	    UN8x4_MUL_UN8x4_ADD_UN8x4 (dpix, inv_m, spix);
+	    spix = dpix;
+	}
+
+	dest[k] = spix;
+    }
+}
+
+/* OVER_REVERSE combiner (component alpha).  For pixels where
+ * ~dest >> A_SHIFT is non-zero, dest becomes (s * m) scaled by that
+ * value plus the old dest, using the UN8x4 macros. */
+static void
+combine_over_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
+                         uint32_t *dest, const uint32_t *src,
+                         const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t dpix = dest[k];
+	uint32_t inv_dalpha = ~dpix >> A_SHIFT;
+	uint32_t spix, mpix;
+
+	/* Inverse dest alpha is 0: this pixel is not modified. */
+	if (inv_dalpha == 0)
+	    continue;
+
+	spix = src[k];
+	mpix = mask[k];
+	UN8x4_MUL_UN8x4 (spix, mpix);
+	UN8x4_MUL_UN8_ADD_UN8x4 (spix, inv_dalpha, dpix);
+	dest[k] = spix;
+    }
+}
+
+/* IN combiner (component alpha).  dest is set to 0 when
+ * dest >> A_SHIFT is 0; otherwise to the source pixel processed by
+ * combine_mask_value_ca and multiplied by that dest alpha. */
+static void
+combine_in_ca (pixman_implementation_t *imp, pixman_op_t op,
+               uint32_t *dest, const uint32_t *src,
+               const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint16_t dalpha = dest[k] >> A_SHIFT;
+	uint32_t result = 0;
+
+	if (dalpha != 0)
+	{
+	    uint32_t mpix = mask[k];
+
+	    result = src[k];
+	    combine_mask_value_ca (&result, &mpix);
+
+	    /* The multiply is skipped when the alpha equals MASK. */
+	    if (dalpha != MASK)
+		UN8x4_MUL_UN8 (result, dalpha);
+	}
+
+	dest[k] = result;
+    }
+}
+
+/* IN_REVERSE combiner (component alpha).  The factor is the mask
+ * value left by combine_mask_alpha_ca; dest is multiplied by it with
+ * UN8x4_MUL_UN8x4, cleared when it is 0, untouched when all ones. */
+static void
+combine_in_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
+                       uint32_t *dest, const uint32_t *src,
+                       const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t spix = src[k];
+	uint32_t mpix = mask[k];
+	uint32_t factor, dpix = 0;
+
+	combine_mask_alpha_ca (&spix, &mpix);
+	factor = mpix;
+
+	/* All-ones factor: the destination is not written. */
+	if (factor == ~0)
+	    continue;
+
+	/* Zero factor stores 0; otherwise dest is multiplied by it. */
+	if (factor != 0)
+	{
+	    dpix = dest[k];
+	    UN8x4_MUL_UN8x4 (dpix, factor);
+	}
+
+	dest[k] = dpix;
+    }
+}
+
+/* OUT combiner (component alpha).  dest is set to 0 when
+ * ~dest >> A_SHIFT is 0; otherwise to the source pixel processed by
+ * combine_mask_value_ca and multiplied by that inverse dest alpha. */
+static void
+combine_out_ca (pixman_implementation_t *imp, pixman_op_t op,
+                uint32_t *dest, const uint32_t *src,
+                const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint16_t inv_dalpha = ~dest[k] >> A_SHIFT;
+	uint32_t result = 0;
+
+	if (inv_dalpha != 0)
+	{
+	    uint32_t mpix = mask[k];
+
+	    result = src[k];
+	    combine_mask_value_ca (&result, &mpix);
+
+	    /* The multiply is skipped when the value equals MASK. */
+	    if (inv_dalpha != MASK)
+		UN8x4_MUL_UN8 (result, inv_dalpha);
+	}
+
+	dest[k] = result;
+    }
+}
+
+/* OUT_REVERSE combiner (component alpha).  The factor is ~m, with m
+ * the mask value left by combine_mask_alpha_ca; dest is multiplied by
+ * it (UN8x4_MUL_UN8x4), cleared when 0, untouched when all ones. */
+static void
+combine_out_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
+                        uint32_t *dest, const uint32_t *src,
+                        const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t spix = src[k];
+	uint32_t mpix = mask[k];
+	uint32_t factor, dpix = 0;
+
+	combine_mask_alpha_ca (&spix, &mpix);
+	factor = ~mpix;
+
+	/* All-ones factor: the destination is not written. */
+	if (factor == ~0)
+	    continue;
+
+	/* Zero factor stores 0; otherwise dest is multiplied by it. */
+	if (factor != 0)
+	{
+	    dpix = dest[k];
+	    UN8x4_MUL_UN8x4 (dpix, factor);
+	}
+
+	dest[k] = dpix;
+    }
+}
+
+/* ATOP combiner (component alpha).  After combine_mask_ca, dest is
+ * set by UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (dest, ~m, s, dest alpha),
+ * where dest alpha is dest >> A_SHIFT read before the update. */
+static void
+combine_atop_ca (pixman_implementation_t *imp, pixman_op_t op,
+                 uint32_t *dest, const uint32_t *src,
+                 const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t dpix = dest[k];
+	uint32_t spix = src[k];
+	uint32_t mpix = mask[k];
+	uint16_t src_factor = dpix >> A_SHIFT;
+	uint32_t dest_factor;
+
+	combine_mask_ca (&spix, &mpix);
+
+	dest_factor = ~mpix;
+
+	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (dpix, dest_factor, spix, src_factor);
+
+	dest[k] = dpix;
+    }
+}
+
+/* ATOP_REVERSE combiner (component alpha).  After combine_mask_ca,
+ * dest is set by UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (dest, m, s,
+ * ~dest >> A_SHIFT), the last value read before the update. */
+static void
+combine_atop_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
+                         uint32_t *dest, const uint32_t *src,
+                         const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t dpix = dest[k];
+	uint32_t spix = src[k];
+	uint32_t mpix = mask[k];
+	uint16_t src_factor = ~dpix >> A_SHIFT;
+	uint32_t dest_factor;
+
+	combine_mask_ca (&spix, &mpix);
+
+	dest_factor = mpix;
+
+	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (dpix, dest_factor, spix, src_factor);
+
+	dest[k] = dpix;
+    }
+}
+
+/* XOR combiner (component alpha).  After combine_mask_ca, dest is
+ * set by UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (dest, ~m, s,
+ * ~dest >> A_SHIFT), the last value read before the update. */
+static void
+combine_xor_ca (pixman_implementation_t *imp, pixman_op_t op,
+                uint32_t *dest, const uint32_t *src,
+                const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t dpix = dest[k];
+	uint32_t spix = src[k];
+	uint32_t mpix = mask[k];
+	uint16_t src_factor = ~dpix >> A_SHIFT;
+	uint32_t dest_factor;
+
+	combine_mask_ca (&spix, &mpix);
+
+	dest_factor = ~mpix;
+
+	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (dpix, dest_factor, spix, src_factor);
+
+	dest[k] = dpix;
+    }
+}
+
+/* ADD combiner (component alpha): the source pixel, as left by
+ * combine_mask_value_ca, is added to each dest pixel with
+ * UN8x4_ADD_UN8x4 and the sum is stored back in dest. */
+static void
+combine_add_ca (pixman_implementation_t *imp, pixman_op_t op,
+                uint32_t *dest, const uint32_t *src,
+                const uint32_t *mask, int width)
+{
+    int k;
+
+    for (k = 0; k < width; k++)
+    {
+	uint32_t spix = src[k];
+	uint32_t mpix = mask[k];
+	uint32_t dpix = dest[k];
+
+	combine_mask_value_ca (&spix, &mpix);
+
+	UN8x4_ADD_UN8x4 (dpix, spix);
+
+	dest[k] = dpix;
+    }
+}
+
+void
+_pixman_setup_combiner_functions_32 (pixman_implementation_t *imp)
+{
+    /* Install this file's combiners in imp.  Unified alpha table: */
+    imp->combine_32[PIXMAN_OP_CLEAR] = combine_clear;
+    imp->combine_32[PIXMAN_OP_SRC] = combine_src_u;
+    imp->combine_32[PIXMAN_OP_DST] = combine_dst;
+    imp->combine_32[PIXMAN_OP_OVER] = combine_over_u;
+    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u;
+    imp->combine_32[PIXMAN_OP_IN] = combine_in_u;
+    imp->combine_32[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u;
+    imp->combine_32[PIXMAN_OP_OUT] = combine_out_u;
+    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u;
+    imp->combine_32[PIXMAN_OP_ATOP] = combine_atop_u;
+    imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u;
+    imp->combine_32[PIXMAN_OP_XOR] = combine_xor_u;
+    imp->combine_32[PIXMAN_OP_ADD] = combine_add_u;
+
+    imp->combine_32[PIXMAN_OP_MULTIPLY] = combine_multiply_u;
+    imp->combine_32[PIXMAN_OP_SCREEN] = combine_screen_u;
+    imp->combine_32[PIXMAN_OP_OVERLAY] = combine_overlay_u;
+    imp->combine_32[PIXMAN_OP_DARKEN] = combine_darken_u;
+    imp->combine_32[PIXMAN_OP_LIGHTEN] = combine_lighten_u;
+    imp->combine_32[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u;
+    imp->combine_32[PIXMAN_OP_DIFFERENCE] = combine_difference_u;
+    imp->combine_32[PIXMAN_OP_EXCLUSION] = combine_exclusion_u;
+
+    /* Component alpha combiners */
+    imp->combine_32_ca[PIXMAN_OP_CLEAR] = combine_clear_ca;
+    imp->combine_32_ca[PIXMAN_OP_SRC] = combine_src_ca;
+    /* PIXMAN_OP_DST: no component-alpha entry is assigned here */
+    imp->combine_32_ca[PIXMAN_OP_OVER] = combine_over_ca;
+    imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca;
+    imp->combine_32_ca[PIXMAN_OP_IN] = combine_in_ca;
+    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca;
+    imp->combine_32_ca[PIXMAN_OP_OUT] = combine_out_ca;
+    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca;
+    imp->combine_32_ca[PIXMAN_OP_ATOP] = combine_atop_ca;
+    imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca;
+    imp->combine_32_ca[PIXMAN_OP_XOR] = combine_xor_ca;
+    imp->combine_32_ca[PIXMAN_OP_ADD] = combine_add_ca;
+
+    imp->combine_32_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca;
+    imp->combine_32_ca[PIXMAN_OP_SCREEN] = combine_screen_ca;
+    imp->combine_32_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca;
+    imp->combine_32_ca[PIXMAN_OP_DARKEN] = combine_darken_ca;
+    imp->combine_32_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca;
+    imp->combine_32_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca;
+    imp->combine_32_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca;
+    imp->combine_32_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca;
+}
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-combine32.h b/source/libs/pixman/pixman-src/pixman/pixman-combine32.h
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-combine32.h
rename to source/libs/pixman/pixman-src/pixman/pixman-combine32.h
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-compiler.h b/source/libs/pixman/pixman-src/pixman/pixman-compiler.h
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-compiler.h
rename to source/libs/pixman/pixman-src/pixman/pixman-compiler.h
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-conical-gradient.c b/source/libs/pixman/pixman-src/pixman/pixman-conical-gradient.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-conical-gradient.c
rename to source/libs/pixman/pixman-src/pixman/pixman-conical-gradient.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-edge-accessors.c b/source/libs/pixman/pixman-src/pixman/pixman-edge-accessors.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-edge-accessors.c
rename to source/libs/pixman/pixman-src/pixman/pixman-edge-accessors.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-edge-imp.h b/source/libs/pixman/pixman-src/pixman/pixman-edge-imp.h
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-edge-imp.h
rename to source/libs/pixman/pixman-src/pixman/pixman-edge-imp.h
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-edge.c b/source/libs/pixman/pixman-src/pixman/pixman-edge.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-edge.c
rename to source/libs/pixman/pixman-src/pixman/pixman-edge.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-fast-path.c b/source/libs/pixman/pixman-src/pixman/pixman-fast-path.c
old mode 100755
new mode 100644
similarity index 99%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-fast-path.c
rename to source/libs/pixman/pixman-src/pixman/pixman-fast-path.c
index c6e43de10acc8112f6e26c78df761fb270ee4cd8..53d4a1f9047a2803ce31326a9b8a8e6a30d1e85e
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-fast-path.c
+++ b/source/libs/pixman/pixman-src/pixman/pixman-fast-path.c
@@ -2343,6 +2343,8 @@ fast_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask)
     int32_t dist_y;
     int i;
 
+    COMPILE_TIME_ASSERT (BILINEAR_INTERPOLATION_BITS < 8);
+
     fx = info->x;
     ux = iter->image->common.transform->matrix[0][0];
 
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-filter.c b/source/libs/pixman/pixman-src/pixman/pixman-filter.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-filter.c
rename to source/libs/pixman/pixman-src/pixman/pixman-filter.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-general.c b/source/libs/pixman/pixman-src/pixman/pixman-general.c
old mode 100755
new mode 100644
similarity index 91%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-general.c
rename to source/libs/pixman/pixman-src/pixman/pixman-general.c
index f82ea7d71743308a317992d6e99e16045c688346..6141cb0a30cdb767d8d2b896b1eaf8626d84bf61
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-general.c
+++ b/source/libs/pixman/pixman-src/pixman/pixman-general.c
@@ -109,6 +109,20 @@ static const op_info_t op_flags[PIXMAN_N_OPERATORS] =
 
 #define SCANLINE_BUFFER_LENGTH 8192
 
+/* Looks op up in a 0/1 table; 1 marks operators that need division. */
+static pixman_bool_t
+operator_needs_division (pixman_op_t op)
+{
+    static const uint8_t division_flags[] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* SATURATE */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, /* DISJOINT */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, /* CONJOINT */
+	0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, /* blend ops */
+    };
+
+    return division_flags[op];
+}
+
 static void
 general_composite_rect  (pixman_implementation_t *imp,
                          pixman_composite_info_t *info)
@@ -124,9 +138,10 @@ general_composite_rect  (pixman_implementation_t *imp,
     int Bpp;
     int i;
 
-    if ((src_image->common.flags & FAST_PATH_NARROW_FORMAT)		    &&
-	(!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT) &&
-	(dest_image->common.flags & FAST_PATH_NARROW_FORMAT))
+    if ((src_image->common.flags & FAST_PATH_NARROW_FORMAT)		     &&
+	(!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT)  &&
+	(dest_image->common.flags & FAST_PATH_NARROW_FORMAT)		     &&
+	!(operator_needs_division (op)))
     {
 	width_flag = ITER_NARROW;
 	Bpp = 4;
@@ -143,9 +158,9 @@ general_composite_rect  (pixman_implementation_t *imp,
     if (width <= 0 || _pixman_multiply_overflows_int (width, Bpp * 3))
 	return;
 
-    if (width * Bpp * 3 > sizeof (stack_scanline_buffer) - 32 * 3)
+    if (width * Bpp * 3 > sizeof (stack_scanline_buffer) - 15 * 3)
     {
-	scanline_buffer = pixman_malloc_ab_plus_c (width, Bpp * 3, 32 * 3);
+	scanline_buffer = pixman_malloc_ab_plus_c (width, Bpp * 3, 15 * 3);
 
 	if (!scanline_buffer)
 	    return;
@@ -181,11 +196,7 @@ general_composite_rect  (pixman_implementation_t *imp,
 	mask_image = NULL;
     }
 
-    component_alpha =
-        mask_image			      &&
-        mask_image->common.type == BITS       &&
-        mask_image->common.component_alpha    &&
-        PIXMAN_FORMAT_RGB (mask_image->bits.format);
+    component_alpha = mask_image && mask_image->common.component_alpha;
 
     _pixman_implementation_iter_init (
 	imp->toplevel, &mask_iter,
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-glyph.c b/source/libs/pixman/pixman-src/pixman/pixman-glyph.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-glyph.c
rename to source/libs/pixman/pixman-src/pixman/pixman-glyph.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-gradient-walker.c b/source/libs/pixman/pixman-src/pixman/pixman-gradient-walker.c
old mode 100755
new mode 100644
similarity index 99%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-gradient-walker.c
rename to source/libs/pixman/pixman-src/pixman/pixman-gradient-walker.c
index 5944a559adef5ca069650d314e0abe6b13bee676..822f8e62bae715b2e4f026915429db4bf9409ef3
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-gradient-walker.c
+++ b/source/libs/pixman/pixman-src/pixman/pixman-gradient-walker.c
@@ -54,7 +54,7 @@ static void
 gradient_walker_reset (pixman_gradient_walker_t *walker,
 		       pixman_fixed_48_16_t      pos)
 {
-    int32_t x, left_x, right_x;
+    int64_t x, left_x, right_x;
     pixman_color_t *left_c, *right_c;
     int n, count = walker->num_stops;
     pixman_gradient_stop_t *stops = walker->stops;
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-image.c b/source/libs/pixman/pixman-src/pixman/pixman-image.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-image.c
rename to source/libs/pixman/pixman-src/pixman/pixman-image.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-implementation.c b/source/libs/pixman/pixman-src/pixman/pixman-implementation.c
old mode 100755
new mode 100644
similarity index 96%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-implementation.c
rename to source/libs/pixman/pixman-src/pixman/pixman-implementation.c
index 58840545141526ef3f9778505f7c2b1b93eda94f..2c7de4c68797c90a872c283cf35b4b70978cdc49
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-implementation.c
+++ b/source/libs/pixman/pixman-src/pixman/pixman-implementation.c
@@ -380,6 +380,11 @@ _pixman_disabled (const char *name)
     return FALSE;
 }
 
+static const pixman_fast_path_t empty_fast_path[] =
+{
+    { PIXMAN_OP_NONE }
+};
+
 pixman_implementation_t *
 _pixman_choose_implementation (void)
 {
@@ -397,5 +402,16 @@ _pixman_choose_implementation (void)
 
     imp = _pixman_implementation_create_noop (imp);
 
+    if (_pixman_disabled ("wholeops"))
+    {
+        pixman_implementation_t *cur;
+
+        /* Disable all whole-operation paths except the general one,
+         * so that optimized iterators are used as much as possible.
+         */
+        for (cur = imp; cur->fallback; cur = cur->fallback)
+            cur->fast_paths = empty_fast_path;
+    }
+
     return imp;
 }
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-inlines.h b/source/libs/pixman/pixman-src/pixman/pixman-inlines.h
old mode 100755
new mode 100644
similarity index 99%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-inlines.h
rename to source/libs/pixman/pixman-src/pixman/pixman-inlines.h
index dd1c2f17f0c4808444dc4ea3d0f4264ced614e23..1c8441d6dabef89ca42f9e454b3ff92e844ee379
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-inlines.h
+++ b/source/libs/pixman/pixman-src/pixman/pixman-inlines.h
@@ -747,7 +747,8 @@ fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,
 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func)		\
     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),		\
     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),		\
-    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func),              \
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
 
 /*****************************************************************************/
 
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-linear-gradient.c b/source/libs/pixman/pixman-src/pixman/pixman-linear-gradient.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-linear-gradient.c
rename to source/libs/pixman/pixman-src/pixman/pixman-linear-gradient.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-matrix.c b/source/libs/pixman/pixman-src/pixman/pixman-matrix.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-matrix.c
rename to source/libs/pixman/pixman-src/pixman/pixman-matrix.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-mips-dspr2-asm.S b/source/libs/pixman/pixman-src/pixman/pixman-mips-dspr2-asm.S
old mode 100755
new mode 100644
similarity index 99%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-mips-dspr2-asm.S
rename to source/libs/pixman/pixman-src/pixman/pixman-mips-dspr2-asm.S
index 866e93e58f14166b8ad1694b158bcfd3e77aca08..9dad163b792e9fc690a385d4650551aff4be9e7c
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-mips-dspr2-asm.S
+++ b/source/libs/pixman/pixman-src/pixman/pixman-mips-dspr2-asm.S
@@ -26,7 +26,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * Author:  Nemanja Lukic (nlukic@mips.com)
+ * Author:  Nemanja Lukic (nemanja.lukic@rt-rk.com)
  */
 
 #include "pixman-private.h"
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-mips-dspr2-asm.h b/source/libs/pixman/pixman-src/pixman/pixman-mips-dspr2-asm.h
old mode 100755
new mode 100644
similarity index 99%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-mips-dspr2-asm.h
rename to source/libs/pixman/pixman-src/pixman/pixman-mips-dspr2-asm.h
index 11849bd66c7dad1c86879d4c247fd3c2f2f61ad9..e2385661966b3db402c9243f50d23fe2d09adf33
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-mips-dspr2-asm.h
+++ b/source/libs/pixman/pixman-src/pixman/pixman-mips-dspr2-asm.h
@@ -26,7 +26,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * Author:  Nemanja Lukic (nlukic@mips.com)
+ * Author:  Nemanja Lukic (nemanja.lukic@rt-rk.com)
  */
 
 #ifndef PIXMAN_MIPS_DSPR2_ASM_H
@@ -72,10 +72,8 @@
 #define LEAF_MIPS32R2(symbol)                           \
                 .globl  symbol;                         \
                 .align  2;                              \
-#ifdef __ELF__
                 .hidden symbol;                         \
                 .type   symbol, @function;              \
-#endif
                 .ent    symbol, 0;                      \
 symbol:         .frame  sp, 0, ra;                      \
                 .set    push;                           \
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-mips-dspr2.c b/source/libs/pixman/pixman-src/pixman/pixman-mips-dspr2.c
old mode 100755
new mode 100644
similarity index 98%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-mips-dspr2.c
rename to source/libs/pixman/pixman-src/pixman/pixman-mips-dspr2.c
index e10c9df0acf22fb001ad1dd4cd36f568bd7a085a..87969ae704fd5b32722d26bcc32fa4bd43e7cea0
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-mips-dspr2.c
+++ b/source/libs/pixman/pixman-src/pixman/pixman-mips-dspr2.c
@@ -26,7 +26,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * Author:  Nemanja Lukic (nlukic@mips.com)
+ * Author:  Nemanja Lukic (nemanja.lukic@rt-rk.com)
  */
 
 #ifdef HAVE_CONFIG_H
@@ -388,11 +388,11 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, mips_0565_8888),
     SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, mips_0565_8888),
 
-    PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565),
-    PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565),
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565),
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565),
 
-    PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, mips_0565_8_0565),
-    PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, mips_0565_8_0565),
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, mips_0565_8_0565),
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, mips_0565_8_0565),
 
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888),
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888),
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-mips-dspr2.h b/source/libs/pixman/pixman-src/pixman/pixman-mips-dspr2.h
old mode 100755
new mode 100644
similarity index 98%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-mips-dspr2.h
rename to source/libs/pixman/pixman-src/pixman/pixman-mips-dspr2.h
index 955ed70b8d0348b45d8c51b62fbad57e28288afb..57b38359e37ed425a5d13cdece0dbbc9d669ded9
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-mips-dspr2.h
+++ b/source/libs/pixman/pixman-src/pixman/pixman-mips-dspr2.h
@@ -26,7 +26,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * Author:  Nemanja Lukic (nlukic@mips.com)
+ * Author:  Nemanja Lukic (nemanja.lukic@rt-rk.com)
  */
 
 #ifndef PIXMAN_MIPS_DSPR2_H
@@ -328,12 +328,6 @@ FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_pad_##op,                         \
                               scaled_nearest_scanline_mips_##name##_##op,     \
                               src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
 
-/* Provide entries for the fast path table */
-#define PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func)             \
-    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func),                     \
-    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func),                      \
-    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
-
 /****************************************************************************/
 
 #define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op,            \
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-mips-memcpy-asm.S b/source/libs/pixman/pixman-src/pixman/pixman-mips-memcpy-asm.S
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-mips-memcpy-asm.S
rename to source/libs/pixman/pixman-src/pixman/pixman-mips-memcpy-asm.S
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-mips.c b/source/libs/pixman/pixman-src/pixman/pixman-mips.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-mips.c
rename to source/libs/pixman/pixman-src/pixman/pixman-mips.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-mmx.c b/source/libs/pixman/pixman-src/pixman/pixman-mmx.c
old mode 100755
new mode 100644
similarity index 96%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-mmx.c
rename to source/libs/pixman/pixman-src/pixman/pixman-mmx.c
index f9a92ce09555f5f79650b5d23cf7f9ee99799731..dec3974324259f2c5d669870d144fda839adf7ec
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-mmx.c
+++ b/source/libs/pixman/pixman-src/pixman/pixman-mmx.c
@@ -89,21 +89,7 @@ _mm_mulhi_pu16 (__m64 __A, __m64 __B)
     return __A;
 }
 
-#  ifdef __OPTIMIZE__
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shuffle_pi16 (__m64 __A, int8_t const __N)
-{
-    __m64 ret;
-
-    asm ("pshufw %2, %1, %0\n\t"
-	: "=y" (ret)
-	: "y" (__A), "K" (__N)
-    );
-
-    return ret;
-}
-#  else
-#   define _mm_shuffle_pi16(A, N)					\
+# define _mm_shuffle_pi16(A, N)						\
     ({									\
 	__m64 ret;							\
 									\
@@ -114,7 +100,6 @@ _mm_shuffle_pi16 (__m64 __A, int8_t const __N)
 									\
 	ret;								\
     })
-#  endif
 # endif
 #endif
 
@@ -3555,6 +3540,105 @@ mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
     _mm_empty ();
 }
 
+static force_inline void
+scaled_nearest_scanline_mmx_8888_8888_OVER (uint32_t*       pd,
+                                            const uint32_t* ps,
+                                            int32_t         w,
+                                            pixman_fixed_t  vx,
+                                            pixman_fixed_t  unit_x,
+                                            pixman_fixed_t  src_width_fixed,
+                                            pixman_bool_t   fully_transparent_src)
+{
+    if (fully_transparent_src)
+	return;
+
+    while (w)
+    {
+	__m64 d = load (pd);
+	__m64 s = load (ps + pixman_fixed_to_int (vx));
+	vx += unit_x;
+	while (vx >= 0)
+	    vx -= src_width_fixed;
+
+	store8888 (pd, core_combine_over_u_pixel_mmx (s, d));
+	pd++;
+
+	w--;
+    }
+
+    _mm_empty ();
+}
+
+FAST_NEAREST_MAINLOOP (mmx_8888_8888_cover_OVER,
+		       scaled_nearest_scanline_mmx_8888_8888_OVER,
+		       uint32_t, uint32_t, COVER)
+FAST_NEAREST_MAINLOOP (mmx_8888_8888_none_OVER,
+		       scaled_nearest_scanline_mmx_8888_8888_OVER,
+		       uint32_t, uint32_t, NONE)
+FAST_NEAREST_MAINLOOP (mmx_8888_8888_pad_OVER,
+		       scaled_nearest_scanline_mmx_8888_8888_OVER,
+		       uint32_t, uint32_t, PAD)
+FAST_NEAREST_MAINLOOP (mmx_8888_8888_normal_OVER,
+		       scaled_nearest_scanline_mmx_8888_8888_OVER,
+		       uint32_t, uint32_t, NORMAL)
+
+static force_inline void
+scaled_nearest_scanline_mmx_8888_n_8888_OVER (const uint32_t * mask,
+					      uint32_t *       dst,
+					      const uint32_t * src,
+					      int32_t          w,
+					      pixman_fixed_t   vx,
+					      pixman_fixed_t   unit_x,
+					      pixman_fixed_t   src_width_fixed,
+					      pixman_bool_t    zero_src)
+{
+    __m64 mm_mask;
+
+    if (zero_src || (*mask >> 24) == 0)
+    {
+	/* A workaround for https://gcc.gnu.org/PR47759 */
+	_mm_empty ();
+	return;
+    }
+
+    mm_mask = expand_alpha (load8888 (mask));
+
+    while (w)
+    {
+	uint32_t s = *(src + pixman_fixed_to_int (vx));
+	vx += unit_x;
+	while (vx >= 0)
+	    vx -= src_width_fixed;
+
+	if (s)
+	{
+	    __m64 ms = load8888 (&s);
+	    __m64 alpha = expand_alpha (ms);
+	    __m64 dest  = load8888 (dst);
+
+	    store8888 (dst, (in_over (ms, alpha, mm_mask, dest)));
+	}
+
+	dst++;
+	w--;
+    }
+
+    _mm_empty ();
+}
+
+FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_cover_OVER,
+			      scaled_nearest_scanline_mmx_8888_n_8888_OVER,
+			      uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_pad_OVER,
+			      scaled_nearest_scanline_mmx_8888_n_8888_OVER,
+			      uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_none_OVER,
+			      scaled_nearest_scanline_mmx_8888_n_8888_OVER,
+			      uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_normal_OVER,
+			      scaled_nearest_scanline_mmx_8888_n_8888_OVER,
+			      uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE)
+
 #define BSHIFT ((1 << BILINEAR_INTERPOLATION_BITS))
 #define BMSK (BSHIFT - 1)
 
@@ -3995,6 +4079,16 @@ static const pixman_fast_path_t mmx_fast_paths[] =
     PIXMAN_STD_FAST_PATH    (IN,   a8,       null,     a8,       mmx_composite_in_8_8              ),
     PIXMAN_STD_FAST_PATH    (IN,   solid,    a8,       a8,       mmx_composite_in_n_8_8            ),
 
+    SIMPLE_NEAREST_FAST_PATH (OVER,   a8r8g8b8, x8r8g8b8, mmx_8888_8888                            ),
+    SIMPLE_NEAREST_FAST_PATH (OVER,   a8b8g8r8, x8b8g8r8, mmx_8888_8888                            ),
+    SIMPLE_NEAREST_FAST_PATH (OVER,   a8r8g8b8, a8r8g8b8, mmx_8888_8888                            ),
+    SIMPLE_NEAREST_FAST_PATH (OVER,   a8b8g8r8, a8b8g8r8, mmx_8888_8888                            ),
+
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_n_8888                 ),
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_n_8888                 ),
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_n_8888                 ),
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_n_8888                 ),
+
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8,          a8r8g8b8, mmx_8888_8888                     ),
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8,          x8r8g8b8, mmx_8888_8888                     ),
     SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8,          x8r8g8b8, mmx_8888_8888                     ),
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-noop.c b/source/libs/pixman/pixman-src/pixman/pixman-noop.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-noop.c
rename to source/libs/pixman/pixman-src/pixman/pixman-noop.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-ppc.c b/source/libs/pixman/pixman-src/pixman/pixman-ppc.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-ppc.c
rename to source/libs/pixman/pixman-src/pixman/pixman-ppc.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-private.h b/source/libs/pixman/pixman-src/pixman/pixman-private.h
old mode 100755
new mode 100644
similarity index 99%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-private.h
rename to source/libs/pixman/pixman-src/pixman/pixman-private.h
index 6ca13b216d8578a92a6173ce8bc7bd5954c0c7ae..73108a01d32dbbf49021138176e433667e84ae6c
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-private.h
+++ b/source/libs/pixman/pixman-src/pixman/pixman-private.h
@@ -7,7 +7,7 @@
  * The defines which are shared between C and assembly code
  */
 
-/* bilinear interpolation precision (must be <= 8) */
+/* bilinear interpolation precision (must be < 8) */
 #define BILINEAR_INTERPOLATION_BITS 7
 #define BILINEAR_INTERPOLATION_RANGE (1 << BILINEAR_INTERPOLATION_BITS)
 
@@ -345,8 +345,8 @@ typedef struct
     float		    r_s, r_b;
     float		    g_s, g_b;
     float		    b_s, b_b;
-    pixman_fixed_t	    left_x;
-    pixman_fixed_t          right_x;
+    pixman_fixed_48_16_t    left_x;
+    pixman_fixed_48_16_t    right_x;
 
     pixman_gradient_stop_t *stops;
     int                     num_stops;
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-radial-gradient.c b/source/libs/pixman/pixman-src/pixman/pixman-radial-gradient.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-radial-gradient.c
rename to source/libs/pixman/pixman-src/pixman/pixman-radial-gradient.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-region.c b/source/libs/pixman/pixman-src/pixman/pixman-region.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-region.c
rename to source/libs/pixman/pixman-src/pixman/pixman-region.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-region16.c b/source/libs/pixman/pixman-src/pixman/pixman-region16.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-region16.c
rename to source/libs/pixman/pixman-src/pixman/pixman-region16.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-region32.c b/source/libs/pixman/pixman-src/pixman/pixman-region32.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-region32.c
rename to source/libs/pixman/pixman-src/pixman/pixman-region32.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-solid-fill.c b/source/libs/pixman/pixman-src/pixman/pixman-solid-fill.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-solid-fill.c
rename to source/libs/pixman/pixman-src/pixman/pixman-solid-fill.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-sse2.c b/source/libs/pixman/pixman-src/pixman/pixman-sse2.c
old mode 100755
new mode 100644
similarity index 99%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-sse2.c
rename to source/libs/pixman/pixman-src/pixman/pixman-sse2.c
index a6e780815b9eacf7dcf37e9a571e149e6e681fff..895510372fd1cc5d6e3b18996fb376668abc8d69
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-sse2.c
+++ b/source/libs/pixman/pixman-src/pixman/pixman-sse2.c
@@ -6274,31 +6274,15 @@ static const pixman_fast_path_t sse2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8),
     PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8),
 
-    SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
+    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
+    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
+    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
+    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
 
     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
-    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
-    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
-    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
-    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
 
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-ssse3.c b/source/libs/pixman/pixman-src/pixman/pixman-ssse3.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-ssse3.c
rename to source/libs/pixman/pixman-src/pixman/pixman-ssse3.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-timer.c b/source/libs/pixman/pixman-src/pixman/pixman-timer.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-timer.c
rename to source/libs/pixman/pixman-src/pixman/pixman-timer.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-trap.c b/source/libs/pixman/pixman-src/pixman/pixman-trap.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-trap.c
rename to source/libs/pixman/pixman-src/pixman/pixman-trap.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-utils.c b/source/libs/pixman/pixman-src/pixman/pixman-utils.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-utils.c
rename to source/libs/pixman/pixman-src/pixman/pixman-utils.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-version.h.in b/source/libs/pixman/pixman-src/pixman/pixman-version.h.in
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-version.h.in
rename to source/libs/pixman/pixman-src/pixman/pixman-version.h.in
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-vmx.c b/source/libs/pixman/pixman-src/pixman/pixman-vmx.c
old mode 100755
new mode 100644
similarity index 61%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-vmx.c
rename to source/libs/pixman/pixman-src/pixman/pixman-vmx.c
index c33631c0edb500a67aaaca1901fc2250a6c9a3c1..41efdcfa1de49f23738f4e3febcbde298287eb6b
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman-vmx.c
+++ b/source/libs/pixman/pixman-src/pixman/pixman-vmx.c
@@ -30,17 +30,41 @@
 #endif
 #include "pixman-private.h"
 #include "pixman-combine32.h"
+#include "pixman-inlines.h"
 #include <altivec.h>
 
 #define AVV(x...) {x}
 
+static vector unsigned int mask_ff000000;
+static vector unsigned int mask_red;
+static vector unsigned int mask_green;
+static vector unsigned int mask_blue;
+static vector unsigned int mask_565_fix_rb;
+static vector unsigned int mask_565_fix_g;
+
 static force_inline vector unsigned int
 splat_alpha (vector unsigned int pix)
 {
+#ifdef WORDS_BIGENDIAN
     return vec_perm (pix, pix,
 		     (vector unsigned char)AVV (
 			 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04,
 			 0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C));
+#else
+    return vec_perm (pix, pix,
+		     (vector unsigned char)AVV (
+			 0x03, 0x03, 0x03, 0x03, 0x07, 0x07, 0x07, 0x07,
+			 0x0B, 0x0B, 0x0B, 0x0B, 0x0F, 0x0F, 0x0F, 0x0F));
+#endif
+}
+
+static force_inline vector unsigned int
+splat_pixel (vector unsigned int pix)
+{
+    return vec_perm (pix, pix,
+		     (vector unsigned char)AVV (
+			 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
+			 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03));
 }
 
 static force_inline vector unsigned int
@@ -50,12 +74,22 @@ pix_multiply (vector unsigned int p, vector unsigned int a)
 
     /* unpack to short */
     hi = (vector unsigned short)
+#ifdef WORDS_BIGENDIAN
 	vec_mergeh ((vector unsigned char)AVV (0),
 		    (vector unsigned char)p);
+#else
+	vec_mergeh ((vector unsigned char) p,
+		    (vector unsigned char) AVV (0));
+#endif
 
     mod = (vector unsigned short)
+#ifdef WORDS_BIGENDIAN
 	vec_mergeh ((vector unsigned char)AVV (0),
 		    (vector unsigned char)a);
+#else
+	vec_mergeh ((vector unsigned char) a,
+		    (vector unsigned char) AVV (0));
+#endif
 
     hi = vec_mladd (hi, mod, (vector unsigned short)
                     AVV (0x0080, 0x0080, 0x0080, 0x0080,
@@ -67,11 +101,22 @@ pix_multiply (vector unsigned int p, vector unsigned int a)
 
     /* unpack to short */
     lo = (vector unsigned short)
+#ifdef WORDS_BIGENDIAN
 	vec_mergel ((vector unsigned char)AVV (0),
 		    (vector unsigned char)p);
+#else
+	vec_mergel ((vector unsigned char) p,
+		    (vector unsigned char) AVV (0));
+#endif
+
     mod = (vector unsigned short)
+#ifdef WORDS_BIGENDIAN
 	vec_mergel ((vector unsigned char)AVV (0),
 		    (vector unsigned char)a);
+#else
+	vec_mergel ((vector unsigned char) a,
+		    (vector unsigned char) AVV (0));
+#endif
 
     lo = vec_mladd (lo, mod, (vector unsigned short)
                     AVV (0x0080, 0x0080, 0x0080, 0x0080,
@@ -129,6 +174,7 @@ over (vector unsigned int src,
     over (pix_multiply (src, mask),					\
           pix_multiply (srca, mask), dest)
 
+#ifdef WORDS_BIGENDIAN
 
 #define COMPUTE_SHIFT_MASK(source)					\
     source ## _mask = vec_lvsl (0, source);
@@ -140,36 +186,305 @@ over (vector unsigned int src,
     mask ## _mask = vec_lvsl (0, mask);					\
     source ## _mask = vec_lvsl (0, source);
 
-/* notice you have to declare temp vars...
- * Note: tmp3 and tmp4 must remain untouched!
- */
-
-#define LOAD_VECTORS(dest, source)			  \
+#define LOAD_VECTOR(source)				  \
+do							  \
+{							  \
+    vector unsigned char tmp1, tmp2;			  \
     tmp1 = (typeof(tmp1))vec_ld (0, source);		  \
     tmp2 = (typeof(tmp2))vec_ld (15, source);		  \
-    v ## source = (typeof(v ## source))			  \
+    v ## source = (typeof(v ## source)) 		  \
 	vec_perm (tmp1, tmp2, source ## _mask);		  \
-    v ## dest = (typeof(v ## dest))vec_ld (0, dest);
+} while (0)
 
-#define LOAD_VECTORSC(dest, source, mask)		  \
-    tmp1 = (typeof(tmp1))vec_ld (0, source);		  \
-    tmp2 = (typeof(tmp2))vec_ld (15, source);		  \
-    v ## source = (typeof(v ## source))			  \
-	vec_perm (tmp1, tmp2, source ## _mask);		  \
-    tmp1 = (typeof(tmp1))vec_ld (0, mask);		  \
+#define LOAD_VECTORS(dest, source)			  \
+do							  \
+{							  \
+    LOAD_VECTOR(source);				  \
     v ## dest = (typeof(v ## dest))vec_ld (0, dest);	  \
-    tmp2 = (typeof(tmp2))vec_ld (15, mask);		  \
-    v ## mask = (typeof(v ## mask))			  \
-	vec_perm (tmp1, tmp2, mask ## _mask);
+} while (0)
+
+#define LOAD_VECTORSC(dest, source, mask)		  \
+do							  \
+{							  \
+    LOAD_VECTORS(dest, source); 			  \
+    LOAD_VECTOR(mask);					  \
+} while (0)
+
+#define DECLARE_SRC_MASK_VAR vector unsigned char src_mask
+#define DECLARE_MASK_MASK_VAR vector unsigned char mask_mask
+
+#else
+
+/* The COMPUTE_SHIFT_{MASK, MASKS, MASKC} macros below are just no-ops.
+ * They are defined that way because little-endian AltiVec can do unaligned
+ * reads natively and has no need to construct the permutation pattern
+ * variables.
+ */
+#define COMPUTE_SHIFT_MASK(source)
+
+#define COMPUTE_SHIFT_MASKS(dest, source)
+
+#define COMPUTE_SHIFT_MASKC(dest, source, mask)
+
+# define LOAD_VECTOR(source)				\
+    v ## source = *((typeof(v ## source)*)source);
+
+# define LOAD_VECTORS(dest, source)			\
+    LOAD_VECTOR(source);				\
+    LOAD_VECTOR(dest);					\
+
+# define LOAD_VECTORSC(dest, source, mask)		\
+    LOAD_VECTORS(dest, source); 			\
+    LOAD_VECTOR(mask);					\
+
+#define DECLARE_SRC_MASK_VAR
+#define DECLARE_MASK_MASK_VAR
+
+#endif /* WORDS_BIGENDIAN */
 
 #define LOAD_VECTORSM(dest, source, mask)				\
-    LOAD_VECTORSC (dest, source, mask)					\
+    LOAD_VECTORSC (dest, source, mask); 				\
     v ## source = pix_multiply (v ## source,				\
                                 splat_alpha (v ## mask));
 
 #define STORE_VECTOR(dest)						\
     vec_st ((vector unsigned int) v ## dest, 0, dest);
 
+/* load 4 pixels from a 16-byte boundary aligned address */
+static force_inline vector unsigned int
+load_128_aligned (const uint32_t* src)
+{
+    return *((vector unsigned int *) src);
+}
+
+/* load 4 pixels from an unaligned address */
+static force_inline vector unsigned int
+load_128_unaligned (const uint32_t* src)
+{
+    vector unsigned int vsrc;
+    DECLARE_SRC_MASK_VAR;
+
+    COMPUTE_SHIFT_MASK (src);
+    LOAD_VECTOR (src);
+
+    return vsrc;
+}
+
+/* store 4 pixels to a 16-byte-aligned address */
+static force_inline void
+save_128_aligned (uint32_t* data,
+		  vector unsigned int vdata)
+{
+    STORE_VECTOR(data)
+}
+
+static force_inline vector unsigned int
+create_mask_1x32_128 (const uint32_t *src)
+{
+    vector unsigned int vsrc;
+    DECLARE_SRC_MASK_VAR;
+
+    COMPUTE_SHIFT_MASK (src);
+    LOAD_VECTOR (src);
+    return vec_splat(vsrc, 0);
+}
+
+static force_inline vector unsigned int
+create_mask_32_128 (uint32_t mask)
+{
+    return create_mask_1x32_128(&mask);
+}
+
+static force_inline vector unsigned int
+unpacklo_128_16x8 (vector unsigned int data1, vector unsigned int data2)
+{
+    vector unsigned char lo;
+
+    /* unpack to short */
+    lo = (vector unsigned char)
+#ifdef WORDS_BIGENDIAN
+	vec_mergel ((vector unsigned char) data2,
+		    (vector unsigned char) data1);
+#else
+	vec_mergel ((vector unsigned char) data1,
+		    (vector unsigned char) data2);
+#endif
+
+    return (vector unsigned int) lo;
+}
+
+static force_inline vector unsigned int
+unpackhi_128_16x8 (vector unsigned int data1, vector unsigned int data2)
+{
+    vector unsigned char hi;
+
+    /* unpack to short */
+    hi = (vector unsigned char)
+#ifdef WORDS_BIGENDIAN
+	vec_mergeh ((vector unsigned char) data2,
+		    (vector unsigned char) data1);
+#else
+	vec_mergeh ((vector unsigned char) data1,
+		    (vector unsigned char) data2);
+#endif
+
+    return (vector unsigned int) hi;
+}
+
+static force_inline vector unsigned int
+unpacklo_128_8x16 (vector unsigned int data1, vector unsigned int data2)
+{
+    vector unsigned short lo;
+
+    /* interleave the 16-bit elements of the two inputs (vec_mergel) */
+    lo = (vector unsigned short)
+#ifdef WORDS_BIGENDIAN
+	vec_mergel ((vector unsigned short) data2,
+		    (vector unsigned short) data1);
+#else
+	vec_mergel ((vector unsigned short) data1,
+		    (vector unsigned short) data2);
+#endif
+
+    return (vector unsigned int) lo;
+}
+
+static force_inline vector unsigned int
+unpackhi_128_8x16 (vector unsigned int data1, vector unsigned int data2)
+{
+    vector unsigned short hi;
+
+    /* interleave the 16-bit elements of the two inputs (vec_mergeh) */
+    hi = (vector unsigned short)
+#ifdef WORDS_BIGENDIAN
+	vec_mergeh ((vector unsigned short) data2,
+		    (vector unsigned short) data1);
+#else
+	vec_mergeh ((vector unsigned short) data1,
+		    (vector unsigned short) data2);
+#endif
+
+    return (vector unsigned int) hi;
+}
+
+static force_inline void
+unpack_128_2x128 (vector unsigned int data1, vector unsigned int data2,
+		    vector unsigned int* data_lo, vector unsigned int* data_hi)
+{
+    *data_lo = unpacklo_128_16x8(data1, data2);
+    *data_hi = unpackhi_128_16x8(data1, data2);
+}
+
+static force_inline void
+unpack_128_2x128_16 (vector unsigned int data1, vector unsigned int data2,
+		    vector unsigned int* data_lo, vector unsigned int* data_hi)
+{
+    *data_lo = unpacklo_128_8x16(data1, data2);
+    *data_hi = unpackhi_128_8x16(data1, data2);
+}
+
+static force_inline vector unsigned int
+unpack_565_to_8888 (vector unsigned int lo)
+{
+    vector unsigned int r, g, b, rb, t;
+
+    r = vec_and (vec_sl(lo, create_mask_32_128(8)), mask_red);
+    g = vec_and (vec_sl(lo, create_mask_32_128(5)), mask_green);
+    b = vec_and (vec_sl(lo, create_mask_32_128(3)), mask_blue);
+
+    rb = vec_or (r, b);
+    t  = vec_and (rb, mask_565_fix_rb);
+    t  = vec_sr (t, create_mask_32_128(5));
+    rb = vec_or (rb, t);
+
+    t  = vec_and (g, mask_565_fix_g);
+    t  = vec_sr (t, create_mask_32_128(6));
+    g  = vec_or (g, t);
+
+    return vec_or (rb, g);
+}
+
+static force_inline int
+is_opaque (vector unsigned int x)
+{
+    uint32_t cmp_result;
+    vector bool int ffs = vec_cmpeq(x, x);
+
+    cmp_result = vec_all_eq(x, ffs);
+
+    return (cmp_result & 0x8888) == 0x8888;
+}
+
+static force_inline int
+is_zero (vector unsigned int x)
+{
+    uint32_t cmp_result;
+
+    cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0));
+
+    return cmp_result == 0xffff;
+}
+
+static force_inline int
+is_transparent (vector unsigned int x)
+{
+    uint32_t cmp_result;
+
+    cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0));
+    return (cmp_result & 0x8888) == 0x8888;
+}
+
+static force_inline uint32_t
+core_combine_over_u_pixel_vmx (uint32_t src, uint32_t dst)
+{
+    uint32_t a;
+
+    a = ALPHA_8(src);
+
+    if (a == 0xff)
+    {
+	return src;
+    }
+    else if (src)
+    {
+	UN8x4_MUL_UN8_ADD_UN8x4(dst, (~a & MASK), src);
+    }
+
+    return dst;
+}
+
+static force_inline uint32_t
+combine1 (const uint32_t *ps, const uint32_t *pm)
+{
+    uint32_t s = *ps;
+
+    if (pm)
+	UN8x4_MUL_UN8(s, ALPHA_8(*pm));
+
+    return s;
+}
+
+static force_inline vector unsigned int
+combine4 (const uint32_t* ps, const uint32_t* pm)
+{
+    vector unsigned int src, msk;
+
+    if (pm)
+    {
+	msk = load_128_unaligned(pm);
+
+	if (is_transparent(msk))
+	    return (vector unsigned int) AVV(0);
+    }
+
+    src = load_128_unaligned(ps);
+
+    if (pm)
+	src = pix_multiply(src, msk);
+
+    return src;
+}
+
 static void
 vmx_combine_over_u_no_mask (uint32_t *      dest,
                             const uint32_t *src,
@@ -177,7 +492,7 @@ vmx_combine_over_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, src_mask;
+    DECLARE_SRC_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -227,7 +542,8 @@ vmx_combine_over_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -298,7 +614,7 @@ vmx_combine_over_reverse_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, src_mask;
+    DECLARE_SRC_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -346,7 +662,8 @@ vmx_combine_over_reverse_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -414,7 +731,7 @@ vmx_combine_in_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, src_mask;
+    DECLARE_SRC_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -459,7 +776,8 @@ vmx_combine_in_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -524,7 +842,7 @@ vmx_combine_in_reverse_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, src_mask;
+    DECLARE_SRC_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -571,7 +889,8 @@ vmx_combine_in_reverse_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -638,7 +957,7 @@ vmx_combine_out_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, src_mask;
+    DECLARE_SRC_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -685,7 +1004,8 @@ vmx_combine_out_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -750,7 +1070,7 @@ vmx_combine_out_reverse_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, src_mask;
+    DECLARE_SRC_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -798,7 +1118,8 @@ vmx_combine_out_reverse_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -865,7 +1186,7 @@ vmx_combine_atop_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, src_mask;
+    DECLARE_SRC_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -917,7 +1238,8 @@ vmx_combine_atop_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -993,7 +1315,7 @@ vmx_combine_atop_reverse_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, src_mask;
+    DECLARE_SRC_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1045,7 +1367,8 @@ vmx_combine_atop_reverse_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1121,7 +1444,7 @@ vmx_combine_xor_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, src_mask;
+    DECLARE_SRC_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1173,7 +1496,8 @@ vmx_combine_xor_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1249,7 +1573,7 @@ vmx_combine_add_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, src_mask;
+    DECLARE_SRC_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1295,7 +1619,8 @@ vmx_combine_add_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1363,7 +1688,8 @@ vmx_combine_src_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1413,7 +1739,8 @@ vmx_combine_over_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1471,7 +1798,8 @@ vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1527,7 +1855,8 @@ vmx_combine_in_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1581,7 +1910,8 @@ vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1636,7 +1966,8 @@ vmx_combine_out_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1693,7 +2024,8 @@ vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1750,7 +2082,8 @@ vmx_combine_atop_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask, vsrca;
-    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1816,7 +2149,8 @@ vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1879,7 +2213,8 @@ vmx_combine_xor_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1942,7 +2277,8 @@ vmx_combine_add_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+    DECLARE_SRC_MASK_VAR;
+    DECLARE_MASK_MASK_VAR;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1986,16 +2322,809 @@ vmx_combine_add_ca (pixman_implementation_t *imp,
     }
 }
 
+/* OVER with a solid source and an 8-bit mask onto a 32-bit destination.
+ * Each row is handled as a scalar head (until dst is 16-byte aligned),
+ * a four-pixel vector body and a scalar tail. */
+static void
+vmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
+                              pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS (info);
+    uint32_t src, srca;
+    uint32_t *dst_line, *dst;
+    uint8_t *mask_line;
+    int dst_stride, mask_stride;
+    int32_t w;
+    uint32_t m, d, s, ia;
+    vector unsigned int vsrc, valpha, vmask, vdst;
+
+    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
+    srca = ALPHA_8(src);
+    if (src == 0)
+	return;
+
+    PIXMAN_IMAGE_GET_LINE (
+	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+    vsrc = (vector unsigned int) {src, src, src, src};
+    valpha = splat_alpha(vsrc);
+
+    while (height--)
+    {
+	const uint8_t *pm = mask_line;
+	dst = dst_line;
+	dst_line += dst_stride;
+	mask_line += mask_stride;
+	w = width;
+	/* Head: one pixel at a time until dst reaches a 16-byte boundary. */
+	while (w && (uintptr_t)dst & 15)
+	{
+	    s = src;
+	    m = *pm++;
+
+	    if (m)
+	    {
+		d = *dst;
+		UN8x4_MUL_UN8 (s, m);
+		ia = ALPHA_8 (~s);
+		UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+		*dst = d;
+	    }
+
+	    w--;
+	    dst++;
+	}
+	/* Body: four pixels per pass; m is four mask bytes read as one word. */
+	while (w >= 4)
+	{
+	    m = *((uint32_t*)pm);
+
+	    if (srca == 0xff && m == 0xffffffff)
+	    {
+		save_128_aligned(dst, vsrc);
+	    }
+	    else if (m)
+	    {
+		vmask = splat_pixel((vector unsigned int) {m, m, m, m});
+
+		/* dst is 16-byte aligned */
+		vdst = in_over (vsrc, valpha, vmask, load_128_aligned (dst));
+
+		save_128_aligned(dst, vdst);
+	    }
+
+	    w -= 4;
+	    dst += 4;
+	    pm += 4;
+	}
+	/* Tail: the remaining (fewer than four) pixels, one at a time. */
+	while (w)
+	{
+	    s = src;
+	    m = *pm++;
+
+	    if (m)
+	    {
+		d = *dst;
+		UN8x4_MUL_UN8 (s, m);
+		ia = ALPHA_8 (~s);
+		UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+		*dst = d;
+	    }
+
+	    w--;
+	    dst++;
+	}
+    }
+}
+
+/* Fill a width x height rectangle at (x, y) of an 8, 16 or 32 bpp image with
+ * filler (stride in uint32_t units); other bpp values return FALSE.  Rows use
+ * scalar stores up to a 16-byte boundary, then vec_st, then a scalar tail. */
+static pixman_bool_t
+vmx_fill (pixman_implementation_t *imp,
+           uint32_t *               bits,
+           int                      stride,
+           int                      bpp,
+           int                      x,
+           int                      y,
+           int                      width,
+           int                      height,
+           uint32_t		    filler)
+{
+    uint32_t byte_width;
+    uint8_t *byte_line;
+    vector unsigned int vfiller;
+
+    if (bpp == 8)
+    {
+	uint8_t b;
+	uint16_t w;
+
+	stride = stride * (int) sizeof (uint32_t) / 1;
+	byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
+	byte_width = width;
+	stride *= 1;
+	/* Widen w before shifting: int-promoted w << 16 can overflow (UB). */
+	b = filler & 0xff;
+	w = (b << 8) | b;
+	filler = ((uint32_t) w << 16) | w;
+    }
+    else if (bpp == 16)
+    {
+	stride = stride * (int) sizeof (uint32_t) / 2;
+	byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
+	byte_width = 2 * width;
+	stride *= 2;
+	/* Replicate the low 16 bits of filler into both halves. */
+        filler = (filler & 0xffff) * 0x00010001;
+    }
+    else if (bpp == 32)
+    {
+	stride = stride * (int) sizeof (uint32_t) / 4;
+	byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
+	byte_width = 4 * width;
+	stride *= 4;
+    }
+    else
+    {
+	return FALSE;
+    }
+
+    vfiller = create_mask_1x32_128(&filler);
+
+    while (height--)
+    {
+	int w;
+	uint8_t *d = byte_line;
+	byte_line += stride;
+	w = byte_width;
+
+	if (w >= 1 && ((uintptr_t)d & 1))
+	{
+	    *(uint8_t *)d = filler;
+	    w -= 1;
+	    d += 1;
+	}
+
+	while (w >= 2 && ((uintptr_t)d & 3))
+	{
+	    *(uint16_t *)d = filler;
+	    w -= 2;
+	    d += 2;
+	}
+
+	while (w >= 4 && ((uintptr_t)d & 15))
+	{
+	    *(uint32_t *)d = filler;
+	    w -= 4;
+	    d += 4;
+	}
+
+	while (w >= 128)
+	{
+	    vec_st(vfiller, 0, (uint32_t *) d);
+	    vec_st(vfiller, 0, (uint32_t *) d + 4);
+	    vec_st(vfiller, 0, (uint32_t *) d + 8);
+	    vec_st(vfiller, 0, (uint32_t *) d + 12);
+	    vec_st(vfiller, 0, (uint32_t *) d + 16);
+	    vec_st(vfiller, 0, (uint32_t *) d + 20);
+	    vec_st(vfiller, 0, (uint32_t *) d + 24);
+	    vec_st(vfiller, 0, (uint32_t *) d + 28);
+
+	    d += 128;
+	    w -= 128;
+	}
+
+	if (w >= 64)
+	{
+	    vec_st(vfiller, 0, (uint32_t *) d);
+	    vec_st(vfiller, 0, (uint32_t *) d + 4);
+	    vec_st(vfiller, 0, (uint32_t *) d + 8);
+	    vec_st(vfiller, 0, (uint32_t *) d + 12);
+
+	    d += 64;
+	    w -= 64;
+	}
+
+	if (w >= 32)
+	{
+	    vec_st(vfiller, 0, (uint32_t *) d);
+	    vec_st(vfiller, 0, (uint32_t *) d + 4);
+
+	    d += 32;
+	    w -= 32;
+	}
+
+	if (w >= 16)
+	{
+	    vec_st(vfiller, 0, (uint32_t *) d);
+
+	    d += 16;
+	    w -= 16;
+	}
+
+	while (w >= 4)
+	{
+	    *(uint32_t *)d = filler;
+	    w -= 4;
+	    d += 4;
+	}
+
+	if (w >= 2)
+	{
+	    *(uint16_t *)d = filler;
+	    w -= 2;
+	    d += 2;
+	}
+
+	if (w >= 1)
+	{
+	    *(uint8_t *)d = filler;
+	    w -= 1;
+	    d += 1;
+	}
+    }
+
+    return TRUE;
+}
+
+/* SRC copy from a 32-bit source to a 32-bit destination that forces the top
+ * byte of every pixel to 0xff; the vector loop handles 16 pixels per pass. */
+static void
+vmx_composite_src_x888_8888 (pixman_implementation_t *imp,
+			      pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS (info);
+    uint32_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    int32_t w;
+    int dst_stride, src_stride;
+    PIXMAN_IMAGE_GET_LINE (
+	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	src = src_line;
+	src_line += src_stride;
+	w = width;
+	/* Head: one pixel at a time until dst reaches a 16-byte boundary. */
+	while (w && (uintptr_t)dst & 15)
+	{
+	    *dst++ = *src++ | 0xff000000;
+	    w--;
+	}
+	/* Body: unaligned loads from src, aligned stores to dst. */
+	while (w >= 16)
+	{
+	    vector unsigned int vmx_src1, vmx_src2, vmx_src3, vmx_src4;
+	    vmx_src1 = load_128_unaligned (src);
+	    vmx_src2 = load_128_unaligned (src + 4);
+	    vmx_src3 = load_128_unaligned (src + 8);
+	    vmx_src4 = load_128_unaligned (src + 12);
+
+	    save_128_aligned (dst, vec_or (vmx_src1, mask_ff000000));
+	    save_128_aligned (dst + 4, vec_or (vmx_src2, mask_ff000000));
+	    save_128_aligned (dst + 8, vec_or (vmx_src3, mask_ff000000));
+	    save_128_aligned (dst + 12, vec_or (vmx_src4, mask_ff000000));
+
+	    dst += 16;
+	    src += 16;
+	    w -= 16;
+	}
+	/* Tail: whatever is left (fewer than 16 pixels), one at a time. */
+	while (w)
+	{
+	    *dst++ = *src++ | 0xff000000;
+	    w--;
+	}
+    }
+}
+
+/* OVER with a solid source and no mask onto a 32-bit destination.  The
+ * inverse source alpha is computed once (ia scalar, via vector) per call. */
+static void
+vmx_composite_over_n_8888 (pixman_implementation_t *imp,
+                           pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS (info);
+    uint32_t *dst_line, *dst;
+    uint32_t src, ia;
+    int      i, w, dst_stride;
+    vector unsigned int vdst, vsrc, via;
+
+    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
+    if (src == 0)
+	return;
+
+    PIXMAN_IMAGE_GET_LINE (
+	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    vsrc = (vector unsigned int){src, src, src, src};
+    via = negate (splat_alpha (vsrc));
+    ia = ALPHA_8 (~src);
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	w = width;
+
+	while (w && ((uintptr_t)dst & 15))
+	{
+	    uint32_t d = *dst;
+	    UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, src);
+	    *dst++ = d;
+	    w--;
+	}
+
+	for (i = w / 4; i > 0; i--)
+	{
+	    vdst = pix_multiply (load_128_aligned (dst), via);
+	    save_128_aligned (dst, pix_add (vsrc, vdst));
+	    dst += 4;
+	}
+	/* w is not updated above: the last w % 4 pixels follow the moved dst. */
+	for (i = w % 4; --i >= 0;)
+	{
+	    uint32_t d = dst[i];
+	    UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, src);
+	    dst[i] = d;
+	}
+    }
+}
+
+/* Row-by-row OVER of a 32-bit source onto a 32-bit destination: every
+ * scanline is handed to vmx_combine_over_u() without a mask. */
+static void
+vmx_composite_over_8888_8888 (pixman_implementation_t *imp,
+                               pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS (info);
+    uint32_t *dst_row, *src_row;
+    int dst_pitch, src_pitch;
+
+    /* Locate the first scanline and the row stride of each image. */
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t,
+			   dst_pitch, dst_row, 1);
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t,
+			   src_pitch, src_row, 1);
+
+    while (height-- != 0)
+    {
+	/* Unmasked OVER of one scanline. */
+	vmx_combine_over_u (imp, op, dst_row, src_row, NULL, width);
+
+	dst_row += dst_pitch;
+	src_row += src_pitch;
+    }
+}
+
+/* Component-alpha OVER with a solid source and a 32-bit mask onto a 32-bit
+ * destination.  Rows are processed as a scalar head (until pd is 16-byte
+ * aligned), a four-pixel vector body and a scalar tail. */
+static void
+vmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
+                                    pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS (info);
+    uint32_t src, ia;
+    uint32_t    *dst_line, d;
+    uint32_t    *mask_line, m;
+    uint32_t pack_cmp;
+    int dst_stride, mask_stride;
+    vector unsigned int vsrc, valpha, vmask, vdest;
+
+    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
+    if (src == 0)
+	return;
+
+    PIXMAN_IMAGE_GET_LINE (
+	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
+
+    vsrc = (vector unsigned int) {src, src, src, src};
+    valpha = splat_alpha(vsrc);
+    /* NOTE(review): despite its name, ia is the source alpha itself; the
+     * scalar loops multiply it into the mask before inverting the mask. */
+    ia = ALPHA_8 (src);
+
+    while (height--)
+    {
+	int w = width;
+	const uint32_t *pm = (uint32_t *)mask_line;
+	uint32_t *pd = (uint32_t *)dst_line;
+	uint32_t s;
+
+	dst_line += dst_stride;
+	mask_line += mask_stride;
+
+	while (w && (uintptr_t)pd & 15)
+	{
+	    s = src;
+	    m = *pm++;
+
+	    if (m)
+	    {
+		d = *pd;
+		UN8x4_MUL_UN8x4 (s, m);
+		UN8x4_MUL_UN8 (m, ia);
+		m = ~m;
+		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, m, s);
+		*pd = d;
+	    }
+
+	    pd++;
+	    w--;
+	}
+
+	while (w >= 4)
+	{
+	    /* pm is NOT necessarily 16-byte aligned */
+	    vmask = load_128_unaligned (pm);
+	    pack_cmp = vec_all_eq(vmask, (vector unsigned int) AVV(0));
+	    /* if all bits in mask are zero, pack_cmp is not 0 */
+	    if (pack_cmp == 0)
+	    {
+		/* pd is 16-byte aligned */
+		vdest = in_over (vsrc, valpha, vmask, load_128_aligned (pd));
+		save_128_aligned(pd, vdest);
+	    }
+
+	    pd += 4;
+	    pm += 4;
+	    w -= 4;
+	}
+
+	while (w)
+	{
+	    s = src;
+	    m = *pm++;
+
+	    if (m)
+	    {
+		d = *pd;
+		UN8x4_MUL_UN8x4 (s, m);
+		UN8x4_MUL_UN8 (m, ia);
+		m = ~m;
+		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, m, s);
+		*pd = d;
+	    }
+
+	    pd++;
+	    w--;
+	}
+    }
+}
+
+/* Saturating ADD of an 8-bit source onto an 8-bit destination.  Bytes are
+ * added one at a time until dst is 4-byte aligned, then in groups of four
+ * through vmx_combine_add_u(), then one at a time for the last w & 3. */
+static void
+vmx_composite_add_8_8 (pixman_implementation_t *imp,
+            pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS (info);
+    uint8_t     *dst_line, *dst;
+    uint8_t     *src_line, *src;
+    int dst_stride, src_stride;
+    int32_t w;
+    uint16_t t;
+
+    PIXMAN_IMAGE_GET_LINE (
+    src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+    dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+
+    while (height--)
+    {
+	dst = dst_line;
+	src = src_line;
+	dst_line += dst_stride;
+	src_line += src_stride;
+	w = width;
+	/* Small head; a 9-bit sum (t >> 8 == 1) is clamped to 0xff */
+	while (w && (uintptr_t)dst & 3)
+	{
+	    t = (*dst) + (*src++);
+	    *dst++ = t | (0 - (t >> 8));
+	    w--;
+	}
+
+	vmx_combine_add_u (imp, op,
+		    (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
+
+	/* Small tail: skip the bytes handled above; ~3 keeps w's high bits */
+	dst += w & ~3;
+	src += w & ~3;
+	w &= 3;
+
+	while (w)
+	{
+	    t = (*dst) + (*src++);
+	    *dst++ = t | (0 - (t >> 8));
+	    w--;
+	}
+    }
+}
+
+/* ADD of a 32-bit source onto a 32-bit destination, one scanline at a
+ * time through vmx_combine_add_u() with no mask. */
+static void
+vmx_composite_add_8888_8888 (pixman_implementation_t *imp,
+                              pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS (info);
+    uint32_t *dst_row, *src_row;
+    int dst_pitch, src_pitch;
+
+    /* Locate the first scanline and the row stride of each image. */
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t,
+			   src_pitch, src_row, 1);
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t,
+			   dst_pitch, dst_row, 1);
+
+    while (height-- != 0)
+    {
+	vmx_combine_add_u (imp, op, dst_row, src_row, NULL, width);
+
+	dst_row += dst_pitch;
+	src_row += src_pitch;
+    }
+}
+
+/* Nearest-neighbour scaled OVER for one scanline of w pixels.  Source pixels
+ * are read from ps at the integer part of vx; after every read vx advances by
+ * unit_x and src_width_fixed is subtracted from it for as long as vx >= 0. */
+static force_inline void
+scaled_nearest_scanline_vmx_8888_8888_OVER (uint32_t*       pd,
+                                            const uint32_t* ps,
+                                            int32_t         w,
+                                            pixman_fixed_t  vx,
+                                            pixman_fixed_t  unit_x,
+                                            pixman_fixed_t  src_width_fixed,
+                                            pixman_bool_t   fully_transparent_src)
+{
+    uint32_t s, d;
+    const uint32_t* pm = NULL; /* stays NULL: only advanced when non-NULL */
+    vector unsigned int vsrc, vdst;
+
+    if (fully_transparent_src)
+	return;
+
+    /* Align dst on a 16-byte boundary */
+    while (w && ((uintptr_t)pd & 15))
+    {
+	d = *pd;
+	s = combine1 (ps + pixman_fixed_to_int (vx), pm);
+	vx += unit_x;
+	while (vx >= 0)
+	    vx -= src_width_fixed;
+	*pd++ = core_combine_over_u_pixel_vmx (s, d);
+	if (pm)
+	    pm++;
+	w--;
+    }
+
+    while (w >= 4)
+    {
+	vector unsigned int tmp;
+	uint32_t tmp1, tmp2, tmp3, tmp4;
+	/* Gather four source pixels, stepping vx exactly as the head loop. */
+	tmp1 = *(ps + pixman_fixed_to_int (vx));
+	vx += unit_x;
+	while (vx >= 0)
+	    vx -= src_width_fixed;
+	tmp2 = *(ps + pixman_fixed_to_int (vx));
+	vx += unit_x;
+	while (vx >= 0)
+	    vx -= src_width_fixed;
+	tmp3 = *(ps + pixman_fixed_to_int (vx));
+	vx += unit_x;
+	while (vx >= 0)
+	    vx -= src_width_fixed;
+	tmp4 = *(ps + pixman_fixed_to_int (vx));
+	vx += unit_x;
+	while (vx >= 0)
+	    vx -= src_width_fixed;
+
+	tmp[0] = tmp1;
+	tmp[1] = tmp2;
+	tmp[2] = tmp3;
+	tmp[3] = tmp4;
+
+	vsrc = combine4 ((const uint32_t *) &tmp, pm);
+
+	if (is_opaque (vsrc))
+	{
+	    save_128_aligned (pd, vsrc);
+	}
+	else if (!is_zero (vsrc))
+	{
+	    vdst = over(vsrc, splat_alpha(vsrc), load_128_aligned (pd));
+
+	    save_128_aligned (pd, vdst);
+	}
+
+	w -= 4;
+	pd += 4;
+	if (pm)
+	    pm += 4;
+    }
+
+    while (w)
+    {
+	d = *pd;
+	s = combine1 (ps + pixman_fixed_to_int (vx), pm);
+	vx += unit_x;
+	while (vx >= 0)
+	    vx -= src_width_fixed;
+	*pd++ = core_combine_over_u_pixel_vmx (s, d);
+	if (pm)
+	    pm++;
+
+	w--;
+    }
+}
+
+FAST_NEAREST_MAINLOOP (vmx_8888_8888_cover_OVER,
+		       scaled_nearest_scanline_vmx_8888_8888_OVER,
+		       uint32_t, uint32_t, COVER)
+FAST_NEAREST_MAINLOOP (vmx_8888_8888_none_OVER,
+		       scaled_nearest_scanline_vmx_8888_8888_OVER,
+		       uint32_t, uint32_t, NONE)
+FAST_NEAREST_MAINLOOP (vmx_8888_8888_pad_OVER,
+		       scaled_nearest_scanline_vmx_8888_8888_OVER,
+		       uint32_t, uint32_t, PAD)
+FAST_NEAREST_MAINLOOP (vmx_8888_8888_normal_OVER,
+		       scaled_nearest_scanline_vmx_8888_8888_OVER,
+		       uint32_t, uint32_t, NORMAL)
+
 static const pixman_fast_path_t vmx_fast_paths[] =
 {
+    PIXMAN_STD_FAST_PATH (OVER, solid,    null, a8r8g8b8, vmx_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    null, x8r8g8b8, vmx_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, vmx_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, vmx_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, vmx_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, vmx_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, vmx_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, vmx_composite_over_n_8888_8888_ca),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, vmx_composite_over_n_8888_8888_ca),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, vmx_composite_over_n_8888_8888_ca),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, vmx_composite_over_n_8888_8888_ca),
+
+    /* PIXMAN_OP_ADD */
+    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, vmx_composite_add_8_8),
+    PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, vmx_composite_add_8888_8888),
+    PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, vmx_composite_add_8888_8888),
+
+    /* PIXMAN_OP_SRC */
+    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, vmx_composite_src_x888_8888),
+    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, vmx_composite_src_x888_8888),
+    /* Nearest-neighbour scaled OVER (the vmx_8888_8888_*_OVER main loops) */
+    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, vmx_8888_8888),
+    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, vmx_8888_8888),
+    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, vmx_8888_8888),
+    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, vmx_8888_8888),
+
     {   PIXMAN_OP_NONE	},
 };
 
+/* Iterator fetch: copy iter->width pixels from iter->bits into iter->buffer
+ * with the top byte set to 0xff, then step iter->bits by iter->stride. */
+static uint32_t *
+vmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
+{
+    int w = iter->width;
+    vector unsigned int ff000000 = mask_ff000000;
+    uint32_t *dst = iter->buffer;
+    uint32_t *src = (uint32_t *)iter->bits;
+    iter->bits += iter->stride;
+
+    while (w && ((uintptr_t)dst) & 0x0f)
+    {
+	*dst++ = (*src++) | 0xff000000;
+	w--;
+    }
+
+    while (w >= 4)
+    {
+	save_128_aligned(dst, vec_or(load_128_unaligned(src), ff000000));
+	dst += 4;
+	src += 4;
+	w -= 4;
+    }
+
+    while (w)
+    {
+	*dst++ = (*src++) | 0xff000000;
+	w--;
+    }
+
+    return iter->buffer;
+}
+
+/* Iterator fetch: expand iter->width bytes from iter->bits into 32-bit pixels
+ * in iter->buffer (the scalar paths place each byte in bits 31..24). */
+static uint32_t *
+vmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
+{
+    int w = iter->width;
+    uint32_t *dst = iter->buffer;
+    uint8_t *src = iter->bits;
+    vector unsigned int vmx0, vmx1, vmx2, vmx3, vmx4, vmx5, vmx6;
+
+    iter->bits += iter->stride;
+    /* Cast before shifting: an int-promoted byte >= 0x80 << 24 overflows. */
+    while (w && (((uintptr_t)dst) & 15))
+    {
+        *dst++ = (uint32_t) *(src++) << 24;
+        w--;
+    }
+
+    while (w >= 16)
+    {
+	vmx0 = load_128_unaligned((uint32_t *) src);
+	unpack_128_2x128((vector unsigned int) AVV(0), vmx0, &vmx1, &vmx2);
+	unpack_128_2x128_16((vector unsigned int) AVV(0), vmx1, &vmx3, &vmx4);
+	unpack_128_2x128_16((vector unsigned int) AVV(0), vmx2, &vmx5, &vmx6);
+
+	save_128_aligned(dst, vmx6);
+	save_128_aligned((dst +  4), vmx5);
+	save_128_aligned((dst +  8), vmx4);
+	save_128_aligned((dst + 12), vmx3);
+	dst += 16;
+	src += 16;
+	w -= 16;
+    }
+
+    while (w)
+    {
+	*dst++ = (uint32_t) *(src++) << 24;
+	w--;
+    }
+
+    return iter->buffer;
+}
+
+#define IMAGE_FLAGS							\
+    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |		\
+     FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
+/* Narrow iterators for x8r8g8b8 and a8; PIXMAN_null ends the table. */
+static const pixman_iter_info_t vmx_iters[] =
+{
+    { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
+      _pixman_iter_init_bits_stride, vmx_fetch_x8r8g8b8, NULL
+    },
+    { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
+      _pixman_iter_init_bits_stride, vmx_fetch_a8, NULL
+    },
+    { PIXMAN_null },
+};
+
 pixman_implementation_t *
 _pixman_implementation_create_vmx (pixman_implementation_t *fallback)
 {
     pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths);
 
+    /* VMX constants */
+    mask_ff000000 = create_mask_32_128 (0xff000000);
+    mask_red   = create_mask_32_128 (0x00f80000);
+    mask_green = create_mask_32_128 (0x0000fc00);
+    mask_blue  = create_mask_32_128 (0x000000f8);
+    mask_565_fix_rb = create_mask_32_128 (0x00e000e0);
+    mask_565_fix_g = create_mask_32_128  (0x0000c000);
+
     /* Set up function pointers */
 
     imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;
@@ -2022,5 +3151,9 @@ _pixman_implementation_create_vmx (pixman_implementation_t *fallback)
     imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca;
     imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca;
 
+    imp->fill = vmx_fill;
+
+    imp->iter_info = vmx_iters;
+
     return imp;
 }
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman-x86.c b/source/libs/pixman/pixman-src/pixman/pixman-x86.c
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman-x86.c
rename to source/libs/pixman/pixman-src/pixman/pixman-x86.c
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman.c b/source/libs/pixman/pixman-src/pixman/pixman.c
old mode 100755
new mode 100644
similarity index 97%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman.c
rename to source/libs/pixman/pixman-src/pixman/pixman.c
index 9555ceaafb8d3722328da770ced38f1fd2467283..f932eac3c1b6fbf82033c5aa4a5c8c80484d5837
--- a/source/libs/pixman/pixman-0.32.8/pixman/pixman.c
+++ b/source/libs/pixman/pixman-src/pixman/pixman.c
@@ -325,18 +325,20 @@ _pixman_compute_composite_region32 (pixman_region32_t * region,
     return TRUE;
 }
 
-typedef struct
+typedef struct box_48_16 box_48_16_t;
+
+struct box_48_16
 {
-    pixman_fixed_48_16_t	x1;
-    pixman_fixed_48_16_t	y1;
-    pixman_fixed_48_16_t	x2;
-    pixman_fixed_48_16_t	y2;
-} box_48_16_t;
+    pixman_fixed_48_16_t        x1;
+    pixman_fixed_48_16_t        y1;
+    pixman_fixed_48_16_t        x2;
+    pixman_fixed_48_16_t        y2;
+};
 
 static pixman_bool_t
-compute_transformed_extents (pixman_transform_t *transform,
+compute_transformed_extents (pixman_transform_t   *transform,
 			     const pixman_box32_t *extents,
-			     box_48_16_t *transformed)
+			     box_48_16_t          *transformed)
 {
     pixman_fixed_48_16_t tx1, ty1, tx2, ty2;
     pixman_fixed_t x1, y1, x2, y2;
@@ -495,21 +497,12 @@ analyze_extent (pixman_image_t       *image,
     if (!compute_transformed_extents (transform, extents, &transformed))
 	return FALSE;
 
-    /* Expand the source area by a tiny bit so account of different rounding that
-     * may happen during sampling. Note that (8 * pixman_fixed_e) is very far from
-     * 0.5 so this won't cause the area computed to be overly pessimistic.
-     */
-    transformed.x1 -= 8 * pixman_fixed_e;
-    transformed.y1 -= 8 * pixman_fixed_e;
-    transformed.x2 += 8 * pixman_fixed_e;
-    transformed.y2 += 8 * pixman_fixed_e;
-
     if (image->common.type == BITS)
     {
-	if (pixman_fixed_to_int (transformed.x1) >= 0			&&
-	    pixman_fixed_to_int (transformed.y1) >= 0			&&
-	    pixman_fixed_to_int (transformed.x2) < image->bits.width	&&
-	    pixman_fixed_to_int (transformed.y2) < image->bits.height)
+	if (pixman_fixed_to_int (transformed.x1 - pixman_fixed_e) >= 0                &&
+	    pixman_fixed_to_int (transformed.y1 - pixman_fixed_e) >= 0                &&
+	    pixman_fixed_to_int (transformed.x2 - pixman_fixed_e) < image->bits.width &&
+	    pixman_fixed_to_int (transformed.y2 - pixman_fixed_e) < image->bits.height)
 	{
 	    *flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
 	}
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/pixman.h b/source/libs/pixman/pixman-src/pixman/pixman.h
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/pixman.h
rename to source/libs/pixman/pixman-src/pixman/pixman.h
diff --git a/source/libs/pixman/pixman-0.32.8/pixman/solaris-hwcap.mapfile b/source/libs/pixman/pixman-src/pixman/solaris-hwcap.mapfile
old mode 100755
new mode 100644
similarity index 100%
rename from source/libs/pixman/pixman-0.32.8/pixman/solaris-hwcap.mapfile
rename to source/libs/pixman/pixman-src/pixman/solaris-hwcap.mapfile
diff --git a/source/libs/pixman/version.ac b/source/libs/pixman/version.ac
index c0935e7a31360fe4360e5bce496045a80b2b97f6..149bb50d120c3f58ff8c2da62e6f99eada471a6d 100644
--- a/source/libs/pixman/version.ac
+++ b/source/libs/pixman/version.ac
@@ -8,4 +8,4 @@ dnl
 dnl --------------------------------------------------------
 dnl
 dnl  m4-include this file to define the current pixman version
-m4_define([pixman_version], [0.32.8])
+m4_define([pixman_version], [0.34.0])
diff --git a/source/texk/README b/source/texk/README
index d039d23210bd28388b945d17666c91a414cb2cbf..993b4e5a2317c794ed4dd1d2b5a072e214df251b 100644
--- a/source/texk/README
+++ b/source/texk/README
@@ -1,4 +1,4 @@
-$Id: README 39295 2016-01-06 05:27:06Z kakuto $
+$Id: README 39470 2016-01-23 22:38:00Z kakuto $
 Copyright 2006-2015 TeX Users Group.
 You may freely use, modify and/or distribute this file.
 
@@ -59,10 +59,10 @@ dvipos - ?
 
 dvipsk - maintained here, by us
 
-dvisvgm 1.14.1 - checked 6jan16
+dvisvgm 1.14.2 - checked 20jan16
   http://dvisvgm.bplaced.net/Downloads
 
-gregorio 4.0.1 - checked 29dec15
+gregorio 4.1.0-beta1 - checked 23jan16
   https://github.com/gregorio-project/gregorio/releases/
 
 gsftopk - from Paul Vojta's xdvi?
diff --git a/source/texk/kpathsea/ChangeLog b/source/texk/kpathsea/ChangeLog
index f43e07ec920994c26f1ede217dd76199d05b09d3..82b10a2ba0e1006fc95b3c8e09e8062aefbb5ace 100644
--- a/source/texk/kpathsea/ChangeLog
+++ b/source/texk/kpathsea/ChangeLog
@@ -1,3 +1,8 @@
+2016-02-03  Akira Kakuto  <kakuto@fuk.kindai.ac.jp>
+
+	* win32lib.[ch], mingw32.[ch]: Define a function
+	double win32_floor (double x), since floor is defined in cpascal.h.
+
 2015-12-29  Akira Kakuto  <kakuto@fuk.kindai.ac.jp>
 
 	* win32lib.h: Update (w32 only).
diff --git a/source/texk/kpathsea/mingw32.c b/source/texk/kpathsea/mingw32.c
index f6f73719bf790f338f93db705f3b9c1d7bac46e6..32c23e75d704053b82ba624a1a41f63b97a6ce1c 100644
--- a/source/texk/kpathsea/mingw32.c
+++ b/source/texk/kpathsea/mingw32.c
@@ -48,6 +48,12 @@ extern int __cdecl _free_osfhnd (int fd);
 static char *get_home_directory (void);
 static int _parse_root (char * name, char ** pPath);
 
+double
+win32_floor (double x)
+{
+  return floor (x);  /* plain wrapper; added because floor is defined in cpascal.h (see ChangeLog) */
+}
+
 void
 init_user_info (void)
 {
diff --git a/source/texk/kpathsea/mingw32.h b/source/texk/kpathsea/mingw32.h
index f05935cef1da4fd74f0ab24d142ced178c6fb2ad..8db9c135f252cf4c43927231bec0ec70248b4744 100644
--- a/source/texk/kpathsea/mingw32.h
+++ b/source/texk/kpathsea/mingw32.h
@@ -72,7 +72,8 @@ extern char *quote_args(char **);
 #endif /* MAKE_KPSE_DLL */
 
 extern KPSEDLL BOOL win32_get_long_filename (char *, char *, int);
-extern KPSEDLL void texlive_gs_init(void);
+extern KPSEDLL void texlive_gs_init (void);
+extern KPSEDLL double win32_floor (double);
 
 static inline FILE *
 win32_popen (const char *command, const char *fmode)
diff --git a/source/texk/kpathsea/win32lib.c b/source/texk/kpathsea/win32lib.c
index 35a0267887eeb313af3179e8deeceb5afd0ddc82..4b176aab9225c0f80b0ed43608ca9106ae008699 100644
--- a/source/texk/kpathsea/win32lib.c
+++ b/source/texk/kpathsea/win32lib.c
@@ -31,6 +31,11 @@ static int is_include_space(const char *s)
   return 0;
 }
 
+double win32_floor (double x)
+{
+  return floor (x);  /* plain wrapper; added because floor is defined in cpascal.h (see ChangeLog) */
+}
+
 FILE * win32_popen (const char *cmd, const char *fmode)
 {
   char mode[3];
diff --git a/source/texk/kpathsea/win32lib.h b/source/texk/kpathsea/win32lib.h
index b31ab6235677aa832260f7d3007dbcc2cb0bfa1a..28aae28d8d5f116cd1e13fda6ffef3beb6e9881d 100644
--- a/source/texk/kpathsea/win32lib.h
+++ b/source/texk/kpathsea/win32lib.h
@@ -255,6 +255,7 @@
 extern "C" {
 #endif
 
+extern KPSEDLL double win32_floor (double x);
 extern KPSEDLL FILE *win32_popen (const char *cmd, const char *mode);
 extern KPSEDLL int win32_pclose (FILE *f);
 extern KPSEDLL struct passwd *kpathsea_getpwnam (kpathsea kpse, char *name);
diff --git a/source/texk/web2c/cwebdir/README b/source/texk/web2c/cwebdir/README
index 1f4add6dd090bc0d164459e3028c7df68f905c4c..e4dfc0de0ae758641a53cf92952fe79ebc250355 100644
--- a/source/texk/web2c/cwebdir/README
+++ b/source/texk/web2c/cwebdir/README
@@ -1,9 +1,9 @@
 % This file is part of CWEB.
 % The CWEB programs by Silvio Levy are based on programs by D. E. Knuth.
 % They are distributed WITHOUT ANY WARRANTY, express or implied.
-% This README file last updated May 2000 by Don Knuth
+% This README file last updated January 2016 by Don Knuth
 
-% Copyright (C) 1987,1990,1993,2000 Silvio Levy and Donald E. Knuth
+% Copyright (C) 1987,1990,1993,2000,2016 Silvio Levy and Donald E. Knuth
 
 % Permission is granted to make and distribute verbatim copies of this
 % document provided that the copyright notice and this permission notice
@@ -14,7 +14,13 @@
 % entire resulting derived work is given a different name and distributed
 % under the terms of a permission notice identical to this one.
 
-% Please send comments, suggestions, etc. to levy@math.berkeley.edu.
+% Please send comments, suggestions, etc. to tex-k@tug.org, and people
+% there will verify and forward bug reports.
+%
+% DEK takes no responsibility for the changefiles; they should be
+% maintained independently. His job is to correct errors in
+% ctangle.w, cweave.w, common.w, cwebmac.tex, cwebman.tex,
+% and in the files of examples/*.w, nothing more.
 
 This directory contains the following files:
 
diff --git a/source/texk/web2c/cwebdir/cwebmac.tex b/source/texk/web2c/cwebdir/cwebmac.tex
index 8890b63b4dd8aca26e2f232fb104338e503f699b..d50987da9be54a9ae45dab526978bfd34ee2ccf2 100644
--- a/source/texk/web2c/cwebdir/cwebmac.tex
+++ b/source/texk/web2c/cwebdir/cwebmac.tex
@@ -1,8 +1,8 @@
 % standard macros for CWEB listings (in addition to plain.tex)
-% Version 3.67 --- July 2006
+% Version 3.68 --- January 2016
 \ifx\renewenvironment\undefined\else\endinput\fi % LaTeX will use other macros
-\xdef\fmtversion{\fmtversion+CWEB3.67}
-\chardef\cwebversion=3 \chardef\cwebrevision=67
+\xdef\fmtversion{\fmtversion+CWEB3.68}
+\chardef\cwebversion=3 \chardef\cwebrevision=68
 \newif\ifpdf
 \ifx\pdf+\pdftrue\fi
 % Uncomment the following line if you want PDF goodies to be the default
@@ -278,7 +278,8 @@
   \sfcode`;=1500 \pretolerance 200 \hyphenpenalty 50 \exhyphenpenalty 50
   \noindent{\let\*=\lapstar\bf\secstar.\quad}%
   \ifpdftex\smash{\raise\baselineskip\hbox to0pt{%
-     \let\*=\empty\pdfdest num \secstar fith}}
+%     \let\*=\empty\pdfdest num \secstar fith}} % bad space in versions < 3.68
+     \let\*=\empty\pdfdest num \secstar fith}}% changed in version 3.68
   \else\ifpdf\smash{\raise\baselineskip\hbox to0pt{%
      \let\*=\empty\special{%
        pdf: dest (\romannumeral\secstar) [ @thispage /FitH @ypos ]}}}\fi\fi}
@@ -340,7 +341,7 @@
 \def\T#1{\leavevmode % octal, hex or decimal constant
   \hbox{$\def\?{\kern.2em}%
 %    \def\$##1{\egroup_{\,\rm##1}\bgroup}% suffix to constant % versions < 3.67
-    \def\$##1{\egroup_{\rm##1}\bgroup}% suffix to constant
+    \def\$##1{\egroup_{\rm##1}\bgroup}% suffix to constant % in version 3.67
     \def\_{\cdot 10^{\aftergroup}}% power of ten (via dirty trick)
     \let\~=\oct \let\^=\hex {#1}$}}
 \def\U{\note{This code is used in section}} % xref for use of a section
@@ -457,6 +458,7 @@
 \def\fin{\par\vfill\eject % this is done when we are ending the index
   \ifpagesaved\null\vfill\eject\fi % output a null index column
   \if L\lr\else\null\vfill\eject\fi % finish the current page
+  \ifpdftex \makebookmarks \fi % added in Version 3.68
   \parfillskip 0pt plus 1fil
   \def\grouptitle{NAMES OF THE SECTIONS}
   \let\topsecno=\nullsec
@@ -470,7 +472,8 @@
   \def\Us{\note{Used in sections}} % crossref for uses of a section
   \def\I{\par\hangindent 2em}\let\*=*
   \ifacro \def\outsecname{Names of the sections} \let\Xpdf\X
-  \ifpdftex \makebookmarks \pdfdest name {NOS} fitb
+%  \ifpdftex \makebookmarks \pdfdest name {NOS} fitb % in versions < 3.68
+  \ifpdftex \pdfdest name {NOS} fitb % changed in version 3.68
     \pdfoutline goto name {NOS} count -\secno {\outsecname}
     \def\X##1:##2\X{\Xpdf##1:##2\X \firstsecno##1.%
       {\toksF={}\makeoutlinetoks##2\outlinedone\outlinedone}%
diff --git a/source/texk/web2c/lib/ChangeLog b/source/texk/web2c/lib/ChangeLog
index 32bf88532792d0b281fe6a3680cb4abb2664da60..a0ea61120ec818e9c89f60462c2ffee0cc5d06a6 100644
--- a/source/texk/web2c/lib/ChangeLog
+++ b/source/texk/web2c/lib/ChangeLog
@@ -1,3 +1,7 @@
+2016-01-26  Akira Kakuto  <kakuto@fuk.kindai.ac.jp>
+
+	* texmfmp.c: Improve to skip BOM (win32 only).
+
 2016-01-02  Akira Kakuto  <kakuto@fuk.kinidai.ac.jp>
 
 	* printversion.c: 2015 ---> 2016.
diff --git a/source/texk/web2c/lib/texmfmp.c b/source/texk/web2c/lib/texmfmp.c
index 75c846a339b3bf73ab362158913d35b79cb1e354..b5242e2d372f5934da4a1a732ffdfd629af35426 100644
--- a/source/texk/web2c/lib/texmfmp.c
+++ b/source/texk/web2c/lib/texmfmp.c
@@ -2288,12 +2288,13 @@ input_line (FILE *f)
     long position = ftell (f);
 
     if (position == 0L) {  /* Detect and skip Byte order marks.  */
-      int k1 = getc (f);
+      int k1, k2, k3, k4;
+      k1 = getc (f);
 
       if (k1 != 0xff && k1 != 0xfe && k1 != 0xef)
         rewind (f);
       else {
-        int k2 = getc (f);
+        k2 = getc (f);
 
         if (k2 != 0xff && k2 != 0xfe && k2 != 0xbb)
           rewind (f);
@@ -2301,10 +2302,11 @@ input_line (FILE *f)
                  (k1 == 0xfe && k2 == 0xff))   /* UTF-16(BE) */
           ;
         else {
-          int k3 = getc (f);
-
-          if (k1 == 0xef && k2 == 0xbb && k3 == 0xbf) /* UTF-8 */
-            ;
+          k3 = getc (f);
+          k4 = getc (f);
+          if (k1 == 0xef && k2 == 0xbb && k3 == 0xbf &&
+              k4 >= 0 && k4 <= 0x7e) /* UTF-8 */
+            ungetc (k4, f);
           else
             rewind (f);
         }
diff --git a/source/texk/web2c/luatexdir/lang/texlang.w b/source/texk/web2c/luatexdir/lang/texlang.w
index 65cc3c1636f9f1f5f77140cf0c1940c2074d588f..8e0deb3357d21ea2829d3af243f541b3989ca81e 100644
--- a/source/texk/web2c/luatexdir/lang/texlang.w
+++ b/source/texk/web2c/luatexdir/lang/texlang.w
@@ -26,7 +26,7 @@
 @ Low-level helpers
 
 @ @c
-#define noVERBOSE
+#define unVERBOSE
 
 #define MAX_TEX_LANGUAGES  16384
 
@@ -795,7 +795,9 @@ static int valid_wordend(halfword s)
                   ||  type(r) == ins_node
                   ||  type(r) == adjust_node
                   ||  type(r) == penalty_node
-                  || (type(r) == kern_node && (subtype(r) == explicit || subtype(r) == acc_kern)))
+                  || (type(r) == kern_node && (subtype(r) == explicit_kern ||
+                                               subtype(r) == italic_kern   ||
+                                               subtype(r) == accent_kern   )))
         return 1;
     return 0;
 }
diff --git a/source/texk/web2c/luatexdir/lua/lpdflib.c b/source/texk/web2c/luatexdir/lua/lpdflib.c
index cb7aa5aba35d786b58ae192f0caa5f9f11636625..210bc41a7ccf1e414a4fb5ab6837ca2b5ed91d4d 100644
--- a/source/texk/web2c/luatexdir/lua/lpdflib.c
+++ b/source/texk/web2c/luatexdir/lua/lpdflib.c
@@ -730,12 +730,12 @@ static int l_getmatrix(lua_State * L)
 {
     if (matrix_stack_used > 0) {
         matrix_entry *m = &matrix_stack[matrix_stack_used - 1];
-        lua_pushinteger(L, m->a);
-        lua_pushinteger(L, m->b);
-        lua_pushinteger(L, m->c);
-        lua_pushinteger(L, m->d);
-        lua_pushinteger(L, m->e);
-        lua_pushinteger(L, m->f);
+        lua_pushnumber(L, m->a);
+        lua_pushnumber(L, m->b);
+        lua_pushnumber(L, m->c);
+        lua_pushnumber(L, m->d);
+        lua_pushnumber(L, m->e);
+        lua_pushnumber(L, m->f);
     } else {
         lua_pushinteger(L, 1);
         lua_pushinteger(L, 0);
diff --git a/source/texk/web2c/luatexdir/luatex.c b/source/texk/web2c/luatexdir/luatex.c
index 23e5252dcf5893bab0d8e95252df0004a6d36a25..14d5fd2cd996fd7b9c9c40d3a6878e58673a0448 100644
--- a/source/texk/web2c/luatexdir/luatex.c
+++ b/source/texk/web2c/luatexdir/luatex.c
@@ -30,7 +30,7 @@
 
 int luatex_version = 89;        /* \.{\\luatexversion}  */
 int luatex_revision = '0';      /* \.{\\luatexrevision}  */
-int luatex_date_info = 2016012800;     /* the compile date is now hardwired */
+int luatex_date_info = 2016020500;     /* the compile date is now hardwired */
 const char *luatex_version_string = "beta-0.89.0";
 const char *engine_name = my_name;     /* the name of this engine */
 
diff --git a/source/texk/web2c/luatexdir/pdf/pdfgen.w b/source/texk/web2c/luatexdir/pdf/pdfgen.w
index f14254c1996942a1cf66bfa5786062cf76718807..b9f34c27009e30aad540ecd061d5aa4882ebc79c 100644
--- a/source/texk/web2c/luatexdir/pdf/pdfgen.w
+++ b/source/texk/web2c/luatexdir/pdf/pdfgen.w
@@ -538,6 +538,7 @@ void pdf_print_int(PDF pdf, longinteger n)
 }
 
 @ @c
+/*
 void print_pdffloat(PDF pdf, pdffloat f)
 {
     char a[24];
@@ -561,6 +562,53 @@ void print_pdffloat(PDF pdf, pdffloat f)
         pdf_puts(pdf, (a + 1));
     }
 }
+*/
+
+/* Write the fixed-point value |f.m / ten_pow[f.e]| to the PDF buffer in
+   decimal, dropping trailing zeros of the fraction.  The minus sign is
+   emitted separately from the digits, so values strictly between -1 and 0
+   (whose truncated integer part is 0) keep their sign.  Integer parts go
+   through |pdf_print_int|, which takes a |longinteger|, instead of
+   formatting the 64-bit mantissa with an |int| conversion. */
+void print_pdffloat(PDF pdf, pdffloat f)
+{
+    int64_t m = f.m;
+    int e = f.e;
+    if (m == 0) {
+        pdf_out(pdf, '0');
+    } else if (e == 0) {
+        /* division by ten_pow[0] == 1 */
+        pdf_print_int(pdf, m);
+    } else {
+        int t = ten_pow[e];
+        if (t == m) {
+            /* the value is exactly 1 */
+            pdf_out(pdf, '1');
+        } else {
+            int i, l;
+            char a[24];
+            /* NOTE(review): assumes |m != INT64_MIN|, whose negation overflows */
+            if (m < 0) {
+                pdf_out(pdf, '-');
+                m = -m;
+            }
+            pdf_print_int(pdf, m / t);
+            l = (int) (m % t);
+            if (l != 0) {
+                pdf_out(pdf, '.');
+                /* adding |t| puts a '1' in |a[0]|, so the |e| fraction digits
+                   keep their leading zeros; |a[0]| is skipped on output */
+                snprintf(a, 23, "%d", l + t);
+                for (i = e; i > 0; i--) {
+                    if (a[i] != '0')
+                        break;
+                    a[i] = '\0';
+                }
+                pdf_puts(pdf, (a + 1));
+            }
+        }
+    }
+}
 
 @ print out |s| as string in PDF output
 
@@ -914,7 +962,8 @@ static void init_pdf_outputparameters(PDF pdf)
     int pk_mode;
     pdf->draftmode = fix_int(int_par(draft_mode_code), 0, 1);
     pdf->compress_level = fix_int(pdf_compress_level, 0, 9);
-    pdf->decimal_digits = fix_int(pdf_decimal_digits, 3, 6);
+    pdf->decimal_digits = fix_int(pdf_decimal_digits, 3, 5);
+/*    pdf->decimal_digits = fix_int(pdf_decimal_digits, 3, 6);*//* later, maybe (LS)*/
     pdf->gamma = fix_int(pdf_gamma, 0, 1000000);
     pdf->image_gamma = fix_int(pdf_image_gamma, 0, 1000000);
     pdf->image_hicolor = fix_int(pdf_image_hicolor, 0, 1);
diff --git a/source/texk/web2c/luatexdir/pdf/pdfpage.w b/source/texk/web2c/luatexdir/pdf/pdfpage.w
index 34bb5d1e933257e10faf56f451e61f14089c5e33..90f63534d327943377eed5f13a92551238b16aab 100644
--- a/source/texk/web2c/luatexdir/pdf/pdfpage.w
+++ b/source/texk/web2c/luatexdir/pdf/pdfpage.w
@@ -105,16 +105,16 @@ boolean calc_pdfpos(pdfstructure * p, scaledpos pos)
     boolean move_pdfpos = false;
     switch (p->mode) {
         case PMODE_PAGE:
-            new.h = i32round(pos.h * p->k1);
-            new.v = i32round(pos.v * p->k1);
+            new.h = i64round(pos.h * p->k1);
+            new.v = i64round(pos.v * p->k1);
             p->cm[4].m = new.h - p->pdf.h.m;        /* cm is concatenated */
             p->cm[5].m = new.v - p->pdf.v.m;
             if (new.h != p->pdf.h.m || new.v != p->pdf.v.m)
                 move_pdfpos = true;
             break;
         case PMODE_TEXT:
-            new.h = i32round(pos.h * p->k1);
-            new.v = i32round(pos.v * p->k1);
+            new.h = i64round(pos.h * p->k1);
+            new.v = i64round(pos.v * p->k1);
             p->tm[4].m = new.h - p->pdf_bt_pos.h.m; /* Tm replaces */
             p->tm[5].m = new.v - p->pdf_bt_pos.v.m;
             if (new.h != p->pdf.h.m || new.v != p->pdf.v.m)
@@ -124,16 +124,16 @@ boolean calc_pdfpos(pdfstructure * p, scaledpos pos)
         case PMODE_CHARARRAY:
             switch (p->wmode) {
                 case WMODE_H:
-                    new.h = i32round((pos.h * p->k1 - (double) p->pdf_tj_pos.h.m) * p->k2);
-                    new.v = i32round(pos.v * p->k1);
+                    new.h = i64round((pos.h * p->k1 - (double) p->pdf_tj_pos.h.m) * p->k2);
+                    new.v = i64round(pos.v * p->k1);
                     p->tj_delta.m = -i64round((double) ((new.h - p->cw.m) / ten_pow[p->cw.e - p->tj_delta.e]));
                     p->tm[5].m = new.v - p->pdf_bt_pos.v.m;     /* p->tm[4] is meaningless */
                     if (p->tj_delta.m != 0 || new.v != p->pdf.v.m)
                         move_pdfpos = true;
                     break;
                 case WMODE_V:
-                    new.h = i32round(pos.h * p->k1);
-                    new.v = i32round(((double) p->pdf_tj_pos.v.m - pos.v * p->k1) * p->k2);
+                    new.h = i64round(pos.h * p->k1);
+                    new.v = i64round(((double) p->pdf_tj_pos.v.m - pos.v * p->k1) * p->k2);
                     p->tm[4].m = new.h - p->pdf_bt_pos.h.m;     /* p->tm[5] is meaningless */
                     p->tj_delta.m = -i64round((double) ((new.v - p->cw.m) / ten_pow[p->cw.e - p->tj_delta.e]));
                     if (p->tj_delta.m != 0 || new.h != p->pdf.h.m)
diff --git a/source/texk/web2c/luatexdir/pdf/pdfrule.w b/source/texk/web2c/luatexdir/pdf/pdfrule.w
index e4897dfb04b2801d98bb184b4018ca28bd68e0a0..2f75b4c761e951485a6b4381d1079783c6830a96 100644
--- a/source/texk/web2c/luatexdir/pdf/pdfrule.w
+++ b/source/texk/web2c/luatexdir/pdf/pdfrule.w
@@ -54,7 +54,7 @@ void pdf_place_rule(PDF pdf, halfword q, scaledpos size, int callback_id)
         dim.v.e = p->pdf.v.e;
         pdf_puts(pdf, "q\n");
         if (size.v <= one_bp) {
-            pos.v += i32round(0.5 * size.v);
+            pos.v += i64round(0.5 * size.v);
             pdf_set_pos_temp(pdf, pos);
             pdf_puts(pdf, "[]0 d 0 J ");
             print_pdffloat(pdf, dim.v);
@@ -62,7 +62,7 @@ void pdf_place_rule(PDF pdf, halfword q, scaledpos size, int callback_id)
             print_pdffloat(pdf, dim.h);
             pdf_puts(pdf, " 0 l S\n");
         } else if (size.h <= one_bp) {
-            pos.h += i32round(0.5 * size.h);
+            pos.h += i64round(0.5 * size.h);
             pdf_set_pos_temp(pdf, pos);
             pdf_puts(pdf, "[]0 d 0 J ");
             print_pdffloat(pdf, dim.h);
diff --git a/source/texk/web2c/luatexdir/pdf/pdftypes.h b/source/texk/web2c/luatexdir/pdf/pdftypes.h
index 8a670a5274f8a7731f6e4f6ead68e3a660316eb5..71f843e00f3007e624272353e8e8ec1049bb46f3 100644
--- a/source/texk/web2c/luatexdir/pdf/pdftypes.h
+++ b/source/texk/web2c/luatexdir/pdf/pdftypes.h
@@ -36,7 +36,14 @@
 #  define pdfkeyprefix "PTEX"
 
 #  define i32round(a) (int) floor((a) + 0.5)
-#  define i64round(a) (int64_t) floor((a) + 0.5)
+/*#  define i64round(a) (int64_t) floor((a) + 0.5)*/
+/*#  define i64round(a) (int64_t) ( (double)(a+0.5) - ((int64_t)(a+0.5))%1  ) */
+#ifdef _WIN32
+#  define i64round(a) (int64_t) win32_floor((a) + 0.5)  /* NOTE(review): rounds halves up; llround rounds them away from zero */
+#else
+#  define i64round(a) (int64_t) llround((a))  /* llround, not lround: |long| may be only 32 bits wide */
+#endif
+
 
 #  define MAX_OBJ_COMPRESS_LEVEL 3                  /* maximum/clipping value for \pdfobjcompresslevel */
 #  define OBJSTM_UNSET -1                           /* initial value */
@@ -81,9 +88,12 @@ typedef struct {
 #  define scaled int
 
 typedef struct scaledpos_ {
-    scaled h;
-    scaled v;
-} scaledpos;
+    int64_t h;
+    int64_t v;
+ } scaledpos;
+
+
+
 
 typedef struct scaled_whd_ {
     scaled wd; /* TeX width */
diff --git a/source/texk/web2c/luatexdir/tex/commands.h b/source/texk/web2c/luatexdir/tex/commands.h
index d2a7779a0e14b97bea2e32bad261ac62f3b147f3..60bfef895d22019bd7ab94b9836581f64fa2cc21 100644
--- a/source/texk/web2c/luatexdir/tex/commands.h
+++ b/source/texk/web2c/luatexdir/tex/commands.h
@@ -327,13 +327,10 @@ typedef enum {
     expand_font_code,
 } normal_codes;
 
-#  define explicit 1
-#  define acc_kern 2
 #  define lp_code_base 2
 #  define rp_code_base 3
 #  define ef_code_base 4
 #  define tag_code 5
-#  define auto_kern explicit
 #  define no_lig_code 6
 
 #  define immediate_code 4      /* command modifier for \.{\\immediate} */
diff --git a/source/texk/web2c/luatexdir/tex/commands.w b/source/texk/web2c/luatexdir/tex/commands.w
index 125b203a341ee8ce647329eb8aa4b50c35b5b5f7..53497af528c61c23f5740abed7dc826fab7a64be 100644
--- a/source/texk/web2c/luatexdir/tex/commands.w
+++ b/source/texk/web2c/luatexdir/tex/commands.w
@@ -405,7 +405,7 @@ void initialize_commands(void)
     primitive_tex("vss", vskip_cmd, ss_code, 0);
     primitive_tex("vfilneg", vskip_cmd, fil_neg_code, 0);
     primitive_tex("mskip", mskip_cmd, mskip_code, 0);
-    primitive_tex("kern", kern_cmd, explicit, 0);
+    primitive_tex("kern", kern_cmd, explicit_kern, 0);
     primitive_tex("mkern", mkern_cmd, mu_glue, 0);
     primitive_tex("moveleft", hmove_cmd, 1, 0);
     primitive_tex("moveright", hmove_cmd, 0, 0);
diff --git a/source/texk/web2c/luatexdir/tex/linebreak.w b/source/texk/web2c/luatexdir/tex/linebreak.w
index 28ab6fa55866d92ab5004abd19429127fe79193b..b2909606d48cdab830c4ff359af58b121da8d10a 100644
--- a/source/texk/web2c/luatexdir/tex/linebreak.w
+++ b/source/texk/web2c/luatexdir/tex/linebreak.w
@@ -913,7 +913,7 @@ static void compute_break_width(int break_type, int line_break_dir, int adjust_s
             case penalty_node:
                 break;
             case kern_node:
-                if (subtype(s) != explicit)
+                if (subtype(s) != explicit_kern && subtype(s) != italic_kern)
                     return;
                 else
                     break_width[1] -= width(s);
@@ -1866,7 +1866,8 @@ void ext_do_line_break(int paragraph_dir,
                         if (prev_p != temp_head && (
                                 is_char_node(prev_p)
                              || precedes_break(prev_p)
-                             || ((type(prev_p) == kern_node) && (subtype(prev_p) != explicit))
+                             || ((type(prev_p) == kern_node) && (subtype(prev_p) != explicit_kern &&
+                                                                 subtype(prev_p) != italic_kern   ))
                             )) {
                             ext_try_break(0, unhyphenated_node, line_break_dir, adjust_spacing,
                                           par_shape_ptr, adj_demerits,
@@ -1889,7 +1890,7 @@ void ext_do_line_break(int paragraph_dir,
                     /* end mathskip code */
                     break;
                 case kern_node:
-                    if (subtype(cur_p) == explicit) {
+                    if (subtype(cur_p) == explicit_kern || subtype(cur_p) == italic_kern) {
                         kern_break();
                     } else {
                         active_width[1] += width(cur_p);
diff --git a/source/texk/web2c/luatexdir/tex/maincontrol.w b/source/texk/web2c/luatexdir/tex/maincontrol.w
index c6b30b09921e3e94d250f5706e0e38155639469c..ff7e7279a0757a247537e860269988ec4017345b 100644
--- a/source/texk/web2c/luatexdir/tex/maincontrol.w
+++ b/source/texk/web2c/luatexdir/tex/maincontrol.w
@@ -25,13 +25,10 @@
 /* these will move to equivalents.h */
 
 @ @c
-#define explicit 1
-#define acc_kern 2
 #define lp_code_base 2
 #define rp_code_base 3
 #define ef_code_base 4
 #define tag_code 5
-#define auto_kern explicit
 #define no_lig_code 6
 #define gp_code_base 7
 
@@ -505,7 +502,7 @@ static void run_par_end_hmode (void) {
 
 @ @c
 static void append_italic_correction_mmode (void) {
-    tail_append(new_kern(0));
+    tail_append(new_kern(0)); /* what subtype to use */
 }
 
 @ @c
@@ -1810,7 +1807,7 @@ void append_italic_correction(void)
             return;
         f = font(p);
         tail_append(new_kern(char_italic(f, character(p))));
-        subtype(tail) = explicit;
+        subtype(tail) = italic_kern;
     }
 }
 
@@ -2035,11 +2032,11 @@ void make_accent(void)
             }
             delta = round(float_cast(w - a) / float_constant(2) + h * t - x * s);       /* real multiplication */
             r = new_kern(delta);
-            subtype(r) = acc_kern;
+            subtype(r) = accent_kern;
             couple_nodes(tail, r);
             couple_nodes(r, p);
             tail = new_kern(-a - delta);
-            subtype(tail) = acc_kern;
+            subtype(tail) = accent_kern;
             couple_nodes(p, tail);
             p = q;
 
diff --git a/source/texk/web2c/luatexdir/tex/mlist.w b/source/texk/web2c/luatexdir/tex/mlist.w
index 035ef6340dd5561b4be869f453c7870586e55c59..fbded284414aacc4ab148a9cfd2876a8c1df063f 100644
--- a/source/texk/web2c/luatexdir/tex/mlist.w
+++ b/source/texk/web2c/luatexdir/tex/mlist.w
@@ -1567,7 +1567,7 @@ static void math_kern(pointer p, scaled m)
             f = f + unity;
         }
         width(p) = mu_mult(width(p));
-        subtype(p) = explicit;
+        subtype(p) = italic_kern;
     }
 }
 
diff --git a/source/texk/web2c/luatexdir/tex/postlinebreak.w b/source/texk/web2c/luatexdir/tex/postlinebreak.w
index 873eaf96856709275c68702252529d4b116eab64..b78b599aac69a0a663cf8c144119ec92bb180070 100644
--- a/source/texk/web2c/luatexdir/tex/postlinebreak.w
+++ b/source/texk/web2c/luatexdir/tex/postlinebreak.w
@@ -174,7 +174,7 @@ void ext_post_line_break(int paragraph_dir,
                         break;
                     } else if (non_discardable(q)) {
                         break;
-                    } else if (type(q) == kern_node && subtype(q) != explicit) {
+                    } else if (type(q) == kern_node && subtype(q) != explicit_kern && subtype(q) != italic_kern) {
                         break;
                     }
                     q = vlink(q);
@@ -559,7 +559,7 @@ void ext_post_line_break(int paragraph_dir,
                 if (q == cur_break(cur_p) || is_char_node(q))
                     break;
                 if (!((type(q) == local_par_node))) {
-                    if (non_discardable(q) || (type(q) == kern_node && subtype(q) != explicit))
+                    if (non_discardable(q) || (type(q) == kern_node && subtype(q) != explicit_kern && subtype(q) != italic_kern))
                         break;
                 }
                 */
@@ -581,7 +581,7 @@ void ext_post_line_break(int paragraph_dir,
                     /* weird, in the middle somewhere */
                 } else if (non_discardable(q)) {
                     break;
-                } else if (type(q) == kern_node && subtype(q) != explicit) {
+                } else if (type(q) == kern_node && subtype(q) != explicit_kern && subtype(q) != italic_kern) {
                     break;
                 }
                 r = q;
diff --git a/source/texk/web2c/luatexdir/tex/printing.w b/source/texk/web2c/luatexdir/tex/printing.w
index baafa24a8af99ed52b09035efaffbf1f3adf7e2b..478d55f88ba26cb7a222bb55581e2e615b496b6f 100644
--- a/source/texk/web2c/luatexdir/tex/printing.w
+++ b/source/texk/web2c/luatexdir/tex/printing.w
@@ -987,11 +987,13 @@ void short_display_n(int p, int m)
                 print(character(p));
             }
         } else {
-            if ((type(p) == glue_node) ||
-                (type(p) == disc_node) ||
-                (type(p) == penalty_node) ||
-                ((type(p) == kern_node) && (subtype(p) == explicit)))
+            if ( (type(p) == glue_node) ||
+                 (type(p) == disc_node) ||
+                 (type(p) == penalty_node) ||
+                ((type(p) == kern_node) && (subtype(p) == explicit_kern ||
+                                            subtype(p) == italic_kern   ))) {
                 incr(i);
+            }
             if (i >= m)
                 return;
             if (type(p) == disc_node) {
diff --git a/source/texk/web2c/luatexdir/tex/texnodes.h b/source/texk/web2c/luatexdir/tex/texnodes.h
index 618902b58fd2ccf1f6e6ac2ee04d6e7d530bd4da..d5198b92f037f2247bf763a731028c78302beba3 100644
--- a/source/texk/web2c/luatexdir/tex/texnodes.h
+++ b/source/texk/web2c/luatexdir/tex/texnodes.h
@@ -177,9 +177,14 @@ typedef enum {
 #  define tlink_post_break(a) tlink(post_break_head(a))
 #  define tlink_no_break(a)   tlink(no_break_head(a))
 
+typedef enum {
+    font_kern = 0,  /* |subtype| of kern nodes from the font */
+    explicit_kern,  /* |subtype| of kern nodes from \.{\\kern} */
+    accent_kern,    /* |subtype| of kern nodes from accents */
+    italic_kern,    /* |subtype| of kern nodes from \.{\\/} (italic correction) */
+} kern_subtypes;
+
 #  define kern_node_size       5
-#  define explicit             1             /* |subtype| of kern nodes from \.{\\kern} and \.{\\/} */
-#  define acc_kern             2             /* |subtype| of kern nodes from accents */
 #  define ex_kern(a)           vinfo((a)+3)  /* expansion factor (hz) */
 #  define synctex_tag_kern(a)  vinfo((a)+4)
 #  define synctex_line_kern(a) vlink((a)+4)
diff --git a/source/texk/web2c/luatexdir/tex/texnodes.w b/source/texk/web2c/luatexdir/tex/texnodes.w
index 0e0c92676e98164888e706ca14627616824a3415..de718139923060d261f13dfcdf639c09f5381af8 100644
--- a/source/texk/web2c/luatexdir/tex/texnodes.w
+++ b/source/texk/web2c/luatexdir/tex/texnodes.w
@@ -280,7 +280,7 @@ const char *node_subtypes_penalty[] = {
     "userpenalty", NULL
 };
 const char *node_subtypes_kern[] = {
-    "fontkern", "userkern", "accentkern", NULL
+    "fontkern", "userkern", "accentkern", "italiccorrection", NULL
 };
 const char *node_subtypes_rule[] = {
     "normal", "box", "image", "empty", "user", NULL
@@ -3092,7 +3092,7 @@ void show_node_list(int p)
                     if (subtype(p) != normal)
                         print_char(' ');
                     print_scaled(width(p));
-                    if (subtype(p) == acc_kern)
+                    if (subtype(p) == accent_kern)
                         tprint(" (for accent)");
                 } else {
                     tprint_esc("mkern");
diff --git a/source/texk/web2c/luatexdir/tex/textoken.w b/source/texk/web2c/luatexdir/tex/textoken.w
index 4b14b6ec9fec1ed04a22ae0cf5cb1cf632c107a9..5b10b1cdd2dc7c3b197eae561091956b39247f34 100644
--- a/source/texk/web2c/luatexdir/tex/textoken.w
+++ b/source/texk/web2c/luatexdir/tex/textoken.w
@@ -238,6 +238,10 @@ including the expansion of a macro or mark.
 @c
 void print_meaning(void)
 {
+    if (cur_cmd == math_char_num_cmd && cur_chr == 0) {
+        /* \mathchar -> \Umathchar */
+        cur_chr = 1 ;
+    }
     print_cmd_chr((quarterword) cur_cmd, cur_chr);
     if (cur_cmd >= call_cmd) {
         print_char(':');
@@ -249,21 +253,21 @@ void print_meaning(void)
             print_char(':');
             print_ln();
             switch (cur_chr) {
-            case first_mark_code:
-                token_show(first_mark(0));
-                break;
-            case bot_mark_code:
-                token_show(bot_mark(0));
-                break;
-            case split_first_mark_code:
-                token_show(split_first_mark(0));
-                break;
-            case split_bot_mark_code:
-                token_show(split_bot_mark(0));
-                break;
-            default:
-                token_show(top_mark(0));
-                break;
+                case first_mark_code:
+                    token_show(first_mark(0));
+                    break;
+                case bot_mark_code:
+                    token_show(bot_mark(0));
+                    break;
+                case split_first_mark_code:
+                    token_show(split_first_mark(0));
+                    break;
+                case split_bot_mark_code:
+                    token_show(split_bot_mark(0));
+                    break;
+                default:
+                    token_show(top_mark(0));
+                    break;
             }
         }
     }