Main Page   Namespace List   Class Hierarchy   Compound List   File List   Compound Members   File Members  

string_asm.h

Go to the documentation of this file.
00001 /*
00002  *  linux/drivers/video/fbcon.h -- Low level frame buffer based console driver
00003  *
00004  *      Copyright (C) 1997 Geert Uytterhoeven
00005  *
00006  *  This file is subject to the terms and conditions of the GNU General Public
00007  *  License.  See the file COPYING in the main directory of this archive
00008  *  for more details.
00009  */
00010 
00011 #ifndef string_asm
00012 #define string_asm
00013 
00014 /* ================================================================= */
00015 /*                      Utility Assembler Functions                  */
00016 /* ================================================================= */
00017 
00018 
00019 #if defined(__mc68000__)
00020 
00021 /* ====================================================================== */
00022 
00023 /* Those of a delicate disposition might like to skip the next couple of
00024  * pages.
00025  *
00026  * These functions are drop in replacements for memmove and
00027  * memset(_, 0, _). However their five instances add at least a kilobyte
00028  * to the object file. You have been warned.
00029  *
00030  * Not a great fan of assembler for the sake of it, but I think
00031  * that these routines are at least 10 times faster than their C
00032  * equivalents for large blits, and that's important to the lowest level of
00033  * a graphics driver. Question is whether some scheme with the blitter
00034  * would be faster. I suspect not for simple text system - not much
00035  * asynchrony.
00036  *
00037  * Code is very simple, just gruesome expansion. Basic strategy is to
00038  * increase data moved/cleared at each step to 16 bytes to reduce
00039  * instruction per data move overhead. movem might be faster still.
00040  * For more than 15 bytes, we try to align the write direction on a
00041  * longword boundary to get maximum speed. This is even more gruesome.
00042  * Unaligned read/write used requires 68020+ - think this is a problem?
00043  *
00044  * Sorry!
00045  */
00046 
00047 
00048 /* ++roman: I've optimized Robert's original versions in some minor
00049  * aspects, e.g. moveq instead of movel, let gcc choose the registers,
00050  * use movem in some places...
00051  * For modes other than 1 plane, many more such assembler functions
00052  * were needed (e.g. the ones using movep or expanding color values).
00053  */
00054 
00055 /* ++andreas: more optimizations:
00056    subl #65536,d0 replaced by clrw d0; subql #1,d0 for dbcc
00057    addal is faster than addaw
00058    movep is rather expensive compared to ordinary move's
00059    some functions rewritten in C for clarity, no speed loss */
00060 
/*
 * asm_memclear_small - zero `count` bytes starting at `s` (m68k inline asm).
 *
 * The address register is loaded with s + count and every store uses
 * predecrement addressing, so the buffer is written from its end towards
 * its start.  The low four bits of count are shifted out one at a time
 * and select a 1-, 2-, 4- and 8-byte store; what is left (count / 16) is
 * the number of 16-byte moveml stores performed by the dbra loop.
 *
 * Returns 0 on every path (never `s`).
 *
 * NOTE(review): dbra is the only loop here, whereas asm_memclear wraps
 * its dbra in an extra "clrw/subql/jcc" loop -- presumably this variant
 * is meant for short buffers only; confirm the intended upper bound.
 * NOTE(review): the asm stores to memory but declares no "memory"
 * clobber, unlike fast_memmove in this file -- confirm that is intended.
 * NOTE(review): the moveml register list assumes the four "d" (0) inputs
 * end up in four different data registers -- verify with the compiler
 * in use.
 */
00061 static __inline__ void *asm_memclear_small(void *s, size_t count)
00062 {
00063    if (!count)
00064       return(0);
00065 
00066    __asm__ __volatile__(
00067          "lsrl   #1,%1 ; jcc 1f ; moveb %2,%0@-\n\t"
00068       "1: lsrl   #1,%1 ; jcc 1f ; movew %2,%0@-\n\t"
00069       "1: lsrl   #1,%1 ; jcc 1f ; movel %2,%0@-\n\t"
00070       "1: lsrl   #1,%1 ; jcc 1f ; movel %2,%0@- ; movel %2,%0@-\n\t"
00071       "1: subql  #1,%1 ; jcs 3f\n\t"
00072       "2: moveml %2/%3/%4/%5,%0@-\n\t"
00073          "dbra %1,2b\n\t"
00074       "3:"
         /* %0 = running address (starts at the end), %1 = remaining count */
00075          : "=a" (s), "=d" (count)
         /* %2..%5 = the zero pattern stored by moveb/movew/movel/moveml */
00076          :  "d" (0), "d" (0), "d" (0), "d" (0),
00077             "0" ((char *)s+count), "1" (count)
00078   );
00079 
00080    return(0);
00081 }
00082 
00083 
/*
 * asm_memclear - zero `count` bytes starting at `s` (m68k inline asm).
 *
 * Works forwards with postincrement addressing.
 *
 *  - count < 16: the low four bits of count, shifted out one at a time,
 *    select a 1-, 2-, 4- and 8-byte clear.
 *  - otherwise: a prologue clears up to one byte and one word (adjusting
 *    count accordingly), the bulk is cleared as longwords in blocks of
 *    four clrl inside a dbra loop that is extended past 16 bits by the
 *    "clrw/subql/jcc" sequence, and an epilogue clears a trailing word
 *    and/or byte taken from the low two bits of the count saved in tmp.
 *
 * Returns 0 on every path (never `s`).
 *
 * NOTE(review): the prologue copies the count into tmp ("movel %1,%2")
 * and tests its low bits, whereas asm_memmove copies the destination
 * address ("movel %0,%3") -- verify which one is intended here.
 * NOTE(review): neither asm statement declares a "memory" clobber,
 * unlike fast_memmove in this file -- confirm that is intended.
 */
00084 static __inline__ void *asm_memclear(void *s, size_t count)
00085 {
00086    if (!count)
00087       return(0);
00088 
   /* Short case: each bit of count picks one fixed-size clear. */
00089    if (count < 16) {
00090       __asm__ __volatile__(
00091             "lsrl   #1,%1 ; jcc 1f ; clrb %0@+\n\t"
00092          "1: lsrl   #1,%1 ; jcc 1f ; clrw %0@+\n\t"
00093          "1: lsrl   #1,%1 ; jcc 1f ; clrl %0@+\n\t"
00094          "1: lsrl   #1,%1 ; jcc 1f ; clrl %0@+ ; clrl %0@+\n\t"
00095          "1:"
00096             : "=a" (s), "=d" (count)
00097             : "0" (s), "1" (count)
00098      );
00099    } else {
      /* Scratch register: prologue bit tests, then holds the low word of count. */
00100       long tmp;
00101       __asm__ __volatile__(
00102             "movel %1,%2\n\t"
00103             "lsrl   #1,%2 ; jcc 1f ; clrb %0@+ ; subqw #1,%1\n\t"
00104             "lsrl   #1,%2 ; jcs 2f\n\t"  /* %0 increased=>bit 2 switched*/
00105             "clrw   %0@+  ; subqw  #2,%1 ; jra 2f\n\t"
00106          "1: lsrl   #1,%2 ; jcc 2f\n\t"
00107             "clrw   %0@+  ; subqw  #2,%1\n\t"
00108          "2: movew %1,%2; lsrl #2,%1 ; jeq 6f\n\t"
00109             "lsrl   #1,%1 ; jcc 3f ; clrl %0@+\n\t"
00110          "3: lsrl   #1,%1 ; jcc 4f ; clrl %0@+ ; clrl %0@+\n\t"
00111          "4: subql  #1,%1 ; jcs 6f\n\t"
00112          "5: clrl %0@+; clrl %0@+ ; clrl %0@+ ; clrl %0@+\n\t"
00113             "dbra %1,5b   ; clrw %1; subql #1,%1; jcc 5b\n\t"
00114          "6: movew %2,%1; btst #1,%1 ; jeq 7f ; clrw %0@+\n\t"
00115          "7:            ; btst #0,%1 ; jeq 8f ; clrb %0@+\n\t"
00116          "8:"
00117             : "=a" (s), "=d" (count), "=d" (tmp)
00118             : "0" (s), "1" (count)
00119      );
00120    }
00121 
00122    return(0);
00123 }
00124 
00125 
/*
 * asm_memset - set every bit in `count` bytes starting at `s`
 * (m68k inline asm).
 *
 * Same instruction sequence as asm_memclear_small, but the four data
 * registers hold -1 instead of 0, so every stored byte is 0xff.  The
 * address register starts at s + count and all stores predecrement, so
 * the buffer is written from its end towards its start.  The low four
 * bits of count select a 1-, 2-, 4- and 8-byte store; count / 16 blocks
 * of 16 bytes are then written by moveml in the dbra loop.
 *
 * Returns 0 on every path (never `s`).
 *
 * NOTE(review): dbra is the only loop (no "clrw/subql/jcc" extension as
 * in asm_memclear) -- confirm the intended upper bound on count.
 * NOTE(review): no "memory" clobber is declared although the asm stores
 * to memory -- confirm that is intended.
 * NOTE(review): the moveml register list assumes the four "d" (-1)
 * inputs end up in four different data registers -- verify with the
 * compiler in use.
 */
00126 static __inline__ void *asm_memset(void *s, size_t count)
00127 {
00128    if (!count)
00129       return(0);
00130 
00131    __asm__ __volatile__(
00132          "lsrl   #1,%1 ; jcc 1f ; moveb %2,%0@-\n\t"
00133       "1: lsrl   #1,%1 ; jcc 1f ; movew %2,%0@-\n\t"
00134       "1: lsrl   #1,%1 ; jcc 1f ; movel %2,%0@-\n\t"
00135       "1: lsrl   #1,%1 ; jcc 1f ; movel %2,%0@- ; movel %2,%0@-\n\t"
00136       "1: subql  #1,%1 ; jcs 3f\n\t"
00137       "2: moveml %2/%3/%4/%5,%0@-\n\t"
00138          "dbra %1,2b\n\t"
00139       "3:"
         /* %0 = running address (starts at the end), %1 = remaining count */
00140          : "=a" (s), "=d" (count)
         /* %2..%5 = the all-ones pattern that gets stored */
00141          :  "d" (-1), "d" (-1), "d" (-1), "d" (-1),
00142             "0" ((char *) s + count), "1" (count)
00143   );
00144 
00145    return(0);
00146 }
00147 
00148 
/*
 * asm_memmove - copy `count` bytes from `s` to `d` (m68k inline asm).
 *
 * The copy direction is chosen from the pointer order: when d < s the
 * copy runs forwards with postincrement addressing, otherwise (including
 * d == s) both pointers are advanced by count first and the copy runs
 * backwards with predecrement addressing.
 *
 * In each direction:
 *  - count < 16: the low four bits of count, shifted out one at a time,
 *    select a 1-, 2-, 4- and 8-byte move (so count == 0 moves nothing).
 *  - otherwise: tmp receives the destination address and its low two
 *    bits drive a prologue that moves up to one byte and one word
 *    (adjusting count); the low word of count is then saved, the bulk is
 *    moved as longwords in blocks of four movel inside a dbra loop that
 *    is extended past 16 bits by "clrw/subql/jcc", and an epilogue moves
 *    a trailing word and/or byte from the low two bits of the saved count.
 *
 * Returns 0 on every path (never `d`).
 *
 * NOTE(review): "%-" and "%+" in the templates are presumably the GCC
 * m68k escapes for a stack push / pop operand, used here to save and
 * restore the low word of count -- confirm against the compiler docs.
 * NOTE(review): none of the asm statements declares a "memory" clobber,
 * unlike fast_memmove in this file -- confirm that is intended.
 */
00149 static __inline__ void *asm_memmove(void *d, const void *s, size_t count)
00150 {
   /* Forward copy: destination lies below the source. */
00151    if (d < s) {
00152       if (count < 16) {
00153          __asm__ __volatile__(
00154                "lsrl   #1,%2 ; jcc 1f ; moveb %1@+,%0@+\n\t"
00155             "1: lsrl   #1,%2 ; jcc 1f ; movew %1@+,%0@+\n\t"
00156             "1: lsrl   #1,%2 ; jcc 1f ; movel %1@+,%0@+\n\t"
00157             "1: lsrl   #1,%2 ; jcc 1f ; movel %1@+,%0@+ ; movel %1@+,%0@+\n\t"
00158             "1:"
00159                : "=a" (d), "=a" (s), "=d" (count)
00160                : "0" (d), "1" (s), "2" (count)
00161         );
00162       } else {
         /* Scratch register holding the destination address for the prologue. */
00163          long tmp;
00164          __asm__ __volatile__(
00165                "movel  %0,%3\n\t"
00166                "lsrl   #1,%3 ; jcc 1f ; moveb %1@+,%0@+ ; subqw #1,%2\n\t"
00167                "lsrl   #1,%3 ; jcs 2f\n\t"  /* %0 increased=>bit 2 switched*/
00168                "movew  %1@+,%0@+  ; subqw  #2,%2 ; jra 2f\n\t"
00169             "1: lsrl   #1,%3 ; jcc 2f\n\t"
00170                "movew  %1@+,%0@+  ; subqw  #2,%2\n\t"
00171             "2: movew  %2,%-; lsrl #2,%2 ; jeq 6f\n\t"
00172                "lsrl   #1,%2 ; jcc 3f ; movel %1@+,%0@+\n\t"
00173             "3: lsrl   #1,%2 ; jcc 4f ; movel %1@+,%0@+ ; movel %1@+,%0@+\n\t"
00174             "4: subql  #1,%2 ; jcs 6f\n\t"
00175             "5: movel  %1@+,%0@+;movel %1@+,%0@+\n\t"
00176                "movel  %1@+,%0@+;movel %1@+,%0@+\n\t"
00177                "dbra   %2,5b ; clrw %2; subql #1,%2; jcc 5b\n\t"
00178             "6: movew  %+,%2; btst #1,%2 ; jeq 7f ; movew %1@+,%0@+\n\t"
00179             "7:              ; btst #0,%2 ; jeq 8f ; moveb %1@+,%0@+\n\t"
00180             "8:"
00181                : "=a" (d), "=a" (s), "=d" (count), "=d" (tmp)
00182                : "0" (d), "1" (s), "2" (count)
00183         );
00184       }
   /* Backward copy: start one past the end of both buffers. */
00185    } else {
00186       if (count < 16) {
00187          __asm__ __volatile__(
00188                "lsrl   #1,%2 ; jcc 1f ; moveb %1@-,%0@-\n\t"
00189             "1: lsrl   #1,%2 ; jcc 1f ; movew %1@-,%0@-\n\t"
00190             "1: lsrl   #1,%2 ; jcc 1f ; movel %1@-,%0@-\n\t"
00191             "1: lsrl   #1,%2 ; jcc 1f ; movel %1@-,%0@- ; movel %1@-,%0@-\n\t"
00192             "1:"
00193                : "=a" (d), "=a" (s), "=d" (count)
00194                : "0" ((char *) d + count), "1" ((char *) s + count), "2" (count)
00195         );
00196       } else {
         /* Scratch register holding the (end) destination address for the prologue. */
00197          long tmp;
00198          __asm__ __volatile__(
00199                "movel %0,%3\n\t"
00200                "lsrl   #1,%3 ; jcc 1f ; moveb %1@-,%0@- ; subqw #1,%2\n\t"
00201                "lsrl   #1,%3 ; jcs 2f\n\t"  /* %0 increased=>bit 2 switched*/
00202                "movew  %1@-,%0@-  ; subqw  #2,%2 ; jra 2f\n\t"
00203             "1: lsrl   #1,%3 ; jcc 2f\n\t"
00204                "movew  %1@-,%0@-  ; subqw  #2,%2\n\t"
00205             "2: movew %2,%-; lsrl #2,%2 ; jeq 6f\n\t"
00206                "lsrl   #1,%2 ; jcc 3f ; movel %1@-,%0@-\n\t"
00207             "3: lsrl   #1,%2 ; jcc 4f ; movel %1@-,%0@- ; movel %1@-,%0@-\n\t"
00208             "4: subql  #1,%2 ; jcs 6f\n\t"
00209             "5: movel %1@-,%0@-;movel %1@-,%0@-\n\t"
00210                "movel %1@-,%0@-;movel %1@-,%0@-\n\t"
00211                "dbra %2,5b ; clrw %2; subql #1,%2; jcc 5b\n\t"
00212             "6: movew %+,%2; btst #1,%2 ; jeq 7f ; movew %1@-,%0@-\n\t"
00213             "7:              ; btst #0,%2 ; jeq 8f ; moveb %1@-,%0@-\n\t"
00214             "8:"
00215                : "=a" (d), "=a" (s), "=d" (count), "=d" (tmp)
00216                : "0" ((char *) d + count), "1" ((char *) s + count), "2" (count)
00217         );
00218       }
00219    }
00220 
00221    return(0);
00222 }
00223 
00224 
00225 /* ++andreas: Simple and fast version of memmove, assumes size is
00226    divisible by 16, suitable for moving the whole screen bitplane */
/*
 * fast_memmove - move `size` bytes from `src` to `dst` in 16-byte blocks
 * (m68k inline asm).
 *
 * Does nothing when size is 0.  Otherwise the loop counter is
 * size / 16 - 1; each iteration loads 16 bytes into d0/d1/a0/a1 with one
 * moveml and stores them with another.  When dst < src the move runs
 * forwards from the start of both buffers, otherwise it runs backwards
 * from src + size and dst + size.  The dbra loop is extended past 16 bits
 * by the "clrw/subql/jcc" sequence.
 *
 * Note that in the asm operand list %0 is src and %1 is dst.
 * d0, d1, a0, a1 and memory are declared as clobbered.
 *
 * Because of the integer division, bytes beyond the last full 16-byte
 * block are not moved, and for 0 < size < 16 the expression
 * size / 16 - 1 wraps around in size_t arithmetic, so callers must pass
 * a non-zero multiple of 16 (or 0).
 *
 * NOTE(review): "%/" in the templates is presumably the GCC m68k
 * register-prefix escape -- confirm against the compiler docs.
 */
00227 static __inline__ void fast_memmove(char *dst, const char *src, size_t size)
00228 {
00229   if (!size)
00230     return;
00231   if (dst < src)
    /* Forward: load with postincrement, store, then bump dst by 16. */
00232     __asm__ __volatile__
00233       ("1:"
00234        "  moveml %0@+,%/d0/%/d1/%/a0/%/a1\n"
00235        "  moveml %/d0/%/d1/%/a0/%/a1,%1@\n"
00236        "  addql #8,%1; addql #8,%1\n"
00237        "  dbra %2,1b\n"
00238        "  clrw %2; subql #1,%2\n"
00239        "  jcc 1b"
00240        : "=a" (src), "=a" (dst), "=d" (size)
00241        : "0" (src), "1" (dst), "2" (size / 16 - 1)
00242        : "d0", "d1", "a0", "a1", "memory");
00243   else
    /* Backward: step src down by 16, load, store with predecrement. */
00244     __asm__ __volatile__
00245       ("1:"
00246        "  subql #8,%0; subql #8,%0\n"
00247        "  moveml %0@,%/d0/%/d1/%/a0/%/a1\n"
00248        "  moveml %/d0/%/d1/%/a0/%/a1,%1@-\n"
00249        "  dbra %2,1b\n"
00250        "  clrw %2; subql #1,%2\n"
00251        "  jcc 1b"
00252        : "=a" (src), "=a" (dst), "=d" (size)
00253        : "0" (src + size), "1" (dst + size), "2" (size / 16 - 1)
00254        : "d0", "d1", "a0", "a1", "memory");
00255 }
00256 
00257 #elif defined(CONFIG_SUN4)
00258 
00259 /* You may think that I'm crazy and that I should use generic
00260    routines.  No, I'm not: sun4's framebuffer crashes if we std
00261    into it, so we cannot use memset.  */
00262 
/*
 * sun4_memset - fill `count` bytes at `s` with `val`, one byte at a time.
 *
 * Written as an explicit byte loop instead of a call to memset().
 * The index is a size_t so that it can cover every possible `count`
 * (a signed int index would be compared against an unsigned count and
 * could overflow for large buffers).
 *
 * Returns `s`.
 */
static __inline__ void *sun4_memset(void *s, char val, size_t count)
{
    char *p = s;
    size_t i;

    for (i = 0; i < count; i++)
        p[i] = val;
    return s;
}
00270 
/*
 * asm_memset - sun4 variant: fill `count` bytes at `s` with the value 255
 * by delegating to sun4_memset().  Returns what sun4_memset() returns.
 */
static __inline__ void *asm_memset(void *s, size_t count)
{
    return sun4_memset(s, 255, count);
}
00275 
/*
 * asm_memclear - sun4 variant: zero `count` bytes at `s` by delegating to
 * sun4_memset().  Returns what sun4_memset() returns.
 */
static __inline__ void *asm_memclear(void *s, size_t count)
{
    return sun4_memset(s, 0, count);
}
00280 
/*
 * asm_memclear_small - sun4 variant: identical to asm_memclear here; zero
 * `count` bytes at `s` by delegating to sun4_memset().  Returns what
 * sun4_memset() returns.
 */
static __inline__ void *asm_memclear_small(void *s, size_t count)
{
    return sun4_memset(s, 0, count);
}
00285 
/*
 * fast_memmove - sun4 variant; to be honest, this is a slow memmove: a
 * plain byte-by-byte copy of `count` bytes from `s` to `d`.
 *
 * Overlapping buffers are handled by choosing the direction from the
 * pointer order: forwards when d < s, backwards otherwise.  The index is
 * a size_t so it can cover every possible `count`, and the source is
 * read through a const-qualified pointer.
 */
static __inline__ void fast_memmove(void *d, const void *s, size_t count)
{
    char *dst = d;
    const char *src = s;
    size_t i;

    if (dst < src) {
        for (i = 0; i < count; i++)
            dst[i] = src[i];
    } else {
        /* Walk down from the last byte so an overlapping tail survives. */
        for (i = count; i > 0; i--)
            dst[i - 1] = src[i - 1];
    }
}
00297 
/*
 * asm_memmove - sun4 variant: move `size` bytes from `src` to `dst` via
 * fast_memmove() and return `dst`.
 */
static __inline__ void *asm_memmove(char *dst, const char *src, size_t size)
{
    fast_memmove(dst, src, size);
    return dst;
}
00303 
00304 #else
00305 
/*
 * asm_memclear_small - portable fallback: zero `count` bytes at `s`.
 *
 * Plain wrapper around memset(); returns `s`, as memset() does.
 */
static __inline__ void *asm_memclear_small(void *s, size_t count)
{
    return memset(s, 0, count);
}
00310 
/*
 * asm_memclear - portable fallback: zero `count` bytes at `s`.
 *
 * Plain wrapper around memset(); returns `s`, as memset() does.
 */
static __inline__ void *asm_memclear(void *s, size_t count)
{
    return memset(s, 0, count);
}
00315 
/*
 * asm_memset - portable fallback: set every byte of the `count` bytes at
 * `s` to 255 (all bits set).
 *
 * Plain wrapper around memset(); returns `s`, as memset() does.
 */
static __inline__ void *asm_memset(void *s, size_t count)
{
    return memset(s, 255, count);
}
00320 
00321 #if defined(__i386__)
00322 
/*
 * fast_memmove - move `count` bytes from `s` to `d` (i386 inline asm).
 *
 * Both branches load ecx with count, edi with the destination and esi
 * with the source, then consume the two low bits of count with shrl
 * (one byte if bit 0 is set, one 16-bit word if bit 1 is set) and move
 * the remaining count / 4 dwords with "rep movsl".
 *
 *  - d < s: the direction flag is cleared (cld) and the move runs
 *    forwards from the start of both buffers.
 *  - otherwise: the direction flag is set (std), edi/esi start at
 *    d + count - 4 and s + count - 4, the odd byte and word are copied
 *    through al/ax from offsets 3 and 2 and the pointers stepped down by
 *    hand, "rep movsl" then runs backwards, and the direction flag is
 *    cleared again (cld) before leaving the asm.
 *
 * d0..d3 exist only to give the asm its ecx/edi/esi/eax outputs; their
 * values are never read afterwards, and d3 is used by the backward
 * branch only.  Both asm statements declare a "memory" clobber.
 */
00323 static __inline__ void fast_memmove(void *d, const void *s, size_t count)
00324 {
00325   int d0, d1, d2, d3;
    /* Forward move: destination lies below the source. */
00326     if (d < s) {
00327 __asm__ __volatile__ (
00328         "cld\n\t"
00329         "shrl $1,%%ecx\n\t"
00330         "jnc 1f\n\t"
00331         "movsb\n"
00332         "1:\tshrl $1,%%ecx\n\t"
00333         "jnc 2f\n\t"
00334         "movsw\n"
00335         "2:\trep\n\t"
00336         "movsl"
00337         : "=&c" (d0), "=&D" (d1), "=&S" (d2)
00338         :"0"(count),"1"((long)d),"2"((long)s)
00339         :"memory");
    /* Backward move: start from the last dword of both buffers. */
00340     } else {
00341 __asm__ __volatile__ (
00342         "std\n\t"
00343         "shrl $1,%%ecx\n\t"
00344         "jnc 1f\n\t"
00345         "movb 3(%%esi),%%al\n\t"
00346         "movb %%al,3(%%edi)\n\t"
00347         "decl %%esi\n\t"
00348         "decl %%edi\n"
00349         "1:\tshrl $1,%%ecx\n\t"
00350         "jnc 2f\n\t"
00351         "movw 2(%%esi),%%ax\n\t"
00352         "movw %%ax,2(%%edi)\n\t"
00353         "decl %%esi\n\t"
00354         "decl %%edi\n\t"
00355         "decl %%esi\n\t"
00356         "decl %%edi\n"
00357         "2:\trep\n\t"
00358         "movsl\n\t"
00359         "cld"
00360         : "=&c" (d0), "=&D" (d1), "=&S" (d2), "=&a" (d3)
00361         :"0"(count),"1"(count-4+(long)d),"2"(count-4+(long)s)
00362         :"memory");
00363     }
00364 }
00365 
/*
 * asm_memmove - i386 variant: move `size` bytes from `src` to `dst` via
 * fast_memmove() and return `dst`.
 */
static __inline__ void *asm_memmove(char *dst, const char *src, size_t size)
{
    fast_memmove(dst, src, size);
    return dst;
}
00371 
00372 #else /* !i386 */
00373 
00374     /*
00375      *  Anyone who'd like to write asm functions for other CPUs?
00376      *   (Why are these functions better than those from include/asm/string.h?)
00377      */
00378 
/*
 * asm_memmove - portable fallback: move `count` bytes from `s` to `d`.
 *
 * Plain wrapper around memmove(), so overlapping buffers are handled;
 * returns `d`, as memmove() does.
 */
static __inline__ void *asm_memmove(void *d, const void *s, size_t count)
{
    return memmove(d, s, count);
}
00383 
/*
 * fast_memmove - portable fallback: move `size` bytes from `src` to `dst`.
 *
 * Plain wrapper around memmove(), so overlapping buffers are handled.
 * Returns nothing.
 */
static __inline__ void fast_memmove(char *dst, const char *src, size_t size)
{
    memmove(dst, src, size);
}
00388 
00389 #endif /* !i386 */
00390 
00391 #endif
00392 
00393 #endif

Generated at Wed Apr 4 19:54:04 2001 for ClanLib by doxygen1.2.6 written by Dimitri van Heesch, © 1997-2001