[PATCH 8/8] g10/armor: optimize radix64 to binary conversion

Jussi Kivilinna jussi.kivilinna at iki.fi
Sat Oct 27 23:06:21 CEST 2018


* g10/armor.c (asctobin): Larger look-up table for fast path.
(initialize): Update 'asctobin' initialization.
(radix64_read): Add fast path for radix64 to binary conversion.
--

This patch adds a fast path for radix64-to-binary conversion in
armored decryption.

Benchmark results are below, tested on an Intel Core i7-4790K (turbo off).
2 GiB of data was encrypted through a pipe to a ramfs file using AES128.
The ramfs file was then decrypted out through a pipe to /dev/null.

before patch-set
----------------
               gpg process
armor:         user time    pipe transfer rate
 encrypt-aead:  13.8         140 MB/s
 decrypt-aead:  30.6         68 MB/s
 encrypt-cfb:   17.4         114 MB/s
 decrypt-cfb:   32.6         64 MB/s

after (decrypt+iobuf+crc+radix64 opt)
-------------------------------------
               gpg process
armor:         user time    pipe transfer rate
 decrypt-aead:  9.8          200 MB/s
 decrypt-cfb:   11.9         168 MB/s

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 0 files changed

diff --git a/g10/armor.c b/g10/armor.c
index 95293d91c..972766503 100644
--- a/g10/armor.c
+++ b/g10/armor.c
@@ -40,7 +40,7 @@
 static const byte bintoasc[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                "abcdefghijklmnopqrstuvwxyz"
                                "0123456789+/";
-static byte asctobin[256]; /* runtime initialized */
+static u32 asctobin[4][256]; /* runtime initialized */
 static int is_initialized;
 
 
@@ -171,11 +171,16 @@ initialize(void)
     u32 i;
     const byte *s;
 
-    /* build the helptable for radix64 to bin conversion */
-    for(i=0; i < 256; i++ )
-	asctobin[i] = 255; /* used to detect invalid characters */
+    /* Build the helptable for radix64 to bin conversion.  Value 0xffffffff is
+       used to detect invalid characters.  */
+    memset (asctobin, 0xff, sizeof(asctobin));
     for(s=bintoasc,i=0; *s; s++,i++ )
-	asctobin[*s] = i;
+      {
+	asctobin[0][*s] = i << (0 * 6);
+	asctobin[1][*s] = i << (1 * 6);
+	asctobin[2][*s] = i << (2 * 6);
+	asctobin[3][*s] = i << (3 * 6);
+      }
 
     is_initialized=1;
 }
@@ -802,11 +807,13 @@ radix64_read( armor_filter_context_t *afx, IOBUF a, size_t *retn,
 	      byte *buf, size_t size )
 {
     byte val;
-    int c, c2;
+    int c;
+    u32 binc;
     int checkcrc=0;
     int rc = 0;
     size_t n = 0;
-    int  idx, onlypad=0;
+    int idx, onlypad=0;
+    int skip_fast = 0;
 
     idx = afx->idx;
     val = afx->radbuf[0];
@@ -827,6 +834,122 @@ radix64_read( armor_filter_context_t *afx, IOBUF a, size_t *retn,
 	}
 
       again:
+	binc = asctobin[0][c];
+
+	if( binc != 0xffffffffUL )
+	  {
+	    if( idx == 0 && skip_fast == 0
+		&& afx->buffer_pos + (16 - 1) < afx->buffer_len
+		&& n + 12 < size)
+	      {
+		/* Fast path for radix64 to binary conversion.  */
+		u32 b0,b1,b2,b3;
+
+		/* Speculatively load 15 more input bytes.  */
+		b0 = binc << (3 * 6);
+		b0 |= asctobin[2][afx->buffer[afx->buffer_pos + 0]];
+		b0 |= asctobin[1][afx->buffer[afx->buffer_pos + 1]];
+		b0 |= asctobin[0][afx->buffer[afx->buffer_pos + 2]];
+		b1  = asctobin[3][afx->buffer[afx->buffer_pos + 3]];
+		b1 |= asctobin[2][afx->buffer[afx->buffer_pos + 4]];
+		b1 |= asctobin[1][afx->buffer[afx->buffer_pos + 5]];
+		b1 |= asctobin[0][afx->buffer[afx->buffer_pos + 6]];
+		b2  = asctobin[3][afx->buffer[afx->buffer_pos + 7]];
+		b2 |= asctobin[2][afx->buffer[afx->buffer_pos + 8]];
+		b2 |= asctobin[1][afx->buffer[afx->buffer_pos + 9]];
+		b2 |= asctobin[0][afx->buffer[afx->buffer_pos + 10]];
+		b3  = asctobin[3][afx->buffer[afx->buffer_pos + 11]];
+		b3 |= asctobin[2][afx->buffer[afx->buffer_pos + 12]];
+		b3 |= asctobin[1][afx->buffer[afx->buffer_pos + 13]];
+		b3 |= asctobin[0][afx->buffer[afx->buffer_pos + 14]];
+
+		/* Check if any of the input bytes were invalid. */
+		if( (b0 | b1 | b2 | b3) != 0xffffffffUL )
+		  {
+		    /* All 16 bytes are valid. */
+		    buf[n + 0] = b0 >> (2 * 8);
+		    buf[n + 1] = b0 >> (1 * 8);
+		    buf[n + 2] = b0 >> (0 * 8);
+		    buf[n + 3] = b1 >> (2 * 8);
+		    buf[n + 4] = b1 >> (1 * 8);
+		    buf[n + 5] = b1 >> (0 * 8);
+		    buf[n + 6] = b2 >> (2 * 8);
+		    buf[n + 7] = b2 >> (1 * 8);
+		    buf[n + 8] = b2 >> (0 * 8);
+		    buf[n + 9] = b3 >> (2 * 8);
+		    buf[n + 10] = b3 >> (1 * 8);
+		    buf[n + 11] = b3 >> (0 * 8);
+		    afx->buffer_pos += 16 - 1;
+		    n += 12;
+		    continue;
+		  }
+		else if( b0 == 0xffffffffUL )
+		  {
+		    /* byte[1..3] have invalid character(s).  Switch to slow
+		       path.  */
+		    skip_fast = 1;
+		  }
+		else if( b1 == 0xffffffffUL )
+		  {
+		    /* byte[4..7] have invalid character(s), first 4 bytes are
+		       valid.  */
+		    buf[n + 0] = b0 >> (2 * 8);
+		    buf[n + 1] = b0 >> (1 * 8);
+		    buf[n + 2] = b0 >> (0 * 8);
+		    afx->buffer_pos += 4 - 1;
+		    n += 3;
+		    skip_fast = 1;
+		    continue;
+		  }
+		else if( b2 == 0xffffffffUL )
+		  {
+		    /* byte[8..11] have invalid character(s), first 8 bytes are
+		       valid.  */
+		    buf[n + 0] = b0 >> (2 * 8);
+		    buf[n + 1] = b0 >> (1 * 8);
+		    buf[n + 2] = b0 >> (0 * 8);
+		    buf[n + 3] = b1 >> (2 * 8);
+		    buf[n + 4] = b1 >> (1 * 8);
+		    buf[n + 5] = b1 >> (0 * 8);
+		    afx->buffer_pos += 8 - 1;
+		    n += 6;
+		    skip_fast = 1;
+		    continue;
+		  }
+		else /*if( b3 == 0xffffffffUL )*/
+		  {
+		    /* byte[12..15] have invalid character(s), first 12 bytes
+		       are valid.  */
+		    buf[n + 0] = b0 >> (2 * 8);
+		    buf[n + 1] = b0 >> (1 * 8);
+		    buf[n + 2] = b0 >> (0 * 8);
+		    buf[n + 3] = b1 >> (2 * 8);
+		    buf[n + 4] = b1 >> (1 * 8);
+		    buf[n + 5] = b1 >> (0 * 8);
+		    buf[n + 6] = b2 >> (2 * 8);
+		    buf[n + 7] = b2 >> (1 * 8);
+		    buf[n + 8] = b2 >> (0 * 8);
+		    afx->buffer_pos += 12 - 1;
+		    n += 9;
+		    skip_fast = 1;
+		    continue;
+		  }
+	      }
+
+	    switch(idx)
+	      {
+		case 0: val =  binc << 2; break;
+		case 1: val |= (binc>>4)&3; buf[n++]=val;val=(binc<<4)&0xf0;break;
+		case 2: val |= (binc>>2)&15; buf[n++]=val;val=(binc<<6)&0xc0;break;
+		case 3: val |= binc&0x3f; buf[n++] = val; break;
+	      }
+	    idx = (idx+1) % 4;
+
+	    continue;
+	  }
+
+	skip_fast = 0;
+
 	if( c == '\n' || c == ' ' || c == '\r' || c == '\t' )
 	    continue;
 	else if( c == '=' ) { /* pad character: stop */
@@ -857,10 +980,10 @@ radix64_read( armor_filter_context_t *afx, IOBUF a, size_t *retn,
             if (afx->buffer_pos + 6 < afx->buffer_len
                 && afx->buffer[afx->buffer_pos + 0] == '3'
                 && afx->buffer[afx->buffer_pos + 1] == 'D'
-                && asctobin[afx->buffer[afx->buffer_pos + 2]] != 255
-                && asctobin[afx->buffer[afx->buffer_pos + 3]] != 255
-                && asctobin[afx->buffer[afx->buffer_pos + 4]] != 255
-                && asctobin[afx->buffer[afx->buffer_pos + 5]] != 255
+                && asctobin[0][afx->buffer[afx->buffer_pos + 2]] != 0xffffffffUL
+                && asctobin[0][afx->buffer[afx->buffer_pos + 3]] != 0xffffffffUL
+                && asctobin[0][afx->buffer[afx->buffer_pos + 4]] != 0xffffffffUL
+                && asctobin[0][afx->buffer[afx->buffer_pos + 5]] != 0xffffffffUL
                 && afx->buffer[afx->buffer_pos + 6] == '\n')
               {
                 afx->buffer_pos += 2;
@@ -875,17 +998,10 @@ radix64_read( armor_filter_context_t *afx, IOBUF a, size_t *retn,
 	    checkcrc++;
 	    break;
 	}
-	else if( (c = asctobin[(c2=c)]) == 255 ) {
-	    log_error(_("invalid radix64 character %02X skipped\n"), c2);
+	else {
+	    log_error(_("invalid radix64 character %02X skipped\n"), c);
 	    continue;
 	}
-	switch(idx) {
-	  case 0: val =  c << 2; break;
-	  case 1: val |= (c>>4)&3; buf[n++]=val;val=(c<<4)&0xf0;break;
-	  case 2: val |= (c>>2)&15; buf[n++]=val;val=(c<<6)&0xc0;break;
-	  case 3: val |= c&0x3f; buf[n++] = val; break;
-	}
-	idx = (idx+1) % 4;
     }
 
     afx->idx = idx;
@@ -924,13 +1040,13 @@ radix64_read( armor_filter_context_t *afx, IOBUF a, size_t *retn,
 	    u32 mycrc = 0;
 	    idx = 0;
 	    do {
-		if( (c = asctobin[c]) == 255 )
+		if( (binc = asctobin[0][c]) == 0xffffffffUL )
 		    break;
 		switch(idx) {
-		  case 0: val =  c << 2; break;
-		  case 1: val |= (c>>4)&3; mycrc |= val << 16;val=(c<<4)&0xf0;break;
-		  case 2: val |= (c>>2)&15; mycrc |= val << 8;val=(c<<6)&0xc0;break;
-		  case 3: val |= c&0x3f; mycrc |= val; break;
+		  case 0: val =  binc << 2; break;
+		  case 1: val |= (binc>>4)&3; mycrc |= val << 16;val=(binc<<4)&0xf0;break;
+		  case 2: val |= (binc>>2)&15; mycrc |= val << 8;val=(binc<<6)&0xc0;break;
+		  case 3: val |= binc&0x3f; mycrc |= val; break;
 		}
 		for(;;) {
 		    if( afx->buffer_pos < afx->buffer_len )




More information about the Gnupg-devel mailing list