Forum

Posted by
Martin Green  -  October 2008
I couldn't find any support for SHA-512 in the built-in crypto library, so I made my own Pike module.
It works fine, but it is a little slow. Since this is my first experience with Pike, I wonder if there are any obvious optimizations that I could make in the code?

Here is the code:

///////////////////////////////////////////////////////////////////////
//
//  SHA512 hash in Pike
// 
//  2008-06-10, Martin Green, Initial revision Ported from C++
//

#include < module.h> // (space needed so the forum does not remove the "html-tag"...)
inherit "module";

// Module registration constants: the module kind, its display name and the
// documentation text. NOTE(review): presumably read by the hosting server's
// module loader (declared via module.h) -- confirm against the server docs.
constant module_type = MODULE_TAG;
constant module_name = "SHA512 Hash Module";
constant module_doc  = "Calculates a SHA512 hash for any given string. Call the method HashSHA512 or use the rxml-tag: <sha512> with a string to be hashed, it returns the hash value.";

// Size of the finished digest in bytes (passed to sha_end by HashSHA512).
#define SHA512_DIGEST_SIZE  64
// Size of one input block in bytes (16 words of 64 bits).
#define SHA512_BLOCK_SIZE  128
// Bit mask that reduces a byte count to an offset inside the current block.
#define SHA512_MASK (SHA512_BLOCK_SIZE - 1)
// Pike integers do not wrap, so 64-bit sums are truncated with this mask.
#define MASK64BITS 0xffffffffffffffff

// Both aliases are plain Pike ints; the names only document the intended
// width of the value they hold.
typedef int sha2_64t;
typedef int sha2_32t;

// Rotate the low 32 bits of x right by n positions (0 <= n <= 32).
//
// Pike integers grow on demand instead of wrapping, so an unmasked
// "x << (32 - n)" spills bits above bit 31. Both the input and the result
// are masked here so that the function is a true 32-bit rotate on its own,
// independent of any masking done by the caller.
sha2_32t rotr32(sha2_32t x, int n)
{
  x &= 0xffffffff;
  return ((x >> n) | (x << (32 - n))) & 0xffffffff;
}
// Reverse the byte order of a 32-bit value.
// Two rotations bring every byte to its mirrored position; each mask then
// keeps the two bytes that the respective rotation placed correctly.
sha2_32t bswap_32(sha2_32t x)
{
  sha2_32t from_rot24 = rotr32(x, 24) & 0x00ff00ff;
  sha2_32t from_rot8  = rotr32(x, 8) & 0xff00ff00;
  return from_rot24 | from_rot8;
}
// Rotate the low 64 bits of x right by n positions (0 <= n <= 64).
//
// Pike integers grow on demand instead of wrapping, so an unmasked
// "x << (64 - n)" yields a value of up to 128 bits. Masking the input and
// the result keeps this a true 64-bit rotate and keeps the intermediate
// values handled by the mixing functions within 64 bits.
sha2_64t rotr64(sha2_64t x, int n)
{
  x &= MASK64BITS;
  return ((x >> n) | (x << (64 - n))) & MASK64BITS;
}
// Reverse the byte order of a 64-bit value: byte-swap each 32-bit half
// and exchange the two halves.
sha2_64t bswap_64(sha2_64t x)
{
  sha2_64t low_half  = x & 0x00000000ffffffff;
  sha2_64t high_half = (x & 0xffffffff00000000) >> 32;
  return (bswap_32(low_half) << 32) | bswap_32(high_half);
}
// Byte-swap the first n words of p in place, walking from index n-1 down
// to 0, and return the same array.
array(sha2_64t) bsw_64(array(sha2_64t) p, int n)
{
  for (int idx = n; idx--; )
    p[idx] = bswap_64(p[idx]);
  return p;
}

/* SHA512 mixing function definitions   */

// SHA-512 "big sigma 0": XOR of x rotated right by 28, 34 and 39 bits.
sha2_64t s512_0(sha2_64t x)
{
  sha2_64t mixed = rotr64(x, 28);
  mixed ^= rotr64(x, 34);
  mixed ^= rotr64(x, 39);
  return mixed;
}
// SHA-512 "big sigma 1": XOR of x rotated right by 14, 18 and 41 bits.
sha2_64t s512_1(sha2_64t x)
{
  sha2_64t mixed = rotr64(x, 14);
  mixed ^= rotr64(x, 18);
  mixed ^= rotr64(x, 41);
  return mixed;
}
// SHA-512 "small sigma 0": x rotated right by 1 and by 8 bits, XORed with
// x shifted right by 7 bits. The plain shift is only a 64-bit shift if x
// itself has no bits above bit 63.
sha2_64t g512_0(sha2_64t x)
{
  sha2_64t mixed = rotr64(x, 1);
  mixed ^= rotr64(x, 8);
  mixed ^= x >> 7;
  return mixed;
}
// SHA-512 "small sigma 1": x rotated right by 19 and by 61 bits, XORed
// with x shifted right by 6 bits. The plain shift is only a 64-bit shift
// if x itself has no bits above bit 63.
sha2_64t g512_1(sha2_64t x)
{
  sha2_64t mixed = rotr64(x, 19);
  mixed ^= rotr64(x, 61);
  mixed ^= x >> 6;
  return mixed;
}

// The 80 SHA-512 round constants. h5_cycle reads them as k512[i + j],
// where i is the round within a group of 16 and j is the group offset
// (0, 16, 32, 48, 64).
array(sha2_64t) k512 =
({
    0x428a2f98d728ae22, 0x7137449123ef65cd,
    0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
    0x3956c25bf348b538, 0x59f111f1b605d019,
    0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
    0xd807aa98a3030242, 0x12835b0145706fbe,
    0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
    0x72be5d74f27b896f, 0x80deb1fe3b1696b1,
    0x9bdc06a725c71235, 0xc19bf174cf692694,
    0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
    0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
    0x2de92c6f592b0275, 0x4a7484aa6ea6e483,
    0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
    0x983e5152ee66dfab, 0xa831c66d2db43210,
    0xb00327c898fb213f, 0xbf597fc7beef0ee4,
    0xc6e00bf33da88fc2, 0xd5a79147930aa725,
    0x06ca6351e003826f, 0x142929670a0e6e70,
    0x27b70a8546d22ffc, 0x2e1b21385c26c926,
    0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
    0x650a73548baf63de, 0x766a0abb3c77b2a8,
    0x81c2c92e47edaee6, 0x92722c851482353b,
    0xa2bfe8a14cf10364, 0xa81a664bbc423001,
    0xc24b8b70d0f89791, 0xc76c51a30654be30,
    0xd192e819d6ef5218, 0xd69906245565a910,
    0xf40e35855771202a, 0x106aa07032bbd1b8,
    0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
    0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
    0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
    0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
    0x748f82ee5defb2fc, 0x78a5636f43172f60,
    0x84c87814a1f0ab72, 0x8cc702081a6439ec,
    0x90befffa23631e28, 0xa4506cebde82bde9,
    0xbef9a3f7b2c67915, 0xc67178f2e372532b,
    0xca273eceea26619c, 0xd186b8c721c0c207,
    0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
    0x06f067aa72176fba, 0x0a637dc5a2c898a6,
    0x113f9804bef90dae, 0x1b710b35131c471b,
    0x28db77f523047d84, 0x32caab7b40c72493,
    0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
    0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
    0x5fcb6fab3ad6faec, 0x6c44198c4a475817
});

/* SHA512 initialisation data: the eight initial hash words. Every new   */
/* sha512_ctx starts from a copy of this array (see sha512_ctx.create),  */
/* so the table itself is never modified.                                */

array(sha2_64t) i512 =
({
    0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
    0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
    0x510e527fade682d1, 0x9b05688c2b3e6c1f,
    0x1f83d9abfb41bd6b, 0x5be0cd19137e2179
});

/* SHA384/512 Final padding and digest calculation  */

/* m2[n] keeps the n most significant bytes of a 64-bit word and clears  */
/* the rest. sha_end indexes it with (byte count & 7) to drop everything */
/* after the last message byte of a partially filled word.               */
array(sha2_64t) m2 =
({
    0x0000000000000000, 0xff00000000000000,
    0xffff000000000000, 0xffffff0000000000,
    0xffffffff00000000, 0xffffffffff000000,
    0xffffffffffff0000, 0xffffffffffffff00
});

/* b2[n] holds the byte 0x80 directly after the n most significant bytes */
/* of a 64-bit word. sha_end ORs it in as the single 1 bit that starts   */
/* the message padding.                                                  */
array(sha2_64t) b2 =
({
    0x8000000000000000, 0x0080000000000000,
    0x0000800000000000, 0x0000008000000000,
    0x0000000080000000, 0x0000000000800000,
    0x0000000000008000, 0x0000000000000080
});

// Bitwise "choose": for every bit position the result takes the bit of y
// where x has a 1 and the bit of z where x has a 0.
int ch(int x, int y, int z)
{
  int taken_from_y = x & y;
  int taken_from_z = ~x & z;
  return taken_from_y ^ taken_from_z;
}
// Bitwise "majority": every result bit is 1 exactly when at least two of
// the corresponding bits of x, y and z are 1.
int maj(int x, int y, int z)
{
  int xy = x & y;
  int xz = x & z;
  int yz = y & z;
  return xy ^ xz ^ yz;
}

// Incremental SHA-512 state.
//
// Usage: create an instance, feed message bytes with sha512_hash() (one or
// several calls), then call sha_end() once to obtain the digest bytes.
//
// Message bytes are first packed into m_Wbuf with the earliest byte in the
// least significant lane of each 64-bit word; bsw_64() then reverses the
// bytes of each word so that the earliest byte ends up most significant,
// which is the order the compression function works on.
class sha512_ctx
{
  // Start a fresh hash: zero byte count, standard initial hash words and
  // an empty 16-word block buffer.
  void create()
  {
    m_Count = allocate(2);
    m_Hash = copy_value(i512);
    m_Wbuf = allocate(16);
  }

  // Message schedule step: replaces slot (i & 15) of the 16-word circular
  // buffer by the next schedule word and returns it, truncated to 64 bits.
  int h5(int i)
  {
    return m_Wbuf[i & 15] = (m_Wbuf[i & 15]
                             + g512_1(m_Wbuf[(i + 14) & 15])
                             + m_Wbuf[(i + 9) & 15]
                             + g512_0(m_Wbuf[(i + 1) & 15])) & MASK64BITS;
  }

  // One SHA-512 round, number i + j (i = 0..15 inside a group, j = group
  // offset). Rather than moving the eight working variables on every
  // round, the indices into v are rotated by i, so the variables are back
  // in their starting positions every eight rounds.
  // For j == 0 the message word is read straight from m_Wbuf, for later
  // groups it is produced by h5(). v is modified in place and returned.
  array(sha2_64t) h5_cycle(array(sha2_64t) v, int i, int j)
  {
    v[(7 - i) & 7] = (v[(7 - i) & 7] + ((j != 0 ? h5(i) : m_Wbuf[i & 15]) + k512[i + j] + s512_1(v[(4 - i) & 7]) + ch(v[(4 - i) & 7], v[(5 - i) & 7], v[(6 - i) & 7]))) & MASK64BITS;
    v[(3 - i) & 7] = (v[(3 - i) & 7] + v[(7 - i) & 7]) & MASK64BITS;
    v[(7 - i) & 7] = (v[(7 - i) & 7] + (s512_0(v[(0 - i) & 7]) + maj(v[(0 - i) & 7], v[(1 - i) & 7], v[(2 - i) & 7]))) & MASK64BITS;
    return v;
  }

  // Run the 80-round compression function over the block held in m_Wbuf
  // and add the result into m_Hash. m_Wbuf is overwritten with schedule
  // words in the process.
  private void sha512_compile()
  {
    array(sha2_64t) v = copy_value(m_Hash);
    sha2_32t j;

    for (j = 0; j < 80; j += 16)
    {
      for (int i = 0; i < 16; ++i)
        v = h5_cycle(v, i, j);
    }

    for (j = 0; j < 8; ++j)
    {
      m_Hash[j] = (m_Hash[j] + v[j]) & MASK64BITS;
    }
  }

  // Copy `count` bytes from data[src...] into the block buffer starting at
  // byte offset `dst` of the block. The lane inside a word is derived from
  // the position in the BLOCK, so data that does not start on a word
  // boundary still lands in the right place. Writing lane 0 replaces the
  // whole word, which also clears whatever the word held before.
  private void absorb(array(int) data, int src, int dst, int count)
  {
    for (int k = 0; k < count; ++k)
    {
      int at = dst + k;
      int octet = data[src + k] & 0xff;
      if (at & 7)
        m_Wbuf[at >> 3] |= octet << ((at & 7) << 3);
      else
        m_Wbuf[at >> 3] = octet;
    }
  }

  // Feed message bytes into the hash. Only the low 8 bits of every element
  // of `data` are used. May be called repeatedly before sha_end().
  void sha512_hash(array(int) data)
  {
    int len = sizeof(data);
    sha2_32t pos = m_Count[0] & SHA512_MASK;    // bytes already buffered
    sha2_32t space = SHA512_BLOCK_SIZE - pos;   // room left in the block
    int sp = 0;                                 // read position in data

    // Maintain the byte count as two 64-bit words. Pike integers never
    // wrap, so the carry into the high word is taken explicitly.
    m_Count[0] += len;
    if (m_Count[0] > MASK64BITS)
    {
      m_Count[1] += m_Count[0] >> 64;
      m_Count[0] &= MASK64BITS;
    }

    while (len >= space)     /* transfer whole blocks while possible  */
    {
      // Every block is copied starting from its own offset; a copy index
      // shared between iterations would leave all blocks after the first
      // (and the tail) uncopied.
      absorb(data, sp, pos, space);
      sp += space;
      len -= space;
      space = SHA512_BLOCK_SIZE;
      pos = 0;
      m_Wbuf = bsw_64(m_Wbuf, SHA512_BLOCK_SIZE >> 3);
      sha512_compile();
    }

    // Keep the remaining bytes (fewer than one block) for the next call.
    absorb(data, sp, pos, len);
  }

  // Finish the hash: append the padding and the message length, run the
  // final compression(s) and return the first `hlen` bytes of the digest,
  // one value 0..255 per array element.
  array(int) sha_end(int hlen)
  {
    sha2_32t i = (m_Count[0] & SHA512_MASK);

    // Bring the words that hold buffered bytes into the byte order the
    // compression function expects.
    m_Wbuf = bsw_64(m_Wbuf, (i + 7) >> 3);

    /* we now need to mask valid bytes and add the padding which is */
    /* a single 1 bit and as many zero bits as necessary.           */
    m_Wbuf[i >> 3] = (m_Wbuf[i >> 3] & m2[i & 7]) | b2[i & 7];

    /* we need 17 or more empty byte positions, one for the padding */
    /* byte (above) and sixteen for the length count.  If there is  */
    /* not enough space pad and empty the buffer                    */
    if (i > SHA512_BLOCK_SIZE - 17)
    {
      if (i < 120) m_Wbuf[15] = 0;
      sha512_compile();
      i = 0;
    }
    else
      i = (i >> 3) + 1;

    while (i < 14) m_Wbuf[i++] = 0;

    // The last two words carry the message length in bits (byte count
    // times 8) as a 128-bit number, truncated to 64 bits per word.
    m_Wbuf[14] = ((m_Count[1] << 3) | (m_Count[0] >> 61)) & MASK64BITS;
    m_Wbuf[15] = (m_Count[0] << 3) & MASK64BITS;

    sha512_compile();

    // Emit the hash words byte by byte, most significant byte first.
    array(int) hval = allocate(hlen);
    for (i = 0; i < hlen; ++i)
    {
      hval[i] = (m_Hash[i >> 3] >> 8 * (~i & 7)) & 0xFF;
    }
    return hval;
  }

  // Print the byte count, the hash words and the block buffer.
  void debugdump()
  {
    write("sha512_ctx{\n");
    write("count(%d):", sizeof(m_Count));
    foreach(m_Count;; sha2_64t c) write("%d, ", c);
    write("\nhash(%d):", sizeof(m_Hash));
    foreach(m_Hash;; sha2_64t h) write("0x%016X, ", h);
    write("\nwbuf(%d):", sizeof(m_Wbuf));
    foreach(m_Wbuf;; sha2_64t w) write("0x%016X, ", w);
    write("\n}\n");
  }

  public array(sha2_64t) m_Count; // [2]  bytes hashed: { low word, high word }
  public array(sha2_64t) m_Hash;  // [8]  running hash value
  public array(sha2_64t) m_Wbuf;  // [16] current block / message schedule
};

// Hash strData with SHA-512 and return the digest as 128 upper-case
// hexadecimal characters. Each character of strData contributes one array
// element to the hash input.
string HashSHA512(string strData)
{
  sha512_ctx hasher = sha512_ctx();
  hasher->sha512_hash((array(int))strData);
  array(int) digest = hasher->sha_end(SHA512_DIGEST_SIZE);

  // Format every digest byte as two hex digits and join the pieces.
  return map(digest, lambda(int octet) { return sprintf("%02X", octet); }) * "";
}

// RXML tag <sha512>: the result of the tag is the SHA-512 hex digest of
// its content.
class TagSha512
{
  inherit RXML.Tag;
  constant name = "sha512";

  class Frame
  {
    inherit RXML.Frame;

    // Stores the digest of `content` in `result`; the method itself
    // returns 0.
    array do_return(RequestID id)
    {
      string digest_hex = HashSHA512(content);
      result = digest_hex;
      return 0;
    }
  };
};

/*
// Test:
int main()
{
// sleep(0);
// int start_time = time();
// for (int loop=0; loop < 10000; ++loop)
// {
string strHash1 = HashSHA512("The quick brown fox jumps over the lazy dog");
string strHash2 = HashSHA512("david:abc");
if (strHash2 == "04AE7539A2DF3351EBBFD5D84FB717A5C839EB6A9613430E8ED130030A33A6A2E386ECDC5774B7FB1868663F06FEF0C03F545569EB519DCA81350C47295118D4" &&
    strHash1 == "07E547D9586F6A73F73FBAC0435ED76951218FB7D0C8D788A309D785436BBB642E93A252A954F23912547D1E8A3B5ED6E1BFD7097821233FA0538F3DB854FEE6")
{
write("OK!!! Woohoo\n");
}
else
{
write("Error!!!\n");
write("Hash1=%s\nHash2=%s\n", strHash1, strHash2);
}
// }
// sleep(0);
// write("Duration: %d s\n", time()-start_time);
return 0;
}
*/

 
Posted by
Erik Allemann  -  October 2008
Very nice! :)

I know that the discussion has surfaced somewhere and, if I remember correctly, to add SHA-512 is not overly complicated.

Although I'm not a Pike hacker, so I couldn't elaborate. Maybe I can poke a Pike dev. :-)
 
Posted by
Martin Stjernholm  -  October 2008
Cool! I was hoping to be able to say that this has been added to Pike 7.8, which is due RSN, but alas there's no support there either. That's really something to look into, since things like this never get fast when written in Pike.

I haven't studied your code in detail yet, but I noticed one thing: maybe it's worth turning some of the small functions into macros to avoid the function call overhead.
 
Posted by
Martin Green  -  October 2008
I removed most of the small functions and wrote them inline instead and removed some unnecessary masking. But it had no effect on performance, it is still 6.5 ms per call.
Is there any better way to measure execution timing than:
// Timing attempt under discussion: the code to measure goes where the
// "//..." placeholder is. The difference of two time() values is printed
// with %d, i.e. as a whole number of seconds.
sleep(0);
int start_time = time();
//...
sleep(0);
write("Duration: %d s\n", time()-start_time);
 
Posted by
Martin Green  -  October 2008
Obviously it would be better to have SHA512 in the crypto library (as optimized C code).
 
Posted by
Martin Karlgren  -  October 2008
Please have a look at gethrtime() for the time measuring - http://pike.ida.liu.se/generated/manual/modref/ex/predef_3A_3A/gethrtime.html - i.e:

// gethrtime() returns an integer number of microseconds, so the difference
// is converted to float seconds before it is formatted with %.5f.
int starttime = gethrtime();
dosomestuff();
werror("Time spent: %.5f\n", (gethrtime()-starttime)/1000000.0);
 
Posted by
Martin Green  -  October 2008
Thanks MJ!

This is a nice utility to measure the time spent inside a scope. Just make a local variable of this class at the top of the scope:
// --->
// Scope timer: remembers a label and the start time on creation and, when
// the object is destroyed, writes the elapsed time in seconds to stderr.
class Elapsed
{
  // Record the label and the current high resolution time.
  void create(string name)
  {
    m_Name = name;
    m_Start = gethrtime();
  }

  // Report "<label>: <seconds> s"; the microsecond difference is divided
  // by a float to get seconds.
  void destroy()
  {
    int elapsed_us = gethrtime() - m_Start;
    werror("%s: %.5f s\n", m_Name, elapsed_us / 1000000.0);
  }

  int m_Start;    // gethrtime() value taken in create()
  string m_Name;  // label printed in front of the measurement
};
// <---

Like this:
// --->
// Usage example for Elapsed: `el` is created before the work starts.
// NOTE(review): the report is printed by Elapsed.destroy(), which is
// presumably triggered when `el` goes away as Foo returns -- confirm the
// destruction timing for the Pike version in use.
void Foo()
{
  Elapsed el = Elapsed("Foo");
  do_funky_stuff();
}
// <---
 
Posted by
Martin Stjernholm  -  October 2008
gethrvtime is theoretically better than gethrtime since it measures actual cpu time. The drawback is that it can have lousy accuracy. That's a lot better with later pike versions and newer OS's. With 7.8 on linux 2.6 I'd definitely use gethrvtime instead (which also means gauge{} is accurate with high precision).
 
Posted by
Martin Green  -  October 2008
Aha you can use gauge!

// Time the block with gauge and print the result; werror does the
// formatting itself, so no separate sprintf call is needed.
werror("%O s\n", gauge {
  Do_stuff();
});

I couldn't find that in the documentation anywhere!
 
Posted by
Martin Stjernholm  -  October 2008
You're right, it's missing from the module refdoc. Thanks for letting us know.

You can find it in the "complete reference", though: http://pike.ida.liu.se/generated/manual/ref/chapter_6.html#3
 
1
Search this thread: