@ -46,201 +46,199 @@ Redistribution and use in source and binary forms, with or without modification,
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Compression codebook read by smaz_compress(): 241 hash buckets, selected by
 * a hash of the next one to three input bytes taken modulo 241.
 * smaz_compress() walks each bucket string as a run of records laid out as
 *   <length byte> <that many text bytes> <code byte>
 * and stops at a zero length byte; an empty string is an empty bucket.
 * NOTE(review): the list of entries below appears twice back to back (the
 * second copy starts right after the last entry of the first one, with no
 * comma in between), which looks like merge/diff residue and gives more
 * initializers than the 241 declared slots - confirm and keep a single copy.
 * NOTE(review): every literal carries space padding that does not fit the
 * record layout described above - presumably an extraction artifact; verify
 * against the upstream table before relying on this data. */
static const char * Smaz_cb [ 241 ] = {
" \002 s, \266 " , " \003 had \232 \002 leW " , " \003 on \216 " , " " , " \001 yS " ,
" \002 ma \255 \002 li \227 " , " \003 or \260 " , " " , " \002 ll \230 \003 s t \277 " ,
" \004 fromg \002 mel " , " " , " \003 its \332 " , " \001 z \333 " , " \003 ingF " , " \001 > \336 " ,
" \001 \000 \003 ( \002 nc \344 " , " \002 nd= \003 on \312 " ,
" \002 ne \213 \003 hat \276 \003 re q " , " " , " \002 ngT \003 herz \004 have \306 \003 s o \225 " ,
" " , " \003 ionk \003 s a \254 \002 ly \352 " , " \003 hisL \003 inN \003 be \252 " , " " ,
" \003 fo \325 \003 of \003 ha \311 " , " " , " \002 of \005 " ,
" \003 co \241 \002 no \267 \003 ma \370 " , " " , " " , " \003 cl \356 \003 enta \003 an7 " ,
" \002 ns \300 \001 \" e " , " \003 n t \217 \002 ntP \003 s, \205 " ,
" \002 pe \320 \003 we \351 \002 om \223 " , " \002 on \037 " , " " , " \002 y G " , " \003 wa \271 " ,
" \003 re \321 \002 or* " , " " , " \002 = \" \251 \002 ot \337 " , " \003 forD \002 ou[ " ,
" \003 toR " , " \003 th \r " , " \003 it \366 " ,
" \003 but \261 \002 ra \202 \003 wi \363 \002 </ \361 " , " \003 wh \237 " , " \002 4 " ,
" \003 nd ? " , " \002 re! " , " " , " \003 ng c " , " " ,
" \003 ly \307 \003 ass \323 \001 a \004 \002 rir " , " " , " " , " " , " \002 se_ " , " \003 of \" " ,
" \003 div \364 \002 ros \003 ere \240 " , " " , " \002 ta \310 \001 bZ \002 si \324 " , " " ,
" \003 and \a \002 rs \335 " , " \002 rt \362 " , " \002 teE " , " \003 ati \316 " , " \002 so \263 " ,
" \002 th \021 " , " \002 tiJ \001 c \034 \003 allp " , " \003 ate \345 " , " \002 ss \246 " ,
" \002 stM " , " " , " \002 >< \346 " , " \002 to \024 " , " \003 arew " , " \001 d \030 " ,
" \002 tr \303 " , " " , " \001 \n 1 \003 a \222 " , " \003 f tv \002 veo " , " \002 un \340 " , " " ,
" \003 e o \242 " , " \002 a \243 \002 wa \326 \001 e \002 " , " \002 ur \226 \003 e a \274 " ,
" \002 us \244 \003 \n \r \n \247 " , " \002 ut \304 \003 e c \373 " , " \002 we \221 " , " " , " " ,
" \002 wh \302 " , " \001 f, " , " " , " " , " " , " \003 d t \206 " , " " , " " , " \003 th \343 " ,
" \001 g; " , " " , " " , " \001 \r 9 \003 e s \265 " , " \003 e t \234 " , " " , " \003 to Y " ,
" \003 e \r \n \236 " , " \002 d \036 \001 h \022 " , " " , " \001 ,Q " , " \002 a \031 " , " \002 b^ " ,
" \002 \r \n \025 \002 cI " , " \002 d \245 " , " \002 e \253 " , " \002 fh \001 i \b \002 e \v " ,
" " , " \002 hU \001 - \314 " , " \002 i8 " , " " , " " , " \002 l \315 " , " \002 m{ " ,
" \002 f : \002 n \354 " , " \002 o \035 " , " \002 p} \001 .n \003 \r \n \r \250 " , " " ,
" \002 r \275 " , " \002 s> " , " \002 t \016 " , " " , " \002 g \235 \005 which+ \003 whi \367 " ,
" \002 w5 " , " \001 / \305 " , " \003 as \214 " , " \003 at \207 " , " " , " \003 who \331 " , " " ,
" \001 l \026 \002 h \212 " , " " , " \002 , $ " , " " , " \004 withV " , " " , " " , " " , " \001 m- " , " " ,
" " , " \002 ac \357 " , " \002 ad \350 " , " \003 TheH " , " " , " " , " \004 this \233 \001 n \t " ,
" " , " \002 . y " , " " , " \002 alX \003 e, \365 " , " \003 tio \215 \002 be \\ " ,
" \002 an \032 \003 ver \347 " , " " , " \004 that0 \003 tha \313 \001 o \006 " , " \003 was2 " ,
" \002 arO " , " \002 as. " , " \002 at' \003 the \001 \004 they \200 \005 there \322 \005 theird " ,
" \002 ce \210 " , " \004 were] " , " " , " \002 ch \231 \002 l \264 \001 p< " , " " , " " ,
" \003 one \256 " , " " , " \003 he \023 \002 dej " , " \003 ter \270 " , " \002 cou " , " " ,
" \002 by \177 \002 di \201 \002 eax " , " " , " \002 ec \327 " , " \002 edB " , " \002 ee \353 " , " " ,
" " , " \001 r \f \002 n ) " , " " , " " , " " , " \002 el \262 " , " " , " \003 in i \002 en3 " , " " ,
" \002 o ` \001 s \n " , " " , " \002 er \033 " , " \003 is t \002 es6 " , " " , " \002 ge \371 " ,
" \004 .com \375 " , " \002 fo \334 \003 our \330 " , " \003 ch \301 \001 t \003 " , " \002 hab " , " " ,
" \003 men \374 " , " " , " \002 he \020 " , " " , " " , " \001 u& " , " \002 hif " , " " ,
" \003 not \204 \002 ic \203 " , " \003 ed @ \002 id \355 " , " " , " " , " \002 ho \273 " ,
" \002 r K \001 vm " , " " , " " , " " , " \003 t t \257 \002 il \360 " , " \002 im \342 " ,
" \003 en \317 \002 in \017 " , " \002 io \220 " , " \002 s \027 \001 wA " , " " , " \003 er | " ,
" \003 es ~ \002 is% " , " \002 it/ " , " " , " \002 iv \272 " , " " ,
" \002 t # \a http://C \001 x \372 " , " \002 la \211 " , " \001 < \341 " , " \003 , a \224 "
" \002 s, \266 " , " \003 had \232 \002 leW " , " \003 on \216 " , " " , " \001 yS " ,
" \002 ma \255 \002 li \227 " , " \003 or \260 " , " " , " \002 ll \230 \003 s t \277 " ,
" \004 fromg \002 mel " , " " , " \003 its \332 " , " \001 z \333 " , " \003 ingF " , " \001 > \336 " ,
" \001 \000 \003 ( \002 nc \344 " , " \002 nd= \003 on \312 " ,
" \002 ne \213 \003 hat \276 \003 re q " , " " , " \002 ngT \003 herz \004 have \306 \003 s o \225 " ,
" " , " \003 ionk \003 s a \254 \002 ly \352 " , " \003 hisL \003 inN \003 be \252 " , " " ,
" \003 fo \325 \003 of \003 ha \311 " , " " , " \002 of \005 " ,
" \003 co \241 \002 no \267 \003 ma \370 " , " " , " " , " \003 cl \356 \003 enta \003 an7 " ,
" \002 ns \300 \001 \" e " , " \003 n t \217 \002 ntP \003 s, \205 " ,
" \002 pe \320 \003 we \351 \002 om \223 " , " \002 on \037 " , " " , " \002 y G " , " \003 wa \271 " ,
" \003 re \321 \002 or* " , " " , " \002 = \" \251 \002 ot \337 " , " \003 forD \002 ou[ " ,
" \003 toR " , " \003 th \r " , " \003 it \366 " ,
" \003 but \261 \002 ra \202 \003 wi \363 \002 </ \361 " , " \003 wh \237 " , " \002 4 " ,
" \003 nd ? " , " \002 re! " , " " , " \003 ng c " , " " ,
" \003 ly \307 \003 ass \323 \001 a \004 \002 rir " , " " , " " , " " , " \002 se_ " , " \003 of \" " ,
" \003 div \364 \002 ros \003 ere \240 " , " " , " \002 ta \310 \001 bZ \002 si \324 " , " " ,
" \003 and \a \002 rs \335 " , " \002 rt \362 " , " \002 teE " , " \003 ati \316 " , " \002 so \263 " ,
" \002 th \021 " , " \002 tiJ \001 c \034 \003 allp " , " \003 ate \345 " , " \002 ss \246 " ,
" \002 stM " , " " , " \002 >< \346 " , " \002 to \024 " , " \003 arew " , " \001 d \030 " ,
" \002 tr \303 " , " " , " \001 \n 1 \003 a \222 " , " \003 f tv \002 veo " , " \002 un \340 " , " " ,
" \003 e o \242 " , " \002 a \243 \002 wa \326 \001 e \002 " , " \002 ur \226 \003 e a \274 " ,
" \002 us \244 \003 \n \r \n \247 " , " \002 ut \304 \003 e c \373 " , " \002 we \221 " , " " , " " ,
" \002 wh \302 " , " \001 f, " , " " , " " , " " , " \003 d t \206 " , " " , " " , " \003 th \343 " ,
" \001 g; " , " " , " " , " \001 \r 9 \003 e s \265 " , " \003 e t \234 " , " " , " \003 to Y " ,
" \003 e \r \n \236 " , " \002 d \036 \001 h \022 " , " " , " \001 ,Q " , " \002 a \031 " , " \002 b^ " ,
" \002 \r \n \025 \002 cI " , " \002 d \245 " , " \002 e \253 " , " \002 fh \001 i \b \002 e \v " ,
" " , " \002 hU \001 - \314 " , " \002 i8 " , " " , " " , " \002 l \315 " , " \002 m{ " ,
" \002 f : \002 n \354 " , " \002 o \035 " , " \002 p} \001 .n \003 \r \n \r \250 " , " " ,
" \002 r \275 " , " \002 s> " , " \002 t \016 " , " " , " \002 g \235 \005 which+ \003 whi \367 " ,
" \002 w5 " , " \001 / \305 " , " \003 as \214 " , " \003 at \207 " , " " , " \003 who \331 " , " " ,
" \001 l \026 \002 h \212 " , " " , " \002 , $ " , " " , " \004 withV " , " " , " " , " " , " \001 m- " , " " ,
" " , " \002 ac \357 " , " \002 ad \350 " , " \003 TheH " , " " , " " , " \004 this \233 \001 n \t " ,
" " , " \002 . y " , " " , " \002 alX \003 e, \365 " , " \003 tio \215 \002 be \\ " ,
" \002 an \032 \003 ver \347 " , " " , " \004 that0 \003 tha \313 \001 o \006 " , " \003 was2 " ,
" \002 arO " , " \002 as. " , " \002 at' \003 the \001 \004 they \200 \005 there \322 \005 theird " ,
" \002 ce \210 " , " \004 were] " , " " , " \002 ch \231 \002 l \264 \001 p< " , " " , " " ,
" \003 one \256 " , " " , " \003 he \023 \002 dej " , " \003 ter \270 " , " \002 cou " , " " ,
" \002 by \177 \002 di \201 \002 eax " , " " , " \002 ec \327 " , " \002 edB " , " \002 ee \353 " , " " ,
" " , " \001 r \f \002 n ) " , " " , " " , " " , " \002 el \262 " , " " , " \003 in i \002 en3 " , " " ,
" \002 o ` \001 s \n " , " " , " \002 er \033 " , " \003 is t \002 es6 " , " " , " \002 ge \371 " ,
" \004 .com \375 " , " \002 fo \334 \003 our \330 " , " \003 ch \301 \001 t \003 " , " \002 hab " , " " ,
" \003 men \374 " , " " , " \002 he \020 " , " " , " " , " \001 u& " , " \002 hif " , " " ,
" \003 not \204 \002 ic \203 " , " \003 ed @ \002 id \355 " , " " , " " , " \002 ho \273 " ,
" \002 r K \001 vm " , " " , " " , " " , " \003 t t \257 \002 il \360 " , " \002 im \342 " ,
" \003 en \317 \002 in \017 " , " \002 io \220 " , " \002 s \027 \001 wA " , " " , " \003 er | " ,
" \003 es ~ \002 is% " , " \002 it/ " , " " , " \002 iv \272 " , " " ,
" \002 t # \a http://C \001 x \372 " , " \002 la \211 " , " \001 < \341 " , " \003 , a \224 "
} ;
/* Reverse codebook read by smaz_decompress(): entry N is the text that the
 * compressed byte value N expands to (the expansion length is taken with
 * strlen()). Only 254 entries are declared because smaz_decompress() treats
 * the byte values 254 and 255 as verbatim markers, not as table indexes.
 * NOTE(review): the list of entries below appears twice back to back (the
 * second copy starts right after the last entry of the first one, with no
 * comma in between), which looks like merge/diff residue and gives more
 * initializers than the 254 declared slots - confirm and keep a single copy.
 * NOTE(review): every literal carries leading/trailing space padding -
 * presumably an extraction artifact; verify against the upstream table. */
static const char * Smaz_rcb [ 254 ] = {
" " , " the " , " e " , " t " , " a " , " of " , " o " , " and " , " i " , " n " , " s " , " e " , " r " , " th " ,
" t " , " in " , " he " , " th " , " h " , " he " , " to " , " \r \n " , " l " , " s " , " d " , " a " , " an " ,
" er " , " c " , " o " , " d " , " on " , " of " , " re " , " of " , " t " , " , " , " is " , " u " , " at " ,
" " , " n " , " or " , " which " , " f " , " m " , " as " , " it " , " that " , " \n " , " was " , " en " ,
" " , " w " , " es " , " an " , " i " , " \r " , " f " , " g " , " p " , " nd " , " s " , " nd " , " ed " ,
" w " , " ed " , " http:// " , " for " , " te " , " ing " , " y " , " The " , " c " , " ti " , " r " , " his " ,
" st " , " in " , " ar " , " nt " , " , " , " to " , " y " , " ng " , " h " , " with " , " le " , " al " , " to " ,
" b " , " ou " , " be " , " were " , " b " , " se " , " o " , " ent " , " ha " , " ng " , " their " , " \" " ,
" hi " , " from " , " f " , " in " , " de " , " ion " , " me " , " v " , " . " , " ve " , " all " , " re " ,
" ri " , " ro " , " is " , " co " , " f t " , " are " , " ea " , " . " , " her " , " m " , " er " , " p " ,
" es " , " by " , " they " , " di " , " ra " , " ic " , " not " , " s, " , " d t " , " at " , " ce " , " la " ,
" h " , " ne " , " as " , " tio " , " on " , " n t " , " io " , " we " , " a " , " om " , " , a " , " s o " ,
" ur " , " li " , " ll " , " ch " , " had " , " this " , " e t " , " g " , " e \r \n " , " wh " , " ere " ,
" co " , " e o " , " a " , " us " , " d " , " ss " , " \n \r \n " , " \r \n \r " , " = \" " , " be " , " e " ,
" s a " , " ma " , " one " , " t t " , " or " , " but " , " el " , " so " , " l " , " e s " , " s, " , " no " ,
" ter " , " wa " , " iv " , " ho " , " e a " , " r " , " hat " , " s t " , " ns " , " ch " , " wh " , " tr " ,
" ut " , " / " , " have " , " ly " , " ta " , " ha " , " on " , " tha " , " - " , " l " , " ati " , " en " ,
" pe " , " re " , " there " , " ass " , " si " , " fo " , " wa " , " ec " , " our " , " who " , " its " , " z " ,
" fo " , " rs " , " > " , " ot " , " un " , " < " , " im " , " th " , " nc " , " ate " , " >< " , " ver " , " ad " ,
" we " , " ly " , " ee " , " n " , " id " , " cl " , " ac " , " il " , " </ " , " rt " , " wi " , " div " ,
" e, " , " it " , " whi " , " ma " , " ge " , " x " , " e c " , " men " , " .com "
" " , " the " , " e " , " t " , " a " , " of " , " o " , " and " , " i " , " n " , " s " , " e " , " r " , " th " ,
" t " , " in " , " he " , " th " , " h " , " he " , " to " , " \r \n " , " l " , " s " , " d " , " a " , " an " ,
" er " , " c " , " o " , " d " , " on " , " of " , " re " , " of " , " t " , " , " , " is " , " u " , " at " ,
" " , " n " , " or " , " which " , " f " , " m " , " as " , " it " , " that " , " \n " , " was " , " en " ,
" " , " w " , " es " , " an " , " i " , " \r " , " f " , " g " , " p " , " nd " , " s " , " nd " , " ed " ,
" w " , " ed " , " http:// " , " for " , " te " , " ing " , " y " , " The " , " c " , " ti " , " r " , " his " ,
" st " , " in " , " ar " , " nt " , " , " , " to " , " y " , " ng " , " h " , " with " , " le " , " al " , " to " ,
" b " , " ou " , " be " , " were " , " b " , " se " , " o " , " ent " , " ha " , " ng " , " their " , " \" " ,
" hi " , " from " , " f " , " in " , " de " , " ion " , " me " , " v " , " . " , " ve " , " all " , " re " ,
" ri " , " ro " , " is " , " co " , " f t " , " are " , " ea " , " . " , " her " , " m " , " er " , " p " ,
" es " , " by " , " they " , " di " , " ra " , " ic " , " not " , " s, " , " d t " , " at " , " ce " , " la " ,
" h " , " ne " , " as " , " tio " , " on " , " n t " , " io " , " we " , " a " , " om " , " , a " , " s o " ,
" ur " , " li " , " ll " , " ch " , " had " , " this " , " e t " , " g " , " e \r \n " , " wh " , " ere " ,
" co " , " e o " , " a " , " us " , " d " , " ss " , " \n \r \n " , " \r \n \r " , " = \" " , " be " , " e " ,
" s a " , " ma " , " one " , " t t " , " or " , " but " , " el " , " so " , " l " , " e s " , " s, " , " no " ,
" ter " , " wa " , " iv " , " ho " , " e a " , " r " , " hat " , " s t " , " ns " , " ch " , " wh " , " tr " ,
" ut " , " / " , " have " , " ly " , " ta " , " ha " , " on " , " tha " , " - " , " l " , " ati " , " en " ,
" pe " , " re " , " there " , " ass " , " si " , " fo " , " wa " , " ec " , " our " , " who " , " its " , " z " ,
" fo " , " rs " , " > " , " ot " , " un " , " < " , " im " , " th " , " nc " , " ate " , " >< " , " ver " , " ad " ,
" we " , " ly " , " ee " , " n " , " id " , " cl " , " ac " , " il " , " </ " , " rt " , " wi " , " div " ,
" e, " , " it " , " whi " , " ma " , " ge " , " x " , " e c " , " men " , " .com "
} ;
/* Compress `inlen` bytes from `in` into `out`, which has room for `outlen`
 * bytes.
 *
 * At every input position the longest prefix (7 bytes down to 1) is looked up
 * in Smaz_cb; a hit emits the record's single code byte. Bytes without a hit
 * are collected in a verbatim buffer that is written as
 *   254 <byte>                    for a single byte, or
 *   255 <count - 1> <bytes...>    for 2..256 bytes.
 *
 * Returns the number of bytes written to `out`, or `outlen + 1` when the
 * output buffer is too small. */
static int smaz_compress(const char *in, int inlen, char *out, int outlen) {
	unsigned int h1, h2, h3 = 0;
	int verblen = 0, _outlen = outlen;
	char verb[256], *_out = out;

	while (inlen > 0) {
		int j = 7, needed;
		char *flush = NULL;
		const char *slot;

		/* Hash on unsigned byte values: left-shifting a negative (signed) char
		 * is undefined behaviour. */
		h1 = h2 = (unsigned int)(unsigned char)in[0] << 3;
		if (inlen > 1) h2 += (unsigned char)in[1];
		if (inlen > 2) h3 = h2 ^ (unsigned char)in[2];
		if (j > inlen) j = inlen;

		/* Try to lookup substrings into the hash table, starting from the
		 * longer to the shorter substrings */
		for (; j > 0; j--) {
			switch (j) {
				case 1: slot = Smaz_cb[h1 % 241]; break;
				case 2: slot = Smaz_cb[h2 % 241]; break;
				default: slot = Smaz_cb[h3 % 241]; break;
			}
			while (slot[0]) {
				if (slot[0] == j && memcmp(slot + 1, in, j) == 0) {
					/* Match found in the hash table,
					 * prepare a verbatim bytes flush if needed */
					if (verblen) {
						needed = (verblen == 1) ? 2 : 2 + verblen;
						flush = out;
						out += needed;
						outlen -= needed;
					}
					/* Emit the byte */
					if (outlen <= 0) return _outlen + 1;
					out[0] = slot[slot[0] + 1];
					out++;
					outlen--;
					inlen -= j;
					in += j;
					goto flush_check;
				} else {
					/* Skip this record: length byte + text + code byte */
					slot += slot[0] + 2;
				}
			}
		}
		/* Match not found - add the byte to the verbatim buffer */
		verb[verblen] = in[0];
		verblen++;
		inlen--;
		in++;
	flush_check:
		/* Prepare a flush if we reached the flush length limit, and there
		 * is not already a pending flush operation. */
		if (!flush && (verblen == 256 || (verblen > 0 && inlen == 0))) {
			needed = (verblen == 1) ? 2 : 2 + verblen;
			flush = out;
			out += needed;
			outlen -= needed;
			if (outlen < 0) return _outlen + 1;
		}
		/* Perform a verbatim flush if needed */
		if (flush) {
			if (verblen == 1) {
				flush[0] = (signed char)254;
				flush[1] = verb[0];
			} else {
				flush[0] = (signed char)255;
				flush[1] = (signed char)(verblen - 1);
				memcpy(flush + 2, verb, verblen);
			}
			flush = NULL;
			verblen = 0;
		}
	}
	return out - _out;
}
/* Decompress `inlen` bytes from `in` into `out`, which has room for `outlen`
 * bytes.
 *
 * Input byte 254 is followed by one literal byte, 255 is followed by a
 * (count - 1) byte and then `count` literal bytes; every other byte value is
 * an index into Smaz_rcb and expands to that entry's text.
 *
 * Returns the number of bytes written to `out`, or `outlen + 1` when the
 * output buffer is too small or the input ends in the middle of a verbatim
 * sequence. No terminating NUL is written. */
static int smaz_decompress(const char *in, int inlen, char *out, int outlen) {
	const unsigned char *c = (const unsigned char *)in;
	char *_out = out;
	int _outlen = outlen;

	while (inlen > 0) {
		if (*c == 254) {
			/* Verbatim byte */
			if (inlen < 2) return _outlen + 1; /* truncated input */
			if (outlen < 1) return _outlen + 1;
			*out = *(c + 1);
			out++;
			outlen--;
			c += 2;
			inlen -= 2;
		} else if (*c == 255) {
			/* Verbatim string */
			int len;
			if (inlen < 2) return _outlen + 1; /* length byte missing */
			len = (*(c + 1)) + 1;
			if (inlen < 2 + len) return _outlen + 1; /* payload truncated */
			if (outlen < len) return _outlen + 1;
			memcpy(out, c + 2, len);
			out += len;
			outlen -= len;
			c += 2 + len;
			inlen -= 2 + len;
		} else {
			/* Codebook entry */
			const char *s = Smaz_rcb[*c];
			int len = (int)strlen(s);
			if (outlen < len) return _outlen + 1;
			memcpy(out, s, len);
			out += len;
			outlen -= len;
			c++;
			inlen--;
		}
	}
	return out - _out;
}
/////////// END OF SMAZ /////////////
struct _PHashTranslationCmp {
@ -255,104 +253,100 @@ void PHashTranslation::generate(const Ref<Translation> &p_from) {
List < StringName > keys ;
p_from - > get_message_list ( & keys ) ;
int size = Math : : larger_prime ( keys . size ( ) ) ;
int size = Math : : larger_prime ( keys . size ( ) ) ;
print_line ( " compressing keys: " + itos ( keys . size ( ) ) ) ;
Vector < Vector < Pair < int , CharString > > > buckets ;
Vector < Map < uint32_t , int > > table ;
Vector < uint32_t > hfunc_table ;
Vector < _PHashTranslationCmp > compressed ;
print_line ( " compressing keys: " + itos ( keys . size ( ) ) ) ;
Vector < Vector < Pair < int , CharString > > > buckets ;
Vector < Map < uint32_t , int > > table ;
Vector < uint32_t > hfunc_table ;
Vector < _PHashTranslationCmp > compressed ;
table . resize ( size ) ;
hfunc_table . resize ( size ) ;
buckets . resize ( size ) ;
compressed . resize ( keys . size ( ) ) ;
int idx = 0 ;
int total_compression_size = 0 ;
int total_string_size = 0 ;
int idx = 0 ;
int total_compression_size = 0 ;
int total_string_size = 0 ;
for ( List < StringName > : : Element * E = keys . front ( ) ; E ; E = E - > next ( ) ) {
for ( List < StringName > : : Element * E = keys . front ( ) ; E ; E = E - > next ( ) ) {
//hash string
CharString cs = E - > get ( ) . operator String ( ) . utf8 ( ) ;
uint32_t h = hash ( 0 , cs . get_data ( ) ) ;
Pair < int , CharString > p ;
p . first = idx ;
p . second = cs ;
uint32_t h = hash ( 0 , cs . get_data ( ) ) ;
Pair < int , CharString > p ;
p . first = idx ;
p . second = cs ;
buckets [ h % size ] . push_back ( p ) ;
//compress string
CharString src_s = p_from - > get_message ( E - > get ( ) ) . operator String ( ) . utf8 ( ) ;
_PHashTranslationCmp ps ;
ps . orig_len = src_s . size ( ) ;
ps . offset = total_compression_size ;
ps . orig_len = src_s . size ( ) ;
ps . offset = total_compression_size ;
if ( ps . orig_len ! = 0 ) {
if ( ps . orig_len ! = 0 ) {
CharString dst_s ;
dst_s . resize ( src_s . size ( ) ) ;
int ret = smaz_compress ( src_s . get_data ( ) , src_s . size ( ) , & dst_s [ 0 ] , src_s . size ( ) ) ;
if ( ret > = src_s . size ( ) ) {
int ret = smaz_compress ( src_s . get_data ( ) , src_s . size ( ) , & dst_s [ 0 ] , src_s . size ( ) ) ;
if ( ret > = src_s . size ( ) ) {
//if compressed is larger than original, just use original
ps . orig_len = src_s . size ( ) ;
ps . compressed = src_s ;
ps . orig_len = src_s . size ( ) ;
ps . compressed = src_s ;
} else {
dst_s . resize ( ret ) ;
//ps.orig_len=;
ps . compressed = dst_s ;
ps . compressed = dst_s ;
}
} else {
ps . orig_len = 1 ;
ps . orig_len = 1 ;
ps . compressed . resize ( 1 ) ;
ps . compressed [ 0 ] = 0 ;
ps . compressed [ 0 ] = 0 ;
}
compressed [ idx ] = ps ;
total_compression_size + = ps . compressed . size ( ) ;
total_string_size + = src_s . size ( ) ;
compressed [ idx ] = ps ;
total_compression_size + = ps . compressed . size ( ) ;
total_string_size + = src_s . size ( ) ;
idx + + ;
}
int bucket_table_size = 0 ;
print_line ( " total compressed string size: " + itos ( total_compression_size ) + " ( " + itos ( total_string_size ) + " uncompressed). " ) ;
int bucket_table_size = 0 ;
print_line ( " total compressed string size: " + itos ( total_compression_size ) + " ( " + itos ( total_string_size ) + " uncompressed). " ) ;
for ( int i = 0 ; i < size ; i + + ) {
for ( int i = 0 ; i < size ; i + + ) {
Vector < Pair < int , CharString > > & b = buckets [ i ] ;
Map < uint32_t , int > & t = table [ i ] ;
Vector < Pair < int , CharString > > & b = buckets [ i ] ;
Map < uint32_t , int > & t = table [ i ] ;
if ( b . size ( ) = = 0 )
if ( b . size ( ) = = 0 )
continue ;
//print_line("bucket: "+itos(i)+" - elements: "+itos(b.size()));
int d = 1 ;
int item = 0 ;
int item = 0 ;
while ( item < b . size ( ) ) {
while ( item < b . size ( ) ) {
uint32_t slot = hash ( d , b [ item ] . second . get_data ( ) ) ;
uint32_t slot = hash ( d , b [ item ] . second . get_data ( ) ) ;
if ( t . has ( slot ) ) {
item = 0 ;
item = 0 ;
d + + ;
t . clear ( ) ;
} else {
t [ slot ] = b [ item ] . first ;
t [ slot ] = b [ item ] . first ;
item + + ;
}
}
hfunc_table [ i ] = d ;
bucket_table_size + = 2 + b . size ( ) * 4 ;
hfunc_table [ i ] = d ;
bucket_table_size + = 2 + b . size ( ) * 4 ;
}
print_line ( " bucket table size: " + itos ( bucket_table_size * 4 ) ) ;
print_line ( " hash table size: " + itos ( size * 4 ) ) ;
print_line ( " bucket table size: " + itos ( bucket_table_size * 4 ) ) ;
print_line ( " hash table size: " + itos ( size * 4 ) ) ;
hash_table . resize ( size ) ;
bucket_table . resize ( bucket_table_size ) ;
@ -360,136 +354,130 @@ void PHashTranslation::generate(const Ref<Translation> &p_from) {
PoolVector < int > : : Write htwb = hash_table . write ( ) ;
PoolVector < int > : : Write btwb = bucket_table . write ( ) ;
uint32_t * htw = ( uint32_t * ) & htwb [ 0 ] ;
uint32_t * btw = ( uint32_t * ) & btwb [ 0 ] ;
uint32_t * htw = ( uint32_t * ) & htwb [ 0 ] ;
uint32_t * btw = ( uint32_t * ) & btwb [ 0 ] ;
int btindex = 0 ;
int collisions = 0 ;
int btindex = 0 ;
int collisions = 0 ;
for ( int i = 0 ; i < size ; i + + ) {
for ( int i = 0 ; i < size ; i + + ) {
Map < uint32_t , int > & t = table [ i ] ;
if ( t . size ( ) = = 0 ) {
htw [ i ] = 0xFFFFFFFF ; //nothing
Map < uint32_t , int > & t = table [ i ] ;
if ( t . size ( ) = = 0 ) {
htw [ i ] = 0xFFFFFFFF ; //nothing
continue ;
} else if ( t . size ( ) > 1 ) {
collisions + = t . size ( ) - 1 ;
} else if ( t . size ( ) > 1 ) {
collisions + = t . size ( ) - 1 ;
}
htw [ i ] = btindex ;
btw [ btindex + + ] = t . size ( ) ;
btw [ btindex + + ] = hfunc_table [ i ] ;
htw [ i ] = btindex ;
btw [ btindex + + ] = t . size ( ) ;
btw [ btindex + + ] = hfunc_table [ i ] ;
for ( Map < uint32_t , int > : : Element * E = t . front ( ) ; E ; E = E - > next ( ) ) {
for ( Map < uint32_t , int > : : Element * E = t . front ( ) ; E ; E = E - > next ( ) ) {
btw [ btindex + + ] = E - > key ( ) ;
btw [ btindex + + ] = compressed [ E - > get ( ) ] . offset ;
btw [ btindex + + ] = compressed [ E - > get ( ) ] . compressed . size ( ) ;
btw [ btindex + + ] = compressed [ E - > get ( ) ] . orig_len ;
btw [ btindex + + ] = E - > key ( ) ;
btw [ btindex + + ] = compressed [ E - > get ( ) ] . offset ;
btw [ btindex + + ] = compressed [ E - > get ( ) ] . compressed . size ( ) ;
btw [ btindex + + ] = compressed [ E - > get ( ) ] . orig_len ;
}
}
print_line ( " total collisions: " + itos ( collisions ) ) ;
print_line ( " total collisions: " + itos ( collisions ) ) ;
strings . resize ( total_compression_size ) ;
PoolVector < uint8_t > : : Write cw = strings . write ( ) ;
for ( int i = 0 ; i < compressed . size ( ) ; i + + ) {
memcpy ( & cw [ compressed [ i ] . offset ] , compressed [ i ] . compressed . get_data ( ) , compressed [ i ] . compressed . size ( ) ) ;
for ( int i = 0 ; i < compressed . size ( ) ; i + + ) {
memcpy ( & cw [ compressed [ i ] . offset ] , compressed [ i ] . compressed . get_data ( ) , compressed [ i ] . compressed . size ( ) ) ;
}
ERR_FAIL_COND ( btindex ! = bucket_table_size ) ;
ERR_FAIL_COND ( btindex ! = bucket_table_size ) ;
set_locale ( p_from - > get_locale ( ) ) ;
# endif
}
bool PHashTranslation : : _set ( const StringName & p_name , const Variant & p_value ) {
bool PHashTranslation : : _set ( const StringName & p_name , const Variant & p_value ) {
String name = p_name . operator String ( ) ;
if ( name = = " hash_table " ) {
hash_table = p_value ;
if ( name = = " hash_table " ) {
hash_table = p_value ;
//print_line("translation: loaded hash table of size: "+itos(hash_table.size()));
} else if ( name = = " bucket_table " ) {
bucket_table = p_value ;
} else if ( name = = " bucket_table " ) {
bucket_table = p_value ;
//print_line("translation: loaded bucket table of size: "+itos(bucket_table.size()));
} else if ( name = = " strings " ) {
strings = p_value ;
} else if ( name = = " strings " ) {
strings = p_value ;
//print_line("translation: loaded string table of size: "+itos(strings.size()));
} else if ( name = = " load_from " ) {
} else if ( name = = " load_from " ) {
//print_line("generating");
generate ( p_value ) ;
} else
return false ;
return true ;
}
bool PHashTranslation : : _get ( const StringName & p_name , Variant & r_ret ) const {
bool PHashTranslation : : _get ( const StringName & p_name , Variant & r_ret ) const {
String name = p_name . operator String ( ) ;
if ( name = = " hash_table " )
r_ret = hash_table ;
else if ( name = = " bucket_table " )
r_ret = bucket_table ;
else if ( name = = " strings " )
r_ret = strings ;
if ( name = = " hash_table " )
r_ret = hash_table ;
else if ( name = = " bucket_table " )
r_ret = bucket_table ;
else if ( name = = " strings " )
r_ret = strings ;
else
return false ;
return true ;
}
StringName PHashTranslation : : get_message ( const StringName & p_src_text ) const {
StringName PHashTranslation : : get_message ( const StringName & p_src_text ) const {
int htsize = hash_table . size ( ) ;
if ( htsize = = 0 )
if ( htsize = = 0 )
return StringName ( ) ;
CharString str = p_src_text . operator String ( ) . utf8 ( ) ;
uint32_t h = hash ( 0 , str . get_data ( ) ) ;
uint32_t h = hash ( 0 , str . get_data ( ) ) ;
PoolVector < int > : : Read htr = hash_table . read ( ) ;
const uint32_t * htptr = ( const uint32_t * ) & htr [ 0 ] ;
PoolVector < int > : : Read btr = bucket_table . read ( ) ;
const uint32_t * btptr = ( const uint32_t * ) & btr [ 0 ] ;
PoolVector < int > : : Read htr = hash_table . read ( ) ;
const uint32_t * htptr = ( const uint32_t * ) & htr [ 0 ] ;
PoolVector < int > : : Read btr = bucket_table . read ( ) ;
const uint32_t * btptr = ( const uint32_t * ) & btr [ 0 ] ;
PoolVector < uint8_t > : : Read sr = strings . read ( ) ;
const char * sptr = ( const char * ) & sr [ 0 ] ;
const char * sptr = ( const char * ) & sr [ 0 ] ;
uint32_t p = htptr [ h % htsize ] ;
uint32_t p = htptr [ h % htsize ] ;
//print_line("String: "+p_src_text.operator String());
//print_line("Hash: "+itos(p));
if ( p = = 0xFFFFFFFF ) {
if ( p = = 0xFFFFFFFF ) {
//print_line("GETMSG: Nothing!");
return StringName ( ) ; //nothing
}
const Bucket & bucket = * ( const Bucket * ) & btptr [ p ] ;
const Bucket & bucket = * ( const Bucket * ) & btptr [ p ] ;
h = hash ( bucket . func , str . get_data ( ) ) ;
h = hash ( bucket . func , str . get_data ( ) ) ;
int idx = - 1 ;
int idx = - 1 ;
for ( int i = 0 ; i < bucket . size ; i + + ) {
for ( int i = 0 ; i < bucket . size ; i + + ) {
if ( bucket . elem [ i ] . key = = h ) {
if ( bucket . elem [ i ] . key = = h ) {
idx = i ;
idx = i ;
break ;
}
}
//print_line("bucket pos: "+itos(idx));
if ( idx = = - 1 ) {
if ( idx = = - 1 ) {
//print_line("GETMSG: Not in Bucket!");
return StringName ( ) ;
}
@ -497,7 +485,7 @@ StringName PHashTranslation::get_message(const StringName& p_src_text) const {
if ( bucket . elem [ idx ] . comp_size = = bucket . elem [ idx ] . uncomp_size ) {
String rstr ;
rstr . parse_utf8 ( & sptr [ bucket . elem [ idx ] . str_offset ] , bucket . elem [ idx ] . uncomp_size ) ;
rstr . parse_utf8 ( & sptr [ bucket . elem [ idx ] . str_offset ] , bucket . elem [ idx ] . uncomp_size ) ;
//print_line("Uncompressed, size: "+itos(bucket.elem[idx].comp_size));
//print_line("Return: "+rstr);
@ -505,31 +493,27 @@ StringName PHashTranslation::get_message(const StringName& p_src_text) const {
} else {
CharString uncomp ;
uncomp . resize ( bucket . elem [ idx ] . uncomp_size + 1 ) ;
smaz_decompress ( & sptr [ bucket . elem [ idx ] . str_offset ] , bucket . elem [ idx ] . comp_size , uncomp . ptr ( ) , bucket . elem [ idx ] . uncomp_size ) ;
uncomp . resize ( bucket . elem [ idx ] . uncomp_size + 1 ) ;
smaz_decompress ( & sptr [ bucket . elem [ idx ] . str_offset ] , bucket . elem [ idx ] . comp_size , uncomp . ptr ( ) , bucket . elem [ idx ] . uncomp_size ) ;
String rstr ;
rstr . parse_utf8 ( uncomp . get_data ( ) ) ;
//print_line("Compressed, size: "+itos(bucket.elem[idx].comp_size));
//print_line("Return: "+rstr);
return rstr ;
}
}
void PHashTranslation : : _get_property_list ( List < PropertyInfo > * p_list ) const {
void PHashTranslation : : _get_property_list ( List < PropertyInfo > * p_list ) const {
p_list - > push_back ( PropertyInfo ( Variant : : POOL_INT_ARRAY , " hash_table " ) ) ;
p_list - > push_back ( PropertyInfo ( Variant : : POOL_INT_ARRAY , " bucket_table " ) ) ;
p_list - > push_back ( PropertyInfo ( Variant : : POOL_BYTE_ARRAY , " strings " ) ) ;
p_list - > push_back ( PropertyInfo ( Variant : : OBJECT , " load_from " , PROPERTY_HINT_RESOURCE_TYPE , " Translation " , PROPERTY_USAGE_EDITOR ) ) ;
p_list - > push_back ( PropertyInfo ( Variant : : POOL_INT_ARRAY , " hash_table " ) ) ;
p_list - > push_back ( PropertyInfo ( Variant : : POOL_INT_ARRAY , " bucket_table " ) ) ;
p_list - > push_back ( PropertyInfo ( Variant : : POOL_BYTE_ARRAY , " strings " ) ) ;
p_list - > push_back ( PropertyInfo ( Variant : : OBJECT , " load_from " , PROPERTY_HINT_RESOURCE_TYPE , " Translation " , PROPERTY_USAGE_EDITOR ) ) ;
}
void PHashTranslation : : _bind_methods ( ) {
ClassDB : : bind_method ( D_METHOD ( " generate " , " from:Translation " ) , & PHashTranslation : : generate ) ;
ClassDB : : bind_method ( D_METHOD ( " generate " , " from:Translation " ) , & PHashTranslation : : generate ) ;
}
// Default constructor: the body is intentionally empty.
PHashTranslation::PHashTranslation() {
}