/*
* Copyright (c) 2006
* Tama Communications Corporation
*
* This file is part of GNU GLOBAL.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
#ifdef HAVE_CONFIG_H
#include
#endif
#include "compress.h"
#include "die.h"
#include "gtagsop.h"
#include "strbuf.h"
#include "strlimcpy.h"
#include "varray.h"
/*
* Compress module
*
* Function compress() reduces the size of GTAGS by about 10-20% average.
*
* PROTOCOL:
*
* meta record: " __.COMPRESS ddefine ttypedef"
*
* 'ddefine' means d => define
* 'ttypedef' means t => typedef
*
* source abbreviation
* -----------------------------------------
* @ @@
* @n
* "define" @d
* "typedef" @t
* @ or @{}
*
* EXAMPLE OF COMPRESS:
*
* 100 macro 23 #define macro(c) a; b;
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~
* | [compress] ^ [uncompress]
* v |
* 100 macro 23 #@d @n(c) a;@6b;
* ~~~~~~~~~~~~~~~~
* DATA STRUCTURE
*
* o Ab2name table is used to convert from abbreviation character
* to the string value.
* o Name2ab table is used to convert from string value to the
* abbreviation character.
*
* ab2name = ('a' => NULL, ... , 'd' => "define", ... 'z' => NULL)
* name2ab = ("define" => 'a', "typdef" => 't')
*/
struct abbrmap {
int c;
char *name;
int length;
};
static struct abbrmap ab2name[26];
static VARRAY *name2ab;
static char abbrev_string[1024];
/*
* setup two internal tables for abbreviation.
*
* i) abbrev abbreviation string
*/
void
abbrev_open(const char *abbrev)
{
int i, limit;
struct abbrmap *ab;
char *p;
/*
* abbrev string: "ddefine ttypedef"
*/
/* copy abbrev string to static area */
strlimcpy(abbrev_string, abbrev, sizeof(abbrev_string));
p = abbrev_string;
/* initialize ab2name table */
limit = sizeof(ab2name) / sizeof(struct abbrmap);
for (i = 0; i < limit; i++) {
ab2name[i].c = 0;
ab2name[i].name = NULL;
}
name2ab = varray_open(sizeof(struct abbrmap), 5);
while (*p) {
ab = (struct abbrmap *)varray_append(name2ab);
ab->c = *p++;
ab->name = p;
for (; *p && *p != ' '; p++)
;
if (*p == ' ')
*p++ = '\0';
ab->length = strlen(ab->name);
if (ab->c < 'a' || ab->c > 'z')
die("Abbrev character must be a lower alphabetic character. (%s)", abbrev);
i = ab->c - 'a';
ab2name[i].c = ab->c;
ab2name[i].name = ab->name;
ab2name[i].length = ab->length;
}
}
/*
* free allocated memory.
*/
void
abbrev_close(void)
{
if (name2ab)
varray_close(name2ab);
name2ab = NULL;
}
/*
* for debugging.
*/
void
abbrev_dump(void)
{
struct abbrmap *ab;
int i, limit = sizeof(ab2name) / sizeof(struct abbrmap);
fprintf(stderr, "ab2name: %d entries\n", limit);
for (i = 0; i < limit; i++) {
if (ab2name[i].c != 0) {
fprintf(stderr, "ab2name[%d].c = %c\n", i, ab2name[i].c);
fprintf(stderr, "ab2name[%d].name = %s\n", i, ab2name[i].name);
}
}
ab = (struct abbrmap *)varray_assign(name2ab, 0, 0);
limit = name2ab->length;
fprintf(stderr, "name2ab: %d entries\n", limit);
for (i = 0; i < limit; i++) {
if (ab[i].c != 0) {
fprintf(stderr, "name2ab[%d].c = %c\n", i, ab[i].c);
fprintf(stderr, "name2ab[%d].name = %s\n", i, ab[i].name);
}
}
}
/*
* compress source line.
*
* i) in source line
* i) name replaced string
* r) compressed string
*/
char *
compress(const char *in, const char *name)
{
STATIC_STRBUF(sb);
const char *p = in;
int length = strlen(name);
int spaces = 0;
strbuf_clear(sb);
while (*p) {
if (*p == ' ') {
spaces++;
p++;
continue;
}
if (spaces > 0) {
if (spaces >= 10) {
strbuf_putc(sb, '@');
strbuf_putc(sb, '{');
strbuf_putn(sb, spaces);
strbuf_putc(sb, '}');
} else if (spaces > 3) {
strbuf_putc(sb, '@');
strbuf_putn(sb, spaces);
} else {
strbuf_nputc(sb, ' ', spaces);
}
}
spaces = 0;
if (*p == '@') {
strbuf_puts(sb, "@@");
p++;
} else if (!strncmp(p, name, length)) {
strbuf_puts(sb, "@n");
p += length;
} else {
int i, limit = name2ab->length;
struct abbrmap *ab = (struct abbrmap *)varray_assign(name2ab, 0, 0);
for (i = 0; i < limit; i++) {
if (!strncmp(p, ab[i].name, ab[i].length)) {
strbuf_putc(sb, '@');
strbuf_putc(sb, ab[i].c);
p += ab[i].length;
break;
}
}
if (i >= limit) {
strbuf_putc(sb, *p);
p++;
}
}
}
if (spaces > 0) {
if (spaces < 4) {
strbuf_nputc(sb, ' ', spaces);
} else if (spaces < 10) {
strbuf_putc(sb, '@');
strbuf_putn(sb, spaces);
} else {
strbuf_putc(sb, '@');
strbuf_putc(sb, '{');
strbuf_putn(sb, spaces);
strbuf_putc(sb, '}');
}
}
return strbuf_value(sb);
}
/*
* uncompress source line.
*
* i) in compressed string
* i) name replaced string
* r) uncompressed string
*/
char *
uncompress(const char *in, const char *name)
{
STATIC_STRBUF(sb);
const char *p;
int i;
strbuf_clear(sb);
for (p = in; *p; p++) {
if (*p == '@') {
int spaces = 0;
switch (*++p) {
case '@':
strbuf_putc(sb, '@');
break;
case 'n':
strbuf_puts(sb, name);
break;
case '{': /* } */
for (p++; *p && isdigit((unsigned char)*p); p++)
spaces = spaces * 10 + *p - '0';
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
spaces = *p - '0';
break;
default:
if (*p < 'a' || *p > 'z')
die("Abbrev character must be a lower alphabetic character. (%c)", *p);
i = *p - 'a';
if (ab2name[i].name)
strbuf_puts(sb, ab2name[i].name);
break;
}
strbuf_nputc(sb, ' ', spaces);
} else {
strbuf_putc(sb, *p);
}
}
return strbuf_value(sb);
}