/* C parser for .qxd files

   From Edgar BV Wiki
   Revision as of 13:14, 2 March 2007 by Red (talk | contribs)
   (diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
   Jump to navigation Jump to search */
/* Q -- A simple parser for QuarkXPress 4.1 files
   Copyright (C) 2001 Frans Faase

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

GNU General Public License:
   http://home.wxs.nl/~faase009/GNU.txt
*/
#include <stdio.h>
#include <stdlib.h>

/* Debug trace hooks.  Each macro is invoked with a complete, parenthesised
   printf argument list, e.g. DUMP_BLOCK(("x = %d\n", x)).  As defined here
   every invocation expands to nothing; putting `printf X` in place of the
   commented-out body switches the corresponding trace on. */
#define DUMP_BLOCK(X) /*printf X*/
#define DUMP_TRAIL(X) /*printf X*/
#define DUMP_FORMAT(X) /*printf X*/



/* Home-grown boolean type and its two constants. */
typedef int bool;
#define TRUE 1
#define FALSE 0
/* Shorthand unsigned integer types.  Their widths are whatever the platform
   gives unsigned char / short / long; in particular lword is only guaranteed
   to be at least 32 bits and is 64 bits wide on LP64 systems. */
typedef unsigned char byte;
typedef unsigned short word;
typedef unsigned long lword;

/* File image: main() allocates this buffer and reads the input file into it;
   glword()/gword() read from it by offset. */
byte *buf;
/* Number of bytes main() actually read into buf. */
lword flen = 0L;
/* Current read offset into buf used by main(). */
lword fpos = 0L;

/* Read four consecutive bytes of buf starting at offset *rpos, combine them
   least-significant byte first, and advance *rpos by 4.

   rpos  in/out: offset into buf.
   Returns the assembled value (0 .. 0xFFFFFFFF).

   The offset is not range-checked against the size of buf. */
lword glword(lword *rpos)
{
  const byte *p = buf + *rpos;
  lword value = (lword)p[0]
              | ((lword)p[1] << 8)
              | ((lword)p[2] << 16)
              | ((lword)p[3] << 24);

  *rpos += 4;
  return value;
}

/* Read two consecutive bytes of buf starting at offset *rpos, combine them
   least-significant byte first, and advance *rpos by 2.

   rpos  in/out: offset into buf.
   Returns the assembled 16-bit value.

   The offset is not range-checked against the size of buf. */
word gword(lword *rpos)
{
  lword at = *rpos;
  lword low = buf[at];
  lword high = buf[at + 1];

  *rpos = at + 2;
  return (word)(low | (high << 8));
}

/* Offset into buf at which the data of the current block (or run of blocks)
   stops.  main() sets it when it starts a block; skip() compares the read
   position against it and, on a match, reads the link word stored there and
   updates it for the block it jumps to. */
lword blockend;


/* Follow the block chain once the read position has reached the end of the
   current block.

   rpos  in/out: current offset into buf.

   Nothing happens unless *rpos equals blockend.  Otherwise the four-byte
   link word at that offset is read with glword() and treated as a signed
   32-bit number:
     - non-negative n: *rpos becomes n*256 - 256 and blockend is set to the
       last four bytes of that single 256-byte block;
     - negative -n: *rpos becomes n*256 - 256, a 16-bit block count is read
       from there with gword(), and blockend is set four bytes before the
       end of that many 256-byte blocks.
   "At <offset> " is printed on stdout every time a link is followed.

   The sign is decoded from bit 31 explicitly.  glword() returns the raw
   value in an unsigned long, so where long is 64 bits wide a plain
   conversion to long never yields a negative number and the multi-block
   case would never be taken. */
void skip(lword *rpos)
{
  lword link;

  if (*rpos != blockend)
    return;

  printf("At %0lX ", *rpos);
  link = glword(rpos);

  if (link & 0x80000000UL)
  {
    /* Two's-complement magnitude of the negative 32-bit link. */
    lword block = ((~link) & 0xFFFFFFFFUL) + 1;
    word nrblocks;

    *rpos = block * 256 - 256;
    nrblocks = gword(rpos);
    blockend = *rpos - 2 + 256 * nrblocks - 4;
  }
  else
  {
    *rpos = link * 256 - 256;
    blockend = *rpos + 256 - 4;
  }
  DUMP_BLOCK(("\nStart new block at %8lX till %8lX\n",
             *rpos, blockend));
}
     
/* Read a 32-bit value as two 16-bit halves, low half first, calling skip()
   before each half so that the value may straddle a block boundary.

   rpos      in/out: current offset into buf.
   lines     not used by this function.
   nr_lines  not used by this function.
   Returns the combined value (second half in bits 16..31). */
lword xlword(lword *rpos, lword lines[], int nr_lines)
{
  lword value = 0;
  int half;

  for (half = 0; half < 2; half++)
  {
    skip(rpos);
    value |= (lword)gword(rpos) << (16 * half);
  }
  return value;
}

/* Read one 16-bit value, first letting skip() move to the next block if the
   read position sits at the end of the current one.

   rpos      in/out: current offset into buf.
   lines     not used by this function.
   nr_lines  not used by this function.
   Returns the value read by gword(). */
word xword(lword *rpos, lword lines[], int nr_lines)
{
  word value;

  skip(rpos);
  value = gword(rpos);
  return value;
}

main ()
{ FILE *f = fopen("p2.qxd", "r");
  word blnr = 0;

  buf = (byte*)malloc(500000);
  flen = fread(buf, 1, 500000, f);
  fclose(f);

  printf("Length = %ld bytes\n",flen);
  
  while (fpos < 55 * 256)
  { int i;

    printf("%4d %06x ", blnr, fpos);
    blnr++;

    for (i = 0; i < 256; i++)
    { byte ch = buf[fpos];
      fpos++;
 
      if (i < 30)
        if (ch >= ' ' && ch < 126)
          printf(" %c", ch);
        else
          printf("%02X", (word)ch);
    }
    printf("\n");
  }
  printf("--------------\n");

  while (fpos < flen)
  { int i;
    lword code;
    lword len;
    word nr;
    lword line[1000];
    lword linelen[1000];
    lword nr_a;
    word acode[1000];
    lword apos[1000];
    lword nr_b;
    word bcode[1000];
    lword bpos[1000];
    lword nr_char;
    int ai;
    lword atill;
    int bi, bit;
    lword btill;

    printf("\n%4d %06x ", fpos / 256, fpos);
    blnr++;

    blockend = fpos + 256 - 4;
    code = glword(&fpos);
    printf("code = %ld\n", code);
    len = glword(&fpos);
    nr = len / 6;
    DUMP_FORMAT(("%08X %d\n", len, nr));
    bit = 0;
    if (nr <= 0 || nr >= 100)
    {
      printf("%ld", nr);
      return;
    }
    for (i = 0; i < nr; i++)
    {
      line[i] = xlword(&fpos, line, i);
      linelen[i] = xword(&fpos, line, i);
      DUMP_FORMAT(("\n      %08X %d", line[i], linelen[i]));
    }
    { lword chcount = 0L;
      for (i = 0; i < nr; i++)
      { int j;
        lword lpos = line[i]*256 - 256;
  
        for (j = 0; j < linelen[i]; j++) 
        {
          chcount++;
          if (buf[lpos++] == 0x0D)
          {  
             DUMP_FORMAT(("\n paragraph of %X charaters", chcount));
             bpos[bit++] = chcount;
             chcount = 0L;
          }
        }
      }
      DUMP_FORMAT(("\n paragraph of %X charaters", chcount));
      bpos[bit++] = chcount;
    }

    nr_a = xlword(&fpos, line, nr);
    DUMP_FORMAT(("\n nr a = %ld", nr_a));
    for (i = 0; i < nr_a / 6; i++)
    {
      acode[i] = xword(&fpos, line, nr);
      apos[i] = xlword(&fpos, line, nr);
      DUMP_FORMAT(("\n    a: %08X %d", apos[i], acode[i]));
    } 
    nr_b = xlword(&fpos, line, nr);
    DUMP_FORMAT(("\n nr b = %ld", nr_b));
    for (i = 0; i < nr_b / 6; i++)
    { lword expos;
      bcode[i] = xword(&fpos, line, nr);
      expos = xlword(&fpos, line, nr);
      if (expos != bpos[i])
      {
        printf("!!! pos : %X, expect %X found %X\n", 
               fpos - 4, bpos[i], expos);
        return 1;
      }
      DUMP_FORMAT(("\n    b: %08X %d", bpos[i], bcode[i]));
    } 
    DUMP_FORMAT(("\n"));

    nr_char = 0;
    ai = 0;
    bi = 0;

    while (fpos % 256)
    { byte b = buf[fpos++];
      if (b != 0)
        printf("!!! At %08X have %d\n", fpos, b);
    }

    printf("\n");
    if (bi < nr_b / 6)
    {
      printf("<P%d>", bcode[bi]);
      btill = nr_char + bpos[bi];
    }
    if (ai < nr_a / 6)
    {
      printf("<C%d>", acode[ai]);
      atill = nr_char + apos[ai];
    }
    for (i = 0; i < nr; i++)
    { int j;
      lword lpos = line[i]*256 - 256;

      for (j = 0; j < linelen[i]; j++) 
      { byte ch = buf[lpos++];

        if (nr_char == atill)
        {
          printf("</C%d>", acode[ai]);
          ai++;
          if (ai < nr_a / 6)
          {
            printf("<C%d>", acode[ai]);
            atill = nr_char + apos[ai];
          }
        }
        if (nr_char == btill)
        {
          printf("</P%d>", bcode[bi]);
          bi++;
          if (bi < nr_b / 6)
          {
            printf("<P%d>", bcode[bi]);
            btill = nr_char + bpos[bi];
          }
        }
        nr_char++;
      
        if (ch >= ' ' && ch < 126)
          printf("%c", ch);
        else
          printf("(%02X)", (unsigned int)ch);
      }
      if (j < 256)
        DUMP_TRAIL(("-- %d\n",j));
      
      for (; j < 256; j++)
      { byte b = buf[lpos++];
        DUMP_TRAIL(("%02X ", b));
      }
      DUMP_TRAIL(("\n"));
      if (lpos > fpos)
        fpos = lpos;
    }
    if (nr_char == atill)
    {
      printf("</C%d>", acode[ai]);
    }
    if (nr_char == btill)
    {
      printf("</P%d>", bcode[bi]);
    }
    if (nr_a / 6 - ai != 1 || nr_b / 6 - bi != 1)
      printf("-- %d %d\n", nr_a / 6 - ai, nr_b / 6 - bi);
  }
}