Cobra Interactive Query Language standalone programs


Building Standalone Checkers


Some types of queries are difficult to handle in an interactive Cobra session with the predefined types of queries. For many of these we can still use the same basic data structure, but we need the ability to store extracted data in additional user-defined data structures.

We can support these types of queries by building small standalone checkers that import the infrastructure that is used in the Cobra tool. The Cobra source distribution includes a small library of such checkers.

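The following example shows the general method for building these standalone checkers. The sample checker turns a C program text into an abstracted version in which every identifier is replaced: by default with the generic word ident, and in verbose mode with a generated name of the form n_1, n_2, and so on, where the same identifier always maps to the same generated name.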

    1   #include "c_api.h"
    2
    3   typedef struct Names Names;
    4   struct Names {
    5           char *nm;
    6           int  cnt;
    7           Names *nxt;
    8   } *names;
    9
   10   int
   11   newname(char *s)
   12   {    Names *n;
   13        static int cnt=1;
   14
   15        for (n = names; n; n = n->nxt)
   16        {    if (strcmp(n->nm, s) == 0)
   17             {    return n->cnt;
   18        }    }
   19        n = (Names *) malloc(sizeof(Names));
   20        n->cnt = cnt++;
   21        n->nm = malloc(strlen(s)+1);
   22        strcpy(n->nm, s);
   23        n->nxt = names;
   24        names = n;
   25        return n->cnt;
   26   }
   27
   28   void
   29   cobra_main(void)
   30   {    cur = prim;
   31        while (cobra_nxt())
   32        {    if (TYPE("ident"))
   33             {    if (verbose)
   34                  {    printf("n_%d ", newname(cobra_txt()));
   35                  } else
   36                  {    printf("ident ");
   37                  }
   38             } else
   39             {    printf("%s ", cobra_txt());
   40             }
   41             if (MATCH(";")
   42             ||  MATCH("}")
   43             ||  TYPE("cpp"))
   44             {       printf("\n");
   45             }
   46        }
   47   }
All the same command-line options from the cobra tool are available by default, for instance the -cpp option to enable preprocessing of the source code.

The common pattern that all standalone checkers follow is that they define a function called cobra_main(), and then perform a scan of all elements in the tokenized data structure. That data structure is itself defined in the header file c_api.h and populated by the code that is available in the precompiled archive c.ar.

When a Cobra standalone checker is compiled, it needs to include the header file c_api.h and be linked with c.ar:

	$ cc -I.. -o abstract abstract.c c.ar

If we apply this checker to its own source code, and use the Linux program indent to reformat the output, we get this result:

$ bin/abstract -n cobra_checkers/abstract.c | indent | num
    1   #include "c_api.h"
    2   typedef struct ident ident;
    3   struct ident
    4   {
    5     char *ident;
    6     int ident;
    7     ident *ident;
    8   }
    9    *ident;
   10   int
   11   ident (char *ident)
   12   {
   13     ident *ident;
   14     static int ident = 1;
   15     for (ident = ident; ident; ident = ident->ident)
   16     {
   17       if (ident (ident->ident, ident) == 0)
   18       {
   19         return ident->ident;
   20       }
   21     }
   22     ident = (ident *) ident (sizeof (ident));
   23     ident->ident = ident++;
   24     ident->ident = ident (ident (ident) + 1);
   25     ident (ident->ident, ident);
   26     ident->ident = ident;
   27     ident = ident;
   28     return ident->ident;
   29   }
   30
   31   void
   32   ident (void)
   33   { ident = ident;
   34     while (ident())
   35     {
   36         if (ident ("ident"))
   37         {
   38             if (ident)
   39             {
   40                 ident ("n_%d ", ident (ident ()));
   41             }
   42             else
   43             {
   44                 ident ("ident ");
   45             }
   46         }
   47         else
   48         {
   49             ident ("%s ", ident ());
   50         }
   51         if (ident (";") || ident ("}") || ident ("cpp"))
   52         {
   53             ident ("\n");
   54         }
   55       }
   56   }
As an aside, this particular example program can also be written in half the number of lines as a Cobra inline program, as follows (executed in single-core mode):
    1	def abstract
    2	%{
    3		if (@ident)
    4		{	x = H[.txt];		# see if .txt was seen before
    5			if (x.lnr > 0)		# yes it was
    6			{	print "n_" x.mark " ";
    7			} else			# no, it was not
    8			{	cnt.mark++;	# assign new unique value
    9				.mark = cnt.mark;
   10				H[.txt] = .;	# remember it
   11				print "n_" .mark " ";
   12			}
   13		} else
   14		{	print .txt " ";		# not an identifier
   15		}
   16		if (.txt == ";"
   17		||  .txt == "}"
   18		||  @cpp)
   19		{	print "\n";
   20		}
   21	%}
   22	end
   23	abstract
The C version is likely to execute more quickly, though, if applied to large amounts of input.

Return to index
Manual
Tutorial
(Last Updated: 15 May 2017)