/* pcode.h */ #ifndef SEEN_PCODE_H #define SEEN_PCODE_H #include #include #include #define PCODE_VERSION_STR "0.1" #define PCODE_DATE_STR "2004-08-26" /* Word range: -9,999,999,999 to -0, +0 to +9,999,999,999 * * - This fits in a long long with +0 == 0 and -0 == 1e10, but I want ANSI C. * - Using an ordinary long would only work on 64-bit machines such as * omega.uta.edu (which has a 64-bit Alpha processor, at least until 2005). * - On 16-bit machines I could use four longs, one for each 1000 group. * - On 32-bit machines a pair of longs would be usable, one per 1,000,000. * - On any machine this can be represented as an 11-byte character array. * * Using anything short of a 64-bit type makes the mathematics quite hard, also * slow, so being the lazy programmer I am I chose to use GMP, the GNU Multiple * Precision library, for all "bignum" storage and manipulation. GMP is now * installed on omega. Get it at . Besides, GMP * is a nice thing to have around--- it's the fastest bignum library available. * * The only problem with this approach is that it precludes the possibility * of differentiating between -0 and +0, but I don't think any program can * actually take advantage of this, given the design of the instruction set. */ /* A possible future enhancement would be to add extra fields to insn_t and * data_t to track the frequency with which each instruction is executed and * each memory address is accessed and changed. Also a total instruction * count could be added to vm_t, but of course I'd have to flag it as not * really being part of the formal machine state. */ /* A single instruction in the program text. The digits of the pcode are * exploded into fields for the sake of convenience, but they can be recombined * fairly easily if necessary. * * The opcode field is a pointer into the global opcodes table. To convert * back to a signed integer, do the following: * * sign = (insn.opcode - opcodes) % 2 == 0 ? '+' : '-'; * magnitude = (insn.opcode - opcodes) / 2; */ struct opcode; typedef struct opcode opcode_t; typedef struct insn { opcode_t* opcode; int arg[3]; /* 0 through 999. */ } insn_t; /* A single value in the program data or the input cards. This is essentially * the same type as insn_t except it's packed into an opaque GMP type. Trust * me, this is a signed ten-digit integer. :^) I have to wrap mpz_t in a * struct because vec cannot handle vectors whose elements are arrays. */ typedef struct data { mpz_t val; } data_t; /* Most generic possible representation of a word in the input file. * * I could make word_t a big struct containing both the string representation * (possibly in union form, as below) and its context-sensitive decoded value * (a union between insn_t and data_t). The two "ports" would be accessed * through accessor functions and synchronized or at least marked dirty as * necessary. This would very much like Tcl's Tcl_Obj struct. However, this * is probably too much work for such a dinky program. */ typedef union word { char str[11]; /* -9999999999 ... -0000000000, +0000000000 ... +9999999999 */ struct { char opcode[2]; /* "+5", "-2", etc. */ char arg[3][3]; /* "000" through "999". */ } insn; struct { char sign [1]; /* "+" or "-". */ char val [10]; /* "0000000000" through "9999999999". */ } data; } word_t; /* State of the pcode virtual machine. The values in this struct completely * define the remainder of the execution of the program. * * For example, if this struct ever has the same values twice during the * execution of a program, then the program will never halt. Note that a * program cannot run indefinitely in a section of code that reads input, since * it will eventually run out of cards. * * For the (theoretical) case of infinite input, it is more correct to say that * the set of remaining cards, rather than the index of the next card, is part * of the virtual machine state. If the input data repeats in sync with the * program, then removing cards from the (infinite) set of remaining cards does * not actually change the set (or at least it will eventually return to a * state equal to one that it had before), so it is possible for the entire vm * state to repeat. A simple example of this is a program that repeats if it * receives the input of +0123456789, and the input is a neverending stream of * +0123456789 cards. */ typedef struct vm { data_t data[1000]; /* Program data. */ insn_t text[1000]; /* Program text. */ VEC_T(data_t) cards;/* Input cards. */ int pc; /* Program counter (index of next instruction). */ } vm_t; /* Information globally useful to the interpreter. */ typedef struct interp { FILE* input; /* Stream from which input data is read. */ int input_line; /* Current line number of input stream. */ FILE* output; /* Stream to which output data is written. */ vm_t vm; /* Virtual machine state. */ int debug; /* Perform interactive execution? */ int color; /* Colorize debug display? */ /* Debug stuff. */ int single_step; /* Pause prior to each instruction? */ VEC_T(int) breakpoints; /* Program locations at which to pause. */ VEC_T(int) watches; /* Memory locations to monitor. */ } interp_t; /* Return values for an opcode_func_t. */ typedef enum opcode_ret { RET_NEXT, /* Increment PC prior to next instruction. */ RET_GOTO, /* PC was modified by opcode; leave it alone. */ RET_HALT, /* Machine is halted. */ RET_FAIL /* Program terminated with error. */ } opcode_ret_t; /* A function that implements the behavior of an opcode. */ typedef opcode_ret_t (*opcode_func_t)(insn_t*, interp_t*); /* All possible meanings of an operand (argument) to an opcode. */ typedef enum arg_type { ARG_IGN , /* Argument's value is not used. */ ARG_DATA, /* Argument is a program data address. */ ARG_TEXT, /* Argument is a program text address. */ ARG_BASE, /* Argument is the base address of an array. */ ARG_IDX /* Argument is an offset into an array. */ } arg_type_t; /* Description of an opcode in the instruction set. */ struct opcode { char* code; /* "+5", "-2", etc. */ char* name; /* "ge", "div", etc. */ int valid; /* Is this a valid opcode? */ opcode_func_t func; /* Function implementing the opcode behavior. */ arg_type_t arg[3]; /* Argument types. */ }; /* opcode_t */ /* The instruction set. */ extern opcode_t opcodes[20]; /* Maximum and minimum values for program data. */ extern data_t data_min, data_max; /* Function prototypes. */ extern int scan_args (int argc, char** argv, interp_t* interp); extern int load_prog (interp_t* interp); extern int exec_prog (interp_t* interp); extern void debug_init (interp_t* interp); extern void debug_clear(interp_t* interp); extern int debug_step (interp_t* interp); #endif /* vim: set ts=4 sts=4 sw=4 tw=80 ft=c et: */