static char rcsver[] = "$Id: buffs.c,v 1.11 2001/12/05 22:32:40 saadat Exp $"; /** ** $Source: /tes/cvs/vanilla/buffs.c,v $ ** ** $Log: buffs.c,v $ ** Revision 1.11 2001/12/05 22:32:40 saadat ** If a "-select" field is specified multiple times. The multiple ** instances are assumed to be ORing to each other's selection. Thus ** "-select 'detector 1 3 detector 6 6'" will select detectors 1-3 and 6. ** ** Added code for indexing fixed-length array fields. ** ** Revision 1.10 2000/09/01 17:27:30 saadat ** Added type-4 indices. Index update fixes. ** Added bit-fields based index searching during query processing. ** ** This version has both qsort()-based and bits-based index searching, ** switchable thru Makefile. ** ** Revision 1.9 2000/08/04 00:52:14 saadat ** Added a preliminary version of logging/statistics gathering on normal ** termination through exit() and on receipt of the following signals: ** SIGSEGV, SIGBUS, SIGPIPE, SIGXCPU, SIGXFSZ ** ** Revision 1.8 2000/07/26 00:26:46 saadat ** Added preliminary code to create/use indices for searching. ** ** Fragment record access mechanism has also been changed ** from a simple pointer to a record-handle structure. ** ** Revision 1.7 2000/07/07 17:15:22 saadat ** Added support for Windows MS Visual C++ memory-mapped file support. ** Removed some unused variables from various files. ** ** Revision 1.6 2000/02/11 00:35:49 saadat ** Missed something in the last change. Fixed. ** ** Revision 1.5 2000/02/10 22:14:40 saadat ** Bug fix in V3.3.6 was not working properly, fixed. ** ** for(i=initial; (i #include #include #include #include "header.h" #include "mem.h" /* Creates a new TBLBUFF object for a given table object. User * supplies the record count for the buffer and the overlap count. * IMPORTANT: The overlap count MUST be at least two times the size of * the largest possible keyblock. */ #define EOFRAG(rhnd) ((rhnd).frp >= (rhnd).tbuff->end) TBLBUFF * NewTblBuff(TABLE *t) { RHND rhnd; TBLBUFF *b; b = calloc(sizeof(TBLBUFF), 1); if (b == NULL){ return NULL; } b->tbl = t; rhnd = RefillTblBuff(b); if (RPTR(rhnd) == NULL){ free(b); return(NULL); } return(b); } void SelectResetIdxPPFlag( SELECT **selects, int ns ) { int i; SELECT *s; for(i = 0; i < ns; i++){ for(s = selects[i]; s != NULL; s = s->next){ s->idxpp = 0; } } } RHND RefillTblBuff(TBLBUFF *b) { char *fname; RHND rhnd; if (b->buf != NULL) { MemUnMapFile(b->buf, b->len); if (b->varbuf) { MemUnMapFile(b->varbuf, b->varlen); b->varbuf = NULL; } #ifdef DEBUG /* Following debug info is misleading since b->fileidx may not always be sequential. It can take jumps of one or more. See find_jump() for details. */ fname = (char *)b->tbl->files->ptr[b->fileidx]; fprintf(stderr, "Unmapping %s\n", fname); #endif b->fileidx++; } if (b->fileidx >= b->tbl->files->number) { INITRHND(rhnd); /* rhnd.tbuff = NULL; */ return rhnd; } if (b->frag) FreeFragment(b->frag); fname = (char *)b->tbl->files->ptr[b->fileidx]; assert((b->frag = AllocFragment()) != NULL); b->frag->fraghead = LoadFragHead(fname, b->tbl); b->len = b->frag->fraghead->sbuf.st_size; #ifdef DEBUG fprintf(stderr, "mapping: %s\n", fname); #endif b->buf = MemMapFile(NULL, b->len, PROT_READ, MAP_PRIVATE, fname, O_RDONLY, 0); if (b->buf == NULL){ fprintf(stderr, "vanilla: Memory Mapping %s failed. ", fname?fname:"(null)"); perror("Reason"); abort(); } /* ** Reset any SELECT->idxpp flags that may have been set during the ** last fragment's index processing. */ /* ** Load Fragment Index - If one exists! ** Set new index preprocessing flags. */ if (b->tbl->selects != NULL) { SelectResetIdxPPFlag( (SELECT **)list_data(b->tbl->selects), list_count(b->tbl->selects)); b->frag->fragidx = LoadFragIdxAndMarkUsedSelects( fname, b->tbl->selects, b->tbl->label->name, b->frag->fraghead->nrows); #ifdef DEBUG if (b->frag->fragidx != NULL){ fprintf(stderr, "%s being processed through index.\n", fname); } #endif /* DEBUG */ } /* Adjust current pointer */ b->curr = b->buf + b->frag->fraghead->offset; b->end = b->buf + b->len; b->reclen = b->tbl->label->reclen; rhnd.tbuff = b; rhnd.frp = b->curr; rhnd.irec = 0; /* ** If index selection resulted in zero number of records, then ** the data in the table fragment has already exhausted. Set ** the next fragment record to the end of file. */ if (b->frag->fragidx != NULL){ if (b->frag->fragidx->n == 0){ SETEOFRAG(rhnd); } else { RPTR(rhnd) += b->reclen * b->frag->fragidx->idxsel[0]; } rhnd.irec = 1; } return rhnd; } PTR GiveMeVarPtr(PTR raw, TABLE *table, int offset) { TBLBUFF *b = table->buff; struct stat sbuf; char *fname, buf[256], *p; fname = ((char **)(table->files->ptr))[b->fileidx]; if (b->varbuf == NULL) { strcpy(buf, fname); p = &buf[strlen(buf)-4]; strcpy(p, ".var"); if (stat(buf, &sbuf) != 0) { p = &buf[strlen(buf)-4]; strcpy(p, ".VAR"); /* try capital case */ if (stat(buf, &sbuf) != 0) { fprintf(stderr, "Unable to open var file: %s\n", buf); return(NULL); } } /* fd = open(buf, O_RDONLY | O_BINARY); */ b->varlen = sbuf.st_size; /* b->varbuf = mmap(NULL, b->varlen, PROT_READ, MAP_PRIVATE, fd, 0); */ b->varbuf = MemMapFile(NULL, b->varlen, PROT_READ, MAP_PRIVATE, buf, O_RDONLY, 0); if (b->varbuf == NULL){ fprintf(stderr, "vanilla: Memory Mapping %s failed. ", buf?buf:"(null)"); perror("Reason"); abort(); } } if (offset >= b->varlen){ fprintf(stderr, "vanilla: VarPtr beyond EOF. File %s. Aborting...", (fname? fname: "(null)")); abort(); } return(b->varbuf+offset); } short ConvertVaxVarByteCount(PTR raw, VARDATA *vdata) { char buf[4]; short s; memcpy(buf, raw, 2); s = ((short *)MSB2(buf))[0]; return(s); } /* Returns the first record in a table. This will ONLY WORK if it's the * first operation performed on the table. Calling this function after * we've done find's on the table will give useless results. */ RHND GetFirstRec(TABLE * t) { RHND rhnd; INITRHND(rhnd); rhnd.tbuff = t->buff; if (t->buff == NULL) { t->buff = NewTblBuff(t); rhnd.tbuff = t->buff; if (rhnd.tbuff != NULL){ RPTR(rhnd) = t->buff->curr; if (t->buff->frag->fragidx != NULL){ if (t->buff->frag->fragidx->n > 0){ RPTR(rhnd) += t->buff->reclen * t->buff->frag->fragidx->idxsel[0]; } else { RPTR(rhnd) = t->buff->end; } rhnd.irec = 1; } } } return rhnd; } RHND GetNextRec(RHND rhnd) { int frec; if (rhnd.tbuff != NULL){ if (rhnd.tbuff->frag->fragidx == NULL){ RPTR(rhnd) += rhnd.tbuff->reclen; } else { if (rhnd.irec < rhnd.tbuff->frag->fragidx->n){ frec = rhnd.tbuff->frag->fragidx->idxsel[rhnd.irec]; RPTR(rhnd) = rhnd.tbuff->curr + frec * rhnd.tbuff->reclen; rhnd.irec ++; } else { RPTR(rhnd) = rhnd.tbuff->end; } } } return rhnd; } RHND GetEndOfFrag(TBLBUFF *b) { RHND rhnd; INITRHND(rhnd); rhnd.tbuff = b; if (rhnd.tbuff != NULL){ SETEOFRAG(rhnd); } return rhnd; } /** **/ RHND find_jump(TABLE * t, FIELD * f, DATA d, RHND beg, RHND end, int deep) { TBLBUFF *b = t->buff; int refill = 0; /* Added by Mankan, 5/99 */ FRAGMENT *frag; char *fname; int fileidx; DATA ek; /* */ if (RPTR(beg) == NULL) { beg = GetFirstRec(t); b = t->buff; } if (RPTR(end) == NULL) { end = GetEndOfFrag(t->buff); refill = 1; } while (1) { /* Added by Mankan 5/99 */ /* ** See if this record can even be in this fragment. ** This is terribly slow because it has to load the ODL ** header of each file. A file index would speed this up a lot */ if (deep==0) { frag = b->frag; ek = *((DATA **)(frag->fraghead->end_keys->ptr))[0]; if (CompareData(ek,d,f) < 0 ) { for(fileidx = b->fileidx+1; fileidx < b->tbl->files->number; fileidx++){ fname = (char *)b->tbl->files->ptr[fileidx]; assert((frag = AllocFragment()) != NULL); frag->fraghead = LoadFragHead(fname, b->tbl); /* The following line is quite restrictive */ ek = *((DATA **)(frag->fraghead->end_keys->ptr))[0]; /* If a fragment containing the required key value (as stored in variable "d") is found, load it and continue with the processing */ if (CompareData(ek,d,f) >= 0){ FreeFragment(frag); break; } FreeFragment(frag); } if (fileidx < b->tbl->files->number){ /* A fragment potentially containing the required key is found, so unmap the previous fragment and load this instead. */ #ifdef DEBUG fprintf(stderr, "Unmapping %s...\n", ((char **)b->tbl->files->ptr)[b->fileidx]); #endif /* Save the fragment serial no (being referred to as fileidx here) that contains the required key value */ /* Note: RefillTblBuff() increments b->fileidx before using it, that's why b->fileidx is one less than the actual fileidx */ b->fileidx = fileidx - 1; /* Load data from the selected fragment */ beg = RefillTblBuff(b); end = GetEndOfFrag(b); } else { /* List of fragments exhausted */ #ifdef DEBUG fprintf(stderr, "find_jump[list-exhausted] %s...\n", ((char **)b->tbl->files->ptr)[b->tbl->files->number-1]); #endif RPTR(beg) = NULL; return beg; } } } /* */ while (RPTR(beg) < RPTR(end) && CompareData(ConvertFieldData(RPTR(beg),f),d,f) < 0) { /* beg += b->reclen; */ beg = GetNextRec(beg); #ifdef DEBUG fprintf(stderr, "find_jump[1], "); fprintf(stderr, "%d / %d %s\n", (RPTR(beg) - b->buf)/b->reclen, (RPTR(end) - b->buf)/b->reclen, b->tbl->files->ptr[b->fileidx]); #endif } if (RPTR(beg) >= RPTR(end)) { if (refill) { beg = RefillTblBuff(b); if (RPTR(beg) == NULL){ return beg; } end = GetEndOfFrag(b); #ifdef DEBUG printf("find_jump[2], refill %s\n", (char *)b->tbl->files->ptr[b->fileidx]); #endif continue; } else { RPTR(beg) = NULL; return beg; } } #ifdef DEBUG fprintf(stderr, "find_jump[3], match "); fprintf(stderr, "%d %s\n",(RPTR(beg) - b->buf)/b->reclen, b->tbl->files->ptr[b->fileidx]); #endif return(beg); } } RHND find_until(TABLE * t, FIELD * f, RHND beg, RHND end) { TBLBUFF *b = t->buff; DATA d = ConvertFieldData(RPTR(beg), f); if (RPTR(end) == NULL){ end = GetEndOfFrag(b); } while ((beg = GetNextRec(beg), RPTR(beg) < RPTR(end)) && EquivalentData(ConvertFieldData(RPTR(beg), f), d, f)) { #ifdef DEBUG fprintf(stderr, "find_until: "); fprintf(stderr, "%d %s\n",(RPTR(beg) - b->buf)/b->reclen, b->tbl->files->ptr[b->fileidx]); #endif } return(beg); } /** ** Returns non-zero if the data "d" maches the SELECTion ** criteria enforced on the field "s->field". **/ int matchesSelectionCriteria(DATA d, SELECT *s) { do { /** ** Selection criteria is matched if the following holds: ** d >= s->low ** d <= s->high **/ /** The following was the actual condition that ** has been replaced. !(CompareData(d, s[i]->low, s[i]->field) < 0 || CompareData(d, s[i]->high, s[i]->field) > 0) **/ if (CompareData(d, s->low, s->field) >= 0 && CompareData(d, s->high, s->field) <= 0){ return 1; } s = s->next; }while(s); return 0; } RHND find_select(TABLE * t, RHND beg, RHND end) { DATA d; SELECT **s; int i, n, count = 0; TBLBUFF *b = t->buff; int refill = 0; RHND last; INITRHND(last); if (t->selects == NULL) return(beg); s = (SELECT **)t->selects->ptr; n = t->selects->number; if (RPTR(end) == NULL) { RPTR(end) = b->end; refill = 1; } /** ** "i" contains a (mod n) index into the SELECTs. Where ** "n" is the size of the SELECT list attached to the ** current TABLE. ** On each SELECT match, the match count (i.e. the var ** "count") is incremented. ** If match fails at some point, the count is reset. ** Once the match count reaches "n" the selection is ** deemed successful. ** Resuming the search from the current index "i" ** means that at least the i'th SELECT is already ** matched. **/ i = 0; while (1) { while (RPTR(beg) < RPTR(end)) { /* ** If the records for the current select i.e., s[i] have ** already been pruned during index processing, then, bypass ** this select. Otherwise, process it. */ if (!s[i]->idxpp){ d = ConvertData(RPTR(beg) + s[i]->start, s[i]->field); /* if (CompareData(d, s[i]->low, s[i]->field) < 0 || CompareData(d, s[i]->high, s[i]->field) > 0) { */ if (!matchesSelectionCriteria(d, s[i])){ beg = GetNextRec(beg); /* beg += b->reclen; */ /* ** Selection mismatch, continue the-"while" until a ** match is found. */ continue; } } /* ** Current selection matched. ** If the selection s[(i-1)%n] did not match on the same ** record, reset the number of selects ("count") matched. */ if (RPTR(beg) != RPTR(last)) { last = beg; count = 0; } i = (i+1)%n; if (++count == n) { #ifdef DEBUG fprintf(stderr, "find_select[2]: match, "); fprintf(stderr, "%d %s\n",(RPTR(beg) - b->buf)/b->reclen, b->tbl->files->ptr[b->fileidx]); #endif return(beg); } } if (refill) { beg = RefillTblBuff(b); if (RPTR(beg) == NULL) return beg; end = GetEndOfFrag(b); #ifdef DEBUG fprintf(stderr, "find_select[3]: refill %s\n", b->tbl->files->ptr[b->fileidx]); #endif } else { RPTR(beg) = NULL; return beg; } } /* unreachable */ } /** ** Get the maximum value of the next record across all tables. ** This function also moves us across a fragment boundary (and sets up the ** first fragment) when necessary. **/ DATA * maxFieldVal(SLICE * s, int dim, TABLE **tbl, DATA *maxValue) { int i, rv = 0; DATA value; RHND beg; for (i = 0; i < dim; i++) { if (s[i].party_key == NULL) continue; if (RPTR(s[i].start_rec) == NULL) { /* No buffer loaded yet */ s[i].start_rec = GetFirstRec(tbl[i]); } else if (RPTR(s[i].start_rec) == tbl[i]->buff->buf + tbl[i]->buff->len) { /* EOF of current bufer */ beg = RefillTblBuff(tbl[i]->buff); if (RPTR(beg) == NULL) return NULL; s[i].start_rec = beg; } value = ConvertFieldData(RPTR(s[i].start_rec), s[i].party_key); if (!rv) { *maxValue = value; rv++; } else if (CompareData(value, *maxValue, s[i].party_key) > 0) *maxValue = value; } return maxValue; } /* PTR find_select(TABLE * t, PTR beg, PTR end) find_select checks all the selects in a table, and returns the next record that satisfies. If we had an index for the file, we could join the relevant blocks for each field used, and have a list of rows ready to go. */