// osmfilter 2011-03-27 14:20
#define VERSION "0.3C"
// (c) Markus Weber, Nuernberg
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Affero General Public License
// as published by the Free Software Foundation.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, see http://www.gnu.org/licenses/.
// Other licenses are available on request; please ask the author.

// helptext[]: the complete usage description of the program;
// main() prints it to stderr when the program is called with "-h";
// caution: main() hands this text to fprintf() as the format string,
// therefore it must never contain a percent sign;
// the delimiter shown in the section about dependencies is the tag
// whose name is stored as first element of keys[] in main();
const char* helptext=
"\nosmfilter " VERSION "\n"
"\n"
"This program operates as filter for OSM XML data.\n"
"Only sections containing certain tags will be copied from standard\n"
"input to standard output. Use the calling line parameter -k to\n"
"determine which sections you want to have in standard output.\n"
"For example:\n"
"\n"
" -k\"key1=val1 key2=val2 key3=val3\"\n"
" -k\"amenity=restaurant =bar =pub =cafe =fast_food =food_court =nightclub\"\n"
" -k\"barrier=\"\n"
" -K/ -k\"description=something with blanks/name=New York\"\n"
"\n"
"Limitations: the maximum number of key/value pairs is 1000, the\n"
"maximum length of keys and/or values is 100. The -t option invokes\n"
"a test mode which prints a list of accepted search strings to\n"
"standard output.\n"
"\n"
"To suppress certain records, please use the -d option. For example:\n"
"\n"
" -d\"highway=path =footway =cycleway railway=rail\"\n"
"\n"
"All objects containing at least one of the mentioned values will be\n"
"dropped, regardless of their being part of a relation which is not\n"
"dropped. I.e., key/val pairs in the -d parameter overrule the pairs\n"
"which have been defined in the -k parameter.\n"
"\n"
"\n"
"Considering Dependencies\n"
"------------------------\n"
"\n"
"To get dependent elements, e.g. nodes of a selected way or ways of\n"
"a selected relation, you need to feed the input OSM XML file more\n"
"than once. You need to do this at least 3 times to get the nodes of\n"
"a way which is referred to by a relation.\n"
"If you want to ensure that relations which are referred by other\n"
"relations are also processed correctly, you must input the file\n"
"a 4th time. If there are more than one inter-relational hierarchies\n"
"to be considered, you will need to do this a 5th or 6th time.\n"
"\n"
"If you feed the input file into an osmfilter more than once, you must\n"
"tell the program the exact beginning and ending of the pre-processing\n"
"sequence. For example:\n"
"\n"
" cat lim a.osm a.osm lim a.osm | ./osmfilter -k\"lit=yes\" >new.osm\n"
"\n"
"where 'lim' is a file containing this sequence as a delimiter:\n"
" <osmfilter_pre/>\n"
"If you have a compressed input file, you can use bzcat instead of\n"
"cat. If this is the case, be sure to have compressed the 'lim' file\n"
"as well.\n"
"\n"
"To speed-up the filter process, the program uses some main memory\n"
"for a hash table. By default, it uses 320 MiB for storing a flag\n"
"for every possible node, 60 for the way flags, and 20 relation\n"
"flags.\n"
"Every byte holds the flag for 8 ID numbers, i.e., in 320 MiB the\n"
"program can store 2684 million flags. As there are less than 1000\n"
"million IDs for nodes at present (Oct 2010), 120 MiB would suffice.\n"
"So, for example, you can decrease the hash sizes to e.g. 130, 12 and\n"
"2 MiB using this option:\n"
"\n"
" -h130-12-2\n"
"\n"
"But keep in mind that the OSM database is continuously expanding. For\n"
"this reason the program-own default value is higher than shown in the\n"
"example, and it may be appropriate to increase it in the future.\n"
"If you do not want to bother with the details, you can enter the\n"
"amount of memory as a sum, and the program will divide it by itself.\n"
"For example:\n"
"\n"
" -h1000\n"
"\n"
"These 1000 MiB will be split in three parts: 800 for nodes, 150 for\n"
"ways, and 50 for relations.\n"
"\n"
"Because we are taking hashes, it is not necessary to provide all the\n"
"suggested memory; the program will operate with less hash memory too.\n"
"But, in this case, the filter will be less effective, i.e., some\n"
"nodes and some ways will be left in the output file although they\n"
"should have been excluded.\n"
"The maximum value the program accepts for the hash size is 4000 MiB;\n"
"If you exceed the maximum amount of memory available on your system,\n"
"the program will try to reduce this amount and display a warning\n"
"message.\n"
"\n"
"\n"
"Optimizing the Performance\n"
"--------------------------\n"
"\n"
"As there are no nodes which refer to other objects, preprocessing\n"
"does not need the node section of the OSM XML file. Nearly the same\n"
"applies to ways, so the ways are needed only once in preprocessing -\n"
"in the last run.\n"
"If you want to enhance performance, you should take pre-filtering the\n"
"OSM XML file into consideration. Pre-filtering can be done using the\n"
"drop option. For example:\n"
"\n"
" cat a.osm | ./osmfilter --drop-changesets --drop-nodes >wr.osm\n"
" cat wr.osm | ./osmfilter --drop-ways >r.osm\n"
" cat lim r.osm wr.osm lim a.osm | ./osmfilter -k\"lit=yes\" >new.osm\n"
"\n"
"If you are using pre-filtering, there will be no other filtering,\n"
"i.e., the parameter -k will be ignored.\n"
"\n"
"There is NO WARRANTY, to the extent permitted by law.\n"
"Please send any bug reports to markus.weber@gmx.com\n\n";
#define _FILE_OFFSET_BITS 64
#include
#include
#include
#include
#include
#include
typedef enum {false= 0,true= 1} bool;
#define isdig(x) isdigit((unsigned char)(x))
static char *strmcpy(char *dest, const char *src, size_t maxlen) {
  // bounded string copy: transfers src[] into dest[] and always leaves
  // dest[] null-terminated, cutting the text off if it does not fit;
  // src[]: null-terminated string to copy;
  // maxlen: capacity of dest[] in bytes, terminator included;
  //         0: dest[] is not touched at all;
  // return: dest (the very same pointer which has been passed in);
  // dest[]: receives at most maxlen-1 characters plus the terminator;
  size_t i;  // index of the character which is copied next

  if(maxlen==0)  // no room, not even for the terminator
    return dest;
  for(i= 0; i+1<maxlen && src[i]!=0; i++)
    dest[i]= src[i];
  dest[i]= 0;  // terminate right behind the last copied character
  return dest;
  }  // end   strmcpy()
#define strMcpy(d,s) strmcpy((d),(s),sizeof(d))
static int strzcmp(const char* s1,const char* s2) {
  // prefix comparison: checks whether s1[] starts with s2[];
  // only as many characters as s2[] contains are compared, hence
  // s1[] is allowed to continue after the matching part;
  // s1[]: string which is examined;
  // s2[]: expected beginning of s1[];
  // return:
  // 0: s1[] starts with s2[] (this includes an empty s2[]);
  // -1: at the first difference, the character of s1[] has the
  //     smaller value (characters are taken as unsigned);
  // 1: at the first difference, the character of s1[] has the
  //    greater value;
  for(;;) {  // for every character position
    if(*s2==0)  // all of s2[] has been matched
      return 0;
    if(*s1!=*s2)  // first difference found
      break;
    s1++; s2++;
    }
  if(*(unsigned char*)s1 < *(unsigned char*)s2)
    return -1;
  return 1;
  }  // end   strzcmp()
static int strzlcmp(const char* s1,const char* s2) {
  // prefix test which reports the length of the matched prefix;
  // works like strzcmp() as far as s1[] may be longer than s2[],
  // but the result is a length instead of an ordering;
  // s1[]: string which is examined;
  // s2[]: expected beginning of s1[];
  // return:
  // >0: s1[] starts with s2[]; the value is the length of s2[];
  // 0: s1[] does not start with s2[]; note that an empty s2[]
  //    gives 0 as well, because its length is 0;
  int n;  // number of characters which have matched so far

  n= 0;
  while(s2[n]!=0) {  // for every character of s2[]
    if(s1[n]!=s2[n])  // mismatch (or s1[] has ended too early)
      return 0;
    n++;
    }
  return n;
  }  // end   strzlcmp()
int readbyte(char* c) {
  // deliver the next byte of stdin; the bytes are fetched from the
  // file descriptor 0 in large portions and handed out one by one;
  // return: 1: a byte has been delivered;
  //         -1: there is nothing (more) to read, or read() failed;
  // *c: the delivered byte; 0 if the return value is -1;
  static char readbuf[20000000];  // portion most recently read
  static char* readbufe= readbuf;  // end of the valid bytes in readbuf[]
  static char* readbufp= readbuf;  // next byte to hand out
  int got;  // number of bytes the last read() supplied

  if(readbufp<readbufe) {  // there are unconsumed bytes in the buffer
    *c= *readbufp++;
    return 1;
    }
  // here: the buffer is used up, so try to get the next portion
  got= read(0,readbuf,sizeof(readbuf));
  if(got<=0) {  // end of input, or an error
    *c= 0;
    return -1;
    }
  readbufe= readbuf+got;
  readbufp= readbuf+1;  // the first byte is handed out right now
  *c= readbuf[0];
  return 1;
  }  // end   readbyte();
static void writestdout_drain(const char* p,size_t n) {
  // hand n bytes, starting at p[], over to the file descriptor 1;
  // write() is allowed to accept fewer bytes than requested (this
  // happens e.g. if stdout is a pipe), therefore it is called again
  // until nothing is left;
  // if write() reports an error or does not make any progress, the
  // remaining bytes are given up, because there is no way to report
  // the failure to the caller (best effort);
  long w;  // number of bytes accepted by the last write()

  while(n>0) {  // still bytes to deliver
    w= write(1,p,n);
    if(w<=0)  // error, or nothing has been accepted
      break;
    p+= w; n-= (size_t)w;
    }
  }  // end   writestdout_drain()

void writestdout(const char* s,long len) {
  // write some bytes to stdout, use a buffer;
  // the bytes are collected in a large buffer and are passed on to
  // stdout not before the buffer is full or a flush is requested;
  // s[]: bytes to write; not used in case of a flush request;
  // len: number of bytes to write; 0: nothing to do;
  //      <0 (usually -1): flush the buffer;
  static char writebuf[20000000];  // bytes waiting to be written
  static char* writebufp= writebuf;  // first free byte in writebuf[]
  size_t room;  // free bytes left in writebuf[]
  size_t chunk;  // bytes copied into the buffer in one step

  if(len<0) {  // the write buffer shall be flushed
    writestdout_drain(writebuf,(size_t)(writebufp-writebuf));
    writebufp= writebuf;
    return;
    }
  while(len>0) {  // still bytes to store
    room= (size_t)(writebuf+sizeof(writebuf)-writebufp);
    if(room==0) {  // the write buffer is full
      writestdout_drain(writebuf,sizeof(writebuf));
      writebufp= writebuf;
      room= sizeof(writebuf);
      }
    chunk= (size_t)len<room? (size_t)len: room;
    memcpy(writebufp,s,chunk);  // s[] never overlaps the static buffer
    writebufp+= chunk; s+= chunk; len-= (long)chunk;
    }
  }  // end   writestdout();
//------------------------------------------------------------
// Module hash_ OSM hash module
//------------------------------------------------------------
// this module provides three hash tables with default sizes
// of 320, 60 and 20 MB;
// the procedures hash_set() and hash_get() allow bitwise access
// to these tables;
// as usual, all identifiers of a module have the same prefix,
// in this case 'hash'; one underscore follows the prefix in case of
// a globally accessible object, two underscores in case of objects
// which are not meant to be accessed from outside this module;
// the sections of private and public definitions are separated
// by a horizontal line: ----
static bool hash__initialized= false;
#define hash__M 3
static unsigned char* hash__mem[hash__M]= {NULL,NULL,NULL};
// start of the hash fields for each object type (node, way, relation);
static unsigned long hash__max[hash__M]= {0,0,0};
// size of the hash fields for each object type (node, way, relation);
static void hash__end() {
// clean-up for hash module;
// will be called at program's end;
int o; // object type
for(o= 0;o4000) x= 4000; \
hash__max[o]= x*(1024*1024);
D(n,0) D(w,1) D(r,2)
#undef D
// allocate memory for each hash table
for(o= 0;o=1024);
if(hash__mem[o]==NULL) // allocation unsuccessful at all
error= true; // memorize that the program should be aborted
} // end for each hash table
atexit(hash__end); // chain-in the clean-up procedure
if(!error) hash__initialized= true;
return error? 2: warning? 1: 0;
} // end hash_ini()
static void hash_set(int o,const char* id) {
// set a flag for a specific object type and ID;
// o: object type; 0: node; 1: way; 2: relation;
// caution: due to performance reasons the boundaries
// are not checked;
// id: id of the object; the id is given as a string of decimal
// digits; a specific string terminator is not necessary,
// it is assumed that the id number ends with the first
// non-digit character;
unsigned char* mem; // address of byte in hash table
uint64_t idi; // bit number (0..7)
unsigned int ido; // bit offset to idi;
if(!hash__initialized) return; // error prevention
idi= 0;
if(*id!='-') { // positive id
while(isdig(*id)) { idi= idi*10+(*id-'0'); id++; }
}
else { // negative id
id++;
while(isdig(*id)) { idi= idi*10+(*id-'0'); id++; }
idi= hash__max[o]*8-idi;
}
ido= idi&0x7; // extract bit number (0..7)
idi>>=3; // calculate byte offset
idi%= hash__max[o]; // consider length of hash table
mem= hash__mem[o]; // get start address of hash table
mem+= idi; // calculate address of the byte
*mem|= (1<>=3; // calculate byte offset
idi%= hash__max[o]; // consider length of hash table
mem= hash__mem[o]; // get start address of hash table
mem+= idi; // calculate address of the byte
flag= (*mem&(1<0) o= 0;
else if((l= strzlcmp(s,"0) o= 0;
else if((l= strzlcmp(s,"0) o= 1;
else if((l= strzlcmp(s,"0) o= 2;
if(l>0) { // we found one of the searched key identifiers
hash_set(o,s+l); // set flag in hash table
s+= l-1; // jump over the search string
insidequote= true; // because a quotation mark will follow
}
} // end a key identifier starts here
} // end we're not in a quoted area
s++; // take the next character
} // end still characters to parse
} // end parse_hashset()
#if 0 // OSM XML examples
#endif
bool parse_hashget(const char* s) {
// parse a given key identifier and check the hash table if the
// relating flag has been set;
// the beginning of s[] is compared with up to three search strings
// by strzlcmp(); the first search string which matches determines
// the object type, and the characters which follow the matched
// part are passed to hash_get() as the id;
// s[]: characters to parse (need not to be null-terminated);
// return: related hash flag is set;
//         false as well if none of the search strings has matched;
// uses: hash_get();
// NOTE(review): in this copy of the file the three search strings
// below are cut off (each literal is opened but never closed), so
// it cannot be told from here which texts are matched; the
// statements need to be restored before this procedure compiles;
int l; // length of matched identifier
int o; // object type; 0: node; 1: way; 2: relation;
bool flag;
o= 0; // (just to get sure)
if((l= strzlcmp(s,"0) o= 0;
else if((l= strzlcmp(s,"0) o= 1;
else if((l= strzlcmp(s,"0) o= 2;
flag= l>0 && hash_get(o,s+l);
return flag;
} // end parse_hashget()
#if 0 // OSM XML examples
#endif
//------------------------------------------------------------
// end Module parse_ OSM parse module
//------------------------------------------------------------
int main(int argc,const char *argv[]) {
// main program;
// for the meaning of the calling line parameters please look at the
// contents of helptext[];
// overview of what is visible in this procedure:
// 1. the calling line parameters are evaluated: -h (help text),
//    --drop-... (pre-filtering), -h followed by digits (hash sizes),
//    -t (test mode), -K (pair delimiter), -k (include pairs, stored
//    in pair[]) and -d (exclude pairs, stored in dair[]); any other
//    parameter ends the program with a message;
// 2. stdin is read byte by byte via readbyte(); the text which
//    follows a less-than sign outside of quotation marks is collected
//    in keybuf[]; a section which starts with one of the names in
//    keys[] is collected in buf[]; as soon as the nesting level is
//    back at 0, the collected data are discarded (if drop is set),
//    or passed to parse_hashset() (while insidepreproc is 1), or
//    written via writestdout();
// return: 0 on every path which is visible here;
// NOTE(review): several statements in this copy of the procedure are
// cut off (the loops over the key/val pairs of -k and -d, the code
// between the test mode print-out and the start of the main loop,
// the comparisons against dair[] and pair[], and one strcmp() on
// keybuf[]); these places need to be restored before the procedure
// compiles, and their behaviour cannot be told from here;
int insidepreproc; // 0: no preprocessing;
// 1: preprocessing files at present;
// 2: there was preprocessing;
bool insidequote; // we are between quotation marks;
int nesting; // nesting level of OSM XML objects;
// 0: we are not inside a section of one of the defined keys which
// have been defined in keys[];
// 1: we are in a section of the key with index keyi;
// 2..: same as 1, but nested;
static const char keys[][20]=
{"osmfilter_pre","node","way","relation","changeset",""};
// keys which initiate a section which may be dropped;
// first index is reserved for the preprocessor delimiter;
// the list is terminated by a "";
int keyi; // index of the key we are presently dealing with
const char* keyp; // key of the presently processed section
bool preserve; // the whole section should be preserved
bool drop; // the whole section is to drop; overrules 'preserve';
bool justdropped; // a section has just been dropped; afterwards,
// all blanks, tabs, CRs and NLs shall be ignored;
char c; // character we have read
static char buf[250000+2];
// buffer for sections which may be dropped;
// 50000 could be enough for huge sections (e.g. landuse=forest"),
// but take 250000, it's safer;
static const char* bufe= buf+sizeof(buf)-2;
// address of the end of the buffer
char* bufp; // pointer in buf[]
static char keybuf[30000+2]; // buffer for keywords;
// keywords are words which follow a '<' mark;
static const char* keybufee= keybuf+sizeof(keybuf)-1-2;
// address of the end of the keyword buffer
char* keybufp; // pointer in keybuf[]
char* keybuft;
// pointer in keybuf[] which determines the end of the identifier
int r; // return code for different purposes
// for calling line parameters
#define pairMAX 1000 // maximum number of key-val-pairs
#define pairkMAX (100+20) // maximum length of key or val;
// +20, because we're creating search strings including the
// key resp. val;
static struct { // key/val pair for the include filter
char k[pairkMAX]; // key to compare;
// "": same key as previous key in list;
int klen; // length of .k[];
// 0: same length as previous key in list;
char v[pairkMAX]; // value to the key in .k[];
// "": any value will be accepted;
} pair[pairMAX];
static int pairn= 0; // number of pairs in list 'pair[]';
static struct { // key/val pair for the exclude filter
char k[pairkMAX]; // key to compare;
// "": same key as previous key in list;
int klen; // length of .k[];
// 0: same length as previous key in list;
char v[pairkMAX]; // value to the key in .k[];
// "": any value will be accepted;
} dair[pairMAX];
static int dairn= 0; // number of pairs in list 'dair[]';
// remark: 'dair' is a word creation; it stands for 'drop pair',
// which means 'pair which is to drop';
int h_n,h_w,h_r; // user-suggested hash size in MiB, for
// hash tables of nodes, ways, and relations;
static bool keydrop[]= {true,false,false,false,false};
// section must be dropped, no matter of its content;
// element index refers to keys[];
bool dropsect;
// at least one section is to be dropped due to user request
/* read command line parameters */ {
char pairlim; // delimiter between pairs in command line
const char* pk,*pv,*pe; // pointers in parameter for key/val pairs;
// pk: key; pv: val; pe: end of val;
bool testmode; // user does not want processing; just print
// the given parameter;
int len; // string length; different purposes;
testmode= false;
pairlim= ' ';
h_n= h_w= h_r= 0;
dropsect= false;
pk= NULL; // (initialize to suppress compiler warning)
if(argc<=1) { // no command line parameters given
fprintf(stderr,"osmfilter " VERSION "\n"
"Filters an .osm file for specific key/val sequences.\n"
"To get detailed help, please enter: ./osmfilter -h\n");
return 0; // end the program, because without having parameters
// we do not know what to do;
}
while(--argc>0) { // for every parameter in command line
argv++; // switch to next parameter; as the first one is just
// the program name, we must do this before reading the
// first 'real' parameter;
if(strcmp(argv[0],"-h")==0) {
// user wants help text
fprintf(stderr,helptext); // print help text
return 0;
}
if(strzcmp(argv[0],"--drop-node")==0) {
// remark: here, we use strzcmp() to accept "--drop-node"
// as well as "--drop-nodes" (plural);
keydrop[1]= true; dropsect= true;
continue; // take next parameter
}
if(strzcmp(argv[0],"--drop-way")==0) {
keydrop[2]= true; dropsect= true;
continue; // take next parameter
}
if(strzcmp(argv[0],"--drop-relation")==0) {
keydrop[3]= true; dropsect= true;
continue; // take next parameter
}
if(strzcmp(argv[0],"--drop-changeset")==0) {
keydrop[4]= true; dropsect= true;
continue; // take next parameter
}
if(strcmp(argv[0],"--drop-none")==0 ||
strcmp(argv[0],"--drop-nothing")==0) {
dropsect= true;
continue; // take next parameter
}
if(argv[0][0]=='-' && argv[0][1]=='h' && isdig(argv[0][2])) {
// "-h...": user wants a specific hash size;
// note that we accept "-h" only if it is continued by a
// digit, so that a plain "-h" would not be recognized
// and therefore print the help text;
const char* p;
p= argv[0]+2; // jump over "-h"
h_n= h_w= h_r= 0;
// read the up to three values for hash tables' size;
// format examples: "-h200-20-10", "-h1200"
while(isdig(*p)) { h_n= h_n*10+*p-'0'; p++; }
if(*p!=0) { p++; while(isdig(*p)) { h_w= h_w*10+*p-'0'; p++; } }
if(*p!=0) { p++; while(isdig(*p)) { h_r= h_r*10+*p-'0'; p++; } }
continue; // take next parameter
}
if(strcmp(argv[0],"-t")==0) {
// "-t": user wants test mode;
testmode= true;
continue; // take next parameter
}
if(argv[0][0]=='-' && argv[0][1]=='K' &&
argv[0][2]!=0 && argv[0][3]==0) {
// user wants a special pair delimiter
pairlim= argv[0][2];
continue; // take next parameter
}
if(argv[0][0]=='-' && argv[0][1]=='k' && argv[0][2]!=0) {
// key/val pairs for include filter
// may be ignored later, in case one of the
// "--drop..." parameters has been given;
pk= argv[0]+2; // jump over "-k"
while(pk!=NULL && pk[0]!=0 && pairn=pe-1) pv= pe;
len= pv-pk; // length of this key
if(len>(pairkMAX-20)) {
len= pairkMAX-20; // delimit key length
fprintf(stderr,"osmfilter: Key too long: %.*s\n",pairkMAX-20,pk);
}
if(pv>=pe) { // there is a key but no value
if(len>0 && pk[len-1]=='=') len--;
sprintf(pair[pairn].k,"0 &&
(len==0 || strcmp(pair[pairn].k,pair[pairn-1].k)==0)) {
// no key or same key as previous one
pair[pairn].k[0]= 0; // mark pair as 'pair with same key'
pair[pairn].klen= 0; // mark key length as
// 'same key key length as previous one'
}
len= pe-pv-1; // length of this value
if(len>(pairkMAX-20)) {
len= pairkMAX-20; // delimit value length
fprintf(stderr,"osmfilter: Val too long: %.*s\n",
pairkMAX-20,pv);
}
sprintf(pair[pairn].v,"%.*s\"",len,pv+1);
// assemble the search string for the value
}
pairn++; // next pair in key/val table
pk= pe; // jump to next key/val pair in parameter list
} // end for every key/val pair
continue; // take next parameter
} // end key/val pairs for include filter
if(argv[0][0]=='-' && argv[0][1]=='d' && argv[0][2]!=0) {
// key/val pairs for exclude filter
pk= argv[0]+2; // jump over "-d"
while(pk!=NULL && pk[0]!=0 && dairn=pe-1) pv= pe;
len= pv-pk; // length of this key
if(len>(pairkMAX-20)) {
len= pairkMAX-20; // delimit key length
fprintf(stderr,"osmfilter: Key too long: %.*s\n",pairkMAX-20,pk);
}
if(pv>=pe) { // there is a key but no value
if(len>0 && pk[len-1]=='=') len--;
sprintf(dair[dairn].k,"0 &&
(len==0 || strcmp(dair[dairn].k,dair[dairn-1].k)==0)) {
// no key or same key as previous one
dair[dairn].k[0]= 0; // mark pair as 'pair with same key'
dair[dairn].klen= 0; // mark key length as
// 'same key key length as previous one'
}
len= pe-pv-1; // length of this value
if(len>(pairkMAX-20)) {
len= pairkMAX-20; // delimit value length
fprintf(stderr,"osmfilter: Val too long: %.*s\n",
pairkMAX-20,pv);
}
sprintf(dair[dairn].v,"%.*s\"",len,pv+1);
// assemble the search string for the value
}
dairn++; // next pair in key/val table
pk= pe; // jump to next key/val pair in parameter list
} // end for every key/val pair
continue; // take next parameter
} // end key/val pairs for exclude filter
// here: parameter not recognized;
fprintf(stderr,
"osmfilter: Unrecognized parameter. Try: ./osmfilter -h\n");
return 0; // end the program, because there must be something
// wrong with the parameters;
} // end for every parameter in command line
if(pk!=NULL && pk[0]!=0)
fprintf(stderr,"osmfilter: Too many key/val pairs.\n");
if(h_n==0) h_n= 400; // use standard value if not set otherwise
if(h_w==0 && h_r==0) {
// user chose simple form for hash memory value
// take the one given value as reference and determine the
// three values using these factors: 80%, 15%, 5%
h_w= h_n/5; h_r= h_n/20;
h_n-= h_w; h_w-= h_r; }
if(testmode) {
// user only wants a print-out of the command line parameters
int i;
for(i= 0;i0 && (c==' ' || c=='\t' || c=='\r' || c=='\n'))
r= readbyte(&c); // read next byte
}
if(r<0) // we reached the end of the file
break; // end processing
keybufp= keybuf; // default: we've read no key identifier
if(c=='\"') // we encountered a quotation mark
insidequote= !insidequote;
else if(!insidequote) { // we're not in a quoted area
if(c=='<') { // a key identifier starts here
// read-in the key identifier
keybuf[0]= c;
keybufp= keybuf+1; // key starts after the '<'
keybuft= NULL;
for(;;) { // for every character of the key identifier
r= readbyte(&c); // read one character from stdin
if(r<=0 || c=='>' || keybufp>=keybufee)
// no more characters of the key identifier to read
break;
if(keybuft==NULL && (c==' ' || c=='>')) keybuft= keybufp;
// store the memory address of the key's end
*keybufp++= c; // store next character of the key
} // end for every character of the key identifier
if(keybuft==NULL) keybuft= keybufp; // if not already
// having been set: set the address of the key's end
*keybufp= 0; // add null-terminator to key identifier
// process the key
if(nesting>0) { // we're already inside a specified section
bool kfound; // we did find one of the searched keys
if(!drop) {
// this section has not already been marked as to be dropped
// test for searched drop pair, a so-called 'dair'
int i; // for index of the key in search string list
for(i= 0;i=dairn || dair[i+1].klen>0)
// next key is different
break;
// here: next key is identical to the present key
i++; // step to next key
} // end for every value of this key
} // end for every key in search string list
foundthedair:;
} // end sect. has not already been marked as to be dropped
kfound= false;
if(!drop && !preserve) {
// neither this section is to be dropped nor
// is it already listed to being preserved
// test for searched pair
if(!preserve) {
// section is not already listed to being preserved
int i; // for index of the key in search string list
for(i= 0;i=pairn || pair[i+1].klen>0)
// next key is different
break;
// here: next key is identical to the present key
i++; // step to next key
} // end for every value of this key
} // end for every key in search string list
foundthepair:;
} // end section is not already listed to being preserved
} // end test for searched pair
if(!drop && strcmp(keybuf,"keybuf+1 &&
memcmp(keybuf+1,keyp,keybuft-keybuf-1)==0 &&
keyp[keybuft-keybuf]==0) // same key as previous key
// is now starting a nested section
nesting++; // memorize that we dived one layer deeper
else if(keybuft>keybuf+2 && keybuf[1]=='/' &&
memcmp(keybuf+2,keyp,keybuft-keybuf-2)==0 &&
keyp[keybuft-keybuf]==0) { // same key as previous key
// is now ending the section
nesting--; // memorize that went up one layer
if(nesting==0) { // outermost section has ended
if(!preserve) {
// nobody found the section worth keeping
drop= true; // make sure to drop the section
}
preserve= dropsect; // initialize the variable again
} // end outermost section has ended
} // end same key as prev. key is now ending the section
} // end we're already inside a specified section
else if(keybuft>keybuf+1 && keybuf[1]!='/') {
// maybe a new specified section starts here
// note: in this 'else' branch we are on nesting level 0;
// now, find out if this section starts with one of
// the specified keys
keyi= 0; // first index of keys in drop list
keyp= keys[0]; // first key in list
while(keyp[0]!=0) { // for every key in list
int len;
len= keybuft-keybuf-1;
if(len>0 && keybuf[len]=='/') {
len--;
while(len>0 && keybuf[len]==' ') len--;
}
if(memcmp(keybuf+1,keyp,len)==0 &&
keyp[keybuft-keybuf]==0)
// examined key is identical to key in list
break; // the section's key is listed
keyi++; keyp+= sizeof(keys[0]); // next key in list
}
if(keyp[0]!=0) { // yes it's a specified key
nesting= 1; // we entered a specified section
drop= keydrop[keyi];
// set drop mark in case this section is
// to be dropped regardless of its contents
if(keyi==0) { // this section is the preprocessor tag
if(insidepreproc==0) { // it's the first preprocessor tag
int i;
insidepreproc= 1;
// mark that we're now in the phase of preprocessing
i= hash_ini(h_n,h_w,h_r); // initialize hash table
if(i==1) write(2,
"Warning: hash size had to be reduced.\n",38);
else if(i==2) write(2,
"Error: not enough memory.\n",26);
}
else if(insidepreproc==1)
// presently, we are in the phase of preprocessing
insidepreproc= 2;
// mark that we just left the phase preprocessing
} // end preprocessor tag
if(keybufp[-1]=='/') { // section ends right after start,
// so it is a small section
if(!dropsect) // not running a prefiltering
drop= true; // in regular processing small sections
// will be node sections without tags and therefore
// shall be deleted unless listed in hash table
nesting= 0; // we just left a small outermost section
} // end it is a small section
// test for searched IDs
if(!dropsect) { // not running a prefiltering
if(insidepreproc>0) {
// there is (or has been) preprocessing
if(parse_hashget(keybuf)) {
// present key identifier had been listed
// to being preserved
drop= false;
preserve= true;
// this small section shall be preserved
}
}
} // end not a run of prefiltering AND
} // end yes it's a specified key
} // end maybe a new specified section starts here
} // end a key identifier starts here
} // end we're not in a quoted area
if(nesting<=0) { // we're not inside a specified section
if(drop) { // previously processed section shall be dropped
// skip the buffered section
drop= false;
bufp= buf; // clear buffer
keybufp= keybuf; // clear key buffer
justdropped= true; // memorize that trailing spaces and
// newlines are to be dropped as well
} // end last section shall be dropped
else { // last section must not be dropped
if(bufp>buf) { // there are some characters in
// buffer waiting to be written
if(insidepreproc==1) { // inside phase of preprocessing
*bufp= 0; // set null-terminator
parse_hashset(buf); // parse for key identifiers
}
else // not inside phase of preprocessing
writestdout(buf,bufp-buf); // write buffer contents
bufp= buf; // clear buffer
}
if(keybufp>keybuf) { // there are some characters in
// key buffer waiting to be written
if(insidepreproc==1) { // inside phase of preprocessing
*keybufp= 0; // set null-terminator
parse_hashset(keybuf); // parse for key identifiers
}
else // not inside phase of preprocessing
writestdout(keybuf,keybufp-keybuf);
// write key buffer contents
if(keybufp[-1]=='/' && r>=0 && c=='>')
// a small section ends here 2011-03-27
preserve= dropsect; // initialize the variable again
}
if(r>=0 && insidepreproc!=1) // read-in character is valid
writestdout(&c,1); // write the read-in character to stdout
} // end last section must not be dropped
} // end we're not inside a specified section
else if(!drop) {
// we are inside a specified section which is not to drop
if((keybufp-keybuf+1)>(bufe-bufp)) // buffer too small
drop= true; // we decide to drop this section because it's too
// large and therefore cannot be processed by this program
else { // buffer size is sufficient
// store all data of the key we've read
if(keybufp>keybuf) { // at least one character has been read
memcpy(bufp,keybuf,keybufp-keybuf);
// copy data from key buffer to normal buffer
bufp+= keybufp-keybuf; // increase buffer pointer accordingly
}
if(r>=0) // read-in character is valid
*bufp++= c; // add character to the buffer
} // end buffer size is sufficient
} // end we are inside a specified section which is not to drop
} // end main loop
writestdout(&c,-1); // flush write buffer
return 0;
} // end main()