#define PROGRAM "osmgeobase" #define VERSION "0.0.0" // 2016-03-30 05:10 // // compile this file: // gcc osmgeobase.c -O3 -o osmgeobase // there may be compiler warnings informing you about unused procedures // and variables; both have been left in the source file intentionally, // they are for future expansions; // // tests: // gcc osmgeobase.c -g -o osmgeobase // ./osmgeobase -v adrr.o5m // // (c) 2016 Markus Weber, Nuernberg // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU Affero General Public License // version 3 as published by the Free Software Foundation. // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Affero General Public License for more details. // You should have received a copy of this license along // with this program; if not, see http://www.gnu.org/licenses/. // // Other licenses are available on request; please ask the author. //------------------------------------------------------------ // What this Program does //------------------------------------------------------------ // // This program creates and fills the data structures for effective // geocoding. // An .o5m file containing address data as node objects is read. // Tag values must also be provided unique-sorted and zero-terminated // by separate files: // strings_region, strings_subregion, strings_city, // strings_street, strings_housenumber. // The file strings_nodecount must contain the actual number of // node objects of .o5m input file. 
#ifndef I // from here: main program #define MAXLOGLEVEL 1 // keep at 1, unless you want do debug const char* shorthelptext= "\n" PROGRAM " " VERSION " Parameter Overview\n" "(Please use --help to get more information.)\n" "\n" " input file name\n" "- read from standard input\n" "--max-strrefs= maximum space for string references\n" "-h display this parameter overview\n" "--help display a more detailed help\n" "--out-none no standard output (for testing purposes)\n" "-o= reroute standard output to a file\n" "--parameter-file= param. in file, separated by empty lines\n" "--verbose activate verbose mode\n"; const char* helptext= "\n" PROGRAM " " VERSION "\n" "\n" "This program creates and fills the data structures for effective\n" "geocoding.\n" "An .o5m file containing address data as node objects is read.\n" "Tag values must also be provided unique-sorted and zero-terminated\n" "by separate files:\n" " strings_region, strings_subregion, strings_city,\n" " strings_street, strings_housenumber.\n" "The file strings_nodecount must contain the actual number of\n" "node objects of .o5m input file. If there are semicolon-separated\n" "housenumbers, this number must have been increased accordingly.\n" "\n" "-h\n" " Display a short parameter overview.\n" "\n" "--help\n" " Display this help.\n" "\n" "--max-strrefs=\n" " The program needs to allocate a certain amount of memory\n" " space for string references. The size of this space can be\n" " changed by the user, in an absolute manner and in a relative\n" " manner. For example, this will allocate 20 % more space than\n" " usual:\n" " --max-strrefs=120%\n" " You also can allocate a concrete amount of MiB:\n" " --max-strrefs=800\n" " Use verbose option -v to see how much memory space is\n" " allocated by default.\n" "\n" "--out-none\n" " This will be no standard output. 
This option is for testing\n" " purposes only.\n" "\n" "-o=\n" " Standard output will be rerouted to the specified file.\n" "\n" "--parameter-file=FILE\n" " If you want to supply one ore more command line arguments\n" " by a parameter file, please use this option and specify the\n" " file name. Within the parameter file, parameters must be\n" " separated by empty lines. Line feeds inside a parameter will\n" " be converted to spaces.\n" " Lines starting with \"// \" will be treated as comments.\n" "\n" "-v\n" "--verbose\n" " With activated \'verbose\' mode, some statistical data and\n" " diagnosis data will be displayed.\n" " If -v resp. --verbose is the first parameter in the line,\n" " " PROGRAM " will display all input parameters.\n" "\n" "Example\n" "\n" "./" PROGRAM " adrr.o5m\n" "\n" "Limitations\n" "\n" "This program is for experimental use. Expect malfunctions and data\n" "loss. Do not use the program in productive or commercial systems.\n" "\n" "There is NO WARRANTY, to the extent permitted by law.\n" "Please send any bug reports to marqqs@gmx.eu\n\n"; #define _FILE_OFFSET_BITS 64 #include #include #include #include #include #include #include #include #include #include #include typedef enum {false= 0,true= 1} bool; typedef uint8_t byte; typedef unsigned int uint; #define isdig(x) isdigit((unsigned char)(x)) static int loglevel= 0; // logging to stderr; // 0: no logging; 1: small logging; 2: normal logging; // 3: extended logging; #define DP(f) fprintf(stderr,"Debug: " #f "\n"); #define DPv(f,...) 
#define DPv(f,...) fprintf(stderr,"Debug: " #f "\n",__VA_ARGS__);
  // debug print with value(s) to stderr; the format f is stringified
  // by the macro, i.e. it must be given WITHOUT quotation marks;

// NOTE(review): the source text is truncated at this point; the rest of
// the DPM() debug macro and the beginning of the following declaration
// (presumably the global which holds the --max-strrefs value - confirm)
// are missing; the surviving fragments are kept verbatim but disabled:
#if 0
#define DPM(f,p,m) { byte* pp; int i,mm; static int msgn= 3; \
  if(--msgn>=0) { fprintf(stderr,"Debug memory: " #f); \
  pp= (byte*)(p); mm= (m); if(pp==NULL) fprintf(stderr,"\n (null)"); \
  else for(i= 0; i
0: absolute value in MiB;
#endif

static int64_t global_nodecount= 0;
  // number of nodes as stated in file "strings_nodecount"

// the following message macros expand to complete statements or
// brace blocks; they are used WITHOUT a trailing semicolon;
// each expansion of PERR/PERRv/PWARN/PWARNv owns a private counter,
// so every single call site prints its message at most 3 times;
#define PERR(f) { static int msgn= 3; if(--msgn>=0) \
  fprintf(stderr,PROGRAM " Error: " f "\n"); }
  // print error message
#define PERRv(f,...) { static int msgn= 3; if(--msgn>=0) \
  fprintf(stderr,PROGRAM " Error: " f "\n",__VA_ARGS__); }
  // print error message with value(s)
#define PWARN(f) { static int msgn= 3; if(--msgn>=0) \
  fprintf(stderr,PROGRAM " Warning: " f "\n"); }
  // print a warning message, do it maximal 3 times
#define PWARNv(f,...) { static int msgn= 3; if(--msgn>=0) \
  fprintf(stderr,PROGRAM " Warning: " f "\n",__VA_ARGS__); }
  // print a warning message with value(s), do it maximal 3 times
#define PINFO(f) \
  fprintf(stderr,PROGRAM ": " f "\n");
  // print info message
#define PINFOv(f,...) \
  fprintf(stderr,PROGRAM ": " f "\n",__VA_ARGS__);
  // print info message with value(s)
#define PLOG(ll,f,...) { \
  if(ll<=MAXLOGLEVEL && loglevel>=ll) PINFO(f) }
  // print info message f if log level ll is compiled in and active
#define PLOGv(ll,f,...) { \
  if(ll<=MAXLOGLEVEL && loglevel>=ll) PINFOv(f,__VA_ARGS__) }
  // same as PLOG(), with value(s)
#define FREE(x) if(x!=NULL){free(x);x= NULL;}
  // free a memory block and reset the pointer to NULL
#define ONAME(i) \
  (i==0? "node": i==1? "way": i==2? "relation": "unknown object")
  // name of the object type with index i
#define global_fileM 1002  // maximum number of input files

//------------------------------------------------------------
// end   Module Global   global variables for this program
//------------------------------------------------------------

#define COORDINATE_NIL INT32_C(2000000000)
  // nil value for geocoordinates which are stored as
  // 10^-7 fixpoint numbers in int32_t

static uint32_t strtouint32(const char* s) {
  // read a decimal number and convert it to an unsigned 32-bit integer;
  // reading stops at the first character which is not a digit;
  // there is no overflow check: too large numbers wrap around;
  // return: number;
  uint32_t i;  // unsigned, so the whole uint32_t range can be
    // accumulated without signed overflow
  uint8_t b;

  i= 0;
  for(;;) {
    b= (uint8_t)(*s++ -'0');
    if(b>=10)
  break;
    i= i*10+b;
    }
  return i;
  }  // strtouint32()

static inline int32_t strtoint32(const char* s) {
  // read a decimal number and convert it to a signed 32-bit integer;
  // an optional leading '-' is accepted;
  // reading stops at the first character which is not a digit;
  // return: number;
  int negative;
  uint32_t u;  // magnitude; unsigned, so that "-2147483648" can be
    // read without signed overflow
  uint8_t b;

  negative= *s=='-';
  if(negative)
    s++;
  u= 0;
  for(;;) {
    b= (uint8_t)(*s++ -'0');
    if(b>=10)
  break;
    u= u*10+b;
    }
  if(negative)
    u= (uint32_t)0-u;
  return (int32_t)u;
  }  // strtoint32()

static inline int64_t strtosint64(const char* s) {
  // read a decimal number and convert it to a signed 64-bit integer;
  // an optional leading '-' is accepted;
  // reading stops at the first character which is not a digit;
  // return: number;
  int negative;
  uint64_t u;  // magnitude; unsigned, so that INT64_MIN can be read
    // without signed overflow
  uint8_t b;

  negative= *s=='-';
  if(negative)
    s++;
  u= 0;
  for(;;) {
    b= (uint8_t)(*s++ -'0');
    if(b>=10)
  break;
    u= u*10+b;
    }
  if(negative)
    u= (uint64_t)0-u;
  return (int64_t)u;
  }  // strtosint64()

static int32_t strtoint32percent(const char* s) {
  // read an unsigned number and convert it to a signed 32-bit
  // integer; the result will be a positive number unless the
  // character string is followed by a percent sign;
  // this trailing percent sign will negate the value;
  // one space is accepted between the digits and the percent sign;
  // return: number;
  int32_t i;
  uint8_t b;

  if(*s=='-') s++;  // ignore leading minus sign
  i= 0;
  for(;;) {
    b= (uint8_t)(*s-'0');
    if(b>=10)
  break;
    i= i*10+b;
    s++;
    }
  if(*s==' ') s++;  // jump over space sign
  if(*s=='%')
return -i;
  return i;
  }  // strtoint32percent()

static inline char* uint32toa(uint32_t v,char* s) {
  // convert uint32_t integer into string;
  // v: integer value to convert;
  // return: s;
  // s[11]: digit string (up to 10 digits plus terminator);
  char* s1,*s2;
  char c;

  s1= s;
  if(v==0)
    *s1++= '0';
  s2= s1;
  while(v>0)  // write the digits in reverse order
    { *s2++= "0123456789"[v%10]; v/= 10; }
  *s2--= 0;
  while(s2>s1)  // bring the digits into the right order
    { c= *s1; *s1= *s2; *s2= c; s1++; s2--; }
  return s;
  }  // end   uint32toa()

static inline char* int64toa(int64_t v,char* s) {
  // convert int64_t integer into string;
  // v: long integer value to convert;
  // return: s;
  // s[21]: digit string;
  char* s1,*s2;
  char c;
  uint64_t u;  // magnitude of v; unsigned, because -INT64_MIN cannot
    // be represented as int64_t

  s1= s;
  if(v<0)
    { *s1++= '-'; u= (uint64_t)0-(uint64_t)v; }
  else {
    u= (uint64_t)v;
    if(u==0)
      *s1++= '0';
    }
  s2= s1;
  while(u>0)  // write the digits in reverse order
    { *s2++= "0123456789"[u%10]; u/= 10; }
  *s2--= 0;
  while(s2>s1)  // bring the digits into the right order
    { c= *s1; *s1= *s2; *s2= c; s1++; s2--; }
  return s;
  }  // end   int64toa()

static inline char* int32fix7topa(int32_t v,char* s) {
  // convert a signed 7 decimals fixpoint value into a string;
  // v: fixpoint value
  // return: pointer to string terminator;
  // s[13]: destination string;
  char* s1,*s2,*sterm,c;
  int i;
  uint32_t u;  // magnitude of v; unsigned, because -INT32_MIN cannot
    // be represented as int32_t

  s1= s;
  if(v<0)
    { *s1++= '-'; u= (uint32_t)0-(uint32_t)v; }
  else
    u= (uint32_t)v;
  s2= s1;
  i= 7;
  while(--i>=0)  // the 7 decimal places, in reverse order
    { *s2++= (char)((u%10)+'0'); u/= 10; }
  *s2++= '.';
  do  // the integer part, at least one digit
    { *s2++= (char)((u%10)+'0'); u/= 10; }
    while(u>0);
  sterm= s2;
  *s2--= 0;
  while(s2>s1)  // bring the characters into the right order
    { c= *s1; *s1= *s2; *s2= c; s1++; s2--; }
  return sterm;
  }  // end   int32fix7topa()

static inline char *stpcpy0(char *dest, const char *src) {
  // redefinition of C99's stpcpy() because it's missing in MinGW,
  // and declaration in Linux seems to be wrong;
  // return: address of the terminating zero in dest[];
  while(*src!=0)
    *dest++= *src++;
  *dest= 0;
  return dest;
  }  // end stpcpy0()
static inline char *strmcpy(char *dest, const char *src, size_t maxlen) {
  // similar to strcpy(), this procedure copies a character string;
  // here, the length is cared about, i.e. the target string will
  // be limited in case it is too long;
  // src[]: source string which is to be copied;
  // maxlen: maximum length of the destination string
  //         (including terminator null);
  //         ==0: nothing is written at all;
  // return:
  // dest[]: destination string of the copy; this is the
  //         function's return value too;
  char* d;

  if(maxlen==0)
return dest;
  d= dest;
  while(--maxlen>0 && *src!=0)
    *d++= *src++;
  *d= 0;
  return dest;
  }  // end   strmcpy()
#define strMcpy(d,s) strmcpy((d),(s),sizeof(d))
  // d must be a real array, not a pointer, because of sizeof()

static inline char *stpmcpy(char *dest, const char *src, size_t maxlen) {
  // similar to strmcpy(), this procedure copies a character string;
  // however, it returns the address of the destination string's
  // terminating zero character;
  // this makes it easier to concatenate strings;
  // maxlen==0: nothing is written, dest is returned;
  char* d;

  if(maxlen==0)
return dest;
  d= dest;
  while(--maxlen>0 && *src!=0)
    *d++= *src++;
  *d= 0;
  return d;
  }  // end stpmcpy()
#define stpMcpy(d,s) stpmcpy((d),(s),sizeof(d))
  // d must be a real array, not a pointer, because of sizeof()

static inline int strzcmp(const char* s1,const char* s2) {
  // similar to strcmp(), this procedure compares two character strings;
  // here, the number of characters which are to be compared is limited
  // to the length of the second string;
  // i.e., this procedure can be used to identify a short string s2
  // within a long string s1;
  // s1[]: first string;
  // s2[]: string to compare with the first string;
  // return:
  // 0: both strings are identical; the first string may be longer than
  //    the second;
  // -1: the first string is alphabetical smaller than the second;
  // 1: the first string is alphabetical greater than the second;
  while(*s1==*s2 && *s1!=0) { s1++; s2++; }
  if(*s2==0)
return 0;
  return *(const unsigned char*)s1 < *(const unsigned char*)s2? -1: 1;
  }  // end   strzcmp()

static inline int strzlcmp(const char* s1,const char* s2) {
  // similar to strzcmp(), this procedure compares two character strings;
  // and accepts the first string to be longer than the second;
  // other than strzcmp(), this procedure returns the length of s2[] in
  // case both string contents are identical, and returns 0 otherwise;
  // s1[]: first string;
  // s2[]: string to compare with the first string;
  // return:
  // >0: both strings are identical, the length of the second string is
  //     returned; the first string may be longer than the second;
  // 0: the string contents are not identical;
  //    (this is also the result if s2[] is an empty string;)
  const char* s2a;

  s2a= s2;
  while(*s1==*s2 && *s1!=0) { s1++; s2++; }
  if(*s2==0)
return (int)(s2-s2a);
  return 0;
  }  // end   strzlcmp()

// NOTE(review): the source text is truncated inside strycmp(); the end
// of strycmp() and the start of lonadapt() - including its cosrtab[]
// table - are missing; the surviving fragments are kept verbatim but
// disabled until they can be restored from the original file:
#if 0
static inline int strycmp(const char* s1,const char* s2) {
  // similar to strcmp(), this procedure compares two character strings;
  // here, both strings are end-aligned;
  // not more characters will be compared than are existing in string s2;
  // i.e., this procedure can be used to identify a file name extension;
  const char* s1e;
  int l;

  l= strchr(s2,0)-s2;
  s1e= strchr(s1,0);
  if(s1e-s1900) lat= 900;  // set maximum of 90 degree
  return ((uint64_t)cosrtab[lat]*(int64_t)londiff)/INT64_C(0x100000000);
  }  // lonadapt()
#endif

// the table in the previous procedure has been generated by this
// program:
// NOTE(review): the three header names of the #include lines below
// are missing in this copy of the source;
#if 0  // file cosr.c, run it with:  gcc cosr.c -lm -o cosr && ./cosr
#include
#include
#include
int main() {
  int i;
  printf(" static const uint32_t cosrtab[901]= "
    "{\n UINT32_C(4294967295),");
  for(i= 1;i<900;i++) {
    if(i%3==0) printf("\n ");
    printf("UINT32_C(%"PRIu32"),",(uint32_t)(
      cos(i/1800.0*3.14159265359) * INT64_C(0x100000000) ));
    }
  printf("\n 0");
  printf(" }; // cosr values for 10th degrees from 0 to 90\n");
  return 0; }
#endif

// NOTE(review): geodistance() starts here; it is disabled because its
// body is truncated in this copy of the source and because it depends
// on lonadapt();
#if 0
static int32_t geodistance(int32_t x1,int32_t y1,
    int32_t x2,int32_t y2) {
  // approximates the geodistance between two points;
  // x1,y1: geocoordinates of first point;
  // x2,y2: geocoordinates of second point;
  // return:
#endif
distance as angle; // all units in 100 nanodegrees; // how this is done: // distances in West-East direction and in South-North direction // are compared; the longer shorter distance is divided by 3 and // added to the value of the longer distance; // => all points on the edges of an octagon around point 1 // are interpreted as equidistant; // this approximation is close enough for this application; int32_t xdist,ydist; xdist= x2-x1; if(xdist<0) xdist= -xdist; ydist= y2-y1; if(ydist<0) ydist= -ydist; xdist= lonadapt(xdist,y1); if(xdist>1); else return i>>1; } sig= i & 1; i= (i & 0x7e)>>1; fac= 0x40; while(*++p & 0x80) { // more byte(s) will follow i+= (*p & 0x7f)*fac; fac<<= 7; } i+= *p++ *fac; *pp= p; if(sig) // negative return -1-i; else return i; } // end pbf_sint32() static inline uint64_t pbf_uint64(byte** pp) { // get the value of an unsigned integer; // pp: see module header; byte* p; uint64_t i; uint64_t fac; p= *pp; i= *p; if((*p & 0x80)==0) { // just one byte (*pp)++; return i; } i&= 0x7f; fac= 0x80; while(*++p & 0x80) { // more byte(s) will follow i+= (*p & 0x7f)*fac; fac<<= 7; } i+= *p++ *fac; *pp= p; return i; } // end pbf_uint64() static inline int64_t pbf_sint64(byte** pp) { // get the value of a signed integer; // pp: see module header; byte* p; int64_t i; int64_t fac; int sig; p= *pp; i= *p; if((*p & 0x80)==0) { // just one byte (*pp)++; if(i & 1) // negative return -1-(i>>1); else return i>>1; } sig= i & 1; i= (i & 0x7e)>>1; fac= 0x40; while(*++p & 0x80) { // more byte(s) will follow i+= (*p & 0x7f)*fac; fac<<= 7; } i+= *p++ *fac; *pp= p; if(sig) // negative return -1-i; else return i; } // end pbf_sint64() static inline bool pbf_jump(byte** pp) { // jump over a protobuf formatted element - no matter // which kind of element; // pp: see module header; // return: the data do not meet protobuf specifications (error); byte* p; int type; uint32_t u; p= *pp; type= *p & 0x07; switch(type) { // protobuf type case 0: // Varint while(*p & 0x80) p++; p++; 
// jump over id while(*p & 0x80) p++; p++; // jump over data break; case 1: // fixed 64 bit; while(*p & 0x80) p++; p++; // jump over id p+= 4; // jump over data break; case 2: // String while(*p & 0x80) p++; p++; // jump over id u= pbf_uint32(&p); p+= u; // jump over string contents break; case 5: // fixed 32 bit; while(*p & 0x80) p++; p++; // jump over id p+= 2; // jump over data break; default: // unknown id PERRv("Format 0x%02X.",*p) (*pp)++; return true; } // end protobuf type *pp= p; return false; } // end pbf_jump() static inline void pbf_intjump(byte** pp) { // jump over a protobuf formatted integer; // pp: see module header; // we do not care about a possibly existing identifier, // therefore as the start address *pp the address of the // integer value is expected; byte* p; p= *pp; while(*p & 0x80) p++; p++; *pp= p; } // end pbf_intjump() //------------------------------------------------------------ // end Module pbf_ protobuf conversions module //------------------------------------------------------------ //------------------------------------------------------------ // Module read_ OSM file read module //------------------------------------------------------------ // this module provides procedures for buffered reading of // standard input; // as usual, all identifiers of a module have the same prefix, // in this case 'read'; an underline will follow in case of a // global accessible object, two underlines in case of objects // which are not meant to be accessed from outside this module; // the sections of private and public definitions are separated // by a horizontal line: ---- #define read_PREFETCH ((32+3)*1024*1024) // number of bytes which will be available in the buffer after // every call of read_input(); // (important for reading .pbf files: // size must be greater than pb__blockM) #define read__bufM (read_PREFETCH*5) // length of the buffer; #define read_GZ 0 // determines which read procedure set will be used; // ==0: use open(); ==1: use 
fopen(); // ==2: use gzopen() (accept gzip compressed input files); // ==3: use gzopen() with increased gzip buffer; typedef struct { // members may not be accessed from external #if read_GZ==0 int fd; // file descriptor off_t jumppos; // position to jump to; -1: invalid #elif read_GZ==1 FILE* fi; // file stream off_t jumppos; // position to jump to; -1: invalid #else gzFile fi; // gzip file stream #if __WIN32__ z_off64_t jumppos; // position to jump to; -1: invalid #else z_off_t jumppos; // position to jump to; -1: invalid #endif #endif int64_t counter; // byte counter to get the read position in input file; char filename[300]; bool isstdin; // is standard input bool eof; // we are at the end of input file byte* bufp; // pointer in buf[] byte* bufe; // pointer to the end of valid input in buf[] uint64_t bufferstart; // dummy variable which marks the start of the read buffer // concatenated with this instance of read info structure; } read_info_t; static bool read__jumplock= false; // do not change .jumppos anymore; //------------------------------------------------------------ static read_info_t* read_infop= NULL; // presently used read info structure, i.e. 
file handle #define read__buf ((byte*)&read_infop->bufferstart) // start address of the file's input buffer static byte* read_bufp= NULL; // may be incremented by external // up to the number of read_PREFETCH bytes before read_input() is // called again; static byte* read_bufe= NULL; // may not be changed from external static int read_open(const char* filename) { // open an input file; // filename[]: path and name of input file; // ==NULL: standard input; // return: 0: ok; !=0: error; // read_infop: handle of the file; // note that you should close every opened file with read_close() // before the program ends; // save status of presently processed input file (if any) if(read_infop!=NULL) { read_infop->bufp= read_bufp; read_infop->bufp= read_bufe; } // get memory space for file information and input buffer read_infop= (read_info_t*)malloc(sizeof(read_info_t)+read__bufM); if(read_infop==NULL) { PERRv("could not get %i bytes of memory.",read__bufM) return 1; } // initialize read info structure #if read_GZ==0 read_infop->fd= 0; // (default) standard input #else read_infop->fi= NULL; // (default) file not opened #endif if((read_infop->isstdin= filename==NULL)) strcpy(read_infop->filename,"standard input"); else strMcpy(read_infop->filename,filename); read_infop->eof= false; // we are not at the end of input file read_infop->bufp= read_infop->bufe= read__buf; // pointer in buf[] // pointer to the end of valid input in buf[] read_infop->counter= 0; read_infop->jumppos= 0; // store start of file as default jump destination // set modul-global variables which are associated with this file read_bufp= read_infop->bufp; read_bufe= read_infop->bufe; // open the file PLOGv(2,"Read-opening: %s",read_infop->filename) if(read_infop->isstdin) { // stdin shall be used #if read_GZ==0 read_infop->fd= 0; #elif read_GZ==1 read_infop->fi= stdin; #else read_infop->fi= gzdopen(0,"rb"); #if read_GZ==3 && ZLIB_VERNUM>=0x1235 gzbuffer(read_infop->fi,128*1024); #endif #endif } else 
if(filename!=NULL) { // a real file shall be opened #if read_GZ==0 read_infop->fd= open(filename,O_RDONLY|O_BINARY); #elif read_GZ==1 read_infop->fi= fopen(filename,"rb"); #else read_infop->fi= gzopen(filename,"rb"); #if read_GZ==3 && ZLIB_VERNUM>=0x1235 PLOG(2,"Read-opening: increasing gzbuffer.") gzbuffer(read_infop->fi,128*1024); #endif #endif #if read_GZ==0 if(read_infop->fd<0) { #else if(read_infop->fi==NULL) { #endif PERRv("could not open input file: %.80s", read_infop->filename) free(read_infop); read_infop= NULL; read_bufp= read_bufe= NULL; return 1; } } // end a real file shall be opened return 0; } // end read_open() static void read_close() { // close an opened file; // read_infop: handle of the file which is to close; if(read_infop==NULL) // handle not valid; return; PLOGv(2,"Read-closing: %s",read_infop->filename) #if read_GZ==0 if(read_infop->fd>0) // not standard input close(read_infop->fd); #elif read_GZ==1 if(!read_infop->isstdin) // not standard input fclose(read_infop->fi); #else gzclose(read_infop->fi); #endif free(read_infop); read_infop= NULL; read_bufp= read_bufe= NULL; } // end read_close() static inline bool read_input() { // read data from standard input file, use an internal buffer; // make data available at read_bufp; // read_open() must have been called before calling this procedure; // return: there are no (more) bytes to read; // read_bufp: start of next bytes available; // may be incremented by the caller, up to read_bufe; // read_bufe: end of bytes in buffer; // must not be changed by the caller; // after having called this procedure, the caller may rely on // having available at least read_PREFETCH bytes at address // read_bufp - with one exception: if there are not enough bytes // left to read from standard input, every byte after the end of // the remaining part of the file in the buffer will be set to // 0x00 - up to read_bufp+read_PREFETCH; int l,r; if(read_bufp+read_PREFETCH>=read_bufe) { // read buffer is too low 
if(!read_infop->eof) { // still bytes in the file if(read_bufe>read_bufp) { // bytes remaining in buffer memmove(read__buf,read_bufp,read_bufe-read_bufp); // move remaining bytes to start of buffer read_bufe= read__buf+(read_bufe-read_bufp); // protect the remaining bytes at buffer start } else // no remaining bytes in buffer read_bufe= read__buf; // no bytes remaining to protect // add read bytes to debug counter read_bufp= read__buf; do { // while buffer has not been filled l= (read__buf+read__bufM)-read_bufe-4; // number of bytes to read #if read_GZ==0 r= read(read_infop->fd,read_bufe,l); #elif read_GZ==1 r= read(fileno(read_infop->fi),read_bufe,l); #else r= gzread(read_infop->fi,read_bufe,l); #endif if(r<=0) { // no more bytes in the file read_infop->eof= true; // memorize that there we are at end of file l= (read__buf+read__bufM)-read_bufe; // remaining space in buffer if(l>read_PREFETCH) l= read_PREFETCH; memset(read_bufe,0,l); // 2011-12-24 // set remaining space up to prefetch bytes in buffer to 0 break; } read_infop->counter+= r; read_bufe+= r; // set new mark for end of data read_bufe[0]= 0; read_bufe[1]= 0; // set 4 null-terminators read_bufe[2]= 0; read_bufe[3]= 0; } while(reof && read_bufp>=read_bufe; } // end read__input() static void read_switch(read_info_t* filehandle) { // switch to another already opened file; // filehandle: handle of the file which shall be switched to; // first, save status of presently processed input file if(read_infop!=NULL) { read_infop->bufp= read_bufp; read_infop->bufe= read_bufe; } // switch to new file information read_infop= filehandle; read_bufp= read_infop->bufp; read_bufe= read_infop->bufe; read_input(); } // end read_switch() static inline int read_rewind() { // rewind the file, i.e., the file pointer is set // to the first byte in the file; // read_infop: handle of the file which is to rewind; // return: ==0: ok; !=0: rewind error; bool err; #if read_GZ==0 err= lseek(read_infop->fd,0,SEEK_SET)<0; #elif read_GZ==1 
err= fseek(read_infop->fi,0,SEEK_SET)<0; #else err= gzseek(read_infop->fi,0,SEEK_SET)<0; #endif if(err) { PERRv("could not rewind file: %-80s",read_infop->filename) return 1; } read_infop->counter= 0; read_bufp= read_bufe; // force refetch read_infop->eof= false; // force retest for end of file read_input(); // ensure prefetch return 0; } // end read_rewind() static inline void read_lockjump() { // prevent a previously stored jump position from being overwritten; read__jumplock= true; } // end read_lockjump() //------------------------------------------------------------ // end Module read_ OSM file read module //------------------------------------------------------------ //------------------------------------------------------------ // Module str_ string read module //------------------------------------------------------------ // this module provides procedures for conversions from // strings which have been stored in data stream objects to // c-formatted strings; // as usual, all identifiers of a module have the same prefix, // in this case 'str'; one underline will follow in case of a // global accessible object, two underlines in case of objects // which are not meant to be accessed from outside this module; // the sections of private and public definitions are separated // by a horizontal line: ---- #define str__tabM (15000+4000) // +4000 because it might happen that an object has a lot of // key/val pairs or refroles which are not stored already; #define str__tabstrM 250 // must be < row size of str__rab[] typedef struct str__info_struct { // members of this structure must not be accessed // from outside this module; char tab[str__tabM][256]; // string table; see o5m documentation; // row length must be at least str__tabstrM+2; // each row contains a double string; each of the two strings // is terminated by a zero byte, the logical lengths must not // exceed str__tabstrM bytes in total; // the first str__tabM lines of this array are used as // input buffer 
for strings; int tabi; // index of last entered element in string table; int tabn; // number of valid strings in string table; struct str__info_struct* prev; // address of previous unit; } str_info_t; str_info_t* str__infop= NULL; static void str__end() { // clean-up this module; str_info_t* p; while(str__infop!=NULL) { p= str__infop->prev; free(str__infop); str__infop= p; } } // end str__end() //------------------------------------------------------------ static str_info_t* str_open() { // open an new string client unit; // this will allow us to process multiple o5m input files; // return: handle of the new unit; // ==NULL: error; // you do not need to care about closing the unit(s); static bool firstrun= true; str_info_t* prev; prev= str__infop; str__infop= (str_info_t*)malloc(sizeof(str_info_t)); if(str__infop==NULL) { PERR("could not get memory for string buffer.") return NULL; } str__infop->tabi= 0; str__infop->tabn= 0; str__infop->prev= prev; if(firstrun) { firstrun= false; atexit(str__end); } return str__infop; } // end str_open() static inline void str_switch(str_info_t* sh) { // switch to another string unit // sh: string unit handle; str__infop= sh; } // end str_switch() static inline void str_reset() { // clear string table; // must be called before any other procedure of this module // and may be called every time the string processing shall // be restarted; if(str__infop!=NULL) str__infop->tabi= str__infop->tabn= 0; } // end str_reset() static void str_read(byte** pp,char** s1p,char** s2p) { // read an o5m formatted string (pair), e.g. 
key/val, from // standard input buffer; // if got a string reference, resolve it, using an internal // string table; // no reference is used if the strings are longer than // 250 characters in total (252 including terminators); // pp: address of a buffer pointer; // this pointer will be incremented by the number of bytes // the converted protobuf element consumes; // s2p: ==NULL: read not a string pair but a single string; // return: // *s1p,*s2p: pointers to the strings which have been read; char* p; int len1,len2; int ref; bool donotstore; // string has 'do not store flag' 2012-10-01 p= (char*)*pp; if(*p==0) { // string (pair) given directly p++; donotstore= false; *s1p= p; len1= strlen(p); p+= len1+1; if(s2p==NULL) { // single string if(!donotstore && len1<=str__tabstrM) { // single string short enough for string table stpcpy0(str__infop->tab[str__infop->tabi],*s1p)[1]= 0; // add a second terminator, just in case someone will try // to read this single string as a string pair later; if(++str__infop->tabi>=str__tabM) str__infop->tabi= 0; if(str__infop->tabntabn++; } // end single string short enough for string table } // end single string else { // string pair *s2p= p; len2= strlen(p); p+= len2+1; if(!donotstore && len1+len2<=str__tabstrM) { // string pair short enough for string table memcpy(str__infop->tab[str__infop->tabi],*s1p,len1+len2+2); if(++str__infop->tabi>=str__tabM) str__infop->tabi= 0; if(str__infop->tabntabn++; } // end string pair short enough for string table } // end string pair *pp= (byte*)p; } // end string (pair) given directly else { // string (pair) given by reference ref= pbf_uint32(pp); if(ref>str__infop->tabn) { // string reference invalid PWARNv("invalid .o5m string reference: %i->%i", str__infop->tabn,ref) *s1p= "(invalid)"; if(s2p!=NULL) // caller wants a string pair *s2p= "(invalid)"; } // end string reference invalid else { // string reference valid ref= str__infop->tabi-ref; if(ref<0) ref+= str__tabM; *s1p= str__infop->tab[ref]; 
if(s2p!=NULL)  // caller wants a string pair
        *s2p= strchr(str__infop->tab[ref],0)+1;
      }  // end string reference valid
    }  // end string (pair) given by reference
  }  // end str_read()

//------------------------------------------------------------
// end Module str_ string read module
//------------------------------------------------------------

//------------------------------------------------------------
// Module wrs_ write stream module
//------------------------------------------------------------

// this module provides procedures to write into an output
// stream;
// as usual, all identifiers of a module have the same prefix,
// in this case 'wrs'; an underline will follow in case of a
// global accessible object, two underlines in case of objects
// which are not meant to be accessed from outside this module;
// the sections of private and public definitions are separated
// by a horizontal line: ----

#define wrs__BUFSIZE 1600000
static char* wrs__buf= NULL;  // write buffer
static char* wrs__bufe= NULL;  // water mark for buffer filled 100%
static char* wrs__bufp= NULL;  // write pointer in buffer
static int wrs__fd= -1;  // file descriptor
static const char* wrs__name= "(none)";  // file name;
  // never NULL, because it is printed in error messages;

static void wrs__flush(bool atend) {
  // write all buffered bytes to the file and empty the buffer;
  // atend: true: called at program end
  //              (a "flush error" is reported on failure);
  //        false: called because the buffer is full
  //              (a "write error" is reported on failure);
  // write() is allowed to accept fewer bytes than requested,
  // therefore it is called repeatedly until the whole buffer
  // contents have been written;
  // after an error, the bytes which are still in the buffer are
  // discarded;
  const char* p;  // first byte which has not been written yet
  int64_t done;  // number of bytes accepted by write()

  p= wrs__buf;
  while(p<wrs__bufp) {  // there are still bytes to write
    done= write(wrs__fd,p,wrs__bufp-p);
    if(done<=0) {  // error, or no progress at all
      if(atend) {
        PERRv("flush error at file: %.80s",wrs__name)
        }
      else {
        PERRv("write error at file: %.80s",wrs__name)
        }
      break;
      }
    p+= done;
    }  // there are still bytes to write
  wrs__bufp= wrs__buf;  // reset buffer pointer;
  }  // wrs__flush()

static void wrs__end() {
  // terminate the services of this module;
  // it is safe to call this procedure more than once;

  // flush
  if(wrs__bufp>wrs__buf)  // at least one character in buffer
    wrs__flush(true);

  // close
  if(wrs__fd!=1) {
    if(wrs__fd>=0)  // file has really been opened
      close(wrs__fd);
    wrs__fd= 1;
    }

  // free buffer
  FREE(wrs__buf)
  wrs__buf= NULL;
  wrs__bufp= wrs__bufe= NULL;
    // no dangling pointers, in case this procedure runs again
  }  // wrs__end()

//------------------------------------------------------------

static bool wrs_open(const char* name) {
  // open an output stream;
  // name[]: file name of this stream;
  //         ==NULL: standard output;
  // return: success;
  // there is no need to explicitly close the output stream,
  // this is done automatically at program end;
  // the write buffer is allocated for standard output as well,
  // because all wrs_ write procedures store into this buffer;
  static bool firstrun= true;

  if(firstrun) {  // register the clean-up procedure only once
    firstrun= false;
    atexit(wrs__end);
    }
  if(wrs__buf==NULL) {  // no write buffer yet
    wrs__buf= (char*)malloc(wrs__BUFSIZE);
    if(wrs__buf==NULL) {
      PERR("not enough memory for output buffer.")
      return false;
      }
    wrs__bufe= wrs__buf+wrs__BUFSIZE;
    wrs__bufp= wrs__buf;
    }  // no write buffer yet
  if(name==NULL) {  // standard output
    wrs__name= "standard output";
    wrs__fd= 1;
    return true;
    }  // standard output
  // here: not standard output
  wrs__name= name;
  wrs__fd= open(wrs__name,O_WRONLY|O_CREAT|O_TRUNC|O_BINARY,00600);
  if(wrs__fd<0) {  // (open() reports failure by a negative value)
    PERRv("could not open output file: %.80s",wrs__name)
    wrs__fd= -1;
    return false;
    }
  return true;
  }  // wrs_open()

static inline void wrs_char(int c) {
  // write one byte to output stream, use a buffer;
  // c: character to write;
  // if the stream has not been opened successfully, the byte
  // is dropped;
  if(wrs__bufp>=wrs__bufe) {  // write buffer is full (or missing)
    if(wrs__buf==NULL)  // no buffer available
      return;
    wrs__flush(false);
    }
  *wrs__bufp++= (char)c;
  }  // wrs_char();

static inline void wrs_mem(const void* bp,int64_t l) {
  // write a memory area to output stream, use a buffer;
  // bp: start of the memory area to write;
  // l: length of the memory area (in bytes);
  //    nothing is written if this value is not positive;
  // the bytes are copied in portions as large as the remaining
  // space in the write buffer allows;
  const char* src;  // next byte to copy
  int64_t room;  // size of the portion to copy

  src= (const char*)bp;
  while(l>0) {  // for all portions of the memory area
    if(wrs__bufp>=wrs__bufe) {  // write buffer is full (or missing)
      if(wrs__buf==NULL)  // no buffer available
        return;
      wrs__flush(false);
      }
    room= wrs__bufe-wrs__bufp;
    if(room>l)
      room= l;
    memcpy(wrs__bufp,src,(size_t)room);
    wrs__bufp+= room;
    src+= room;
    l-= room;
    }  // for all portions of the memory area
  }  // wrs_mem();

static inline void wrs_str(const char* s) {
  // write a string to output stream, use a buffer;
  // s: zero-terminated string to write;
  // the terminating zero is NOT written;
  wrs_mem(s,(int64_t)strlen(s));
  }  // wrs_str();

static inline void wrs_strz(const char* s,int n) {
  // write a string to output stream, use a buffer;
  // the terminating zero is also written;
  // s[]: zero-terminated string to write;
  // n: maximum number of valid bytes of the string;
  //    a negative value is not treated as a limit;
  // example: wrs_strz(abc,40) will never read more than
  //          40 bytes of the string, however a terminating zero is
  //          added if the string length is exactly 40 bytes;
  int len;  // number of string bytes to write (without terminator)

  len= 0;
  while(len!=n && s[len]!=0)  // (s[n] itself is never read)
    len++;
  wrs_mem(s,len);
  wrs_char(0);
  }  // wrs_strz();

static inline void wrs__decimal(uint64_t v) {
  // write an unsigned integer number as decimal string
  // to output stream;
  // the digits are generated from right to left, so there is
  // no need to reverse them afterwards;
  char s[24];  // large enough for 20 digits and the terminator
  char* p;

  p= s+sizeof(s)-1;
  *p= 0;
  do {
    *--p= (char)('0'+(v%10));
    v/= 10;
    } while(v>0);
  wrs_str(p);
  }  // wrs__decimal()

static inline void wrs_uint32s(uint32_t v) {
  // write an unsigned 32 bit integer number as string
  // to output stream;
  wrs__decimal(v);
  }  // wrs_uint32s()

static inline void wrs_uint64s(uint64_t v) {
  // write an unsigned 64 bit integer number as string
  // to output stream;
  wrs__decimal(v);
  }  // wrs_uint64s()

static inline void wrs_int64s(int64_t v) {
  // write a signed 64 bit integer number as string
  // to output stream;
  uint64_t magnitude;  // absolute value of v

  if(v<0) {
    wrs_char('-');
    magnitude= UINT64_C(0)-(uint64_t)v;
      // (unsigned arithmetic, well-defined even for the
      // smallest possible value of v)
    }
  else
    magnitude= (uint64_t)v;
  wrs__decimal(magnitude);
  }  // end wrs_int64s()

static inline void wrs_uint32(uint32_t v) {
  // write an unsigned 32 bit integer number as it is
  // to output stream; just a wrapper for wrs_mem();
  wrs_mem(&v,sizeof(v));
  }  // end wrs_uint32()

static inline void wrs_int64(int64_t v) {
  // write a signed 64 bit integer number as it is
  // to output stream; just a wrapper for wrs_mem();
  wrs_mem(&v,sizeof(v));
  }  // end wrs_int64()

//------------------------------------------------------------
// end Module wrs_ write stream module
//------------------------------------------------------------

// "I" represents the name for the instances:
// region, subregion, city, street, housenumber;
#define I region
#include __FILE__
#undef I
#define I subregion
#include __FILE__
#undef I
#define I city
#include __FILE__
#undef I
#define I street
#include __FILE__
#undef I
#define I housenumber
#include __FILE__
#undef I

//------------------------------------------------------------
// Module oo_ osm processing module
//------------------------------------------------------------

// this module provides procedures which read osm objects,
// process them and write them as polygon objects;
// as usual, all identifiers of a module have the same prefix,
// in this case 'oo'; one underline will follow in case of a
// global accessible object, two underlines in case of objects
// which are not meant to be accessed from outside this module;
// the sections of private and public definitions are separated
// by a horizontal line: ----

typedef struct {
  read_info_t* ri;  // file handles for input files
  int format;  // input file format;
    // ==-9: unknown; ==0: o5m; ==10: xml; ==-1: pbf;
  str_info_t* str;  // string unit handle (if o5m format)
  const char* filename;
  bool endoffile;
  int deleteobject;  // replacement for .osc <delete> tag
    // 0: not to delete; 1: delete this object; 2: delete from now on;
  int64_t o5id;  // for o5m delta coding
  int32_t o5lon,o5lat;  // for o5m delta coding
  int64_t o5histime;  // for o5m delta coding
  int64_t o5hiscset;  // for o5m delta coding
  int64_t o5rid[3];  // for o5m delta coding
  } oo__if_t;
static oo__if_t oo__if[global_fileM];
static oo__if_t* oo__ifp= oo__if;  // currently used element in oo__if[]
#define oo__ifI (oo__ifp-oo__if)  // index
static oo__if_t*
oo__ife= oo__if; // logical end of elements in oo__if[] static oo__if_t* oo__ifee= oo__if+global_fileM; // physical end of oo_if[] static int oo_ifn= 0; // number of currently open files static int oo__getformat() { // determine the formats of all opened files of unknown format // and store these determined formats; // do some intitialization for the format, of necessary; // oo__if[].format: !=-9: do nothing for this file; // return: 0: ok; !=0: error; // 5: .pbf format (cannot be processed by this program); // 6: .osm format (cannot be processed by this program); // oo__if[].format: input file format; ==0: o5m; ==10: xml; ==-1: pbf; oo__if_t* ifptemp; byte* bufp; #define bufsp ((char*)bufp) // for signed char ifptemp= oo__ifp; oo__ifp= oo__if; while(oo__ifpri!=NULL && oo__ifp->format==-9) { // format not yet determined read_switch(oo__ifp->ri); if(read_bufp>=read_bufe) { // file empty PERRv("file empty: %.80s",oo__ifp->filename) return 2; } bufp= read_bufp; if(bufp[0]==0 && bufp[1]==0 && bufp[2]==0 && bufp[3]>8 && bufp[3]<20) { // presumably .pbf format PERR("cannot process .pbf format.") return 5; } else if(strzcmp(bufsp,"format= 10; PERR("cannot process .osm format.") return 6; } else if(bufp[0]==0xff && bufp[1]==0xe0 && ( strzcmp(bufsp+2,"\x04""o5m2")==0 || strzcmp(bufsp+2,"\x04""o5c2")==0 )) { // presumably .o5m format oo__ifp->format= 0; oo__ifp->str= str_open(); // call some initialization of string read module } else if((bufp[0]==0xff && bufp[1]>=0x10 && bufp[1]<=0x12) || (bufp[0]==0xff && bufp[1]==0xff && bufp[2]>=0x10 && bufp[2]<=0x12) || (bufp[0]==0xff && read_bufe==read_bufp+1)) { // presumably shortened .o5m format if(loglevel>=2) PINFOv("Not a standard .o5m file header %.80s\n", oo__ifp->filename) oo__ifp->format= 0; oo__ifp->str= str_open(); // call some initialization of string read module } else { // unknown file format PERRv("unknown file format: %.80s",oo__ifp->filename) return 3; } } // format not yet determined oo__ifp++; } // for all input 
files oo__ifp= ifptemp; return 0; #undef bufsp } // end oo__getformat() static void oo__reset() { // reset counters for writing o5m files; if(oo__ifp->format==0) { // o5m oo__ifp->o5id= 0; oo__ifp->o5lat= oo__ifp->o5lon= 0; oo__ifp->o5hiscset= 0; oo__ifp->o5histime= 0; oo__ifp->o5rid[0]= oo__ifp->o5rid[1]= oo__ifp->o5rid[2]= 0; str_reset(); } // o5m } // oo__reset() static void oo__close() { // close an input file; // oo__ifp: handle of currently active input file; // if this file has already been closed, nothing happens; // after calling this procedure, the handle of active input file // will be invalid; if(oo__ifp!=NULL && oo__ifp->ri!=NULL) { if(!oo__ifp->endoffile && oo_ifn>0) // missing logical end of file PWARNv("unexpected end of input file: %.80s",oo__ifp->filename) read_close(oo__ifp->ri); oo__ifp->ri= NULL; oo_ifn--; } oo__ifp= NULL; } // end oo__close() static void oo__end() { // clean-up this module; oo_ifn= 0; // mark end of program; // this is used to suppress warning messages in oo__close() while(oo__ife>oo__if) { oo__ifp= --oo__ife; oo__close(); } oo_ifn= 0; } // end oo__end() //------------------------------------------------------------ static bool oo_open(const char* filename) { // open an input file; // filename[]: path and name of input file; // ==NULL: standard input; // return: 0: ok; 1: no appropriate input file; // 2: maximum number of input files exceeded; // the handle for the current input file oo__ifp is set // to the opened file; // after having opened all input files, call oo__getformat(); // you do not need to care about closing the file; static bool firstrun= true; if(oo__ife>=oo__ifee) { PERR("too many input files.") return 2; } if(read_open(filename)!=0) return 1; oo__ife->ri= read_infop; oo__ife->str= NULL; oo__ife->format= -9; // 'not yet determined' oo__ife->filename= filename; oo__ife->endoffile= false; oo__ife->deleteobject= 0; oo__ifp= oo__ife++; oo_ifn++; if(firstrun) { firstrun= false; atexit(oo__end); } return 0; } // end 
oo_open() static int oo_sequencetype= -1; // type of last object which has been processed; // -1: no object yet; 0: node; 1: way; 2: relation; static int64_t oo_sequenceid= INT64_C(-0x7fffffffffffffff); // id of last object which has been processed; static int oo_main() { // start reading osm objects; // return: ==0: ok; !=0: error; // this procedure must only be called once; // before calling this procedure you must open an input file // using oo_open(); bool writeheader; // header must be written int otype; // type of currently processed object; // 0: node; 1: way; 2: relation; int64_t id; int32_t lon,lat; uint32_t hisver; int64_t histime; int64_t hiscset; uint32_t hisuid; char* hisuser; // flag mask 128 byte* bufp; // pointer in read buffer byte* bufe; // pointer in read buffer, end of object uint8_t b; // latest byte which has been read int l; byte* bp; char* sp; int64_t nodecount; // number of nodes written // procedure initialization atexit(oo__end); writeheader= true; nodecount= 0; // get input file format and care about tempfile name if(oo__getformat()) return 5; // process the file for(;;) { // read input file // get next object read_input(); // care about recursive processing if(read_bufp>=read_bufe) { // at end of input file; break; } // end at end of input file if(oo__ifp->endoffile) { // after logical end of file PWARN("unexpected contents after logical end of file.") break; } bufp= read_bufp; b= *bufp; // care about header and unknown objects /* parse .o5m */ { if(b<0x10 || b>0x12) { // not a regular dataset id if(b>=0xf0) { // single byte dataset if(b==0xff) // file start, resp. 
o5m reset oo__reset(); else if(b==0xfe) oo__ifp->endoffile= true; else PLOGv(1,"unknown .o5m short dataset id: 0x%02x",b) read_bufp++; continue; } // end single byte dataset else { // unknown multibyte dataset if(b!=0xe0 && b!=0xdb && b!=0xdc) PLOGv(1,"unknown .o5m dataset id: 0x%02x",b) read_bufp++; l= pbf_uint32(&read_bufp); // jump over this dataset read_bufp+= l; // jump over this dataset continue; } // end unknown multibyte dataset } // end not a regular dataset id otype= b&3; } // end o5m // write header if(writeheader) { writeheader= false; if(oo__ifp->filename==NULL) PLOG(2,"input file: stdin") else PLOGv(2,"input file: %s",oo__ifp->filename); } // object initialization hisver= 0; histime= 0; hiscset= 0; hisuid= 0; hisuser= ""; if(oo__ifp->deleteobject==1) oo__ifp->deleteobject= 0; /* read and process one osm object */ { // read object id bufp++; l= pbf_uint32(&bufp); read_bufp= bufe= bufp+l; id= oo__ifp->o5id+= pbf_sint64(&bufp); // read author hisver= pbf_uint32(&bufp); if(hisver!=0) { // author information available histime= oo__ifp->o5histime+= pbf_sint64(&bufp); if(histime!=0) { hiscset= oo__ifp->o5hiscset+= pbf_sint32(&bufp); str_read(&bufp,&sp,&hisuser); hisuid= pbf_uint64((byte**)&sp); } } // end author information available if(bufp>=bufe) // just the id and author, i.e. 
this is a delete request oo__ifp->deleteobject= 1; else { // not a delete request oo__ifp->deleteobject= 0; // read coordinates (for nodes only) if(otype==0) { // node // read node body lon= oo__ifp->o5lon+= pbf_sint32(&bufp); lat= oo__ifp->o5lat+= pbf_sint32(&bufp); } // end node // read noderefs (for ways only) if(otype==1) { // way PWARN("way objects cannot be processed.") l= pbf_uint32(&bufp); bp= bufp+l; if(bp>bufe) bp= bufe; // (format error) while(bufpo5rid[0]+= pbf_sint64(&bufp); } // end way // read refs (for relations only) else if(otype==2) { // relation int64_t ri; // temporary, refid int rt; // temporary, reftype char* rr; // temporary, refrole PWARN("relation objects cannot be processed.") l= pbf_uint32(&bufp); bp= bufp+l; if(bp>bufe) bp= bufe; // (format error) while(bufpo5rid[rt]+= ri; // (delta-coded ref id) } // each reference } // end relation // read key/val pairs if(otype==0) { // node char* k,*v; // key and val tag const char* region,*subregion,*city,*street,*place, *housenumber,*boundary; int8_t weight; region= subregion= city= street= place= housenumber= boundary= NULL; weight= 0; while(bufpv0 && vp[-1]==' ') vp[-1]= 0; // remove trailing blank from substring if(vp>v0+1 && vp[-2]==' ') vp[-2]= 0; // remove second trailing blank *vp++= 0; // terminate substring at ';' } // (further) semicolon(s) if(*v0!=0) { // substring has at least one character if(subsequent) nodecount++; subsequent= true; data_addradd(lon,lat,region,subregion, city,street,v0,weight); } if(vp==NULL) // no further substrings break; v0= vp; // continue with next substring } // for all substrings } // there is at least one semicolon in housenumber } // node contains valid address data nodecount++; } // node else { // not a node char* k,*v; // key and val tag while(bufp1 && id<=oo_sequenceid))) PWARNv("wrong sequence at %s %"PRIi64,ONAME(otype),id) oo_sequencetype= otype; oo_sequenceid= id; } } // end read all input files // end of processing if(nodecount>global_nodecount) { 
PWARN("wrong nodecount in \"strings_nodecount\".") PINFOv(" in file \"strings_nodecount\": %"PRIi64,global_nodecount) PINFOv(" in .o5m file: %"PRIi64,nodecount) } return 0; } // end oo_main() //------------------------------------------------------------ // end Module oo_ osm processing module //------------------------------------------------------------ #if !__WIN32__ void sigcatcher(int sig) { PINFO("Output has been terminated.") exit(1); } // end sigcatcher() #endif int main(int argc,char** argv) { // main program; // for the meaning of the calling line parameters please look at the // contents of helptext[]; bool usesstdin; static char outputfilename[400]= ""; // standard output file name // =="": standard output 'stdout' int r,l; const char* a; // command line argument static FILE* parafile= NULL; static char* aa= NULL; // buffer for parameter file line char* ap; // pointer in aa[] int aamax; // maximum length of string to read #define main__aaM 1000000 #if !__WIN32__ /* care about signal handler */ { static struct sigaction siga; siga.sa_handler= sigcatcher; sigemptyset(&siga.sa_mask); siga.sa_flags= 0; sigaction(SIGPIPE,&siga,NULL); } #endif // initializations usesstdin= false; #if __WIN32__ setmode(fileno(stdout),O_BINARY); setmode(fileno(stdin),O_BINARY); #endif // read command line parameters if(argc<=1) { // no command line parameters given fprintf(stderr,"\n" PROGRAM " " VERSION "\n" "(Please use -h to display a short guide.)\n\n"); return 0; } while(parafile!=NULL || argc>0) { // for every parameter in command line if(parafile!=NULL) do { // there are parameters waiting in a parameter file ap= aa; for(;;) { aamax= main__aaM-1-(ap-aa); if(fgets(ap,aamax,parafile)==NULL) { if(ap>aa) { if(ap>aa && ap[-1]==' ') *--ap= 0; // cut one trailing space break; } goto parafileend; } if(strzcmp(ap,"// ")==0) continue; if(ap>aa && (*ap=='\r' || *ap=='\n' || *ap==0)) { // end of this parameter while(ap>aa && (ap[-1]=='\r' || ap[-1]=='\n')) *--ap= 0; // eliminate 
trailing NL if(ap>aa && ap[-1]==' ') *--ap= 0; // cut one trailing space break; } ap= strchr(ap,0); // find end of string while(ap>aa && (ap[-1]=='\r' || ap[-1]=='\n')) *--ap= 0; // cut newline chars *ap++= ' '; *ap= 0; // add a space } a= aa; while(*a!=0 && strchr(" \t\r\n",*a)!=NULL) a++; if(*a!=0) break; parafileend: fclose(parafile); parafile= NULL; free(aa); aa= NULL; } while(false); if(parafile==NULL) { if(--argc<=0) break; argv++; // switch to next parameter; as the first one is just // the program name, we must do this previous reading the // first 'real' parameter; a= argv[0]; } if((l= strzlcmp(a,"--parameter-file="))>0 && a[l]!=0) { // parameter file parafile= fopen(a+l,"r"); if(parafile==NULL) { PERRv("Cannot open parameter file: %.80s",a+l) perror(PROGRAM); return 1; } aa= (char*)malloc(main__aaM); if(aa==NULL) { PERR("Cannot get memory for parameter file.") fclose(parafile); parafile= NULL; return 1; } aa[0]= 0; continue; // take next parameter } if(loglevel>0) // verbose mode PINFOv("Parameter %.2000s",a) if(strcmp(a,"-h")==0) { // user wants parameter overview fprintf(stdout,"%s",shorthelptext); // print brief help text // (took "%s", to prevent oversensitive compiler reactions) return 0; } if(strcmp(a,"-help")==0 || strcmp(a,"--help")==0) { // user wants help text fprintf(stdout,"%s",helptext); // print help text // (took "%s", to prevent oversensitive compiler reactions) return 0; } if((l= strzlcmp(a,"--max-strrefs="))>0 && a[l]!=0) { // define maximum memory space for string references global_maxstrrefs= strtoint32percent(a+l); continue; // take next parameter } if(strcmp(a,"--out-none")==0) { // user does not want any standard output global_outnone= true; continue; // take next parameter } if(strzcmp(a,"-o=")==0 && a[3]!=0) { // reroute standard output to a file strMcpy(outputfilename,a+3); continue; // take next parameter } if((strcmp(a,"-v")==0 || strcmp(a,"--verbose")==0 || strzcmp(a,"-v=")==0 || strzcmp(a,"--verbose=")==0) && loglevel==0) { 
// test mode - if not given already char* sp; sp= strchr(a,'='); if(sp!=NULL) loglevel= sp[1]-'0'; else loglevel= 1; if(loglevel<1) loglevel= 1; if(loglevel>MAXLOGLEVEL) loglevel= MAXLOGLEVEL; if(a[1]=='-') { // must be "--verbose" and not "-v" if(loglevel==1) PINFO("Verbose mode.") else PINFOv("Verbose mode %i.",loglevel) } continue; // take next parameter } if(strcmp(a,"-")==0) { // use standard input usesstdin= true; if(oo_open(NULL)) // file cannot be read return 2; continue; // take next parameter } if(a[0]=='-') { PERRv("unrecognized option: %.80s",a) return 1; } // here: parameter must be a file name if(strcmp(a,"/dev/stdin")==0) usesstdin= true; if(oo_open(a)) // file cannot be read return 2; } // end for every parameter in command line // process parameters if(oo_ifn==0) { // no input files given PERR("use \"-\" to read from standard input or try: " PROGRAM " -h") return 0; // end the program, because without having input files // we do not know what to do; } if(usesstdin) PLOG(1,"using standard input.") // initialization global_nodecount= data_ini("strings_nodecount"); if(global_nodecount!=0) { data_ini_region("strings_region",1); data_ini_subregion("strings_subregion",2); data_ini_city("strings_city",3); data_ini_street("strings_street",4); data_ini_housenumber("strings_housenumber",5); } // do the work r= oo_main(); if(loglevel>=2) { // verbose if(read_bufp!=NULL && read_bufp0) { // verbose mode if(oo_sequenceid!=INT64_C(-0x7fffffffffffffff)) PINFOv("Last processed: %s %"PRIu64".", ONAME(oo_sequencetype),oo_sequenceid) if(r!=0) PINFOv("Exit: %i\n",r) } // verbose mode return r; } // end main() //------------------------------------------------------------ // Module data_ osm data module //------------------------------------------------------------ static int data__addr_qsort(const void* a,const void* b) { // address data comparison for qsort(); // sort ascending by city string as first criteria, // and descending by weight as second if weight>=64, // 
by street and subsequent by housenumber and weight otherwise; const char* aval,*bval; int_least32_t aw,bw; int_least32_t cv; aval= data__str_city[((data__addr_t*)a)->city].string; bval= data__str_city[((data__addr_t*)b)->city].string; cv= strncmp(aval,bval,data__strstringM); if(cv!=0) return cv; aw= ((data__addr_t*)a)->weight; bw= ((data__addr_t*)b)->weight; if(aw<64) aw= 0; if(bw<64) bw= 0; cv= bw-aw; if(cv!=0) return cv; aval= data__str_street[((data__addr_t*)a)->street].string; bval= data__str_street[((data__addr_t*)b)->street].string; cv= strncmp(aval,bval,data__strstringM); if(cv!=0) return cv; aw= ((data__addr_t*)a)->housenumeric; bw= ((data__addr_t*)b)->housenumeric; cv= aw-bw; if(cv!=0) return cv; aval= data__str_housenumber[((data__addr_t*)a)->housenumber].string; bval= data__str_housenumber[((data__addr_t*)b)->housenumber].string; cv= strncmp(aval,bval,data__strstringM); if(cv!=0) return cv; aw= ((data__addr_t*)a)->weight; bw= ((data__addr_t*)b)->weight; return bw-aw; } // data__addr_qsort() #else // from here: procedure templates #define TEMPLATEM(f,a) f##_##a #define T(f,a) TEMPLATEM(f,a) #undef TEMPLATEONCE #ifndef TEMPLATEFIRSTRUN #define TEMPLATEFIRSTRUN #define TEMPLATEONCE // container for externally stored used-by references #define data__strrefusedM 64 struct data__strref_struct { uint32_t used[data__strrefusedM]; // indexes for used-references; struct data__strref_struct* next; }; typedef struct data__strref_struct data__strref_t; // container for strings and their used-by references #define data__strstringM 40 #define data__strusedM 5 struct data__str_struct { char string[data__strstringM]; // string (including terminator); // if maximum string length, the terminator is omitted; uint32_t usedn; // number of backreferences for this string union { uint32_t used[data__strusedM]; // backreferences for this string; // these backreferences are stored as 32-bit indexes; // unused backreferences need not to be zeroed: // array elements are valid in 
dependence of 'usedn'; // if there are more than 'data__strusedM' backreferences, // they are entered as external references (see below); struct { uint32_t filler; data__strref_t* usedref,*usedlast; }; // if there are more than 'data__strusedM' backreferences, // these two pointers refer to elements of data__strref; // .usedref: first element in chain; // .usedlast: last element of the chain; // (this substructure should not exceed 20 Bytes in length // to keep the container length at 64 Bytes; }; } __attribute__((__packed__)); typedef struct data__str_struct data__str_t; // memory area for externally stored used-by references static data__strref_t* data__strref= NULL; // string references static data__strref_t* data__strrefe= NULL; // logical end static data__strref_t* data__strrefee= NULL; // physical end static uint_fast32_t data__strrefn= NULL; // number of refs // container for address data struct data__addr_struct { // (length 32 Bytes) int32_t x,y; // geocoordinates (unit 10^-7 degree) uint32_t region; // index of region string uint32_t subregion; // index of subregion string uint32_t city; // index of city string uint32_t street; // index of street string uint32_t housenumber; // index of housenumber string uint16_t housenumeric; // numeric representation of housenumber int8_t weight; // geographical extend: // binary logarithm of bounding-box width (meters) // as provided by 'osmconvert --add-bboxwidthweight'; uint8_t work; // temporary variable, for internal use; // 0: default; 1: street without city; 2: street was without city; } __attribute__((__packed__)); typedef struct data__addr_struct data__addr_t; static data__addr_t* data__addr= NULL; // address memory area static data__addr_t* data__addre= NULL; // logical end static data__addr_t* data__addree= NULL; // physical end static uint_fast32_t data__addrn= 0; // logical number of address datasets static uint_fast32_t data__nodecount= 0; // physical number of node objects (as read from file) // indexes 
for geocoordinates struct data__coco_struct { uint64_t yx; // rounded geocoordinate combination uint32_t addrindex; // index of address element uint32_t filler; } __attribute__((__packed__)); typedef struct data__coco_struct data__coco_t; static data__coco_t* data__coco; // rounded geocoordinate index; // search criterion is a geocoordinates combination: // ((lat+900000000)&0xffffc000)*2^16 + (lon+1800000000); // 16384 * 100 nanodegrees represents a length of about 182 meters; // this is done to accelerate the determination of neighboring // points within a certain geographical distance; static data__coco_t* data__cocoe= NULL; // logical end // procedure prototypes static inline uint32_t data_index_region(const char*); static inline uint32_t data_index_subregion(const char*); static inline uint32_t data_index_city(const char*); static inline uint32_t data_index_street(const char*); static inline uint32_t data_index_housenumber(const char*); static void data__addused_region(uint32_t,uint32_t); static void data__addused_subregion(uint32_t,uint32_t); static void data__addused_city(uint32_t,uint32_t); static void data__addused_street(uint32_t,uint32_t); static void data__addused_housenumber(uint32_t,uint32_t); static bool data__strclean_region(); static bool data__strclean_subregion(); static bool data__strclean_city(); static bool data__strclean_street(); static bool data__strclean_housenumber(); static bool data__write_region(); static bool data__write_subregion(); static bool data__write_city(); static bool data__write_street(); static bool data__write_housenumber(); static int data__addr_qsort(const void*,const void*); static inline const char* data_string_region(uint32_t); static inline const char* data_string_subregion(uint32_t); static inline const char* data_string_city(uint32_t); static inline const char* data_string_street(uint32_t); static inline const char* data_string_housenumber(uint32_t); static int data__strref_qsort(const void* a,const void* b) { // 
strref comparison for qsort() uint32_t aval,bval; aval= *(uint32_t*)a; bval= *(uint32_t*)b; if(aval>bval) return 1; if(aval==bval) return 0; return -1; } // data__strref_qsort() static int data__coco_qsort(const void* a,const void* b) { // coordinate comparison for qsort() uint64_t aval,bval; aval= ((data__coco_t*)a)->yx; bval= ((data__coco_t*)b)->yx; if(aval>bval) return 1; if(aval==bval) return 0; return -1; } // data__coco_qsort() #define data__coco_PORTION 16384 // latitude grid in degrees; 16384 is as much as 182 Meters; static inline uint64_t data__coco_calc(int32_t x,int32_t y) { // calculate the geocoordinate combination; // y coordinate will be rounded and left-shifted and then // summed with y coordinate; uint64_t ii; ii= UINT64_C(900000000); ii+= y; ii&= UINT64_MAX-data__coco_PORTION+1; // (0xffffffffffffc000) ii<<= 32; ii+= UINT64_C(1800000000); ii+= x; return ii; } // data__coco_calc() static inline data__coco_t* data__coco_find(uint64_t cc) { // searches in data__coco and finds the first occurrence // which is not smaller than the given compare value; // cc: geocoordinate combination to search for; // return: position in data__coco[] which matches (or is higher); // if there is no such position, then the highest // position in data__coco[] is returned; int_fast32_t i,i1,i2; // iteration indexes: // middle, lower including, upper excluding; i1= 0; i2= data__addrn; while(i10 && cc==data__coco[i-1].yx) i--; // find first of multiple occurrences if(i>=data__addrn) i= data__addrn-1; // check range (the value searched for could be higher than // the highest value in data__coco[]) return data__coco+i; } // data__coco_find() // geocoordinate combination; // for data__coco_center() and data__coco_next(); static int32_t data__coco_x,data__coco_y; // current center static int32_t data__coco_radius; // max. 
allowed distance static uint64_t data__coco_s,data__coco_n; // South and North edge; static uint32_t data__coco_w,data__coco_e; // West and East edge; // normalized by +180 degree; static data__coco_t* data__coco_here= NULL; // current position in data__coco[] static inline void data__coco_center(int32_t x,int32_t y, int32_t radius) { // set center point for subsequent neighbor calculation; // x,y: geocoordinates of the center; // radius: radius around x,y; unit: 100 latitude nanodegrees; // this procedure sets the borders of a square all searched points // must be located in; // this square is snapped into the latitude grid laid down // by data__coco[]; // the actual neighbor calculation is done by data__coco_next(); int32_t lonradius; // radius in longitude degrees // set West and East borders data__coco_x= x; data__coco_y= y; lonradius= INT32_C(0x10000)/lonadapt(INT32_C(0x10000),y)*radius; // using lonadapt() in reverse direction data__coco_w= UINT32_C(1800000000)+x-lonradius; data__coco_e= UINT64_C(1800000000)+x+lonradius; // set South and North borders data__coco_s= UINT64_C(900000000)+y-radius; data__coco_s&= UINT64_MAX-data__coco_PORTION+1; data__coco_s<<= 32; data__coco_n= UINT64_C(900000000)+y+radius+(data__coco_PORTION-1); data__coco_n&= UINT64_MAX-data__coco_PORTION+1; data__coco_n<<= 32; // set radius data__coco_radius= radius; // initialize control variable data__coco_here= NULL; // to tell data__coco_next() that the // first address object within the radius is still to search for; } // data__coco_center() static inline data__addr_t* data__coco_next(int32_t* distancep) { // get (next) address dataset within radius; // return: address dataset within range; // NULL: no more datasets within range; // *distancep: distance to the dataset returned; uint64_t cc; // geocoordinate combination; data__addr_t* addrp; int32_t dist; // distance do { // until have found an address within the radius if(data__coco_here==NULL) // first run data__coco_here= 
data__coco_find(data__coco_s+data__coco_w); else { // not first run data__coco_here++; if(data__coco_here>=data__cocoe) { *distancep= 0; return NULL; } cc= data__coco_here->yx; if((cc&0xffffffff)>data__coco_e) { // beyond Eastern edge cc&= 0xffffffff00000000; // only latitude cc+= ((uint64_t)data__coco_PORTION)<<32; if(cc>data__coco_n) { // no more portions within the square *distancep= 0; return NULL; } data__coco_here= data__coco_find(cc+data__coco_w); } // beyond Eastern edge } // not first run addrp= data__addr+data__coco_here->addrindex; dist= geodistance(addrp->x,addrp->y,data__coco_x,data__coco_y); } while(dist>data__coco_radius); // until have found an address within the radius *distancep= dist; return addrp; } // data__coco_next() static data__strref_t* data__strrefget() { // provide a new string reference container; // if there is no element left in string reference memory, // a dummy element will be provided; // note that the calling procedure must care about all matters // of container initialization, including the next-pointer; static data__strref_t dummyref= {{0},0}; if(data__strrefe>=data__strrefee) { // no free elements left PERR("not enough string reference memory size") PINFO("try to increase --max-strrefs=") return &dummyref; } data__strrefn++; return data__strrefe++; } // data__strrefget() static void data__end() { // terminate the services of this module; FREE(data__addr) FREE(data__strref) FREE(data__coco) } // data__end() static bool data__addrclean() { // remove address dataset duplicates // return: success; data__addr_t* readp,*writep; data__addr_t *readbeforep; // dataset read before (for comparisons) // examine all address datasets and remove // empty ones and duplicates readbeforep= data__addr; readp= writep= data__addr+1; while(readpregion!=readbeforep->region || readp->subregion!=readbeforep->subregion || readp->city!=readbeforep->city || readp->street!=readbeforep->street || readp->housenumber!=readbeforep->housenumber ) && 
(readp->region!=0 || readp->subregion!=0 || readp->city!=0 || readp->street!=0 || readp->housenumber!=0) ) { // this is not an empty dataset and not a duplicate if(writep!=readp) *writep= *readp; writep++; } // this is not an empty dataset and not a duplicate readp++; readbeforep++; } // for each dataset data__addre= writep; data__addrn= data__addre-data__addr; return true; } // data__addrclean() #endif // TEMPLATEONCE static int T(data__strtypeid,I)= 0; // data type id; // 1: region; 2: subregion; 3: city; 4: street; 5: housenumber; // memory area for strings and their used-by references static data__str_t* T(data__str,I)= NULL; // string memory area static data__str_t* T(data__stre,I)= NULL; // logical end static uint_fast32_t T(data__strn,I)= NULL; // number of strings static void T(data__addused,I)(uint32_t index,uint32_t addrindex) { // add used-by information to a string; // index: index of the string; // addrindex: index of address dataset which uses this string; uint32_t usedn; // number of backreferences for this string uint32_t usedremainder; data__str_t* strp; // this string data__strref_t* usedp; strp= T(data__str,I)+index; usedn= strp->usedn; if(usednused[usedn]= addrindex; strp->usedn= usedn+1; return; } if(usedn==data__strusedM) { // first element to be stored externally usedp= data__strrefget(); // get new external container usedp->next= NULL; memcpy(usedp->used,strp->used,sizeof(uint32_t)*data__strusedM); strp->usedref= strp->usedlast= usedp; } usedremainder= usedn % data__strrefusedM; if(usedremainder==0) { // need a new strref container usedp= data__strrefget(); // get new external container usedp->next= NULL; strp->usedlast->next= usedp; // link two newest containers strp->usedlast= usedp; // link new container to string } else usedp= strp->usedlast; usedp->used[usedremainder]= addrindex; strp->usedn= usedn+1; } // data__addused_I() static bool T(data__strclean,I)() { // remove unused strings // return: success; data__str_t* readp,*writep; 
uint32_t* ttab; // translation table: old index -> new index uint32_t* ttabp; // pointer in translation table uint32_t ttabi; // index in translation table data__addr_t* addrp; // pointer in address memory space if(T(data__strn,I)<=1) // strings too few for cleaning return true; // allocate memory for translation table ttab= (uint32_t*)malloc(sizeof(uint32_t)*T(data__strn,I)+4); if(ttab==NULL) { PERRv( "not enough memory for string index translation type-%i.", T(data__strtypeid,I)) return false; } // examine all strings of this instance and remove unused of them ttabp= ttab; ttabi= 0; *ttabp++= ttabi++; // (first string is a dummy) readp= writep= T(data__str,I)+1; while(readpusedn!=0) { // this string is used by at least one address dataset if(writep!=readp) *writep= *readp; *ttabp++= ttabi++; writep++; } else *ttabp++= 0; readp++; } // for each string if(writep!=readp) { // at least one string has been removed // adjust length of string table T(data__stre,I)= writep; T(data__strn,I)= T(data__stre,I)-T(data__str,I); // adjust indexes of address datasets, use translation table for this for(addrp= data__addr;addrpI= ttab[addrp->I]; } // at least one string has been removed free(ttab); return true; } // data__strclean_I() static bool T(data__write,I)() { // write string data to output stream; // return: success; int64_t strn; // number of strings int64_t strlensum; // sum of lengths of all strings // (without terminating zeros) int64_t strlenmax; // length of longest string // (without terminating zeros); that's usually 40 because of // limitation in data__str_t; int64_t usednsum; // total number of all used-references int64_t usednmax; // maximum number of used-references for a // single string data__str_t* strp; int64_t len,usedn; uint32_t* usedref; // temporary storage area for used-refs // write statistics information strn= T(data__strn,I); strlensum= 0; strlenmax= 0; usednsum= 0; usednmax= 0; for(strp= T(data__str,I);strpstring,data__strstringM); strlensum+= len; 
if(len>strlenmax) strlenmax= len; usedn= strp->usedn; usednsum+= usedn; if(usedn>usednmax) usednmax= usedn; } // for all strings #define DD(s,v) PLOGv(1,s ": %"PRIi64,T(data__strtypeid,I),v) #define D(s,v) PLOGv(1,s ": %"PRIi64,v) DD("writing type-%i strings",strn) D(" strings total length",strlensum) D(" strings max. length",strlenmax) D(" strrefs",usednsum) D(" strrefs max",usednmax) #undef DD #undef D wrs_char(0x50+T(data__strtypeid,I)); // section type id wrs_int64(sizeof(int64_t)*5); // number following Bytes wrs_int64(strn); // number of strings wrs_int64(strlensum); // sum of lengths of all strings // (without terminating zeros) wrs_int64(strlenmax); // length of longest string // (without terminating zeros); that's usually 40 because of // limitation in data__str_t; wrs_int64(usednsum); // total number of all used-references wrs_int64(usednmax); // maximum number of used-references for a // single string // write strings wrs_char(0x60+T(data__strtypeid,I)); // section type id wrs_int64(strlensum+strn); // number following Bytes for(strp= T(data__str,I);strpstring,data__strstringM); // allocate memory for sorting used-references usedref= (uint32_t*)malloc(sizeof(uint32_t)*usednmax+4); if(usedref==NULL) { PERRv("not enough memory for sorting type-%i strrefs", T(data__strtypeid,I)) return false; } // write used-references header wrs_char(0x70+T(data__strtypeid,I)); // section type id wrs_int64(sizeof(int32_t)*(strn+usednsum+strn)); // number following Bytes // write used-references body for(strp= T(data__str,I);strpusedn; if(un<=data__strusedM) { // references are string-dataset internal up= strp->used; while(un-->0) *usedrefp++= *up++; } else { // references are string-dataset external srp= strp->usedref; up= srp->used; i= data__strrefusedM; while(un-->0) { *usedrefp++= *up++; if(--i==0) { i= data__strrefusedM; srp= srp->next; up= srp->used; } } } // references are string-dataset external // sort used-references if(usedn>0) 
qsort(usedref,usedn,sizeof(uint32_t),data__strref_qsort); // write used-references for this string wrs_uint32(usedn); if(usedn>0) wrs_mem(usedref,sizeof(uint32_t)*usedn); wrs_uint32(0); // write references' terminator } // for all strings free(usedref); return true; } // data__write_I() static void T(data__end,I)() { // terminate the services of this instance; FREE(T(data__str,I)) } // data__end_I() //------------------------------------------------------------ #ifdef TEMPLATEONCE static int64_t data_ini(const char* file) { // initialization of this module; // must be called before any data_str_ini_I() procedures; // file: file with number of node objects to read; // return: number of nodes; // 0: error; // reads the number of nodes and initializes the data structures // for address data; int fd; // file descriptor atexit(data__end); // open the nodecount file fd= open(file,O_RDONLY|O_BINARY); if(fd<0) { PERRv("could not open input file: %.80s",file) return 0; } /* read the file and get the number */ { char buf[102]; // read buffer char* bufe; // logical end in buffer char* bufee; // physical end in buffer size_t rb; // number of bytes read bufe= buf; bufee= buf+ sizeof(buf)-2; buf[0]= 0; while(bufe0) // manually-defined absolute size sizeref= global_maxstrrefs*(INT64_C(1024)*1024); else if(global_maxstrrefs<0) // manually-defined relative size sizeref*= global_maxstrrefs*-INT64_C(100); if(sizeref<128) sizeref= 128; sizeref-= (sizeref+sizeof(data__strref_t)-1)% sizeof(data__strref_t); if(global_maxstrrefs!=0) PLOGv(1,"changed by --max-strrefs= to: %"PRIi64,sizeref) // allocate memory data__strref= (data__strref_t*)malloc(sizeref+4); if(data__strref==NULL) { PERRv("not enough memory for string references: %" PRIi64" MiB",sizeref/(1024*1024)) PINFO("try to decrease --max-strrefs=") return 0; } data__strrefe= data__strref; // logical end data__strrefee= (data__strref_t*)((int8_t*)data__strref+sizeref); // physical end data__strrefn= 0; // number of elements in 
data__strref[] } // care about string reference memory area // allocate memory for geocoordinate combinations data__coco= (data__coco_t*) malloc(sizeof(data__coco_t)*data__nodecount+4); if(data__coco==NULL) { PERR("not enough memory for geocoordinate combinations") return 0; } return data__nodecount; } // data_ini() static void data_addradd(int32_t x,int32_t y, const char* region,const char* subregion, const char* city,const char* street, const char* housenumber,int8_t weight) { // enter a new dataset for an address; // x,y: geocoordinates (unit 10^-7 degree); // region,subregion,city,street,housenumber: // parts of this address as strings; // weight: geographical extent: // binary logarithm of bounding-box width in meters; uint32_t si; // string index uint32_t hnr; // house number (numeric) if(data__addre>=data__addree) { // no free elements left PERR("not enough address memory size") PINFO("maybe file \"strings_nodecount\" is faulty.") return; } data__addre->x= x; data__addre->y= y; #define D(z) si= T(data_index,z)(z); \ if(si==0 && z!=NULL) PERRv("unknown " #z " string: %.80s",z) \ data__addre->z= si; D(region) D(subregion) D(city) D(street) D(housenumber) #undef D if(housenumber==NULL) hnr= 0; else hnr= strtouint32(housenumber); if(hnr>65535) hnr= 65535; // limit range to fit into 16 bit data__addre->housenumeric= hnr; data__addre->weight= weight; if(data__addre->street!=0 && data__addre->housenumber==0 && data__addre->city==0) // street data set is without city information data__addre->work= 1; else data__addre->work= 0; data__addre++; data__addrn++; } // data_addradd() static bool data_addrcomplete() { // complete address data and sort them; // return: success; bool r; r= true; // log some information PLOGv(1,"string reference memory used: %"PRIi64, (int64_t)(data__strrefe-data__strref)*sizeof(data__strref)) /* initialize rounded geocoordinate combination index */ { uint_fast32_t count; data__addr_t* addrp; for(count= 0,addrp= data__addr,data__cocoe= 
data__coco; countyx= data__coco_calc(addrp->x,addrp->y); data__cocoe->addrindex= count; } // for each address dataset // sort index by geocoordinate combination qsort(data__coco,data__addrn,sizeof(data__coco_t), data__coco_qsort); } // initialize rounded geocoordinate combination index /* add city to addresses and streets */ { // (streets will be recognized as address data which have // street name but no housenumber); uint_fast32_t count; data__addr_t* addrp,*nearest_ap,*ap; uint32_t city,street; int32_t x,y; // geocoordinates of currently examined street int32_t nearest_d,d; // distance in degrees for(count= 0,addrp= data__addr;countcity; street= addrp->street; if(addrp->city==0) { // city is unknown if(street==0) { // street is unknown if(addrp->housenumber==0 && addrp->region!=0) { // house number is unknown, but region is known // -> must be a region dataset then // copy region's name to city string const char* rs,*cs; // region string, city string uint32_t ci; // region index, city index rs= data_string_subregion(addrp->subregion); if(*rs==0) rs= data_string_region(addrp->region); ci= data_index_city(rs); if(ci==0) PWARNv("missing city string for region %.80s",rs) else { // take region string for city string; addrp->city= ci; PLOGv(2,"%.80s -> %.80s\n",cs,rs) } } // house number is unknown continue; } // street is unknown if(addrp->housenumber!=0) { // house address, not just a street uint32_t ci; // city string index ci= data_index_city(data_string_region(addrp->region)); if(ci==0) PWARNv("could not find city string for region %.80s", data_string_region(addrp->region)) else addrp->city= ci; continue; } // house address, not just a street // here: must be a street AND city is unknown x= addrp->x; y= addrp->y; nearest_ap= NULL; nearest_d= INT32_MAX; data__coco_center(x,y,INT32_C(67500)); // radius 750 m around x,y for(;;) { // for all address objects within radius ap= data__coco_next(&d); // get next point within radius if(ap==NULL) // no other point in range 
break; if(ap->street!= addrp->street || ap->city==0 || ap->work!=0) // different street name OR no city information OR // that address dataset represents a street too continue; if(dcity= nearest_ap->city; addrp->work= 2; } // have found a nearby object with city information else { // no nearby object with city information city= addrp->city= data_index_city(data_string_region(addrp->region)); // take region string as city string } } // city is unknown if(city!=0) { // city is known // we cannot be sure the city's name is correct // hence we need to adjust it to the region's name const char* rs,*cs; // region string, city string uint32_t ri,ci; // region index, city index int l,c; rs= data_string_region(addrp->region); cs= data_string_city(city); l= strzlcmp(rs,cs); c= rs[l]; if(l==0 || c!=0) { // not an exact match if(l>=3 && c!=0 && (c==' ' || c=='-' || c=='/' || c=='(')) { // region string is longer than city string // and fully contains the city string AND // city string is at least 3 bytes long // -> we assume the city string is incomplete, // for example "Frankfurt" instead of "Frankfurt am Main"; ci= data_index_city(rs); if(ci==0) PWARNv("missing city string for region %.80s", data_string_region(addrp->region)) else { // take region string for city string; addrp->city= ci; PLOGv(2,"%.80s -> %.80s\n",cs,rs) } } // region string is longer than city string ... 
else { // city string does not partially match region string ri= data_index_region(cs); if(ri==0) { // city string does not match any region string ci= data_index_city(rs); if(ci==0) PWARNv("missing city string for region %.80s", data_string_region(addrp->region)) else { // take region string for city string; addrp->city= ci; PLOGv(2,"%.80s -> %.80s\n",cs,rs) } } // city string does not match any region string else { // city string matches another region string rs= data_string_region(ri); ci= data_index_city(rs); if(ci==0) PWARNv("missing city string for region %.80s", data_string_region(addrp->region)) else { // take the other region string for city string; addrp->city= ci; PLOGv(2,"%.80s -> %.80s\n",cs,rs) } } // city string matches another region string } // city string does not partially match region string } // not an exact match } // city is known } // for each address dataset } // add city to addresses and streets /* add subregion information */ { data__addr_t* addrp; uint32_t si,ci; // index of subregion and city const char* ss,*cs; // string of subregion and city int l; for(addrp= data__addr+1;addrpsubregion; if(si!=0) { // there is a subregion ci= addrp->city; if(ci!=0) { // there is a city ss= data_string_subregion(si); cs= data_string_city(ci); l= strzlcmp(ss,cs); if(l>=3) { // contents of subregion string starts // with city string AND // city string is at least 3 bytes long ci= data_index_city(ss); if(ci==0) PWARNv("missing city string for subregion %.80s",ss) else { // take subregion string as city string; addrp->city= ci; PLOGv(2,"%.80s -> sub %.80s\n",cs,ss) } } // contents of subregion string starts ... } // there is a city } // there is a subregion } // for each address dataset } /* order address data by city name, street name, etc. 
*/ { qsort(data__addr,data__addrn,sizeof(data__addr[0]), data__addr_qsort); } // remove address dataset duplicates r= r && data__addrclean(); /* add used-by references */ { uint_fast32_t addri; data__addr_t* addrp; uint32_t si; // string index for(addri= 1,addrp= data__addr+1;addriz; \ if(si!=0) T(data__addused,z)(si,addri); D(region) D(subregion) D(city) D(street) D(housenumber) #undef D } // for each address dataset } // add used-by references // remove unused strings r= r && data__strclean_region(); r= r && data__strclean_subregion(); r= r && data__strclean_city(); r= r && data__strclean_street(); r= r && data__strclean_housenumber(); return r; } // data_addrcomplete() static bool data_write(const char* file) { // write address data and string data to output stream; // file: name of the file to write; // NULL: standard output; // return: success; // uses procedures from module wrs_; // // data format .ogb osmgeobase // // (uint8_t) 0xfb - start of file // // (uint8_t) 0xe0 - file format section // (int64_t) 15 - length of section contents // (char[14+1]) "osmgeobase0000" - file format name // // (uint8_t) 0x40 - address data section // (int64_t) - length of section contents // (data__addr_t) - first address dataset // (see definition of data__addr) // first dataset is a dummy for 'invalid address' // (data__addr_t) - second address dataset // (data__addr_t) - ... // (data__addt_t) - last address dataset // // (uint8_t) 0x51 - "region" statistics section // (int64_t) 5*8 - length of section contents // (int64_t) - number of strings // (int64_t) - sum of lengths of all strings (without term. 
//   zeros)
  // (int64_t) - length of longest string (without terminating zeros);
  //   usually 40 because of limitation in data__str_t;
  // (int64_t) - total number of all used-references
  // (int64_t) - maximum number of used-references for a single string
  //
  // (uint8_t) 0x61 - "region" strings section
  // (int64_t) - length of section contents
  // (char[l+1]) - first string (zero-terminated);
  //   usually "" for 'no information';
  // (char[l+1]) - second string (zero-terminated)
  // (char[l+1]) - ...
  // (char[l+1]) - last string (zero-terminated)
  //
  // (uint8_t) 0x71 - "region" used-references section
  // (int64_t) - length of section contents
  // (uint32_t) - number of references for first string
  // (uint32_t[]) - references for first string
  // (uint32_t) 0 - references' terminator for first string
  // (uint32_t) - number of references for second string
  // (uint32_t[]) - references for second string
  // (uint32_t) 0 - references' terminator for second string
  // (uint32_t) - ...
  // (uint32_t[]) - ...
  // (uint32_t) - number of references for last string
  // (uint32_t[]) - references for last string
  // (uint32_t) 0 - references' terminator for last string
  //
  // (uint8_t) 0x52 - "subregion" statistics section
  // ...
  // (uint8_t) 0x62 - "subregion" strings section
  // ...
  // (uint8_t) 0x72 - "subregion" used-references section
  // ...
  //
  // (uint8_t) 0x53 - "city" statistics section
  // ... ...
  //
  // (uint8_t) 0x54 - "street" statistics section
  // ... ...
  //
  // (uint8_t) 0x55 - "housenumber" statistics section
  // ... ...
// // all numbers are stored in little-endian order if(!wrs_open(file)) return false; // write file header wrs_char(0xfb); // file header byte wrs_char(0xe0); // file format section id wrs_int64(15); // length of this section wrs_strz("osmgeobase0000",80); // section contents /* write address data */ { int64_t len; wrs_char(0x40); // address data section id len= (uint8_t*)data__addre-(uint8_t*)data__addr; wrs_int64(len); // address data length wrs_mem(data__addr,len); } // write address data /* write string data */ { data__write_region(); data__write_subregion(); data__write_city(); data__write_street(); data__write_housenumber(); } // write address data // write eof mark wrs_char(0xfe); return true; } // data_write() #endif // TEMPLATEONCE static bool T(data_ini,I)(const char* file,int strtypeid) { // initialization of this instance; // data_ini() must have been called before; // file: file with zero-terminated strings; // strtypeid: data type identifier; // for region: 1, subregion: 2, city: 3, etc.; // return: success; // reads the string file and stores it in main memory; #define data__ini_bufM 2000000 // size of buffer uint_fast32_t strn; // number of strings static data__str_t* stree; // physical end in data__str_I int fd; // file descriptor atexit(T(data__end,I)); T(data__strtypeid,I)= strtypeid; // open the string file fd= open(file,O_RDONLY|O_BINARY); if(fd<0) { PERRv("could not open input file: %.80s",file) return false; } /* count all strings of the file */ { char* buf; // read buffer char* bufp; // read pointer in buffer char* bufe; // logical end in buffer char* bufee; // physical end in buffer size_t rb; // number of bytes read buf= (char*)malloc(data__ini_bufM+4); if(buf==NULL) { PERRv("not enough memory for analyzing file %.80s",file) return false; } strn= 1; // start with 1 because first entry is a dummy // to enable zero-index entries for "unknown value"; bufe= buf; bufee= buf+ data__ini_bufM; for(;;) { // for all strings in file rb= 
read(fd,buf,bufee-buf); if(rb<=0) break; bufe= buf+rb; bufp= buf; while(bufp=stree) { PERRv("string memory overflow: %.80s",file) FREE(buf) close(fd); return false; } i= data__strstringM; // maximum string length sp= T(data__stre,I)->string; while(*bufp!=0 && i>0) { *sp++= *bufp++; // copy string byte by byte i--; } while(i>=0) { *sp++= 0; // fill remaining string space with zeros i--; } while(*bufp!=0) bufp++; // ignore remaining characters of very long strings T(data__stre,I)->usedn= 0; strread++; T(data__stre,I)++; if(T(data__stre,I)>=stree) // end of string space break; bufp++; // jump over string terminator if(*bufp==0) // there is a second terminator => end of file break; } // for all strings in file FREE(buf) if(strread==0 && T(data__strtypeid,I)!=2 || 1+strread!=strn) { // (did not read any string AND not instance 'subregion') // OR did not read the expected number of strings PWARNv("could not completely read file %.80s",file) PINFOv("read %"PRIuFAST32" strings of %"PRIuFAST32".", strread,strn); close(fd); return false; } } // read all strings of the file // close the string file close(fd); T(data__strn,I)= strn; return true; } // data_ini_I() static inline const char* T(data_string,I)(uint32_t index) { // retrieve the string to a given index; return T(data__str,I)[index].string; } // data_string_I() static inline uint32_t T(data_index,I)(const char* string) { // determine the index to a given string; // string[]: string the index is to be determined for; // NULL: will return 0; // 0: there is no such string in string memory; int_fast32_t i,i1,i2; // iteration indexes: // middle, lower including, upper excluding; int cv; // compare value if(string==NULL) return 0; i1= 0; i2= T(data__strn,I); while(i1