#define PROGRAM "osmposition" #define VERSION "0.0.0" // 2016-03-30 05:20 // // compile this file: // gcc osmposition.c -O3 -o osmposition // // tests: // gcc osmposition.c -g -o osmposition // ./osmposition adr.ogb // // (c) 2016 Markus Weber, Nuernberg // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU Affero General Public License // version 3 as published by the Free Software Foundation. // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Affero General Public License for more details. // You should have received a copy of this license along // with this program; if not, see http://www.gnu.org/licenses/. // // Other licenses are available on request; please ask the author. //------------------------------------------------------------ // What this Program does //------------------------------------------------------------ // // This program provides a geocoding service based on OSM address // data which must have been generated by osmgeobase program. // Addresses to code are read from standard input; their // calculated geopositions will be written to standard output. 
// Examples ("<" means input, ">" means output): // < Bahnhofstraße, Nürnberg // > 1 Bahnhofstraße, Nürnberg [11.0873148,49.4472729,8,Nürnberg] // Input syntax: // Nürnberg (just the city name) // Bahnhofstraße, Nürnberg (street name, city name) // Bahnhofstraße 5, Nürnberg (street name [space] number, city name) #ifndef I // from here: main program //#define STATISTICS #define MAXLOGLEVEL 1 // keep at 1, unless you want do debug const char* shorthelptext= "\n" PROGRAM " " VERSION " Parameter Overview\n" "(Please use --help to get more information.)\n" "\n" " OSM geobase file name (.ogb)\n" "--random-address= write addresses to stdout and terminate\n" "-h display this parameter overview\n" "--help display a more detailed help\n" "--parameter-file= param. in file, separated by empty lines\n" "--verbose activate verbose mode\n"; const char* helptext= "\n" PROGRAM " " VERSION "\n" "\n" "This program provides a geocoding service based on OSM address\n" "data which must have been generated by osmgeobase program.\n" "Addresses to code are read from standard input; their\n" "calculated geopositions will be written to standard output.\n" "Examples (\"<\" means input, \">\" means output):\n" " < Bahnhofstraße, Nürnberg\n" " > 1 Bahnhofstraße, Nürnberg [11.0873148,49.4472729,8,Nürnberg]\n" "\n" "--random-address=\n" " This option will have the program to write randomly selected\n" " addresses to standard output. The value stands for the\n" " number of addresses to be written.\n" "\n" "-h\n" " Display a short parameter overview.\n" "\n" "--help\n" " Display this help.\n" "\n" "--parameter-file=FILE\n" " If you want to supply one ore more command line arguments\n" " by a parameter file, please use this option and specify the\n" " file name. Within the parameter file, parameters must be\n" " separated by empty lines. 
Line feeds inside a parameter will\n" " be converted to spaces.\n" " Lines starting with \"// \" will be treated as comments.\n" "\n" "-v\n" "--verbose\n" " With activated \'verbose\' mode, some statistical data and\n" " diagnosis data will be displayed.\n" " If -v resp. --verbose is the first parameter in the line,\n" " " PROGRAM " will display all input parameters.\n" "\n" "Example\n" "\n" "./" PROGRAM " adr.ogb <<<\"Rathausplatz, Hamburg\"\n" "\n" "Limitations\n" "\n" "This program is for experimental use. Expect malfunctions and data\n" "loss. Do not use the program in productive or commercial systems.\n" "\n" "There is NO WARRANTY, to the extent permitted by law.\n" "Please send any bug reports to marqqs@gmx.eu\n\n"; #define _FILE_OFFSET_BITS 64 #include #include #include #include #include #include #include #include #include #include #include typedef enum {false= 0,true= 1} bool; #define isdig(x) isdigit((unsigned char)(x)) static int loglevel= 0; // logging to stderr; // 0: no logging; 1: small logging; 2: normal logging; // 3: extended logging; #define DP(f) fprintf(stderr,"Debug: " #f "\n"); #define DPv(f,...) fprintf(stderr,"Debug: " #f "\n",__VA_ARGS__); #define DPM(f,p,m) { byte* pp; int i,mm; static int msgn= 3; \ if(--msgn>=0) { fprintf(stderr,"Debug memory: " #f); \ pp= (byte*)(p); mm= (m); if(pp==NULL) fprintf(stderr,"\n (null)"); \ else for(i= 0; i=0) \ fprintf(stderr,PROGRAM " Error: " f "\n"); } // print error message #define PERRv(f,...) { static int msgn= 3; if(--msgn>=0) \ fprintf(stderr,PROGRAM " Error: " f "\n",__VA_ARGS__); } // print error message with value(s) #define PWARN(f) { static int msgn= 3; if(--msgn>=0) \ fprintf(stderr,PROGRAM " Warning: " f "\n"); } // print a warning message, do it maximal 3 times #define PWARNv(f,...) 
{ static int msgn= 3; if(--msgn>=0) \ fprintf(stderr,PROGRAM " Warning: " f "\n",__VA_ARGS__); } // print a warning message with value(s), do it maximal 3 times #define PINFO(f) \ fprintf(stderr,PROGRAM ": " f "\n"); // print info message #define PINFOv(f,...) \ fprintf(stderr,PROGRAM ": " f "\n",__VA_ARGS__); #define PLOG(ll,f,...) { \ if(ll<=MAXLOGLEVEL && loglevel>=ll) PINFO(f) } #define PLOGv(ll,f,...) { \ if(ll<=MAXLOGLEVEL && loglevel>=ll) PINFOv(f,__VA_ARGS__) } #define FREE(x) if(x!=NULL){free(x);x= NULL;} #define ONAME(i) \ (i==0? "node": i==1? "way": i==2? "relation": "unknown object") #define global_fileM 1002 // maximum number of input files //------------------------------------------------------------ // end Module Global global variables for this program //------------------------------------------------------------ #define COORDINATE_NIL INT32_C(2000000000) // nil value for geocoordinates which are stored as // 10^-7 fixpoint numbers in int32_t static uint32_t strtouint32(const char* s) { // read a number and convert it to an unsigned 32-bit integer; // return: number; int32_t i; uint8_t b; i= 0; for(;;) { b= (uint8_t)(*s++ -'0'); if(b>=10) break; i= i*10+b; } return i; } // strtouint32() static inline int32_t strtoint32(const char* s) { // read a number and convert it to a signed 32-bit integer; // return: number; int sign; int i; uint8_t b; if(*s=='-') { s++; sign= -1; } else sign= 1; i= 0; for(;;) { b= (uint8_t)(*s++ -'0'); if(b>=10) break; i= i*10+b; } return i*sign; } // strtoint32() static inline int64_t strtosint64(const char* s) { // read a number and convert it to a signed 64-bit integer; // return: number; int sign; int64_t i; uint8_t b; if(*s=='-') { s++; sign= -1; } else sign= 1; i= 0; for(;;) { b= (uint8_t)(*s++ -'0'); if(b>=10) break; i= i*10+b; } return i*sign; } // strtosint64() static int32_t strtoint32percent(const char* s) { // read an unsigned number and convert it to a signed 32-bit // integer; the result will be a positive 
number unless the // character string is followed by a percent sign; // this trailing percent sign will negate the value; // return: number; int32_t i; uint8_t b; if(*s=='-') s++; // ignore leading minus sign i= 0; for(;;) { b= (uint8_t)(*s-'0'); if(b>=10) break; i= i*10+b; s++; } if(*s==' ') s++; // jump over space sign if(*s=='%') return -i; return i; } // strtoint32percent() static inline char* uint32toa(uint32_t v,char* s) { // convert uint32_t integer into string; // v: long integer value to convert; // return: s; // s[]: digit string; char* s1,*s2; char c; s1= s; if(v==0) *s1++= '0'; s2= s1; while(v>0) { *s2++= "0123456789"[v%10]; v/= 10; } *s2--= 0; while(s2>s1) { c= *s1; *s1= *s2; *s2= c; s1++; s2--; } return s; } // end uint32toa() static inline char* int64toa(int64_t v,char* s) { // convert int64_t integer into string; // v: long integer value to convert; // return: s; // s[21]: digit string; char* s1,*s2; char c; s1= s; if(v<0) { *s1++= '-'; v= -v; } else if(v==0) *s1++= '0'; s2= s1; while(v>0) { *s2++= "0123456789"[v%10]; v/= 10; } *s2--= 0; while(s2>s1) { c= *s1; *s1= *s2; *s2= c; s1++; s2--; } return s; } // end int64toa() static inline char* int32fix7topa(int32_t v,char* s) { // convert a signed 7 decimals fixpoint value into a string; // v: fixpoint value // return: pointer to string terminator; // s[13]: destination string; char* s1,*s2,*sterm,c; int i; s1= s; if(v<0) { *s1++= '-'; v= -v; } s2= s1; i= 7; while(--i>=0) { *s2++= (v%10)+'0'; v/= 10; } *s2++= '.'; do { *s2++= (v%10)+'0'; v/= 10; } while(v>0); sterm= s2; *s2--= 0; while(s2>s1) { c= *s1; *s1= *s2; *s2= c; s1++; s2--; } return sterm; } // end int32fix7topa() static inline char *stpcpy0(char *dest, const char *src) { // redefinition of C99's stpcpy() because it's missing in MinGW, // and declaration in Linux seems to be wrong; while(*src!=0) *dest++= *src++; *dest= 0; return dest; } // end stpcpy0() static inline int strblankcmp(const char* s1,const char* s2) { // similar to strcmp(), this 
procedure compares two character strings; // here, possibly included blank characters are ignored; // example: "abc d ef" and " ab cdef " are treated as same; // s1[],s2[]: string to compare against each other; // return: // 0: both strings are similar; // <0: the first string is alphabetical smaller than the second; // >0: the first string is alphabetical greater than the second; for(;;) { while(*s1==' ') s1++; // skip blanks while(*s2==' ') s2++; // skip blanks if(*s1==0 || *s1!=*s2) return *(unsigned char*)s1 - *(unsigned char*)s2; s1++; s2++; } } // end strblankcmp() static inline char *strmcpy(char *dest, const char *src, size_t maxlen) { // similar to strcpy(), this procedure copies a character string; // here, the length is cared about, i.e. the target string will // be limited in case it is too long; // src[]: source string which is to be copied; // maxlen: maximum length of the destination string // (including terminator null); // return: // dest[]: destination string of the copy; this is the // function's return value too; char* d; if(maxlen==0) return dest; d= dest; while(--maxlen>0 && *src!=0) *d++= *src++; *d= 0; return dest; } // end strmcpy() #define strMcpy(d,s) strmcpy((d),(s),sizeof(d)) static inline char *stpmcpy(char *dest, const char *src, size_t maxlen) { // similar to strmcpy(), this procedure copies a character string; // however, it returns the address of the destination string's // terminating zero character; // this makes it easier to concatenate strings; char* d; if(maxlen==0) return dest; d= dest; while(--maxlen>0 && *src!=0) *d++= *src++; *d= 0; return d; } // end stpmcpy() #define stpMcpy(d,s) stpmcpy(d,s,sizeof(d)) static inline int strzcmp(const char* s1,const char* s2) { // similar to strcmp(), this procedure compares two character strings; // here, the number of characters which are to be compared is limited // to the length of the second string; // i.e., this procedure can be used to identify a short string s2 // within a long 
string s1; // s1[]: first string; // s2[]: string to compare with the first string; // return: // 0: both strings are identical; the first string may be longer than // the second; // -1: the first string is alphabetical smaller than the second; // 1: the first string is alphabetical greater than the second; while(*s1==*s2 && *s1!=0) { s1++; s2++; } if(*s2==0) return 0; return *(unsigned char*)s1 < *(unsigned char*)s2? -1: 1; } // end strzcmp() static inline int strzlcmp(const char* s1,const char* s2) { // similar to strzcmp(), this procedure compares two character strings; // and accepts the first string to be longer than the second; // other than strzcmp(), this procedure returns the length of s2[] in // case both string contents are identical, and returns 0 otherwise; // s1[]: first string; // s2[]: string to compare with the first string; // return: // >0: both strings are identical, the length of the second string is // returned; the first string may be longer than the second; // 0: the string contents are not identical; const char* s2a; s2a= s2; while(*s1==*s2 && *s1!=0) { s1++; s2++; } if(*s2==0) return s2-s2a; return 0; } // end strzlcmp() static inline int strycmp(const char* s1,const char* s2) { // similar to strcmp(), this procedure compares two character strings; // here, both strings are end-aligned; // not more characters will be compared than are existing in string s2; // i.e., this procedure can be used to identify a file name extension; const char* s1e; int l; l= strchr(s2,0)-s2; s1e= strchr(s1,0); if(s1e-s1900) lat= 900; // set maximum of 90 degree return ((uint64_t)cosrtab[lat]*(int64_t)londiff)/INT64_C(0x100000000); } // lonadapt() // the table in the previous procedure has been generated by this // program: #if 0 // file cosr.c, run it with: gcc cosr.c -lm -o cosr && ./cosr #include #include #include int main() { int i; printf(" static const uint32_t cosrtab[901]= " "{\n UINT32_C(4294967295),"); for(i= 1;i<900;i++) { if(i%3==0) printf("\n "); 
printf("UINT32_C(%"PRIu32"),",(uint32_t)( cos(i/1800.0*3.14159265359) * INT64_C(0x100000000) )); } printf("\n 0"); printf(" }; // cosr values for 10th degrees from 0 to 90\n"); return 0; } #endif static int32_t geodistance(int32_t x1,int32_t y1, int32_t x2,int32_t y2) { // approximates the geodistance between two points; // x1,y1: geocoordinates of first point; // x2,y2: geocoordinates of second point; // return: distance as angle; // all units in 100 nanodegrees; // how this is done: // distances in West-East direction and in South-North direction // are compared; the longer shorter distance is divided by 3 and // added to the value of the longer distance; // => all points on the edges of an octagon around point 1 // are interpreted as equidistant; // this approximation is close enough for this application; int32_t xdist,ydist; xdist= x2-x1; if(xdist<0) xdist= -xdist; ydist= y2-y1; if(ydist<0) ydist= -ydist; xdist= lonadapt(xdist,y1); if(xdist0) { // still bytes to read rb= read(rf__fd,bp,n); if(rb<=0) { return false; PERRv("read error %"PRIi64" at file %.80s", (int64_t)rb,rf__file) } bp= (uint8_t*)bp+rb; n-= rb; } // still bytes to read return true; } // end rf_read() //------------------------------------------------------------ // end Module rf_ read file module //------------------------------------------------------------ // "I" represents the name for the instances: // region, subregion, city, street, housenumber; #define I region #include __FILE__ #undef I #define I subregion #include __FILE__ #undef I #define I city #include __FILE__ #undef I #define I street #include __FILE__ #undef I #define I housenumber #include __FILE__ #undef I //------------------------------------------------------------ // Module geoc_ geocoding module //------------------------------------------------------------ // this module provides procedures for geocoding; // as usual, all identifiers of a module have the same prefix, // in this case 'geoc'; one underline will follow in 
case of a // global accessible object, two underlines in case of objects // which are not meant to be accessed from outside this module; // the sections of private and public definitions are separated // by a horizontal line: ---- static int geoc__uint64_qsort(const void* a,const void* b) { // uint64_t value comparison for qsort() uint64_t aval,bval; aval= *(uint64_t*)a; bval= *(uint64_t*)b; if(aval>bval) return 1; if(aval==bval) return 0; return -1; } // geoc__uint64_qsort() static void geoc__end() { // terminate the services of this module; // (nothing to do at present) } // geoc__end() //------------------------------------------------------------ static bool geoc_main() { // perform geocoding; // return: 0: success; !=0: error number; // three different syntax types will be recognized: // 1. // 2. , // 3. , // or: , , // types 2 and 3 are currently under construction; // the command "map" (or just "m") will display a map extract // positioned and zoomed to the previous search result char t0[300]; // input line char t1[300]; // copy of input line char* tp; // pointer in t[] char* cp,*sp,*hp; // input strings for city, street and housenumber uint32_t ci,si; // indexes for city and street uint32_t cn,sn; // number of matching strings uint32_t ai; // address dataset index uint32_t aifirst,ailast; // first and last address dataset index uint32_t* up; // pointer to used-by references #define cresultM 500 // maximum number of results for city string #define sresultM 500 // maximum number of results for street string #define resultM 25 // maximum number of results uint32_t result[resultM]; // address dataset indexes in result list int resultn; // number of results int i; atexit(geoc__end); resultn= 0; printf("osmposition ready for input. 
Terminate with Ctrl-D.\n"); for(;;) { // for all lines from standard input // read input, clip CR and LF t0[0]= 0; tp= fgets(t0,sizeof(t0)-2,stdin); if(tp==NULL) // EOF break; tp= t0; while(*tp!=0 && *tp!='\r' && *tp!='\n') tp++; *tp= 0; if(t0[0]==0) // end of input continue; // check for result selection if(isdig(t0[0]) && t0[1]==0 || isdig(t0[0]) && isdig(t0[1]) && t0[2]==0) { // user has selected a result from the list i= t0[0]-'0'; if(isdig(t0[1])) i= i*10+t0[1]-'0'; i--; if(i<0 || i>= resultn) { printf(" No valid result.\n"); continue; } ai= result[i]; /* open map in browser */ { int32_t x,y; const char* region,*subregion,*city,*street,*housenumber; uint8_t weight; char command[500]; char xs[15],ys[15]; int w; dat_getaddr(ai,&x,&y, ®ion,&subregion,&city,&street,&housenumber, NULL,&weight); int32fix7topa(x,xs); int32fix7topa(y,ys); w= 27-weight; if(w<1) w= 1; if(w>19) w= 19; printf(" (opening map position in browser)\n"); sprintf(command,"firefox http://www.openstreetmap.org/" "?mlat=%s\\&mlon=%s#map=%i/%s/%s &", ys,xs,w,ys,xs); system(command); } // open map in browser continue; } // user has selected a result from the list // make a copy of the input line strMcpy(t1,t0); // (needed for error messages only) // parse input line tp= strchr(t0,','); if(tp==NULL) { // there is no comma // must be syntax type 1 cp= t0; sp= hp= NULL; } // there is no comma else { // there is at least one comma sp= t0; // start of street string hp= tp+1; while(tp>sp && tp[-1]==' ') tp--; *tp= 0; // terminate street name string tp= strchr(hp,','); if(tp!=NULL) { // there is a second comma cp= tp+1; while(tp>hp && tp[-1]==' ') tp--; *tp= 0; // terminate house number string if(hp[0]==0) hp= NULL; } // there is a second comma else { // there is no second comma cp= hp; // city name starts where we assumed the // house number to start // search the house number within street name string: // must be the last number which is preceded by a blank; hp= NULL; // (default) tp= sp; while(*tp!=0) { 
if(tp[0]==' ' && isdig(tp[1])) hp= tp; tp++; } if(hp!=NULL) // found a house number *hp++= 0; // terminate street name string } // there is no second comma while(sp[0]==' ') sp++; if(sp[0]==0) sp= NULL; if(hp!=NULL) { while(hp[0]==' ') hp++; if(hp[0]==0) hp= NULL; } } // there is at least one comma while(cp[0]==' ' || isdig(cp[0])) cp++; // remove trailing spaces and numbers from city name if(cp[0]==0) cp= NULL; if(hp!=NULL) { // there is a housenumber tp= strstr(hp,"//"); if(tp!=NULL && tp>hp) { // there is a delimiter for additional remarks // (e.g. floor, appartment number) *tp= 0; // delete it while(tp>hp && tp[-1]==' ') *--tp= 0; // remove trailing spaces } } // there is a housenumber // initialize result seach resultn= 0; // invalidate result list // plausibility check if(cp==NULL && sp==NULL) { printf(" Invalid address format.\n"); continue; } if(hp!=NULL && sp==NULL) { printf( " Ignoring house number %.80s since street is unknown.\n",hp); hp= NULL; } // handle subdivided cities (add wildcards) if(sp!=NULL) { if(strcmp(cp,"Berlin")==0) { cp[6]= '*'; cp[7]= 0; } else if(strcmp(cp,"Köln")==0) { cp[5]= '*'; cp[6]= 0; } } // determine range for city index dat_indexrange_city(cp); if(dat_indexrangefirst_city==0) { printf(" No result for city %.80s.\n",cp); continue; } cn= dat_indexrangelast_city-dat_indexrangefirst_city+1; if(cn>cresultM) { printf(" Too many results for city %.80s: %"PRIu32".\n", cp,cn); continue; } // determine first and last address dataset index to // city index range aifirst= dat_usedfirst_city(dat_indexrangefirst_city); ailast= dat_usedlast_city(dat_indexrangelast_city); // determine range for street index if(sp==NULL) // there is no street string sn= 0; else { // there is a street string dat_indexrange_street(sp); if(dat_indexrangefirst_street==0) { printf(" No result for street %.80s.\n",sp); continue; } sn= dat_indexrangelast_street-dat_indexrangefirst_street+1; if(sn>sresultM) { printf(" Too many result for street %.80s: %"PRIu32".\n", 
sp,sn); continue; } } // there is a street string // determine city indexes for each street index if(sp==NULL) { // no street name in input line if(dat_usedfirst_city!=0) { // found at least one city // enter main address datasets for the city/cities // into result list for(ci= dat_indexrangefirst_city;ci<=dat_indexrangelast_city; ci++) { // for each city match ai= dat_usedfirst_city(ci); if(ai!=0) { // address dataset index is valid // enter the index of that address dataset into list if(resultn>=resultM) { // no more space in result list resultn= resultM+1; // mark that the limit has been exceded break; } // no more space in result list result[resultn++]= ai; } // address dataset index is valid } // for each city match } // found at least one city } // no street name in input line else { // street name in input line for(si= dat_indexrangefirst_street; si<=dat_indexrangelast_street && resultn<=resultM; si++) { // for each street index up= dat_usedafter_street(si,aifirst); for(;;) { // for each reference within the range of selected cities ai= *up; if(ai==0 || ai>ailast) break; // we have found a valid city/street combination // -> enter the index of that address dataset into list if(resultn>=resultM) { // no more space in result list resultn= resultM+1; // mark that the limit has been exceeded break; } result[resultn++]= ai; ci= dat_getaddrcityindex(ai)+1; if(!dat_indexvalid_city(ci)) // index out of bounds break; up= dat_usedafter_street(si, dat_usedfirst_city(ci)); } // for each reference within the range of selected cities } // for each street index } // street name in input line // check if there are valid results if(resultn==0) { printf(" No search result for address: %.80s\n",t1); resultn= 0; continue; } if(resultn>resultM) { printf(" Too many valid results.\n"); resultn= 0; continue; } // care about housenumber for(i= 0;i=2) { // there are at least 2 results uint64_t cowa[resultM]; // combining weight and address index to one sorting criterion uint32_t ai; 
// address index uint64_t co; // assemble combined values for(i= 0;i // 2. , // 3. , // or: , , // types 2 and 3 are currently under construction; // the command "map" (or just "m") will display a map extract // positioned and zoomed to the previous search result int64_t addrn; // number of address datasets addrn= dat_addrn()-1; if(addrn>RAND_MAX/4) PWARN("Constant RAND_MAX too small for so many addresses.") srandom((unsigned int)time(NULL)); while(--global_randomn>=0) { // for each address dataset to be written const char* city,*street,*housenumber; dat_getaddr(random()%addrn+1,NULL,NULL, NULL,NULL,&city,&street,&housenumber,NULL,NULL); if(city[0]==0) continue; if(street[0]==0) printf("%.80s\n",city); else if(housenumber[0]==0) printf("%.80s, %.80s\n",street,city); else printf("%.80s %.80s, %.80s\n",street,housenumber,city); } // for each address dataset to be written return true; } // geoc_random() static bool geoc_debug() { // this procedure is for debugging purpose only; // return: 0: success; !=0: error number; #if 0 //### uint32_t ai,ci,si; // indexes uint32_t uf,ul; // used first, used last uint32_t* up; // used-pointer uint32_t an; // number of address datasets an= (uint32_t)dat_addrn(); uf= dat_usedfirst_street(446067); ul= dat_usedlast_street(446067); printf("Found: %"PRIu32" %"PRIu32" \n",uf,ul); up= dat_usedafter_street(446067,0); printf("up: %"PRIu32" %"PRIu32" \n",*up,up[1]); uf= dat_usedfirst_city(5386); ul= dat_usedlast_city(5386); printf("Found: %"PRIu32" %"PRIu32" \n",uf,ul); dat_indexrange_city("Kürten"); printf("Found: %"PRIu32" %"PRIu32" \n", dat_indexrangefirst_city,dat_indexrangelast_city); dat_indexrange_street("Zur Kiefer"); printf("Found: %"PRIu32" %"PRIu32" \n", dat_indexrangefirst_street,dat_indexrangelast_street); for(ai= 1;ai0) { // for every parameter in command line if(parafile!=NULL) do { // there are parameters waiting in a parameter file ap= aa; for(;;) { aamax= main__aaM-1-(ap-aa); if(fgets(ap,aamax,parafile)==NULL) { if(ap>aa) { 
if(ap>aa && ap[-1]==' ') *--ap= 0; // cut one trailing space break; } goto parafileend; } if(strzcmp(ap,"// ")==0) continue; if(ap>aa && (*ap=='\r' || *ap=='\n' || *ap==0)) { // end of this parameter while(ap>aa && (ap[-1]=='\r' || ap[-1]=='\n')) *--ap= 0; // eliminate trailing NL if(ap>aa && ap[-1]==' ') *--ap= 0; // cut one trailing space break; } ap= strchr(ap,0); // find end of string while(ap>aa && (ap[-1]=='\r' || ap[-1]=='\n')) *--ap= 0; // cut newline chars *ap++= ' '; *ap= 0; // add a space } a= aa; while(*a!=0 && strchr(" \t\r\n",*a)!=NULL) a++; if(*a!=0) break; parafileend: fclose(parafile); parafile= NULL; free(aa); aa= NULL; } while(false); if(parafile==NULL) { if(--argc<=0) break; argv++; // switch to next parameter; as the first one is just // the program name, we must do this previous reading the // first 'real' parameter; a= argv[0]; } if((l= strzlcmp(a,"--parameter-file="))>0 && a[l]!=0) { // parameter file parafile= fopen(a+l,"r"); if(parafile==NULL) { PERRv("Cannot open parameter file: %.80s",a+l) perror(PROGRAM); return 1; } aa= (char*)malloc(main__aaM); if(aa==NULL) { PERR("Cannot get memory for parameter file.") fclose(parafile); parafile= NULL; return 1; } aa[0]= 0; continue; // take next parameter } if((l= strzlcmp(a,"--random-address="))>0 && a[l]!=0) { // define number of addresses to be selected randomly global_randomn= strtosint64(a+l); if(global_randomn<1) global_randomn= 1; continue; // take next parameter } if((l= strzlcmp(a,"--debug"))>0) { // do debugging if(a[l]=='=' && a[l+1]!=0) global_debug= strtosint64(a+l+1); else global_debug= 1; continue; // take next parameter } if(loglevel>0) // verbose mode PINFOv("Parameter %.2000s",a) if(strcmp(a,"-h")==0) { // user wants parameter overview fprintf(stdout,"%s",shorthelptext); // print brief help text // (took "%s", to prevent oversensitive compiler reactions) return 0; } if(strcmp(a,"-help")==0 || strcmp(a,"--help")==0) { // user wants help text fprintf(stdout,"%s",helptext); // print 
help text // (took "%s", to prevent oversensitive compiler reactions) return 0; } if((strcmp(a,"-v")==0 || strcmp(a,"--verbose")==0 || strzcmp(a,"-v=")==0 || strzcmp(a,"--verbose=")==0) && loglevel==0) { // test mode - if not given already char* sp; sp= strchr(a,'='); if(sp!=NULL) loglevel= sp[1]-'0'; else loglevel= 1; if(loglevel<1) loglevel= 1; if(loglevel>MAXLOGLEVEL) loglevel= MAXLOGLEVEL; if(a[1]=='-') { // must be "--verbose" and not "-v" if(loglevel==1) PINFO("Verbose mode.") else PINFOv("Verbose mode %i.",loglevel) } continue; // take next parameter } if(a[0]=='-') { PERRv("unrecognized option: %.80s",a) return 1; } // here: parameter must be a file name inputfilename= a; } // for every parameter in command line // initialization r= dat_ini(inputfilename); // do the work if(global_randomn>0) // write addresses selected randomly r= r && geoc_random(); else if(global_debug!=0) // user wants debugging r= r && geoc_debug(); else r= r && geoc_main(); return r? 0: 1; } // main() //------------------------------------------------------------ // Module dat_ geo data module //------------------------------------------------------------ #else // from here: procedure templates #define TEMPLATEM(f,a) f##_##a #define T(f,a) TEMPLATEM(f,a) #undef TEMPLATEONCE #ifndef TEMPLATEFIRSTRUN #define TEMPLATEFIRSTRUN #define TEMPLATEONCE // this module provides procedures for geocoding; // as usual, all identifiers of a module have the same prefix, // in this case 'dat'; one underline will follow in case of a // global accessible object, two underlines in case of objects // which are not meant to be accessed from outside this module; // container for address data struct dat__addr_struct { // (length 32 Bytes) int32_t x,y; // geocoordinates (unit 10^-7 degree) uint32_t region; // index of region string uint32_t subregion; // index of subregion string uint32_t city; // index of city string uint32_t street; // index of street string uint32_t housenumber; // index of housenumber 
string uint16_t housenumeric; // numeric representation of housenumber int8_t weight; // geographical extend: // binary logarithm of bounding-box width (meters) // as provided by 'osmconvert --add-bboxwidthweight'; uint8_t work; // temporary variable, for internal use; // 0: default; 1: street without city; 2: street was without city; } __attribute__((__packed__)); typedef struct dat__addr_struct dat__addr_t; static dat__addr_t* dat__addr= NULL; // address memory area static dat__addr_t* dat__addre= NULL; // logical end static uint_fast32_t dat__addrn= 0; // number of address objects static size_t dat__strlenmax= 1; // maximum string length // container for string indexes struct dat__idx_struct { const char* string; // pointer to start of string (zero-terminated) uint32_t* usedp; // pointer to used-by information; // it starts with the number of references and continues with // the references themselves } __attribute__((__packed__)); // procedure prototypes static bool dat__ini_region(int); static bool dat__ini_subregion(int); static bool dat__ini_city(int); static bool dat__ini_street(int); static bool dat__ini_housenumber(int); static void geoc_statistics(const char*); static inline const char* dat_string_region(uint32_t ); static inline const char* dat_string_subregion(uint32_t ); static inline const char* dat_string_city(uint32_t ); static inline const char* dat_string_street(uint32_t ); static inline const char* dat_string_housenumber(uint32_t ); #ifdef STATISTICS static int64_t dat__sum= 0; static int64_t dat__sumk= 0; static void dat__statistics(const char* str) { static bool firstrun= true; static char o[41],*op; static char s[41],*sp; int i,j; strncpy(s,str,40); if(firstrun) { firstrun= false; s[40]= 0; } else { dat__sum+= strlen(s)+1; j= 0; if(strycmp(s,"straße")==0) j= 7; if(strycmp(s,"Straße")==0) j= 7; if(strycmp(s,"weg")==0) j= 3; if(strycmp(s,"Weg")==0) j= 3; if(strycmp(s,"platz")==0) j= 5; if(strycmp(s,"Platz")==0) j= 5; s[strlen(s)-j]= 0; sp= s; 
op= o; i= 0; while(i<40 && *sp==*op) { sp++; op++; i++; } dat__sumk+= 1+strlen(s)-i+1; } strncpy(o,s,40); } // dat__statistics() #endif // for statistic purposes static void dat__end() { // terminate the services of this module; FREE(dat__addr) } // dat__end() #endif // TEMPLATEONCE static int T(dat__strtypeid,I)= 0; // data type id; // 1: region; 2: subregion; 3: city; 4: street; 5: housenumber; // memory area for strings static char* T(dat__str,I)= NULL; // string memory area static char* T(dat__stre,I)= NULL; // logical end static uint_fast32_t T(dat__strn,I)= NULL; // number of strings // memory area for used-by references typedef uint32_t dat__used_t; static dat__used_t* T(dat__used,I)= NULL; // used-by memory area static dat__used_t* T(dat__usede,I)= NULL; // logical end // memory area for string index typedef struct dat__idx_struct dat__idx_t; static dat__idx_t* T(dat__idx,I)= NULL; // index memory area static dat__idx_t* T(dat__idxe,I)= NULL; // logical end static uint_fast32_t T(dat__idxn,I)= NULL; // number of strings static void T(dat__end,I)() { // terminate the services of this instance; FREE(T(dat__str,I)) FREE(T(dat__used,I)) FREE(T(dat__idx,I)) } // dat__end_I() static bool T(dat__ini,I)(int strtypeid) { // initialization of this instance; // dat_ini() must have been called before; // strtypeid: data type identifier; // for region: 1, subregion: 2, city: 3, etc.; // return: success; // reads the strings and stores them in main memory; struct strinfo_struct { uint8_t sect; // 0x51 - string statistics section int64_t slen; // 5*8 - length of section contents int64_t strn; // number of strings int64_t strlensum; // sum of lengths of all strings (without term. 
zeros) int64_t strlenmax; // length of longest string (without terminating zeros) int64_t usednsum; // total number of all used-references int64_t usednmax; // maximum number of used-references for a single string } __attribute__((__packed__)) si; bool r; atexit(T(dat__end,I)); T(dat__strtypeid,I)= strtypeid; r= true; // (default) if(r) { // get string statistics section r= r && rf_read(&si,sizeof(si)); r= r && si.sect==0x50+T(dat__strtypeid,I) && si.slen==5*sizeof(int64_t) && si.strn>0 && si.strlenmax<=250 && ( ( si.strlensum>0 && si.strlenmax>0 && si.usednsum>0 && si.usednmax>0 ) || T(dat__strtypeid,I)==2 ); if(!r) PERRv("wrong-formatted string statistics section %i", T(dat__strtypeid,I)) else { if(si.strlenmax>dat__strlenmax) dat__strlenmax= si.strlenmax; } } // get string statistics section if(r) { // allocate memory space for strings T(dat__str,I)= (char*) malloc(sizeof(char)*(si.strlensum+si.strn)+4); if(T(dat__str,I)==NULL) { r= false; PERRv("not enough memory for string data %i", T(dat__strtypeid,I)) } else { T(dat__strn,I)= si.strn; // number of strings T(dat__stre,I)= T(dat__str,I)+(si.strlensum+si.strn); // logical end *T(dat__stre,I)= 0; // terminator for safety reasons } } // allocate memory space for strings if(r) { // allocate memory space for used-by references T(dat__used,I)= (dat__used_t*) malloc(sizeof(dat__used_t)*(si.strn+si.usednsum+si.strn)+4); if(T(dat__used,I)==NULL) { r= false; PERRv("not enough memory for string references %i", T(dat__strtypeid,I)) } else { T(dat__usede,I)= T(dat__used,I)+(si.strn+si.usednsum+si.strn); // logical end } } // allocate memory space for used-by references if(r) { // allocate memory space for string indexes T(dat__idx,I)= (dat__idx_t*) malloc(sizeof(dat__idx_t)*si.strn+4); if(T(dat__idx,I)==NULL) { r= false; PERRv("not enough memory for string indexes %i", T(dat__strtypeid,I)) } else { T(dat__idxe,I)= T(dat__idx,I)+si.strn; // logical end T(dat__idxn,I)= si.strn; // number of indexes } } // allocate memory 
space for string indexes if(r) { // get strings section uint8_t sect; // section id int64_t slen; // section length r= r && rf_read(§,sizeof(sect)); r= r && sect==0x60+T(dat__strtypeid,I); r= r && rf_read(&slen,sizeof(slen)); r= r && slen>0 && slen<=si.strlensum+si.strn && rf_read(T(dat__str,I),si.strlensum+si.strn); if(!r) PERRv("wrong-formatted string data section %i", T(dat__strtypeid,I)) } // get strings section if(r) { // get used-by section uint8_t sect; // section id int64_t slen; // section length r= r && rf_read(§,sizeof(sect)); r= r && sect==0x70+T(dat__strtypeid,I); r= r && rf_read(&slen,sizeof(slen)); r= r && slen>0 && slen<=sizeof(dat__used_t)*(si.strn+si.usednsum+si.strn) && rf_read(T(dat__used,I), sizeof(dat__used_t)*(si.strn+si.usednsum+si.strn)); if(!r) PERRv("wrong-formatted string references section %i", T(dat__strtypeid,I)) } // get used-by section if(r) { // assemble string index char* sp; dat__used_t* up; dat__idx_t* ip; sp= T(dat__str,I); up= T(dat__used,I); ip= T(dat__idx,I); while(ipstring= sp; ip->usedp= up; while(sp=T(dat__usede,I)) up= T(dat__used,I); // (prevent out-of-range errors) ip++; } } // assemble string index return r; } // dat_ini_I() //------------------------------------------------------------ #ifdef TEMPLATEONCE static bool dat_ini(const char* file) { // initialization of this module; // file[]: file name of .ogb address data file; // return: success; // // format of .ogb address file: // // (uint8_t) 0xfb - start of file // // (uint8_t) 0xe0 - file format section // (int64_t) 15 - length of section contents // (char[14+1]) "osmgeobase0000" - file format name // // (uint8_t) 0x40 - address data section // (int64_t) - length of section contents // (data__addr_t) - first address dataset // (see definition of dat__addr) // first dataset is a dummy for 'invalid address' // (data__addr_t) - second address dataset // (data__addr_t) - ... 
// (dat__addr_t) - last address dataset // // (uint8_t) 0x51 - "region" statistics section // (int64_t) 5*8 - length of section contents // (int64_t) - number of strings // (int64_t) - sum of lengths of all strings (without term. zeros) // (int64_t) - length of longest string (without terminating zeros); // usually 40 because of limitation in osmgeobase; // (int64_t) - total number of all used-references // (int64_t) - maximum number of used-references for a single string // // (uint8_t) 0x61 - "region" strings section // (int64_t) - length of section contents // (char[l+1]) - first string (zero-terminated); // usually "" for 'no information'; // (char[l+1]) - second string (zero-terminated) // (char[l+1]) - ... // (char[l+1]) - last string (zero-terminated) // // (uint8_t) 0x71 - "region" used-references section // (int64_t) - length of section contents // (uint32_t) - number of references for first string // (uint32_t[]) - references for first string // (uint32_t) 0 - references' terminator for first string // (uint32_t) - number of references for second string // (uint32_t[]) - references for second string // (uint32_t) 0 - references' terminator for second string // (uint32_t) - ... // (uint32_t[]) - ... // (uint32_t) - number of references for last string // (uint32_t[]) - references for last string // (uint32_t) 0 - references' terminator for last string // // (uint8_t) 0x52 - "subregion" statistics section // ... // (uint8_t) 0x62 - "subregion" strings section // ... // (uint8_t) 0x72 - "subregion" used-references section // ... // // (uint8_t) 0x53 - "city" statistics section // ... ... // // (uint8_t) 0x54 - "street" statistics section // ... ... // // (uint8_t) 0x55 - "housenumber" statistics section // ... ... 
// // all numbers are stored in little-endian order struct addrinfo_struct { uint8_t sect; // 0x40 - address data section int64_t slen; // length of section contents } __attribute__((__packed__)) ai; bool r; atexit(dat__end); r= rf_open(file); if(!r) return false; if(r) { // check file type struct filetype_struct { uint8_t sof; // 0xfb - start of file uint8_t sect; // 0xe0 - file format section int64_t slen; // 15 - length of section contents char ffname[14+1]; // "osmgeobase0000" - file format name } __attribute__((__packed__)) ft; r= r && rf_read(&ft,sizeof(ft)); r= r && ft.sof==0xfb && ft.sect==0xe0 && ft.slen==15 && memcmp(ft.ffname,"osmgeobase0000",15)==0; if(!r) PERR("wrong filetype") } // check file type if(r) { // get address data section information r= r && rf_read(&ai,sizeof(ai)); r= r && ai.sect==0x40; if(!r) PERR("wrong-formatted address section information") } // get address data section information if(r) { // allocate memory space for address data dat__addr= (dat__addr_t*)malloc(ai.slen+4); if(dat__addr==NULL) { r= false; PERR("not enough memory for address data") } else { dat__addrn= ai.slen/sizeof(dat__addr[0]); // number of address objects dat__addre= dat__addr+dat__addrn; // logical end } } // allocate memory space for address data if(r) { // get address data r= r && rf_read(dat__addr,ai.slen); if(!r) PERR("could not read address data") } // get address data // do instances initialization r= r && dat__ini_region(1); r= r && dat__ini_subregion(2); r= r && dat__ini_city(3); r= r && dat__ini_street(4); r= r && dat__ini_housenumber(5); rf_close(); #ifdef STATISTICS PINFOv("statistics: %"PRIi64" %"PRIi64,dat__sum,dat__sumk) #endif return r; } // dat_ini() static inline uint64_t dat_addrn() { // get total number of address datasets; // return: number of address datasets; return dat__addrn; } // dat_addrn() static inline uint32_t dat_addrhousenumber(uint32_t aindex, const char* hstring) { // get address dataset index of that address dataset which // 
matches best to a certain housenumber; // aindex: first address dataset index matching city and street // in question; // hstring[]: housenumber to search for; // NULL: aindex will be returned; // return: index of that address dataset which matches best to hs[]; uint_fast32_t hnumeric; // numeric interpretation of hs[] uint32_t cindex,sindex; // indexes of reference address dataset dat__addr_t* arp; // address of reference address dataset dat__addr_t* ap; // matching or lower address dataset dat__addr_t* anp; // next dataset to examine if(hstring==NULL) // housenumber unknown return aindex; hnumeric= strtouint32(hstring); if(hnumeric>65535) hnumeric= 65535; // limit range to fit into 16 bit arp= dat__addr+aindex; // reference address dataset cindex= arp->city; sindex= arp->street; // reference indexes ap= arp; // start search at reference dataset // do wide-step search forward for(;;) { anp= ap+10; if(anp>=dat__addre) break; if(anp->city==cindex && anp->street==sindex && anp->housenumeric<=hnumeric) // matches reference dataset AND // housenumber not higher than reference housenumber ap= anp; // do this step forward else break; } // do small-step search backward or forward if(ap->housenumeric==hnumeric) { // wide-step search brought an exact numeric result // back-search to find first occurrence for(;;) { if(ap<=arp) break; anp= ap-1; if(anp->housenumeric!=hnumeric) break; ap= anp; } } // wide-step search brought an exact numeric result else { // wide-step search did not bring an exact numeric result // forward-search to find first occurrence for(;;) { anp= ap+1; if(anp>=dat__addre) break; if(anp->city!=cindex || anp->street!=sindex) // no match any more break; if(anp->housenumeric>hnumeric) // housenumber higher than reference housenumber break; ap= anp; // do this step forward if(anp->housenumeric==hnumeric) // found the first ocurrence of numeric housenumber break; } } // wide-step search did not bring an exact numeric result // do string search anp= ap; for(;;) 
{ if(anp>=dat__addre) break; if(anp->city!=cindex || anp->street!=sindex || anp->housenumeric!=hnumeric) // no index match any more break; if(strblankcmp(dat_string_housenumber(anp->housenumber), hstring)==0) { // found a string match return anp-dat__addr; } anp++; } // here: no string match; // satisfy if numeric match if(ap->housenumeric==hnumeric) // numeric match return ap-dat__addr; // here: no numeric match // try to find the best matching neighbor anp= ap+1; if(anp>=dat__addre) anp= dat__addre-1; if(anp->city==cindex && anp->street==sindex && anp->housenumeric-hnumeric < hnumeric-ap->housenumeric) // next address dataset's housenumber is closer return anp-dat__addr; // take that dataset // here: could not improve the string match return ap-dat__addr; } // dat_addrhousenumber() static inline uint32_t dat_getaddrcityindex(uint32_t aindex) { // get city index of an address dataset; // aindex: address dataset index; // return: index of city string; // 0: no city string; return dat__addr[aindex].city; } // dat_getaddrcityindex() static inline void dat_getaddr(uint32_t aindex, int32_t* xp,int32_t* yp, const char** regionp,const char** subregionp,const char** cityp, const char** streetp,const char** housenumberp, uint32_t* housenumericp,uint8_t* weightp) { // get all data of an address dataset; // aindex: address dataset index; // return: // *xp,*yp: geocoordinates; // *regionp,*subregionp,*cityp,*streetp,*housenumberp: // strings of this address; // *housenumericp: numeric value of house number; // *weightp: weight; // each pointer parameter may be NULL in case its return value // is not needed; dat__addr_t* ap; ap= dat__addr+aindex; if(regionp!=NULL) *regionp= dat_string_region(ap->region); if(subregionp!=NULL) *subregionp= dat_string_subregion(ap->subregion); if(cityp!=NULL) *cityp= dat_string_city(ap->city); if(streetp!=NULL) *streetp= dat_string_street(ap->street); if(housenumberp!=NULL) *housenumberp= dat_string_housenumber(ap->housenumber); 
if(housenumericp!=NULL) *housenumericp= ap->housenumeric; if(xp!=NULL) *xp= ap->x; if(yp!=NULL) *yp= ap->y; if(weightp!=NULL) *weightp= ap->weight & 0x3f; } // dat_getaddr() static inline uint8_t dat_weight(uint32_t aindex) { // get weight of an address dataset; // aindex: address dataset index; // return: weight; return dat__addr[aindex].weight & 0x3f; } // dat_weight() #endif // TEMPLATEONCE static inline bool T(dat_indexvalid,I)(uint32_t index) { // determine if an index is valid; // index: index whose validity is to be determined; return index>=0 && index=T(dat__idxn,I)) i--; return i; } // dat_indexafter_I() static inline uint32_t T(dat_indexbefore,I)(const char* string) { // determine the index to a given string; // if there is no such string, this procedure will return the // index of the alphabetically preceding string; // string[]: string the index is to be determined for; // NULL: will return 0; // return: string index of matching or nearly matching string; // 0: there is no such string in string memory; int_fast32_t i,i1,i2; // iteration indexes: // middle, lower including, upper excluding; int cv; // compare value if(string==NULL) return 0; i1= 1; i2= T(dat__idxn,I); while(i10) i--; return i; } // dat_indexbefore_I() static uint32_t T(dat_indexrangefirst,I)= 0,T(dat_indexrangelast,I)= 0; // results of dat_indexrange_I(); // 0: no matching data; static inline void T(dat_indexrange,I)(const char* string) { // determine the index to a given string; // if there is no exactly matching string, this procedure // determines the index of the alphabetically following string // and the index of the alphabetically last string which starts // with the letters of the given string; // string[]: string the index is to be determined for; // NULL: will return 0; // return: dat_indexrangefirst_I, dat_indexrangelast_I; // 0: there is no such string index range; static char stringcopy[300],*sp; // (should be at least dat__strlenmax bytes long) static char 
stringfollower[300]; // (should be at least dat__strlenmax bytes long) const char* s0; s0= string; while(*s0!=0 && *s0!='*') s0++; if(*s0=='*') { // there is a wildcard strMcpy(stringcopy,string); // make a copy from search string stringcopy[s0-string]= 0; // delete wildcard character s0= stringcopy; } // there is a wildcard else { // there is no wildcard s0= string; T(dat_indexrangefirst,I)= T(dat_index,I)(s0); // check for exact match first if(T(dat_indexrangefirst,I)!=0) { // there is an exact match T(dat_indexrangelast,I)= T(dat_indexrangefirst,I); return; } // there is an exact match } // there is no wildcard T(dat_indexrangefirst,I)= T(dat_indexafter,I)(s0); if(T(dat_indexrangefirst,I)==0) { T(dat_indexrangelast,I)= 0; return; } sp= stpmcpy(stringfollower,s0,sizeof(stringfollower)-2); *sp++= '\xff'; *sp= 0; T(dat_indexrangelast,I)= T(dat_indexbefore,I)(stringfollower); if(T(dat_indexrangelast,I)