// osmchange 2011-04-27 22:00
#define VERSION "0.7"
// (c) Markus Weber, Nuernberg
// (main modules, algorithms for hash, border polygon clipping)
// (c) Stephan Knauss, Muenchen
// (C standards, i/o optimizing for Windows)
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Affero General Public License
// as published by the Free Software Foundation.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, see http://www.gnu.org/licenses/.
// Other licenses are available on request; please ask the author.
#define MAXLOGLEVEL 2
const char* helptext=
"\nosmchange " VERSION "\n"
"\n"
"This program updates an .osm file using one or more .osc files.\n"
"For example:\n"
"\n"
" cat a.osm | ./osmchange changefile.osc >b.osm\n"
" bzcat a.osm.bz2 | ./osmchange changefile.osc | gzip -1 >b.osm.gz\n"
" cat day24.osm | ./osmchange c24_25.osc c25_26.osc >day26.osm\n"
"\n"
"If you want to limit the region, you can use a bounding box. To do\n"
"this, enter the southwestern and the northeastern corners of the box.\n"
"For example:\n"
"\n"
" cat a.osm | ./osmchange -b=-0.5,51,0.5,52 >b.osm\n"
" cat a.osm | ./osmchange -b=8,-5,20,5 changefile.osc >b.osm\n"
"\n"
"For another option to limit the region, you can use a border polygon\n"
"file:\n"
"\n"
" cat a.osm | ./osmchange -B=border.poly changefile.osc >b.osm\n"
" cat a.osm | ./osmchange -B=france.poly >b.osm\n"
"\n"
"The format of a border polygon file can be found in the OSM Wiki:\n"
"http://wiki.openstreetmap.org/wiki/Osmosis/Polygon_Filter_File_Format\n"
"You do not need to follow strictly the format description, but you\n"
"must ensure that every line of coordinates starts with blanks.\n"
"\n"
"Tuning\n"
"\n"
"If your operating system provides only low performance pipes for\n"
"standard input, you can use the -i parameter to read an uncompressed\n"
".osm file directly. This may be useful under Windows. For example:\n"
"\n"
" ./osmchange -i=a.osm changefile.osc >b.osm\n"
"\n"
"To speed-up the process, the program uses some main memory for a\n"
"hash table. By default, it uses 320 MiB for storing a flag for every\n"
"possible node, 60 for the way flags, and 20 relation flags.\n"
"Every byte holds the flag for 8 ID numbers, i.e., in 320 MiB the\n"
"program can store 2684 million flags. As there are less than 1000\n"
"million IDs for nodes at present (Oct 2010), 120 MiB would suffice.\n"
"So, for example, you can decrease the hash sizes to e.g. 130, 12 and\n"
"2 MiB using this option:\n"
"\n"
" -h130-12-2\n"
"\n"
"But keep in mind that the OSM database is continuously expanding. For\n"
"this reason the program-own default value is higher than shown in the\n"
"example, and it may be appropriate to increase it in the future.\n"
"If you do not want to bother with the details, you can enter the\n"
"amount of memory as a sum, and the program will divide it by itself.\n"
"For example:\n"
"\n"
" -h1000\n"
"\n"
"These 1000 MiB will be split in three parts: 800 for nodes, 150 for\n"
"ways, and 50 for relations.\n"
"\n"
"Because we are taking hashes, it is not necessary to provide all the\n"
"suggested memory; the program will operate with less hash memory too.\n"
"But, in this case, the border filter will be less effective, i.e.,\n"
"some ways and some relations will be left in the output file although\n"
"they should have been excluded.\n"
"The maximum value the program accepts for the hash size is 4000 MiB;\n"
"If you exceed the maximum amount of memory available on your system,\n"
"the program will try to reduce this amount and display a warning\n"
"message.\n"
"\n"
"Limitations\n"
"\n"
"Input files must contain the objects strictly ordered by their type:\n"
"first, all nodes, next, all ways, followed by all relations. Within\n"
"each of these sections, the objects must be ordered by their ID tags.\n"
"In each object, the ID tag must be the first tag; i.e., it must\n"
"follow immediately the object type \'\n"
".osm and .osc files usually adhere to both of these conditions. This\n"
"means that you do not have to worry about these limitations. osmchange\n"
"will display an error message if this sequence is broken. You may also\n"
"test the sequence within a file without doing any other processing:\n"
"\n"
" cat file_to_test.osm | ./osmchange -t\n"
"\n"
"If a polygon file for borders is supplied, there are three further\n"
"limitations:\n"
"First, the maximum number of polygon points is about 40,000.\n"
"Second, ways without a node inside the border polygon will be filtered\n"
"out as well as relations which refer only to such ways. Relations\n"
"which refer solely to unused relations with a lower ID will also be\n"
"filtered out, whereas every other relation will stay. For this reason,\n"
"there might be a few unwanted relations in the program\'s output; but\n"
"usually, this does not matter because they do not have georeferences.\n"
"Third, extremely large relations (more than 200,000 characters) might\n"
"not be filtered out. In this case, you will get a warning message.\n"
"\n"
"Hint: If you apply change files to one regional .osm file over and\n"
"over, there is a chance a way could stay unrecognized:\n"
"The way was completely outside the limiting borders and has been moved\n"
"into the area without making changes to the data set of the way. But\n"
"this case is extremely rare.\n"
"\n"
"Presently, this program is in an experimental state. Please expect\n"
"errors and do not use the program in productive or commercial systems.\n"
"\n"
"There is NO WARRANTY, to the extent permitted by law.\n"
"Please send any bug reports to markus.weber@gmx.com\n\n";
#define _FILE_OFFSET_BITS 64
#include
#include
#include
#include
#include
#include
#include
typedef enum {false= 0,true= 1} bool;
#define isdig(x) isdigit((unsigned char)(x))
static int loglevel= 0; // logging to stderr;
// 0: no logging; 1: small logging; 2: normal logging;
// 3: extended logging;
static inline char *strmcpy(char *dest, const char *src, size_t maxlen) {
// similar to strcpy(), this procedure copies a character string;
// here, the length is cared about, i.e. the target string will
// be limited in case it is too long;
// src[]: source string which is to be copied;
// maxlen: maximum length of the destination string
// (including terminator null);
// return:
// dest[]: destination string of the copy; this is the
// function's return value too;
char* d;
if(maxlen==0)
return dest;
d= dest;
while(--maxlen>0 && *src!=0)
*d++= *src++;
*d= 0;
return dest;
} // end strmcpy()
#define strMcpy(d,s) strmcpy((d),(s),sizeof(d))
static inline int strzcmp(const char* s1,const char* s2) {
// similar to strcmp(), this procedure compares two character strings;
// here, the number of characters which are to be compared is limited
// to the length of the second string;
// i.e., this procedure can be used to identify a short string s2
// within a long string s1;
// s1[]: first string;
// s2[]: string to compare with the first string;
// return:
// 0: both strings are identical; the first string may be longer than
// the second;
// -1: the first string is alphabetical smaller than the second;
// 1: the first string is alphabetical greater than the second;
while(*s1==*s2 && *s1!=0) { s1++; s2++; }
if(*s2==0)
return 0;
return *(unsigned char*)s1 < *(unsigned char*)s2? -1: 1;
} // end strzcmp()
static inline int strzlcmp(const char* s1,const char* s2) {
// similar to strzcmp(), this procedure compares two character strings;
// and accepts the first string to be longer than the second;
// other than strzcmp(), this procedure returns the length of s2[] in
// case both string contents are identical, and returns 0 otherwise;
// s1[]: first string;
// s2[]: string to compare with the first string;
// return:
// >0: both strings are identical, the length of the second string is
// returned; the first string may be longer than the second;
// 0: the string contents are not identical;
const char* s2a;
s2a= s2;
while(*s1==*s2 && *s1!=0) { s1++; s2++; }
if(*s2==0)
return s2-s2a;
return 0;
} // end strzlcmp()
//------------------------------------------------------------
// Module write_ write module
//------------------------------------------------------------
// this module provides a procedure which writes a byte to
// standard output;
// as usual, all identifiers of a module have the same prefix,
// in this case 'write'; an underline will follow in case of a
// global accessible object, two underlines in case of objects
// which are not meant to be accessed from outside this module;
// the sections of private and public definitions are separated
// by a horizontal line: ----
static char write__buf[UINT64_C(16)*1024*1024];
static char* write__bufe= write__buf+sizeof(write__buf);
static char* write__bufp= write__buf;
//------------------------------------------------------------
static bool write_testmode= false; // no standard output
static inline void write_stdout(int c) {
// write one byte to stdout, use a buffer;
if(write__bufp>=write__bufe) { // the write buffer is full
if(!write_testmode)
write(1,write__buf,write__bufp-write__buf);
write__bufp= write__buf;
}
*write__bufp++= (char)c;
} // end write_stdout();
static inline void write_stdout_nl() {
// write a NL character to stdout;
// in case of Windows, precede NL with a CR;
#ifdef _WIN32
write_stdout('\r'); // add a CR character
#endif
write_stdout('\n'); // add a NL character
} // end write_stdout_nl();
static inline void write_flush() {
if(write__bufp>write__buf && !write_testmode)
// at least one byte in buffer AND not test mode
write(1,write__buf,write__bufp-write__buf);
write__bufp= write__buf;
} // end write_flush();
//------------------------------------------------------------
// end Module write_ write module
//------------------------------------------------------------
//------------------------------------------------------------
// Module hash_ OSM hash module
//------------------------------------------------------------
// this module provides three hash tables with default sizes
// of 320, 60 and 20 MB;
// the procedures hash_set() and hash_get() allow bitwise access
// to these tables;
// as usual, all identifiers of a module have the same prefix,
// in this case 'hash'; an underline will follow in case of a
// global accessible object, two underlines in case of objects
// which are not meant to be accessed from outside this module;
// the sections of private and public definitions are separated
// by a horizontal line: ----
static bool hash__initialized= false;
#define hash__M 3
static unsigned char* hash__mem[hash__M]= {NULL,NULL,NULL};
// start of the hash fields for each object type (node, way, relation);
static unsigned long hash__max[hash__M]= {0,0,0};
// size of the hash fields for each object type (node, way, relation);
static int hash__error_number= 0;
// 1: object too large
static void hash__end() {
// clean-up for hash module;
// will be called at program's end;
int o; // object type
for(o= 0;o4000) x= 4000; \
hash__max[o]= x*(1024*1024);
D(n,0) D(w,1) D(r,2)
#undef D
// allocate memory for each hash table
for(o= 0;o=1024);
if(hash__mem[o]==NULL) // allocation unsuccessful at all
error= true; // memorize that the program should be aborted
} // end for each hash table
atexit(hash__end); // chain-in the clean-up procedure
if(!error) hash__initialized= true;
return error? 2: warning? 1: 0;
} // end hash_ini()
#if 0 // currently not used in this program
static void hash_set(int o,const char* id) {
// set a flag for a specific object type and ID;
// o: object type; 0: node; 1: way; 2: relation;
// caution: due to performance reasons the boundaries
// are not checked;
// id: id of the object; the id is given as a string of decimal
// digits; a specific string terminator is not necessary,
// it is assumed that the id number ends with the first
// non-digit character;
unsigned char* mem; // address of byte in hash table
uint64_t idi; // bit number (0..7)
unsigned int ido; // bit offset to idi;
if(!hash__initialized) return; // error prevention
idi= 0;
if(*id!='-') { // positive id
while(isdig(*id)) { idi= idi*10+(*id-'0'); id++; }
}
else { // negative id
id++;
while(isdig(*id)) { idi= idi*10+(*id-'0'); id++; }
idi= hash__max[o]*8-idi;
}
ido= idi&0x7; // extract bit number (0..7)
idi>>=3; // calculate byte offset
idi%= hash__max[o]; // consider length of hash table
mem= hash__mem[o]; // get start address of hash table
mem+= idi; // calculate address of the byte
*mem|= (1<>=3; // calculate byte offset
idi%= hash__max[o]; // consider length of hash table
mem= hash__mem[o]; // get start address of hash table
mem+= idi; // calculate address of the byte
*mem|= (1<>=3; // calculate byte offset
idi%= hash__max[o]; // consider length of hash table
mem= hash__mem[o]; // get start address of hash table
mem+= idi; // calculate address of the byte
flag= (*mem&(1<>=3; // calculate byte offset
idi%= hash__max[o]; // consider length of hash table
mem= hash__mem[o]; // get start address of hash table
mem+= idi; // calculate address of the byte
flag= (*mem&(1<", "";
// fn[]: file name, for error messages;
// return: there is at least one object with a set flag OR
// there were no references at all;
// uses: hash_get();
int l; // length of matched key identifier
int o; // object type; 0: node; 1: way; 2: relation;
int ret;
ret= true; // (default)
while(*s!=0) { // still characters to parse
if(*s=='<') { // a key identifier starts here
if(strzcmp(s,"")==0 || strzcmp(s,"")==0)
return ret;
o= 0; // (just to get sure)
if(type==3 &&
(l= strzlcmp(s,"0) {
// this object is a relation and refers to another relation
int64_t refid; // ID of referenced relation
const char* sp;
o= 2;
// find out if the referenced relation has a lower id;
// if so, we can assume that that relation must have been
// flagged if it was needed; if it has a higher id,
// we cannot be sure if the referenced relation refers to
// objects within the border of the defined polygon,
// because the referenced relation may have not yet been
// tested for dependencies; so we must keep the object;
sp= s+l;
refid= 0;
if(*sp!='-') { // positive refid
while(isdig(*sp)) { refid= refid*10+(*sp-'0'); sp++; }
}
else { // negative refid
sp++;
while(isdig(*sp)) { refid= refid*10+(*sp-'0'); sp++; }
refid= -refid;
}
if(refid>=id)
return true;
if(hash_geti(o,refid)) // there is flag for the
// related object in hash table
return true;
ret= false; // memorize that there was at least one reference
s+= l-1; // jump over the search string
} // end this object refers to a relation;
else if((l= strzlcmp(s,"0) o= 0;
else if((l= strzlcmp(s,"0) o= 0;
else if((l= strzlcmp(s,"0) o= 1;
if(l>0) { // we found one of the searched key identifiers
if(hash_get(o,s+l)) // there is flag for the
// related object in hash table
return true;
ret= false; // memorize that there was at least one reference
s+= l-1; // jump over the search string
}
} // end a key identifier starts here
s++; // take the next character
} // end still characters to parse
fprintf(stderr,"osmchange: %s %"PRIi64" too large in %s\n",
(type==2? "Way": type==3? "Relation": "?"), id, fn);
hash__error_number= 1;
return true;
// return true, because we are not sure if this object has nodes
// within the borders (we did not get the whole object data);
} // end hash_getparse()
#if 0 // OSM XML examples
#endif
static int hash_queryerror() {
// determine if an error has occurred;
return hash__error_number;
} // end hash_queryerror()
//------------------------------------------------------------
// end Module hash_ OSM hash module
//------------------------------------------------------------
//------------------------------------------------------------
// Module border_ OSM border module
//------------------------------------------------------------
// this module provides procedures for reading the border file
// (.poly) and determine if a point lies inside or outside the
// border polygon;
// as usual, all identifiers of a module have the same prefix,
// in this case 'border'; an underline will follow in case of a
// global accessible object, two underlines in case of objects
// which are not meant to be accessed from outside this module;
// the sections of private and public definitions are separated
// by a horizontal line: ----
static const long border__nil= 2000000000L;
static long border__bx1= 2000000000L,border__by1,
border__bx2,border__by2;
// in case of a border box:
// coordinates of southwest and northeast corner;
// in case of a border polygon:
// for the border polygon, every edge is stored in a list;
// to speed-up the inside/outside question we need to sort the edges
// by x1; subsequently, for every edge there must be stored references
// which refer to all that edges which overlap horizontally with
// that region between x1 and the next higher x1 (x1 of the next edge
// in the sorted list);
#define border__edge_M 60004
typedef struct border__edge_t {
long x1,y1,x2,y2; // coordinates of the edge; always: x1=0) d++;
} while(d<7); // end for every digit
k*= dig[d]*sign;
return k;
} // end border__strtodeg()
static int border__qsort_edge(const void* a,const void* b) {
// edge comparison for qsort()
return ((border__edge_t*)a)->x1 > ((border__edge_t*)b)->x1;
}
//------------------------------------------------------------
static bool border_strtocoord(const char* s,long* xp,long* yp) {
// find the geographical coordinates in a string and
// convert them into degree numbers;
// in fixpoint format (10 millionth degrees);
// s[]: string with the coordinates; possible formats:
// id="1234" user="someone" lat="-70.500" lon= "89"
// lon= ".01" lat="-179.56789"
// the search ends as soon as > or a null character
// has been found;
// return: conversion successful;
// x,y: coordinates in 0.0000001 degrees;
// x=='border__nil': found no coordinate;
char c;
long x,y;
x= y= border__nil;
for(;;) { // for characters in string
c= *s;
if(c=='\"') { do c= *++s; while(c!='\"' && c!=0); }
if(c=='>' || c==0)
break;
if(c==' ' && s[1]=='l') { // may be start of " lat" or " lon"
if(s[2]=='o' && s[3]=='n' && s[4]=='=' && s[5]=='\"') {
// " lon=\""
x= border__strtodeg(s+6);
if(y!=border__nil)
break;
s+= 4;
}
else if(s[2]=='a' && s[3]=='t' && s[4]=='=' && s[5]=='\"') {
// " lat=\""
y= border__strtodeg(s+6);
if(x!=border__nil)
break;
s+= 4;
}
} // end may be start of " lat" or " lon"
s++;
} // end for characters in string
if(y==border__nil || y==border__nil) {
*xp= border__nil; *yp= border__nil;
return false;
}
*xp= x; *yp= y;
return true;
} // end // border_strtocoord()
static bool border_active= false; // borders are to be considered;
// this variable must not be written from outside of the module;
static bool border_box(const char* s) {
// read coordinates of a border box;
// s[]: coordinates as a string; example: "11,49,11.3,50"
// return: success;
double x1f,y1f; // coordinates of southwestern corner
double x2f,y2f; // coordinates of northeastern corner
int r;
x1f= y1f= x2f= y2f= 200.1;
r= sscanf(s,"%lG,%lG,%lG,%lG",&x1f,&y1f,&x2f,&y2f);
if(r!=4 || x1f<-180.1 || x1f>180.1 || y1f<-90.1 || y1f>90.1 ||
x2f<-180.1 || x2f>180.1 || y2f<-90.1 || y2f>90.1)
return false;
border_active=true;
border__bx1= x1f*10000000L; // convert floatingpoint to fixpoint
border__by1= y1f*10000000L;
border__bx2= x2f*10000000L;
border__by2= y2f*10000000L;
return true;
} // end border_box()
static bool border_file(const char* fn) {
// read border polygon file, store the coordinates, and determine
// an enclosing border box to speed-up the calculations;
// fn[]: file name;
// return: success;
static long nil;
if(!border__ini())
return false;
nil= border__nil;
/* get border polygon */ {
border__edge_t* bep; // growing pointer in border__edge[]
border__edge_t* bee; // memory end of border__edge[]
FILE* fi;
char s[80],*sp;
long x0,y0; // coordinate of the first point in a section;
// this is used to close an unclosed polygon;
long x1,y1; // last coordinates
long x,y;
border__edge[0].x1= nil;
fi= fopen(fn,"rb");
if(fi==NULL)
return false;
bee= border__edge+(border__edge_M-2);
bep= border__edge;
x0= nil; // (sign that there is no first coordinate at the moment)
x1= nil; // (sign that there is no last coordinate at the moment)
for(;;) { // for every line in border file
s[0]= 0;
sp= fgets(s,sizeof(s),fi);
if(bep>=bee)
return false;
if(s[0]!=' ' && s[0]!='\t') { // not inside a section
if(x0!=nil && x1!=nil && (x1!=x0 || y1!=y0)) {
// last polygon was not closed
if(x1==x0) { // the edge would be vertical
// we have to insert an additional edge
x0+= 3;
if(x0>x1)
{ bep->x1= x1; bep->y1= y1; bep->x2= x0; bep->y2= y0; }
else
{ bep->x1= x0; bep->y1= y0; bep->x2= x1; bep->y2= y1; }
bep->chain= NULL;
if(loglevel>=1)
fprintf(stderr,"+ %i %li,%li,%li,%li\n",
(int)(bep-border__edge),
bep->x1,bep->y1,bep->x2,bep->y2);
bep++;
x1= x0; y1= y0;
x0-= 3;
} // the edge would be vertical
// close the polygon
if(x0>x1)
{ bep->x1= x1; bep->y1= y1; bep->x2= x0; bep->y2= y0; }
else
{ bep->x1= x0; bep->y1= y0; bep->x2= x1; bep->y2= y1; }
bep->chain= NULL;
if(loglevel>=1)
fprintf(stderr,"c %i %li,%li,%li,%li\n",
(int)(bep-border__edge),bep->x1,bep->y1,bep->x2,bep->y2);
bep++;
} // end last polygon was not closed
x0= x1= nil;
} // end not inside a section
else { // inside a section
double xf,yf;
xf= yf= 200.1;
sscanf(s+1,"%lG %lG",&xf,&yf);
if(xf<-180.1 || xf>180.1 || yf<-90.1 || yf>90.1) x= nil;
else
{ x= xf*10000000+0.5; y= yf*10000000+0.5; }
if(x!=nil) { // data plausible
if(x1!=nil) { // there is a preceding coordinate
if(x==x1) x+= 2; // do not accept exact north-south
// lines, because then we may not be able to determine
// if a point lies inside or outside the polygon;
if(x>x1)
{ bep->x1= x1; bep->y1= y1; bep->x2= x; bep->y2= y; }
else
{ bep->x1= x; bep->y1= y; bep->x2= x1; bep->y2= y1; }
bep->chain= NULL;
if(loglevel>=1)
fprintf(stderr,"- %i %li,%li,%li,%li\n",
(int)(bep-border__edge),
bep->x1,bep->y1,bep->x2,bep->y2);
bep++;
} // end there is a preceding coordinate
x1= x; y1= y;
if(x0==nil)
{ x0= x; y0= y; }
} // end data plausible
} // end inside a section
if(sp==NULL) // end of border file
break;
} // end for every line in border file
bep->x1= nil; // set terminator of edge list
border__edge_n= bep-border__edge; // set number of edges
} // end get border polygon
// sort edges ascending by x1 value
if(loglevel>=1)
fprintf(stderr,"Border polygons: %i. Now sorting.\n",
border__edge_n);
qsort(border__edge,border__edge_n,sizeof(border__edge_t),
border__qsort_edge);
/* generate chains for each edge */ {
long x1,x2;
border__chain_t* bcp; // growing pointer in chain storage
border__edge_t* bep; // pointer in border__edge[]
border__edge_t* bep2; // referenced edge
border__chain_t* bcp2; // chain of referenced edge;
bep= border__edge;
bcp= border__chain;
while(bep->x1!=nil) { // for each edge in list
if(loglevel>=1)
fprintf(stderr,"> %i %li,%li,%li,%li\n",
(int)(bep-border__edge),bep->x1,bep->y1,bep->x2,bep->y2);
x1= bep->x1; x2= bep->x2;
bep2= bep;
while(bep2>border__edge && (bep2-1)->x1==bep2->x1) bep2--;
// we must examine previous edges having same x1 too;
while(bep2->x1!=nil && bep2->x1 <= x2) {
// for each following overlapping edge in list
if(bep2==bep) { // own edge
bep2++; // (needs not to be chained to itself)
continue;
}
if(bcp>=border__chain+border__chain_M)
// no more space in chain storage
return false;
if(loglevel>=2)
fprintf(stderr,"+ add to chain of %i\n",
(int)(bep2-border__edge));
bcp2= bep2->chain;
if(bcp2==NULL) // no chain yet
bep2->chain= bcp; // add first chain link
else { // edge already has a chain
// go to the chain's end and add new chain link there
while(bcp2->next!=NULL) bcp2= bcp2->next;
bcp2->next= bcp;
} // end edge already has a chain
bcp->edge= bep;
// add source edge to chain of overlapping edges
bcp->next= NULL; // new chain termination
bcp++;
bep2++;
} // for each following overlapping edge in list
bep++;
} // end for each edge in list
} // end generate chains for each edge
// test output
if(loglevel>=2) {
border__edge_t* bep,*bep2; // pointers in border__edge[]
border__chain_t* bcp; // pointer in chain storage
fprintf(stderr,"Chains:\n");
bep= border__edge;
while(bep->x1!=nil) { // for each edge in list
fprintf(stderr,"> %i %li,%li,%li,%li\n",
(int)(bep-border__edge),bep->x1,bep->y1,bep->x2,bep->y2);
bcp= bep->chain;
while(bcp!=NULL) { // for each chain link in edge
bep2= bcp->edge;
fprintf(stderr," %i %li,%li,%li,%li\n",
(int)(bep2-border__edge),
bep2->x1,bep2->y1,bep2->x2,bep2->y2);
bcp= bcp->next;
} // end for each chain link in edge
bep++;
} // end for each edge in list
} // end test output
/* determine enclosing border box */ {
border__edge_t* bep; // pointer in border__edge[]
border__bx1= border__edge[0].x1;
border__bx2= -2000000000L; // (default)
border__by1= 2000000000L; border__by2= -2000000000L; // (default)
bep= border__edge;
while(bep->x1!=nil) { // for each coordinate of the polygon
if(bep->x2>border__bx2) border__bx2= bep->x2;
if(bep->y1y1;
if(bep->y2y2;
if(bep->y1>border__by2) border__by2= bep->y1;
if(bep->y2>border__by2) border__by2= bep->y2;
bep++;
} // end for each coordinate of the polygon
} // end determine enclosing border box
border_active=true;
if(loglevel>=1)
fprintf(stderr,"End of border initialization.\n");
return true;
} // end border_file()
static bool border_queryinside(long x,long y) {
// determine if the given coordinate lies inside or outside the
// border polygon(s);
// x,y: coordinates of the given point in 0.0000001 degrees;
// return: point lies inside the border polygon(s);
static long nil;
nil= border__nil;
#if MAXLOGLEVEL>=3
if(loglevel>=3)
fprintf(stderr,"# %li,%li\n",x,y);
#endif
// first, consider border box (if any)
if(border__bx1!=nil) { // there is a border box
if(xborder__bx2 ||
yborder__by2)
// point lies outside the border box
return false;
} // end there is a border box
/* second, consider border polygon (if any) */ {
border__edge_t* bep; // pointer in border__edge[]
border__chain_t* bcp; // pointer in border__chain[]
int cross; // number of the crossings a line from the point
// to the north pole would have ageinst the border lines
// in border__coord[][];
if(border__edge==NULL)
return true;
cross= 0;
/* binary-search the edge with the closest x1 */ {
int i,i1,i2; // iteration indexes
i1= 0; i2= border__edge_n;
while(i2>i1+1) {
i= (i1+i2)/2;
bep= border__edge+i;
//fprintf(stderr,"s %i %i %i %li\n",i1,i,i2,bep->x1); ///
if(bep->x1 > x) i2= i;
else i1= i;
//fprintf(stderr," %i %i %i\n",i1,i,i2); ///
}
bep= border__edge+i1;
} // end binary-search the edge with the closest x1
bcp= NULL;
// (default, because we want to examine the own edge first)
for(;;) { // for own edge and each edge in chain
if(bep->x1 <= x && bep->x2 > x) { // point lies inside x-range
if(bep->y1 > y && bep->y2 > y) {
// line lies completely north of point
cross++;
#if MAXLOGLEVEL>=3
if(loglevel>=3)
fprintf(stderr,"= %i %li,%li,%li,%li\n",
(int)(bep-border__edge),bep->x1,bep->y1,bep->x2,bep->y2);
#endif
}
else if(bep->y1 > y || bep->y2 > y) {
// one line end lies north of point
if( (long long)(y-bep->y1)*(long long)(bep->x2-bep->x1) <
(long long)(x-bep->x1)*(long long)(bep->y2-bep->y1) ) {
// point lies south of the line
cross++;
#if MAXLOGLEVEL>=3
if(loglevel>=3)
fprintf(stderr,"/ %i %li,%li,%li,%li\n",
(int)(bep-border__edge),
bep->x1,bep->y1,bep->x2,bep->y2);
#endif
}
#if MAXLOGLEVEL>=3
else if(loglevel>=3)
fprintf(stderr,". %i %li,%li,%li,%li\n",
(int)(bep-border__edge),
bep->x1,bep->y1,bep->x2,bep->y2);
#endif
} // end one line end north of point
#if MAXLOGLEVEL>=3
else if(loglevel>=3)
fprintf(stderr,"_ %i %li,%li,%li,%li\n",
(int)(bep-border__edge),bep->x1,bep->y1,bep->x2,bep->y2);
#endif
} // end point lies inside x-range
if(bcp==NULL) // chain has not been examined
bcp= bep->chain; // get the first chain link
else
bcp= bcp->next; // get the next chain link
if(bcp==NULL) // no more chain links
break;
bep= bcp->edge;
} // end for own edge and each edge in chain
//if(loglevel>=3) fprintf(stderr,"# %li,%li cross %i\n",x,y,cross);
return (cross&1)!=0; // odd number of crossings
} // end second, consider border polygon (if any)
} // end border_queryinside()
//------------------------------------------------------------
// end Module border_ OSM border module
//------------------------------------------------------------
//------------------------------------------------------------
// Module read_ OSM read module
//------------------------------------------------------------
// this module provides procedures for reading the input .osm
// and .osc input files;
// as usual, all identifiers of a module have the same prefix,
// in this case 'read'; an underline will follow in case of a
// global accessible object, two underlines in case of objects
// which are not meant to be accessed from outside this module;
// the sections of private and public definitions are separated
// by a horizontal line: ----
static inline int64_t read__id(const char* s) {
// read the id of an OSM XML object;
// s[]: string with the ID, e.g.: id="12345" lat="49.0" ...
// the string must start with "id=\"";
// return: id of the object; 0: invalid id;
int64_t id;
if(s[0]!='i' || s[1]!='d' || s[2]!='=' ||
(s[3]!='\"' && s[3]!='\''))
return 0;
s+= 4; // jump over "id=\""
id= 0;
if(*s!='-') { // positive id
while(isdig(*s)) { id= id*10+(*s-'0'); s++; }
}
else { // negative id
s++;
while(isdig(*s)) { id= id*10+(*s-'0'); s++; }
id= -id;
}
return id;
} // end read__id()
#define read__prefetch UINT32_C(1000000)
// read prefetch because we want to get access to several
// bytes which are to be read next
typedef struct read__struct {
char buf[UINT32_C(19000000)+read__prefetch];
char* bufe;
char* bufp;
int fd; // file descriptor
bool shortobject; // the last encountered object may be written
// the short XML form, e.g.:
// otherwise it hast to be in the long form: ...
int section; // section we are inside;
// 0: no section; 1: delete; 2: create; 3: modify;
int type; // last encountered object type;
// 0: none; 1: node; 2: way; 3: relation;
int64_t id; // last encountered object id
uint64_t typeid; // last encountered object typ and id;
// lower bits: id; higher bits: type; mask: 0x7000000000000000LL;
// 0x1000000000000000LL: node;
// 0x2000000000000000LL: way;
// 0x3000000000000000LL: relation;
// note: to deal with negative ids, we add always:
// 0x800000000000000LL
uint64_t typeidold; // previous encountered
// object type and id, for plausibility checks;
bool writeflag; // every input byte shall be written to output;
char fn[32]; // file name, for error messages;
struct read__struct* next;
} read_info;
static read_info* read_infochain= NULL;
// pointer to first read info structure;
static int read__error_number= 0;
// 1: Wrong sequence in one of the files
static void read__closeall(void) {
// close all previously opened input files;
// this procedure has no parameters because we want to be able
// to call it via atexit();
read_info* ri;
while(read_infochain!=NULL) {
ri= read_infochain;
if(ri->fd!=0)
close(ri->fd);
ri->fd= -1;
read_infochain= ri->next;
free(ri);
}
} // end read__closeall()
static bool read__input(read_info* ri) {
// read data from an input file, use an internal buffer;
// in dependence of ri->writeflag, every read byte will be
// written to standard output;
// the procedure returns if either the end of the input file
// has been reached or a new object start, e.g. "",
// has been encountered;
// *ri: read information structure;
// return: there are no (more) bytes to read;
// *ri: information about last encountered object;
// to get object ID information easier, we use a prefetch of
// 'read__prefetch' bytes;
char c; // last character which has been read;
bool writeend; // end writing to stdout after the next '>';
int l,r;
bool ret;
writeend= false;
for(;;) { // until break;
if(ri->bufp+read__prefetch>=ri->bufe) { // read buffer is too low
if(ri->fd>=0) { // still bytes in the file
if(ri->bufe>ri->bufp) { // bytes remaining in buffer
memmove(ri->buf,ri->bufp,ri->bufe-ri->bufp);
// move remaining bytes to start of buffer
ri->bufe= ri->buf+(ri->bufe-ri->bufp);
// protect the remaining bytes at buffer start
}
else // no remaining bytes in buffer
ri->bufe= ri->buf; // no bytes remaining to protect
ri->bufp= ri->buf;
#if 1
do { // while buffer has not been filled
l= (ri->buf+sizeof(ri->buf))-ri->bufe-1;
// number of bytes to read
r= read(ri->fd,ri->bufe,l);
//fprintf(stderr,"read bytes: wanted %u, got %u\n",l,r);
if(r<=0) { // no more bytes in the file
ri->fd= -1; // memorize that there we are at end of file
break;
}
ri->bufe+= r; // set new mark for end of data
ri->bufe[0]= 0; // set null-terminator
} while(rfd,ri->bufe,sizeof(ri->buf)-read__prefetch-1);
// fill the rest of the buffer with bytes from the file
//fprintf(stderr,"read bytes: wanted %u, got %u\n",sizeof(ri->buf)-read__prefetch-1,r);
if(r<=0) // no more bytes in the file
ri->fd= -1; // memorize that there we are at end of file
else {
ri->bufe+= r; // set new mark for end of data
ri->bufe[0]= 0; // set null-terminator
}
#endif
} // end still bytes to read
if(ri->fd<0 && ri->bufp>=ri->bufe) {
// buffer empty and no more bytes in the file
ri->section= 0;
ri->type= 0;
ri->id= 0;
ri->typeid= 0;
ri->typeidold= 0;
ri->writeflag= false;
ret= true;
break;
}
} // end read buffer is too low
c= *ri->bufp;
ri->bufp++;
if(c=='<') { // new object
ri->shortobject= false; // (default)
if(strzcmp(ri->bufp,"delete>")==0)
ri->section= 1;
else if(strzcmp(ri->bufp,"create>")==0)
ri->section= 2;
else if(strzcmp(ri->bufp,"modify>")==0)
ri->section= 3;
else if(ri->bufp[0]=='/' && (
strzcmp(ri->bufp+1,"delete>")==0 ||
strzcmp(ri->bufp+1,"create>")==0 ||
strzcmp(ri->bufp+1,"modify>")==0))
ri->section= 0;
else if((l= strzlcmp(ri->bufp,"node "))>0) {
ri->type= 1;
ri->id= read__id(ri->bufp+l);
ri->typeid= UINT64_C(0x1800000000000000)+ri->id;
#define read__plausi(t) if(ri->typeid<=ri->typeidold) \
{ fprintf(stderr,"osmchange: Wrong sequence at " \
#t " %"PRIi64" in %s\n",ri->id,ri->fn); \
read__error_number= 1; } \
ri->typeidold= ri->typeid;
read__plausi(node);
ri->shortobject= true; // may be a short object
ret= false;
break;
}
else if((l= strzlcmp(ri->bufp,"way "))>0) {
ri->type= 2;
ri->id= read__id(ri->bufp+l);
ri->typeid= UINT64_C(0x2800000000000000)+ri->id;
read__plausi(way);
ri->shortobject= true; // may be a short object
ret= false;
break;
}
else if((l= strzlcmp(ri->bufp,"relation "))>0) {
ri->type= 3;
ri->id= read__id(ri->bufp+l);
ri->typeid= UINT64_C(0x3800000000000000)+ri->id;
read__plausi(relation);
ri->shortobject= true; // may be a short object
ret= false;
break;
}
else if(strzlcmp(ri->bufp,"osm version=\"") &&
ri->writeflag) {
char* p;
write_stdout('<');
p= ri->bufp;
for(;;) { write_stdout(*p); if(*p=='\"') break; p++; }
p++;
for(;;) { write_stdout(*p); if(*p=='\"') break; p++; }
p= " generator=\"osmchange "VERSION"\">";
do write_stdout(*p++); while(*p!=0);
write_stdout_nl();
ri->writeflag= false;
ri->shortobject= true; // may be a short object
ret= false;
break;
}
else if(strzcmp(ri->bufp,"/node>")==0 ||
strzcmp(ri->bufp,"/way>")==0 ||
strzcmp(ri->bufp,"/relation>")==0) {
ri->type= 0;
ri->id= 0;
ri->typeid= 0;
writeend= true;
}
} // end new object
else if(c=='>')
ri->shortobject= false;
else if(c=='/' && ri->bufp[0]=='>' && ri->shortobject) {
// end of object using the short XML form
ri->type= 0;
ri->id= 0;
ri->typeid= 0;
ri->shortobject= false;
writeend= true;
}
if(ri->writeflag) {
write_stdout(c); // write the read-in character to stdout
if(writeend && c=='>') { // writing is to end
write_stdout_nl();
ri->writeflag= false;
}
}
} // end until break
return ret;
} // end read__input()
#define read__queryend(ri) (ri->fd<0 && ri->bufp>=ri->bufe)
// query if the end of a specific input file has been reached
static uint64_t read__typeid= UINT64_C(0x7fffffffffffffff);
// presently considered typeid;
// a typeid is a combination of type and id of an object; example:
// type 'node', id "12345";
// as typeid is a 64 bit value, the object type will be coded
// within the highest bits to allow hierarchical sorts:
// primary type and secondary id;
// bits 0x7000000000000000LL: 1==node; 2==way; 3==relation;
// to consider negative ids, we always add an offset:
// 0x800000000000000LL;
//------------------------------------------------------------
static read_info* read_open(const char* fn) {
// open an input file;
// fn[]: name of the file:
// fn==NULL: read from stdin;
// return: pointer to read info structure;
// ==NULL: could not open this file;
// the read info structure is always initialized with
// default parameters;
// for the first call of this procedure, the member 'writeflag'
// will be set to ensure that the first section of the file
// (resp. standard input) will be copied to standard output;
static bool firstrun= true;
static bool firstwriteflag= true;
read_info* ri;
int fd;
if(firstrun) {
firstrun= false;
atexit(read__closeall);
}
if(fn==NULL) // to read from stdin
fd= 0;
else { // not to read from stdin
fd= open(fn,O_RDONLY);
if(fd<0)
return NULL;
} // end not to read from stdin
ri= (read_info*)malloc(sizeof(read_info));
if(ri==NULL)
return NULL;
// initialize read info structure
ri->bufe= ri->buf;
ri->bufp= ri->buf;
ri->fd= fd;
ri->shortobject= false;
ri->section= 0;
ri->type= 0;
ri->id= 0;
ri->typeid= 0;
ri->typeidold= 0;
ri->writeflag= firstwriteflag;
firstwriteflag= false;
if(fn==NULL) strcpy(ri->fn,"standard input");
else strMcpy(ri->fn,fn);
ri->next= read_infochain;
read_infochain= ri;
return ri;
} // end read_open()
static inline bool read_allinput(read_info** rif) {
// read all input files until the next relevant object has been
// encountered;
// but: files which have already been read till an object with a
// typeid greater read__typeid are not read;
// rif[]: NULL-terminated list with the addresses of the file's
// read info structures;
// return: all files have been read completely;
// the read info structures will be updated accordingly to the
// encountered object types and IDs;
read_info* ri;
bool ret;
int i;
ret= true; // (default)
for(;;) { // for all input files
ri= *rif;
if(ri==NULL)
break;
if(!read__queryend(ri)) {
ret= false;
if(ri->typeid==0) { // 2011-04-27,,,
i= 10;
while(--i>=0 && ri->typeid==0)
read__input(ri);
}
else if(ri->typeid<=read__typeid)
read__input(ri);
}
rif++;
} // end for all input files
return ret;
} // end read_allinput()
static void read_setwriteflag(read_info** rif) {
// pick out the most relevant object and set the write flag there;
// rif[]: NULL-terminated list with the addresses of the file's
// read info structures;
// the list is expected in the following order:
// .osm file, oldest .osc file .. newest .osc file;
// return:
// the 'writeflag' elements in the read info structures will
// be updated;
read_info** rip,*rim,*ri;
// delete all write flags
rip= rif;
for(;;) { // for all input files
ri= *rip; if(ri==NULL)
break;
ri->writeflag= false;
rip++;
} // end for all input files
// find the most recent file with the lowest typeid
read__typeid= UINT64_C(0x7fffffffffffffff);
// just to ensure that this value will be subceeded;
rim= NULL;
while(rip>rif) { // for all input files, starting with the last
rip--;
ri= *rip;
if(ri->typeid>0 && ri->typeidtypeid; // memorize this minimum
}
} // end for all input files, starting with the last
// set new write flag if applicable
if(rim!=NULL) { // found a minimum
if(rim->section!=1) { // no .osc delete section
rim->writeflag= true; // (default)
if(border_active) { // borders are to be considered
if(rim->type==1) { // type is "node"
long x,y;
if(border_strtocoord(rim->bufp,&x,&y)) {
// object has a coordinate
if(!border_queryinside(x,y))
// coordinate lies outside the region's borders
rim->writeflag= false;
} // end object has a coordinate
} // end type is "node"
else if(rim->type>=2) { // type is 'way' or 'relation'
// for a way:
// find out if there are references to nodes which have been
// marked in the hash table; if so, mark this way too;
// for a relation:
// find out if there are references to nodes or ways which
// have been marked in the hash table;
// if so, mark this relation too;
if(!hash_getparse(rim->type,rim->id,rim->bufp,rim->fn))
rim->writeflag= false;
} // end type is "way"
if(rim->writeflag && rim->type>0)
// writing is requested AND regular object type
hash_seti(rim->type-1,rim->id);
// set flag as sign that this object is valid
} // end borders are to be considered
} // end no .osc delete section
if(rim->writeflag) { // writing is requested
// make up for '<' because we jumped over it
#if 0
write_stdout(' '); write_stdout(' ');
write_stdout(' '); write_stdout(' ');
#endif
write_stdout('<');
}
} // end found a minimum
} // end read_setwriteflag()
static bool read_queryerror() {
// determine if an error has occurred;
return read__error_number;
} // end read_queryerror()
//------------------------------------------------------------
// end Module read_ OSM read module
//------------------------------------------------------------
int main(int argc,const char *argv[]) {
// main program;
// for the meaning of the calling line parameters please look at the
// contents of helptext[];
int h_n,h_w,h_r; // user-suggested hash size in MiB, for
// hash tables of nodes, ways, and relations;
static read_info* rifiles[4000];
// pointer to info structure for every input file
read_info** riend;
// end of the input structure pointers in rifiles[]
// initialization
h_n= h_w= h_r= 0;
riend= NULL; // 'no file has been opended yet'
/* read command line parameters and open input files */ {
if(argc<=1) { // no command line parameters given
fprintf(stderr,"osmchange " VERSION "\n"
"Applies changes to an .osm file and sets limiting borders.\n"
"To get detailed help, please enter: ./osmchange -h\n");
return 0; // end the program, because without having parameters
// we do not know what to do;
}
while(--argc>0) { // for every parameter in command line
argv++; // switch to next parameter; as the first one is just
// the program name, we must do this previous reading the
// first 'real' parameter;
if(strcmp(argv[0],"-h")==0) {
// user wants help text
fprintf(stderr,"%s",helptext); // print help text
// (strange format string just to suppress
// that silly compiler warning)
return 0;
}
if(strzcmp(argv[0],"-t")==0) {
// test mode
write_testmode= true;
if(argv[0][2]==0)
fprintf(stderr,"osmchange: Entering test mode.\n");
else {
loglevel= argv[0][2]-'0';
if(loglevel<1) loglevel= 1;
if(loglevel>MAXLOGLEVEL) loglevel= MAXLOGLEVEL;
fprintf(stderr,"osmchange: Entering loglevel %i.\n",loglevel);
}
continue; // take next parameter
}
if(argv[0][0]=='-' && argv[0][1]=='h' && isdig(argv[0][2])) {
// "-h...": user wants a specific hash size;
// note that we accept "-h" only if it is continued by a
// digit, so that a plain "-h" would not be recognized
// and therefore print the help text;
const char* p;
p= argv[0]+2; // jump over "-h"
h_n= h_w= h_r= 0;
// read the up to three values for hash tables' size;
// format examples: "-h200-20-10", "-h1200"
while(isdig(*p)) { h_n= h_n*10+*p-'0'; p++; }
if(*p!=0) { p++; while(isdig(*p)) { h_w= h_w*10+*p-'0'; p++; } }
if(*p!=0) { p++; while(isdig(*p)) { h_r= h_r*10+*p-'0'; p++; } }
continue; // take next parameter
}
if(strzcmp(argv[0],"-b=")==0) {
// border consideration by a bounding box
if(!border_box(argv[0]+3)) {
fprintf(stderr,"osmchange: Use border format: "
" -b\"x1,y1,x2,y2\"\n");
return 3;
} // end border consideration by a bounding box
continue; // take next parameter
}
if(strzcmp(argv[0],"-B=")==0) {
// border consideration by polygon file
if(!border_file(argv[0]+3)) {
fprintf(stderr,
"osmchange: No polygon file or too large: %s\n",
argv[0]);
return 4;
} // end border consideration by polygon file
continue; // take next parameter
}
if(strzcmp(argv[0],"-i=")==0 && riend==NULL) {
// use input file instead of stdin AND
// no file has been opened yet
riend= rifiles;
*riend= read_open(argv[0]+3);
if(*riend==NULL) {
fprintf(stderr,"osmchange: Could not open: %s\n",argv[0]+3);
return 1;
}
riend++;
continue; // take next parameter
}
if(riend>=rifiles+(sizeof(rifiles)/sizeof(rifiles[0]))-2) {
fprintf(stderr,"osmchange: Too many input files.\n");
return 11;
}
// here: not a parameter; hence we assume, it's a .osc filename
if(riend==NULL) { // no file has been opended yet
// assume that .osm file shall be read from stdin;
riend= rifiles;
*riend= read_open(NULL);
if(*riend==NULL) {
fprintf(stderr,"osmchange: Could not open standard input.\n");
return 1;
}
riend++;
}
*riend= read_open(argv[0]);
if(*riend==NULL) {
fprintf(stderr,"osmchange: Could not open: %s\n",argv[0]);
return 2;
}
riend++;
} // end for every parameter in command line
} // end read command line parameters
// open stdin in case it has not been opened yet
if(riend==NULL) { // no file has been opended yet
riend= rifiles;
*riend= read_open(NULL);
if(*riend==NULL) {
fprintf(stderr,"osmchange: Could not open standard input.\n");
return 1;
}
riend++;
}
*riend= NULL; // terminate list of pointers to read info structures
// process parameters
if(border_active) { // user wants borders
int r;
if(h_n==0) h_n= 400; // use standard value if not set otherwise
if(h_w==0 && h_r==0) {
// user chose simple form for hash memory value
// take the one given value as reference and determine the
// three values using these factors: 80%, 15%, 5%
h_w= h_n/5; h_r= h_n/20;
h_n-= h_w; h_w-= h_r; }
r= hash_ini(h_n,h_w,h_r); // initialize hash table
if(r==1)
fprintf(stderr,"osmchange: Hash size had to be reduced.\n");
else if(r==2)
fprintf(stderr,"osmchange: Not enough memory for hash.\n");
} // end user wants borders
// process input data
for(;;) { // until all files have been processed
bool e;
// read all files until the next relevant object
e= read_allinput(rifiles);
if(e)
break;
// set write flag for that file in list which has encountered the
// object with the lowest type/id; if there are more than one of
// these files, take the file with the highest index in file list;
read_setwriteflag(rifiles);
} // end until all files have been processed
// add bounding tag for the OSM file: ""
write_stdout('<'); write_stdout('/');
write_stdout('o'); write_stdout('s'); write_stdout('m');
write_stdout('>');
write_stdout_nl();
write_flush(); // flush write buffer
if(read_queryerror()!=0)
return 21;
if(hash_queryerror()!=0)
return 12;
return 0;
} // end main()