split_node_tags: New migration (in C).

This commit is contained in:
Gabriel Ebner 2008-05-03 14:32:30 +00:00
parent 7eb181999d
commit 4db3faa68c
8 changed files with 300 additions and 76 deletions

View file

@ -1,6 +1,6 @@
class AddUserPreferenceId < ActiveRecord::Migration
def self.up
add_column "user_preferences", "id", :bigint, :limit => 64, :null => false, :options => "AUTO_INCREMENT"
add_column "user_preferences", "id", :bigint, :limit => 64, :null => false
add_index "user_preferences", ["id"], :name => "user_preferences_id_idx"
end

View file

@ -1,18 +0,0 @@
class CreateNodeTags < ActiveRecord::Migration
def self.up
create_table "current_node_tags", myisam_table do |t|
t.column "id", :bigint, :limit => 64, :null => false
t.column "sequence_id", :bigint, :limit => 11, :null => false
t.column "k", :string, :default => "", :null => false
t.column "v", :string, :default => "", :null => false
end
add_primary_key "current_node_tags", ["id", "sequence_id"]
execute "CREATE FULLTEXT INDEX `current_node_tags_v_idx` ON `current_node_tags` (`v`)"
end
def self.down
drop_table :current_node_tags
end
end

View file

@ -1,17 +0,0 @@
class CreateOldNodeTags < ActiveRecord::Migration
def self.up
create_table "node_tags", myisam_table do |t|
t.column "id", :bigint, :limit => 64, :default => 0, :null => false
t.column "version", :bigint, :limit => 20, :null => false
t.column "sequence_id", :bigint, :limit => 11, :null => false
t.column "k", :string, :null => false
t.column "v", :string, :null => false
end
add_primary_key "node_tags", ["id", "version", "sequence_id"]
end
def self.down
drop_table :node_tags
end
end

View file

@ -0,0 +1,62 @@
class PopulateNodeTagsAndRemove < ActiveRecord::Migration
def self.up
have_nodes = select_value("SELECT count(*) FROM current_nodes").to_i != 0
if have_nodes
prefix = File.join Dir.tmpdir, "013_populate_node_tags_and_remove.#{$$}."
cmd = "db/migrate/013_populate_node_tags_and_remove_helper"
src = "#{cmd}.c"
if not File.exists? cmd or File.mtime(cmd) < File.mtime(src) then
system 'cc -O3 -Wall `mysql_config --cflags --libs` ' +
"#{src} -o #{cmd}" or fail
end
conn_opts = ActiveRecord::Base.connection.
instance_eval { @connection_options }
args = conn_opts.map { |arg| arg.to_s } + [prefix]
fail "#{cmd} failed" unless system cmd, *args
tempfiles = ['nodes', 'node_tags',
'current_nodes', 'current_node_tags'].
map { |base| prefix + base }
nodes, node_tags, current_nodes, current_node_tags = tempfiles
end
execute "TRUNCATE nodes"
remove_column :nodes, :tags
remove_column :current_nodes, :tags
add_column :nodes, :version, :bigint, :limit => 20, :null => false
create_table :current_node_tags, innodb_table do |t|
t.column :id, :bigint, :limit => 64, :null => false
t.column :k, :string, :default => "", :null => false
t.column :v, :string, :default => "", :null => false
end
create_table :node_tags, innodb_table do |t|
t.column :id, :bigint, :limit => 64, :null => false
t.column :version, :bigint, :limit => 20, :null => false
t.column :k, :string, :default => "", :null => false
t.column :v, :string, :default => "", :null => false
end
# now get the data back
csvopts = "FIELDS TERMINATED BY ',' ENCLOSED BY '\"' ESCAPED BY '\"' LINES TERMINATED BY '\\n'"
if have_nodes
execute "LOAD DATA INFILE '#{nodes}' INTO TABLE nodes #{csvopts} (id, latitude, longitude, user_id, visible, timestamp, tile, version)";
execute "LOAD DATA INFILE '#{node_tags}' INTO TABLE node_tags #{csvopts} (id, version, k, v)"
execute "LOAD DATA INFILE '#{current_node_tags}' INTO TABLE current_node_tags #{csvopts} (id, k, v)"
end
tempfiles.each { |fn| File.unlink fn } if have_nodes
end
def self.down
raise IrreversibleMigration.new
# add_column :nodes, "tags", :text, :default => "", :null => false
# add_column :current_nodes, "tags", :text, :default => "", :null => false
end
end

View file

@ -0,0 +1,234 @@
#include <mysql.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static void exit_mysql_err(MYSQL *mysql) {
const char *err = mysql_error(mysql);
if (err) {
fprintf(stderr, "013_populate_node_tags_and_remove_helper: MySQL error: %s\n", err);
} else {
fprintf(stderr, "013_populate_node_tags_and_remove_helper: MySQL error\n");
}
abort();
exit(EXIT_FAILURE);
}
static void write_csv_col(FILE *f, const char *str, char end) {
char *out = (char *) malloc(2 * strlen(str) + 4);
char *o = out;
size_t len;
*(o++) = '\"';
for (; *str; str++) {
if (*str == '\0') {
break;
} else if (*str == '\"') {
*(o++) = '\"';
*(o++) = '\"';
} else {
*(o++) = *str;
}
}
*(o++) = '\"';
*(o++) = end;
*(o++) = '\0';
len = strlen(out);
if (fwrite(out, len, 1, f) != 1) {
perror("fwrite");
exit(EXIT_FAILURE);
}
free(out);
}
static void unescape(char *str) {
char *i = str, *o = str;
while (*i) {
if (*i == '\\') {
i++;
switch (*i++) {
case 's': *o++ = ';'; break;
case 'e': *o++ = '='; break;
case '\\': *o++ = '\\'; break;
}
} else {
*o++ = *i++;
}
}
}
static int read_node_tags(char **tags, char **k, char **v) {
if (!**tags) return 0;
char *i = strchr(*tags, ';');
if (!i) i = *tags + strlen(*tags);
char *j = strchr(*tags, '=');
*k = *tags;
if (j && j < i) {
*v = j + 1;
} else {
*v = i;
}
*tags = *i ? i + 1 : i;
*i = '\0';
if (j) *j = '\0';
unescape(*k);
unescape(*v);
return 1;
}
struct data {
MYSQL *mysql;
size_t version_size;
uint32_t *version;
};
static void proc_nodes(struct data *d, const char *tbl, FILE *out, FILE *out_tags, int hist) {
MYSQL_RES *res;
MYSQL_ROW row;
char query[256];
snprintf(query, sizeof(query), "SELECT id, latitude, longitude, "
"user_id, visible, tags, timestamp, tile FROM %s", tbl);
if (mysql_query(d->mysql, query))
exit_mysql_err(d->mysql);
res = mysql_use_result(d->mysql);
if (!res) exit_mysql_err(d->mysql);
while ((row = mysql_fetch_row(res))) {
unsigned long id = strtoul(row[0], NULL, 10);
uint32_t version;
if (id > d->version_size) {
fprintf(stderr, "preallocated nodes size exceeded");
abort();
}
if (hist) {
version = ++(d->version[id]);
fprintf(out, "\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%u\"\n",
row[0], row[1], row[2], row[3], row[4], row[6], row[7], version);
} else {
/*fprintf(out, "\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\"\n",
row[0], row[1], row[2], row[3], row[4], row[6], row[7]);*/
}
char *tags_it = row[5], *k, *v;
while (read_node_tags(&tags_it, &k, &v)) {
if (hist) {
fprintf(out_tags, "\"%s\",\"%u\",", row[0], version);
} else {
fprintf(out_tags, "\"%s\",", row[0]);
}
write_csv_col(out_tags, k, ',');
write_csv_col(out_tags, v, '\n');
}
}
if (mysql_errno(d->mysql)) exit_mysql_err(d->mysql);
mysql_free_result(res);
}
static size_t select_size(MYSQL *mysql, const char *q) {
MYSQL_RES *res;
MYSQL_ROW row;
size_t ret;
if (mysql_query(mysql, q))
exit_mysql_err(mysql);
res = mysql_store_result(mysql);
if (!res) exit_mysql_err(mysql);
row = mysql_fetch_row(res);
if (!row) exit_mysql_err(mysql);
if (row[0]) {
ret = strtoul(row[0], NULL, 10);
} else {
ret = 0;
}
mysql_free_result(res);
return ret;
}
static MYSQL *connect_to_mysql(char **argv) {
MYSQL *mysql = mysql_init(NULL);
if (!mysql) exit_mysql_err(mysql);
if (!mysql_real_connect(mysql, argv[1], argv[2], argv[3], argv[4],
argv[5][0] ? atoi(argv[5]) : 0, argv[6][0] ? argv[6] : NULL, 0))
exit_mysql_err(mysql);
if (mysql_set_character_set(mysql, "utf8"))
exit_mysql_err(mysql);
return mysql;
}
static void open_file(FILE **f, char *fn) {
*f = fopen(fn, "w+");
if (!*f) {
perror("fopen");
exit(EXIT_FAILURE);
}
}
int main(int argc, char **argv) {
size_t prefix_len;
FILE *current_nodes, *current_node_tags, *nodes, *node_tags;
char *tempfn;
struct data data, *d = &data;
if (argc != 8) {
printf("Usage: 013_populate_node_tags_and_remove_helper host user passwd database port socket prefix\n");
exit(EXIT_FAILURE);
}
d->mysql = connect_to_mysql(argv);
d->version_size = 1 + select_size(d->mysql, "SELECT max(id) FROM current_nodes");
d->version = malloc(sizeof(uint32_t) * d->version_size);
prefix_len = strlen(argv[7]);
tempfn = (char *) malloc(prefix_len + 16);
strcpy(tempfn, argv[7]);
strcpy(tempfn + prefix_len, "current_nodes");
open_file(&current_nodes, tempfn);
strcpy(tempfn + prefix_len, "current_node_tags");
open_file(&current_node_tags, tempfn);
strcpy(tempfn + prefix_len, "nodes");
open_file(&nodes, tempfn);
strcpy(tempfn + prefix_len, "node_tags");
open_file(&node_tags, tempfn);
free(tempfn);
proc_nodes(d, "nodes", nodes, node_tags, 1);
proc_nodes(d, "current_nodes", current_nodes, current_node_tags, 0);
free(d->version);
mysql_close(d->mysql);
fclose(current_nodes);
fclose(current_node_tags);
fclose(nodes);
fclose(node_tags);
exit(EXIT_SUCCESS);
}

View file

@ -1,13 +0,0 @@
class PopulateNodeTagsAndRemove < ActiveRecord::Migration
def self.up
#rake import
#commented out to stop people from breaking their db
# remove_column :nodes, :tags
# remove_column :current_nodes, :tags
end
def self.down
# add_column :nodes, "tags", :text, :default => "", :null => false
# add_column :current_nodes, "tags", :text, :default => "", :null => false
end
end

View file

@ -1,24 +0,0 @@
class CreateTempOldNodes < ActiveRecord::Migration
def self.up
create_table "temp_nodes", myisam_table do |t|
t.column "id", :bigint, :limit => 64, :null => false
t.column "version", :bigint, :limit => 20, :null => false
t.column "latitude", :double, :null => false
t.column "longitude", :double, :null => false
t.column "user_id", :bigint, :limit => 20, :null => false
t.column "visible", :boolean, :null => false
t.column "timestamp", :datetime, :null => false
t.column "tile", :integer, :null => false
end
add_primary_key "temp_nodes", ["id", "version"]
add_index "temp_nodes", ["timestamp"], :name => "temp_nodes_timestamp_idx"
add_index "temp_nodes", ["tile"], :name => "temp_nodes_tile_idx"
change_column "temp_nodes", "version", :bigint, :limit => 20, :null => false, :options => "AUTO_INCREMENT"
end
def self.down
drop_table :temp_nodes
end
end