diff --git a/.rubocop.yml b/.rubocop.yml index 4cc76166b..940182bf5 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -1,7 +1,9 @@ require: - rubocop/rspec/focused - - ./lib/cops/unscoped.rb + - ./lib/cops/add_concurrent_index.rb - ./lib/cops/application_name.rb + - ./lib/cops/unscoped.rb + inherit_gem: rubocop-rails_config: - config/rails.yml @@ -14,7 +16,7 @@ AllCops: - "node_modules/**/*" - "vendor/**/*" -DS/Unscoped: +DS/AddConcurrentIndex: Enabled: true DS/ApplicationName: @@ -25,6 +27,9 @@ DS/ApplicationName: - './lib/linters/application_name_linter.rb' - "./spec/**/*" +DS/Unscoped: + Enabled: true + Bundler/DuplicatedGem: Enabled: true diff --git a/app/lib/database/migration_helpers.rb b/app/lib/database/migration_helpers.rb new file mode 100644 index 000000000..07061d888 --- /dev/null +++ b/app/lib/database/migration_helpers.rb @@ -0,0 +1,161 @@ +# Some of this file is lifted from Gitlab's `lib/gitlab/database/migration_helpers.rb`` + +# Copyright (c) 2011-present GitLab B.V. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +module Database::MigrationHelpers + # Given a combination of columns, return the records that appear twice of more + # with the same values. + # + # Returns tuples of ids. + # + # Example: + # + # find_duplicates :tags, [:post_id, :label] + # # [[7, 3], [1, 9, 4]] + def find_duplicates(table_name, column_names) + str_column_names = column_names.map(&:to_s) + columns = str_column_names.join(', ') + t_columns = str_column_names.map { |c| "t.#{c}" }.join(', ') + + duplicates = execute <<-SQL.squish + SELECT t.id, #{t_columns} + FROM #{table_name} t + INNER JOIN ( + SELECT #{columns}, COUNT(*) + FROM #{table_name} + GROUP BY #{columns} + HAVING COUNT(*) > 1 + ) dt + ON #{column_names.map { |c| "t.#{c} = dt.#{c}" }.join(' AND ')} + SQL + + grouped_duplicates = duplicates + .group_by { |r| r.values_at(*str_column_names) } + .values + + # Return the duplicate ids only (instead of a heavier record) + grouped_duplicates.map do |records| + records.map { |r| r["id"] } + end + end + + # Given a combination of columns, delete the records that appear twice of more + # with the same values. + # + # The first record found is kept, and the other are discarded. + # + # Example: + # + # delete_duplicates :tags, [:post_id, :label] + def delete_duplicates(table_name, column_names) + duplicates = nil + disable_statement_timeout do + duplicates = find_duplicates(table_name, column_names) + end + + duplicates.each do |ids| + duplicate_ids = ids.drop(1) # drop all records except the first + execute "DELETE FROM #{table_name} WHERE (#{table_name}.id IN (#{duplicate_ids.join(', ')}))" + end + end + + # Creates a new index, concurrently + # + # Example: + # + # add_concurrent_index :users, :some_column + # + # See Rails' `add_index` for more info on the available arguments. + def add_concurrent_index(table_name, column_name, **options) + if transaction_open? + raise 'add_concurrent_index can not be run inside a transaction, ' \ + 'you can disable transactions by calling disable_ddl_transaction! ' \ + 'in the body of your migration class' + end + + options = options.merge({ algorithm: :concurrently }) + + if index_exists?(table_name, column_name, **options) + Rails.logger.warn "Index not created because it already exists (this may be due to an aborted migration or similar): table_name: #{table_name}, column_name: #{column_name}" + return + end + + disable_statement_timeout do + add_index(table_name, column_name, **options) + end + end + + private + + def statement_timeout_disabled? + # This is a string of the form "100ms" or "0" when disabled + connection.select_value('SHOW statement_timeout') == "0" + end + + # Long-running migrations may take more than the timeout allowed by + # the database. Disable the session's statement timeout to ensure + # migrations don't get killed prematurely. + # + # There are two possible ways to disable the statement timeout: + # + # - Per transaction (this is the preferred and default mode) + # - Per connection (requires a cleanup after the execution) + # + # When using a per connection disable statement, code must be inside + # a block so we can automatically execute `RESET ALL` after block finishes + # otherwise the statement will still be disabled until connection is dropped + # or `RESET ALL` is executed + def disable_statement_timeout + if block_given? + if statement_timeout_disabled? + # Don't do anything if the statement_timeout is already disabled + # Allows for nested calls of disable_statement_timeout without + # resetting the timeout too early (before the outer call ends) + yield + else + begin + execute('SET statement_timeout TO 0') + + yield + ensure + execute('RESET ALL') + end + end + else + unless transaction_open? + raise <<~ERROR + Cannot call disable_statement_timeout() without a transaction open or outside of a transaction block. + If you don't want to use a transaction wrap your code in a block call: + + disable_statement_timeout { # code that requires disabled statement here } + + This will make sure statement_timeout is disabled before and reset after the block execution is finished. + ERROR + end + + execute('SET LOCAL statement_timeout TO 0') + end + end + + def execute(sql) + ActiveRecord::Base.connection.execute(sql) + end +end diff --git a/lib/cops/add_concurrent_index.rb b/lib/cops/add_concurrent_index.rb new file mode 100644 index 000000000..426d6165d --- /dev/null +++ b/lib/cops/add_concurrent_index.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +# Copyright (c) 2011-present GitLab B.V. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +if defined?(RuboCop) + module RuboCop + module Cop + module DS + # Cop that checks if `add_concurrent_index` is used with `up`/`down` methods + # and not `change`. + class AddConcurrentIndex < Cop + MSG = '`add_concurrent_index` is not reversible so you must manually define ' \ + 'the `up` and `down` methods in your migration class, using `remove_index` in `down`' + + def on_send(node) + dirname = File.dirname(node.location.expression.source_buffer.name) + return unless dirname.end_with?('db/migrate') + + name = node.children[1] + + return unless name == :add_concurrent_index + + node.each_ancestor(:def) do |def_node| + next unless method_name(def_node) == :change + + add_offense(def_node, location: :name) + end + end + + def method_name(node) + node.children.first + end + end + end + end + end +end diff --git a/spec/lib/database/migration_helpers_spec.rb b/spec/lib/database/migration_helpers_spec.rb new file mode 100644 index 000000000..4fab02099 --- /dev/null +++ b/spec/lib/database/migration_helpers_spec.rb @@ -0,0 +1,133 @@ +describe Database::MigrationHelpers do + class TestLabel < ApplicationRecord + end + + before(:all) do + ActiveRecord::Migration.suppress_messages do + ActiveRecord::Migration.create_table "test_labels", force: true do |t| + t.string :label + t.integer :user_id + end + end + end + + before(:each) do + # User 1 labels + TestLabel.create({ id: 1, label: 'Important', user_id: 1 }) + TestLabel.create({ id: 2, label: 'Urgent', user_id: 1 }) + TestLabel.create({ id: 3, label: 'Done', user_id: 1 }) + TestLabel.create({ id: 4, label: 'Bug', user_id: 1 }) + + # User 2 labels + TestLabel.create({ id: 5, label: 'Important', user_id: 2 }) + TestLabel.create({ id: 6, label: 'Critical', user_id: 2 }) + + # Duplicates + TestLabel.create({ id: 7, label: 'Urgent', user_id: 1 }) + TestLabel.create({ id: 8, label: 'Important', user_id: 2 }) + end + + after(:all) do + ActiveRecord::Migration.suppress_messages do + ActiveRecord::Migration.drop_table :test_labels, force: true + end + end + + let(:model) { ActiveRecord::Migration.new.extend(Database::MigrationHelpers) } + + describe '.find_duplicates' do + context 'using a single column for uniqueness' do + subject do + model.find_duplicates(:test_labels, [:label]) + end + + it 'finds duplicates' do + expect(subject.length).to eq 2 + end + + it 'finds three labels with "Important"' do + expect(subject).to include [1, 5, 8] + end + + it 'finds two labels with "Urgent"' do + expect(subject).to include [2, 7] + end + end + + context 'using multiple columns for uniqueness' do + subject do + model.find_duplicates(:test_labels, [:label, :user_id]) + end + + it 'finds duplicates' do + expect(subject.length).to eq 2 + end + + it 'finds two labels with "Important" for user 2' do + expect(subject).to include [5, 8] + end + + it 'finds two labels with "Urgent" for user 1' do + expect(subject).to include [2, 7] + end + end + end + + describe '.delete_duplicates' do + subject do + model.delete_duplicates(:test_labels, [:label]) + end + + it 'keeps the first item, and delete the others' do + expect { subject }.to change(TestLabel, :count).by(-3) + expect(TestLabel.where(label: 'Critical').count).to eq(1) + expect(TestLabel.where(label: 'Important').count).to eq(1) + expect(TestLabel.where(label: 'Urgent').count).to eq(1) + expect(TestLabel.where(label: 'Bug').count).to eq(1) + expect(TestLabel.where(label: 'Done').count).to eq(1) + end + end + + describe '.add_concurrent_index' do + let(:model) { ActiveRecord::Migration.new.extend(Database::MigrationHelpers) } + + context 'outside a transaction' do + before do + model.verbose = false + allow(model).to receive(:transaction_open?).and_return(false) + allow(model).to receive(:disable_statement_timeout).and_call_original + end + + it 'creates the index concurrently' do + expect(model).to receive(:add_index) + .with(:users, :foo, algorithm: :concurrently) + + model.add_concurrent_index(:users, :foo) + end + + it 'creates unique index concurrently' do + expect(model).to receive(:add_index) + .with(:users, :foo, { algorithm: :concurrently, unique: true }) + + model.add_concurrent_index(:users, :foo, unique: true) + end + + it 'does nothing if the index exists already' do + expect(model).to receive(:index_exists?) + .with(:users, :foo, { algorithm: :concurrently, unique: true }).and_return(true) + expect(model).not_to receive(:add_index) + + model.add_concurrent_index(:users, :foo, unique: true) + end + end + + context 'inside a transaction' do + it 'raises RuntimeError' do + expect(model).to receive(:transaction_open?).and_return(true) + + expect { model.add_concurrent_index(:users, :foo) } + .to raise_error(RuntimeError) + end + end + end +end