From abfef3bbf5f637c86032763632393ce1ffd23ccc Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 16 Apr 2014 23:54:05 -0700 Subject: [PATCH] git-svn: only look at the new parts of svn:mergeinfo In a Subversion repository where many feature branches are merged into a trunk, the svn:mergeinfo property can grow very large. This severely slows down git-svn's make_log_entry() because it is checking all mergeinfo entries every time the property changes. In most cases, the additions to svn:mergeinfo since the last commit are pretty small, and there is nothing to gain by checking merges that were already checked for the last commit in the branch. Add a mergeinfo_changes() function which computes the set of interesting changes to svn:mergeinfo since the last commit. Filter out merged branches whose ranges haven't changed, and remove a common prefix of ranges from other merged branches. This speeds up "git svn fetch" by several orders of magnitude on a large repository where thousands of feature branches have been merged. Signed-off-by: Jakob Stoklund Olesen Signed-off-by: Eric Wong --- perl/Git/SVN.pm | 84 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 72 insertions(+), 12 deletions(-) diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm index 09cff135ef..abd51eadd2 100644 --- a/perl/Git/SVN.pm +++ b/perl/Git/SVN.pm @@ -1178,7 +1178,7 @@ sub find_parent_branch { or die "SVN connection failed somewhere...\n"; } print STDERR "Successfully followed parent\n" unless $::_q > 1; - return $self->make_log_entry($rev, [$parent], $ed); + return $self->make_log_entry($rev, [$parent], $ed, $r0, $branch_from); } return undef; } @@ -1210,7 +1210,7 @@ sub do_fetch { unless ($self->ra->gs_do_update($last_rev, $rev, $self, $ed)) { die "SVN connection failed somewhere...\n"; } - $self->make_log_entry($rev, \@parents, $ed); + $self->make_log_entry($rev, \@parents, $ed, $last_rev); } sub mkemptydirs { @@ -1478,9 +1478,9 @@ sub find_extra_svk_parents { sub lookup_svn_merge { my $uuid = shift; my $url = shift; - my $merge = shift; + my $source = shift; + my $revs = shift; - my ($source, $revs) = split ":", $merge; my $path = $source; $path =~ s{^/}{}; my $gs = Git::SVN->find_by_url($url.$source, $url, $path); @@ -1702,6 +1702,62 @@ sub parents_exclude { return @excluded; } +# Compute what's new in svn:mergeinfo. +sub mergeinfo_changes { + my ($self, $old_path, $old_rev, $path, $rev, $mergeinfo_prop) = @_; + my %minfo = map {split ":", $_ } split "\n", $mergeinfo_prop; + my $old_minfo = {}; + + # Initialize cache on the first call. + unless (defined $self->{cached_mergeinfo_rev}) { + $self->{cached_mergeinfo_rev} = {}; + $self->{cached_mergeinfo} = {}; + } + + my $cached_rev = $self->{cached_mergeinfo_rev}{$old_path}; + if (defined $cached_rev && $cached_rev == $old_rev) { + $old_minfo = $self->{cached_mergeinfo}{$old_path}; + } else { + my $ra = $self->ra; + # Give up if $old_path isn't in the repo. + # This is probably a merge on a subtree. + if ($ra->check_path($old_path, $old_rev) != $SVN::Node::dir) { + warn "W: ignoring svn:mergeinfo on $old_path, ", + "directory didn't exist in r$old_rev\n"; + return {}; + } + my (undef, undef, $props) = + $self->ra->get_dir($old_path, $old_rev); + if (defined $props->{"svn:mergeinfo"}) { + my %omi = map {split ":", $_ } split "\n", + $props->{"svn:mergeinfo"}; + $old_minfo = \%omi; + } + $self->{cached_mergeinfo}{$old_path} = $old_minfo; + $self->{cached_mergeinfo_rev}{$old_path} = $old_rev; + } + + # Cache the new mergeinfo. + $self->{cached_mergeinfo}{$path} = \%minfo; + $self->{cached_mergeinfo_rev}{$path} = $rev; + + my %changes = (); + foreach my $p (keys %minfo) { + my $a = $old_minfo->{$p} || ""; + my $b = $minfo{$p}; + # Omit merged branches whose ranges lists are unchanged. + next if $a eq $b; + # Remove any common range list prefix. + ($a ^ $b) =~ /^[\0]*/; + my $common_prefix = rindex $b, ",", $+[0] - 1; + $changes{$p} = substr $b, $common_prefix + 1; + } + print STDERR "Checking svn:mergeinfo changes since r$old_rev: ", + scalar(keys %minfo), " sources, ", + scalar(keys %changes), " changed\n"; + + return \%changes; +} # note: this function should only be called if the various dirprops # have actually changed @@ -1715,14 +1771,15 @@ sub find_extra_svn_parents { # history. Then, we figure out which git revisions are in # that tip, but not this revision. If all of those revisions # are now marked as merge, we can add the tip as a parent. - my @merges = split "\n", $mergeinfo; + my @merges = sort keys %$mergeinfo; my @merge_tips; my $url = $self->url; my $uuid = $self->ra_uuid; my @all_ranges; for my $merge ( @merges ) { my ($tip_commit, @ranges) = - lookup_svn_merge( $uuid, $url, $merge ); + lookup_svn_merge( $uuid, $url, + $merge, $mergeinfo->{$merge} ); unless (!$tip_commit or grep { $_ eq $tip_commit } @$parents ) { push @merge_tips, $tip_commit; @@ -1738,8 +1795,9 @@ sub find_extra_svn_parents { # check merge tips for new parents my @new_parents; for my $merge_tip ( @merge_tips ) { - my $spec = shift @merges; + my $merge = shift @merges; next unless $merge_tip and $excluded{$merge_tip}; + my $spec = "$merge:$mergeinfo->{$merge}"; # check out 'new' tips my $merge_base; @@ -1770,7 +1828,7 @@ sub find_extra_svn_parents { .@incomplete." commit(s) (eg $incomplete[0])\n"; } else { warn - "Found merge parent (svn:mergeinfo prop): ", + "Found merge parent ($spec): ", $merge_tip, "\n"; push @new_parents, $merge_tip; } @@ -1797,7 +1855,7 @@ sub find_extra_svn_parents { } sub make_log_entry { - my ($self, $rev, $parents, $ed) = @_; + my ($self, $rev, $parents, $ed, $parent_rev, $parent_path) = @_; my $untracked = $self->get_untracked($ed); my @parents = @$parents; @@ -1809,10 +1867,12 @@ sub make_log_entry { ($ed, $props->{"svk:merge"}, \@parents); } if ( $props->{"svn:mergeinfo"} ) { + my $mi_changes = $self->mergeinfo_changes + ($parent_path || $path, $parent_rev, + $path, $rev, + $props->{"svn:mergeinfo"}); $self->find_extra_svn_parents - ($ed, - $props->{"svn:mergeinfo"}, - \@parents); + ($ed, $mi_changes, \@parents); } }