mirror of https://github.com/git/git.git
7531e4b66e
Make parallel checkout configurable by introducing two new settings: checkout.workers and checkout.thresholdForParallelism. The first defines the number of workers (where one means sequential checkout), and the second defines the minimum number of entries to attempt parallel checkout.

To decide the default value for checkout.workers, the parallel version was benchmarked during three operations in the linux repo, with cold cache: cloning v5.8, checking out v5.8 from v2.6.15 (checkout I) and checking out v5.8 from v5.7 (checkout II). The four tables below show the mean run times and standard deviations for 5 runs in: a local file system on SSD, a local file system on HDD, a Linux NFS server, and Amazon EFS (all on Linux). Each parallel checkout test was executed with the number of workers that brings the best overall results in that environment.

Local SSD:
             Sequential             10 workers            Speedup
Clone        8.805 s ± 0.043 s      3.564 s ± 0.041 s     2.47 ± 0.03
Checkout I   9.678 s ± 0.057 s      4.486 s ± 0.050 s     2.16 ± 0.03
Checkout II  5.034 s ± 0.072 s      3.021 s ± 0.038 s     1.67 ± 0.03

Local HDD:
             Sequential             10 workers            Speedup
Clone        32.288 s ± 0.580 s     30.724 s ± 0.522 s    1.05 ± 0.03
Checkout I   54.172 s ± 7.119 s     54.429 s ± 6.738 s    1.00 ± 0.18
Checkout II  40.465 s ± 2.402 s     38.682 s ± 1.365 s    1.05 ± 0.07

Linux NFS server (v4.1, on EBS, single availability zone):
             Sequential             32 workers            Speedup
Clone        240.368 s ± 6.347 s    57.349 s ± 0.870 s    4.19 ± 0.13
Checkout I   242.862 s ± 2.215 s    58.700 s ± 0.904 s    4.14 ± 0.07
Checkout II  65.751 s ± 1.577 s     23.820 s ± 0.407 s    2.76 ± 0.08

EFS (v4.1, replicated over multiple availability zones):
             Sequential             32 workers            Speedup
Clone        922.321 s ± 2.274 s    210.453 s ± 3.412 s   4.38 ± 0.07
Checkout I   1011.300 s ± 7.346 s   297.828 s ± 0.964 s   3.40 ± 0.03
Checkout II  294.104 s ± 1.836 s    126.017 s ± 1.190 s   2.33 ± 0.03

The above benchmarks show that parallel checkout is most effective on repositories located on an SSD or over a distributed file system. For local file systems on spinning disks, and/or older machines, the parallelism does not always bring good performance. For this reason, the default value for checkout.workers is one, a.k.a. sequential checkout.

To decide the default value for checkout.thresholdForParallelism, another benchmark was executed in the "Local SSD" setup, where parallel checkout proved to be beneficial. This time, we compared the runtime of a `git checkout -f`, with and without parallelism, after randomly removing an increasing number of files from the Linux working tree. The "sequential fallback" column below corresponds to the executions where checkout.workers was 10 but checkout.thresholdForParallelism was equal to the number of to-be-updated files plus one (so that we end up writing sequentially). Each test case was sampled 15 times, and each sample had a randomly different set of files removed. Here are the results:

             sequential fallback    10 workers             speedup
10   files   772.3 ms ± 12.6 ms     769.0 ms ± 13.6 ms     1.00 ± 0.02
20   files   780.5 ms ± 15.8 ms     775.2 ms ±  9.2 ms     1.01 ± 0.02
50   files   806.2 ms ± 13.8 ms     767.4 ms ±  8.5 ms     1.05 ± 0.02
100  files   833.7 ms ± 21.4 ms     750.5 ms ± 16.8 ms     1.11 ± 0.04
200  files   897.6 ms ± 30.9 ms     730.5 ms ± 14.7 ms     1.23 ± 0.05
500  files   1035.4 ms ± 48.0 ms    677.1 ms ± 22.3 ms     1.53 ± 0.09
1000 files   1244.6 ms ± 35.6 ms    654.0 ms ± 38.3 ms     1.90 ± 0.12
2000 files   1488.8 ms ± 53.4 ms    658.8 ms ± 23.8 ms     2.26 ± 0.12

From the above numbers, 100 files seems to be a reasonable default value for the threshold setting.

Note: up to 1000 files, we observe a drop in the execution time of the parallel code with an increase in the number of files. This is a rather odd behavior, but it was observed in multiple repetitions. Above 1000 files, the execution time increases with the number of files, as one would expect.

About the test environments: the Local SSD tests were executed on an i7-7700HQ (4 cores with hyper-threading) running Manjaro Linux. The Local HDD tests were executed on an Intel(R) Xeon(R) E3-1230 (also 4 cores with hyper-threading), with a Seagate Barracuda 7200.14 SATA 3.1 HDD, running Debian. The NFS and EFS tests were executed on an Amazon EC2 c5n.xlarge instance, with 4 vCPUs. The Linux NFS server was running on an m6g.large instance with 2 vCPUs and a 1 TB EBS GP2 volume. Before each timing, the linux repository was removed (or checked out back to its previous state), and `sync && sysctl vm.drop_caches=3` was executed.

Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
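For reference, the two settings introduced by this change can be enabled like any other git configuration; the values below are only illustrative (10 workers matches the Local SSD benchmark, 100 files is the default threshold discussed above):

    git config --global checkout.workers 10
    git config --global checkout.thresholdForParallelism 100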
parallel-checkout.h: 109 lines, 3.1 KiB, C
#ifndef PARALLEL_CHECKOUT_H
#define PARALLEL_CHECKOUT_H

#include "convert.h"

struct cache_entry;
struct checkout;

/****************************************************************
 * Users of parallel checkout
 ****************************************************************/
enum pc_status {
        PC_UNINITIALIZED = 0,
        PC_ACCEPTING_ENTRIES,
        PC_RUNNING,
};

enum pc_status parallel_checkout_status(void);
void get_parallel_checkout_configs(int *num_workers, int *threshold);
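/*
 * Illustrative sketch, not part of this header: one plausible shape for
 * get_parallel_checkout_configs(), wiring the declaration above to the
 * checkout.workers and checkout.thresholdForParallelism settings described
 * in the commit message. It assumes git's git_config_get_int() from config.h;
 * the defaults follow the commit message (1 worker, i.e. sequential checkout,
 * and 100 files).
 */
void get_parallel_checkout_configs(int *num_workers, int *threshold)
{
        if (git_config_get_int("checkout.workers", num_workers))
                *num_workers = 1; /* default: sequential checkout */

        if (git_config_get_int("checkout.thresholdForParallelism", threshold))
                *threshold = 100; /* default threshold from the benchmarks */
}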
/*
 * Put parallel checkout into the PC_ACCEPTING_ENTRIES state. Should be used
 * only when in the PC_UNINITIALIZED state.
 */
void init_parallel_checkout(void);

/*
 * Return -1 if parallel checkout is currently not accepting entries or if the
 * entry is not eligible for parallel checkout. Otherwise, enqueue the entry
 * for later write and return 0.
 */
int enqueue_checkout(struct cache_entry *ce, struct conv_attrs *ca);
/*
 * Write all the queued entries, returning 0 on success. If the number of
 * entries is smaller than the specified threshold, the operation is performed
 * sequentially.
 */
int run_parallel_checkout(struct checkout *state, int num_workers, int threshold);
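/*
 * Illustrative sketch, not code from git's tree: how a caller might combine
 * the user-facing API above. checkout_entry() is assumed from git's entry.h,
 * and error handling is simplified. While parallel checkout is accepting
 * entries, checkout_entry() hands the eligible ones to enqueue_checkout()
 * instead of writing them immediately, so the loop itself stays unchanged.
 */
static int write_all_entries(struct checkout *state,
                             struct cache_entry **cache, int nr)
{
        int num_workers, threshold, i, errs = 0;

        get_parallel_checkout_configs(&num_workers, &threshold);
        if (num_workers > 1)
                init_parallel_checkout();

        for (i = 0; i < nr; i++)
                errs |= checkout_entry(cache[i], state, NULL, NULL);

        if (parallel_checkout_status() == PC_ACCEPTING_ENTRIES)
                errs |= run_parallel_checkout(state, num_workers, threshold);

        return errs;
}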
/****************************************************************
 * Interface with checkout--worker
 ****************************************************************/

enum pc_item_status {
        PC_ITEM_PENDING = 0,
        PC_ITEM_WRITTEN,
        /*
         * The entry could not be written because there was another file
         * already present in its path or leading directories. Since
         * checkout_entry_ca() removes such files from the working tree before
         * enqueueing the entry for parallel checkout, it means that there was
         * a path collision among the entries being written.
         */
        PC_ITEM_COLLIDED,
        PC_ITEM_FAILED,
};
struct parallel_checkout_item {
        /*
         * In the main process, ce points to an istate->cache[] entry, so it
         * is not owned by us. In the workers, ce owns the memory, which
         * *must be* released.
         */
        struct cache_entry *ce;
        struct conv_attrs ca;
        size_t id; /* position in parallel_checkout.items[] of main process */

        /* Output fields, sent from workers. */
        enum pc_item_status status;
        struct stat st;
};
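/*
 * Illustrative sketch, not git's actual handling: one way the main process
 * could deal with PC_ITEM_COLLIDED results is to rewrite the collided
 * entries sequentially after all workers have finished, so those paths get
 * the same semantics they would have had under a plain sequential checkout.
 * write_pc_item() is declared at the end of this header.
 */
static void write_collided_entries(struct parallel_checkout_item *items,
                                   size_t nr, struct checkout *state)
{
        size_t i;

        for (i = 0; i < nr; i++) {
                if (items[i].status != PC_ITEM_COLLIDED)
                        continue;
                /* Retry sequentially, in the main process. */
                write_pc_item(&items[i], state);
        }
}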
/*
 * The fixed-size portion of `struct parallel_checkout_item` that is sent to
 * the workers. Following this will be 2 strings: ca.working_tree_encoding and
 * ce.name. These are NOT null terminated, since we have the size in the fixed
 * portion.
 *
 * Note that not all fields of conv_attrs and cache_entry are passed, only the
 * ones that will be required by the workers to smudge and write the entry.
 */
struct pc_item_fixed_portion {
        size_t id;
        struct object_id oid;
        unsigned int ce_mode;
        enum convert_crlf_action crlf_action;
        int ident;
        size_t working_tree_encoding_len;
        size_t name_len;
};
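/*
 * Illustrative sketch, not necessarily the actual serialization code: how the
 * main process might pack one item for a worker, following the layout
 * described above (the fixed portion first, then the two strings whose
 * lengths it records). It assumes git's strbuf API and oidcpy().
 */
static void pack_item(struct strbuf *buf,
                      const struct parallel_checkout_item *pc_item)
{
        struct pc_item_fixed_portion fixed;
        const char *enc = pc_item->ca.working_tree_encoding;

        memset(&fixed, 0, sizeof(fixed)); /* don't send uninitialized padding */
        fixed.id = pc_item->id;
        oidcpy(&fixed.oid, &pc_item->ce->oid);
        fixed.ce_mode = pc_item->ce->ce_mode;
        fixed.crlf_action = pc_item->ca.crlf_action;
        fixed.ident = pc_item->ca.ident;
        fixed.working_tree_encoding_len = enc ? strlen(enc) : 0;
        fixed.name_len = pc_item->ce->ce_namelen;

        strbuf_add(buf, &fixed, sizeof(fixed));
        if (fixed.working_tree_encoding_len)
                strbuf_add(buf, enc, fixed.working_tree_encoding_len);
        strbuf_add(buf, pc_item->ce->name, fixed.name_len);
}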
/*
 * The fields of `struct parallel_checkout_item` that are returned by the
 * workers. Note: `st` must be the last one, as it is omitted on error.
 */
struct pc_item_result {
        size_t id;
        enum pc_item_status status;
        struct stat st;
};

#define PC_ITEM_RESULT_BASE_SIZE offsetof(struct pc_item_result, st)
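/*
 * Illustrative worker-side sketch, not necessarily the actual code: reporting
 * a result back to the main process while honoring the rule above that `st`
 * is only sent when the entry was actually written. It assumes git's
 * packet_write() from pkt-line.h.
 */
static void report_result(int out_fd,
                          const struct parallel_checkout_item *pc_item)
{
        struct pc_item_result res = { 0 };
        size_t size = PC_ITEM_RESULT_BASE_SIZE;

        res.id = pc_item->id;
        res.status = pc_item->status;
        if (pc_item->status == PC_ITEM_WRITTEN) {
                res.st = pc_item->st; /* stat data is only meaningful on success */
                size = sizeof(res);
        }
        packet_write(out_fd, (const char *)&res, size);
}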
void write_pc_item(struct parallel_checkout_item *pc_item,
                   struct checkout *state);

#endif /* PARALLEL_CHECKOUT_H */