1 ############## FTI CONFIGURATION FILE ###############
3 # *****************************************************************
4 # *** Here are the main parameters you should provide to FTI ******
5 # *****************************************************************
8 # Set to 1 if you want to dedicate 1 MPI rank per node to FTI
9 # Set to 0 if you want ALL ckpt. post-processing to be done inline
12 # The number of processes launched per node (same for every node),
13 # including the FTI-dedicated process, if any.
16 # LOCAL directory where the local checkpoints will be stored
17 # This directory MUST exist and have write access (e.g. /path/to/local/storage/)
18 Ckpt_dir = ./Local
20 # GLOBAL directory where the global checkpoints will be stored
21 # This directory MUST exist and have write access (e.g. /path/to/global/storage/)
22 Glbl_dir = ./Global
24 # GLOBAL directory where the FTI metadata will be stored
25 # This directory MUST exist and have write access (e.g. /home/username/.fti)
26 Meta_dir = ./Meta
28 # Interval in minutes between Level 1 (L1) checkpoints (local write)
31 # Interval in minutes between Level 2 (L2) checkpoints (partner copy)
34 # Interval in minutes between Level 3 (L3) checkpoints (Reed-Solomon)
37 # Interval in minutes between Level 4 (L4) checkpoints (PFS write)
40 # dCP interval in minutes for level 4 checkpoints
41 # dCP - differential checkpointing
42 # This setting requires io_mode=3 (FTI-FF) and dcp_enabled=1
45 # 1 if Level 2 ckpt is inline (synchronous), 0 if not (asynchronous)
48 # 1 if Level 3 ckpt is inline (synchronous), 0 if not (asynchronous)
51 # 1 if Level 4 ckpt is inline (synchronous), 0 if not (asynchronous)
54 # Set to 1 if you want to save the last checkpoint taken before finalize
55 # Set to 0 if you want to erase all checkpoints after finalize
58 # If enabled, all level 4 checkpoints of the execution will be kept in 'Glbl_dir/l4_archive'
61 # The size of the encoding groups (Something between 4 and 16)
62 # The total number of nodes MUST be a multiple of this parameter
65 # Number of iterations between iteration length sync (0 => 512 iterations)
66 # If your app has iterations of varying length, set this value between 1 and 10
77 # Enable staging feature
80 # Enable differential checkpointing (dCP)
83 # Select dCP hashing algorithm:
86 # The mode may also be set via the environment variable 'FTI_DCP_HASH_MODE=[0|1]'
87 # The environment variable overrides the setting from the configuration file!
90 # Hash-partition block size used for dCP, in bytes
91 # The partition block size, b, must satisfy: 512 < b < USHRT_MAX (bytes)
92 # b may also be set via the environment variable 'FTI_DCP_BLOCK_SIZE=b' (in bytes)
93 # The environment variable overrides the setting from the configuration file!
94 dCP_Block_Size = 16384
96 # The verbosity of FTI. (2 is recommended)
97 # 3 (Print only errors, silent mode)
98 # 2 (Print errors and a few important messages)
99 # 1 (Print debug messages, very verbose)
102 # *****************************************************************
103 # *** Change these parameters ONLY in case of restart ***********
104 # *****************************************************************
108 # Set this to 0 if you are launching this job for the first time
109 # Set this to 1 if you are recovering this job after a failure
112 # Set to the execution ID of the job being recovered when restarting after a failure
113 # Set to NULL for a normal execution
114 Exec_ID = XXXX-XX-XX_XX-XX-XX
117 # *****************************************************************
118 # *** Change these parameters to inject failures. ***********
119 # *****************************************************************
123 # Rank of the process that injects the failures
126 # Total number of bit-flips to inject
129 # Bit position of the injection
132 # Injection frequency in seconds
136 # *****************************************************************
137 # *** Change something here ONLY if you know what you are doing ***
138 # *****************************************************************
141 # The ckpt files are decomposed into blocks of size Block_size KB
144 # The ckpt files are transferred in chunks of size Transfer_size MB
148 # The tags for MPI communications done within the FTI library
154 # Set to 1 if you are running a local test on a single computer
157 # This option only takes effect if -DENABLE_LUSTRE was passed to the CMake command.
158 # It sets the striping unit for the MPI-IO file (4194304 = 4 * 1024 * 1024; presumably bytes - TODO confirm).
159 lustre_striping_unit = 4194304
161 # This option only takes effect if -DENABLE_LUSTRE was passed to the CMake command.
162 # It sets the striping factor for the MPI-IO file. NOTE(review): -1 presumably selects the file-system default - confirm.
163 lustre_striping_factor = -1
165 # This option only takes effect if -DENABLE_LUSTRE was passed to the CMake command.
166 # It sets the striping offset for the MPI-IO file. NOTE(review): -1 presumably selects the file-system default - confirm.
167 lustre_striping_offset = -1