Fault Tolerance Interface
|
Functions for the FTI File Format (FTI-FF). More...
#include "interface.h"
Macros | |
#define | _GNU_SOURCE |
Functions | |
int | FTIFF_ReadDbFTIFF (FTIT_configuration *FTI_Conf, FTIT_execution *FTI_Exec, FTIT_checkpoint *FTI_Ckpt) |
Reads datablock structure for FTI File Format from ckpt file. More... | |
int | FTIFF_GetFileChecksum (FTIFF_metaInfo *FTIFF_Meta, FTIT_checkpoint *FTI_Ckpt, int fd, unsigned char *hash) |
Determines checksum of checkpoint data. More... | |
int | FTIFF_UpdateDatastructFTIFF (FTIT_execution *FTI_Exec, FTIT_dataset *FTI_Data, FTIT_configuration *FTI_Conf) |
Updates datablock structure for FTI File Format. More... | |
int | FTIFF_WriteFTIFF (FTIT_configuration *FTI_Conf, FTIT_execution *FTI_Exec, FTIT_topology *FTI_Topo, FTIT_checkpoint *FTI_Ckpt, FTIT_dataset *FTI_Data) |
Writes ckpt to local/PFS using FTIFF. More... | |
int | FTIFF_CreateMetadata (FTIT_execution *FTI_Exec, FTIT_topology *FTI_Topo, FTIT_dataset *FTI_Data, FTIT_configuration *FTI_Conf) |
Assign meta data to runtime and file meta data types. More... | |
int | FTIFF_Recover (FTIT_execution *FTI_Exec, FTIT_dataset *FTI_Data, FTIT_checkpoint *FTI_Ckpt) |
Recovers protected data to the variable pointers for FTI-FF. More... | |
int | FTIFF_RecoverVar (int id, FTIT_execution *FTI_Exec, FTIT_dataset *FTI_Data, FTIT_checkpoint *FTI_Ckpt) |
Recovers protected data to the variable pointer with id. More... | |
int | FTIFF_CheckL1RecoverInit (FTIT_execution *FTI_Exec, FTIT_topology *FTI_Topo, FTIT_checkpoint *FTI_Ckpt) |
Init of FTI-FF L1 recovery. More... | |
int | FTIFF_CheckL2RecoverInit (FTIT_execution *FTI_Exec, FTIT_topology *FTI_Topo, FTIT_checkpoint *FTI_Ckpt, int *exists) |
Init of FTI-FF L2 recovery. More... | |
int | FTIFF_CheckL3RecoverInit (FTIT_execution *FTI_Exec, FTIT_topology *FTI_Topo, FTIT_checkpoint *FTI_Ckpt, int *erased) |
Init of FTI-FF L3 recovery. More... | |
int | FTIFF_CheckL4RecoverInit (FTIT_execution *FTI_Exec, FTIT_topology *FTI_Topo, FTIT_checkpoint *FTI_Ckpt) |
Init of FTI-FF L4 recovery. More... | |
void | FTIFF_GetHashMetaInfo (unsigned char *hash, FTIFF_metaInfo *FTIFFMeta) |
Computes hash of the FTI-FF file meta data structure. More... | |
void | FTIFF_GetHashdb (unsigned char *hash, FTIFF_db *db) |
Computes hash of the FTI-FF file data block meta data structure. More... | |
void | FTIFF_GetHashdbvar (unsigned char *hash, FTIFF_dbvar *dbvar) |
Computes hash of the FTI-FF data chunk meta data structure. More... | |
void | FTIFF_InitMpiTypes () |
Initializes the derived MPI data types used for FTI-FF. More... | |
int | FTIFF_DeserializeFileMeta (FTIFF_metaInfo *meta, char *buffer_ser) |
Deserializes FTI-FF file meta data. More... | |
int | FTIFF_DeserializeDbMeta (FTIFF_db *db, char *buffer_ser) |
Deserializes FTI-FF file data block meta data. More... | |
int | FTIFF_DeserializeDbVarMeta (FTIFF_dbvar *dbvar, char *buffer_ser) |
Deserializes FTI-FF data chunk meta data. More... | |
int | FTIFF_SerializeFileMeta (FTIFF_metaInfo *meta, char *buffer_ser) |
Serializes FTI-FF file meta data. More... | |
int | FTIFF_SerializeDbMeta (FTIFF_db *db, char *buffer_ser) |
Serializes FTI-FF file data block meta data. More... | |
int | FTIFF_SerializeDbVarMeta (FTIFF_dbvar *dbvar, char *buffer_ser) |
Serializes FTI-FF data chunk meta data. More... | |
void | FTIFF_FreeDbFTIFF (FTIFF_db *last) |
Frees allocated memory for the FTI-FF meta data struct list. More... | |
void | FTIFF_PrintDataStructure (int rank, FTIT_execution *FTI_Exec, FTIT_dataset *FTI_Data) |
Variables | |
MPI_Datatype | FTIFF_MpiTypes [FTIFF_NUM_MPI_TYPES] |
Functions for the FTI File Format (FTI-FF).
Copyright (c) 2017 Leonardo A. Bautista-Gomez All rights reserved
FTI - A multi-level checkpointing library for C/C++/Fortran applications
Revision 1.0 : Fault Tolerance Interface (FTI)
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define _GNU_SOURCE |
int FTIFF_CheckL1RecoverInit | ( | FTIT_execution * | FTI_Exec, |
FTIT_topology * | FTI_Topo, | ||
FTIT_checkpoint * | FTI_Ckpt | ||
) |
Init of FTI-FF L1 recovery.
FTI_Exec | Execution metadata. |
FTI_Topo | Topology metadata. |
FTI_Ckpt | Checkpoint metadata. |
This function initializes the L1 checkpoint recovery. It checks for erasures and loads the required meta data.
int FTIFF_CheckL2RecoverInit | ( | FTIT_execution * | FTI_Exec, |
FTIT_topology * | FTI_Topo, | ||
FTIT_checkpoint * | FTI_Ckpt, | ||
int * | exists | ||
) |
Init of FTI-FF L2 recovery.
FTI_Exec | Execution metadata. |
FTI_Topo | Topology metadata. |
FTI_Ckpt | Checkpoint metadata. |
exists | Array with info of erased files |
This function initializes the L2 checkpoint recovery. It checks for erasures and loads the required meta data.
int FTIFF_CheckL3RecoverInit | ( | FTIT_execution * | FTI_Exec, |
FTIT_topology * | FTI_Topo, | ||
FTIT_checkpoint * | FTI_Ckpt, | ||
int * | erased | ||
) |
Init of FTI-FF L3 recovery.
FTI_Exec | Execution metadata. |
FTI_Topo | Topology metadata. |
FTI_Ckpt | Checkpoint metadata. |
erased | Array with info of erased files |
This function initializes the L3 checkpoint recovery. It checks for erasures and loads the required meta data.
int FTIFF_CheckL4RecoverInit | ( | FTIT_execution * | FTI_Exec, |
FTIT_topology * | FTI_Topo, | ||
FTIT_checkpoint * | FTI_Ckpt | ||
) |
Init of FTI-FF L4 recovery.
FTI_Exec | Execution metadata. |
FTI_Topo | Topology metadata. |
FTI_Ckpt | Checkpoint metadata. |
checksum | Ckpt file checksum |
This function initializes the L4 checkpoint recovery. It checks for erasures and loads the required meta data.
int FTIFF_CreateMetadata | ( | FTIT_execution * | FTI_Exec, |
FTIT_topology * | FTI_Topo, | ||
FTIT_dataset * | FTI_Data, | ||
FTIT_configuration * | FTI_Conf | ||
) |
Assign meta data to runtime and file meta data types.
FTI_Conf | Configuration metadata. |
FTI_Exec | Execution metadata. |
FTI_Topo | Topology metadata. |
FTI_Data | Dataset metadata. |
This function gathers information about the checkpoint files in the group and stores it in the respective meta data types runtime and ckpt file.
int FTIFF_DeserializeDbMeta | ( | FTIFF_db * | db, |
char * | buffer_ser | ||
) |
Deserializes FTI-FF file data block meta data.
db | FTI-FF file data block meta data. |
buffer_ser | serialized file data block meta data. |
int FTIFF_DeserializeDbVarMeta | ( | FTIFF_dbvar * | dbvar, |
char * | buffer_ser | ||
) |
Deserializes FTI-FF data chunk meta data.
dbvar | FTI-FF data chunk meta data. |
buffer_ser | serialized data chunk meta data. |
int FTIFF_DeserializeFileMeta | ( | FTIFF_metaInfo * | meta, |
char * | buffer_ser | ||
) |
Deserializes FTI-FF file meta data.
meta | FTI-FF file meta data. |
buffer_ser | serialized file meta data. |
void FTIFF_FreeDbFTIFF | ( | FTIFF_db * | last | ) |
Frees allocated memory for the FTI-FF meta data struct list.
last | Last element in FTI-FF metadata list. |
int FTIFF_GetFileChecksum | ( | FTIFF_metaInfo * | FTIFF_Meta, |
FTIT_checkpoint * | FTI_Ckpt, | ||
int | fd, | ||
unsigned char * | hash | ||
) |
Determines checksum of checkpoint data.
FTIFF_Meta | FTI-FF file meta data. |
FTI_Ckpt | Checkpoint metadata. |
fd | file descriptor. |
hash | pointer to MD5 digest container. |
This function computes the FTI-FF file checksum and places the MD5 digest into the 'hash' buffer. The buffer has to be allocated for at least MD5_DIGEST_LENGTH bytes.
void FTIFF_GetHashdb | ( | unsigned char * | hash, |
FTIFF_db * | db | ||
) |
Computes hash of the FTI-FF file data block meta data structure.
hash | hash to compute. |
db | file data block meta data. |
void FTIFF_GetHashdbvar | ( | unsigned char * | hash, |
FTIFF_dbvar * | dbvar | ||
) |
Computes hash of the FTI-FF data chunk meta data structure.
hash | hash to compute. |
dbvar | data chunk meta data. |
void FTIFF_GetHashMetaInfo | ( | unsigned char * | hash, |
FTIFF_metaInfo * | FTIFFMeta | ||
) |
Computes hash of the FTI-FF file meta data structure.
hash | hash to compute. |
FTIFFMeta | Ckpt file meta data. |
void FTIFF_InitMpiTypes | ( | ) |
Initializes the derived MPI data types used for FTI-FF.
+-------------------------------------------------------------------------+
|   FUNCTION DECLARATIONS                                                 |
+-------------------------------------------------------------------------+
void FTIFF_PrintDataStructure | ( | int | rank, |
FTIT_execution * | FTI_Exec, | ||
FTIT_dataset * | FTI_Data | ||
) |
int FTIFF_ReadDbFTIFF | ( | FTIT_configuration * | FTI_Conf, |
FTIT_execution * | FTI_Exec, | ||
FTIT_checkpoint * | FTI_Ckpt | ||
) |
Reads datablock structure for FTI File Format from ckpt file.
+-------------------------------------------------------------------------+
|   FUNCTION DEFINITIONS                                                  |
+-------------------------------------------------------------------------+
FTI_Conf | Configuration metadata. |
FTI_Exec | Execution metadata. |
FTI_Ckpt | Checkpoint metadata. |
Builds meta data list from checkpoint file for the FTI File Format
int FTIFF_Recover | ( | FTIT_execution * | FTI_Exec, |
FTIT_dataset * | FTI_Data, | ||
FTIT_checkpoint * | FTI_Ckpt | ||
) |
Recovers protected data to the variable pointers for FTI-FF.
FTI_Exec | Execution metadata. |
FTI_Ckpt | Checkpoint metadata. |
FTI_Data | Dataset metadata. |
This function restores the data of the protected variables to the state of the last checkpoint. The function is called by the API function 'FTI_Recover'.
int FTIFF_RecoverVar | ( | int | id, |
FTIT_execution * | FTI_Exec, | ||
FTIT_dataset * | FTI_Data, | ||
FTIT_checkpoint * | FTI_Ckpt | ||
) |
Recovers protected data to the variable pointer with id.
id | Id of protected variable. |
FTI_Exec | Execution metadata. |
FTI_Data | Dataset metadata. |
FTI_Ckpt | Checkpoint metadata. |
This function restores the data to the protected variable with given id as it was checkpointed during the last checkpoint. The function is called by the API function 'FTI_RecoverVar'.
int FTIFF_SerializeDbMeta | ( | FTIFF_db * | db, |
char * | buffer_ser | ||
) |
Serializes FTI-FF file data block meta data.
db | FTI-FF file data block meta data. |
buffer_ser | serialized file data block meta data. |
int FTIFF_SerializeDbVarMeta | ( | FTIFF_dbvar * | dbvar, |
char * | buffer_ser | ||
) |
Serializes FTI-FF data chunk meta data.
dbvar | FTI-FF data chunk meta data. |
buffer_ser | serialized data chunk meta data. |
int FTIFF_SerializeFileMeta | ( | FTIFF_metaInfo * | meta, |
char * | buffer_ser | ||
) |
Serializes FTI-FF file meta data.
meta | FTI-FF file meta data. |
buffer_ser | serialized file meta data. |
int FTIFF_UpdateDatastructFTIFF | ( | FTIT_execution * | FTI_Exec, |
FTIT_dataset * | FTI_Data, | ||
FTIT_configuration * | FTI_Conf | ||
) |
Updates datablock structure for FTI File Format.
FTI_Exec | Execution metadata. |
FTI_Data | Dataset metadata. |
FTI_Conf | Configuration metadata. |
Updates information about the checkpoint file. Updates file pointers in the dbvar structures and updates the db structure.
int FTIFF_WriteFTIFF | ( | FTIT_configuration * | FTI_Conf, |
FTIT_execution * | FTI_Exec, | ||
FTIT_topology * | FTI_Topo, | ||
FTIT_checkpoint * | FTI_Ckpt, | ||
FTIT_dataset * | FTI_Data | ||
) |
Writes ckpt to local/PFS using FTIFF.
FTI_Conf | Configuration metadata. |
FTI_Exec | Execution metadata. |
FTI_Topo | Topology metadata. |
FTI_Ckpt | Checkpoint metadata. |
FTI_Data | Dataset metadata. |
+--------------+ +------------------------+
|              | |                        |
|      FB      | |           VB           |
|              | |                        |
+--------------+ +------------------------+
The FB (file block) holds meta data related to the file whereas the VB (variable block) holds meta and actual data of the variables protected by FTI.
|<------------------------------------ VB ------------------------------------>|
|<------------ VCB_1--------------->|      |<------------ VCB_n--------------->|
+-----------------------------------+      +-----------------------------------+
| +-------++-------+      +-------+ |      | +-------++-------+      +-------+ |
| |       ||       |      |       | |      | |       ||       |      |       | |
| | VMB_1 || VC_11 | ---- | VC_1k | | ---- | | VMB_n || VC_n1 | ---- | VC_nl | |
| |       ||       |      |       | |      | |       ||       |      |       | |
| +-------++-------+      +-------+ |      | +-------++-------+      +-------+ |
+-----------------------------------+      +-----------------------------------+
VMB_i (FTIFF_db + FTIFF_dbvar structures) keeps the data block metadata and VC_ij are the data chunks.
MPI_Datatype FTIFF_MpiTypes[FTIFF_NUM_MPI_TYPES] |
+-------------------------------------------------------------------------+
|   STATIC TYPE DECLARATIONS                                              |
+-------------------------------------------------------------------------+