Docs

README

Binary Files in C

Table of Contents

  1. Introduction to Binary Files
  2. Binary vs Text Mode
  3. Opening Files in Binary Mode
  4. Writing Binary Data with fwrite()
  5. Reading Binary Data with fread()
  6. Storing Structures in Binary Files
  7. Binary File Headers
  8. Endianness Considerations
  9. Portable Binary Files
  10. Common Binary File Patterns
  11. Best Practices
  12. Common Pitfalls

Introduction to Binary Files

Binary files store data in the same format as it exists in memory - as raw bytes. Unlike text files that store human-readable characters, binary files store data in its native representation.

Why Use Binary Files?

  1. Efficiency: Binary files are usually smaller and faster to read/write
  2. Precision: No loss of precision for floating-point numbers
  3. Speed: Direct memory-to-disk transfer without conversion
  4. Structure Preservation: Complex data structures can be stored intact

Binary vs Text Representation

Integer 12345 in text mode:    "12345" (5 bytes)
Integer 12345 in binary mode:  4 bytes (raw int)

Float 3.14159 in text mode:    "3.14159" (7 bytes)
Float 3.14159 in binary mode:  4 bytes (raw float)

Binary vs Text Mode

Key Differences

| Aspect | Text Mode | Binary Mode |
|---|---|---|
| Newline handling | Converted (\n ↔ \r\n on Windows) | No conversion |
| EOF character | Ctrl+Z may signal EOF on Windows | No special meaning |
| Data format | Human-readable characters | Raw bytes |
| Size | Often larger | Usually smaller |
| Portability | More portable | Less portable |

Newline Translation (Windows Issue)

On Windows, text mode:

  • Writes: \n → \r\n (LF to CRLF)
  • Reads: \r\n → \n (CRLF to LF)

Binary mode performs NO translation.

When to Use Binary Mode

  • Storing structs directly to disk
  • Image, audio, or video files
  • Database files
  • Any non-text data
  • When exact byte representation matters

Opening Files in Binary Mode

Binary Mode Specifiers

// Each fopen() call below demonstrates one binary mode string.
// NOTE: fp is reassigned every time without an intervening fclose(), so in a
// real program use a single call, or close the stream before reopening it.
FILE *fp;

// Read binary (the file must already exist)
fp = fopen("data.bin", "rb");

// Write binary (creates the file or truncates an existing one)
fp = fopen("data.bin", "wb");

// Append binary (creates the file if needed)
fp = fopen("data.bin", "ab");

// Read and write binary
fp = fopen("data.bin", "r+b");  // or "rb+"
fp = fopen("data.bin", "w+b");  // or "wb+"
fp = fopen("data.bin", "a+b");  // or "ab+"

Mode Summary

| Mode | Description |
|---|---|
| "rb" | Read binary, file must exist |
| "wb" | Write binary, creates/truncates file |
| "ab" | Append binary, creates if needed |
| "r+b" | Read/write binary, file must exist |
| "w+b" | Read/write binary, creates/truncates |
| "a+b" | Read/append binary, creates if needed |

Example: Opening Binary File

#include <stdio.h>

// Open data.bin for binary reading; exit status is 1 when the open fails
// (after reporting the reason with perror) and 0 otherwise.
int main(void) {
    int status = 1;
    FILE *input = fopen("data.bin", "rb");

    if (input != NULL) {
        // File operations...

        fclose(input);
        status = 0;
    } else {
        perror("Error opening file");
    }

    return status;
}

Writing Binary Data with fwrite()

fwrite() Function

size_t fwrite(const void *ptr, size_t size, size_t count, FILE *stream);

Parameters:

  • ptr: Pointer to data to write
  • size: Size of each element in bytes
  • count: Number of elements to write
  • stream: File pointer

Returns: Number of elements successfully written (less than count only if a write error occurs)

Writing Single Values

#include <stdio.h>

// Write one int, one float and one double to numbers.bin as raw bytes.
// Exit status is 0 on success and 1 when the file cannot be opened, a value
// cannot be written, or the final flush in fclose() fails.
int main(void) {
    FILE *fp = fopen("numbers.bin", "wb");
    if (fp == NULL) {
        perror("numbers.bin");
        return 1;
    }

    int num = 12345;
    float pi = 3.14159f;
    double e = 2.71828;

    // fwrite() returns the number of elements written; anything other than
    // 1 here means the value did not reach the stream.
    int ok = fwrite(&num, sizeof num, 1, fp) == 1 &&
             fwrite(&pi, sizeof pi, 1, fp) == 1 &&
             fwrite(&e, sizeof e, 1, fp) == 1;
    if (!ok) {
        perror("Write failed");
    }

    // fclose() flushes buffered output, so it can fail as well.
    if (fclose(fp) != 0) {
        perror("Error closing file");
        ok = 0;
    }

    return ok ? 0 : 1;
}

Writing Arrays

#include <stdio.h>

// Write a 10-element int array to array.bin with a single fwrite() call.
// Exit status is 0 only when every element was written and the file closed
// cleanly.
int main(void) {
    FILE *fp = fopen("array.bin", "wb");
    if (fp == NULL) return 1;

    int numbers[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
    // Derive the element count from the array so the two cannot drift apart.
    const size_t count = sizeof numbers / sizeof numbers[0];

    // Write entire array at once
    size_t written = fwrite(numbers, sizeof numbers[0], count, fp);

    printf("Wrote %zu integers\n", written);

    int status = 0;
    if (written != count) {
        perror("Write failed");
        status = 1;
    }

    // Buffered data is flushed here; a failure means the file is incomplete.
    if (fclose(fp) != 0) {
        perror("Error closing file");
        status = 1;
    }

    return status;
}

Checking Write Success

// fwrite() returns fewer elements than requested only when a write error
// occurs. End-of-file is an input condition, so feof() is not consulted here.
size_t written = fwrite(data, sizeof(int), count, fp);
if (written != count) {
    // The first `written` elements were stored; the error indicator of fp
    // is now set and can also be queried with ferror(fp).
    if (ferror(fp)) {
        printf("Write error occurred\n");
    }
}

Reading Binary Data with fread()

fread() Function

size_t fread(void *ptr, size_t size, size_t count, FILE *stream);

Parameters:

  • ptr: Pointer to buffer to store data
  • size: Size of each element in bytes
  • count: Number of elements to read
  • stream: File pointer

Returns: Number of elements successfully read (may be less than count at end of file or on a read error)

Reading Single Values

#include <stdio.h>

// Read back the int, float and double stored in numbers.bin and print them.
// Exit status is 1 when the file cannot be opened or is too short.
int main(void) {
    FILE *fp = fopen("numbers.bin", "rb");
    if (fp == NULL) return 1;

    int num;
    float pi;
    double e;

    // Each fread() must deliver exactly one element; otherwise the target
    // variable is not filled in and must not be printed.
    if (fread(&num, sizeof num, 1, fp) != 1 ||
        fread(&pi, sizeof pi, 1, fp) != 1 ||
        fread(&e, sizeof e, 1, fp) != 1) {
        fprintf(stderr, "numbers.bin: %s\n",
                feof(fp) ? "unexpected end of file" : "read error");
        fclose(fp);
        return 1;
    }

    printf("int: %d, float: %f, double: %f\n", num, pi, e);

    fclose(fp);
    return 0;
}

Reading Arrays

#include <stdio.h>

// Read up to 10 ints from array.bin and print however many were available.
int main(void) {
    FILE *fp = fopen("array.bin", "rb");
    if (fp == NULL) return 1;

    int numbers[10];
    const size_t capacity = sizeof numbers / sizeof numbers[0];

    // fread() may return fewer than `capacity` elements if the file is short.
    size_t n_read = fread(numbers, sizeof numbers[0], capacity, fp);

    printf("Read %zu integers:\n", n_read);
    // The index is size_t so it is compared with n_read without a
    // signed/unsigned conversion.
    for (size_t i = 0; i < n_read; i++) {
        printf("%d ", numbers[i]);
    }
    printf("\n");

    fclose(fp);
    return 0;
}

Reading Until EOF

#include <stdio.h>

// Print every int stored in data.bin, stopping at end of file.
// Exit status is 1 when the file cannot be opened or a read error ends the
// loop before end of file.
int main(void) {
    FILE *fp = fopen("data.bin", "rb");
    if (fp == NULL) return 1;

    int value;
    int count = 0;
    int status = 0;

    while (fread(&value, sizeof(int), 1, fp) == 1) {
        printf("Value %d: %d\n", count++, value);
    }

    // The loop ends on either condition; only feof() means the whole file
    // was consumed.
    if (ferror(fp)) {
        perror("Read error");
        status = 1;
    } else if (feof(fp)) {
        printf("End of file reached\n");
    }

    fclose(fp);
    return status;
}

Storing Structures in Binary Files

Writing Structures

#include <stdio.h>
#include <string.h>

// One student record. The struct is written to disk as a single unit, so its
// in-memory layout (including any compiler padding) is the file layout.
struct Student {
    int id;          // numeric identifier
    char name[50];   // text stored inline in a fixed-size buffer
    float gpa;       // GPA value
};

// Write a record count followed by an array of Student structs to
// students.bin. Exit status is 1 if the file cannot be opened or written.
int main(void) {
    struct Student students[] = {
        {1001, "Alice Johnson", 3.85f},
        {1002, "Bob Smith", 3.42f},
        {1003, "Carol Williams", 3.91f}
    };
    // Derived from the array so the stored count always matches the data.
    int count = (int)(sizeof students / sizeof students[0]);

    FILE *fp = fopen("students.bin", "wb");
    if (fp == NULL) return 1;

    // Write count first, then all structures; stop at the first short write.
    if (fwrite(&count, sizeof count, 1, fp) != 1 ||
        fwrite(students, sizeof students[0], (size_t)count, fp) != (size_t)count) {
        perror("Write failed");
        fclose(fp);
        return 1;
    }

    // fclose() flushes buffered records; report a failed flush as an error.
    if (fclose(fp) != 0) {
        perror("Error closing file");
        return 1;
    }
    printf("Wrote %d student records\n", count);

    return 0;
}

Reading Structures

#include <stdio.h>

// Record layout read back from students.bin. It must be identical to the
// struct used when the file was written, because records are read as raw
// bytes directly into this type.
struct Student {
    int id;          // numeric identifier
    char name[50];   // text stored inline in a fixed-size buffer
    float gpa;       // GPA value
};

int main(void) {
    FILE *fp = fopen("students.bin", "rb");
    if (fp == NULL) return 1;

    int count;
    fread(&count, sizeof(int), 1, fp);

    struct Student students[count];
    fread(students, sizeof(struct Student), count, fp);

    for (int i = 0; i < count; i++) {
        printf("ID: %d, Name: %s, GPA: %.2f\n",
               students[i].id, students[i].name, students[i].gpa);
    }

    fclose(fp);
    return 0;
}

Structure Padding Considerations

Warning: Structure padding can cause portability issues!

// Demonstrates compiler-inserted padding: the members total 6 bytes, but
// alignment of the int member typically makes the struct larger.
struct Example {
    char a;      // 1 byte
    // 3 bytes padding (on many systems)
    int b;       // 4 bytes
    char c;      // 1 byte
    // 3 bytes padding
};
// Total size might be 12 bytes, not 6!

Check structure size:

// sizeof yields a size_t, which printf prints with %zu.
printf("Size of struct: %zu\n", sizeof(struct Example));

Binary File Headers

Binary files often start with a header containing metadata about the file content.

Basic Header Structure

#include <stdio.h>
#include <string.h>
#include <time.h>

#define MAGIC_NUMBER 0x12345678
#define VERSION 1

// Metadata block stored at the start of the file, ahead of the records.
// NOTE(review): unsigned int and time_t do not have the same width on every
// platform, so this on-disk layout is tied to the compiler/ABI that wrote it.
struct FileHeader {
    unsigned int magic;        // Magic number to identify file type
    unsigned int version;      // File format version
    unsigned int record_count; // Number of records
    unsigned int record_size;  // Size of each record
    time_t created;           // Creation timestamp
};

// Fixed-size payload record that follows the header in the file.
struct DataRecord {
    int id;           // record identifier
    char data[100];   // inline text buffer, printed as a C string by the reader
};

void write_file_with_header(void) {
    FILE *fp = fopen("data.bin", "wb");
    if (fp == NULL) return;

    // Prepare header
    struct FileHeader header;
    header.magic = MAGIC_NUMBER;
    header.version = VERSION;
    header.record_count = 3;
    header.record_size = sizeof(struct DataRecord);
    header.created = time(NULL);

    // Write header
    fwrite(&header, sizeof(struct FileHeader), 1, fp);

    // Write records
    struct DataRecord records[] = {
        {1, "First record"},
        {2, "Second record"},
        {3, "Third record"}
    };
    fwrite(records, sizeof(struct DataRecord), 3, fp);

    fclose(fp);
}

// Read data.bin, validate its header and print every record.
// Returns 0 when the header is valid and all announced records were read;
// returns -1 when the file cannot be opened, is truncated, or fails header
// validation.
int read_file_with_header(void) {
    FILE *fp = fopen("data.bin", "rb");
    if (fp == NULL) return -1;

    int status = -1;
    struct FileHeader header;

    // A short read leaves the header indeterminate, so stop before
    // inspecting any of its fields.
    if (fread(&header, sizeof header, 1, fp) != 1) {
        printf("Invalid file format!\n");
        goto done;
    }

    // Validate magic number
    if (header.magic != MAGIC_NUMBER) {
        printf("Invalid file format!\n");
        goto done;
    }

    // Check version
    if (header.version != VERSION) {
        printf("Unsupported version: %u\n", header.version);
        goto done;
    }

    // The loop below reads sizeof(struct DataRecord) bytes per record, so a
    // file announcing a different record size cannot be decoded.
    if (header.record_size != sizeof(struct DataRecord)) {
        printf("Unexpected record size: %u\n", header.record_size);
        goto done;
    }

    printf("Records: %u, Size: %u\n",
           header.record_count, header.record_size);

    // Read records
    for (unsigned int i = 0; i < header.record_count; i++) {
        struct DataRecord record;
        if (fread(&record, sizeof record, 1, fp) != 1) {
            printf("Truncated file: read %u of %u records\n",
                   i, header.record_count);
            goto done;
        }
        // Data from disk is not guaranteed to be NUL-terminated.
        record.data[sizeof record.data - 1] = '\0';
        printf("ID: %d, Data: %s\n", record.id, record.data);
    }

    status = 0;

done:
    fclose(fp);
    return status;
}

Endianness Considerations

What is Endianness?

Endianness is the order in which bytes are stored in memory:

  • Little-endian: Least significant byte first (x86, x64)
  • Big-endian: Most significant byte first (some ARM, network order)

Integer 0x12345678 in Memory

Little-endian: 78 56 34 12
Big-endian:    12 34 56 78

Detecting Endianness

#include <stdio.h>

// Report the host byte order: returns 1 when the lowest-addressed byte of an
// unsigned int holding 1 is itself 1, and 0 otherwise.
int is_little_endian(void) {
    const unsigned int probe = 1;
    const char *lowest_byte = (const char *)&probe;

    return lowest_byte[0] == 1;
}

// Print which byte order is_little_endian() detected on this machine.
int main(void) {
    if (!is_little_endian()) {
        printf("This system is big-endian\n");
        return 0;
    }

    printf("This system is little-endian\n");
    return 0;
}

Byte Swapping Functions

#include <stdint.h>

// Swap bytes for 16-bit value
// Exchange the two bytes of a 16-bit value.
uint16_t swap16(uint16_t val) {
    const uint16_t high_to_low = (uint16_t)(val >> 8);
    // The shift happens in int; the cast keeps only the low 16 bits.
    const uint16_t low_to_high = (uint16_t)(val << 8);

    return (uint16_t)(low_to_high | high_to_low);
}

// Swap bytes for 32-bit value
// Reverse the byte order of a 32-bit value.
uint32_t swap32(uint32_t val) {
    // Isolate each byte, numbered from least significant (b0) upward.
    const uint32_t b0 = val & 0xFF;
    const uint32_t b1 = (val >> 8) & 0xFF;
    const uint32_t b2 = (val >> 16) & 0xFF;
    const uint32_t b3 = (val >> 24) & 0xFF;

    // Reassemble with the positions mirrored.
    return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}

// Swap bytes for 64-bit value
// Reverse the byte order of a 64-bit value.
uint64_t swap64(uint64_t val) {
    uint64_t reversed = 0;

    // Peel bytes off the low end of val and push them onto the low end of
    // the result, so the first byte taken ends up most significant.
    for (int i = 0; i < 8; i++) {
        reversed = (reversed << 8) | (val & 0xFF);
        val >>= 8;
    }

    return reversed;
}

Portable Binary Files

Strategy 1: Use Fixed-Size Types

#include <stdint.h>

// Record built from fixed-width integer types so the integer field sizes do
// not depend on the platform's int/long.
// NOTE(review): fixed-width members alone do not fix the layout: the
// compiler may insert padding between id and timestamp, and byte order is
// still the host's. Confirm before sharing files across systems.
struct PortableRecord {
    int32_t id;         // Always 32 bits
    int64_t timestamp;  // Always 64 bits
    float value;        // float; IEEE 754 single precision on most platforms
};

Strategy 2: Write Fields Individually

#include <stdio.h>
#include <stdint.h>

// Write a 32-bit signed integer to fp as four bytes, least significant byte
// first, independent of the host byte order.
// A failed write is reported with perror(); the stream's error indicator is
// also set, so callers can test ferror(fp).
void write_int32_le(FILE *fp, int32_t value) {
    // Shift an unsigned copy: right-shifting a negative signed value is
    // implementation-defined.
    const uint32_t bits = (uint32_t)value;
    unsigned char bytes[4];

    bytes[0] = (unsigned char)(bits & 0xFF);
    bytes[1] = (unsigned char)((bits >> 8) & 0xFF);
    bytes[2] = (unsigned char)((bits >> 16) & 0xFF);
    bytes[3] = (unsigned char)((bits >> 24) & 0xFF);

    if (fwrite(bytes, 1, sizeof bytes, fp) != sizeof bytes) {
        perror("write_int32_le");
    }
}

// Read four bytes from fp and assemble them, least significant byte first,
// into a 32-bit signed integer.
// Returns 0 when fewer than four bytes are available; because 0 is also a
// valid value, callers that need to tell the cases apart should check
// feof(fp) / ferror(fp) afterwards.
int32_t read_int32_le(FILE *fp) {
    unsigned char bytes[4] = {0};

    if (fread(bytes, 1, sizeof bytes, fp) != sizeof bytes) {
        return 0;
    }

    // Widen each byte to uint32_t before shifting: a plain unsigned char is
    // promoted to int, and shifting a value >= 0x80 left by 24 would
    // overflow a 32-bit int.
    const uint32_t bits = (uint32_t)bytes[0] |
                          ((uint32_t)bytes[1] << 8) |
                          ((uint32_t)bytes[2] << 16) |
                          ((uint32_t)bytes[3] << 24);

    return (int32_t)bits;
}

Strategy 3: Store Endianness in Header

// Header that records the byte order used for the rest of the file, so a
// reader can decide whether multi-byte fields need swapping.
struct PortableHeader {
    char magic[4];        // "DATA"
    uint8_t endianness;   // 0 = little, 1 = big
    uint8_t version;      // format version
    uint16_t reserved;    // not given a meaning in this example
    uint32_t record_count; // number of records (multi-byte, so byte-order dependent)
};

Common Binary File Patterns

Pattern 1: Fixed-Size Records

// Size budget, in bytes, for one on-disk record.
#define RECORD_SIZE 128

// Record whose members add up to RECORD_SIZE bytes: data[] takes whatever is
// left after id.
// NOTE(review): this assumes the compiler adds no padding, i.e.
// sizeof(struct FixedRecord) == RECORD_SIZE — confirm on the target platform.
struct FixedRecord {
    int id;
    char data[RECORD_SIZE - sizeof(int)];
};

// Easy to calculate position: position = record_number * RECORD_SIZE

Pattern 2: Length-Prefixed Data

// Write variable-length string
// Write str to fp as a 32-bit length prefix followed by the string bytes
// (no terminating NUL). The prefix is stored in the host's byte order.
// Nothing is written when the string is too long for the prefix; I/O
// failures are reported with perror() and leave the stream's error
// indicator set.
void write_string(FILE *fp, const char *str) {
    const size_t full_len = strlen(str);

    // Refuse rather than silently truncate the length to 32 bits.
    if (full_len > UINT32_MAX) {
        fprintf(stderr, "write_string: string too long for 32-bit length prefix\n");
        return;
    }

    const uint32_t len = (uint32_t)full_len;
    if (fwrite(&len, sizeof len, 1, fp) != 1 ||
        fwrite(str, 1, len, fp) != len) {
        perror("write_string");
    }
}

// Read variable-length string
// Read a length-prefixed string (32-bit length, then that many bytes) from
// fp and return it as a newly allocated, NUL-terminated C string.
// The caller owns the result and must free() it.
// Returns NULL when the prefix or the payload cannot be read in full, or
// when memory cannot be allocated.
char *read_string(FILE *fp) {
    uint32_t len;
    if (fread(&len, sizeof len, 1, fp) != 1) {
        return NULL;
    }

    // Do the +1 in size_t: in 32-bit arithmetic a prefix of UINT32_MAX would
    // wrap to 0 and under-allocate the buffer.
    if ((size_t)len > SIZE_MAX - 1) {
        return NULL;
    }
    char *str = malloc((size_t)len + 1);
    if (str == NULL) {
        return NULL;
    }

    // The prefix comes from the file; a truncated payload must not be
    // returned as if it were complete.
    if (fread(str, 1, len, fp) != len) {
        free(str);
        return NULL;
    }

    str[len] = '\0';
    return str;
}

Pattern 3: Index + Data

// One entry of the index section.
// NOTE(review): offset/length presumably locate the entry's data block in
// the file, in bytes — the base of the offset is not shown here; confirm.
struct IndexEntry {
    uint32_t id;
    uint32_t offset;
    uint32_t length;
};

// Leading part of the file: only the index count is a struct member; the
// variable-length sections that come after it are described by the comments.
struct FileLayout {
    uint32_t index_count;
    // Index entries follow
    // Data blocks follow index
};

Pattern 4: Chunked Format

// Header placed in front of each chunk: a four-character type tag plus the
// byte count of the payload that follows it.
struct ChunkHeader {
    char type[4];      // e.g., "DATA", "META", "INDX" (exactly 4 chars, so no NUL terminator fits)
    uint32_t size;     // Size of chunk data
    // Chunk data follows
};

Best Practices

1. Always Check Return Values

// fwrite() returns how many elements it wrote; any value other than the
// requested count is treated as a failure.
size_t written = fwrite(data, size, count, fp);
if (written != count) {
    // Handle error
    perror("Write failed");
}

2. Use Binary Mode Consistently

// Both reading and writing should use binary mode
FILE *fp_read = fopen("data.bin", "rb");
FILE *fp_write = fopen("data.bin", "wb");

3. Include Version Information

// Keep a version field in the header so readers can recognise files written
// in a format they do not understand.
struct Header {
    uint32_t version;
    // ... other fields
};

// When reading:
// A version above CURRENT_VERSION is reported to the user.
if (header.version > CURRENT_VERSION) {
    printf("File was created by newer version\n");
}

4. Validate Data on Read

fread(&record, sizeof(record), 1, fp);
if (record.id < 0 || record.id > MAX_ID) {
    printf("Invalid record ID\n");
}

5. Handle Partial Reads

// Keep calling fread() until to_read bytes have been collected in buffer.
// Each call asks only for the bytes still missing and stores them after the
// ones already received.
size_t total_read = 0;
size_t to_read = 1000;
char buffer[1000];

while (total_read < to_read) {
    size_t this_read = fread(buffer + total_read, 1,
                             to_read - total_read, fp);
    if (this_read == 0) break;  // EOF or error
    total_read += this_read;
}

6. Close Files Properly

// A non-zero result from fclose() is treated as a failure and reported.
if (fclose(fp) != 0) {
    perror("Error closing file");
}

Common Pitfalls

Pitfall 1: Forgetting Binary Mode

// WRONG - text mode may corrupt binary data
FILE *fp = fopen("data.bin", "w");

// CORRECT
FILE *fp = fopen("data.bin", "wb");

Pitfall 2: Assuming Structure Size

// WRONG - padding may vary
fwrite(&mystruct, sizeof(mystruct), 1, fp);

// BETTER - be explicit about what you're writing
fwrite(&mystruct.field1, sizeof(mystruct.field1), 1, fp);
fwrite(&mystruct.field2, sizeof(mystruct.field2), 1, fp);

Pitfall 3: Ignoring Endianness

// Files created on one system may not work on another
// Solution: Use consistent byte order or detect and convert

Pitfall 4: Storing Pointers

// Writing this struct stores the pointer value, not the characters it
// points to.
struct Wrong {
    int id;
    char *name;  // WRONG - pointer address is meaningless in file
};

// Here the characters live inside the struct, so they are written with it.
struct Right {
    int id;
    char name[50];  // RIGHT - actual data
};

Pitfall 5: Not Checking EOF vs Error

// fread() returning fewer elements than requested does not say why;
// feof() and ferror() distinguish end of file from a read error.
if (fread(&data, sizeof(data), 1, fp) != 1) {
    if (feof(fp)) {
        printf("End of file\n");
    } else if (ferror(fp)) {
        printf("Read error\n");
    }
}

Complete Example: Binary Database

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <time.h>

#define MAGIC 0x44425F43  // ASCII 'D','B','_','C' ("DB_C"), most significant byte first
#define VERSION 1

// File header written once at the start of the database file.
// NOTE(review): time_t has no fixed width, so the header size depends on the
// platform that wrote the file.
struct DBHeader {
    uint32_t magic;         // compared against MAGIC when reading
    uint32_t version;       // set to VERSION when writing
    uint32_t record_count;  // number of Record entries that follow the header
    time_t created;         // set from time(NULL) when the file is written
    time_t modified;        // set from time(NULL) when the file is written
};

// One database record; arrays of these are written and read as raw bytes,
// so the in-memory layout (including padding) is the on-disk layout.
struct Record {
    int32_t id;
    char name[50];   // text stored inline in a fixed-size buffer
    float value;
    uint8_t active;  // NOTE(review): presumably a 0/1 flag — confirm
};

// Write a database file: one DBHeader followed by `count` records.
//
// filename: path of the file to create or truncate.
// records:  array of at least `count` records; may be NULL only if count is 0.
// count:    number of records to write; must not be negative.
//
// Returns 0 on success and -1 on invalid arguments or any I/O failure,
// including a failed flush when the file is closed.
int write_database(const char *filename, struct Record *records, int count) {
    // A negative count would turn into a huge size_t in the fwrite() call.
    if (count < 0 || (count > 0 && records == NULL)) return -1;

    FILE *fp = fopen(filename, "wb");
    if (!fp) return -1;

    // Use one timestamp so created and modified are identical in a new file.
    const time_t now = time(NULL);
    struct DBHeader header = {
        .magic = MAGIC,
        .version = VERSION,
        .record_count = (uint32_t)count,
        .created = now,
        .modified = now
    };

    const size_t n = (size_t)count;
    int ok = fwrite(&header, sizeof(header), 1, fp) == 1;

    // Skip the call for an empty database so a NULL array is never passed on.
    if (ok && n > 0) {
        ok = fwrite(records, sizeof(struct Record), n, fp) == n;
    }

    // fclose() flushes buffered records; a failure here means data was lost.
    if (fclose(fp) != 0) {
        ok = 0;
    }

    return ok ? 0 : -1;
}

int read_database(const char *filename, struct Record **records, int *count) {
    FILE *fp = fopen(filename, "rb");
    if (!fp) return -1;

    struct DBHeader header;
    if (fread(&header, sizeof(header), 1, fp) != 1) {
        fclose(fp);
        return -1;
    }

    if (header.magic != MAGIC) {
        printf("Invalid file format\n");
        fclose(fp);
        return -1;
    }

    *count = header.record_count;
    *records = malloc(sizeof(struct Record) * (*count));

    if (fread(*records, sizeof(struct Record), *count, fp) != *count) {
        free(*records);
        fclose(fp);
        return -1;
    }

    fclose(fp);
    return 0;
}

Summary

Binary files provide efficient storage for structured data. Key points:

  1. Use binary mode ("rb", "wb") to prevent newline translation
  2. fwrite() and fread() are the primary functions for binary I/O
  3. Always check return values for success/failure
  4. Structure padding can cause portability issues
  5. Endianness matters for cross-platform files
  6. Include headers with version info and magic numbers
  7. Never store pointers - store actual data
  8. Validate data when reading to catch corruption

Binary files are essential for:

  • Database storage
  • Image and media files
  • Configuration caches
  • High-performance data storage
  • Inter-process communication

Next Steps

After mastering binary files, explore:

  1. Random access with fseek() and ftell()
  2. Memory-mapped files
  3. Database design patterns
  4. File locking for concurrent access
README - C Programming Tutorial | DeepML