Docs
String Manipulation
String Manipulation in C
Table of Contents
- Introduction
- Case Conversion
- Reversing Strings
- Trimming and Padding
- Searching and Replacing
- Splitting and Joining
- Extracting Substrings
- String Validation
- Common Algorithms
- Performance Considerations
- Best Practices
- Summary
Introduction
String manipulation involves transforming, extracting, and processing string data. Unlike higher-level languages, C requires manual implementation of many string operations. This module covers essential string manipulation techniques:
- Modifying characters (case conversion, character replacement)
- Transforming strings (reverse, trim, pad)
- Searching and replacing content
- Parsing and extracting data
- Validating string content
Case Conversion
Character Functions (ctype.h)
// Character classification and conversion helpers declared by <ctype.h>.
// Each takes an int whose value must be representable as unsigned char (or be
// EOF); cast plain char arguments to unsigned char before calling them.
#include <ctype.h>
int toupper(int c); // Convert to uppercase
int tolower(int c); // Convert to lowercase
int isupper(int c); // Test if uppercase
int islower(int c); // Test if lowercase
int isalpha(int c); // Test if alphabetic
int isdigit(int c); // Test if digit
int isalnum(int c); // Test if alphanumeric
int isspace(int c); // Test if whitespace
Convert String to Uppercase
// Converts every character of str to uppercase, in place.
//
// str must be a modifiable, NUL-terminated string (not a string literal).
// Characters without an uppercase form are left unchanged.
void to_uppercase(char *str) {
    for (; *str != '\0'; str++) {
        // <ctype.h> functions need a value representable as unsigned char;
        // a plain char can be negative for bytes >= 0x80.
        *str = (char)toupper((unsigned char)*str);
    }
}
// Usage
char text[] = "Hello World";
to_uppercase(text); // "HELLO WORLD"
Convert String to Lowercase
// Converts every character of str to lowercase, in place.
//
// str must be a modifiable, NUL-terminated string (not a string literal).
// Characters without a lowercase form are left unchanged.
void to_lowercase(char *str) {
    for (; *str != '\0'; str++) {
        // Cast first: passing a negative plain char to tolower() is undefined.
        *str = (char)tolower((unsigned char)*str);
    }
}
Title Case (Capitalize First Letter of Each Word)
// Capitalizes the first character of each whitespace-separated word and
// lowercases every other character, in place.
//
// str must be a modifiable, NUL-terminated string.
void to_titlecase(char *str) {
    int capitalize = 1; // The next non-space character starts a word
    for (; *str != '\0'; str++) {
        // Work on the unsigned value: <ctype.h> functions are undefined for
        // negative arguments other than EOF.
        unsigned char ch = (unsigned char)*str;
        if (isspace(ch)) {
            capitalize = 1;
        } else if (capitalize) {
            *str = (char)toupper(ch);
            capitalize = 0;
        } else {
            *str = (char)tolower(ch);
        }
    }
}
// Usage
char text[] = "hello world";
to_titlecase(text); // "Hello World"
Toggle Case
// Swaps the case of every letter in str, in place: uppercase letters become
// lowercase and vice versa. Non-letters are left untouched.
void toggle_case(char *str) {
    for (; *str != '\0'; str++) {
        // Cast once so the classification and conversion calls never see a
        // negative value (undefined behavior for <ctype.h> functions).
        unsigned char ch = (unsigned char)*str;
        if (isupper(ch)) {
            *str = (char)tolower(ch);
        } else if (islower(ch)) {
            *str = (char)toupper(ch);
        }
    }
}
// Usage
char text[] = "Hello World";
toggle_case(text); // "hELLO wORLD"
Reversing Strings
In-Place Reversal
// Reverses str in place by swapping characters from both ends toward the
// middle.
//
// str must be a modifiable, NUL-terminated string.
void reverse_string(char *str) {
    // size_t matches strlen()'s return type; an int would truncate the length
    // of very long strings.
    size_t len = strlen(str);
    if (len < 2) {
        return; // Empty and one-character strings are their own reverse
    }
    for (size_t lo = 0, hi = len - 1; lo < hi; lo++, hi--) {
        char temp = str[lo];
        str[lo] = str[hi];
        str[hi] = temp;
    }
}
// Usage
char text[] = "Hello";
reverse_string(text); // "olleH"
Pointer-Based Reversal
// Reverses str in place using two pointers that walk toward each other.
//
// str must be a modifiable, NUL-terminated string.
void reverse_string_ptr(char *str) {
    size_t len = strlen(str);
    if (len < 2) {
        // Nothing to swap. Returning here also avoids computing str - 1 for an
        // empty string, which is undefined pointer arithmetic.
        return;
    }
    char *start = str;
    char *end = str + len - 1;
    while (start < end) {
        char temp = *start;
        *start++ = *end;
        *end-- = temp;
    }
}
Reverse Words in a String
// Reverses the order of the space-separated words in str, in place.
//
// Approach: reverse the whole string, then reverse each word again so the
// letters inside every word read forwards. Only ' ' is treated as a separator.
void reverse_words(char *str) {
    reverse_string(str);

    char *segment = str; // Start of the word currently being scanned
    for (char *cursor = str; *cursor != '\0'; cursor++) {
        if (*cursor != ' ') {
            continue;
        }
        // Temporarily terminate the word so reverse_string() sees only it,
        // then put the space back.
        *cursor = '\0';
        reverse_string(segment);
        *cursor = ' ';
        segment = cursor + 1;
    }

    // The final word has no trailing space, so handle it after the loop.
    reverse_string(segment);
}
// Usage
char text[] = "Hello World";
reverse_words(text); // "World Hello"
Trimming and Padding
Trim Leading Whitespace
// Returns a pointer to the first non-whitespace character of str (or to its
// terminating NUL if the string is all whitespace). The string itself is not
// modified; the result points into str.
char* ltrim(char *str) {
    // Cast before isspace(): a negative plain char is an invalid argument.
    while (isspace((unsigned char)*str)) {
        str++;
    }
    return str;
}
// Note: Returns pointer to first non-space
// Original string unchanged
Trim Trailing Whitespace
// Removes trailing whitespace from str in place by moving the terminator.
//
// str must be a modifiable, NUL-terminated string.
void rtrim(char *str) {
    size_t len = strlen(str); // size_t: no truncation for long strings
    // Cast before isspace(): a negative plain char is an invalid argument.
    while (len > 0 && isspace((unsigned char)str[len - 1])) {
        len--;
    }
    str[len] = '\0';
}
Trim Both Ends (In-Place)
// Removes leading and trailing whitespace from str, in place. The remaining
// text is shifted to the start of the buffer.
//
// str must be a modifiable, NUL-terminated string.
void trim(char *str) {
    // Skip leading whitespace. Cast before isspace(): a negative plain char
    // is an invalid argument to <ctype.h> functions.
    const char *start = str;
    while (isspace((unsigned char)*start)) {
        start++;
    }

    // Nothing but whitespace (or empty): result is the empty string.
    if (*start == '\0') {
        *str = '\0';
        return;
    }

    // Walk back from the last character to the last non-whitespace one.
    const char *end = start + strlen(start) - 1;
    while (end > start && isspace((unsigned char)*end)) {
        end--;
    }

    // Source and destination may overlap, hence memmove rather than memcpy.
    size_t len = (size_t)(end - start) + 1;
    memmove(str, start, len);
    str[len] = '\0';
}
// Usage
char text[] = " Hello World ";
trim(text); // "Hello World"
Pad String (Left/Right)
// Left pad with character
// Pads str on the left with pad_char until it is total_len characters long.
// Strings already at least that long are left unchanged.
//
// The caller must ensure str's buffer holds at least total_len + 1 bytes.
void lpad(char *str, size_t total_len, char pad_char) {
    const size_t have = strlen(str);
    if (have < total_len) {
        const size_t shift = total_len - have;
        // Slide the text (terminator included) right to open a gap in front.
        memmove(str + shift, str, have + 1);
        for (size_t i = 0; i < shift; i++) {
            str[i] = pad_char;
        }
    }
}
// Right pad with character
// Pads str on the right with pad_char until it is total_len characters long.
// Strings already at least that long are left unchanged.
//
// The caller must ensure str's buffer holds at least total_len + 1 bytes.
void rpad(char *str, size_t total_len, char pad_char) {
    size_t pos = strlen(str);
    if (pos >= total_len) {
        return;
    }
    while (pos < total_len) {
        str[pos++] = pad_char;
    }
    str[pos] = '\0';
}
// Usage
char num[10] = "42";
lpad(num, 5, '0'); // "00042"
Center String
// Centers str within a field of total_len characters, filling both sides with
// pad_char. When the padding cannot be split evenly the extra character goes
// on the right. Strings already at least total_len long are left unchanged.
//
// The caller must ensure str's buffer holds at least total_len + 1 bytes.
void center(char *str, size_t total_len, char pad_char) {
    const size_t text_len = strlen(str);
    if (text_len >= total_len) {
        return;
    }

    const size_t gap = total_len - text_len;
    const size_t lead = gap / 2;
    const size_t trail = gap - lead;

    // The terminator and right padding sit beyond the original text, so they
    // can be written before the text is moved.
    str[total_len] = '\0';
    memset(str + lead + text_len, pad_char, trail);

    // Shift the text into place, then fill the space it vacated on the left.
    memmove(str + lead, str, text_len);
    memset(str, pad_char, lead);
}
// Usage
char text[20] = "Hi";
center(text, 10, '-'); // "----Hi----"
Searching and Replacing
Replace All Occurrences of Character
// Replaces every occurrence of old_char in str with new_char, in place.
// The terminating NUL is never examined or replaced.
void replace_char(char *str, char old_char, char new_char) {
    for (char *cursor = str; *cursor != '\0'; cursor++) {
        if (*cursor != old_char) {
            continue;
        }
        *cursor = new_char;
    }
}
// Usage
char text[] = "Hello World";
replace_char(text, ' ', '_'); // "Hello_World"
Replace First Substring
// Replaces the first occurrence of old_sub in str with new_sub, in place.
//
// size is the total capacity of str's buffer in bytes (terminator included).
// Returns 1 when a replacement was made, 0 when old_sub was not found or the
// result would not fit; str is left unchanged in both failure cases.
int replace_first(char *str, size_t size,
                  const char *old_sub, const char *new_sub) {
    char *hit = strstr(str, old_sub);
    if (hit != NULL) {
        const size_t remove_len = strlen(old_sub);
        const size_t insert_len = strlen(new_sub);
        const size_t result_len = strlen(str) - remove_len + insert_len;

        // The result needs result_len + 1 bytes including the terminator.
        if (result_len < size) {
            char *suffix = hit + remove_len;
            // Regions may overlap, so shift the tail (and its NUL) with memmove.
            memmove(hit + insert_len, suffix, strlen(suffix) + 1);
            memcpy(hit, new_sub, insert_len);
            return 1;
        }
    }
    return 0;
}
Replace All Substrings
// Replaces every non-overlapping occurrence of old_sub in str with new_sub,
// in place, scanning left to right. Replaced text is not rescanned.
//
// size is the total capacity of str's buffer in bytes (terminator included).
// Returns the number of replacements made. Stops early, keeping the
// replacements already done, when the next one would not fit. An empty
// old_sub results in no replacements.
int replace_all(char *str, size_t size,
                const char *old_sub, const char *new_sub) {
    // An empty pattern matches at every position (strstr returns its first
    // argument), so the loop below would not make progress past it.
    if (*old_sub == '\0') {
        return 0;
    }

    const size_t new_len = strlen(new_sub); // Loop-invariant
    int count = 0;
    char *pos = str;
    while ((pos = strstr(pos, old_sub)) != NULL) {
        // Capacity remaining from pos to the end of the buffer.
        if (!replace_first(pos, size - (size_t)(pos - str), old_sub, new_sub)) {
            break; // No room
        }
        pos += new_len; // Continue after the inserted text
        count++;
    }
    return count;
}
Count Character Occurrences
// Returns how many times c occurs in str. The terminating NUL is not counted.
int count_char(const char *str, char c) {
    int matches = 0;
    for (const char *cursor = str; *cursor != '\0'; cursor++) {
        matches += (*cursor == c);
    }
    return matches;
}
Count Substring Occurrences
// Returns the number of occurrences of sub in str. Overlapping occurrences
// are counted (e.g. "aa" occurs 3 times in "aaaa"). An empty sub yields 0.
int count_substring(const char *str, const char *sub) {
    // strstr() matches an empty pattern at every position, including the
    // terminator; stepping one past that would read outside the string.
    if (*sub == '\0') {
        return 0;
    }

    int count = 0;
    for (const char *hit = strstr(str, sub); hit != NULL;
         hit = strstr(hit + 1, sub)) {
        count++;
    }
    return count;
}
Splitting and Joining
Split String into Array
// Splits str into tokens separated by any character in delim.
//
// str is modified: the delimiter following each stored token is overwritten
// with '\0', and tokens[] receives pointers into str. Runs of delimiters are
// treated as one separator and leading/trailing delimiters are ignored, so no
// empty tokens are produced. At most max_tokens tokens are stored.
//
// Unlike a strtok()-based loop this keeps no hidden static state, so it is
// safe to use while another tokenization is in progress.
//
// Returns the number of tokens stored (0 if str is NULL or max_tokens <= 0).
int split(char *str, const char *delim, char **tokens, int max_tokens) {
    if (str == NULL) {
        return 0;
    }

    int count = 0;
    char *cursor = str;
    while (count < max_tokens) {
        cursor += strspn(cursor, delim); // Skip any run of delimiters
        if (*cursor == '\0') {
            break; // Only delimiters (or nothing) left
        }
        tokens[count++] = cursor;
        cursor += strcspn(cursor, delim); // Advance to the end of the token
        if (*cursor == '\0') {
            break; // Token ran to the end of the string
        }
        *cursor++ = '\0'; // Terminate the token and step past the delimiter
    }
    return count;
}
// Usage
char text[] = "apple,banana,cherry";
char *tokens[10];
int count = split(text, ",", tokens, 10);
// count = 3
// tokens[0] = "apple"
// tokens[1] = "banana"
// tokens[2] = "cherry"
Join Array into String
// Copies at most room bytes of src to dest (no terminator is written) and
// returns the number of bytes copied.
static size_t join_append(char *dest, size_t room, const char *src) {
    size_t n = strlen(src);
    if (n > room) {
        n = room;
    }
    memcpy(dest, src, n);
    return n;
}

// Concatenates the count strings in strings[] into result, inserting delim
// between consecutive elements.
//
// size is the capacity of result in bytes. Output that does not fit is
// silently truncated; result is always NUL-terminated when size > 0. When
// size is 0 nothing is written at all.
void join(char *result, size_t size,
          char **strings, int count, const char *delim) {
    if (size == 0) {
        return; // No room even for the terminator
    }

    const size_t limit = size - 1; // Bytes available for text
    size_t used = 0;               // Tracked here so result is never rescanned
    for (int i = 0; i < count; i++) {
        if (i > 0) {
            used += join_append(result + used, limit - used, delim);
        }
        used += join_append(result + used, limit - used, strings[i]);
    }
    result[used] = '\0';
}
// Usage
char *words[] = {"Hello", "World", "!"};
char result[50];
join(result, sizeof(result), words, 3, " ");
// result = "Hello World !"
Extracting Substrings
Extract Substring
// Copies up to length characters of src, starting at index start, into dest
// and NUL-terminates it.
//
// If start is negative or past the end of src, or length is not positive,
// dest becomes the empty string. A length that runs past the end of src is
// clamped. The caller must ensure dest can hold the result plus terminator.
void substring(char *dest, const char *src, int start, int length) {
    const size_t src_len = strlen(src);

    // Reject negative values before they are used as offsets or sizes.
    if (start < 0 || length <= 0 || (size_t)start >= src_len) {
        dest[0] = '\0';
        return;
    }

    // Clamp in size_t; computing start + length in int could overflow.
    const size_t available = src_len - (size_t)start;
    size_t n = (size_t)length;
    if (n > available) {
        n = available;
    }

    memcpy(dest, src + start, n);
    dest[n] = '\0';
}
// Usage
char result[20];
substring(result, "Hello World", 6, 5);
// result = "World"
Extract Until Character
// Copies characters from src into dest up to, but not including, the first
// stop_char (or the whole string if stop_char does not occur), then
// NUL-terminates dest. The caller must ensure dest is large enough.
void extract_until(char *dest, const char *src, char stop_char) {
    size_t n = 0;
    for (; src[n] != '\0' && src[n] != stop_char; n++) {
        dest[n] = src[n];
    }
    dest[n] = '\0';
}
// Usage
char result[20];
extract_until(result, "name@email.com", '@');
// result = "name"
Extract Between Delimiters
// Copies the text between the first start_char in src and the next end_char
// after it into dest (delimiters excluded) and NUL-terminates it.
//
// Returns 1 on success. Returns 0, leaving dest untouched, when either
// delimiter is missing or start_char is '\0'. The caller must ensure dest is
// large enough for the extracted text plus terminator.
int extract_between(char *dest, const char *src,
                    char start_char, char end_char) {
    // strchr() can "find" the terminator; stepping past it would leave the
    // string, so a NUL start delimiter is treated as not found.
    if (start_char == '\0') {
        return 0;
    }

    const char *open = strchr(src, start_char);
    if (open == NULL) {
        return 0;
    }

    const char *begin = open + 1; // First character after the start delimiter
    const char *close = strchr(begin, end_char);
    if (close == NULL) {
        return 0;
    }

    const size_t len = (size_t)(close - begin);
    memcpy(dest, begin, len);
    dest[len] = '\0';
    return 1;
}
// Usage
char result[50];
extract_between(result, "Hello (World) Test", '(', ')');
// result = "World"
String Validation
Check if String is Numeric
// Returns 1 if str is an optionally signed decimal integer: an optional
// leading '+' or '-' followed by one or more digits and nothing else.
// Returns 0 otherwise, including for the empty string and a lone sign.
int is_numeric(const char *str) {
    // Allow one leading sign
    if (*str == '+' || *str == '-') {
        str++;
    }
    // At least one digit is required
    if (*str == '\0') {
        return 0;
    }
    for (; *str != '\0'; str++) {
        // Cast before isdigit(): a negative plain char is an invalid argument.
        if (!isdigit((unsigned char)*str)) {
            return 0;
        }
    }
    return 1;
}
// Usage
is_numeric("123"); // 1 (true)
is_numeric("-42"); // 1 (true)
is_numeric("12.5"); // 0 (false - has decimal)
is_numeric("abc"); // 0 (false)
Check if String is Alphabetic
// Returns 1 if str is non-empty and every character satisfies isalpha();
// returns 0 otherwise.
int is_alphabetic(const char *str) {
    if (*str == '\0') {
        return 0;
    }
    for (; *str != '\0'; str++) {
        // Cast before isalpha(): a negative plain char is an invalid argument.
        if (!isalpha((unsigned char)*str)) {
            return 0;
        }
    }
    return 1;
}
Check if String is Alphanumeric
// Returns 1 if str is non-empty and every character satisfies isalnum();
// returns 0 otherwise.
int is_alphanumeric(const char *str) {
    if (*str == '\0') {
        return 0;
    }
    for (; *str != '\0'; str++) {
        // Cast before isalnum(): a negative plain char is an invalid argument.
        if (!isalnum((unsigned char)*str)) {
            return 0;
        }
    }
    return 1;
}
Check if Valid Email (Simple)
// Performs a minimal structural check of an e-mail address. Returns 1 when:
//   - the address contains an '@' that is not the first character, and
//   - the last '.' at or after that '@' exists, does not immediately follow
//     the '@', and is not the final character.
// Returns 0 otherwise. This is a sanity check, not full address validation.
int is_valid_email_simple(const char *email) {
    const char *at_sign = strchr(email, '@');
    if (!at_sign) {
        return 0;
    }
    if (at_sign == email) {
        return 0; // Nothing before the '@'
    }

    const char *last_dot = strrchr(at_sign, '.');
    return last_dot != NULL
        && last_dot != at_sign + 1
        && last_dot[1] != '\0';
}
Check if Palindrome
// Returns 1 if str reads the same forwards and backwards, ignoring letter
// case; returns 0 otherwise. Every character is compared, including spaces
// and punctuation. The empty string counts as a palindrome.
int is_palindrome(const char *str) {
    const size_t len = strlen(str); // size_t: no truncation for long strings
    if (len < 2) {
        return 1;
    }
    for (size_t lo = 0, hi = len - 1; lo < hi; lo++, hi--) {
        // Cast before tolower(): a negative plain char is an invalid argument.
        if (tolower((unsigned char)str[lo]) != tolower((unsigned char)str[hi])) {
            return 0;
        }
    }
    return 1;
}
Common Algorithms
Remove Duplicate Characters
// Removes repeated characters from str in place, keeping only the first
// occurrence of each character and preserving the original order.
void remove_duplicates(char *str) {
    unsigned char already[256] = {0}; // already[b] != 0 once byte b was kept
    size_t out = 0;

    for (size_t in = 0; str[in] != '\0'; in++) {
        // Index by the unsigned byte value so the subscript is never negative.
        const unsigned char key = (unsigned char)str[in];
        if (already[key]) {
            continue;
        }
        already[key] = 1;
        str[out++] = str[in];
    }
    str[out] = '\0';
}
// Usage
char text[] = "programming";
remove_duplicates(text); // "progamin"
Remove All Occurrences of Character
// Deletes every occurrence of c from str in place, closing up the gaps.
void remove_char(char *str, char c) {
    size_t kept = 0; // Number of characters retained so far
    for (size_t i = 0; str[i] != '\0'; i++) {
        if (str[i] == c) {
            continue;
        }
        str[kept++] = str[i];
    }
    str[kept] = '\0';
}
Squeeze Multiple Spaces
// Collapses every run of whitespace characters in str into a single ' ',
// in place. Leading and trailing runs are reduced to one space, not removed.
void squeeze_spaces(char *str) {
    const char *read = str;
    char *write = str;
    int prev_space = 0; // Non-zero while inside a whitespace run

    for (; *read != '\0'; read++) {
        // Cast before isspace(): a negative plain char is an invalid argument.
        if (isspace((unsigned char)*read)) {
            if (!prev_space) {
                *write++ = ' '; // Emit one space for the whole run
                prev_space = 1;
            }
        } else {
            *write++ = *read;
            prev_space = 0;
        }
    }
    *write = '\0';
}
// Usage
char text[] = "Hello    World   !";
squeeze_spaces(text); // "Hello World !"
Find Longest Word
// Finds the longest whitespace-delimited word in str and copies it, with a
// terminating NUL, into result. When several words share the maximum length
// the first one wins. If str contains no words, result becomes "".
//
// The caller must ensure result can hold the longest word plus terminator.
void longest_word(const char *str, char *result) {
    const char *word_start = NULL; // Start of the word being scanned, if any
    const char *best_start = NULL;
    size_t best_len = 0;

    // The loop also visits the terminating NUL so that a word ending at the
    // end of the string is closed like any other.
    for (;; str++) {
        // Cast before isspace(): a negative plain char is an invalid argument.
        const int in_word = *str != '\0' && !isspace((unsigned char)*str);
        if (in_word) {
            if (word_start == NULL) {
                word_start = str;
            }
        } else if (word_start != NULL) {
            const size_t len = (size_t)(str - word_start);
            if (len > best_len) {
                best_len = len;
                best_start = word_start;
            }
            word_start = NULL;
        }
        if (*str == '\0') {
            break;
        }
    }

    if (best_start != NULL) {
        memcpy(result, best_start, best_len);
        result[best_len] = '\0';
    } else {
        result[0] = '\0';
    }
}
Performance Considerations
Avoid Repeated strlen Calls
// BAD - O(n²) for long strings
for (size_t i = 0; i < strlen(str); i++) {
    // strlen called every iteration!
}
// GOOD - O(n)
// (the index is size_t to match strlen's return type; an int index would be
// compared against an unsigned value and cannot cover very long strings)
size_t len = strlen(str);
for (size_t i = 0; i < len; i++) {
    // ...
}
// BETTER - Loop until null directly
for (size_t i = 0; str[i] != '\0'; i++) {
    // ...
}
Use Pointers for Large Strings
// Index-based: every access is written as str[i], i.e. *(str + i).
// NOTE(review): any speed difference versus the pointer form below is
// compiler-dependent; optimizing compilers commonly generate the same code
// for both, so measure before assuming one is faster.
while (str[i] != '\0') {
// str[i] = *(str + i) each time
i++;
}
// Pointer-based (direct access)
while (*ptr) {
// *ptr = direct dereference
ptr++;
}
Pre-allocate for Dynamic Strings
// BAD - multiple reallocations
char *result = malloc(1);
for (...) {
result = realloc(result, strlen(result) + len + 1);
strcat(result, new_part);
}
// GOOD - estimate size first
size_t total = calculate_total_size();
char *result = malloc(total);
char *ptr = result;
for (...) {
memcpy(ptr, new_part, len);
ptr += len;
}
*ptr = '\0';
Best Practices
1. Always Check Buffer Sizes
// Ensure result buffer is large enough
// Copies src into dest without writing more than dest_size bytes. The result
// is always NUL-terminated when dest_size > 0; src is truncated if it does
// not fit. A dest_size of 0 leaves dest untouched.
void safe_copy(char *dest, size_t dest_size, const char *src) {
    if (dest_size != 0) {
        const size_t last = dest_size - 1; // Index reserved for the terminator
        strncpy(dest, src, last);
        // strncpy() does not terminate on truncation, so do it explicitly.
        dest[last] = '\0';
    }
}
2. Handle Edge Cases
// Skeleton showing the edge cases to rule out before working on a string:
// a NULL pointer and an empty string both cause an immediate return.
void process_string(const char *str) {
    // NULL is tested first so the dereference is only reached for valid
    // pointers.
    if (str == NULL || *str == '\0') {
        return;
    }
    // Process...
}
3. Use const for Read-Only Strings
// Good: Clearly indicates string won't be modified
// Skeleton only: the body is elided in this example, so as written there is
// no return statement. The const qualifier on str is what is being shown.
size_t count_vowels(const char *str) {
// ...
}
4. Return Values for Error Handling
// Return status or count
// Skeleton only: the body is elided, so as written nothing is returned. The
// int return type is what is being shown; it lets the caller tell failure
// from success.
// NOTE: `new` is a valid identifier in C but a reserved word in C++.
int replace_substring(char *str, size_t size,
const char *old, const char *new) {
// Return 0 on failure, 1+ on success (count)
}
5. Document Memory Requirements
/**
* Reverses the string in place.
*
* @param str Null-terminated string to reverse (modified in place)
* @note String must be modifiable (not a string literal)
* @note No additional buffer is required: characters are swapped within str.
*/
void reverse_string(char *str);
Summary
Common Operations Reference
| Operation | Function/Technique |
|---|---|
| Uppercase | toupper() loop |
| Lowercase | tolower() loop |
| Reverse | Swap from ends |
| Trim | Skip spaces + null terminate |
| Replace char | Loop and assign |
| Replace substring | strstr() + memmove() + memcpy() |
| Split | strtok() |
| Join | Loop with strcat()/strncat() |
| Extract | Pointer arithmetic + strncpy() |
| Validate | Character checks (isdigit, etc.) |
Key Patterns
// Iterate and modify
while (*str) { *str = transform(*str); str++; }
// Two-pointer for removal
read = write = str;
while (*read) { if (keep) *write++ = *read; read++; }
*write = '\0';
// In-place reversal
for (i = 0; i < len/2; i++) swap(str[i], str[len-1-i]);
// Substring replacement
pos = strstr(str, old);
memmove(pos + new_len, pos + old_len, tail_len + 1);
memcpy(pos, new, new_len);
Character Testing Functions (ctype.h)
| Function | Tests for |
|---|---|
| isalpha | Alphabetic (a-z, A-Z) |
| isdigit | Digit (0-9) |
| isalnum | Alphanumeric |
| isspace | Whitespace |
| isupper | Uppercase |
| islower | Lowercase |
| ispunct | Punctuation |
| isprint | Printable |