Efficient String Processing in C#

August 27, 2024

String allocations are one of the biggest sources of garbage collection pressure in C# applications. Understanding allocation patterns and modern alternatives can dramatically improve performance.

Available since: Various - .NET Framework 1.0 to .NET 8

The String Allocation Problem

// Naive concatenation: every intermediate result below is a separate string object.
// Builds "Hello {name}, you have {count} items" using the + operator only.
public string BuildMessage(string name, int count)
{
    // Allocation 1: the greeting prefix joined with the name
    string greeting = "Hello " + name;

    // Allocations 2 and 3: the number rendered as text, then the tail built around it
    string tally = ", you have " + count + " items";

    // Allocation 4: the final message
    return greeting + tally;
}

// Hot path example: the loop body allocates on every pass.
// Returns one "Item: <UPPERCASED> [processed]" entry per input element, in order.
public List<string> ProcessItems(string[] items)
{
    var processed = new List<string>();

    for (int index = 0; index < items.Length; index++)
    {
        // ToUpper yields one string, the concatenation yields another
        string upper = items[index].ToUpper();
        processed.Add(string.Concat("Item: ", upper, " [processed]"));
    }

    return processed;
}

StringBuilder for Multiple Concatenations

// Use StringBuilder for multiple operations.
// Builds "SELECT * FROM users" followed by one "key = 'value'" condition per entry,
// joined with AND. With no entries the WHERE clause is omitted, so the statement
// stays valid SQL.
//
// SECURITY: single quotes in values are doubled so a value cannot terminate the
// literal, but this is only a stopgap. Keys are emitted verbatim and must come from
// a trusted, fixed set of column names. Prefer parameterized commands (DbParameter)
// whenever values originate from user input.
public string BuildQuery(Dictionary<string, string> parameters)
{
    var query = new StringBuilder("SELECT * FROM users");

    bool first = true;
    foreach (var (key, value) in parameters)
    {
        // The first condition opens the WHERE clause; later ones are chained with AND
        query.Append(first ? " WHERE " : " AND ");
        first = false;

        // A null value appends nothing, which yields an empty literal ('')
        string escaped = value?.Replace("'", "''");
        query.Append(key).Append(" = '").Append(escaped).Append("'");
    }

    return query.ToString(); // Single final allocation
}

// Pre-size StringBuilder when possible.
// Concatenates all parts into one string, sizing the builder up front so it does not
// have to grow while appending.
public string BuildLargeString(IEnumerable<string> parts)
{
    // The size estimate and the append loop both walk the sequence. Materialize lazy
    // sources once so a one-shot or expensive enumerable is not evaluated twice.
    ICollection<string> items = parts as ICollection<string> ?? parts.ToList();

    // Null entries contribute no characters (Append ignores them as well)
    var estimated = items.Sum(p => p?.Length ?? 0) + 100; // Add buffer
    var builder = new StringBuilder(estimated);

    foreach (var part in items)
    {
        builder.Append(part);
    }

    return builder.ToString();
}

String Interpolation vs Concatenation

// String interpolation - efficient for simple cases.
// Produces "User: {name}, Age: {age}"; only the resulting string is allocated.
public string FormatUser(string name, int age) =>
    $"User: {name}, Age: {age}";

// Avoid complex expressions in interpolation.
// Anti-pattern: the transformation chains are written inline inside the holes.
// Formats the upper-cased, trimmed Name and the lower-cased Email of the user.
public string BadInterpolation(User user)
{
    // ToUpper, Trim and ToLower can each produce an intermediate string before the
    // final result is built.
    // NOTE(review): naming those intermediates does not remove them, so the cost of
    // inlining is presumably readability rather than allocation count - confirm by measuring.
    return $"User: {user.Name.ToUpper().Trim()}, Email: {user.Email.ToLower()}";
}

// Better - extract to variables.
// Each transformation gets its own named step, so the interpolation only
// stitches ready-made values together.
public string GoodInterpolation(User user)
{
    string upperName = user.Name.ToUpper();
    string displayName = upperName.Trim();
    string normalizedEmail = user.Email.ToLower();

    return $"User: {displayName}, Email: {normalizedEmail}";
}

ReadOnlySpan for Zero-Allocation Processing

// Traditional approach - multiple allocations.
// Accepts the address when it has a non-empty local part that does not start with
// a dot and a domain containing at least one dot. Trim, ToLower and both Substring
// calls may each create a new string.
public bool IsValidEmailOld(string email)
{
    string normalized = email.Trim().ToLower();

    int separator = normalized.IndexOf('@');
    if (separator < 1) return false; // no '@', or nothing in front of it

    string localPart = normalized.Substring(0, separator);
    string domainPart = normalized.Substring(separator + 1);

    if (localPart.StartsWith('.')) return false;
    return domainPart.Contains('.');
}

// Modern approach - zero allocations.
// Every step works on slices of the caller's buffer. The address is accepted when:
//   - there is at least one character before the first '@',
//   - the domain contains a '.' that is not its first character,
//   - the local part does not start with '.', and
//   - the domain does not end with '.'.
public bool IsValidEmail(ReadOnlySpan<char> email)
{
    ReadOnlySpan<char> candidate = email.Trim();

    int separator = candidate.IndexOf('@');
    if (separator < 1) return false;

    ReadOnlySpan<char> localPart = candidate.Slice(0, separator);
    ReadOnlySpan<char> domainPart = candidate.Slice(separator + 1);

    // A positive index also guarantees the domain is non-empty for the check below
    if (domainPart.IndexOf('.') <= 0) return false;

    // localPart has at least one character because separator >= 1
    if (localPart[0] == '.') return false;

    return domainPart[^1] != '.';
}

String.Create for Custom Formatting

// Efficient custom formatting without intermediate allocations.
// Renders a byte count as "<size> KB|MB|GB" with two decimals, e.g. "1.50 KB".
//
// The string.Create(int length, ...) overload always returns a string of exactly
// `length` characters, so it is only appropriate when the final length is known up
// front; otherwise the result is padded with '\0'. The length is not known here, so
// the interpolated-string overload is used instead: it formats into the stack
// buffer and allocates only the final, correctly sized string.
public static string FormatFileSize(long bytes)
{
    string unit;
    double size;

    // Thresholds are decimal while divisors are binary, so values just above a
    // threshold render below 1.00 of the larger unit (e.g. 0.93 GB).
    if (bytes >= 1_000_000_000)
    {
        size = bytes / 1_073_741_824.0; // 1024^3
        unit = "GB";
    }
    else if (bytes >= 1_000_000)
    {
        size = bytes / 1_048_576.0; // 1024^2
        unit = "MB";
    }
    else
    {
        size = bytes / 1024.0;
        unit = "KB";
    }

    // 32 chars covers the longest possible output (long.MinValue in KB is 23 chars);
    // the handler falls back to a pooled buffer if that were ever exceeded.
    // Current culture is used because the text is meant for display.
    return string.Create(
        System.Globalization.CultureInfo.CurrentCulture,
        stackalloc char[32],
        $"{size:F2} {unit}");
}

ArrayBufferWriter for Complex Building

// Serializes the items as one JSON array, writing UTF-8 straight into a growable
// buffer and decoding it to a string once at the end.
public static string BuildJsonArray(IEnumerable<object> items)
{
    var buffer = new ArrayBufferWriter<byte>();

    using (var writer = new Utf8JsonWriter(buffer))
    {
        writer.WriteStartArray();
        foreach (var item in items)
        {
            JsonSerializer.Serialize(writer, item);
        }
        writer.WriteEndArray();

        // Utf8JsonWriter only commits pending bytes to the buffer when it is
        // flushed; without this the closing bracket (and possibly more) would be
        // missing from buffer.WrittenSpan.
        writer.Flush();
    }

    // Single allocation for final string
    return Encoding.UTF8.GetString(buffer.WrittenSpan);
}

Avoid String.Split When Possible

// Allocates array and substrings.
// Split creates the string[] plus one string per field; the list then copies the array.
public List<string> ParseCsvOld(string line)
{
    string[] fields = line.Split(',');
    return new List<string>(fields);
}

// Span-based parsing: scanning and slicing allocate nothing; a string is created
// only for each field that is stored.
// Splits the line on ',' and replaces the contents of `results` with the fields.
// An empty line produces no fields. A trailing comma produces a trailing empty
// field, matching what string.Split(',') reports for the same input.
public void ParseCsv(ReadOnlySpan<char> line, List<string> results)
{
    results.Clear();

    if (line.IsEmpty) return;

    // Loop until the last field has been emitted. Testing line.IsEmpty here instead
    // would silently drop the empty field that follows a trailing comma.
    while (true)
    {
        int commaIndex = line.IndexOf(',');
        if (commaIndex < 0)
        {
            results.Add(line.ToString()); // Only allocate when storing
            return;
        }

        results.Add(line[..commaIndex].ToString());
        line = line[(commaIndex + 1)..];
    }
}

String Pooling for Repeated Values

// Application-level string pool for values that repeat at runtime.
// The first instance seen for a given text is stored and handed back for every
// later equal input, so duplicates can be collected.
private static readonly ConcurrentDictionary<string, string> _stringPool =
    new ConcurrentDictionary<string, string>();

// Returns the pooled instance equal to `input`, adding `input` itself when absent.
public string GetCachedString(string input) => _stringPool.GetOrAdd(input, input);

// Or rely on built-in interning of string literals.
// Known codes return a literal, so nothing is allocated per call; any other code
// falls through to a formatted string, which is allocated each time.
public string GetStatusMessage(int code)
{
    switch (code)
    {
        case 200:
            return "OK";
        case 404:
            return "Not Found";
        case 500:
            return "Server Error";
        default:
            return $"Status: {code}"; // Allocated
    }
}

Memory for Async String Building

// Accumulates the parts of an asynchronous stream in a growable char buffer and
// creates the resulting string once, after the stream is exhausted.
public async ValueTask<string> BuildStringAsync(IAsyncEnumerable<string> parts)
{
    var output = new ArrayBufferWriter<char>();

    await foreach (string chunk in parts)
    {
        AppendChunk(output, chunk);
    }

    // Single allocation for final result
    return output.WrittenSpan.ToString();

    // Synchronous helper that copies one chunk into the writer and commits it
    static void AppendChunk(ArrayBufferWriter<char> target, string text)
    {
        Span<char> destination = target.GetSpan(text.Length);
        text.AsSpan().CopyTo(destination);
        target.Advance(text.Length);
    }
}

Performance Guidelines

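Use StringBuilder when:

- You concatenate many pieces in a loop, or the number of pieces is not known up front.

Use ReadOnlySpan<char> when:

- You only need to inspect, trim, or slice text and do not need to keep the pieces as strings.

Use String.Create when:

- You want to format values directly into the final string without building intermediate strings.

Avoid:

- Repeated `+` concatenation in hot paths, and `Split`/`Substring` where slicing a span would do.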

Modern C# provides powerful tools for zero-allocation string processing. Choose the right approach based on your specific use case and performance requirements.