writing multiple files using multiple threads

Hypothesis: Writing multiple files using multiple threads should be much faster than writing the files without using threads.

Experiment: Write 10 files using multi-threading vs single-threading.

#include <iostream>
#include <fstream>
#include <thread>
#include <future>
#include <chrono>
#include <vector>

using namespace std;
using namespace std::chrono;

// Scoped stopwatch. Timing starts when the object is constructed; when the
// object is destroyed, the whole number of seconds elapsed since construction
// is printed to std::cout, labelled with the text given to the constructor.
class MyTimer {
public:
    // Starts the timer.
    //   aText - label inserted into the "Elapsed time for ..." message.
    // Marked explicit so a plain string never converts silently into a timer.
    explicit MyTimer(const std::string & aText) :
        text(aText),
        start(Clock::now()) {}
    
    // destructor: displays elapsed time
    ~MyTimer() {
        cleanup();
    }
    
private:
    // steady_clock is monotonic, so an interval measured with it cannot be
    // distorted by adjustments to the system wall clock.
    using Clock = std::chrono::steady_clock;
    
    std::string text;          // custom text
    Clock::time_point start;   // start time
    
    // computes the time elapsed since the start, truncated to whole seconds,
    // and writes it to std::cout
    void cleanup() {
        const auto elapsedTime =
            std::chrono::duration_cast<std::chrono::seconds>(
                Clock::now() - start).count();
        std::cout << "Elapsed time for " << text
                  << ": " << elapsedTime << " seconds"
                  << std::endl;
    }
};

// Writes the integers 0 through 999999, one per line, to the file named by
// fileName. The file is created if it does not exist and truncated if it does.
// If the file cannot be opened, or the data cannot be written, a message is
// sent to std::cerr and the function returns normally; nothing is thrown.
void writeFile(const std::string & fileName) {
    std::ofstream aStr(fileName, std::ios::out);
    if (!aStr) {
        // Built as one string so the message is emitted with a single insertion.
        std::cerr << ("writeFile: unable to open " + fileName + "\n");
        return;
    }
    
    const int M = 1000000;
    for (int i = 0; i < M; ++i) {
        // '\n' instead of std::endl: endl flushes the stream after every
        // line, while '\n' lets the stream buffer batch the writes. The
        // bytes that end up in the file are the same.
        aStr << i << '\n';
    }
    
    // close() flushes whatever is still buffered; check the stream state
    // afterwards so a failed write does not go unnoticed.
    aStr.close();
    if (!aStr) {
        std::cerr << ("writeFile: error while writing " + fileName + "\n");
    }
}

// Builds the full path of an output file in the fixed output directory.
// The result has the form "<directory><subStr>_<counter>.txt".
std::string getFileName(const std::string & subStr, const int counter) {
    const std::string directory = "/Users/puneetk/Documents/files/";
    return directory + subStr + "_" + std::to_string(counter) + ".txt";
}

// use multiple threads to write specified number of files to disk
void useMultipleThreads(const int numFiles) {
    MyTimer aTimer("multi-threading");
    
    // allocate the space for futures
    std::vector<std::future> futures;
    futures.reserve(numFiles);
    
    // launch separate threads to write each file
    for (int i = 0; i < numFiles; ++i) {
        const std::string fileName = 
            getFileName("myMultiThreadFile", i);
        futures.push_back(std::async(std::launch::async, 
            writeFile, fileName));
    }
    
    // wait for them to finish writing
    for (auto & f : futures) {
        f.get();
    }
}

// use a single thread to write specified number of files to disk
void useOneThread(const int numFiles) {
    MyTimer aTimer("single-threading");
    
    // write each file sequentially
    for (int i = 0; i < numFiles; ++i) {
        const std::string fileName = 
            getFileName("mySingleThreadedFile", i);
        writeFile(fileName);
    }
}

// Entry point: runs the multi-threaded experiment first, then the
// single-threaded one, each writing the same number of files.
// The command-line arguments are accepted but not used.
int main(int argc, const char * argv[]) {
    const int fileCount = 10;   // files written by each experiment
    
    useMultipleThreads(fileCount);
    useOneThread(fileCount);
    
    return 0;
}

Results: The following was output on the console with numFiles = 10:

Elapsed time for multi-threading: 12 seconds
Elapsed time for single-threading: 28 seconds

Conclusions: Using multiple threads gave only about a 2x speedup (12 seconds vs. 28 seconds). Apparently, file IO hits a throughput limit that prevents performance gains much beyond 2x. So, if you are looking to use multiple threads for file IO, this is roughly what you can expect with the standard C++ libraries. YMMV.

Leave a Reply

Your email address will not be published. Required fields are marked *