read_input:

- Clarify what each regex does by assigning rule numbers
- Properly handle commented-out lines in input files
- Properly scope several vars which were in too broad a scope
- Rewrite string-to-vector<int> to use regex to parse malformed input
- Remove dependency on sstream.
This commit is contained in:
John 2022-04-22 21:12:22 -05:00
parent 43c5dd10ed
commit 952c318619

View File

@ -1,54 +1,58 @@
//TODO: Include C file IO header (stdio.h? Whatever it is) //TODO: Include C file IO header (stdio.h? Whatever it is)
#include <iostream> #include <iostream>
#include <sstream>
#include <fstream> #include <fstream>
#include <string> #include <string>
#include <vector> #include <vector>
#include <regex> #include <regex>
#include "graph.hpp" #include "graph.hpp"
#define MAX_LINE_LEN 1024 #define MAX_INT_LEN 16
// Vector of regices // Vector of regices, in order of precedence. Earlier entries will be checked before later entries.
std::vector<std::regex> patterns = { std::vector<std::regex> patterns = {
//* A line starting with a % is a comment, and should be skipped //* Rule 1: A line starting with a % is a comment, and should not be parsed
//* A blank line should be skipped std::regex("^\\s*(%)"),
//* A line starting with `num_processes=` contains the number of processes //* Rule 2: A line starting with `num_processes=` contains the number of processes
std::regex("^.*(num_processes)\\s*=\\s*(\\d+).*"), std::regex("^.*(num_processes)\\s*=\\s*(\\d+).*"),
//* A line starting with `num_resources=` contains the number of resources //* Rule 3: A line starting with `num_resources=` contains the number of resources
std::regex("^.*(num_resources)\\s*=\\s*(\\d+)"), std::regex("^.*(num_resources)\\s*=\\s*(\\d+)"),
//* A line containing comma-separated values should be returned as-is, for manual disassembly //* Rule 4: A line containing only comma-separated values should be captured completely
std::regex("^([\\-0-9, ]+)")}; // matches comma-separated space-separated signed decimal integers std::regex("^([\\-0-9, ]+)") // matches comma-separated space-separated signed decimal integers
};
// convert comma-separated string s to vector<int> // convert comma-separated string s to vector<int>
std::vector<int> stovi (const std::string &s); std::vector<int> stovi (const std::string &s);
// TODO: Implement reading from a file // TODO: Implement reading from a file
void graph::read(std::string filename) { void graph::read(std::string filename) {
printf("graph::read(%s)\n", filename.c_str());
// Open file with name filename as read-only // Open file with name filename as read-only
std::fstream f; f.open(filename, f.in); std::fstream f; f.open(filename, f.in);
// TODO: Check for file IO errors (I might have a solution for that in another project) // TODO: Check for file IO errors (I might have a solution for that in another project)
std::string line; // Lines can be no more than 1KB in size, a sensible limitation
while (!f.eof()) { while (!f.eof()) {
// acquire a line
std::string line;
std::getline (f, line); std::getline (f, line);
std::smatch res;
// Iterate over each pattern, and grab the associated data // Iterate over each pattern, and grab the associated data
for (auto pattern: patterns) { for (auto pattern: patterns) {
std::smatch res;
if (std::regex_search (line, res, pattern)) { if (std::regex_search (line, res, pattern)) {
// get the pattern type, value // get the pattern type, value
std::string type = res.format("$1"), value = res.format("$2"); std::string type = res.format("$1"), value = res.format("$2");
// Handle the pattern // Handle the pattern
// If num_processes= matched, assign value to num_processes // Rule 1: If line is a comment, ignore it and move on
if (type == "%"); else
// Rule 2: If num_processes= matched, assign value to num_processes
if (type == "num_processes") { num_processes = std::stoi(value); } else if (type == "num_processes") { num_processes = std::stoi(value); } else
// If num_resources= matched, assign value to num_resources
// Rule 3: If num_resources= matched, assign value to num_resources
if (type == "num_resources") { num_resources = std::stoi(value); } else if (type == "num_resources") { num_resources = std::stoi(value); } else
// If this line is a comma-separated list of numbers,
// Rule 4: If this line is a comma-separated list of numbers,
// and this is the first match, assign it to resource counts // and this is the first match, assign it to resource counts
if (!resource_counts.size()) { resource_counts = stovi(type); } else if (!resource_counts.size()) { resource_counts = stovi(type); } else
// and this is a subsequent match, push it onto the matrix // and this is a subsequent match, push it onto the matrix
@ -64,24 +68,16 @@ void graph::read(std::string filename) {
f.close(); f.close();
// TODO: Check for file IO errors (Shouldn't be any) // TODO: Check for file IO errors (Shouldn't be any)
// print information about the graph
printf("np: %d\tnr: %d\n", num_processes, num_resources);
printf("resource_counts:\n"); for (auto e: resource_counts) printf("%d\t", e); printf("\n");
printf("matrix:\n"); for (auto x: matrix.data) {for (auto y: x) printf("%d\t", y); printf("\n");}
return; return;
} }
// Convert a delimiter-separated list of integers in s to a vector<int>.
//
// Every maximal run of decimal digits, optionally preceded by a single '-',
// becomes one element, in order of appearance. Everything else (commas,
// spaces, stray characters) acts as a separator, so malformed input such as
// "1,,2" or " 3 ,4," still parses. Input with no digits yields an empty vector.
//
// Throws std::out_of_range (from std::stoi) if a number does not fit in an int.
std::vector<int> stovi (const std::string &s) {
    // Number classifier: an optional sign followed by digits. The sign has to be
    // part of the token, otherwise negative entries silently become positive.
    // Built once, since constructing a std::regex is expensive.
    static const std::regex breaker("-?[0-9]+");
    // Match the numbers
    std::vector<int> res;
    const std::sregex_token_iterator last;
    for (std::sregex_token_iterator i(s.begin(), s.end(), breaker, 0); i != last; ++i)
        res.push_back(std::stoi(i->str()));
    // give the numbers back
    return res;
}