1
0
mirror of synced 2026-01-16 16:17:19 +00:00
2020-09-09 15:11:45 -07:00

651 lines
24 KiB
C++

#define _CRT_SECURE_NO_WARNINGS
#include "utils.h"
#include <sstream>
#include <fstream>
#include <vector>
#include <algorithm>
#include <boost/filesystem.hpp>
// Normal sequences:
// BCW - if we're not in a dataset, this starts a dataset
// - if we're not in a file, this starts a file
// - if we're not in a record, this starts a record
// - otherwise, this is just a continuation of the current record
// EOR - if we're in a record, it ends the current record
// - otherwise it denotes a NULL record
// EOF - if we're in a file, it ends the current file
// - otherwise it denotes a NULL file
// EOD - if we're in a dataset, it ends the dataset
// - otherwise it denotes a NULL dataset
//
// Ending a file without ending a record is invalid, except for a NULL file
// Ending a dataset without ending a file is invalid
//
// If an EOR is followed by a BCW, that's fine, this
// simply means that the data for the next record is after
// the BCW. This technique can be used to skip over
// certain sections of a dataset: since the forward index
// of an EOR or EOF points to a BCW, the region between the
// two control words is assumed to not contain data
//
// There is some RLL compression for 'blank fields', whatever that means.
// See 2240011R part 1 2-6 - this seems to have been removed later
//
// There are some new bits that are not in the original description
// See SR-0011P 2-15
//
// So, to add a file to the end:
// 1. Remove the EOD
// 2. Add the file, starting with a BCW
// 3. Terminate with an EOD
//
// To add a file to the beginning:
// 1. Create the file as normal, starting with a BCW
// 2. Append the old dataset to it
//
// To add a file at an arbitrary position
// A. If the EOF of the previous file points to an EOR (that is, the first record of the next file)
// 1. Patch up EOF to point to immediately after itself
// 2. Add file as normal, starting with a BCW
// 3. Patch up our EOF to point to the first EOR of the old file
// 4. Add the rest of the Dataset
// B. If the EOF of the previous file points to a BCW
// 1. Add the file as normal, starting with a BCW
// 2. Make sure our EOF points immediately after itself
// 3. Add the rest of the dataset
//
// To remove the first file
//
// One 64-bit control word of a blocked dataset: a BCW (block control word),
// EOR (end of record), EOF (end of file) or EOD (end of dataset).
//
// The word is held in mData after being passed through SwapBytes(); GetData()
// applies SwapBytes() again to recover the on-disk representation. All field
// accessors address bit ranges through GetBitsReverse()/SetBitsReverse().
// NOTE(review): the exact bit numbering is defined by those helpers in utils.h
// (presumably bit 0 is the most significant bit) - confirm there.
class DsCtrlWord_c {
public:
    // Creates an all-zero control word (type bits 0, which decodes as a BCW).
    DsCtrlWord_c() : mData(0) {}
    // Creates a control word from its on-disk representation.
    explicit DsCtrlWord_c(uint64_t aData) : mData(SwapBytes(aData)) {}

    // Control word types. The values are octal literals and are stored in the 4-bit type field.
    enum CtrlWordType_e {
        CtrlWordType_BCW = 000,
        CtrlWordType_EOR = 010,
        CtrlWordType_EOF = 016,
        CtrlWordType_EOD = 017,
        CtrlWordType_UNK = 020  // Not a stored value: returned by GetType() for any unrecognised type field
    };

    // Resets every field to zero.
    void Clear() { mData = 0; }

    // Decodes the type field; any value other than the four known types yields CtrlWordType_UNK.
    CtrlWordType_e GetType() const {
        uint8_t CtrlType = uint8_t(GetBitsReverse(mData, 0, 3));
        switch (CtrlType) {
            case CtrlWordType_BCW: return (CtrlWordType_e)CtrlType;
            case CtrlWordType_EOR: return (CtrlWordType_e)CtrlType;
            case CtrlWordType_EOF: return (CtrlWordType_e)CtrlType;
            case CtrlWordType_EOD: return (CtrlWordType_e)CtrlType;
            default: return CtrlWordType_UNK;
        }
    }
    void SetType(CtrlWordType_e aType) { mData = SetBitsReverse(mData, 0, 3, aType); }

    // Fields common to all control words
    bool GetBadData() const { return GetBitsReverse(mData, 11, 11) != 0; }
    void SetBadData(bool aValue) { mData = SetBitsReverse(mData, 11, 11, aValue ? 1 : 0); }
    // Number of 64-bit words between this control word and the next one
    uint32_t GetForwardWordIdx() const { return uint32_t(GetBitsReverse(mData, 55, 63)); }
    void SetForwardWordIdx(uint32_t aValue) { mData = SetBitsReverse(mData, 55, 63, aValue); }

    // BCW fields
    uint32_t GetBlockNumber() const { return uint32_t(GetBitsReverse(mData, 31, 54)); }
    void SetBlockNumber(uint32_t aValue) { mData = SetBitsReverse(mData, 31, 54, aValue); }

    // EOR,EOF,EOD fields
    uint32_t GetUnusedBitCnt() const { return uint32_t(GetBitsReverse(mData, 4, 9)); }
    void SetUnusedBitCnt(uint32_t aValue) { mData = SetBitsReverse(mData, 4, 9, aValue); }
    bool GetTransparent() const { return GetBitsReverse(mData, 10, 10) != 0; }
    void SetTransparent(bool aValue) { mData = SetBitsReverse(mData, 10, 10, aValue ? 1 : 0); }
    bool GetSkipRemainderSector() const { return GetBitsReverse(mData, 12, 12) != 0; }
    void SetSkipRemainderSector(bool aValue) { mData = SetBitsReverse(mData, 12, 12, aValue ? 1 : 0); }
    uint32_t GetPreviousFileIdx() const { return uint32_t(GetBitsReverse(mData, 20, 39)); }
    void SetPreviousFileIdx(uint32_t aValue) { mData = SetBitsReverse(mData, 20, 39, aValue); }
    uint32_t GetPreviousRecordIdx() const { return uint32_t(GetBitsReverse(mData, 40, 54)); }
    void SetPreviousRecordIdx(uint32_t aValue) { mData = SetBitsReverse(mData, 40, 54, aValue); }

    // Returns a one-line human readable dump of the control word.
    std::string Print() const {
        std::stringstream Str;
        switch (GetType()) {
            case CtrlWordType_BCW: Str << "BCW: "; break;
            case CtrlWordType_EOR: Str << "EOR: "; break;
            case CtrlWordType_EOF: Str << "EOF: "; break;
            case CtrlWordType_EOD: Str << "EOD: "; break;
            default: Str << "UNK: "; break;
        }
        Str << "Forward word IDX: " << DecPrinter(GetForwardWordIdx(), 5) << " ";
        // The remaining fields depend on the type of the control word
        switch (GetType()) {
            case CtrlWordType_BCW:
                Str << "Block Number: " << GetBlockNumber() << " ";
                break;
            case CtrlWordType_EOR:
            case CtrlWordType_EOF:
            case CtrlWordType_EOD:
                Str << "Unused bit count: " << DecPrinter(GetUnusedBitCnt(), 2) << " ";
                Str << "Prev file idx: " << HexPrinter(GetPreviousFileIdx(), 7) << " ";
                Str << "Prev record idx: " << HexPrinter(GetPreviousRecordIdx(), 6) << " ";
                Str << (GetTransparent() ? "TRANS " : "");
                Str << (GetSkipRemainderSector() ? "SKIP " : "");
                break;
            default: Str << "UNK: "; break;
        }
        Str << (GetBadData() ? "BAD" : "");
        return Str.str();
    }

    // Returns the control word in its on-disk representation.
    uint64_t GetData() const { return SwapBytes(mData); }

    // Reads one control word (8 bytes) from the current position of aFile.
    // Throws Generic_x if fewer than 8 bytes could be read.
    void Read(std::fstream &aFile) {
        uint64_t Data;
        aFile.read((char*)(&Data), sizeof(Data));
        if (aFile.gcount() != sizeof(Data)) throw Generic_x() << "Can't read from file";
        mData = SwapBytes(Data);
    }

    // Writes the control word (8 bytes) at the current position of aFile.
    // Throws Generic_x if the stream reports a failure.
    void Write(std::fstream &aFile) const {
        uint64_t Data = GetData();
        aFile.write((char*)(&Data), sizeof(Data));
        if (aFile.fail()) throw Generic_x() << "Can't write to file";
    }

    // Copies the control word (8 bytes) into aBuffer starting at byte aOffset.
    // The buffer must already be large enough; it is never resized.
    void Write(std::vector<uint8_t> &aBuffer, size_t aOffset) const {
        uint64_t Data = GetData();
        // Check the size first: 'size() - sizeof(Data)' alone would wrap around for
        // buffers shorter than a control word and let the copy run past the end.
        CRAY_ASSERT(aBuffer.size() >= sizeof(Data));
        CRAY_ASSERT(aOffset <= aBuffer.size() - sizeof(Data));
        uint8_t *DataBegin = (uint8_t*)(&Data);
        uint8_t *DataEnd = DataBegin + sizeof(Data);
        std::copy(DataBegin, DataEnd, aBuffer.begin() + aOffset);
    }
protected:
    uint64_t mData; // Control word after SwapBytes() has been applied to the on-disk value
};
// A blocked dataset stored in a host file.
//
// Writing goes through an in-memory block buffer (mBlock) that is pushed to the
// file whenever it fills up. The most recent control word is kept pending in
// mLastCtrlWord because its forward index is only known once the next piece of
// data (or the end of the dataset) arrives.
//
// Closing a dataset that was opened for output appends the terminating EOD.
// NOTE: the destructor closes the dataset as well; since destructors are
// implicitly noexcept, a write failure at that point terminates the program.
// Call close() explicitly if such a failure has to be handled.
class DataSet_c: public std::fstream {
public:
    static const size_t mBlockSize = 4096; // Size of a dataset block in bytes

    // Creates a dataset that is not attached to any file. mMode is value-initialized
    // so that a later close() (including the one in the destructor) never reads an
    // indeterminate open mode.
    DataSet_c() : std::fstream(), mBlock(mBlockSize), mMode() { Init(); }
    DataSet_c(const char *aFileName, std::ios_base::openmode aMode = std::ios_base::in | std::ios_base::out | std::ios_base::binary) : std::fstream(aFileName, aMode), mBlock(mBlockSize), mFileName(aFileName), mMode(aMode) { Init(); }
    DataSet_c(const std::string &aFileName, std::ios_base::openmode aMode = std::ios_base::in | std::ios_base::out | std::ios_base::binary) : std::fstream(aFileName, aMode), mBlock(mBlockSize), mFileName(aFileName), mMode(aMode) { Init(); }
    ~DataSet_c() override { close(); }

    // Closes the current dataset (if any) and attaches to a new file.
    void open(const char* aFileName, std::ios_base::openmode aMode = std::ios_base::in | std::ios_base::out | std::ios_base::binary) { close(); std::fstream::open(aFileName, aMode); mFileName = aFileName; mMode = aMode; Init(); }
    void open(const std::string& aFileName, std::ios_base::openmode aMode = std::ios_base::in | std::ios_base::out | std::ios_base::binary) { close(); std::fstream::open(aFileName, aMode); mFileName = aFileName; mMode = aMode; Init(); }

    // Finishes the dataset and closes the file.
    // Does nothing when no file is open: there is nothing to flush to, and
    // attempting it (failed open, repeated close, default-constructed object)
    // would only raise a write error - potentially from within the destructor.
    void close() {
        if (!is_open()) return;
        Flush();
        std::fstream::close();
    }

    void WriteRecord(std::vector<uint8_t> aRecord, bool aDoSwap);
    std::vector<uint8_t> ReadRecord();
    bool IsEof();
    bool IsEod();
    void WriteEof();
protected:
    void Init();
    void Flush();
    void WriteBlock(bool aFullBlock = true);

    std::vector<uint8_t> mBlock;    // Buffer for the block that is currently being assembled
    size_t mBlockOfs;               // Byte offset of the next free position within mBlock
    size_t mBlockCnt;               // Number of blocks written to the file so far
    size_t mCurrentFileStart;       // Value of mBlockCnt when the current file was started
    size_t mCurrentRecordStart;     // Value of mBlockCnt when the current record was started
    DsCtrlWord_c mLastCtrlWord; // We'll hang on to the last control word and only push it out on close. The reason is that we have to patch up the forward pointer
    std::string mFileName;          // Name of the attached file, used in error messages
    ios_base::openmode mMode;       // Mode the file was opened with; decides whether close() has to write anything
};
// Resets the writer state to "start of dataset": an empty, zeroed block buffer,
// all counters at zero and a pending BCW for block 0.
void DataSet_c::Init() {
    // Counters and offsets
    mBlockOfs = 0;
    mBlockCnt = 0;
    mCurrentRecordStart = 0;
    mCurrentFileStart = 0;

    // Block buffer keeps its size, only the content is wiped
    mBlock.assign(mBlock.size(), uint8_t(0));

    // The very first control word of a dataset is the BCW of the first block.
    // Its forward index gets patched once the first record is written.
    mLastCtrlWord.Clear();
    mLastCtrlWord.SetType(DsCtrlWord_c::CtrlWordType_BCW);
    mLastCtrlWord.SetBlockNumber(uint32_t(mBlockCnt));
    mLastCtrlWord.SetForwardWordIdx(0);
}
// Pushes the block buffer out to the file and starts a fresh, zeroed block.
//   aFullBlock - true:  the whole buffer is written
//                false: only the bytes used so far (mBlockOfs) are written
// Throws Generic_x if the stream reports a failure.
void DataSet_c::WriteBlock(bool aFullBlock) {
    const size_t ByteCount = aFullBlock ? mBlock.size() : mBlockOfs;
    write(reinterpret_cast<const char*>(mBlock.data()), ByteCount);
    if (fail()) {
        throw Generic_x() << "Can't write to file " << mFileName;
    }
    // Prepare the buffer for the next block
    mBlock.assign(mBlock.size(), uint8_t(0));
    mBlockOfs = 0;
    ++mBlockCnt;
}
// Appends one record to the dataset.
//   aRecord - payload bytes of the record; it is padded with zeros to a multiple of 8 bytes in the block
//   aDoSwap - not referenced anywhere in this function
// The control word that was left pending by the previous operation is patched with the
// proper forward index and written out in front of the data. The EOR terminating this
// record is only prepared in mLastCtrlWord; it gets written by the next operation.
// Records that don't fit in the current block are split across blocks, each new block
// starting with a BCW.
void DataSet_c::WriteRecord(std::vector<uint8_t> aRecord, bool aDoSwap) {
size_t RecordOfs = 0;
// Remember in which block this record has started
mCurrentRecordStart = mBlockCnt;
do {
// Bytes of the record still to be written and bytes still free in the block
size_t RecordLeft = aRecord.size() - RecordOfs;
size_t BlockLeft = mBlock.size() - mBlockOfs;
// Data is stored in whole 64-bit words
size_t RoundedRecordLeft = ((RecordLeft + 7) / 8) * 8;
// 8 bytes of the block are needed for the pending control word itself
if (RoundedRecordLeft <= BlockLeft-8) {
// Rest of the record fits in the block:
// - Patch up the last ctrl word
// - Put last ctrl word into block
// - Put the data into the block
// - Check if the buffer is now full, if full
// - Write block to disk
// - Create new block
// - Put BCW in new block
// - Prepare EOR in last ctrl word
// - Break out of loop
mLastCtrlWord.SetForwardWordIdx(uint32_t(RoundedRecordLeft / 8));
mLastCtrlWord.Write(mBlock, mBlockOfs);
mLastCtrlWord.Clear();
mBlockOfs += 8;
// The block buffer is zero-filled, so the padding up to the word boundary is zero
std::copy(aRecord.begin() + RecordOfs, aRecord.begin() + aRecord.size(), mBlock.begin() + mBlockOfs);
mBlockOfs += RoundedRecordLeft;
if (mBlockOfs == mBlock.size()) {
WriteBlock();
// The EOR will immediately follow this BCW, hence the forward index of 0
DsCtrlWord_c BCW;
BCW.SetType(DsCtrlWord_c::CtrlWordType_BCW);
BCW.SetBlockNumber(uint32_t(mBlockCnt));
BCW.SetForwardWordIdx(0);
BCW.Write(mBlock, mBlockOfs);
mBlockOfs += 8;
}
mLastCtrlWord.SetType(DsCtrlWord_c::CtrlWordType_EOR);
// Number of padding bits in the last word of the record
mLastCtrlWord.SetUnusedBitCnt(uint32_t((RoundedRecordLeft - RecordLeft) * 8));
// Back references are expressed as block-count differences
mLastCtrlWord.SetPreviousFileIdx(uint32_t(mBlockCnt - mCurrentFileStart));
mLastCtrlWord.SetPreviousRecordIdx(uint32_t(mBlockCnt - mCurrentRecordStart));
break;
}
else {
// Rest of the record is still too large
// - Patch up the last ctrl word
// - Put last ctrl word into block
// - Put the data into the block
// - Write the block to disk
// - Create new block
// - Prepare BCW in last ctrl word
// - Continue looping
CRAY_ASSERT(BlockLeft % 8 == 0);
// Everything after the control word up to the end of the block is data
mLastCtrlWord.SetForwardWordIdx(uint32_t(BlockLeft / 8 - 1));
mLastCtrlWord.Write(mBlock, mBlockOfs);
mLastCtrlWord.Clear();
mBlockOfs += 8;
BlockLeft -= 8;
std::copy(aRecord.begin() + RecordOfs, aRecord.begin() + RecordOfs + BlockLeft, mBlock.begin() + mBlockOfs);
WriteBlock();
// The BCW of the new block stays pending: its forward index is set in the next iteration
mLastCtrlWord.SetType(DsCtrlWord_c::CtrlWordType_BCW);
mLastCtrlWord.SetBlockNumber(uint32_t(mBlockCnt));
mLastCtrlWord.SetForwardWordIdx(0);
RecordOfs += BlockLeft;
}
} while (true);
}
void DataSet_c::WriteEof() {
CRAY_ASSERT((mBlock.size() - mBlockOfs) % 8 == 0);
CRAY_ASSERT((mBlock.size() - mBlockOfs) >= 8);
// - Patch up the last ctrl word
// - Put last ctrl word into block
// - Check if the buffer is now full, if full
// - Write block to disk
// - Create new block
// - Put BCW in new record
// - Prepare EOF in last ctrl word
mLastCtrlWord.SetForwardWordIdx(0);
mLastCtrlWord.Write(mBlock, mBlockOfs);
mLastCtrlWord.Clear();
mBlockOfs += 8;
if (mBlockOfs == mBlock.size()) {
WriteBlock();
DsCtrlWord_c BCW;
BCW.SetType(DsCtrlWord_c::CtrlWordType_BCW);
BCW.SetBlockNumber(uint32_t(mBlockCnt));
BCW.SetForwardWordIdx(0);
BCW.Write(mBlock, mBlockOfs);
mBlockOfs += 8;
}
mLastCtrlWord.SetType(DsCtrlWord_c::CtrlWordType_EOF);
mLastCtrlWord.SetUnusedBitCnt(0);
mLastCtrlWord.SetPreviousFileIdx(uint32_t(mBlockCnt - mCurrentFileStart));
mLastCtrlWord.SetPreviousRecordIdx(0);
mCurrentFileStart = mBlockCnt;
}
void DataSet_c::Flush() {
if ((mMode & std::ios_base::out) == 0) return;
size_t BlockLeft = mBlock.size() - mBlockOfs;
CRAY_ASSERT(BlockLeft % 8 == 0);
CRAY_ASSERT(BlockLeft >= 8);
// - Patch up the last ctrl word
// - Put last ctrl word into block
// - Check if the buffer is now full, if full
// - Write block to disk
// - Create new block
// - Put BCW in new record
// - Prepare EOF in last ctrl word
mLastCtrlWord.SetForwardWordIdx(0);
mLastCtrlWord.Write(mBlock, mBlockOfs);
mLastCtrlWord.Clear();
mBlockOfs += 8;
if (mBlockOfs == mBlock.size()) {
WriteBlock();
DsCtrlWord_c BCW;
BCW.SetType(DsCtrlWord_c::CtrlWordType_BCW);
BCW.SetBlockNumber(uint32_t(mBlockCnt));
BCW.SetForwardWordIdx(0);
BCW.Write(mBlock, mBlockOfs);
mBlockOfs += 8;
}
mLastCtrlWord.SetType(DsCtrlWord_c::CtrlWordType_EOD);
mLastCtrlWord.SetUnusedBitCnt(0);
mLastCtrlWord.SetPreviousFileIdx(0);
mLastCtrlWord.SetPreviousRecordIdx(0);
mLastCtrlWord.Write(mBlock, mBlockOfs);
mLastCtrlWord.Clear();
mBlockOfs += 8;
WriteBlock(false);
}
// Tests whether the read position is at the end of the current file.
// Expects the read position to be at a control word. Returns true if that word is
// an EOF, or if it is an EOR that is directly followed by an EOF.
// On return the read position is
//   - at the EOF, if an EOR/EOF pair was found (the EOR is skipped)
//   - unchanged otherwise
// Throws Generic_x if the dataset can't be read.
// NOTE(review): only the single word following the EOR is inspected, so an
// EOR/BCW/EOF sequence (EOF at the start of a new block) is not reported as EOF.
bool DataSet_c::IsEof() {
    DsCtrlWord_c CtrlWord;
    CtrlWord.Read(*this);
    bool RetVal = CtrlWord.GetType() == DsCtrlWord_c::CtrlWordType_EOF;
    // The word after an EOR is a control word only if the forward index is 0.
    // Otherwise it is the first data word of the next record and must not be
    // interpreted: its bit pattern could accidentally look like an EOF.
    if (CtrlWord.GetType() == DsCtrlWord_c::CtrlWordType_EOR && CtrlWord.GetForwardWordIdx() == 0) {
        CtrlWord.Read(*this);
        if (CtrlWord.GetType() == DsCtrlWord_c::CtrlWordType_EOF) {
            RetVal = true;
        } else {
            seekg(-8, std::ios::cur);
        }
    }
    // Position back to before the last control word read
    seekg(-8, std::ios::cur);
    if (fail()) throw Generic_x() << "Can't read dataset " << mFileName;
    return RetVal;
}
// Tests whether the read position is at the end of the dataset.
// Expects the read position to be at a control word. Returns true if that word is
// an EOD, or if it is an EOF that is directly followed by an EOD.
// On return the read position is
//   - at the EOD, if an EOF/EOD pair was found (the EOF is skipped)
//   - unchanged otherwise
// Throws Generic_x if the dataset can't be read.
// NOTE(review): only the single word following the EOF is inspected, so an
// EOF/BCW/EOD sequence (EOD at the start of a new block) is not reported as EOD.
bool DataSet_c::IsEod() {
    DsCtrlWord_c CtrlWord;
    CtrlWord.Read(*this);
    bool RetVal = CtrlWord.GetType() == DsCtrlWord_c::CtrlWordType_EOD;
    // The word after an EOF is a control word only if the forward index is 0.
    // Otherwise it is the first data word of the next file and must not be
    // interpreted: its bit pattern could accidentally look like an EOD.
    if (CtrlWord.GetType() == DsCtrlWord_c::CtrlWordType_EOF && CtrlWord.GetForwardWordIdx() == 0) {
        CtrlWord.Read(*this);
        if (CtrlWord.GetType() == DsCtrlWord_c::CtrlWordType_EOD) {
            RetVal = true;
        }
        else {
            seekg(-8, std::ios::cur);
        }
    }
    // Position back to before the last control word read
    seekg(-8, std::ios::cur);
    if (fail()) throw Generic_x() << "Can't read dataset " << mFileName;
    return RetVal;
}
std::vector<uint8_t> DataSet_c::ReadRecord() {
std::vector<uint8_t> Record;
DsCtrlWord_c CtrlWord;
CtrlWord.Read(*this);
do {
switch (CtrlWord.GetType()) {
case DsCtrlWord_c::CtrlWordType_BCW: break;
case DsCtrlWord_c::CtrlWordType_EOR: break;
case DsCtrlWord_c::CtrlWordType_EOF: break;
case DsCtrlWord_c::CtrlWordType_EOD: throw Generic_x("End of dataset encountered");
default: throw Generic_x("Unknwon RCW encountered");
}
size_t OldRecordSize = Record.size();
size_t BlockSize = size_t(CtrlWord.GetForwardWordIdx() * 8);
Record.resize(OldRecordSize + BlockSize);
if (BlockSize != 0) {
read((char*)(&Record[OldRecordSize]), BlockSize);
if (size_t(gcount()) != BlockSize) throw Generic_x() << "Can't read dataset " << mFileName;
}
CtrlWord.Read(*this);
} while (CtrlWord.GetType() != DsCtrlWord_c::CtrlWordType_EOR);
CRAY_ASSERT(Record.size() % 8 == 0);
if (Record.size() >= 8) {
Record.resize(Record.size() - CtrlWord.GetUnusedBitCnt() / 8);
}
// Position back to before the EOR record
seekg(-8, std::ios::cur);
if (fail()) throw Generic_x() << "Can't read dataset " << mFileName;
return Record;
}
// Prints the command line help to standard output.
//   aExecName - name of the executable, shown in the usage line
//   aErrorStr - optional description of the command line problem; printed in front of the help if not null
// Always returns 1, so the result can be used directly as the exit code of the process.
int PrintUsage(const char *aExecName, const char *aErrorStr = nullptr) {
    // Help text following the usage line; empty entries are printed as blank lines
    static const char *const HelpLines[] = {
        "There are two commands: create and extract.",
        "You can abbreviate create with 'c' and extract with 'e'.",
        "",
        "CREATE switches",
        " [-a[a] <input file>] ... -o <output dataset file name>",
        " -a[a] <input file> [record size]",
        " Add the specified binary file to the dataset",
        " If 'a' is added, the file is treated as an ASCII file",
        " If record size is specified, the input file is considered to consist of multiple ",
        " fixed sized records.Record size is specified in bytes",
        " Record size can only be specified for binary files",
        "",
        "EXTRACT switches",
        " -i <input dataset file name> [-xall[a] <file name patter>] [-x[a] <position> <output file name>] ...",
        " -x[a] <position> <output file name>",
        " Position specifies the n-th file in the dataset",
        " The specified file is extracted as binary or if 'a' is added, in ASCII",
        " -xall[a] <file name patter>",
        " Extract all files in the dataset",
        " In the file name pattern %d will be replaced with the file index",
        " The specified file is extracted as binary or if 'a' is added, in ASCII"
    };

    if (aErrorStr != nullptr) {
        std::cout << "Error interpreting the command line: " << aErrorStr << std::endl;
        std::cout << std::endl;
    }
    std::cout << "Usage: " << aExecName << " <command> <switchs>" << std::endl;
    for (const char *Line : HelpLines) {
        std::cout << Line << std::endl;
    }
    return 1;
}
// Implements the 'create' command: builds a dataset out of a list of input files.
//   aExecName    - name of the executable, used when the usage text has to be printed
//   aCommandLine - command line, positioned after the command word
// Every input file becomes one file of the dataset:
//   - binary files (-a) are cut into fixed size records; without a record size the whole
//     file is a single record. A shorter trailing chunk is stored as the last record.
//   - ASCII files (-aa) are stored one line per record, without the line terminators
// Returns 0 on success and 1 on failure (after printing either the usage or an error message).
int Create(const char * aExecName, CommandLine_c &aCommandLine) {
    struct InputFile_s {
        // NOTE(review): MaxValue is a project helper; presumably it yields the largest value
        // of the type of its argument, i.e. 'no record size given' - confirm in utils.h
        InputFile_s(bool aIsBinary, const std::string &aFileName) : IsBinary(aIsBinary), FileName(aFileName), RecordSize(MaxValue(RecordSize)) {}
        InputFile_s(bool aIsBinary, const std::string &aFileName, size_t aRecordSize) : IsBinary(aIsBinary), FileName(aFileName), RecordSize(aRecordSize) {}
        bool IsBinary;
        std::string FileName;
        size_t RecordSize;
    };
    std::string OutputFileName;
    std::vector<InputFile_s> InputFiles;

    // Interpret the command line
    try {
        while (aCommandLine.HasMoreParams()) {
            std::string CurParam = aCommandLine.GetNextParam();
            if (CurParam.length() == 0) continue;
            if (CurParam == "-a") {
                std::string FileName = aCommandLine.GetNextParam();
                size_t RecordSize = MaxValue(RecordSize);
                // The record size is optional: if the next parameter is not a number, it is put back
                long ParsedSize = 0;
                bool HasRecordSize = false;
                try {
                    ParsedSize = std::stol(aCommandLine.GetNextParam());
                    HasRecordSize = true;
                }
                catch (std::exception &) {
                    aCommandLine.UnGetParam();
                }
                if (HasRecordSize) {
                    // A record size of 0 would never consume the input, a negative one makes no sense
                    if (ParsedSize <= 0) throw Generic_x("Record size must be a positive number");
                    RecordSize = size_t(ParsedSize);
                }
                InputFiles.push_back(InputFile_s(true, FileName, RecordSize));
            }
            else if (CurParam == "-aa") {
                InputFiles.push_back(InputFile_s(false, aCommandLine.GetNextParam()));
            }
            else if (CurParam == "-o") {
                if (!OutputFileName.empty()) throw Generic_x("Output file name is already specified");
                OutputFileName = aCommandLine.GetNextParam();
            }
            else {
                throw Generic_x("Unkown command line parameter");
            }
        }
        if (OutputFileName.empty()) throw Generic_x("Output file name must be specified");
        if (InputFiles.size() == 0) throw Generic_x("No input files are specified");
        for (auto &File : InputFiles) {
            if (!boost::filesystem::exists(File.FileName)) throw Generic_x() << "Input file " << File.FileName << " doesn't exist";
        }
    }
    catch (std::exception &Ex) {
        return PrintUsage(aExecName, Ex.what());
    }

    // Walk the input file list and build the dataset
    try {
        DataSet_c OutFile(OutputFileName, std::ios::out | std::ios::binary);
        if (!OutFile.is_open()) throw Generic_x() << "Can't open output file " << OutputFileName;
        for (auto &File : InputFiles) {
            // The parentheses matter: '|' binds tighter than '?:', without them every file would be opened as binary
            std::ifstream InFile(File.FileName, std::ios_base::in | (File.IsBinary ? std::ios::binary : std::ios_base::openmode(0)));
            if (!InFile.is_open()) throw Generic_x() << "Can't open input file " << File.FileName;
            if (File.IsBinary) {
                const auto FileSize = boost::filesystem::file_size(File.FileName);
                const size_t RecordSize = size_t(std::min(uintmax_t(File.RecordSize), FileSize));
                // An empty input file has no records at all: it becomes a NULL file in the dataset
                if (RecordSize != 0) {
                    std::vector<uint8_t> Record(RecordSize);
                    // We read the file one record at a time and put it in the dataset.
                    // The last chunk can be shorter than a record: it is still stored, as a short record.
                    for (;;) {
                        InFile.read((char*)(Record.data()), RecordSize);
                        const size_t ReadCount = size_t(InFile.gcount());
                        if (ReadCount != RecordSize && !InFile.eof()) throw Generic_x() << "Can't read from input file " << File.FileName;
                        if (ReadCount == 0) break;
                        OutFile.WriteRecord(std::vector<uint8_t>(Record.begin(), Record.begin() + ReadCount), false); // TODO: add swapping feature
                        if (InFile.eof()) break;
                    }
                }
                OutFile.WriteEof();
            }
            else {
                std::string Line;
                // getline succeeds for a last line without terminator too, so that line is not lost
                while (std::getline(InFile, Line)) {
                    // Strip any trailing CR/LF; the line can be (or become) empty
                    size_t Length = Line.size();
                    while (Length > 0 && (Line[Length - 1] == 0x0d || Line[Length - 1] == 0x0a)) --Length;
                    std::vector<uint8_t> LineBuf(Line.begin(), Line.begin() + Length);
                    OutFile.WriteRecord(LineBuf, false);
                }
                OutFile.WriteEof();
            }
        }
    }
    catch (std::exception &Ex) {
        std::cout << "Can't create output file with error: " << Ex.what() << std::endl;
        return 1;
    }
    return 0;
}
int Extract(const char * aExecName, CommandLine_c &aCommandLine) {
struct OutputFile_s {
OutputFile_s(size_t aPosition, bool aIsBinary, const std::string &aFileName, bool aDoAll) : Position(aPosition), IsBinary(aIsBinary), FileName(aFileName), DoAll(aDoAll) {}
size_t Position;
bool IsBinary;
std::string FileName;
bool DoAll;
};
std::string InputFileName;
std::vector<OutputFile_s> OutputFiles;
try {
while (aCommandLine.HasMoreParams()) {
std::string CurParam = aCommandLine.GetNextParam();
if (CurParam.length() == 0) continue;
if (CurParam == "-x" || CurParam == "-xa") {
size_t Position;
try {
Position = std::stol(aCommandLine.GetNextParam());
}
catch (std::exception &) {
throw Generic_x("Position must be a natural number");
}
std::string FileName = aCommandLine.GetNextParam();
OutputFiles.push_back(OutputFile_s(Position, CurParam == "-x", FileName, false));
}
else if (CurParam == "-xall" || CurParam == "-xalla") {
std::string FileName = aCommandLine.GetNextParam();
OutputFiles.push_back(OutputFile_s(-1, CurParam == "-xall", FileName, true));
}
else if (CurParam == "-i") {
if (!InputFileName.empty()) throw Generic_x("Input file name is already specified");
InputFileName = aCommandLine.GetNextParam();
}
else {
throw Generic_x("Unkown command line parameter");
}
}
if (InputFileName.empty()) throw Generic_x("Input file name must be specified");
if (OutputFiles.size() == 0) throw Generic_x("No output files are specified");
if (!boost::filesystem::exists(InputFileName)) throw Generic_x() << "Input file " << InputFileName << " doesn't exist";
}
catch (std::exception &Ex) {
return PrintUsage(aExecName, Ex.what());
}
// Walk the output file list and extract each of them
try {
DataSet_c InFile(InputFileName, std::ios::in | std::ios::binary);
size_t FileIdx = 0;
for (auto &File : OutputFiles) {
if (File.DoAll) {
InFile.seekg(0);
FileIdx = 0;
do {
std::stringstream FileIdxStr;
FileIdxStr << DecPrinter(FileIdx);
std::string FileName = Replace(File.FileName, "%d", FileIdxStr.str());
std::ofstream OutFile(FileName, std::ios_base::out | (File.IsBinary ? std::ios::binary : std::ios_base::openmode(0)));
do {
std::vector<uint8_t> Record = InFile.ReadRecord();
OutFile.write((char*)(&Record[0]), Record.size());
if (!File.IsBinary) OutFile << std::endl;
if (OutFile.fail()) throw Generic_x() << "Can't write to file " << File.FileName;
} while (!InFile.IsEof());
++FileIdx;
} while (!InFile.IsEod());
InFile.seekg(0);
FileIdx = 0;
}
else {
if (File.Position < FileIdx) {
InFile.seekg(0);
FileIdx = 0;
}
std::ofstream OutFile(File.FileName, std::ios_base::out | (File.IsBinary ? std::ios::binary : std::ios_base::openmode(0)));
// Seek to the right file in the dataset
while (FileIdx != File.Position) {
do {
InFile.ReadRecord();
} while (!InFile.IsEof());
++FileIdx;
}
do {
std::vector<uint8_t> Record = InFile.ReadRecord();
OutFile.write((char*)(&Record[0]), Record.size());
if (!File.IsBinary) OutFile << std::endl;
if (OutFile.fail()) throw Generic_x() << "Can't write to file " << File.FileName;
} while (!InFile.IsEof());
++FileIdx;
}
}
}
catch (std::exception &Ex) {
std::cout << "Can't extract from dataset with error: " << Ex.what() << std::endl;
return 1;
}
return 0;
}
// Entry point: picks the command (the first non-empty parameter) off the command
// line and hands the rest over to the matching command handler.
// Prints the usage and returns its result if there is no valid command.
int main(int argc, const char* argv[]) {
    CommandLine_c CommandLine(argc, argv);
    if (!CommandLine.HasMoreParams()) {
        return PrintUsage(argv[0]);
    }
    try {
        // Skip over empty parameters to find the command
        std::string Command;
        while (Command.length() == 0 && CommandLine.HasMoreParams()) {
            Command = CommandLine.GetNextParam();
        }

        const bool WantsCreate = (Command == "create") || (Command == "c");
        if (WantsCreate) {
            return Create(argv[0], CommandLine);
        }
        const bool WantsExtract = (Command == "extract") || (Command == "e");
        if (WantsExtract) {
            return Extract(argv[0], CommandLine);
        }
        throw Generic_x() << "Invalid command line argument: " << Command;
    }
    catch (std::exception &Ex) {
        return PrintUsage(argv[0], Ex.what());
    }
    CRAY_ASSERT(false); // We should never get here, but are compilers smart enough to figure that out?
    return 0;
}