Skip to content

Commit

Permalink
fix(sdk/csv-parser): replace csv-parser with papaparse (#73)
Browse files Browse the repository at this point in the history
Signed-off-by: kacper-koza-arianelabs <[email protected]>
  • Loading branch information
kacper-koza-arianelabs committed May 28, 2024
1 parent a30be61 commit bd847d2
Show file tree
Hide file tree
Showing 11 changed files with 129 additions and 106 deletions.
34 changes: 32 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@
"magic-bytes.js": "^1.8.0",
"mathjs": "^12.3.2",
"npm-run-all": "^4.1.5",
"papaparse": "^5.4.1",
"zod": "^3.22.4",
"zod-error": "^1.5.0"
},
Expand All @@ -124,6 +125,7 @@
"@types/jest": "^29.5.12",
"@types/lodash": "^4.14.202",
"@types/node": "^20.11.17",
"@types/papaparse": "^5.3.14",
"@types/uuid": "^9.0.8",
"@typescript-eslint/eslint-plugin": "^5.62.0",
"@typescript-eslint/parser": "^5.62.0",
Expand Down
2 changes: 1 addition & 1 deletion src/browser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ export { PinataService } from './services/file-storages/pinata/pinata-service';
export { AWSService } from './services/file-storages/aws/aws-service';
export { MockStorageService } from './services/file-storages/mock-storage/mock-storage-service';
export { UploadService } from './services/upload-service';
export { convertCSVToMetadataObjects } from './file-management/convert-csv-to-metadata-objects';
/**
* Function below is not browser supported
* @browserUnsupported
Expand All @@ -69,7 +70,6 @@ export { UploadService } from './services/upload-service';
* Function below is not browser supported
* @browserUnsupported
*/
// export { convertMetadataObjectsToJsonFiles } from './file-management/convert-metadata-objects-to-json-files';
export { prepareMetadataObjectsFromCSVRows } from './file-management/prepare-metadata-objects-from-csv-rows';
export { getHolderAndDuration } from './get-holder-and-duration';

12 changes: 4 additions & 8 deletions src/file-management/convert-csv-to-metadata-objects.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,10 @@ import { AMOUNT_OF_HEADERS, OMITTED_HEADER_COUNT } from '../utils/constants/csv-
import { dictionary } from '../utils/constants/dictionary';
import { prepareMetadataObjectsFromCSVRows } from './prepare-metadata-objects-from-csv-rows';

export const convertCSVToMetadataObjects = async (csvFilePath: string, limit?: number): Promise<MetadataObject[]> => {
const csvParsedRows = await readCSVFile(csvFilePath, limit);

export const convertCSVToMetadataObjects = async (csvFile: Blob, limit?: number): Promise<MetadataObject[]> => {
const csvParsedRows = await readCSVFile(csvFile, limit);
if (csvParsedRows.length <= AMOUNT_OF_HEADERS - OMITTED_HEADER_COUNT) {
throw new Error(dictionary.validation.csvFileIsEmpty(csvFilePath));
throw new Error(dictionary.validation.csvFileIsEmpty());
}

const metadataObjects = prepareMetadataObjectsFromCSVRows({ csvParsedRows });

return metadataObjects;
return prepareMetadataObjectsFromCSVRows({ csvParsedRows });
};
27 changes: 0 additions & 27 deletions src/helpers/select-separator.ts

This file was deleted.

4 changes: 0 additions & 4 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,6 @@ export { PinataService } from './services/file-storages/pinata/pinata-service';
export { AWSService } from './services/file-storages/aws/aws-service';
export { MockStorageService } from './services/file-storages/mock-storage/mock-storage-service';
export { UploadService } from './services/upload-service';
/**
* Function below is not browser supported
* @browserUnsupported
*/
export { convertCSVToMetadataObjects } from './file-management/convert-csv-to-metadata-objects';
/**
* Function below is not browser supported
Expand Down
76 changes: 42 additions & 34 deletions src/services/csv-file-reader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,11 @@
* limitations under the License.
*
*/
import fs from 'fs';
import { NFTS_LIMIT_ERROR } from '../utils/constants/nfts-limit-error';
import { dictionary } from '../utils/constants/dictionary';
import type { CSVRow } from '../types/csv';
import csvParser from 'csv-parser';
import { selectSeparator } from '../helpers/select-separator';
import { ATTRIBUTES, PROPERTIES, OMITTED_HEADER_COUNT } from '../utils/constants/csv-constants';
import Papa from 'papaparse';
import { dictionary } from '../utils/constants/dictionary';

type CSVReaderErrorId = 'invalid-headers';

Expand Down Expand Up @@ -52,7 +50,7 @@ function checkForErrorsAndLimit({
}

const effectiveLimit = Number(limit) + OMITTED_HEADER_COUNT;
if (limit && currentRowCount >= effectiveLimit) {
if (limit && currentRowCount > effectiveLimit) {
throw new Error(NFTS_LIMIT_ERROR);
}
}
Expand All @@ -64,11 +62,11 @@ function processHeader(
attributesIndex: number,
refToErrorArray: string[]
): {
result: string | null;
currentType: CurrentType;
propertyIndex: number;
attributesIndex: number;
} {
result: string;
currentType: CurrentType;
propertyIndex: number;
attributesIndex: number;
} {
let result: string | null = null;

// TODO: try to simplify this
Expand Down Expand Up @@ -100,42 +98,46 @@ function processHeader(
attributesIndex++;
}

return { result, currentType, propertyIndex, attributesIndex };
return { result: result || header.header, currentType, propertyIndex, attributesIndex };
}

export async function readCSVFile(absolutePath: string, limit?: number): Promise<CSVRow[]> {
const separator = selectSeparator();
export async function readCSVFile(fileAsBlob: Blob, limit?: number): Promise<CSVRow[]> {
if (!fileAsBlob.type.includes('text/csv')) {
throw new Error(dictionary.validation.invalidCsvFileType);
}
let rowsCount = 0;
const rows: CSVRow[] = [];
const readStream = fs.createReadStream(absolutePath);
const fileAsText = await fileAsBlob.text();
const headersErrors: string[] = [];

try {
await new Promise((resolve, reject) => {
readStream
.pipe(
csvParser({
separator,
mapHeaders: mapHeadersForCSV(headersErrors),
})
)
.on('data', (row: CSVRow) => {
await new Promise<CSVRow[]>((resolve, reject) => {
Papa.parse<CSVRow>(fileAsText, {
header: true,
skipEmptyLines: true,
transformHeader: mapHeadersForCSV(headersErrors),
step: (data) => {
try {
rowsCount++;
checkForErrorsAndLimit({
headersErrors,
limit,
currentRowCount: rows.length,
currentRowCount: rowsCount,
});

rows.push(row);
rows.push(data.data);
} catch (e) {
return reject(e);
}
})
.on('end', () => resolve(readStream.read()))
.on('error', (e) => {
return reject(e);
});
},
complete: (result) => {
resolve(result.data);
},
error: (error: Error) => {
reject(new Error(error.message));
},
});
});
return rows;
} catch (e) {
// We only rethrow errors related to CSV headers. Other errors (e.g. the row limit being reached) are ignored, and the rows collected so far are returned so the overall process can continue.
if (e instanceof CSVReaderError) {
Expand All @@ -146,18 +148,24 @@ export async function readCSVFile(absolutePath: string, limit?: number): Promise
return rows;
}

function mapHeadersForCSV(refToErrorArray: string[]): (header: { header: string; index: number }) => string | null {
function mapHeadersForCSV(refToErrorArray: string[]): (header: string, index: number) => string {
let propertyIndex = 0;
let attributesIndex = 0;
let currentType: CurrentType = null;

return (header: { header: string; index: number }): string | null => {
return (header: string, index: number): string => {
if (index === 0) {
currentType = null;
propertyIndex = 0;
attributesIndex = 0;
}

const {
result,
currentType: updatedType,
propertyIndex: updatedPropertyIndex,
attributesIndex: updatedAttributesIndex,
} = processHeader(header, currentType, propertyIndex, attributesIndex, refToErrorArray);
} = processHeader({ header, index: Number(index) }, currentType, propertyIndex, attributesIndex, refToErrorArray);

currentType = updatedType;
propertyIndex = updatedPropertyIndex;
Expand Down
24 changes: 18 additions & 6 deletions src/test/integration/convert-csv-to-metadata-objects.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,17 @@
*
*/
import fs from 'fs';
import { CSV_EXAMPLE_WITH_ALL_FIELDS, CSV_EXAMPLE_EMPTY_FILE, CSV_EXAMPLE_WITH_HEADERS_ONLY } from '../__mocks__/consts';
import { CSV_EXAMPLE_WITH_ALL_FIELDS, CSV_EXAMPLE_EMPTY_FILE } from '../__mocks__/consts';
import { convertCSVToMetadataObjects } from '../../file-management/convert-csv-to-metadata-objects';
import { AMOUNT_OF_HEADERS } from '../../utils/constants/csv-constants';

describe('convertCSVToMetadataObjects Integration Test', () => {
it('should create correct number of metadata objects based on the CSV file', async () => {
const csvContent = fs.readFileSync(CSV_EXAMPLE_WITH_ALL_FIELDS, 'utf-8');
const csvRows = csvContent.trim().split('\n').length - AMOUNT_OF_HEADERS;
const metadataObjects = await convertCSVToMetadataObjects(CSV_EXAMPLE_WITH_ALL_FIELDS);
const bufferFile = fs.readFileSync(CSV_EXAMPLE_WITH_ALL_FIELDS, { encoding: 'utf8' });
const blob = new Blob([bufferFile], { type: 'text/csv' });
const metadataObjects = await convertCSVToMetadataObjects(blob);

expect(metadataObjects.length).toBe(csvRows);
});
Expand All @@ -49,21 +51,31 @@ describe('convertCSVToMetadataObjects Integration Test', () => {
],
};

const metadataObjects = await convertCSVToMetadataObjects(CSV_EXAMPLE_WITH_ALL_FIELDS);
const bufferFile = fs.readFileSync(CSV_EXAMPLE_WITH_ALL_FIELDS, { encoding: 'utf8' });
const blob = new Blob([bufferFile], { type: 'text/csv' });
const metadataObjects = await convertCSVToMetadataObjects(blob);

expect(metadataObjects.length).toBeGreaterThan(0);
expect(metadataObjects[0]).toEqual(EXPECTED_FIRST_OBJECT);
});

it('should return a number of metadataObjects no greater than specified limit', async () => {
const limit = 5;
const metadataObjects = await convertCSVToMetadataObjects(CSV_EXAMPLE_WITH_ALL_FIELDS, limit);
const bufferFile = fs.readFileSync(CSV_EXAMPLE_WITH_ALL_FIELDS, { encoding: 'utf8' });
const blob = new Blob([bufferFile], { type: 'text/csv' });
const metadataObjects = await convertCSVToMetadataObjects(blob, limit);

expect(metadataObjects.length).toBe(limit);
});

it('should throw an error if the CSV file is empty or contains only headers', async () => {
await expect(convertCSVToMetadataObjects(CSV_EXAMPLE_EMPTY_FILE)).rejects.toThrow();
await expect(convertCSVToMetadataObjects(CSV_EXAMPLE_WITH_HEADERS_ONLY)).rejects.toThrow();
const bufferEmptyFile = fs.readFileSync(CSV_EXAMPLE_EMPTY_FILE, { encoding: 'utf8' });
const blobEmptyFile = new Blob([bufferEmptyFile]);

const bufferFileWithHeadersOnly = fs.readFileSync(CSV_EXAMPLE_EMPTY_FILE, { encoding: 'utf8' });
const blobFileWIthHeadersOnly = new Blob([bufferFileWithHeadersOnly]);

await expect(convertCSVToMetadataObjects(blobEmptyFile)).rejects.toThrow();
await expect(convertCSVToMetadataObjects(blobFileWIthHeadersOnly)).rejects.toThrow();
});
});
Loading

0 comments on commit bd847d2

Please sign in to comment.