diff --git a/lib/Iso2709.js b/lib/Iso2709.js index 567a0dd..6f0f5a1 100644 --- a/lib/Iso2709.js +++ b/lib/Iso2709.js @@ -4,9 +4,9 @@ const Record = require('./Record'); // eslint-disable-next-line no-unused-vars class Iso2709 extends Duplex { - constructor(stream) { + constructor(stream, encoding) { super({ objectMode: true }); - + this.encoding = encoding || 'utf8'; // this.prevData; Le buffer précédent du stream en cours de lecture this.prevStart = -1; // La position de ce qu'il reste à lire this.stream = stream; @@ -54,7 +54,7 @@ class Iso2709 extends Duplex { raw = Buffer.alloc(pos - start + 1); data.copy(raw, 0, start, pos); } - records.push(Iso2709.parse(raw)); + records.push(Iso2709.parse(raw, this.encoding)); pos += 1; start = pos; } @@ -77,18 +77,18 @@ class Iso2709 extends Duplex { callback(null); } - static parse(data) { + static parse(data, encoding) { const record = new Record(); - record.leader = data.toString('utf8', 0, 24); - const directoryLen = parseInt(data.toString('utf8', 12, 17), 10) - 25; + record.leader = data.toString(encoding, 0, 24); + const directoryLen = parseInt(data.toString(encoding, 12, 17), 10) - 25; const numberOfTag = directoryLen / 12; record.fields = []; for (let i = 0; i < numberOfTag; i += 1) { const off = 24 + i * 12; - const tag = data.toString('utf8', off, off + 3); - const len = parseInt(data.toString('utf8', off + 3, off + 7), 10) - 1; - const pos = parseInt(data.toString('utf8', off + 7, off + 12), 10) + 25 + directoryLen; - let value = data.toString('utf-8', pos, pos + len); + const tag = data.toString(encoding, off, off + 3); + const len = parseInt(data.toString(encoding, off + 3, off + 7), 10) - 1; + const pos = parseInt(data.toString(encoding, off + 7, off + 12), 10) + 25 + directoryLen; + let value = data.toString(encoding, pos, pos + len); const parts = [tag]; if (parseInt(tag, 10) < '010') { parts.push(value); diff --git a/lib/Marc.js b/lib/Marc.js index c12c40a..244ad35 100644 --- a/lib/Marc.js +++ b/lib/Marc.js @@ -28,12 +28,13 @@ const Marc = { * Parse and returns a MARC record. * @param {string} raw - The raw MARC record. * @param {string} type - The type of format to parse: iso2709, marcxml, mij. + * @param {string} encoding - The text encoding of stream: utf8, latin1 etc. utf8 is the default * @return a MARC record. */ - parse: (raw, type) => { + parse: (raw, type, encoding) => { const parse = Marc.parser[type.toLowerCase()]; if (parse) { - return parse(raw); + return parse(raw, encoding); } throw new Error(`Unknown MARC format: ${type}`); }, @@ -50,12 +51,13 @@ const Marc = { * Get a Writable/Readable Stream based on a Node.js stream * @param {Stream} stream - The stream on which read/write * @param {string} type - The type of stream: iso2709, marcxml, text, json, mij + * @param {string} encoding - The text encoding of stream: utf8, latin1 etc. utf8 is the default * @return {Stream} */ - stream: (stream, type) => { + stream: (stream, type, encoding) => { switch (type.toLocaleLowerCase()) { case 'iso2709': - return new Iso2709(stream); + return new Iso2709(stream, encoding); case 'marcxml': return new Marcxml(stream); case 'mij': diff --git a/lib/Marcxml.js b/lib/Marcxml.js index f178b8a..f62adfd 100644 --- a/lib/Marcxml.js +++ b/lib/Marcxml.js @@ -96,6 +96,8 @@ class Marcxml extends Duplex { let values; let ind1; let ind2; + let tagEnd; + let isSelfClosingTag; while (true) { end += 1; start = xml.indexOf('<', end); @@ -110,6 +112,16 @@ class Marcxml extends Duplex { value = he.decode(value); values = [tag, value]; } else { + if(begin === '', start); + isSelfClosingTag = ("/" === xml[tagEnd -1]); + if(isSelfClosingTag) { + //if self closing tag then skip over it and continue + end = tagEnd; + continue; + } + } end = xml.indexOf(' + + + 00000nam a2200000 a 4500 + 012301230123 + 0123012301230123012.0 + 0123012s0123 0123 0123 0123 1 0123 + + 012301230123 + + + 0123012301230123 (0123.) + + + 012301230123 (0123.) + + + 0123012301230123 (0123.) + + + 0123012301230123 + (01230123) + + + (0123)012301230123 + + + (0123)012301230123 + + + ABC + def + ghi + JKL + MNO + PQR + STU + VWX + YZA + + + AB12.CD45 + Xy 1234 + + + [Sci] + 12 + + + SF SERIES L + + + Smith, John, + author. + + + The Galactic Adventures of Space Cat / + John Smith. + + + Mars : + Space Press, + 2023. + + + 200 p. : + col. ill. ; + 24 cm. + datafield tag="490" ind1="0" ind2=" "> + Random Title; + [1] + + + RANDOM DESCRIPTION TEXT HERE. + + + Ages 8-12. + + + Harry Potter + (Fictitious character) + Juvenile fiction. + + + Wizards + Juvenile fiction. + + + Magic + Juvenile fiction. + + + Friendship + Juvenile fiction. + + + Schools + Juvenile fiction. + + + Fantasy + Juvenile fiction. + + + +`; + +let res = Marcxml.parse(xml_selfclosing); +let data = JSON.parse(JSON.stringify(res)); +console.log(data); + +const xml_notselfclosing = ` + + + + 00001aaa a2200001Ki 4500 + 1000001 + 1231231231230 + 1234567891m223467891nyua j 6 000 1 aaa d + + 1000002 + v.1 : hardcover) + + + 1000003 + (v.1 paperback) + + + 1000004 + v.1 : hardcover) + + + 1000005 + v.2 : hardcover) + + + 1000006 + v.2 : hardcover) + + + 1000007 + v.2 : paperback) + + + 1000008 + v.3 : hardcover) + + + 1000009 + v.3 : hardcover) + + + 1000010 + (v.4 ; hardcover) + + + 1000011 + (v.4 ; hardcover) + + + (OCoLC)1000012 + (OCoLC)1000013 + + + (OCoLC)ocn1000014 + + + AA1 + eng + rda + AA1 + AA2 + AA3 + AA4 + AA5 + AA6 + AA7 + + + 123.4/567 + 12 + + + J GRAPHIC ABC + + + Author, A. A., + author. + + + Random title / + written by A. A. Author ; illustrations by B. B. Illustrator. + + + At head of title: + XYZ PUBLISHERS + + + City : + Publisher Inc., + [1234]- + + + volumes : + color illustrations ; + 22 cm + + + text + txt + rdacontent + + + still image + sti + rdacontent + + + unmediated + n + rdamedia + + + volume + nc + rdacarrier + + + Volume 1. + Title of volume 1 -- + Volume 2. + Title of volume 2 -- + Volume 3. + Title of volume 3 -- + v.4. + Title of volume 4 -- + v.5. + Title of volume 5 -- + + + "Description text here."--Back cover of Volume 1. + + + Topic1 + Comic books, strips, etc. + + + Topic2 + Comic books, strips, etc. + + + Topic3 + Comic books, strips, etc. + + + Genre1. + lcgft + + + Illustrator, B. B., + illustrator. + + + XYZ Publishers, Inc. + + + +`; + +const res2 = Marcxml.parse(xml_notselfclosing); +console.log(res2); \ No newline at end of file