node.js: read a text file into an array. (Each line an item in the array.)

2020/10/24 12:01 · javascript ·  · 0评论

I would like to read a very, very large file into a JavaScript array in node.js.

So, if the file is like this:

first line
two 
three
...
...

I would have the array:

['first line','two','three', ... , ... ] 

The function would look like this:

var array = load(filename); 

Therefore the idea of loading it all as a string and then splitting it is not acceptable.

If you can fit the final data into an array then wouldn't you also be able to fit it in a string and split it, as has been suggested?
In any case if you would like to process the file one line at a time you can also try something like this:

var fs = require('fs');

function readLines(input, func) {
  var remaining = '';

  input.on('data', function(data) {
    remaining += data;
    var index = remaining.indexOf('\n');
    while (index > -1) {
      var line = remaining.substring(0, index);
      remaining = remaining.substring(index + 1);
      func(line);
      index = remaining.indexOf('\n');
    }
  });

  input.on('end', function() {
    if (remaining.length > 0) {
      func(remaining);
    }
  });
}

function func(data) {
  console.log('Line: ' + data);
}

var input = fs.createReadStream('lines.txt');
readLines(input, func);

EDIT: (in response to comment by phopkins) I think (at least in newer versions) substring does not copy data but creates a special SlicedString object (from a quick glance at the v8 source code). In any case here is a modification that avoids the mentioned substring (tested on a file several megabytes worth of "All work and no play makes Jack a dull boy"):

function readLines(input, func) {
  var remaining = '';

  input.on('data', function(data) {
    remaining += data;
    var index = remaining.indexOf('\n');
    var last  = 0;
    while (index > -1) {
      var line = remaining.substring(last, index);
      last = index + 1;
      func(line);
      index = remaining.indexOf('\n', last);
    }

    remaining = remaining.substring(last);
  });

  input.on('end', function() {
    if (remaining.length > 0) {
      func(remaining);
    }
  });
}

Synchronous:

var fs = require('fs');
var array = fs.readFileSync('file.txt').toString().split("\n");
for(i in array) {
    console.log(array[i]);
}

Asynchronous:

var fs = require('fs');
fs.readFile('file.txt', function(err, data) {
    if(err) throw err;
    var array = data.toString().split("\n");
    for(i in array) {
        console.log(array[i]);
    }
});

Using the Node.js readline module.

var fs = require('fs');
var readline = require('readline');

var filename = process.argv[2];
readline.createInterface({
    input: fs.createReadStream(filename),
    terminal: false
}).on('line', function(line) {
   console.log('Line: ' + line);
});

js:

var array = fs.readFileSync('file.txt', 'utf8').split('\n');

ts:

var array = fs.readFileSync('file.txt', 'utf8').toString().split('\n');

use readline (documentation). here's an example reading a css file, parsing for icons and writing them to json

var results = [];
  var rl = require('readline').createInterface({
    input: require('fs').createReadStream('./assets/stylesheets/_icons.scss')
  });


  // for every new line, if it matches the regex, add it to an array
  // this is ugly regex :)
  rl.on('line', function (line) {
    var re = /\.icon-icon.*:/;
    var match;
    if ((match = re.exec(line)) !== null) {
      results.push(match[0].replace(".",'').replace(":",''));
    }
  });


  // readline emits a close event when the file is read.
  rl.on('close', function(){
    var outputFilename = './icons.json';
    fs.writeFile(outputFilename, JSON.stringify(results, null, 2), function(err) {
        if(err) {
          console.log(err);
        } else {
          console.log("JSON saved to " + outputFilename);
        }
    });
  });

file.lines with JFile package

Pseudo

var JFile=require('jfile');

var myF=new JFile("./data.txt");
myF.lines // ["first line","second line"] ....

Don't forget before :

npm install jfile --save

With a BufferedReader, but the function should be asynchronous:

var load = function (file, cb){
    var lines = [];
    new BufferedReader (file, { encoding: "utf8" })
        .on ("error", function (error){
            cb (error, null);
        })
        .on ("line", function (line){
            lines.push (line);
        })
        .on ("end", function (){
            cb (null, lines);
        })
        .read ();
};

load ("file", function (error, lines){
    if (error) return console.log (error);
    console.log (lines);
});

i just want to add @finbarr great answer, a little fix in the asynchronous example:

Asynchronous:

var fs = require('fs');
fs.readFile('file.txt', function(err, data) {
    if(err) throw err;
    var array = data.toString().split("\n");
    for(i in array) {
        console.log(array[i]);
    }
    done();
});

@MadPhysicist, done() is what releases the async. call.

This is a variation on the answer above by @mtomis.

It creates a stream of lines. It emits 'data' and 'end' events, allowing you to handle the end of the stream.

var events = require('events');

var LineStream = function (input) {
    var remaining = '';

    input.on('data', function (data) {
        remaining += data;
        var index = remaining.indexOf('\n');
        var last = 0;
        while (index > -1) {
            var line = remaining.substring(last, index);
            last = index + 1;
            this.emit('data', line);
            index = remaining.indexOf('\n', last);
        }
        remaining = remaining.substring(last);
    }.bind(this));

    input.on('end', function() {
        if (remaining.length > 0) {
            this.emit('data', remaining);
        }
        this.emit('end');
    }.bind(this));
}

LineStream.prototype = new events.EventEmitter;

Use it as a wrapper:

var lineInput = new LineStream(input);

lineInput.on('data', function (line) {
    // handle line
});

lineInput.on('end', function() {
    // wrap it up
});

Essentially this will do the job: .replace(/\r\n/g,'\n').split('\n').
This works on Mac, Linux & Windows.

Code Snippets

Synchronous:

const { readFileSync } = require('fs');

const array = readFileSync('file.txt').toString().replace(/\r\n/g,'\n').split('\n');

for(let i of array) {
    console.log(i);
}

Asynchronous:

With the fs.promises API that provides an alternative set of asynchronous file system methods that return Promise objects rather than using callbacks. (No need to promisify, you can use async-await with this too, available on and after Node.js version 10.0.0)

const { readFile } = require('fs').promises;

readFile('file.txt', function(err, data) {
    if(err) throw err;

    const arr = data.toString().replace(/\r\n/g,'\n').split('\n');

    for(let i of arr) {
        console.log(i);
    }
});

More about \r & \n here: \r\n, \r and \n what is the difference between them?

I had the same problem, and I have solved it with the module line-by-line

https://www.npmjs.com/package/line-by-line

At least for me works like a charm, both in synchronous and asynchronous mode.

Also, the problem with lines terminating not terminating \n can be solved with the option:

{ encoding: 'utf8', skipEmptyLines: false }

Synchronous processing of lines:

var LineByLineReader = require('line-by-line'),
    lr = new LineByLineReader('big_file.txt');

lr.on('error', function (err) {
    // 'err' contains error object
});

lr.on('line', function (line) {
    // 'line' contains the current line without the trailing newline character.
});

lr.on('end', function () {
    // All lines are read, file is closed now.
}); 

Using Node.js v8 or later has a new feature that converts normal function into an async function.

util.promisify

It's an awesome feature. Here's the example of parsing 10000 numbers from the txt file into an array, counting inversions using merge sort on the numbers.

// read from txt file
const util = require('util');
const fs = require('fs')
fs.readFileAsync = util.promisify(fs.readFile);
let result = []

const parseTxt = async (csvFile) => {
  let fields, obj
  const data = await fs.readFileAsync(csvFile)
  const str = data.toString()
  const lines = str.split('\r\n')
  // const lines = str
  console.log("lines", lines)
  // console.log("str", str)

  lines.map(line => {
    if(!line) {return null}
    result.push(Number(line))
  })
  console.log("result",result)
  return result
}
parseTxt('./count-inversion.txt').then(() => {
  console.log(mergeSort({arr: result, count: 0}))
})

To read a big file into array you can read line by line or chunk by chunk.

line by line refer to my answer here

var fs = require('fs'),
    es = require('event-stream'),

var lines = [];

var s = fs.createReadStream('filepath')
    .pipe(es.split())
    .pipe(es.mapSync(function(line) {
        //pause the readstream
        s.pause();
        lines.push(line);
        s.resume();
    })
    .on('error', function(err) {
        console.log('Error:', err);
    })
    .on('end', function() {
        console.log('Finish reading.');
        console.log(lines);
    })
);

chunk by chunk refer to this article

var offset = 0;
var chunkSize = 2048;
var chunkBuffer = new Buffer(chunkSize);
var fp = fs.openSync('filepath', 'r');
var bytesRead = 0;
while(bytesRead = fs.readSync(fp, chunkBuffer, 0, chunkSize, offset)) {
    offset += bytesRead;
    var str = chunkBuffer.slice(0, bytesRead).toString();
    var arr = str.split('\n');

    if(bytesRead = chunkSize) {
        // the last item of the arr may be not a full line, leave it to the next chunk
        offset -= arr.pop().length;
    }
    lines.push(arr);
}
console.log(lines);
本文地址:http://javascript.askforanswer.com/node-js-read-a-text-file-into-an-array-each-line-an-item-in-the-array.html
文章标签: ,  
版权声明:本文为原创文章,版权归 javascript 所有,欢迎分享本文,转载请保留出处!

文件下载

老薛主机终身7折优惠码boke112

上一篇:
下一篇:

评论已关闭!