Generating Fake Data From JSON Schema
Published: Dec 10, 2021
Last updated: Dec 10, 2021
This post will demonstrate the usage of json-schema-faker to generate fake data from a JSON schema document.
Prerequisites
- Read my related post Generating JSON Schema from TypeScript Types.
Getting started
Ensure that you have the following schema in the file __generated__/schema.json:
{ "$schema": "http://json-schema.org/draft-07/schema#", "definitions": { "User": { "type": "object", "properties": { "id": { "type": "string" }, "email": { "type": ["string", "null"] }, "name": { "type": ["string", "null"], "x-faker": "name.findName" }, "emailVerified": { "anyOf": [ { "type": "string", "format": "date-time" }, { "type": "null" } ] }, "image": { "type": ["string", "null"] } }, "required": ["id", "email", "name", "emailVerified", "image", "role"], "additionalProperties": false, "description": "Model User" }, "Post": { "type": "object", "properties": { "id": { "type": "string" }, "createdAt": { "type": "string", "format": "date-time" }, "updatedAt": { "type": "string", "format": "date-time" }, "title": { "type": "string" }, "content": { "type": ["string", "null"] }, "published": { "type": "boolean" }, "authorId": { "type": "string" } }, "required": [ "id", "createdAt", "updatedAt", "title", "content", "published", "authorId" ], "additionalProperties": false, "description": "Model Post" } } }
Assuming you already have an npm project initialized, you will also need to install a few packages:
$ npm i json-schema-faker faker cuid
Writing our script
With that there, we can create the file scripts/generate-data-from-json-schema.js
and add the following:
const jsf = require("json-schema-faker");
const fs = require("fs");
const path = require("path");

// Both the input schema and the generated data live in the sibling
// __generated__ directory, resolved relative to this script.
const schemaPath = path.join(__dirname, "../__generated__/schema.json");
const dataPath = path.join(__dirname, "../__generated__/data.json");

const schema = JSON.parse(fs.readFileSync(schemaPath, "utf8"));

// Extend the base functionality: register the generators that back the
// "x-faker" and "x-cuid" keywords used in the schema.
jsf.extend("faker", () => require("faker"));
jsf.extend("cuid", () => {
  const cuid = require("cuid");
  // Expose a single `cuid` method so the schema can reference it as
  // `"x-cuid": "cuid"`.
  return {
    cuid: () => cuid(),
  };
});

/**
 * Generates one fake sample per entry in `schema.definitions` and writes the
 * result to `__generated__/data.json`.
 *
 * Definitions that already have a truthy entry in an existing data file are
 * reused as-is, so re-running the script only fills in missing definitions.
 *
 * @returns {Promise<void>} Resolves once the data file has been written.
 */
async function main() {
  const data = {};

  // Load previously generated data (if any) so existing samples are kept.
  let currentData;
  if (fs.existsSync(dataPath)) {
    currentData = JSON.parse(fs.readFileSync(dataPath, "utf8"));
  }

  for (const [key, value] of Object.entries(schema.definitions)) {
    if (currentData && currentData[key]) {
      console.log("Using existing data for", key);
      data[key] = currentData[key];
      continue;
    }

    // Each definition is resolved as its own root schema; the full
    // definitions map is attached alongside it, presumably so that
    // "#/definitions/..." references can still be resolved.
    const injected = {
      ...value,
      definitions: schema.definitions,
    };

    // use the async-version (preferred way)
    data[key] = await jsf.resolve(injected);
  }

  fs.writeFileSync(dataPath, JSON.stringify(data, null, 2));
}

// Never leave the promise floating: report the failure and exit non-zero so
// callers (npm scripts, CI) can detect that generation did not succeed.
main().catch((err) => {
  console.error("Failed to generate data from JSON schema:", err);
  process.exitCode = 1;
});
Our script does the following:
- Adds capabilities for the faker and cuid packages to be used.
- Reads the schema from the file __generated__/schema.json.
- For each definition, generates the data if it does not already exist.
- Writes the data to the file __generated__/data.json.
Updating our schema
In order for json-schema-faker to know which generator to use for each property, we need to update our schema.json file with the extra keywords x-faker and x-cuid:
{ "$schema": "http://json-schema.org/draft-07/schema#", "definitions": { "User": { "type": "object", "properties": { "id": { "type": "string", "x-cuid": "cuid" }, "email": { "type": ["string", "null"], "x-faker": "internet.email" }, "name": { "type": ["string", "null"], "x-faker": "name.findName" }, "emailVerified": { "anyOf": [ { "type": "string", "format": "date-time" }, { "type": "null" } ] }, "image": { "type": ["string", "null"], "x-faker": "image.avatar" } }, "required": ["id", "email", "name", "emailVerified", "image", "role"], "additionalProperties": false, "description": "Model User" }, "UserRole": { "type": "string", "enum": ["USER", "SUPERUSER"], "description": "Enums" }, "Post": { "type": "object", "properties": { "id": { "type": "string", "x-cuid": "cuid" }, "createdAt": { "type": "string", "format": "date-time" }, "updatedAt": { "type": "string", "format": "date-time" }, "title": { "type": "string" }, "content": { "type": ["string", "null"] }, "published": { "type": "boolean" }, "authorId": { "type": "string" } }, "required": [ "id", "createdAt", "updatedAt", "title", "content", "published", "authorId" ], "additionalProperties": false, "description": "Model Post" } } }
Note: I am using cuid since that is how I generate IDs for my relational databases. You don't specifically need to use CUID.
Running the script
Finally, we can run node scripts/generate-data-from-json-schema.js
to generate the data.
Once run successfully, we can see the output in __generated__/data.json
:
{ "User": { "id": "ckwn7ovf00000kvv5cg0kemkb", "email": "Karianne.Runolfsdottir0@gmail.com", "name": "Joyce Stamm", "emailVerified": null, "image": "https://cdn.fakercloud.com/avatars/gojeanyn_128.jpg" }, "Post": { "id": "ckwn7ovfd0008kvv58ud5092m", "createdAt": "2005-08-29T14:00:00.0Z", "updatedAt": "1983-05-04T14:00:00.0Z", "title": "dolor", "content": null, "published": true, "authorId": "ckwn7ovf00000kvv5cg0kemkb" } }
Summary
Today's post demonstrated how to generate fake data from a JSON schema using the json-schema-faker library.
It also demonstrated how to extend the functionality to use our own libraries to generate the data.
Resources and further reading
Photo credit: wisniewski
Generating Fake Data From JSON Schema
Introduction