Generating Fake Data From JSON Schema

Published: Dec 10, 2021

Last updated: Dec 10, 2021

This post will demonstrate the usage of json-schema-faker to generate fake data from a JSON schema document.

Prerequisites

Getting started

Ensure that you have this in the file __generated__/schema.json:

{ "$schema": "http://json-schema.org/draft-07/schema#", "definitions": { "User": { "type": "object", "properties": { "id": { "type": "string" }, "email": { "type": ["string", "null"] }, "name": { "type": ["string", "null"], "x-faker": "name.findName" }, "emailVerified": { "anyOf": [ { "type": "string", "format": "date-time" }, { "type": "null" } ] }, "image": { "type": ["string", "null"] } }, "required": ["id", "email", "name", "emailVerified", "image", "role"], "additionalProperties": false, "description": "Model User" }, "Post": { "type": "object", "properties": { "id": { "type": "string" }, "createdAt": { "type": "string", "format": "date-time" }, "updatedAt": { "type": "string", "format": "date-time" }, "title": { "type": "string" }, "content": { "type": ["string", "null"] }, "published": { "type": "boolean" }, "authorId": { "type": "string" } }, "required": [ "id", "createdAt", "updatedAt", "title", "content", "published", "authorId" ], "additionalProperties": false, "description": "Model Post" } } }

Assuming you already have an npm project initialized, you will also need to add a few packages:

$ npm i json-schema-faker faker cuid

Writing our script

With that in place, we can create the file scripts/generate-data-from-json-schema.js and add the following:

/**
 * Generates fake data for every definition found in
 * __generated__/schema.json and writes the result to
 * __generated__/data.json.
 *
 * Definitions that already have an entry in data.json are reused, so
 * re-running the script only fills in what is missing.
 */
const jsf = require("json-schema-faker");
const fs = require("fs");
const path = require("path");

const schemaPath = path.join(__dirname, "../__generated__/schema.json");
const dataPath = path.join(__dirname, "../__generated__/data.json");

const schema = JSON.parse(fs.readFileSync(schemaPath, "utf8"));

// Extend the base functionality: register the generators that the
// "x-faker" and "x-cuid" schema keywords refer to.
jsf.extend("faker", () => require("faker"));
jsf.extend("cuid", () => {
  const cuid = require("cuid");
  return {
    cuid: () => cuid(),
  };
});

/**
 * Returns the previously generated data, or undefined when data.json
 * does not exist yet.
 *
 * @returns {object | undefined} Parsed contents of data.json.
 */
function readExistingData() {
  if (!fs.existsSync(dataPath)) {
    return undefined;
  }
  return JSON.parse(fs.readFileSync(dataPath, "utf8"));
}

/**
 * Builds one sample per schema definition and writes them to data.json.
 *
 * @returns {Promise<void>} Resolves once data.json has been written.
 */
async function main() {
  const currentData = readExistingData();
  const data = {};

  for (const [key, value] of Object.entries(schema.definitions)) {
    // Own-property check: a stored falsy value (false, 0, "", null) still
    // counts as existing data, and inherited keys such as "constructor"
    // are never mistaken for it.
    const hasExisting =
      currentData != null &&
      Object.prototype.hasOwnProperty.call(currentData, key);

    if (hasExisting) {
      console.log("Using existing data for", key);
      data[key] = currentData[key];
      continue;
    }

    // Each definition is resolved on its own, so hand it the full set of
    // definitions alongside its own keywords.
    const injected = {
      ...value,
      definitions: schema.definitions,
    };

    // use the async-version (preferred way)
    data[key] = await jsf.resolve(injected);
  }

  fs.writeFileSync(dataPath, JSON.stringify(data, null, 2));
}

// Never leave the promise floating: report the failure and make sure the
// process exits with a non-zero status.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});

Our script does the following:

  1. Add capabilities for the faker and cuid packages to be used.
  2. Read the schema from the file __generated__/schema.json
  3. For each definition, generate the data unless it already exists in __generated__/data.json.
  4. Write the data to the file __generated__/data.json.

Updating our schema

For json-schema-faker to know which generator to use for each property, we need to update our schema.json file with the extra x-faker and x-cuid keywords:

{ "$schema": "http://json-schema.org/draft-07/schema#", "definitions": { "User": { "type": "object", "properties": { "id": { "type": "string", "x-cuid": "cuid" }, "email": { "type": ["string", "null"], "x-faker": "internet.email" }, "name": { "type": ["string", "null"], "x-faker": "name.findName" }, "emailVerified": { "anyOf": [ { "type": "string", "format": "date-time" }, { "type": "null" } ] }, "image": { "type": ["string", "null"], "x-faker": "image.avatar" } }, "required": ["id", "email", "name", "emailVerified", "image", "role"], "additionalProperties": false, "description": "Model User" }, "UserRole": { "type": "string", "enum": ["USER", "SUPERUSER"], "description": "Enums" }, "Post": { "type": "object", "properties": { "id": { "type": "string", "x-cuid": "cuid" }, "createdAt": { "type": "string", "format": "date-time" }, "updatedAt": { "type": "string", "format": "date-time" }, "title": { "type": "string" }, "content": { "type": ["string", "null"] }, "published": { "type": "boolean" }, "authorId": { "type": "string" } }, "required": [ "id", "createdAt", "updatedAt", "title", "content", "published", "authorId" ], "additionalProperties": false, "description": "Model Post" } } }

Note: I am using cuid since that is how I am generating IDs for my relational databases. You don't specifically need to use CUID.

Running the script

Finally, we can run node scripts/generate-data-from-json-schema.js to generate the data.

Once run successfully, we can see the output in __generated__/data.json:

{ "User": { "id": "ckwn7ovf00000kvv5cg0kemkb", "email": "Karianne.Runolfsdottir0@gmail.com", "name": "Joyce Stamm", "emailVerified": null, "image": "https://cdn.fakercloud.com/avatars/gojeanyn_128.jpg" }, "Post": { "id": "ckwn7ovfd0008kvv58ud5092m", "createdAt": "2005-08-29T14:00:00.0Z", "updatedAt": "1983-05-04T14:00:00.0Z", "title": "dolor", "content": null, "published": true, "authorId": "ckwn7ovf00000kvv5cg0kemkb" } }

Summary

Today's post demonstrated how to generate fake data from a JSON schema using the JSON Schema Faker library.

It also demonstrated how to extend the functionality to use our own libraries to generate the data.

Resources and further reading

Photo credit: wisniewski

Personal image

Dennis O'Keeffe

Byron Bay, Australia

Dennis O'Keeffe

2020-present Dennis O'Keeffe.

All Rights Reserved.