Dotjs CRDT Slides

You might also like

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 54

CRDTs for Mortals

Why haven’t “offline-first” apps taken off?


Syncing is hard
You must admit that local apps are a
distributed system
Actual — https://actualbudget.com
available offline
must be fast
focus on privacy
arbitrary queries

… a local app!
a mobile app!

uh oh, syncing?
simple data (5–10 MB database)
syncing on top of sqlite
unreliable ordering
conflicts

also… needs to work 100% of the time


unreliable ordering
conflicts
{ x: 2 } { y: 4 }

{ x: 3 } { z: 10 }
State X

State X

eventual consistency
how do we solve unreliable ordering?
we need to assign timestamps
“Time is relative; its only worth depends
upon what we do as it is passing.”

— Albert Einstein
1 3

2 4
• vector clock
• hybrid logical clock (HLC) *
• per-device
• assigns “timestamps” to changes

* https://cse.buffalo.edu/tech-reports/201404.pdf
{ x: 3,
timestamp: "2019-11-05T15:29:40.273Z-0001-eede1195b7d94dd5" }

{ x: 5,
timestamp: "2019-11-04T15:35:40.273Z-0001-85b8c0d2bbb57d99" }
“Simplicity is a prerequisite
for reliability.”

— Edsger W. Dijkstra
class MutableTimestamp extends Timestamp {
// clock.js setMillis(n) {
let _clock = null; this._state.millis = n;
}
function setClock(clock) {
// Calculate the next logical time and counter.
_clock = clock; setCounter(n) {
// Ensure that the logical time never goes backward;
} this._state.counter = n;
// * if all logical clocks are equal, increment the max counter,
}
// * if max = old > message, increment local counter,
function getClock() {
// * if max = message > old, increment message counter,
return _clock; setNode(n) {
// * otherwise, clocks are monotonic, reset counter
} this._state.node = n;
var lNew = Math.max(Math.max(lOld, phys), lMsg);
}
var cNew =
function makeClock(timestamp, merkle = {}) { }
lNew === lOld && lNew === lMsg
return { timestamp: MutableTimestamp.from(timestamp), merkle };
? Math.max(cOld, cMsg) + 1
} MutableTimestamp.from = timestamp => {
: lNew === lOld
return new MutableTimestamp(
? cOld + 1
function serializeClock(clock) { timestamp.millis(),
: lNew === lMsg
return JSON.stringify({ timestamp.counter(),
? cMsg + 1
timestamp: clock.timestamp.toString(), timestamp.node()
: 0;
merkle: clock.merkle );
}); };
// Check the result for drift and counter overflow
}
if (lNew - phys > config.maxDrift) {
// Timestamp generator initialization
throw new Timestamp.ClockDriftError();
function deserializeClock(clock) { // * sets the node ID to an arbitrary value
}
const data = JSON.parse(clock); // * useful for mocking/unit testing
if (cNew > 65535) {
return { Timestamp.init = function(options = {}) {
throw new Timestamp.OverflowError();
timestamp: Timestamp.from(Timestamp.parse(data.timestamp)), if (options.maxDrift) {
}
merkle: data.merkle config.maxDrift = options.maxDrift;
}; }
// Repack the logical time/counter
} };
clock.timestamp.setMillis(lNew);
clock.timestamp.setCounter(cNew);
function makeClientId() { /**
return uuidv4() * Timestamp send. Generates a unique, monotonic timestamp suitable
return new Timestamp(
.replace(/-/g, '') * for transmission to another system in string format
clock.timestamp.millis(),
.slice(-16); */
clock.timestamp.counter(),
} Timestamp.send = function(clock) {
clock.timestamp.node()
// Retrieve the local wall time
);
var phys = Date.now();
};
// timestamp.js
var config = { // Unpack the clock.timestamp logical time and counter
/**
// Maximum physical clock drift allowed, in ms var lOld = clock.timestamp.millis();
* Converts a fixed-length string timestamp to the structured value
maxDrift: 60000 var cOld = clock.timestamp.counter();
*/
};
Timestamp.parse = function(timestamp) {
// Calculate the next logical time and counter
if (typeof timestamp === 'string') {
class Timestamp { // * ensure that the logical time never goes backward
var parts = timestamp.split('-');
constructor(millis, counter, node) { // * increment the counter if phys time does not advance
if (parts && parts.length === 5) {
this._state = { var lNew = Math.max(lOld, phys);
var millis = Date.parse(parts.slice(0, 3).join('-')).valueOf();
millis: millis, var cNew = lOld === lNew ? cOld + 1 : 0;
var counter = parseInt(parts[3], 16);
counter: counter,
var node = parts[4];
node: node // Check the result for drift and counter overflow
if (!isNaN(millis) && !isNaN(counter))
}; if (lNew - phys > config.maxDrift) {
return new Timestamp(millis, counter, node);
} throw new Timestamp.ClockDriftError(lNew, phys, config.maxDrift);
}
}
}
valueOf() { if (cNew > 65535) {
return null;
return this.toString(); throw new Timestamp.OverflowError();
};
} }
Timestamp.since = isoString => {
toString() { // Repack the logical time/counter
return isoString + '-0000-0000000000000000';
return [ clock.timestamp.setMillis(lNew);
};
new Date(this.millis()).toISOString(), clock.timestamp.setCounter(cNew);
(
Timestamp.DuplicateNodeError = class extends Error {
'0000' + return new Timestamp(
constructor(node) {
this.counter() clock.timestamp.millis(),
super();
.toString(16) clock.timestamp.counter(),
this.type = 'DuplicateNodeError';
.toUpperCase() clock.timestamp.node()
this.message = 'duplicate node identifier ' + node;
).slice(-4), );
}
('0000000000000000' + this.node()).slice(-16) };
};
].join('-');
} // Timestamp receive. Parses and merges a timestamp from a remote
Timestamp.ClockDriftError = class extends Error {
// system with the local time; global uniqueness and monotonicity are
constructor(...args) {
millis() { // preserved
super();
return this._state.millis; Timestamp.recv = function(clock, msg) {
this.type = 'ClockDriftError';
} var phys = Date.now();
this.message = ['maximum clock drift exceeded'].concat(args).join(' ');
}
counter() { // Unpack the message wall time/counter
};
return this._state.counter; var lMsg = msg.millis();
} var cMsg = msg.counter();
Timestamp.OverflowError = class extends Error {
constructor() {
node() { // Assert the node id and remote clock drift
super();
return this._state.node; if (msg.node() === clock.timestamp.node()) {
this.type = 'OverflowError';
} throw new Timestamp.DuplicateNodeError(clock.timestamp.node());
this.message = 'timestamp counter overflow';
}
}
hash() { if (lMsg - phys > config.maxDrift) {
};
return murmurhash.v3(this.toString()); throw new Timestamp.ClockDriftError();
} }
setClock(makeClock(new Timestamp(0, 0, makeClientId())));
}
// Unpack the clock.timestamp logical time and counter
var lOld = clock.timestamp.millis();
var cOld = clock.timestamp.counter();
unreliable ordering
conflicts
manual conflict resolution
manual conflict resolution
CRDTs
partially ordered monoid in the category of
endofunctors with a least upper bound
conflict-free replicated data types

GCounter LWWElement-Set

PNCounter ORSet

GSet ORSWOT ???

2PSet and more…


conflict-free replicated data types

commutative idempotent
23325 f(x)
f(x) f(x)
f(x) f(x) f(x)
{ x: 300,
timestamp: "2019-11-05T15:29:40.273Z-0000-eede1195b7d94dd5" }

{ y: 73,
timestamp: "2019-11-02T15:35:32.743Z-0000-85b8c0d2bbb57d99" }

{ x: 8,
timestamp: "2019-11-02T15:35:32.743Z-0001-85b8c0d2bbb57d99" }

{ z: 114,
timestamp: "2019-11-02T15:35:32.743Z-0002-85b8c0d2bbb57d99" }

{}
{ x: 300 }
{ x: 300, y: 73 }
{ x: 300, y: 73, z: 114 }
{ x: 300,
timestamp: "2019-11-05T15:29:40.273Z-0000-eede1195b7d94dd5" }

{ y: 73,
timestamp: "2019-11-02T15:35:32.743Z-0000-85b8c0d2bbb57d99" }

{ x: 8,
timestamp: "2019-11-02T15:35:32.743Z-0001-85b8c0d2bbb57d99" }

{ z: 114,
timestamp: "2019-11-02T15:35:32.743Z-0002-85b8c0d2bbb57d99" }

LWW Map
{}
{ x: 8 }
{ x: 8, y: 73 }
{ x: 300, y: 73 }
{ x: 300, y: 73, z: 114 }
Map → Last-Write-Wins-Map (LWWMap)

Set → Grow-Only Set (GSet)


{ id: "0aead5b3-203e-475f-b3f5-1ab9ace69620",
timestamp: "2019-11-05T15:29:40.273Z-0000-eede1195b7d94dd5" }

{ id: "0aead5b3-203e-475f-b3f5-1ab9ace69620",
timestamp: "2019-11-02T15:35:32.743Z-0000-85b8c0d2bbb57d99" }

{ id: "e5b4c695-a632-4cec-a646-d61b32b2351f",
timestamp: "2019-11-02T15:35:32.743Z-0001-85b8c0d2bbb57d99" }

(“0aead5b3-203e-475f-b3f5-1ab9ace69620”,
“e5b4c695-a632-4cec-a646-d61b32b2351f”)
GSet
How to take basic relational data and
turn it into CRDTs?
SQLite table GSet of LWWMaps
A new table: messages_crdt
update("transactions", {
id: "30127b2e-f74c-4a19-af65-debfb7a6a55b",
name: "Kroger",
amount: 450
})

// becomes

{
dataset: "transactions",
row: "30127b2e-f74c-4a19-af65-debfb7a6a55b",
column: "name",
value: "Kroger",
timestamp: "2019-11-02T15:35:32.743Z-0000-85b8c0d2bbb57d99"
}
{
dataset: "transactions",
row: "30127b2e-f74c-4a19-af65-debfb7a6a55b",
column: "amount",
value: 450,
timestamp: "2019-11-02T15:35:32.743Z-0001-85b8c0d2bbb57d99"
}
delete("transactions", "30127b2e-f74c-4a19-af65-debfb7a6a55b")

// becomes

{
dataset: "transactions",
row: "30127b2e-f74c-4a19-af65-debfb7a6a55b",
column: "tombstone",
value: 1,
timestamp: "2019-11-02T15:35:32.743Z-0000-85b8c0d2bbb57d99"
}
Other features

Ensuring consistency with a merkle tree

End-to-end encryption
Live: https://crdt.jlongster.com/
Source: https://github.com/jlongster/crdt-example-app
function getTodoTypes() {
let _messages = [];
return _data.todoTypes.filter(todoType => todoType.tombstone !== 1);
let _data = {
}
todos: [],
todoTypes: [],
function insertTodoType({ name, color }) {
todoTypeMapping: [] existingMessages.set(msg1, existingMsg);
let id = insert('todoTypes', { name, color });
}; });
// Create an entry in the mapping table that points it to itself
function insert(table, row) { return existingMessages;
insert('todoTypeMapping', { id, targetId: id });
let id = uuidv4(); }
}
let fields = Object.keys(row);
function applyMessages(messages) {
function deleteTodoType(id, targetId) {
sendMessages( let existingMessages = compareMessages(messages);
if (targetId) {
fields.map(k => { let clock = getClock();
// We need to update all the pointers that point to the type that
return {
// we are deleting and point it to the new type. This already
dataset: table, messages.forEach(msg => {
// includes the type we are deleting (when created, it creates a
row: row.id || id, let existingMsg = existingMessages.get(msg);
// mapping to itself)
column: k,
for (let mapping of _data.todoTypeMapping) {
value: row[k], if (!existingMsg || existingMsg.timestamp < msg.timestamp) {
if (mapping.targetId === id) {
timestamp: Timestamp.send(getClock()).toString() apply(msg);
update('todoTypeMapping', { id: mapping.id, targetId });
}; }
}
})
}
); if (!existingMsg || existingMsg.timestamp !== msg.timestamp) {
}
clock.merkle = merkle.insert(
return id; clock.merkle,
delete_('todoTypes', id);
} Timestamp.parse(msg.timestamp)
}
);
function update(table, params) { _messages.push(msg);
setClock(makeClock(new Timestamp(0, 0, makeClientId())));
let fields = Object.keys(params).filter(k => k !== 'id'); }
});
let _onSync = null;
sendMessages(
let _syncEnabled = true;
fields.map(k => { _onSync && _onSync();
return { }
function setSyncingEnabled(flag) {
dataset: table,
_syncEnabled = flag;
row: params.id, function sendMessages(messages) {
}
column: k, applyMessages(messages);
value: params[k], sync(messages);
async function post(data) {
timestamp: Timestamp.send(getClock()).toString() }
let res = await fetch('https://crdt.jlongster.com/server/sync', {
};
method: 'POST',
}) function receiveMessages(messages) {
body: JSON.stringify(data),
); messages.forEach(msg =>
headers: {
} Timestamp.recv(getClock(), Timestamp.parse(msg.timestamp))
'Content-Type': 'application/json'
);
}
function delete_(table, id) {
});
sendMessages([ applyMessages(messages);
res = await res.json();
{ }
dataset: table,
if (res.status !== 'ok') {
row: id, function onSync(func) {
throw new Error('API error: ' + res.reason);
column: 'tombstone', _onSync = func;
}
value: 1, }
return res.data;
timestamp: Timestamp.send(getClock()).toString()
}
} async function sync(initialMessages = [], since = null) {
]); if (!_syncEnabled) {
function apply(msg) {
} return;
let table = _data[msg.dataset];
}
if (!table) {
function getTodos() {
throw new Error('Unknown dataset: ' + msg.dataset);
let todos = _data.todos let messages = initialMessages;
}
.filter(todo => todo.tombstone !== 1)
.map(todo => ({ if (since) {
let row = table.find(row => row.id === msg.row);
...todo, let timestamp = new Timestamp(since, 0, '0').toString();
if (!row) {
type: todo.type ? getTodoType(todo.type) : null messages = _messages.filter(msg => msg.timestamp >= timestamp);
table.push({ id: msg.row, [msg.column]: msg.value });
})); }
} else {
row[msg.column] = msg.value;
todos.sort((t1, t2) => { let result = await post({
}
if (t1.order < t2.order) { group_id: 'my-group',
}
return 1; client_id: getClock().timestamp.node(),
} else if (t1.order > t2.order) { messages,
function compareMessages(messages) {
return -1; merkle: getClock().merkle
let existingMessages = new Map();
} });
return 0;
// This could be optimized, but keeping it simple for now. Need to
}); receiveMessages(result.messages);
// find the latest message that exists for the dataset/row/column
// for each incoming message, so sort it first
return todos; let diffTime = merkle.diff(result.merkle, getClock().merkle);
} if (diffTime) {
let sortedMessages = [..._messages].sort((m1, m2) => {
if (since && since === diffTime) {
if (m1.timestamp < m2.timestamp) {
function getTodoType(id) { throw new Error(
return 1;
// Go through the mapping table, which is a layer of indirection. In 'A bug happened while syncing and the client ' +
} else if (m1.timestamp > m2.timestamp) {
// SQL you could think of doing a LEFT JOIN onto this table and 'was unable to get in sync with the server. ' +
return -1;
// using the id from the mapping table instead of the raw id "This is an internal error that shouldn't happen"
}
let mapping = _data.todoTypeMapping.find(m => m.id === id); );
return 0;
let type = }
});
mapping && _data.todoTypes.find(type => type.id === mapping.targetId);
return type && type.tombstone !== 1 ? type : null; return sync([], diffTime);
messages.forEach(msg1 => {
} }
let existingMsg = sortedMessages.find(
}
msg2 =>
function getNumTodos() {
msg1.dataset === msg2.dataset &&
return _data.todos.length;
msg1.row === msg2.row &&
}
msg1.column === msg2.column
);
server client

132 lines of JS 639 lines of JS

65 tweets!
only dependencies: uuid murmurhash
Conclusion

Local apps have superior UX. They are super


fast, no latency, and work offline

We’ve got to start simplifying our solutions

Clocks (particularly HLCs) and CRDTs are an


elegant solution to distributed apps
• Actual: https://actualbudget.com/

• Hybrid logical clocks: https://cse.buffalo.edu/tech-reports/201404.pdf

• CRDTs: https://bit.ly/2DMk0AD

• Demo app:

• Live: https://crdt.jlongster.com/

• Source: https://github.com/jlongster/crdt-example-app

You might also like