Professional Documents
Culture Documents
Dotjs CRDT Slides
Dotjs CRDT Slides
Dotjs CRDT Slides
… a local app!
a mobile app!
uh oh, syncing?
simple data (5–10 MB database)
syncing on top of sqlite
unreliable ordering
conflicts
{ x: 3 } { z: 10 }
State X
State X
eventual consistency
how do we solve unreliable ordering?
we need to assign timestamps
“Time is relative; its only worth depends
upon what we do as it is passing.”
Albert Einstein
1 3
2 4
• vector clock
• hybrid logical clock (HLC) *
• per-device
• assigns “timestamps” to changes
* https://cse.buffalo.edu/tech-reports/201404.pdf
{ x: 3,
timestamp: "2019-11-05T15:29:40.273Z-0001-eede1195b7d94dd5" }
{ x: 5,
timestamp: "2019-11-04T15:35:40.273Z-0001-85b8c0d2bbb57d99" }
“Simplicity is a prerequisite
for reliability.”
Edsger W. Dijkstra
class MutableTimestamp extends Timestamp {
// clock.js setMillis(n) {
let _clock = null; this._state.millis = n;
}
function setClock(clock) {
// Calculate the next logical time and counter.
_clock = clock; setCounter(n) {
// Ensure that the logical time never goes backward;
} this._state.counter = n;
// * if all logical clocks are equal, increment the max counter,
}
// * if max = old > message, increment local counter,
function getClock() {
// * if max = message > old, increment message counter,
return _clock; setNode(n) {
// * otherwise, clocks are monotonic, reset counter
} this._state.node = n;
var lNew = Math.max(Math.max(lOld, phys), lMsg);
}
var cNew =
function makeClock(timestamp, merkle = {}) { }
lNew === lOld && lNew === lMsg
return { timestamp: MutableTimestamp.from(timestamp), merkle };
? Math.max(cOld, cMsg) + 1
} MutableTimestamp.from = timestamp => {
: lNew === lOld
return new MutableTimestamp(
? cOld + 1
function serializeClock(clock) { timestamp.millis(),
: lNew === lMsg
return JSON.stringify({ timestamp.counter(),
? cMsg + 1
timestamp: clock.timestamp.toString(), timestamp.node()
: 0;
merkle: clock.merkle );
}); };
// Check the result for drift and counter overflow
}
if (lNew - phys > config.maxDrift) {
// Timestamp generator initialization
throw new Timestamp.ClockDriftError();
function deserializeClock(clock) { // * sets the node ID to an arbitrary value
}
const data = JSON.parse(clock); // * useful for mocking/unit testing
if (cNew > 65535) {
return { Timestamp.init = function(options = {}) {
throw new Timestamp.OverflowError();
timestamp: Timestamp.from(Timestamp.parse(data.timestamp)), if (options.maxDrift) {
}
merkle: data.merkle config.maxDrift = options.maxDrift;
}; }
// Repack the logical time/counter
} };
clock.timestamp.setMillis(lNew);
clock.timestamp.setCounter(cNew);
function makeClientId() { /**
return uuidv4() * Timestamp send. Generates a unique, monotonic timestamp suitable
return new Timestamp(
.replace(/-/g, '') * for transmission to another system in string format
clock.timestamp.millis(),
.slice(-16); */
clock.timestamp.counter(),
} Timestamp.send = function(clock) {
clock.timestamp.node()
// Retrieve the local wall time
);
var phys = Date.now();
};
// timestamp.js
var config = { // Unpack the clock.timestamp logical time and counter
/**
// Maximum physical clock drift allowed, in ms var lOld = clock.timestamp.millis();
* Converts a fixed-length string timestamp to the structured value
maxDrift: 60000 var cOld = clock.timestamp.counter();
*/
};
Timestamp.parse = function(timestamp) {
// Calculate the next logical time and counter
if (typeof timestamp === 'string') {
class Timestamp { // * ensure that the logical time never goes backward
var parts = timestamp.split('-');
constructor(millis, counter, node) { // * increment the counter if phys time does not advance
if (parts && parts.length === 5) {
this._state = { var lNew = Math.max(lOld, phys);
var millis = Date.parse(parts.slice(0, 3).join('-')).valueOf();
millis: millis, var cNew = lOld === lNew ? cOld + 1 : 0;
var counter = parseInt(parts[3], 16);
counter: counter,
var node = parts[4];
node: node // Check the result for drift and counter overflow
if (!isNaN(millis) && !isNaN(counter))
}; if (lNew - phys > config.maxDrift) {
return new Timestamp(millis, counter, node);
} throw new Timestamp.ClockDriftError(lNew, phys, config.maxDrift);
}
}
}
valueOf() { if (cNew > 65535) {
return null;
return this.toString(); throw new Timestamp.OverflowError();
};
} }
Timestamp.since = isoString => {
toString() { // Repack the logical time/counter
return isoString + '-0000-0000000000000000';
return [ clock.timestamp.setMillis(lNew);
};
new Date(this.millis()).toISOString(), clock.timestamp.setCounter(cNew);
(
Timestamp.DuplicateNodeError = class extends Error {
'0000' + return new Timestamp(
constructor(node) {
this.counter() clock.timestamp.millis(),
super();
.toString(16) clock.timestamp.counter(),
this.type = 'DuplicateNodeError';
.toUpperCase() clock.timestamp.node()
this.message = 'duplicate node identifier ' + node;
).slice(-4), );
}
('0000000000000000' + this.node()).slice(-16) };
};
].join('-');
} // Timestamp receive. Parses and merges a timestamp from a remote
Timestamp.ClockDriftError = class extends Error {
// system with the local time so that global uniqueness and monotonicity are
constructor(...args) {
millis() { // preserved
super();
return this._state.millis; Timestamp.recv = function(clock, msg) {
this.type = 'ClockDriftError';
} var phys = Date.now();
this.message = ['maximum clock drift exceeded'].concat(args).join(' ');
}
counter() { // Unpack the message wall time/counter
};
return this._state.counter; var lMsg = msg.millis();
} var cMsg = msg.counter();
Timestamp.OverflowError = class extends Error {
constructor() {
node() { // Assert the node id and remote clock drift
super();
return this._state.node; if (msg.node() === clock.timestamp.node()) {
this.type = 'OverflowError';
} throw new Timestamp.DuplicateNodeError(clock.timestamp.node());
this.message = 'timestamp counter overflow';
}
}
hash() { if (lMsg - phys > config.maxDrift) {
};
return murmurhash.v3(this.toString()); throw new Timestamp.ClockDriftError();
} }
setClock(makeClock(new Timestamp(0, 0, makeClientId())));
}
// Unpack the clock.timestamp logical time and counter
var lOld = clock.timestamp.millis();
var cOld = clock.timestamp.counter();
unreliable ordering
conflicts
manual conflict resolution
manual conflict resolution
CRDTs
partially ordered monoid in the category of
endofunctors with a least upper bound
conflict-free replicated data types
GCounter LWWElement-Set
PNCounter ORSet
commutative: $2 + 3 = 3 + 2 = 5$
idempotent: $f(x) = f(f(x)) = f(f(f(x)))$
{ x: 300,
timestamp: "2019-11-05T15:29:40.273Z-0000-eede1195b7d94dd5" }
{ y: 73,
timestamp: "2019-11-02T15:35:32.743Z-0000-85b8c0d2bbb57d99" }
{ x: 8,
timestamp: "2019-11-02T15:35:32.743Z-0001-85b8c0d2bbb57d99" }
{ z: 114,
timestamp: "2019-11-02T15:35:32.743Z-0002-85b8c0d2bbb57d99" }
{}
{ x: 300 }
{ x: 300, y: 73 }
{ x: 300, y: 73, z: 114 }
{ x: 300,
timestamp: "2019-11-05T15:29:40.273Z-0000-eede1195b7d94dd5" }
{ y: 73,
timestamp: "2019-11-02T15:35:32.743Z-0000-85b8c0d2bbb57d99" }
{ x: 8,
timestamp: "2019-11-02T15:35:32.743Z-0001-85b8c0d2bbb57d99" }
{ z: 114,
timestamp: "2019-11-02T15:35:32.743Z-0002-85b8c0d2bbb57d99" }
Map
{}
{ x: 300 }
{ x: 300, y: 73 }
{ x: 8, y: 73 }
{ x: 8, y: 73, z: 114 }
LWW
Map → Last-Write-Wins-Map (LWWMap)
{ id: "0aead5b3-203e-475f-b3f5-1ab9ace69620",
timestamp: "2019-11-02T15:35:32.743Z-0000-85b8c0d2bbb57d99" }
{ id: "e5b4c695-a632-4cec-a646-d61b32b2351f",
timestamp: "2019-11-02T15:35:32.743Z-0001-85b8c0d2bbb57d99" }
GSet("0aead5b3-203e-475f-b3f5-1ab9ace69620",
"e5b4c695-a632-4cec-a646-d61b32b2351f")
How to take basic relational data and
turn it into CRDTs?
SQLite table GSet of LWWMaps
A new table: messages_crdt
update("transactions", {
id: "30127b2e-f74c-4a19-af65-debfb7a6a55b",
name: "Kroger",
amount: 450
})
// becomes
{
dataset: "transactions",
row: "30127b2e-f74c-4a19-af65-debfb7a6a55b",
column: "name",
value: "Kroger",
timestamp: "2019-11-02T15:35:32.743Z-0000-85b8c0d2bbb57d99"
}
{
dataset: "transactions",
row: "30127b2e-f74c-4a19-af65-debfb7a6a55b",
column: "amount",
value: 450,
timestamp: "2019-11-02T15:35:32.743Z-0001-85b8c0d2bbb57d99"
}
delete("transactions", "30127b2e-f74c-4a19-af65-debfb7a6a55b")
// becomes
{
dataset: "transactions",
row: "30127b2e-f74c-4a19-af65-debfb7a6a55b",
column: "tombstone",
value: 1,
timestamp: "2019-11-02T15:35:32.743Z-0000-85b8c0d2bbb57d99"
}
Other features
End-to-end encryption
Live: https://crdt.jlongster.com/
Source: https://github.com/jlongster/crdt-example-app
function getTodoTypes() {
let _messages = [];
return _data.todoTypes.filter(todoType => todoType.tombstone !== 1);
let _data = {
}
todos: [],
todoTypes: [],
function insertTodoType({ name, color }) {
todoTypeMapping: [] existingMessages.set(msg1, existingMsg);
let id = insert('todoTypes', { name, color });
}; });
// Create an entry in the mapping table that points it to itself
function insert(table, row) { return existingMessages;
insert('todoTypeMapping', { id, targetId: id });
let id = uuidv4(); }
}
let fields = Object.keys(row);
function applyMessages(messages) {
function deleteTodoType(id, targetId) {
sendMessages( let existingMessages = compareMessages(messages);
if (targetId) {
fields.map(k => { let clock = getClock();
// We need to update all the pointers that point to the type that
return {
// we are deleting and point it to the new type. This already
dataset: table, messages.forEach(msg => {
// includes the type we are deleting (when created, it creates a
row: row.id || id, let existingMsg = existingMessages.get(msg);
// mapping to itself)
column: k,
for (let mapping of _data.todoTypeMapping) {
value: row[k], if (!existingMsg || existingMsg.timestamp < msg.timestamp) {
if (mapping.targetId === id) {
timestamp: Timestamp.send(getClock()).toString() apply(msg);
update('todoTypeMapping', { id: mapping.id, targetId });
}; }
}
})
}
); if (!existingMsg || existingMsg.timestamp !== msg.timestamp) {
}
clock.merkle = merkle.insert(
return id; clock.merkle,
delete_('todoTypes', id);
} Timestamp.parse(msg.timestamp)
}
);
function update(table, params) { _messages.push(msg);
setClock(makeClock(new Timestamp(0, 0, makeClientId())));
let fields = Object.keys(params).filter(k => k !== 'id'); }
});
let _onSync = null;
sendMessages(
let _syncEnabled = true;
fields.map(k => { _onSync && _onSync();
return { }
function setSyncingEnabled(flag) {
dataset: table,
_syncEnabled = flag;
row: params.id, function sendMessages(messages) {
}
column: k, applyMessages(messages);
value: params[k], sync(messages);
async function post(data) {
timestamp: Timestamp.send(getClock()).toString() }
let res = await fetch('https://crdt.jlongster.com/server/sync', {
};
method: 'POST',
}) function receiveMessages(messages) {
body: JSON.stringify(data),
); messages.forEach(msg =>
headers: {
} Timestamp.recv(getClock(), Timestamp.parse(msg.timestamp))
'Content-Type': 'application/json'
);
}
function delete_(table, id) {
});
sendMessages([ applyMessages(messages);
res = await res.json();
{ }
dataset: table,
if (res.status !== 'ok') {
row: id, function onSync(func) {
throw new Error('API error: ' + res.reason);
column: 'tombstone', _onSync = func;
}
value: 1, }
return res.data;
timestamp: Timestamp.send(getClock()).toString()
}
} async function sync(initialMessages = [], since = null) {
]); if (!_syncEnabled) {
function apply(msg) {
} return;
let table = _data[msg.dataset];
}
if (!table) {
function getTodos() {
throw new Error('Unknown dataset: ' + msg.dataset);
let todos = _data.todos let messages = initialMessages;
}
.filter(todo => todo.tombstone !== 1)
.map(todo => ({ if (since) {
let row = table.find(row => row.id === msg.row);
...todo, let timestamp = new Timestamp(since, 0, '0').toString();
if (!row) {
type: todo.type ? getTodoType(todo.type) : null messages = _messages.filter(msg => msg.timestamp >= timestamp);
table.push({ id: msg.row, [msg.column]: msg.value });
})); }
} else {
row[msg.column] = msg.value;
todos.sort((t1, t2) => { let result = await post({
}
if (t1.order < t2.order) { group_id: 'my-group',
}
return 1; client_id: getClock().timestamp.node(),
} else if (t1.order > t2.order) { messages,
function compareMessages(messages) {
return -1; merkle: getClock().merkle
let existingMessages = new Map();
} });
return 0;
// This could be optimized, but keeping it simple for now. Need to
}); receiveMessages(result.messages);
// find the latest message that exists for the dataset/row/column
// for each incoming message, so sort it first
return todos; let diffTime = merkle.diff(result.merkle, getClock().merkle);
} if (diffTime) {
let sortedMessages = [..._messages].sort((m1, m2) => {
if (since && since === diffTime) {
if (m1.timestamp < m2.timestamp) {
function getTodoType(id) { throw new Error(
return 1;
// Go through the mapping table, which is a layer of indirection. In 'A bug happened while syncing and the client ' +
} else if (m1.timestamp > m2.timestamp) {
// SQL you could think of doing a LEFT JOIN onto this table and 'was unable to get in sync with the server. ' +
return -1;
// using the id from the mapping table instead of the raw id "This is an internal error that shouldn't happen"
}
let mapping = _data.todoTypeMapping.find(m => m.id === id); );
return 0;
let type = }
});
mapping && _data.todoTypes.find(type => type.id === mapping.targetId);
return type && type.tombstone !== 1 ? type : null; return sync([], diffTime);
messages.forEach(msg1 => {
} }
let existingMsg = sortedMessages.find(
}
msg2 =>
function getNumTodos() {
msg1.dataset === msg2.dataset &&
return _data.todos.length;
msg1.row === msg2.row &&
}
msg1.column === msg2.column
);
server client
65 tweets!
only dependencies: uuid murmurhash
Conclusion
• CRDTs: https://bit.ly/2DMk0AD
• Demo app:
• Live: https://crdt.jlongster.com/
• Source: https://github.com/jlongster/crdt-example-app