- /*
- * EliasDB
- *
- * Copyright 2016 Matthias Ladkau. All rights reserved.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- */
- /*
- Package cluster contains EliasDB's clustering code.
- The clustering code provides an abstraction layer to EliasDB's graphstorage.Storage.
- This means the actual storage of a cluster can be entirely memory based or use
- any other backend as long as it satisfies the graphstorage.Storage interface.
- DistributedStorage wraps a graphstorage.Storage and has a manager.MemberManager
- object.
- Members are identified by a unique name. Calling Start() on manager.MemberManager
- registers and starts the RPC server for the member. Cluster internal RPC requests
- are served by manager.Server. It is a singleton object which routes RPC calls
- to registered MemberManagers - this architecture makes it easy to unit test
- the cluster code. The manager.MemberManager has a manager.Client object which
- can be used to send messages to the cluster.
- The integrity of the cluster is protected by a shared secret (string) among
- all members of the cluster. A new member can only join and communicate with
- the cluster if it has the secret string. The secret string is never transferred
- directly over the network - it is only used for generating a member-specific
- token which can be verified by all other members.
- The clustering code was inspired by Amazon DynamoDB
- http://www.allthingsdistributed.com/2012/01/amazon-dynamodb.html
- */
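- /*
- The following is a minimal, illustrative sketch (not part of the package itself) of
- how two members could share the cluster secret via their configuration maps. Only
- configuration keys used in this file appear; the concrete values are assumptions.
-
-     confA := map[string]interface{}{
-         manager.ConfigRPC:               "localhost:9020",
-         manager.ConfigMemberName:        "member1",
-         manager.ConfigClusterSecret:     "secret123",
-         manager.ConfigReplicationFactor: float64(2),
-     }
-     confB := map[string]interface{}{
-         manager.ConfigRPC:               "localhost:9021",
-         manager.ConfigMemberName:        "member2",
-         manager.ConfigClusterSecret:     "secret123", // must match to join the cluster
-         manager.ConfigReplicationFactor: float64(2),
-     }
-
- Each configuration map is then passed to NewDistributedStorage (see below) together
- with a StateInfo object which holds the dynamic cluster state.
- */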
- package cluster
- import (
- "fmt"
- "math"
- "sync"
- "devt.de/krotik/common/datautil"
- "devt.de/krotik/eliasdb/cluster/manager"
- "devt.de/krotik/eliasdb/graph/graphstorage"
- "devt.de/krotik/eliasdb/storage"
- )
- /*
- DistributedStorageError is an error related to the distributed storage. This
- error is returned when the data distribution fails, for example when too many
- cluster members have failed.
- */
- type DistributedStorageError struct {
- err error // Wrapped error
- }
- /*
- newError creates a new DistributedStorageError.
- */
- func newError(err error) error {
- return &DistributedStorageError{err}
- }
- /*
- Error returns a string representation of a DistributedStorageError.
- */
- func (dse *DistributedStorageError) Error() string {
- return fmt.Sprint("Storage disabled: ", dse.err.Error())
- }
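- /*
- Callers can detect a disabled storage by checking for this error type. A minimal,
- illustrative sketch (the surrounding call is an assumption, not part of this package):
-
-     if err := ds.FlushMain(); err != nil {
-         if dse, ok := err.(*DistributedStorageError); ok {
-             fmt.Println("Cluster storage is disabled:", dse.Error())
-         }
-     }
- */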
- /*
- DistributedStorage data structure
- */
- type DistributedStorage struct {
- MemberManager *manager.MemberManager // Manager object
- distributionTableLock *sync.Mutex // Mutex to access the distribution table
- distributionTable *DistributionTable // Distribution table for the cluster - may be nil
- distributionTableError error // Error detail if the storage is disabled
- localName string // Name of the local graph storage
- localDRHandler func(interface{}, *interface{}) error // Local data request handler
- localFlushHandler func() error // Handler to flush the local storage
- localCloseHandler func() error // Handler to close the local storage
- mainDB map[string]string // Local main copy (only set when requested)
- mainDBError error // Last error when main db was requested
- }
- /*
- NewDistributedStorage creates a new cluster graph storage. The distributed storage
- wraps around a local graphstorage.Storage. The configuration of the distributed
- storage consists of two parts: a normal config map which defines static information
- like the rpc port, the secret string, etc. and a StateInfo object which is used for
- dynamic information like cluster members, member status, etc. An empty StateInfo
- means that the cluster has only one member.
- */
- func NewDistributedStorage(gs graphstorage.Storage, config map[string]interface{},
- stateInfo manager.StateInfo) (*DistributedStorage, error) {
- ds, ms, err := newDistributedAndMemberStorage(gs, config, stateInfo)
- if _, ok := gs.(*graphstorage.MemoryGraphStorage); ok {
- msmap[ds] = ms // Keep track of memory storages for debugging
- }
- return ds, err
- }
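- /*
- A minimal usage sketch for NewDistributedStorage, assuming a memory-backed graph
- storage and a memory-only StateInfo. The constructor names NewMemoryGraphStorage
- and NewMemStateInfo are taken from the graphstorage and manager packages; if they
- differ in a given version, substitute the matching constructors.
-
-     gs := graphstorage.NewMemoryGraphStorage("member1")
-     ds, err := NewDistributedStorage(gs, map[string]interface{}{
-         manager.ConfigRPC:           "localhost:9020",
-         manager.ConfigMemberName:    "member1",
-         manager.ConfigClusterSecret: "secret123",
-     }, manager.NewMemStateInfo())
-     if err == nil {
-         err = ds.Start() // Registers and starts the member's RPC server
-         defer ds.Close()
-     }
-
- Keys missing from the config map are filled in from manager.DefaultConfig.
- */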
- /*
- DSRetNew is the return value on successfully creating a distributed storage
- (used for testing).
- */
- var DSRetNew error
- /*
- newDistributedAndMemberStorage creates a new cluster graph storage but also returns a
- reference to the internal memberStorage object.
- */
- func newDistributedAndMemberStorage(gs graphstorage.Storage, config map[string]interface{},
- stateInfo manager.StateInfo) (*DistributedStorage, *memberStorage, error) {
- var repFac int
- // Merge given configuration with default configuration
- clusterconfig := datautil.MergeMaps(manager.DefaultConfig, config)
- // Make 100% sure there is a secret string
- if clusterconfig[manager.ConfigClusterSecret] == "" {
- clusterconfig[manager.ConfigClusterSecret] = manager.DefaultConfig[manager.ConfigClusterSecret]
- }
- // Set replication factor
- if f, ok := stateInfo.Get(manager.StateInfoREPFAC); !ok {
- repFac = int(math.Max(clusterconfig[manager.ConfigReplicationFactor].(float64), 1))
- stateInfo.Put(manager.StateInfoREPFAC, repFac)
- stateInfo.Flush()
- } else {
- repFac = f.(int)
- }
- // Create member objects - these calls will initialise this member's state info
- mm := manager.NewMemberManager(clusterconfig[manager.ConfigRPC].(string),
- clusterconfig[manager.ConfigMemberName].(string),
- clusterconfig[manager.ConfigClusterSecret].(string), stateInfo)
- dt, err := NewDistributionTable(mm.Members(), repFac)
- if err != nil {
- mm.LogInfo("Storage disabled:", err)
- }
- ds := &DistributedStorage{mm, &sync.Mutex{}, dt, err, gs.Name(), nil, nil, nil, nil, nil}
- // Create the memberStorage instance which is not exposed - the object will
- // only be used by the RPC server and called during start and stop. It is
- // the only instance which has access to the wrapped graphstorage.Storage.
- memberStorage, err := newMemberStorage(ds, gs)
- if err != nil {
- return nil, nil, err
- }
- // Register handler function for RPC calls and for closing the local storage
- mm.SetHandleDataRequest(memberStorage.handleDataRequest)
- ds.localDRHandler = memberStorage.handleDataRequest
- ds.localFlushHandler = memberStorage.gs.FlushAll
- ds.localCloseHandler = memberStorage.gs.Close
- // Set update handler
- ds.MemberManager.SetEventHandler(func() {
- // Handler for state info updates (this handler is called once the state
- // info object has been updated from the current state)
- si := mm.StateInfo()
- rfo, ok := si.Get(manager.StateInfoREPFAC)
- rf := rfo.(int)
- members, ok2 := si.Get(manager.StateInfoMEMBERS)
- if ok && ok2 {
- distTable, distTableErr := ds.DistributionTable()
- numMembers := len(members.([]string)) / 2
- numFailedPeers := len(mm.Client.FailedPeers())
- // Check if the cluster is operational
- if distTableErr == nil && rf > 0 && numFailedPeers > rf-1 {
- // Cluster is not operational
- if distTable != nil {
- err := fmt.Errorf("Too many members failed (total: %v, failed: %v, replication: %v)",
- numMembers, numFailedPeers, rf)
- mm.LogInfo("Storage disabled:", err.Error())
- ds.SetDistributionTableError(err)
- }
- return
- }
- // Check if the replication factor or the number of members has changed
- if distTable == nil ||
- numMembers != len(distTable.Members()) ||
- rf != distTable.repFac {
- // Try to renew the distribution table
- if dt, err := NewDistributionTable(mm.Members(), rf); err == nil {
- ds.SetDistributionTable(dt)
- }
- }
- }
- }, memberStorage.transferWorker)
- return ds, memberStorage, DSRetNew
- }
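- /*
- The operational check in the event handler above follows a simple rule: with a
- replication factor of rf every piece of data is held by rf members, so the cluster
- can tolerate at most rf-1 failed members. An illustrative helper (not part of the
- package API) capturing the same arithmetic:
-
-     func isOperational(failedPeers int, repFac int) bool {
-         // e.g. a replication factor of 3 tolerates up to 2 failed members
-         return repFac > 0 && failedPeers <= repFac-1
-     }
- */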
- /*
- Start starts the distributed storage.
- */
- func (ds *DistributedStorage) Start() error {
- return ds.MemberManager.Start()
- }
- /*
- Close closes the distributed storage.
- */
- func (ds *DistributedStorage) Close() error {
- ds.MemberManager.Shutdown()
- return ds.localCloseHandler()
- }
- /*
- IsOperational returns whether this distributed storage is operational.
- */
- func (ds *DistributedStorage) IsOperational() bool {
- ds.distributionTableLock.Lock()
- defer ds.distributionTableLock.Unlock()
- return ds.distributionTableError == nil && ds.distributionTable != nil
- }
- /*
- DistributionTable returns the current distribution table or an error if the
- storage is not available.
- */
- func (ds *DistributedStorage) DistributionTable() (*DistributionTable, error) {
- ds.distributionTableLock.Lock()
- defer ds.distributionTableLock.Unlock()
- return ds.distributionTable, ds.distributionTableError
- }
- /*
- SetDistributionTable sets the distribution table and clears any error.
- */
- func (ds *DistributedStorage) SetDistributionTable(dt *DistributionTable) {
- ds.distributionTableLock.Lock()
- defer ds.distributionTableLock.Unlock()
- ds.distributionTable = dt
- ds.distributionTableError = nil
- }
- /*
- SetDistributionTableError records a distribution table related error. This
- clears the current distribution table.
- */
- func (ds *DistributedStorage) SetDistributionTableError(err error) {
- ds.distributionTableLock.Lock()
- defer ds.distributionTableLock.Unlock()
- ds.distributionTable = nil
- ds.distributionTableError = newError(err)
- }
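- /*
- A typical caller-side pattern (illustrative sketch, not part of the package API):
- check IsOperational before issuing requests and use DistributionTable to obtain
- the error detail when the storage is disabled.
-
-     func ensureOperational(ds *DistributedStorage) error {
-         if !ds.IsOperational() {
-             _, err := ds.DistributionTable()
-             return err // explains why the storage is disabled
-         }
-         return nil
-     }
- */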
- /*
- sendDataRequest is used to send data requests into the cluster.
- */
- func (ds *DistributedStorage) sendDataRequest(member string, request *DataRequest) (interface{}, error) {
- // Check if the request should be handled locally
- if member == ds.MemberManager.Name() {
- // Make sure to copy the request value for local insert or update requests.
- // This is necessary since the serialization buffers are pooled and never
- // dismissed. Locally the values are just passed around.
- if request.RequestType == RTInsert || request.RequestType == RTUpdate {
- var val []byte
- datautil.CopyObject(request.Value, &val)
- request.Value = val
- }
- var response interface{}
- err := ds.localDRHandler(request, &response)
- return response, err
- }
- return ds.MemberManager.Client.SendDataRequest(member, request)
- }
- /*
- Name returns the name of the cluster DistributedStorage instance.
- */
- func (ds *DistributedStorage) Name() string {
- return ds.MemberManager.Name()
- }
- /*
- LocalName returns the name of the wrapped local graphstorage.Storage instance.
- */
- func (ds *DistributedStorage) LocalName() string {
- return ds.localName
- }
- /*
- ReplicationFactor returns the replication factor of this cluster member. A
- value of 0 means the cluster is currently not operational.
- */
- func (ds *DistributedStorage) ReplicationFactor() int {
- // Do not do anything if the cluster is not operational
- distTable, distTableErr := ds.DistributionTable()
- if distTableErr != nil {
- return 0
- }
- return distTable.repFac
- }
- /*
- MainDB returns the main database. The main database is a quick
- lookup map for meta data which is always kept in memory.
- */
- func (ds *DistributedStorage) MainDB() map[string]string {
- ret := make(map[string]string)
- // Clear the current mainDB cache
- ds.mainDB = nil
- // Do not do anything if the cluster is not operational
- distTable, distTableErr := ds.DistributionTable()
- if distTableErr != nil {
- ds.mainDBError = distTableErr
- return ret
- }
- // Main db requests always go to member 1
- member := distTable.Members()[0]
- request := &DataRequest{RTGetMain, nil, nil, false}
- mainDB, err := ds.sendDataRequest(member, request)
- if err != nil {
- // Cycle through all replicating members if there was an error.
- // (as long as the cluster is considered operational there must be a
- // replicating member available to accept the request)
- for _, rmember := range distTable.Replicas(member) {
- mainDB, err = ds.sendDataRequest(rmember, request)
- if err == nil {
- break
- }
- }
- }
- ds.mainDBError = err
- if mainDB != nil {
- ds.mainDB = mainDB.(map[string]string)
- ret = ds.mainDB
- }
- // If we failed to get the main db then any subsequent flush will also fail.
- return ret
- }
- /*
- RollbackMain rolls back the main database.
- */
- func (ds *DistributedStorage) RollbackMain() error {
- // Nothing to do here - the main db will be updated next time it is requested
- ds.mainDB = nil
- ds.mainDBError = nil
- return nil
- }
- /*
- FlushMain writes the main database to the storage.
- */
- func (ds *DistributedStorage) FlushMain() error {
- // Make sure there is no error
- distTable, distTableErr := ds.DistributionTable()
- if ds.mainDBError != nil {
- return ds.mainDBError
- } else if distTableErr != nil {
- return distTableErr
- }
- // Main db requests always go to member 1
- member := distTable.Members()[0]
- request := &DataRequest{RTSetMain, nil, ds.mainDB, false}
- _, err := ds.sendDataRequest(member, request)
- if err != nil {
- // Cycle through all replicating members if there was an error.
- // (as long as the cluster is considered operational there must be a
- // replicating member available to accept the request)
- for _, rmember := range distTable.Replicas(member) {
- _, err = ds.sendDataRequest(rmember, request)
- if err == nil {
- break
- }
- }
- }
- return err
- }
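- /*
- MainDB and FlushMain are typically used together in a read-modify-flush cycle.
- An illustrative sketch (the key name is an assumption):
-
-     main := ds.MainDB()
-     main["some_meta_key"] = "some value"
-     if err := ds.FlushMain(); err != nil {
-         // Flushing fails if the main db could not be fetched or the
-         // cluster is not operational
-     }
- */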
- /*
- FlushAll writes all pending local changes to the storage.
- */
- func (ds *DistributedStorage) FlushAll() error {
- return ds.localFlushHandler()
- }
- /*
- StorageManager gets a storage manager with a certain name. A non-existing StorageManager
- is not created automatically if the create flag is set to false.
- */
- func (ds *DistributedStorage) StorageManager(smname string, create bool) storage.Manager {
- // Make sure there is no error
- distTable, distTableErr := ds.DistributionTable()
- if ds.mainDBError != nil {
- return nil
- } else if distTableErr != nil {
- return nil
- }
- if !create {
- // Make sure the storage manager exists if it should not be created.
- // Try to get its 1st root value. If nil is returned then the storage
- // manager does not exist.
- // Root ids as well as the first insert request for data always go to member 1.
- member := distTable.Members()[0]
- request := &DataRequest{RTGetRoot, map[DataRequestArg]interface{}{
- RPStoreName: smname,
- RPRoot: 1,
- }, nil, false}
- res, err := ds.sendDataRequest(member, request)
- if res == nil && err == nil {
- return nil
- }
- }
- return &DistributedStorageManager{smname, 0, ds, nil}
- }
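- /*
- An illustrative sketch of obtaining a storage manager through the cluster layer
- (the manager name "mystorage.nodes" is an assumption):
-
-     sm := ds.StorageManager("mystorage.nodes", true)
-     if sm == nil {
-         // The cluster storage is disabled or the manager does not exist
-         // (the latter only when create is false)
-     }
- */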