// The MIT License
//
// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved.
//
// Copyright (c) 2020 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package internal

// All code in this file is private to the package.

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"time"

	"github.com/gogo/protobuf/types"
	"github.com/pborman/uuid"
	commonpb "go.temporal.io/api/common/v1"
	enumspb "go.temporal.io/api/enums/v1"
	historypb "go.temporal.io/api/history/v1"
	taskqueuepb "go.temporal.io/api/taskqueue/v1"
	"go.temporal.io/api/workflowservice/v1"

	"go.temporal.io/sdk/converter"
	"go.temporal.io/sdk/internal/common"
	"go.temporal.io/sdk/internal/common/metrics"
	"go.temporal.io/sdk/internal/common/serializer"
	"go.temporal.io/sdk/log"
)

const (
	// The server returns an empty task after dynamicconfig.MatchingLongPollExpirationInterval (default is 60 seconds).
	// pollTaskServiceTimeOut should be dynamicconfig.MatchingLongPollExpirationInterval plus some delta for the full
	// round trip to matching, because the empty task should be returned before the timeout expires (an expired
	// timeout counts against the SLO).
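	// For example, with the default 60 second server-side expiration, the 70 second value below
	// leaves a 10 second delta for the request and response to travel between worker and matching.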
	pollTaskServiceTimeOut = 70 * time.Second

	stickyWorkflowTaskScheduleToStartTimeoutSeconds = 5

	ratioToForceCompleteWorkflowTaskComplete = 0.8
)

type (
	// taskPoller is the interface for polling and processing a task.
	taskPoller interface {
		// PollTask polls for one new task
		PollTask() (interface{}, error)
		// ProcessTask processes a task
		ProcessTask(interface{}) error
	}

	// basePoller is the base type embedded by all poller implementations.
	basePoller struct {
		metricsHandler metrics.Handler // base metric handler used for rpc calls
		stopC          <-chan struct{}
	}

	// workflowTaskPoller implements polling/processing a workflow task
	workflowTaskPoller struct {
		basePoller
		namespace     string
		taskQueueName string
		identity      string
		service       workflowservice.WorkflowServiceClient
		taskHandler   WorkflowTaskHandler
		logger        log.Logger
		dataConverter converter.DataConverter

		stickyUUID                   string
		StickyScheduleToStartTimeout time.Duration

		pendingRegularPollCount int
		pendingStickyPollCount  int
		stickyBacklog           int64
		requestLock             sync.Mutex
		stickyCacheSize         int
	}

	// activityTaskPoller implements polling/processing an activity task
	activityTaskPoller struct {
		basePoller
		namespace           string
		taskQueueName       string
		identity            string
		service             workflowservice.WorkflowServiceClient
		taskHandler         ActivityTaskHandler
		logger              log.Logger
		activitiesPerSecond float64
	}

	historyIteratorImpl struct {
		iteratorFunc   func(nextPageToken []byte) (*historypb.History, []byte, error)
		execution      *commonpb.WorkflowExecution
		nextPageToken  []byte
		namespace      string
		service        workflowservice.WorkflowServiceClient
		maxEventID     int64
		metricsHandler metrics.Handler
		taskQueue      string
	}

	localActivityTaskPoller struct {
		basePoller
		handler  *localActivityTaskHandler
		logger   log.Logger
		laTunnel *localActivityTunnel
	}

	localActivityTaskHandler struct {
		userContext        context.Context
		metricsHandler     metrics.Handler
		logger             log.Logger
		dataConverter      converter.DataConverter
		contextPropagators []ContextPropagator
		interceptors       []WorkerInterceptor
	}

	localActivityResult struct {
		result  *commonpb.Payloads
		err     error
		task    *localActivityTask
		backoff time.Duration
	}

	localActivityTunnel struct {
		taskCh   chan *localActivityTask
		resultCh chan interface{}
		stopCh   <-chan struct{}
	}
)

// newLocalActivityTunnel creates the channel pair used to hand local activity tasks
// from the workflow task processor to the local activity poller and to send results back.
func newLocalActivityTunnel(stopCh <-chan struct{}) *localActivityTunnel {
	return &localActivityTunnel{
		taskCh:   make(chan *localActivityTask, 100000),
		resultCh: make(chan interface{}),
		stopCh:   stopCh,
	}
}

func (lat *localActivityTunnel) getTask() *localActivityTask {
	select {
	case task := <-lat.taskCh:
		return task
	case <-lat.stopCh:
		return nil
	}
}

func (lat *localActivityTunnel) sendTask(task *localActivityTask) bool {
	select {
	case lat.taskCh <- task:
		return true
	case <-lat.stopCh:
		return false
	}
}

func isClientSideError(err error) bool {
	// If an activity execution exceeds deadline.
	return err == context.DeadlineExceeded
}

// stopping returns true if the worker is stopping right now.
func (bp *basePoller) stopping() bool {
	select {
	case <-bp.stopC:
		return true
	default:
		return false
	}
}

// doPoll runs the given pollFunc in a separate goroutine.
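// Each poll is bounded by pollTaskServiceTimeOut through a long-poll gRPC context, so a hung
// connection cannot block the poller indefinitely.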
// Returns when one of the following occurs: the poll succeeds, the poll fails, or the worker is stopping.
func (bp *basePoller) doPoll(pollFunc func(ctx context.Context) (interface{}, error)) (interface{}, error) {
	if bp.stopping() {
		return nil, errStop
	}

	var err error
	var result interface{}

	doneC := make(chan struct{})
	ctx, cancel := newGRPCContext(context.Background(), grpcTimeout(pollTaskServiceTimeOut), grpcLongPoll(true))

	go func() {
		result, err = pollFunc(ctx)
		cancel()
		close(doneC)
	}()

	select {
	case <-doneC:
		return result, err
	case <-bp.stopC:
		cancel()
		return nil, errStop
	}
}

// newWorkflowTaskPoller creates a new workflow task poller, which must have a one-to-one
// relationship with its workflow worker.
func newWorkflowTaskPoller(taskHandler WorkflowTaskHandler, service workflowservice.WorkflowServiceClient, params workerExecutionParameters) *workflowTaskPoller {
	return &workflowTaskPoller{
		basePoller:                   basePoller{metricsHandler: params.MetricsHandler, stopC: params.WorkerStopChannel},
		service:                      service,
		namespace:                    params.Namespace,
		taskQueueName:                params.TaskQueue,
		identity:                     params.Identity,
		taskHandler:                  taskHandler,
		logger:                       params.Logger,
		dataConverter:                params.DataConverter,
		stickyUUID:                   uuid.New(),
		StickyScheduleToStartTimeout: params.StickyScheduleToStartTimeout,
		stickyCacheSize:              params.cache.MaxWorkflowCacheSize(),
	}
}

// PollTask polls for a new task.
func (wtp *workflowTaskPoller) PollTask() (interface{}, error) {
	// Get the task.
	workflowTask, err := wtp.doPoll(wtp.poll)
	if err != nil {
		return nil, err
	}
	return workflowTask, nil
}

// ProcessTask processes a task, which could be a workflow task or a local activity result.
func (wtp *workflowTaskPoller) ProcessTask(task interface{}) error {
	if wtp.stopping() {
		return errStop
	}

	switch task := task.(type) {
	case *workflowTask:
		return wtp.processWorkflowTask(task)
	case *resetStickinessTask:
		return wtp.processResetStickinessTask(task)
	default:
		panic("unknown task type.")
	}
}

func (wtp *workflowTaskPoller) processWorkflowTask(task *workflowTask) error {
	if task.task == nil {
		// No task was returned; the poll may have timed out.
		traceLog(func() {
			wtp.logger.Debug("Workflow task unavailable")
		})
		return nil
	}

	doneCh := make(chan struct{})
	laResultCh := make(chan *localActivityResult)
	laRetryCh := make(chan *localActivityTask)
	// Close doneCh so the local activity worker won't get blocked forever when trying to send back a result on laResultCh.
	defer close(doneCh)

	for {
		var response *workflowservice.RespondWorkflowTaskCompletedResponse
		startTime := time.Now()
		task.doneCh = doneCh
		task.laResultCh = laResultCh
		task.laRetryCh = laRetryCh
		completedRequest, err := wtp.taskHandler.ProcessWorkflowTask(
			task,
			func(response interface{}, startTime time.Time) (*workflowTask, error) {
				wtp.logger.Debug("Force RespondWorkflowTaskCompleted.", "TaskStartedEventID", task.task.GetStartedEventId())
				heartbeatResponse, err := wtp.RespondTaskCompletedWithMetrics(response, nil, task.task, startTime)
				if err != nil {
					return nil, err
				}
				if heartbeatResponse == nil || heartbeatResponse.WorkflowTask == nil {
					return nil, nil
				}
				task := wtp.toWorkflowTask(heartbeatResponse.WorkflowTask)
				task.doneCh = doneCh
				task.laResultCh = laResultCh
				task.laRetryCh = laRetryCh
				return task, nil
			},
		)
		if completedRequest == nil && err == nil {
			return nil
		}
		if _, ok := err.(workflowTaskHeartbeatError); ok {
			return err
		}
		response, err = wtp.RespondTaskCompletedWithMetrics(completedRequest, err, task.task, startTime)
		if err != nil {
			return err
		}

		if response == nil || response.WorkflowTask == nil {
			return nil
		}

		// We received a new workflow task in the completion response, so reset the workflowTask and continue processing the new one.
		task = wtp.toWorkflowTask(response.WorkflowTask)
	}
}

func (wtp *workflowTaskPoller) processResetStickinessTask(rst *resetStickinessTask) error {
	grpcCtx, cancel := newGRPCContext(context.Background())
	defer cancel()
	// WorkflowType information is not available on a reset sticky task. Emit using the base scope.
	wtp.metricsHandler.Counter(metrics.StickyCacheTotalForcedEviction).Inc(1)
	if _, err := wtp.service.ResetStickyTaskQueue(grpcCtx, rst.task); err != nil {
		wtp.logger.Warn("ResetStickyTaskQueue failed",
			tagWorkflowID, rst.task.Execution.GetWorkflowId(),
			tagRunID, rst.task.Execution.GetRunId(),
			tagError, err)
		return err
	}

	return nil
}

func (wtp *workflowTaskPoller) RespondTaskCompletedWithMetrics(
	completedRequest interface{},
	taskErr error,
	task *workflowservice.PollWorkflowTaskQueueResponse,
	startTime time.Time,
) (response *workflowservice.RespondWorkflowTaskCompletedResponse, err error) {
	metricsHandler := wtp.metricsHandler.WithTags(metrics.WorkflowTags(task.WorkflowType.GetName()))
	if taskErr != nil {
		metricsHandler.Counter(metrics.WorkflowTaskExecutionFailureCounter).Inc(1)
		wtp.logger.Warn("Failed to process workflow task.",
			tagWorkflowType, task.WorkflowType.GetName(),
			tagWorkflowID, task.WorkflowExecution.GetWorkflowId(),
			tagRunID, task.WorkflowExecution.GetRunId(),
			tagAttempt, task.Attempt,
			tagError, taskErr)
		// Convert the error to a WorkflowTaskFailed request.
		completedRequest = errorToFailWorkflowTask(task.TaskToken, taskErr, wtp.identity, wtp.dataConverter, wtp.namespace)
	}

	metricsHandler.Timer(metrics.WorkflowTaskExecutionLatency).Record(time.Since(startTime))

	response, err = wtp.RespondTaskCompleted(completedRequest, task)
	return
}

func (wtp *workflowTaskPoller) RespondTaskCompleted(completedRequest interface{}, task *workflowservice.PollWorkflowTaskQueueResponse) (response *workflowservice.RespondWorkflowTaskCompletedResponse, err error) {
	ctx := context.Background()
	// Respond with the task completion.
	grpcCtx, cancel := newGRPCContext(ctx,
		grpcMetricsHandler(wtp.metricsHandler.WithTags(metrics.RPCTags(task.GetWorkflowType().GetName(), metrics.NoneTagValue, metrics.NoneTagValue))),
		defaultGrpcRetryParameters(ctx))
	defer cancel()

	switch request := completedRequest.(type) {
	case *workflowservice.RespondWorkflowTaskFailedRequest:
		// Only fail the workflow task on the first attempt; a subsequent failure of the same workflow task
		// will time out instead. This avoids spinning on a failed workflow task. Attempt is checked for
		// compatibility with older servers.
		if task.GetAttempt() == 1 {
			_, err = wtp.service.RespondWorkflowTaskFailed(grpcCtx, request)
			if err != nil {
				traceLog(func() {
					wtp.logger.Debug("RespondWorkflowTaskFailed failed.", tagError, err)
				})
			}
		}
	case *workflowservice.RespondWorkflowTaskCompletedRequest:
		if request.StickyAttributes == nil && wtp.stickyCacheSize > 0 {
			request.StickyAttributes = &taskqueuepb.StickyExecutionAttributes{
				WorkerTaskQueue: &taskqueuepb.TaskQueue{
					Name: getWorkerTaskQueue(wtp.stickyUUID),
					Kind: enumspb.TASK_QUEUE_KIND_STICKY,
				},
				ScheduleToStartTimeout: &wtp.StickyScheduleToStartTimeout,
			}
		}
		response, err = wtp.service.RespondWorkflowTaskCompleted(grpcCtx, request)
		if err != nil {
			traceLog(func() {
				wtp.logger.Debug("RespondWorkflowTaskCompleted failed.", tagError, err)
			})
		}
	case *workflowservice.RespondQueryTaskCompletedRequest:
		_, err = wtp.service.RespondQueryTaskCompleted(grpcCtx, request)
		if err != nil {
			traceLog(func() {
				wtp.logger.Debug("RespondQueryTaskCompleted failed.", tagError, err)
			})
		}
	default:
		// should not happen
		panic("unknown request type from ProcessWorkflowTask()")
	}
	return
}

func newLocalActivityPoller(
	params workerExecutionParameters,
	laTunnel *localActivityTunnel,
	interceptors []WorkerInterceptor,
) *localActivityTaskPoller {
	handler := &localActivityTaskHandler{
		userContext:        params.UserContext,
		metricsHandler:     params.MetricsHandler,
		logger:             params.Logger,
		dataConverter:      params.DataConverter,
		contextPropagators: params.ContextPropagators,
		interceptors:       interceptors,
	}
	return &localActivityTaskPoller{
		basePoller: basePoller{metricsHandler: params.MetricsHandler, stopC: params.WorkerStopChannel},
		handler:    handler,
		logger:     params.Logger,
		laTunnel:   laTunnel,
	}
}

func (latp *localActivityTaskPoller) PollTask() (interface{}, error) {
	return latp.laTunnel.getTask(), nil
}

func (latp *localActivityTaskPoller) ProcessTask(task interface{}) error {
	if latp.stopping() {
		return errStop
	}

	result := latp.handler.executeLocalActivityTask(task.(*localActivityTask))
	// We need to send back the local activity result to unblock workflowTaskPoller.processWorkflowTask(), which is
	// synchronously listening on laResultCh. We also want to make sure we don't block here forever in case
	// processWorkflowTask() has already returned and nobody is receiving from laResultCh. We guarantee that doneCh
	// is closed before returning from workflowTaskPoller.processWorkflowTask().
	select {
	case result.task.workflowTask.laResultCh <- result:
		return nil
	case <-result.task.workflowTask.doneCh:
		// processWorkflowTask() has already returned; just drop this local activity result.
		return nil
	}
}

func (lath *localActivityTaskHandler) executeLocalActivityTask(task *localActivityTask) (result *localActivityResult) {
	workflowType := task.params.WorkflowInfo.WorkflowType.Name
	activityType := task.params.ActivityType
	metricsHandler := lath.metricsHandler.WithTags(metrics.LocalActivityTags(workflowType, activityType))

	metricsHandler.Counter(metrics.LocalActivityTotalCounter).Inc(1)

	ae := activityExecutor{name: activityType, fn: task.params.ActivityFn}
	traceLog(func() {
		lath.logger.Debug("Processing new local activity task",
			tagWorkflowID, task.params.WorkflowInfo.WorkflowExecution.ID,
			tagRunID, task.params.WorkflowInfo.WorkflowExecution.RunID,
			tagActivityType, activityType,
			tagAttempt, task.attempt,
		)
	})
	ctx, err := WithLocalActivityTask(lath.userContext, task, lath.logger, lath.metricsHandler, lath.dataConverter, lath.interceptors)
	if err != nil {
		return &localActivityResult{task: task, err: fmt.Errorf("failed building context: %w", err)}
	}

	// Propagate context information into the local activity context from the headers.
	ctx, err = contextWithHeaderPropagated(ctx, task.header, lath.contextPropagators)
	if err != nil {
		return &localActivityResult{task: task, err: err}
	}

	timeout := task.params.ScheduleToCloseTimeout
	if task.params.StartToCloseTimeout != 0 && task.params.StartToCloseTimeout < timeout {
		timeout = task.params.StartToCloseTimeout
	}
	timeoutDuration := timeout
	deadline := time.Now().Add(timeoutDuration)
	if task.attempt > 1 && !task.expireTime.IsZero() && task.expireTime.Before(deadline) {
		// This is a retry attempt and the expire time falls before the SCHEDULE_TO_CLOSE deadline.
		deadline = task.expireTime
	}

	ctx, cancel := context.WithDeadline(ctx, deadline)
	defer cancel()

	task.Lock()
	if task.canceled {
		task.Unlock()
		return &localActivityResult{err: ErrCanceled, task: task}
	}
	task.cancelFunc = cancel
	task.Unlock()

	var laResult *commonpb.Payloads
	doneCh := make(chan struct{})
	go func(ch chan struct{}) {
		laStartTime := time.Now()
		defer close(ch)

		// panic handler
		defer func() {
			if p := recover(); p != nil {
				topLine := fmt.Sprintf("local activity for %s [panic]:", activityType)
				st := getStackTraceRaw(topLine, 7, 0)
				lath.logger.Error("LocalActivity panic.",
					tagWorkflowID, task.params.WorkflowInfo.WorkflowExecution.ID,
					tagRunID, task.params.WorkflowInfo.WorkflowExecution.RunID,
					tagActivityType, activityType,
					tagAttempt, task.attempt,
					tagPanicError, fmt.Sprintf("%v", p),
					tagPanicStack, st)
				metricsHandler.Counter(metrics.LocalActivityErrorCounter).Inc(1)
				err = newPanicError(p, st)
			}
			if err != nil {
				metricsHandler.Counter(metrics.LocalActivityFailedCounter).Inc(1)
			}
		}()

		laResult, err = ae.ExecuteWithActualArgs(ctx, task.params.InputArgs)
		executionLatency := time.Since(laStartTime)
		metricsHandler.Timer(metrics.LocalActivityExecutionLatency).Record(executionLatency)
		if executionLatency > timeoutDuration {
			// If the local activity takes longer than the expected timeout, the context is already
			// DeadlineExceeded and the result will be discarded. Print a warning in this case.
			lath.logger.Warn("LocalActivity takes too long to complete.",
				"LocalActivityID", task.activityID,
				"LocalActivityType", activityType,
				"ScheduleToCloseTimeout", task.params.ScheduleToCloseTimeout,
				"ActualExecutionDuration", executionLatency)
		}
	}(doneCh)

WaitResult:
	select {
	case <-ctx.Done():
		select {
		case <-doneCh:
			// Double check whether the result is ready.
			break WaitResult
		default:
		}

		// The context is done.
		if ctx.Err() == context.Canceled {
			metricsHandler.Counter(metrics.LocalActivityCanceledCounter).Inc(1)
			return &localActivityResult{err: ErrCanceled, task: task}
		} else if ctx.Err() == context.DeadlineExceeded {
			return &localActivityResult{err: ErrDeadlineExceeded, task: task}
		} else {
			// should not happen
			return &localActivityResult{err: NewApplicationError("unexpected context done", "", true, nil), task: task}
		}
	case <-doneCh:
		// local activity completed
	}

	if err == nil {
		metricsHandler.
			Timer(metrics.LocalActivitySucceedEndToEndLatency).
			Record(time.Since(task.params.ScheduledTime))
	}
	return &localActivityResult{result: laResult, err: err, task: task}
}

func (wtp *workflowTaskPoller) release(kind enumspb.TaskQueueKind) {
	if wtp.stickyCacheSize <= 0 {
		return
	}

	wtp.requestLock.Lock()
	if kind == enumspb.TASK_QUEUE_KIND_STICKY {
		wtp.pendingStickyPollCount--
	} else {
		wtp.pendingRegularPollCount--
	}
	wtp.requestLock.Unlock()
}

func (wtp *workflowTaskPoller) updateBacklog(taskQueueKind enumspb.TaskQueueKind, backlogCountHint int64) {
	if taskQueueKind == enumspb.TASK_QUEUE_KIND_NORMAL || wtp.stickyCacheSize <= 0 {
		// We only care about the sticky backlog for now.
		return
	}
	wtp.requestLock.Lock()
	wtp.stickyBacklog = backlogCountHint
	wtp.requestLock.Unlock()
}

// getNextPollRequest returns the appropriate next poll request based on the poller configuration.
// Simple rules:
// 1) if sticky execution is disabled, always poll for the regular task queue
// 2) otherwise:
//   2.1) if the sticky task queue has a backlog, always prefer to process sticky tasks first
//   2.2) poll from the task queue that has fewer pending requests (prefer sticky when they are the same)
// TODO: make this smarter and auto-adjust based on poll latency
func (wtp *workflowTaskPoller) getNextPollRequest() (request *workflowservice.PollWorkflowTaskQueueRequest) {
	taskQueueName := wtp.taskQueueName
	taskQueueKind := enumspb.TASK_QUEUE_KIND_NORMAL
	if wtp.stickyCacheSize > 0 {
		wtp.requestLock.Lock()
		if wtp.stickyBacklog > 0 || wtp.pendingStickyPollCount <= wtp.pendingRegularPollCount {
			wtp.pendingStickyPollCount++
			taskQueueName = getWorkerTaskQueue(wtp.stickyUUID)
			taskQueueKind = enumspb.TASK_QUEUE_KIND_STICKY
		} else {
			wtp.pendingRegularPollCount++
		}
		wtp.requestLock.Unlock()
	}

	taskQueue := &taskqueuepb.TaskQueue{
		Name: taskQueueName,
		Kind: taskQueueKind,
	}
	return &workflowservice.PollWorkflowTaskQueueRequest{
		Namespace:      wtp.namespace,
		TaskQueue:      taskQueue,
		Identity:       wtp.identity,
		BinaryChecksum: getBinaryChecksum(),
	}
}

// Poll for a single workflow task from the service.
func (wtp *workflowTaskPoller) poll(ctx context.Context) (interface{}, error) {
	traceLog(func() {
		wtp.logger.Debug("workflowTaskPoller::Poll")
	})

	request := wtp.getNextPollRequest()
	defer wtp.release(request.TaskQueue.GetKind())

	response, err := wtp.service.PollWorkflowTaskQueue(ctx, request)
	if err != nil {
		wtp.updateBacklog(request.TaskQueue.GetKind(), 0)
		return nil, err
	}

	if response == nil || len(response.TaskToken) == 0 {
		// Emit using the base scope, as no workflow type information is available for an empty poll.
		wtp.metricsHandler.Counter(metrics.WorkflowTaskQueuePollEmptyCounter).Inc(1)
		wtp.updateBacklog(request.TaskQueue.GetKind(), 0)
		return &workflowTask{}, nil
	}

	wtp.updateBacklog(request.TaskQueue.GetKind(), response.GetBacklogCountHint())

	task := wtp.toWorkflowTask(response)
	traceLog(func() {
		var firstEventID int64 = -1
		if response.History != nil && len(response.History.Events) > 0 {
			firstEventID = response.History.Events[0].GetEventId()
		}
		wtp.logger.Debug("workflowTaskPoller::Poll Succeed",
			"StartedEventID", response.GetStartedEventId(),
			"Attempt", response.GetAttempt(),
			"FirstEventID", firstEventID,
			"IsQueryTask", response.Query != nil)
	})

	metricsHandler := wtp.metricsHandler.WithTags(metrics.WorkflowTags(response.WorkflowType.GetName()))
	metricsHandler.Counter(metrics.WorkflowTaskQueuePollSucceedCounter).Inc(1)

	scheduleToStartLatency := common.TimeValue(response.GetStartedTime()).Sub(common.TimeValue(response.GetScheduledTime()))
	metricsHandler.Timer(metrics.WorkflowTaskScheduleToStartLatency).Record(scheduleToStartLatency)
	return task, nil
}

func (wtp *workflowTaskPoller) toWorkflowTask(response *workflowservice.PollWorkflowTaskQueueResponse) *workflowTask {
	historyIterator := &historyIteratorImpl{
		execution:      response.WorkflowExecution,
		nextPageToken:  response.NextPageToken,
		namespace:      wtp.namespace,
		service:        wtp.service,
		maxEventID:     response.GetStartedEventId(),
		metricsHandler: wtp.metricsHandler,
		taskQueue:      wtp.taskQueueName,
	}
	task := &workflowTask{
		task:            response,
		historyIterator: historyIterator,
	}
	return task
}

func (h *historyIteratorImpl) GetNextPage() (*historypb.History, error) {
	if h.iteratorFunc == nil {
		h.iteratorFunc = newGetHistoryPageFunc(
			context.Background(),
			h.service,
			h.namespace,
			h.execution,
			h.maxEventID,
			h.metricsHandler,
			h.taskQueue,
		)
	}

	history, token, err := h.iteratorFunc(h.nextPageToken)
	if err != nil {
		return nil, err
	}
	h.nextPageToken = token
	return history, nil
}

func (h *historyIteratorImpl) Reset() {
	h.nextPageToken = nil
}

func (h *historyIteratorImpl) HasNextPage() bool {
	return h.nextPageToken != nil
}

func newGetHistoryPageFunc(
	ctx context.Context,
	service workflowservice.WorkflowServiceClient,
	namespace string,
	execution *commonpb.WorkflowExecution,
	atWorkflowTaskCompletedEventID int64,
	metricsHandler metrics.Handler,
	taskQueue string,
) func(nextPageToken []byte) (*historypb.History, []byte, error) {
	return func(nextPageToken []byte) (*historypb.History, []byte, error) {
		var resp *workflowservice.GetWorkflowExecutionHistoryResponse
		grpcCtx, cancel := newGRPCContext(ctx,
			grpcMetricsHandler(metricsHandler.WithTags(metrics.RPCTags(metrics.NoneTagValue, metrics.NoneTagValue, taskQueue))),
			defaultGrpcRetryParameters(ctx))
		defer cancel()
		resp, err := service.GetWorkflowExecutionHistory(grpcCtx, &workflowservice.GetWorkflowExecutionHistoryRequest{
			Namespace:     namespace,
			Execution:     execution,
			NextPageToken: nextPageToken,
		})
		if err != nil {
			return nil, nil, err
		}

		var h *historypb.History
		if resp.RawHistory != nil {
			h, err = serializer.DeserializeBlobDataToHistoryEvents(resp.RawHistory, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT)
			if err != nil {
				return nil, nil, err
			}
		} else {
			h = resp.History
		}

		size := len(h.Events)
		if size > 0 && atWorkflowTaskCompletedEventID > 0 &&
			h.Events[size-1].GetEventId() > atWorkflowTaskCompletedEventID {
			first := h.Events[0].GetEventId() // eventIds start from 1
			h.Events = h.Events[:atWorkflowTaskCompletedEventID-first+1]
			if h.Events[len(h.Events)-1].GetEventType() != enumspb.EVENT_TYPE_WORKFLOW_TASK_COMPLETED {
				return nil, nil, fmt.Errorf("newGetHistoryPageFunc: atWorkflowTaskCompletedEventID(%v) "+
					"points to event that is not WorkflowTaskCompleted", atWorkflowTaskCompletedEventID)
			}
			return h, nil, nil
		}
		return h, resp.NextPageToken, nil
	}
}

func newActivityTaskPoller(taskHandler ActivityTaskHandler, service workflowservice.WorkflowServiceClient, params workerExecutionParameters) *activityTaskPoller {
	return &activityTaskPoller{
		basePoller:          basePoller{metricsHandler: params.MetricsHandler, stopC: params.WorkerStopChannel},
		taskHandler:         taskHandler,
		service:             service,
		namespace:           params.Namespace,
		taskQueueName:       params.TaskQueue,
		identity:            params.Identity,
		logger:              params.Logger,
		activitiesPerSecond: params.TaskQueueActivitiesPerSecond,
	}
}

// Poll for a single activity task from the service.
func (atp *activityTaskPoller) poll(ctx context.Context) (interface{}, error) {
	startTime := time.Now()

	traceLog(func() {
		atp.logger.Debug("activityTaskPoller::Poll")
	})
	request := &workflowservice.PollActivityTaskQueueRequest{
		Namespace:         atp.namespace,
		TaskQueue:         &taskqueuepb.TaskQueue{Name: atp.taskQueueName, Kind: enumspb.TASK_QUEUE_KIND_NORMAL},
		Identity:          atp.identity,
		TaskQueueMetadata: &taskqueuepb.TaskQueueMetadata{MaxTasksPerSecond: &types.DoubleValue{Value: atp.activitiesPerSecond}},
	}

	response, err := atp.service.PollActivityTaskQueue(ctx, request)
	if err != nil {
		return nil, err
	}
	if response == nil || len(response.TaskToken) == 0 {
		// No activity info is available on an empty poll. Emit using the base scope.
		atp.metricsHandler.Counter(metrics.ActivityPollNoTaskCounter).Inc(1)
		return &activityTask{}, nil
	}

	workflowType := response.WorkflowType.GetName()
	activityType := response.ActivityType.GetName()
	metricsHandler := atp.metricsHandler.WithTags(metrics.ActivityTags(workflowType, activityType, atp.taskQueueName))

	scheduleToStartLatency := common.TimeValue(response.GetStartedTime()).Sub(common.TimeValue(response.GetCurrentAttemptScheduledTime()))
	metricsHandler.Timer(metrics.ActivityScheduleToStartLatency).Record(scheduleToStartLatency)

	return &activityTask{task: response, pollStartTime: startTime}, nil
}

// PollTask polls for a new task.
func (atp *activityTaskPoller) PollTask() (interface{}, error) {
	// Get the task.
	activityTask, err := atp.doPoll(atp.poll)
	if err != nil {
		return nil, err
	}
	return activityTask, nil
}

// ProcessTask processes a new task.
func (atp *activityTaskPoller) ProcessTask(task interface{}) error {
	if atp.stopping() {
		return errStop
	}

	activityTask := task.(*activityTask)
	if activityTask.task == nil {
		// No task was returned; the poll may have timed out.
		traceLog(func() {
			atp.logger.Debug("Activity task unavailable")
		})
		return nil
	}

	workflowType := activityTask.task.WorkflowType.GetName()
	activityType := activityTask.task.ActivityType.GetName()
	activityMetricsHandler := atp.metricsHandler.WithTags(metrics.ActivityTags(workflowType, activityType, atp.taskQueueName))

	executionStartTime := time.Now()
	// Process the activity task.
	request, err := atp.taskHandler.Execute(atp.taskQueueName, activityTask.task)
	// err is returned in case of an internal failure, such as being unable to propagate the context or a context timeout.
	if err != nil {
		activityMetricsHandler.Counter(metrics.ActivityExecutionFailedCounter).Inc(1)
		return err
	}

	// If the activity execution failed, the request is of type RespondActivityTaskFailedRequest.
	if _, ok := request.(*workflowservice.RespondActivityTaskFailedRequest); ok {
		activityMetricsHandler.Counter(metrics.ActivityExecutionFailedCounter).Inc(1)
	}
	activityMetricsHandler.Timer(metrics.ActivityExecutionLatency).Record(time.Since(executionStartTime))

	if request == ErrActivityResultPending {
		return nil
	}

	rpcMetricsHandler := atp.metricsHandler.WithTags(metrics.RPCTags(workflowType, activityType, metrics.NoneTagValue))
	reportErr := reportActivityComplete(context.Background(), atp.service, request, rpcMetricsHandler)
	if reportErr != nil {
		traceLog(func() {
			atp.logger.Debug("reportActivityComplete failed", tagError, reportErr)
		})
		return reportErr
	}

	activityMetricsHandler.
		Timer(metrics.ActivitySucceedEndToEndLatency).
		Record(time.Since(common.TimeValue(activityTask.task.GetScheduledTime())))
	return nil
}

func reportActivityComplete(
	ctx context.Context,
	service workflowservice.WorkflowServiceClient,
	request interface{},
	rpcMetricsHandler metrics.Handler,
) error {
	if request == nil {
		// nothing to report
		return nil
	}

	var reportErr error
	switch request := request.(type) {
	case *workflowservice.RespondActivityTaskCanceledRequest:
		grpcCtx, cancel := newGRPCContext(ctx, grpcMetricsHandler(rpcMetricsHandler), defaultGrpcRetryParameters(ctx))
		defer cancel()
		_, err := service.RespondActivityTaskCanceled(grpcCtx, request)
		reportErr = err
	case *workflowservice.RespondActivityTaskFailedRequest:
		grpcCtx, cancel := newGRPCContext(ctx, grpcMetricsHandler(rpcMetricsHandler), defaultGrpcRetryParameters(ctx))
		defer cancel()
		_, err := service.RespondActivityTaskFailed(grpcCtx, request)
		reportErr = err
	case *workflowservice.RespondActivityTaskCompletedRequest:
		grpcCtx, cancel := newGRPCContext(ctx, grpcMetricsHandler(rpcMetricsHandler), defaultGrpcRetryParameters(ctx))
		defer cancel()
		_, err := service.RespondActivityTaskCompleted(grpcCtx, request)
		reportErr = err
	}
	return reportErr
}

func reportActivityCompleteByID(
	ctx context.Context,
	service workflowservice.WorkflowServiceClient,
	request interface{},
	rpcMetricsHandler metrics.Handler,
) error {
	if request == nil {
		// nothing to report
		return nil
	}

	var reportErr error
	switch request := request.(type) {
	case *workflowservice.RespondActivityTaskCanceledByIdRequest:
		grpcCtx, cancel := newGRPCContext(ctx, grpcMetricsHandler(rpcMetricsHandler), defaultGrpcRetryParameters(ctx))
		defer cancel()
		_, err := service.RespondActivityTaskCanceledById(grpcCtx, request)
		reportErr = err
	case *workflowservice.RespondActivityTaskFailedByIdRequest:
		grpcCtx, cancel := newGRPCContext(ctx, grpcMetricsHandler(rpcMetricsHandler), defaultGrpcRetryParameters(ctx))
		defer cancel()
		_, err := service.RespondActivityTaskFailedById(grpcCtx, request)
		reportErr = err
	case *workflowservice.RespondActivityTaskCompletedByIdRequest:
		grpcCtx, cancel := newGRPCContext(ctx, grpcMetricsHandler(rpcMetricsHandler), defaultGrpcRetryParameters(ctx))
		defer cancel()
		_, err := service.RespondActivityTaskCompletedById(grpcCtx, request)
		reportErr = err
	}
	return reportErr
}

func convertActivityResultToRespondRequest(
	identity string,
	taskToken []byte,
	result *commonpb.Payloads,
	err error,
	dataConverter converter.DataConverter,
	namespace string,
	cancelAllowed bool,
) interface{} {
	if err == ErrActivityResultPending {
		// The activity result is pending and will be completed asynchronously.
		// nothing to report at this point
		return ErrActivityResultPending
	}

	if err == nil {
		return &workflowservice.RespondActivityTaskCompletedRequest{
			TaskToken: taskToken,
			Result:    result,
			Identity:  identity,
			Namespace: namespace}
	}

	// Only respond with canceled if allowed
	if cancelAllowed {
		var canceledErr *CanceledError
		if errors.As(err, &canceledErr) {
			return &workflowservice.RespondActivityTaskCanceledRequest{
				TaskToken: taskToken,
				Details:   convertErrDetailsToPayloads(canceledErr.details, dataConverter),
				Identity:  identity,
				Namespace: namespace}
		}
		if errors.Is(err, context.Canceled) {
			return &workflowservice.RespondActivityTaskCanceledRequest{
				TaskToken: taskToken,
				Identity:  identity,
				Namespace: namespace}
		}
	}

	// If a canceled error is returned but it wasn't allowed, we have to wrap it in
	// an unexpected-cancel application error
	if _, isCanceledErr := err.(*CanceledError); isCanceledErr {
		err = fmt.Errorf("unexpected activity cancel error: %w", err)
	}

	return &workflowservice.RespondActivityTaskFailedRequest{
		TaskToken: taskToken,
		Failure:   ConvertErrorToFailure(err, dataConverter),
		Identity:  identity,
		Namespace: namespace}
}

func convertActivityResultToRespondRequestByID(
	identity string,
	namespace string,
	workflowID string,
	runID string,
	activityID string,
	result *commonpb.Payloads,
	err error,
	dataConverter converter.DataConverter,
	cancelAllowed bool,
) interface{} {
	if err == ErrActivityResultPending {
		// The activity result is pending and will be completed asynchronously.
		// nothing to report at this point
		return nil
	}

	if err == nil {
		return &workflowservice.RespondActivityTaskCompletedByIdRequest{
			Namespace:  namespace,
			WorkflowId: workflowID,
			RunId:      runID,
			ActivityId: activityID,
			Result:     result,
			Identity:   identity}
	}

	// Only respond with canceled if allowed
	if cancelAllowed {
		var canceledErr *CanceledError
		if errors.As(err, &canceledErr) {
			return &workflowservice.RespondActivityTaskCanceledByIdRequest{
				Namespace:  namespace,
				WorkflowId: workflowID,
				RunId:      runID,
				ActivityId: activityID,
				Details:    convertErrDetailsToPayloads(canceledErr.details, dataConverter),
				Identity:   identity}
		}
		if errors.Is(err, context.Canceled) {
			return &workflowservice.RespondActivityTaskCanceledByIdRequest{
				Namespace:  namespace,
				WorkflowId: workflowID,
				RunId:      runID,
				ActivityId: activityID,
				Identity:   identity}
		}
	}

	// If a canceled error is returned but it wasn't allowed, we have to wrap it in
	// an unexpected-cancel application error
	if _, isCanceledErr := err.(*CanceledError); isCanceledErr {
		err = fmt.Errorf("unexpected activity cancel error: %w", err)
	}

	return &workflowservice.RespondActivityTaskFailedByIdRequest{
		Namespace:  namespace,
		WorkflowId: workflowID,
		RunId:      runID,
		ActivityId: activityID,
		Failure:    ConvertErrorToFailure(err, dataConverter),
		Identity:   identity}
}