
Monday, December 1, 2025

Subscribing To Microsoft Copilot Events


 

In this post we will review how to subscribe to Microsoft Copilot events. 

Notice that Microsoft Copilot uses a completely different mechanism than the Microsoft Copilot Studio agents; see the Microsoft External Threat Detection post.


Create Encryption Key

We start by creating an asymmetric encryption key and a self-signed certificate:


openssl genrsa -out private.key 2048
openssl req -new -x509 -key private.key -out publicCert.cer -days 365
base64 publicCert.cer > publicCertBase64.txt
awk 'NF {printf "%s", $0}' publicCertBase64.txt > cert_clean.txt
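
Optionally, we can sanity-check the generated certificate before using it:

openssl x509 -in publicCert.cer -noout -subject -dates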


Create App Registration

Use Microsoft Entra to create a new App Registration with the permission AiEnterpriseInteraction.Read.All. Notice this permission is under the "Microsoft Graph" section.

After adding the permission to the App Registration, click the Grant admin consent button.

We also add a client secret so the App Registration can be used from a script. As far as I could see, there is no GUI available for creating this subscription, so we must use a script.


Create a Service

To supply a subscription endpoint that Microsoft will send the notifications to, create a publicly available service with a valid TLS certificate. For example, the endpoint can be:

https://my-site.com/interactions

Notice this endpoint should accept both GET and POST requests.

A very simple example of such an endpoint is below.

func (e *Executor) Execute(p web.Parser) interface{} {
    log.Info("interactions starting")

    // the subscription validation handshake sends a validationToken query parameter
    validation := p.QueryParam("validationToken")
    log.Info("token: %v", validation)

    // actual notifications arrive in the POST body
    data, err := p.GetBodyAsBytes()
    kiterr.RaiseIfError(err)
    log.Info("body: %v", string(data))

    // echo the validation token back as plain text to complete the handshake
    p.SetHeader("Content-Type", "text/plain")
    p.WriteStreamingResponse([]byte(validation))

    return nil
}
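
Assuming the handler above is deployed, the validation handshake can be simulated with a simple curl call (my-site.com is the placeholder used later in the post):

# Graph sends a validation request with a validationToken query parameter;
# the endpoint must echo the token back as text/plain
curl "https://my-site.com/interactions?validationToken=test123"
# expected response body: test123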

Call the Subscribe API

Use the following to subscribe to events:

#!/bin/bash

TENANT_ID="12345678-1234-1234-1234-123456789012"
CLIENT_ID="12345678-1234-1234-1234-123456789012"
CLIENT_SECRET="abcdefghijklmnopqrstuvwxyz1234567890abcd"

request_token(){
SCOPE=graph.microsoft.com
curl -s -X POST "https://login.microsoftonline.com/$TENANT_ID/oauth2/v2.0/token" \
-H "Content-Type: application/x-www-form-urlencoded" \
-d "client_id=$CLIENT_ID&scope=https%3A%2F%2F${SCOPE}%2F.default&client_secret=$CLIENT_SECRET&grant_type=client_credentials" \
| jq -r '.access_token'
}

request_subscription(){
curl -H "Authorization: Bearer $ACCESS_TOKEN" \
-H "Content-Type: application/json" \
-d '
{
"changeType": "created,deleted,updated",
"notificationUrl": "https://my-site.com/interactions",
"resource": "/copilot/interactionHistory/getAllEnterpriseInteractions",
"includeResourceData": true,
"encryptionCertificate": "LS0tLS1CRUdJTiBDRjhgjkhgkjhgkjhJKJHKJLHLKJHLKJHKJLlkjhlkjhlkjhlkjhlkjhkojghlkjhlkjhlkjhlkjhlkjKJLHLKJHLKJHLKJH8769869876IUGHKLJHKJLYH876Y87H78YH87BN87HKJBJKHGKJLGKJLHlkjhlkjhkljhkjhlkjhlhjkEdVeElUQWZCZ05WQkFvTQpHRWx1ZEdWeWJtVjBJRmRwWkdkcGRITWdVSFI1SUV4MFpEQWVGdzB5TlRFeE1qY3hNakkzTURoYUZ3MHlOakV4Ck1qY3hNakkzTURoYU1FVXhDekFKQmdOVkJBWVRBa2xNTVJNd0VRWURWUVFJREFwVGIyMWxMVk4wWVhSbE1TRXcKSHdZRFZRUUtEQmhKYm5SbGNtNWxkQ0JYYVdSbmFYUnpJRkIwZVNCTWRHUXdnZ0VpTUEwR0NTcUdTSWIzRFFFQgpBUVVBQTRJQkR3QXdnZ0VLQW9JQkFRRFB4Ny8wVzc4N0NLUUh0dHMyVDBoL25LZ0o1ejArb1ZHeFFzcFhSWnlnCnBuanpETkdqUjBtWGFVU2RTZ2JWNW05MDMrNnhqbS9LbHpuTlltOTdoUjJNcnBFSXd1OVVYaWhxU1FTS1ZVcTkKbDk0OVEzME5PK29lT0Z4K3huOC9ycGFMVmpxUzIzR3VUV09Ka3p2aktPeXVnV1BRN3FBazgrdjQ3NjdVUkVvYQpJV2l3aXBIVW4rajBMOTVDTEtFOUZQUXdLMkUzNnZrdWNzd1krSGh5bm45N1piSGszVUM3NXd1QlYwTWVyT0o2CjVQdTFYQUVPZ2JnSFFVUEhuVkViT05MdkNwSUl1MHZlZDZFZmRQbVlzTk1IK2xHSlBOZnFOemRYSEZYSXE4VWMKbHdjbDlPRllUb0dMSEdHWTJiRWpzNWxFUjN1OWtLNFlvc1llUFc2ZmJ3NHhBZ01CQUFHalV6QlJNQjBHQTFVZApEZ1FXQkJUbW45UTBBcmFtVFNTK0phbWtIbzR3eVVVSDd6QWZCZ05WSFNNRUdEQVdnQlRtbjlRMEFyYW1UU1MrCkphbWtIbzR3eVVVSDd6QVBCZ05WSFJNQkFmOEVCVEFEQVFIL01BMEdDU3FHU0liM0RRRUJDd1VBQTRJQkFRREwKRVh2cnUxb0NKNXlERVc2Njc3RlRuQWt5bitheWJqQXBaVmRiRi9vMXZyZWZKWHVBVzdnZ09WZjBrT2xCN2U0WgoyQW0rUnU1bmNiRXdBN0o0L2N0WWlLdVByLzA4U0NjTnp6ZGp6RG9qem5wL1ZadnRiYXo5NGlVOE52YmRyWXBkCkVnb1o1RVk3YzZpQW9JNDlGK2ZNOGZLR3FrL09oVDA0dUNuWk1SUFpFR0lob1dBR1J0ODg1R1VXcVNEdzJDYVAKT3F6eU5WeS8vMFpWQm40dTBER3VjQjVLVkp0Smh0MUNrRTlzeXJGV3IrSTFxTkltMkZoN3pyR1diSWRPL2gvMgpIOEFKY0xEM3QvdzNuZGUrdWl3dnFMbTVhUTcwS0k4Q2ZoZk5Mam9WcmUxTFMwK1ZxRjNlOEl6cXFtSEFQLytJCjk0aDFsOEMreVU5MHFxa3E4OFE5Ci0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K",
"encryptionCertificateId": "my-id",
"expirationDateTime": "2025-11-27T14:00:00.0000000Z",
"clientState": "my-state"
}
' \
"https://graph.microsoft.com/v1.0/subscriptions"
}

ACCESS_TOKEN=$(request_token)
request_subscription

Notice that the subscription expiration is limited to at most 2 days in the future, and the subscription must be renewed to keep receiving events.
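
Renewal is done with a PATCH request on the subscription; a minimal sketch, assuming the same access token and using a placeholder subscription id (the real id is returned by the create call):

SUBSCRIPTION_ID="00000000-0000-0000-0000-000000000000"

curl -X PATCH \
  -H "Authorization: Bearer $ACCESS_TOKEN" \
  -H "Content-Type: application/json" \
  -d '{"expirationDateTime": "2025-11-29T14:00:00.0000000Z"}' \
  "https://graph.microsoft.com/v1.0/subscriptions/$SUBSCRIPTION_ID"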


The encryptionCertificate value is the content of the cert_clean.txt file that we created earlier.


Decryption of the Messages

A simple bash script to handle the encrypted parts of the messages is below.


dataKey_base64=$(jq -r '.value[].encryptedContent.dataKey' event/event.json)
encrypted_data_base64=$(jq -r '.value[].encryptedContent.data' event/event.json)
dataSignature_base64=$(jq -r '.value[].encryptedContent.dataSignature' event/event.json)

# Decode the base64-encoded symmetric key
echo "$dataKey_base64" | base64 --decode > encrypted_key.bin

# Decrypt the symmetric key using your RSA private key with OAEP padding
openssl pkeyutl -decrypt -inkey key/private.key -pkeyopt rsa_padding_mode:oaep -in encrypted_key.bin -out symmetric_key.bin

# Extract first 16 bytes of symmetric key as IV (hex)
iv=$(xxd -p -l 16 symmetric_key.bin)

# Decode encrypted data
echo "$encrypted_data_base64" | base64 --decode > encrypted_data.bin

# Decrypt using AES-CBC with PKCS7 padding
openssl enc -aes-256-cbc -d -in encrypted_data.bin -out decrypted_data.json \
-K $(xxd -p -c 256 symmetric_key.bin) \
-iv "$iv"


Final Note

As expected for a Microsoft API this is a complicated method to get data. 


Why is double encryption of the messages required? We are already using TLS.

Why can't we subscribe forever?


Anyway, eventually it works and can be used to store the agents' interactions.
Have fun.

Monday, November 24, 2025

CI/CD in a Shell


 

Recently I had to create a CI/CD pipeline for a new project whose source repository was in Bitbucket. There are standard methods to handle this, using triggers from Bitbucket, AWS CodeBuild, and AWS CodePipeline. However, I had only read permissions on the Bitbucket repository and hence was limited in my ability to use the standard tools. I decided to create the CI/CD in bash, and surprisingly I found it extremely simple, as well as cheaper and faster than the standard tools. I am aware of the downsides of using scripts for such processes, such as lack of visibility, redundancy, and standards, but still the result was so good that I think startup projects should definitely consider it.

Listed below are the shell-based CI/CD components.


The Poll Script

The poll script runs on a t3a.nano EC2 instance whose price is ~$3/month.

It polls the Bitbucket repository every 5 minutes, and once a change on the deployment-related branch is detected, it starts the builder EC2 VM and runs the build and deploy script.

#!/bin/bash

set -eE

instanceId=""
publicIp=""
intervalSeconds=300

cleanup() {
  if [ -n "${instanceId}" ]; then
    echo "Stopping instance: ${instanceId}"
    if ! aws ec2 stop-instances --instance-ids "${instanceId}"; then
      echo "Warning: Failed to stop instance ${instanceId}. Will retry on next run."
    else
      echo "Instance stopped successfully."
    fi
    instanceId=""
  fi
}

restart_script() {
  echo "Command '$BASH_COMMAND' failed with exit code $?"
  cleanup
  echo "Restarting soon..."
  sleep ${intervalSeconds}
  exec "$0" "$@"
}

trap 'restart_script "$@"' ERR


runBuild(){
  trap cleanup RETURN

  instanceId=$(aws ec2 describe-instances \
    --filters "Name=tag:Name,Values=my-builder-vm" \
    --query "Reservations[*].Instances[*].InstanceId" \
    --output text)

  echo "Starting instance: ${instanceId}"
  aws ec2 start-instances --instance-ids ${instanceId}

  echo "Waiting for instance to be in 'running' state..."
  aws ec2 wait instance-running --instance-ids ${instanceId}

  publicIp=$(aws ec2 describe-instances \
    --instance-ids ${instanceId} \
    --query "Reservations[0].Instances[0].PublicIpAddress" \
    --output text)

  echo "Running build remote"
  ssh -o StrictHostKeyChecking=no ec2-user@${publicIp} /home/ec2-user/build/my-repo/deploy/aws/production/deploy.sh

  cleanup
  echo "Build done"
}

checkOnce(){
  echo "Check run time: $(date)"
  commitFilePath=/home/ec2-user/build/last_commit.txt
  latestCommit=$(git ls-remote git@bitbucket.org:my-project/my-repo.git my-deploy-branch | awk '{print $1}')
  echo "Latest commit: ${latestCommit}"

  lastCommit=$(cat ${commitFilePath} 2>/dev/null || echo "")
  echo "Last deployed: ${lastCommit}"

  if [ "${latestCommit}" != "${lastCommit}" ]; then
    echo "New commit detected, starting build"
    runBuild
    echo "${latestCommit}" > ${commitFilePath}
    echo "last commit updated"
  else
    echo "No new commits"
  fi
}

while true; do
  checkOnce
  sleep ${intervalSeconds}
done


To make this script part of the poller VM instance startup, use the following:


sudo tee /etc/systemd/system/poll.service > /dev/null <<EOF
[Unit]
Description=Poll Script Startup
After=network.target

[Service]
Type=simple
ExecStart=/home/ec2-user/build/poll.sh
Restart=on-failure
User=ec2-user
WorkingDirectory=/home/ec2-user/build
StandardOutput=append:/home/ec2-user/build/output.txt
StandardError=append:/home/ec2-user/build/output.txt

[Install]
WantedBy=multi-user.target
EOF


sudo systemctl daemon-reload
sudo systemctl enable poll.service # auto-start on boot
sudo systemctl start poll.service # start immediately
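
To verify that the service is running and to watch its output (the log path comes from the unit file above):

sudo systemctl status poll.service
tail -f /home/ec2-user/build/output.txt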


The Build Script - Step 1

The build script runs on a c6i.4xlarge EC2 instance whose price is ~$500/month, but I don't care, since this instance runs only during the deployment itself, so the actual cost is very low here as well.


The script runs in the repository itself, which I manually cloned once after creating the EC2 instance. It only pulls the latest version and runs another "step 2" script to handle the build. The goal is to be able to pick up changes to the "step 2" script as part of the git pull.


#!/bin/bash
set -e

cd /home/ec2-user/build/my-repo
git checkout my-deploy-branch
git pull

./deploy_step2.sh


The Build Script - Step 2

The "step 2" script does the actual work: 

  1. Increments the build number
  2. Builds the docker images
  3. Login to the ECR
  4. Push the images to ECR
  5. Push a new tag to the GIT
  6. uses `helm upgrade` to upgrade the production deployment.


Notice that the EC2 instance uses an IAM role that enables it to access the ECR and the EKS without a username and password, for example:

{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "ecr:GetAuthorizationToken",
        "ecr:BatchCheckLayerAvailability",
        "ecr:CompleteLayerUpload",
        "ecr:UploadLayerPart",
        "ecr:InitiateLayerUpload",
        "ecr:PutImage"
      ],
      "Resource": "*"
    },
    {
      "Effect": "Allow",
      "Action": [
        "eks:DescribeCluster"
      ],
      "Resource": "*"
    }
  ]
}
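
For helm to reach the EKS cluster, the builder instance also needs a kubeconfig. In my setup I assume this is generated once on the builder (and that the role is also mapped to a Kubernetes identity on the cluster side):

aws eks update-kubeconfig --region us-east-1 --name my-eks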


The script is:

#!/bin/bash
set -e

export AWS_ACCOUNT=123456789012
export AWS_REGION=us-east-1
export AWS_DEFAULT_REGION=${AWS_REGION}
export EKS_CLUSTER_NAME=my-eks

rootFolder=/home/ec2-user/build
buildVersionFile=${rootFolder}/build_number.txt

if [[ -f "${buildVersionFile}" ]]; then
lastBuildNumber=$(cat "${buildVersionFile}")
else
lastBuildNumber=1000
fi
newBuildNumber=$((lastBuildNumber + 1))
echo "${newBuildNumber}" > ${buildVersionFile}
echo "Build number updated to: ${newBuildNumber}"

./build_my_images.sh

aws ecr get-login-password --region ${AWS_REGION} | docker login --username AWS --password-stdin ${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com
RemoteTag=deploy-${newBuildNumber} ./push_images_to_ec2.sh

newTag=deploy-${newBuildNumber}
git tag ${newTag}
git push origin ${newTag}

DEPLOY_VERSION=":${newTag}" ./helm_deploy.sh

echo "I did it again!"


Final Note

This build system is super fast. Why? Because it uses a local cache for the Docker images. This means we do not need a Docker proxy to cache the images, which also makes it cheap.

To sum up: don't use this for a big project, but you can definitely use it for startups.




Monday, November 10, 2025

Microsoft External Threat Detection


 


In this post we review the steps to create an external security provider to protect Microsoft Copilot Studio based agents.

Most of this post is based on this article.

Before starting, prepare yourself: following Microsoft best practice, they've made it a super complex process, but in the end it is working, so that's good.


Provide a Service

We start by implementing a service following this guide.

In general this service should provide two endpoints: /validate and /analyze-tool-execution.

The /validate endpoint is used only to check the service health and the integration with Microsoft authentication. For this post we will not implement the Microsoft authentication validation. Hence a simple implementation of /validate is:



type ResponseSuccess struct {
    IsSuccessful bool   `json:"isSuccessful"`
    Status       string `json:"status"`
}

type Executor struct {
}

func (e *Executor) Execute(p web.Parser) interface{} {
    log.Info("validate starting")
    auth := p.GetHeader("Authorization")
    log.Info("auth: %v", auth)
    log.Info("validate done")
    return &ResponseSuccess{
        IsSuccessful: true,
        Status:       "OK",
    }
}



The /analyze-tool-execution endpoint is called at each step, before the Copilot agent invokes any action, and should approve or reject the action within 1 second (good luck with that). A simple example implementation is:



type ResponseAllow struct {
    BlockAction bool `json:"blockAction"`
}

type Executor struct {
}

func (e *Executor) Execute(p web.Parser) interface{} {
    log.Info("analyze tool execution starting")

    inputBytes, err := p.GetBodyAsBytes()
    kiterr.RaiseIfError(err)

    auth := p.GetHeader("Authorization")
    log.Info("auth: %v", auth)
    tenantId := kitjwt.GetJwtValue(auth, "tid")
    applicationRegistrationId := kitjwt.GetJwtValue(auth, "appid")

    log.Info("tenantId: %v", tenantId)
    log.Info("applicationRegistrationId: %v", applicationRegistrationId)
    log.Info("action description: %v", string(inputBytes))

    log.Info("analyze tool execution done")
    return &ResponseAllow{
        BlockAction: false,
    }
}

Once the service is implemented, deploy it and provide it with a valid TLS certificate. For the rest of this post we assume it is available at https://external.provider.com.
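
Assuming the handler above also responds to a plain GET, a quick manual check of the deployed endpoint could be:

curl -s -H "Authorization: Bearer dummy-token" https://external.provider.com/validate
# expected: {"isSuccessful":true,"status":"OK"}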


Register the Domain

Once the service is ready we need to register the domain in entra.microsoft.com.




Notice that as part of the process Microsoft requires you to prove that you own the domain, so you need to add a TXT record to your DNS zone with a value specified by Microsoft.


App Registration

Create a new AppRegistration in entra.microsoft.com.
Then edit the AppRegistration and under "Expose an API" add the URL https://external.provider.com.

Next, edit the AppRegistration and under Certificates & secrets, select the Federated credentials tab, and add a new credential.
Scenario: Other
Issuer: https://login.microsoftonline.com/55fb1683-57de-46d1-8896-f9f3b07b549f/v2.0
Type: Explicit

The get the "Value" you need to run the following script:

# YOUR TENANT ID HERE
$guid = [Guid]::Parse("55fb1683-57de-46d1-8896-xxxxxxxx")
$base64Url = [Convert]::ToBase64String($guid.ToByteArray()).Replace('+','-').Replace('/','_').TrimEnd('=')
Write-Output $base64Url

# YOUR ENDPOINT ID HERE
$endpoint = "https://external.provider.com/analyze-tool-execution"
$base64Url = [Convert]::ToBase64String([Text.Encoding]::UTF8.GetBytes($endpoint)).Replace('+','-').Replace('/','_').TrimEnd('=')
Write-Output $base64Url


This script outputs two values; use them to build the following value:

/eid1/c/pub/t/FIRST_LINE_OUTPUT/a/m1WPnYRZpEaQKq1Cceg--g/SECOND_LINE_OUTPUT
 

Enable The Threat Detection

In https://admin.powerplatform.microsoft.com enable the threat detection.




Final Note

As promised, the super complex process is now done, and agent-related events start streaming into the service, which can approve or block them.


Monday, November 3, 2025

Microsoft AI Agent

 

In this post we will create an agent using Microsoft Copilot Studio.


Disclaimer About Microsoft

I've known Microsoft for more than 30 years. It used to be a monopoly with good products, but over time they lost their path and vision. What is left is a monopoly with bad products. Still, as a monopoly, Microsoft can force the market to use its new products even if they're bad and expensive. A good example of this is the creation of an AI agent using Microsoft Copilot Studio.


License

Unlike other providers, to work with Microsoft products you need a license, regardless of the usage amount. This license is extremely expensive compared with other providers. In case you only want to check the capabilities and are not willing to pay yet, check if you're eligible for the Microsoft E5 developer program.


Copilot Studio

Open the Microsoft Copilot Studio site, select Agents, and create a new agent.



Now we can use an AI to create our agent by simply describing the agent, or we can configure it manually.






Once we click on Create agent, the agent is ready, and we can test it.



We can also configure an MCP server to be used under the Tools section; for example, I've used the public Docusign MCP server.






Once we're done, we can publish the agent, and since Microsoft is a monopoly whose Office is the standard application suite for almost all companies, we can place the agent in an easily accessible location such as Microsoft Teams.


Purview Audit

A must-have capability of an agent service is tracking the chats and tuning the agent configuration to make it more useful. To track the chats we can use Microsoft Purview.

In the Microsoft Purview site, select Solutions, then Audit, and run a new search filtered by time, and optionally filtered by RecordType=CopilotInteraction.





After some time, anywhere between 5 minutes and 5 hours (it is Microsoft, so don't have high expectations), you will get the search result records.




Final Note

We've seen how to use Microsoft products to create an agent and to track the agent's actions. There are other related products that should be used to get a complete solution for agent creation, such as Microsoft Power Apps, the Power Apps admin center, and Microsoft Dataverse. And yes, you will need to pay for each one of these regardless of the usage amount. Have fun.



Tuesday, October 21, 2025

AWS Bedrock Agent



In this post we show the creation of an agent in AWS Bedrock and a simple text chat application using the Python boto3 library.


Create Agent in AWS Console


First open the AWS console, navigate to the AWS Bedrock service, and click on Agents.



Click on Create Agent, enter a name for it, and click Create.

For our demo, we switch to the cheapest available model:



Fill in instructions for the agent:

Now click on Save and Exit.

Click on Prepare to create a draft of the agent:



We can now test that the agent works as expected, or else update the agent instructions.



Now we create an alias for the agent, which is a published version of the agent.
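
Creating the alias can also be scripted; a hedged sketch using the AWS CLI, reusing the agent id shown later in the post:

aws bedrock-agent create-agent-alias \
  --agent-id AB5BQ7PVAL \
  --agent-alias-name production \
  --region us-east-1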




Chat using python boto3

Use the following code to run a simple chat with the agent.

import uuid

import boto3

REGION = "us-east-1"
AGENT_ID = "AB5BQ7PVAL"
AGENT_ALIAS_ID = "9BII1XBJP9"

client = boto3.client('bedrock-agent-runtime', region_name=REGION)

session_id = str(uuid.uuid4())

print("Chat Starting")

while True:
    user_input = input("Enter prompt:")
    if user_input.lower() in ['exit', 'quit']:
        print("Bye")
        break

    response = client.invoke_agent(
        agentId=AGENT_ID,
        agentAliasId=AGENT_ALIAS_ID,
        sessionId=session_id,
        inputText=user_input,
    )

    response_body = response['completion']

    print("Answer:", end=" ", flush=True)
    for chunk in response_body:
        if 'chunk' in chunk:
            print(chunk['chunk']['bytes'].decode("utf-8"), end="", flush=True)
    print()


An example of this chat is below.



Final Note

This is a very simple example of AWS Bedrock agent invocation. Other agent capabilities include guardrails, agent memory across sessions, and multi-agent configuration.

Best practices for agent creation can be found here.


Sunday, October 12, 2025

GO Embed

 



In this post we review the Go //go:embed directive and its implications.


Sample Code


package main

import (
    "embed"
    _ "embed" // the blank import is only needed when embedding into string/[]byte without embed.FS
    "fmt"
)

//go:embed hello.txt
var textFile string

//go:embed hello.txt
var binaryFile []byte

//go:embed data1 data2
var files embed.FS

func main() {
    fmt.Printf("%v bytes:\n%v\n", len(binaryFile), textFile)

    entries, err := files.ReadDir(".")
    if err != nil {
        panic(err)
    }
    for _, entry := range entries {
        fmt.Printf("%v dir: %v\n", entry.Name(), entry.IsDir())
    }
}


Implications

Go embed is a simple way of adding files to the compiled Go binary. It serves as an alternative to making these files available to the application by other means, such as supplying the files as part of a Docker image or mounting the files using a Kubernetes ConfigMap.

Notice the files are added as part of the binary, so embedding large files means a larger output binary.


Embed Methods

There are three methods to embed a file.

First we can add a file as a string. In such a case we should add the explicit embed import:

_ "embed"


Second, we can add the file as a byte array; this is very similar to the first method.


Third, we can include a set of folders as a virtual file system. The directive lists the folders to be included. There is special handling for files whose names start with a dot or an underscore (they are skipped unless the all: prefix is used); see more about this here.


Final Note

While embed is a simple way to add files, it should be used only if we're sure we will not want to change the files in an active running deployment.






Sunday, October 5, 2025

How To Improve LLM Inference Performance


 

In this post we will review possible changes to LLM inference code to make it run faster and use less GPU memory.


LLM inference is the usage of a trained model on new data to produce a classification or a prediction. This is usually the production-time usage of the model that we've selected and possibly fine-tuned for the actual data stream.


The inference runs the following steps:

  1. Load the model into memory once at process startup
  2. Get a single input, or preferably a batch of inputs
  3. Run the forward calculation on the neural network and produce a result

The term LLM performance is actually used for two different subjects:
  1. The precision of the LLM, such as false-positive and false-negative rates
  2. The GPU memory usage and the time of the inference process
We will see later that while these are two different goals, they are actually intertwined.

Below is sample inference code where we can see the model loading and the model invocation.

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

tokenizer = AutoTokenizer.from_pretrained("ProtectAI/deberta-v3-base-prompt-injection-v2")
model = AutoModelForSequenceClassification.from_pretrained("ProtectAI/deberta-v3-base-prompt-injection-v2")

# use the GPU when available
torch_device = "cuda" if torch.cuda.is_available() else "cpu"

classifier = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    truncation=True,
    max_length=512,
    device=torch.device(torch_device),
)

text = "some user prompt to classify"  # placeholder input
results = classifier(text)

# analyze the results
...


Let's examine how we can improve the performance of the inference.

Compile The Model

The first and simplest change is to compile the model:

model = torch.compile(model)

This simple instruction can speed up the inference by up to 2x!

The compile call converts the model's Python code into compiled code and optimizes the model operations. Notice that the compile call should run only once, right after the model is loaded, and it has a small impact on the process startup time.

For torch.compile() we need to make sure we have both python-dev and g++ installed. An example of this in a Dockerfile is:

RUN apt-get update && \
apt-get install -y software-properties-common curl wget build-essential g++ && \
add-apt-repository ppa:deadsnakes/ppa && \
apt-get update && \
apt-get install -y python3.12 python3.12-dev && \
/usr/bin/python3.12 --version



Change The Model Precision

By default the model uses float32 precision, which means it has full accuracy in the neural network calculations. In most cases float16 precision does the work just as well while consuming roughly half the time and half the memory. The conversion from float32 to float16 is done by calling half().


model = model.to('cuda').half()

(Notice that model.half() should be called BEFORE torch.compile())


Using half() might cause a small increase in the false-positive and false-negative rates, but in most cases it is negligible.


As a side note, we should mention that we can set the precision to int8 or int4, but this is a less common practice. For the record, here is an analysis of the alternatives from GPT:




Final Note

We have reviewed methods of improving the memory footprint and runtime of an LLM. While there are some small implications for the accuracy, these methods should be used as a common practice for any LLM implementation.



Monday, September 22, 2025

NPX


 

In this post we will review NPX, the Node Package Execute tool.

NPX is a command-line utility that is installed as part of the Node installation. Notice that this means the npx version is coupled with the Node version.


NPX temporarily installs packages that are used for a "script-like" execution of a package. Instead of using npm to install a package globally and then running it, npx handles both.


The first step of NPX is to download the required package and its dependencies. The download target is:

~/.npm/_npx/<HASH>

The hash is based on the name and version of the package that NPX runs. Notice that the folder is never automatically removed, so once a package is downloaded it will not be re-downloaded, unless we manually remove the cache folder or run a different version.

NPX, however, will not download to the cache folder if the package already exists in the current project's node_modules folder, or if the package is globally installed.

To force NPX to download the latest version of a package and ignore any locally or globally installed version, we should specify the NPX flag --ignore-existing.


Common usages of NPX are:

npx create-react-app my-app
This sets up the skeleton of a new React-based application.

npx serve
This runs a file server in the current folder, enabling a quick review of the HTML files using a browser.


By default NPX downloads the package and then checks for the "bin" entry in package.json, which specifies the JavaScript file to run. However, we can manually determine the command to run using the syntax:

npx --package my-package my-command

In such a case NPX downloads the package, then looks for the command under the bin element in package.json and runs it. Notice that we cannot run an arbitrary JavaScript file using NPX, only the predefined entries in the bin element. We can, however, download the package using NPX and then run any JavaScript file using node from the local NPX cache folder, as sketched below.
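
A hedged sketch of that flow (package name, command, hash, and file path are placeholders):

# download the package into the npx cache without installing it globally
npx --package some-package some-command

# locate the cache folder and run an arbitrary file with node
ls ~/.npm/_npx/
node ~/.npm/_npx/<HASH>/node_modules/some-package/some-file.js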


Monday, September 15, 2025

Create GO MCP Server and a Valid SSL Certificate

 



In this post we create a Go-based MCP server.

The server supports both SSE and streamable HTTP. Gemini CLI supports only SSE, but the standard seems to be moving toward the streamable HTTP protocol.

We support both an HTTP server on port 80 and an HTTPS server on port 443. When using the HTTPS server on port 443, we also start a certificate management listener on port 80, which obtains a valid certificate as long as we have a valid DNS record pointing to our server. Notice that for the certificate to be issued we need to allow all source IPs to reach the port 80 listener, since the validation requests arrive from the Let's Encrypt servers.


package main

import (
    "context"
    "encoding/json"
    "fmt"
    "net/http"

    "github.com/modelcontextprotocol/go-sdk/jsonschema"
    "github.com/modelcontextprotocol/go-sdk/mcp"
    "golang.org/x/crypto/acme/autocert"
)

func main() {
    implementation := mcp.Implementation{
        Name: "Demo MCP Server",
    }
    mcpServer := mcp.NewServer(&implementation, nil)

    mcpServer.AddTool(toolSchema(), toolExecute)

    useSse := false
    var mcpHandler http.Handler
    if useSse {
        mcpHandler = mcp.NewSSEHandler(func(*http.Request) *mcp.Server {
            return mcpServer
        })
    } else {
        mcpHandler = mcp.NewStreamableHTTPHandler(func(request *http.Request) *mcp.Server {
            return mcpServer
        }, nil)
    }

    useTls := true
    if useTls {
        certManager := autocert.Manager{
            Prompt:     autocert.AcceptTOS,
            HostPolicy: autocert.HostWhitelist("my.domain.com"),
            Cache:      autocert.DirCache("certs"),
        }

        httpServer := &http.Server{
            Addr:      "0.0.0.0:443",
            Handler:   mcpHandler,
            TLSConfig: certManager.TLSConfig(),
        }

        go func() {
            err := http.ListenAndServe("0.0.0.0:80", certManager.HTTPHandler(nil))
            if err != nil {
                panic(err)
            }
        }()

        err := httpServer.ListenAndServeTLS("", "")
        if err != nil {
            panic(err)
        }
    } else {
        err := http.ListenAndServe("0.0.0.0:80", mcpHandler)
        if err != nil {
            panic(err)
        }
    }
}


Now we implement the tool, which can both read input parameters and return a string result for the AI agent to send to the LLM.


type inputParameters struct {
    Name string `json:"name"`
}

func toolSchema() *mcp.Tool {
    return &mcp.Tool{
        Name:        "greet",
        Description: "Say hi from me",
        InputSchema: &jsonschema.Schema{
            Type:     "object",
            Required: []string{"name"},
            Properties: map[string]*jsonschema.Schema{
                "name": {
                    Type: "string",
                },
            },
        },
    }
}

func toolExecute(
    _ context.Context,
    _ *mcp.ServerSession,
    params *mcp.CallToolParamsFor[map[string]any],
) (
    *mcp.CallToolResultFor[any],
    error,
) {
    bytes, err := json.Marshal(params.Arguments)
    if err != nil {
        panic(err)
    }

    var input inputParameters
    err = json.Unmarshal(bytes, &input)
    if err != nil {
        panic(err)
    }

    content := mcp.TextContent{
        Text: fmt.Sprintf("Hi %v, Demo MCP is at your service", input.Name),
    }

    result := mcp.CallToolResultFor[any]{
        Content: []mcp.Content{
            &content,
        },
    }

    return &result, nil
}



Monday, September 8, 2025

Create a Python MCP Server

 




MCP is the standard protocol for exposing tools for AI agents to use. The MCP server exposes APIs and the documentation for each API to the LLM. In this post we create a simple Python-based MCP server and use it in Gemini CLI.


Create The MCP Server


To prepare the MCP server project use:

# install UV in case it is not already installed
curl -LsSf https://astral.sh/uv/install.sh | sh

uv init magic_server
cd magic_server
uv venv
uv add "mcp[cli]"


Next we add our main.py and expose our tool:

from mcp.server.fastmcp import FastMCP

mcp = FastMCP("Demo")

@mcp.tool(name="do_magic_trick")
def do_magic_trick(a: int, b: int) -> int:
    return a + b + 6

mcp.run(transport="stdio")
# mcp.run(transport="streamable-http")
# mcp.run(transport="sse")


We can run the MCP server as a local tool using STDIO for the lower-level communication, or use SSE or streamable-http to connect to the server over the network.


Run Gemini CLI and MCP Server STDIO


To use the Gemini CLI, make sure you have a recent version of Node, and run:

npx https://github.com/google-gemini/gemini-cli

We run the MCP server as a local tool using STDIO. To configure this, update the file ~/.gemini/settings.json:


{
  "selectedAuthType": "oauth-personal",
  "mcpServers": {
    "pythonTools": {
      "command": "/home/my-user/.local/bin/uv",
      "args": [
        "run",
        "main.py"
      ],
      "cwd": "/home/my-user/magic_server",
      "env": {
      },
      "timeout": 15000
    }
  }
}

Once we restart the Gemini CLI, it will run the MCP server to get the metadata of the exposed tools, and we can use the tools simply by using the prompt: "do the magic_trick for 5 and 6".


Run Gemini CLI and MCP Server Service

To expose the MCP server as a public tool, we need to run it over the network. For this we can use SSE or streamable HTTP, so update main.py to use the relevant transport.


Notice:

1. At this time the Gemini CLI supports only SSE, but it seems that streamable HTTP is the winning standard.

2. When using TLS (HTTPS access), the server should use a valid TLS certificate.


To configure the gemini to use the tool over the network, update the file ~/.gemini/settings.json:

{
  "selectedAuthType": "oauth-personal",
  "mcpServers": {
    "discoveredServer": {
      "url": "http://localhost:8000/sse"
    }
  }
}


Unlike running the MCP server over STDIO, in this case we need to start the server ourselves:

uv run main.py



Sunday, August 31, 2025

UV

 


In this post we will review uv, a new Python project manager.


I am not a heavy Python user. I generally avoid using Python for long-lived projects, as its maintainability is more complex due to its limited variable typing and its single-core usage. I usually use Python for very short-lived projects or for LLMs, which are supported almost only in Python. Over the years I got used to all the Python pains:


  • Installing and using the correct version of Python and pip
  • Creating the Python venv
  • Managing dependencies using requirements.txt, which somehow never works


And then, about a year ago, a new tool emerged: uv.

uv provides a complete solution for the entire Python project management. It includes:


  • Python version installation and management
  • Adding and locking dependencies
  • New project creation
  • venv management
  • Running helper tools


The funny thing about uv is that it is written in Rust, which is, in my opinion, kind of an insult to Python.


Anyway, listed below are some basic uv actions.


Create A New Project

To create a new project run the following commands.


mkdir demo
cd demo
uv init
uv python list
uv python pin 3.12
uv venv


If you use PyCharm, configure it to use uv (make sure you have the latest version of PyCharm):


PyCharm Settings --> Python --> Interpreter --> select the existing interpreter from the .venv


Dependencies

Adding dependencies is simple; for example, add Flask:

uv add flask
uv lock

Notice that uv.lock is automatically updated whenever another dependency is added.
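
To recreate the environment from the lock file on another machine (or in CI), the following should do:

uv sync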

Unit Tests and Coverage

To run unit tests and coverage, add the dependencies as dev dependencies, and then run the related commands.

uv add --dev pytest coverage
uv run -m coverage run -m pytest
uv run -m coverage report



An example of such tests is below.



main.py

def add(a, b):
    return a + b

test_main.py

from main import add

def test_add():
    assert add(2, 3) == 5