Full Blog TOC

Full Blog Table Of Content with Keywords Available HERE

Monday, March 31, 2025

Wrong Job Interview

 


Lately I've heard about a question in a job interview:

You are given a shuffled list of 2*N+1 numbers that contains N pairs of numbers and one number that does not have a pair. Find the non-paired number using only 2 integer variables.

Scroll down only when you want to know the answer...




















The solution to this, is to use one integer variable as index to scan the list, and the second variable as XOR based storage, so the algorithm would be:


x = 0
for i in 0 .. 2*N:

    x = x XOR list[i]


The paired numbers XOR themselves to zero, and the only non-paired number remains in x.

This is since:

v XOR v = 0

and

v XOR 0 = v



Now, while this is a nice question, with a nice trick, the real question is what is the benefit of asking this question in a job interview? 

What do you understand if the interviewed person managed to find the answer?
What do you understand if that person failed to find the answer?

Nothing.


In an interview we should pursue 3 main goals:

  1. Get a feeling about the kind of person. Would you have a beer with that person?
  2. Test the knowledge of the person in a specific field or programming language.
  3. See how this person copes with thinking about complex and changing problems.

The XOR question does not contribute to these goals; it only tells you whether that person had the luck to think of the solution. So it is only testing if that person is lucky.

Listed below are interview related posts I've previously posted which you might find useful.

Monday, March 24, 2025

Auto Update of Argo Deployment

 



As part of the CI/CD, I need to update a deployment on argo, and then run system tests on this deployment. Instead of doing this manually, I've written a small piece of Go code to handle the version replacement, the sync after the update, and the waiting for the sync completion. The code is below. Feel free to copy it and get inspiration from it.




// Test drives the Argo deployment update. It embeds the shared automation base
// and carries the HTTP client and the session token used by its methods.
type Test struct {
automationbase.AutomationBase
// webClient sends the HTTP requests to the Argo API.
webClient *web.Client
// token is the session token stored by login; while it is empty, requests are
// sent without the argocd.token cookie.
token string
}

// TestValidation builds the automation with a fresh automation base and web
// client, registers check as the automation worker and runs the automation.
// The testing handle itself is not used.
func TestValidation(_ *testing.T) {
	automation := Test{
		AutomationBase: *automationbase.ProduceAutomationBase(),
		webClient:      web.CreateClient(0),
	}

	automation.AutomationWorker = automation.check
	automation.RunAutomation()
}

// check performs the whole update flow: log in, write the new version into the
// application summary, trigger a sync, and then poll the summary every five
// seconds until isSynced reports completion.
//
// NOTE(review): the polling loop has no timeout of its own; it only ends when
// the application is reported as synced.
func (t *Test) check() {
	t.login()

	t.setSummary(t.updateVersionInSummary(t.getSummary()))

	t.sync()

	for {
		time.Sleep(5 * time.Second)

		if t.isSynced(t.getSummary()) {
			t.Log("sync done")
			return
		}
	}
}

// getEnvSecure returns the value of the environment variable key. An empty or
// unset variable is reported through kiterr.RaiseIfError; if that call returns,
// the empty string is returned.
func (t *Test) getEnvSecure(key string) string {
	if value := os.Getenv(key); value != "" {
		return value
	}

	kiterr.RaiseIfError(fmt.Errorf("%v environment variable is empty", key))
	return ""
}

// login authenticates against the Argo API as "admin", using the password from
// the PIB_PASSWORD environment variable, and stores the returned session token
// in t.token so that later requests can send it as a cookie.
//
// A response without a non-empty string "token" field is reported through
// kiterr.RaiseIfError rather than failing on an unchecked type assertion.
func (t *Test) login() {
	password := t.getEnvSecure("PIB_PASSWORD")
	body := map[string]string{
		"username": "admin",
		"password": password,
	}
	response := t.sendRequestToArgo("POST", "/api/v1/session", body)
	responseMap := t.interfaceJsonFromString(response)

	// The checked assertion covers both a missing key (nil) and a non-string value.
	token, ok := responseMap["token"].(string)
	if !ok || token == "" {
		kiterr.RaiseIfError(fmt.Errorf("login response does not contain a token"))
	}
	t.token = token
}

// sync asks Argo to sync the application to the revision
// "<project>-dev-<PIB_VERSION>" and returns the raw response.
//
// The request body is built as a map instead of being formatted into JSON text
// and parsed back, so the version value never has to be escaped by hand.
func (t *Test) sync() string {
	version := t.getEnvSecure("PIB_VERSION")
	fullVersion := fmt.Sprintf("%v-dev-%v", project, version)

	body := map[string]interface{}{
		"revision": fullVersion,
		"prune":    false,
		"dryRun":   false,
		"strategy": map[string]interface{}{
			"hook": map[string]interface{}{
				"force": false,
			},
		},
		"resources": nil,
		"syncOptions": map[string]interface{}{
			"items": []interface{}{"CreateNamespace=true"},
		},
	}

	return t.sendRequestToArgo("POST", "/api/v1/applications/"+project+"/sync", body)
}

// getSummary fetches the application document of project from Argo and returns
// the raw response text.
func (t *Test) getSummary() string {
	applicationPath := "/api/v1/applications/" + project
	return t.sendRequestToArgo("GET", applicationPath, nil)
}

// setSummary replaces the application document of project in Argo with
// parameters. The response is not inspected.
func (t *Test) setSummary(parameters map[string]interface{}) {
	applicationPath := "/api/v1/applications/" + project
	_ = t.sendRequestToArgo("PUT", applicationPath, parameters)
}

// updateVersionInSummary parses the application summary, points
// spec.source.targetRevision at "<project>-dev-<PIB_VERSION>", replaces
// spec.source.helm.values with the version override, and returns the modified
// document.
//
// NOTE(review): the helm values text uses one-space indentation for both
// "image" and "version"; confirm this is the intended YAML nesting.
func (t *Test) updateVersionInSummary(summary string) map[string]interface{} {
	application := t.interfaceJsonFromString(summary)
	source := t.interfaceJsonFromMap(t.interfaceJsonFromMap(application, "spec"), "source")
	helm := t.interfaceJsonFromMap(source, "helm")

	version := t.getEnvSecure("PIB_VERSION")
	// The nested maps are shared with application, so these writes update it in place.
	source["targetRevision"] = fmt.Sprintf("%v-dev-%v", project, version)
	helm["values"] = fmt.Sprintf("global:\n image:\n version: :dev-%v\n\n", version)

	return application
}

// sendRequestToArgo sends one HTTP request to the Argo API and returns the raw
// response body.
//
// method and path select the endpoint (path is appended to the fixed Argo base
// URL) and body is handed to the web client as the request payload. When a
// session token is already stored it is sent as the argocd.token cookie. A
// non-empty response must be valid JSON; it is parsed and logged. The function
// sleeps one second after every request.
//
// The session endpoint carries the password in the request and the token in the
// response, so its payloads are replaced by a placeholder in the log.
func (t *Test) sendRequestToArgo(
	method string,
	path string,
	body interface{},
) string {
	sensitive := path == "/api/v1/session"

	var requestHeaders *web.SectionHeaders
	if t.token != "" {
		cookie := fmt.Sprintf("argocd.token=%v", t.token)
		requestHeaders = web.ProduceSectionHeaders()
		requestHeaders.SetHeader("Cookie", cookie)
	}

	if sensitive {
		t.Log("sending %v %v with body: <redacted>", method, path)
	} else {
		t.Log("sending %v %v with body:\n%v", method, path, body)
	}

	fullPath := fmt.Sprintf("http://pib8.cloud-ng.net:31390%v", path)
	var response string
	t.webClient.SendRequestWithHeaders(
		method,
		fullPath,
		body,
		requestHeaders,
		&response,
	)

	if len(response) > 0 {
		// Parsing also validates that the response is JSON, for every endpoint.
		jsonData := t.interfaceJsonFromString(response)
		if sensitive {
			t.Log("response is: <redacted>")
		} else {
			t.Log("response is:\n%v", kitjson.ObjectToStringIndented(jsonData))
		}
	}

	// don't rush argo
	time.Sleep(time.Second)

	return response
}

// interfaceJsonFromMap returns the nested JSON object stored under key in
// input. A missing (or null) key and a value that is not a JSON object are each
// reported through kiterr.RaiseIfError; if those calls return, the result is a
// nil map.
func (t *Test) interfaceJsonFromMap(input map[string]interface{}, key string) map[string]interface{} {
	raw := input[key]
	if raw == nil {
		kiterr.RaiseIfError(fmt.Errorf("key not found: %v", key))
	}

	if nested, isObject := raw.(map[string]interface{}); isObject {
		return nested
	}

	kiterr.RaiseIfError(fmt.Errorf("convert key %v failed for value:\n%v", key, kitjson.ObjectToStringIndented(raw)))
	return nil
}
// interfaceJsonArrayFromMap returns the JSON array stored under key in input.
// A missing (or null) key and a value that is not a JSON array are each
// reported through kiterr.RaiseIfError; if those calls return, the result is a
// nil slice.
func (t *Test) interfaceJsonArrayFromMap(input map[string]interface{}, key string) []interface{} {
	raw := input[key]
	if raw == nil {
		kiterr.RaiseIfError(fmt.Errorf("key not found: %v", key))
	}

	if items, isArray := raw.([]interface{}); isArray {
		return items
	}

	kiterr.RaiseIfError(fmt.Errorf("convert key %v failed", key))
	return nil
}

// interfaceJsonFromString parses data as a JSON object and returns it as a
// generic map. A parse failure is reported through kiterr.RaiseIfError; the
// reported error wraps the decoder's error, so the reason is visible next to
// the offending text.
func (t *Test) interfaceJsonFromString(
	data string,
) map[string]interface{} {
	var jsonMap map[string]interface{}
	err := json.Unmarshal([]byte(data), &jsonMap)
	if err != nil {
		kiterr.RaiseIfError(fmt.Errorf("unmarshalling failed: %w, data: %v", err, data))
	}
	return jsonMap
}

// isSynced parses the application summary and reports whether both the sync
// operation and the per-resource sync are done. The resources are inspected
// only when the sync operation itself has finished.
func (t *Test) isSynced(summary string) bool {
	application := t.interfaceJsonFromString(summary)
	status := t.interfaceJsonFromMap(application, "status")

	return t.isSyncOperationsDone(status) && t.isResourcesSyncDone(status)
}

// isResourcesSyncDone walks status.resources and reports whether every relevant
// resource is ready. Job, Role and RoleBinding resources are skipped. Any other
// resource with a status other than "Synced" is pending (a resource without a
// status is accepted), and Deployment / StatefulSet resources must additionally
// report a health status of "Healthy". The first pending resource is logged.
func (t *Test) isResourcesSyncDone(status map[string]interface{}) bool {
	for _, entry := range t.interfaceJsonArrayFromMap(status, "resources") {
		resourceMap, isObject := entry.(map[string]interface{})
		if !isObject {
			kiterr.RaiseIfError(fmt.Errorf("convert failed"))
		}

		kind := resourceMap["kind"]
		switch kind {
		case "Job", "Role", "RoleBinding":
			// never synced
			continue
		}

		if syncStatus := resourceMap["status"]; syncStatus != nil && syncStatus != "Synced" {
			t.Log("pending sync for:\n%v", kitjson.ObjectToStringIndented(resourceMap))
			return false
		}

		switch kind {
		case "Deployment", "StatefulSet":
			if t.interfaceJsonFromMap(resourceMap, "health")["status"] != "Healthy" {
				t.Log("pending sync for:\n%v", kitjson.ObjectToStringIndented(resourceMap))
				return false
			}
		}
	}

	return true
}

// isSyncOperationsDone reports whether status.operationState.phase equals
// "Succeeded"; any other phase is logged and treated as not done yet.
func (t *Test) isSyncOperationsDone(status map[string]interface{}) bool {
	phase := t.interfaceJsonFromMap(status, "operationState")["phase"]
	if phase != "Succeeded" {
		t.Log("sync state %v", phase)
		return false
	}

	return true
}

Monday, March 17, 2025

Basic Must Have Training For New Software Engineer



 


In this post we will review the items that a new software engineer arriving at a new workplace should learn. I don't pretend that I can set up a list that would match every workplace, but I do believe this list matches 80% of the jobs.


Listed below are the items that the newcomer should find short courses to learn about. The minimum investment time for each subject is also specified, as well as an example of a short free course.


Saturday, March 1, 2025

Kafka Batch Consume Using confluent-kafka-go


 


In this post we will show a performance test for kafka batch consumer.

First, let's review a basic wrapper for the confluent kafka library.


package kafka

import (
"fmt"
kafkaApi "github.com/confluentinc/confluent-kafka-go/kafka"
"sync"
"time"
)

// Producer wraps a confluent kafka producer that is bound to a single topic.
type Producer struct {
// kafkaTopic is the topic every message is produced to.
kafkaTopic string
// producer is the underlying confluent kafka producer.
producer *kafkaApi.Producer
// errorsCount and mutex are not referenced by any code shown in this post.
errorsCount int
mutex sync.Mutex
}

// Consumer wraps a confluent kafka consumer and remembers the last message it
// returned so that the message can be committed later.
type Consumer struct {
// consumer is the underlying confluent kafka consumer.
consumer *kafkaApi.Consumer
// lastMessage is set by ReadMessage and read by CommitLastMessage and
// CommitBulk; it is nil until the first message has been read.
lastMessage *kafkaApi.Message
}

// CreateKafkaProducer creates a producer connected to kafkaBroker that sends
// its messages to kafkaTopic. It panics when the producer cannot be created.
//
// A background goroutine drains the producer's event channel and discards the
// events. Ranging over the channel (instead of receiving in an endless loop)
// lets the goroutine end when the channel is closed rather than spin on it.
func CreateKafkaProducer(
	kafkaBroker string,
	kafkaTopic string,
) *Producer {
	config := make(kafkaApi.ConfigMap)
	config["bootstrap.servers"] = kafkaBroker
	producer, err := kafkaApi.NewProducer(&config)
	if err != nil {
		panic(err)
	}

	go func() {
		for range producer.Events() {
			// events are intentionally discarded
		}
	}()

	return &Producer{
		kafkaTopic: kafkaTopic,
		producer:   producer,
	}
}

// ProduceMessage enqueues messageData with the given key on the producer's
// topic, letting kafka choose the partition.
//
// It returns the enqueue error, if any, instead of panicking, so that a caller
// producing in a tight loop can back off and retry. Callers that invoke it as a
// plain statement keep compiling.
func (p *Producer) ProduceMessage(
	key string,
	messageData []byte,
) error {
	message := kafkaApi.Message{
		Key:   []byte(key),
		Value: messageData,
		TopicPartition: kafkaApi.TopicPartition{
			Topic:     &p.kafkaTopic,
			Partition: kafkaApi.PartitionAny,
		},
	}

	if err := p.producer.Produce(&message, nil); err != nil {
		return fmt.Errorf("producing to topic %q: %w", p.kafkaTopic, err)
	}
	return nil
}

// Close waits up to 15 seconds for queued messages to be delivered and then
// closes the underlying producer. Without the flush, messages still waiting in
// the producer's queue would be dropped on close.
func (p *Producer) Close() {
	const flushTimeout = 15 * time.Second

	// Flush takes its timeout in milliseconds; messages that are still
	// outstanding after the timeout are abandoned by the Close below.
	p.producer.Flush(int(flushTimeout / time.Millisecond))
	p.producer.Close()
}

// CreateKafkaConsumer creates a consumer connected to kafkaBroker, joins
// consumerGroup and subscribes to kafkaTopic. It panics when the consumer
// cannot be created or the subscription fails.
//
// A background goroutine prints every event from the consumer's event channel.
// Ranging over the channel lets the goroutine end if the channel is ever
// closed, instead of printing nil events in an endless loop.
func CreateKafkaConsumer(
	kafkaBroker string,
	kafkaTopic string,
	consumerGroup string,
) *Consumer {
	config := make(kafkaApi.ConfigMap)
	config["bootstrap.servers"] = kafkaBroker
	config["group.id"] = consumerGroup
	//config["fetch.max.bytes"] = 50 * 1024 * 1024
	//config["max.partition.fetch.bytes"] = 50 * 1024 * 1024
	//config["auto.offset.reset"] = "earliest"
	//config["api.version.request"] = false
	//config["debug"] = "all"
	consumer, err := kafkaApi.NewConsumer(&config)
	if err != nil {
		panic(err)
	}

	go func() {
		for event := range consumer.Events() {
			fmt.Printf("kafka consumer event: %v\n", event)
		}
	}()

	err = consumer.Subscribe(kafkaTopic, nil)
	if err != nil {
		panic(err)
	}
	return &Consumer{
		consumer: consumer,
	}
}

// ReadMessage reads the next message without a timeout, remembers it as the
// last message for a later commit, and returns its payload. It panics on a
// read error.
func (c *Consumer) ReadMessage() []byte {
	const noTimeout = -1

	message, err := c.consumer.ReadMessage(noTimeout)
	if err != nil {
		panic(err)
	}

	c.lastMessage = message
	return c.lastMessage.Value
}

// CommitLastMessage commits the message most recently returned by ReadMessage
// and panics when the commit fails. It passes c.lastMessage on as is, so
// ReadMessage must have been called first.
func (c *Consumer) CommitLastMessage() {
	if _, err := c.consumer.CommitMessage(c.lastMessage); err != nil {
		panic(err)
	}
}

// CommitBulk commits everything up to and including the last read message with
// a single offsets commit, and panics when the commit fails. Only the topic and
// partition of the last read message are committed. It dereferences
// c.lastMessage, so ReadMessage must have been called first.
func (c *Consumer) CommitBulk() {
	last := c.lastMessage.TopicPartition

	// The committed offset is the position of the next message to read.
	next := kafkaApi.TopicPartition{
		Topic:     last.Topic,
		Partition: last.Partition,
		Offset:    last.Offset + 1,
	}

	if _, err := c.consumer.CommitOffsets([]kafkaApi.TopicPartition{next}); err != nil {
		panic(err)
	}
}


To test this we run a consumer and a producer in parallel:



// Connection settings shared by the test producer and consumer.
const (
	broker = "localhost:9092"
	topic  = "my-topic"
)

// Data is the message payload of the performance test: an identifier followed
// by ten string fields.
type Data struct {
	MyId int64

	A00, A01, A02, A03, A04 string
	A05, A06, A07, A08, A09 string
}
// Test is the kafka performance test. It embeds the shared test stubs, which
// its methods use for logging and cleanup.
type Test struct {
stubs.Stubs
// id is not referenced by any code shown in this post.
id int64
}

// TestValidation builds the test around stubs produced for t, runs the
// producer/consumer check, and runs the test cleanup on the way out.
func TestValidation(t *testing.T) {
	kafkaTest := Test{Stubs: stubs.ProduceStubs(t)}
	defer kafkaTest.TestCleanup()

	kafkaTest.check()
}

// check starts the producer in a background goroutine and then runs the
// consumer loop on the calling goroutine.
func (t *Test) check() {
	go t.runProducer()
	t.runConsumer()
}

// runConsumer reads messages from the topic in an endless loop, decodes each
// one into Data and counts it. Once more than 1000 messages have been read
// since the previous commit it commits in bulk, and at that point, if more than
// ten seconds have passed since the last report, it logs the average
// consumption rate since the start.
func (t *Test) runConsumer() {
	const (
		commitThreshold = 1000
		logInterval     = time.Second * 10
	)

	consumer := kafka.CreateKafkaConsumer(broker, topic, "my-group")
	consumeLogger := progress.ProduceProgress(0, "consume")
	consumeLogger.OnlyDelta = true

	sinceCommit := 0
	startTime := time.Now()
	lastLog := time.Now()
	var consumed int64
	for {
		payload := consumer.ReadMessage()
		consumed++

		var data Data
		kiterr.RaiseIfError(json.Unmarshal(payload, &data))
		consumeLogger.Increment()

		sinceCommit++
		if sinceCommit <= commitThreshold {
			continue
		}
		sinceCommit = 0
		consumer.CommitBulk()

		// The rate is only reported right after a commit.
		if time.Since(lastLog) <= logInterval {
			continue
		}
		lastLog = time.Now()
		elapsed := time.Since(startTime)
		t.Log("average consume %v messages/sec", consumed/int64(elapsed.Seconds()))
	}
}

// runProducer produces the same JSON-encoded Data payload (ten random
// 100-character strings) to the topic in an endless loop.
//
// CreateKafkaProducer takes the broker and the topic only, so it is called with
// exactly those two arguments. A failed send is logged and followed by a
// five-second pause, and is not counted as a produced message.
func (t *Test) runProducer() {
	producer := kafka.CreateKafkaProducer(broker, topic)
	produceLogger := progress.ProduceProgress(0, "produce")
	produceLogger.OnlyDelta = true
	data := Data{
		A00: kitstring.GetRandomString(100),
		A01: kitstring.GetRandomString(100),
		A02: kitstring.GetRandomString(100),
		A03: kitstring.GetRandomString(100),
		A04: kitstring.GetRandomString(100),
		A05: kitstring.GetRandomString(100),
		A06: kitstring.GetRandomString(100),
		A07: kitstring.GetRandomString(100),
		A08: kitstring.GetRandomString(100),
		A09: kitstring.GetRandomString(100),
	}
	payload := kitjson.ObjectToBytes(data)
	for {
		err := producer.ProduceMessage("", payload)
		if err != nil {
			t.Log("ignoring error: %v", err)
			time.Sleep(5 * time.Second)
			continue
		}
		produceLogger.Increment()
	}
}


The kafka can be run as a docker container:

docker run -d --name=kafka -p 9092:9092 apache/kafka


The consumer reaches a rate of 120K messages per second on one core.




Monday, February 24, 2025

NATS GUI in kubernetes

 


In this post we deploy NATS GUI in kubernetes.


NATS GUI is a very simple and nice tool to view NATS messages. Its deployment is also super simple. We include a service and a deployment.

The service:


# ClusterIP service that exposes the NATS GUI pods on port 80 and forwards the
# traffic to port 31311 of the container.
apiVersion: v1
kind: Service
metadata:
  name: natsgui-service
spec:
  # must match the pod labels of the deployment
  selector:
    configid: natsgui-container
  type: ClusterIP
  ports:
    - port: 80
      targetPort: 31311
      name: tcp-api
      protocol: TCP


The deployment:

# Single-replica deployment of the NATS GUI container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: natsgui-deployment
spec:
  replicas: 1
  selector:
    matchLabels:
      configid: natsgui-container
  template:
    metadata:
      labels:
        configid: natsgui-container
    spec:
      containers:
        - name: natsgui
          image: ghcr.io/nats-nui/nui:latest
          imagePullPolicy: IfNotPresent
          volumeMounts:
            - mountPath: /db
              name: db
      volumes:
        # emptyDir: the content of /db does not survive a pod replacement
        - emptyDir: {}
          name: db



That's all.

Now we can see the messages in queues. We can also use wildcards, for example:



Another nice feature is the ability to click on the arrow (near the X that closes the window). This allows us to keep the message details open while checking another message.





Sunday, February 9, 2025

Multi Metrics Scaling Using KEDA and Prometheus


 


We've reviewed KEDA usage with prometheus in this post. However, in real life things get complicated.

How do we handle scaling based on multiple metrics? KEDA does not provide support for this, and the documentation for such task is missing.

Let's review an example: We have a deployment with multiple pods that handle some granular tasks. We want to scale the replica pods by the following metrics:

  • CPU is over 80%
    or
  • Memory is over 80%
    or
  • Tasks rate per second per pod is over 100

First, we need to understand the requirements:
When do we want to scale up?

We want to scale up if any of these metrics is over the thresholds in any pod.
For example:


pod1 CPU=90%, Memory=50%, Tasks rate=20.
pod2 CPU=10%, Memory=50%, Tasks rate=20.


We should scale in this state even though we have only a single metric above the threshold.

How do we achieve this?

The trick is to implement a new Prometheus metric with our application.
We add code to our application that calculates the following metric:


scale_metric=max(memory_ratio, cpu_ratio, tasks_ratio)


Where


memory_ratio = used_memory_percentage / 80%
cpu_ratio = used_cpu_percentage / 80%
tasks_ratio = tasks_per_second / 100



Next we configure KEDA scaling by the max of this metric for all the pods:

# Scale on the application-level scale_metric: the highest value reported by
# any pod is compared with the threshold of 1.
triggers:
  # serverAddress / query / threshold are Prometheus scaler settings, so the
  # trigger type must be prometheus
  - type: prometheus
    metadata:
      serverAddress: {{ .Values.keda.prometheusServerUrl }}
      metricName: scale_metric
      threshold: '1'
      query: max(scale_metric)


Final Note

While this solution requires actual coding, and not just configuration, it provides a solid scaling based on all required features of our business logic.




Monday, February 3, 2025

Should We Use JSON as Message Format?

 



In this post we discuss alternatives for sending messages between microservices.


I have recently designed a system where one microservice sends captured HTTP requests to another microservice. The messaging system could be kafka, NATS, or similar.

The question is how to send the data?

The first intuition is sending as JSON, for example using a GO struct representation:


// TransactionObject is the object form of a captured HTTP request: its method
// and path plus the query arguments, cookies and headers as name/value maps.
type TransactionObject struct {
	Method, Path string

	QueryArgs, Cookies, Headers map[string]string
}

 

The first microservice parses the captured HTTP requests, converts them to objects, marshals each object to JSON, and sends the JSON text. The second microservice reads the JSON text and unmarshals it back to an object.

While this might sound the simple and obvious methodology, it is not always the best. We spend time on converting to object, and on JSON marshal and unmarshal. 

Instead, we can use simple text for the message. The first microservice captures the HTTP requests and sends the text itself. The second microservice reads the text and parses the HTTP request. Hence we save the marshaling to and from JSON.

If we choose to use such a methodology, we need to notice that unlike JSON, the text message cannot be dynamically updated with attributes, such as the request time, the processing time, the geo location of the source IP, etc. Luckily when using NATS and kafka we can easily add these attributes as message headers.

Another issue is the number of consumers. What if we have 100 consumers subscribed to the NATS subject or the kafka topic? Most would think that a simple JSON parsing in each of the 100 microservices would be cheaper than parsing the transaction text in each of them.

Is it? I've created a sample test for this:


package main

import (
"encoding/json"
"fmt"
"math/rand"
"strings"
"time"
)

// letterRunes is the alphabet random texts are drawn from: the lower-case
// letters followed by the upper-case letters.
var letterRunes = []rune("abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ")

// TransactionObject is the object form of an HTTP request that the benchmark
// marshals to and from JSON.
type TransactionObject struct {
	Method, Path string

	QueryArgs, Cookies, Headers map[string]string
}

// generateText returns a random string of stringLength letters drawn from
// letterRunes.
func generateText(stringLength int) string {
	runes := make([]rune, 0, stringLength)
	for len(runes) < stringLength {
		runes = append(runes, letterRunes[rand.Intn(len(letterRunes))])
	}
	return string(runes)
}

func generatePath() string {
var sections []string
amount := rand.Intn(10)
for range amount {
sectionLength := 1 + rand.Intn(15)
sections = append(sections, generateText(sectionLength))
}
return "/" + strings.Join(sections, "/")
}

func generateQuery() string {
amount := rand.Intn(4)
if amount == 0 {
return ""
}
var sections []string
for range amount {
nameLength := 1 + rand.Intn(15)
valueLength := 1 + rand.Intn(15)
query := fmt.Sprintf("%v=%v", generateText(nameLength), generateText(valueLength))
sections = append(sections, query)
}
return "?" + strings.Join(sections, "&")
}

func generateCookies() string {
amount := rand.Intn(3)
if amount == 0 {
return ""
}
var sections []string
for range amount {
nameLength := 1 + rand.Intn(15)
valueLength := 1 + rand.Intn(15)
cookie := fmt.Sprintf("Set-Cookie: %v=%v", generateText(nameLength), generateText(valueLength))
sections = append(sections, cookie)
}
return "\n" + strings.Join(sections, "\n")
}

func generateHeaders() string {
amount := rand.Intn(10)
if amount == 0 {
return ""
}
var sections []string
for range amount {
nameLength := 1 + rand.Intn(15)
valueLength := 1 + rand.Intn(15)
header := fmt.Sprintf("%v: %v", generateText(nameLength), generateText(valueLength))
sections = append(sections, header)
}
return "\n" + strings.Join(sections, "\n")
}

// generateTransactionText returns the text of a random GET request: a request
// line with a random path and query, followed by random header lines and then
// random cookie lines.
func generateTransactionText() string {
	return fmt.Sprintf(
		"GET %v%v HTTP/1.1%v%v",
		generatePath(),
		generateQuery(),
		generateHeaders(),
		generateCookies(),
	)
}

func generateMap(
sizeLimit int,
) map[string]string {
amount := rand.Intn(sizeLimit)
generatedMap := make(map[string]string)
for range amount {
generatedMap[generateText(15)] = generateText(15)
}
return generatedMap
}

// generateTransactionObject returns a random GET transaction in object form: a
// random path and random maps with fewer than 4 query arguments, 3 cookies and
// 10 headers.
func generateTransactionObject() *TransactionObject {
	transaction := new(TransactionObject)
	transaction.Method = "GET"
	transaction.Path = generatePath()
	transaction.QueryArgs = generateMap(4)
	transaction.Cookies = generateMap(3)
	transaction.Headers = generateMap(10)
	return transaction
}

// parseTransactionText is the placeholder for parsing the text of an HTTP
// request. The body is intentionally empty here, so the text loop in main only
// measures the call itself.
func parseTransactionText(
text string,
) {
/*
out of scope for this blog.
we use a proprietary parser, but the Go parser can be used as well
*/
}

func main() {
transactionsAmount := 10000

var objects []string
var texts []string
for range transactionsAmount {

text := generateTransactionText()
texts = append(texts, text)

object := generateTransactionObject()
bytes, err := json.Marshal(object)
if err != nil {
panic(err)
}
objects = append(objects, string(bytes))
}

activations := 1000000

startTimeText := time.Now()
for i := range activations {
text := texts[i%transactionsAmount]
parseTransactionText(text)
}
passedTimeText := time.Since(startTimeText)

startTimeObject := time.Now()
for i := range activations {
text := objects[i%transactionsAmount]
var transactionObject TransactionObject
err := json.Unmarshal([]byte(text), &transactionObject)
if err != nil {
panic(err)
}
}
passedTimeObjects := time.Since(startTimeObject)
fmt.Printf("text per call time: %v\n", passedTimeText/time.Duration(activations))
fmt.Printf("objects per call time: %v\n", passedTimeObjects/time.Duration(activations))
}


and the results are:

JSON parsing ~6 microseconds.

Text parsing ~4 microseconds.

We find that JSON parsing has its cost.


Final Note

We find that using simple text instead of JSON for microservices communication is a good alternative that can be used for performance critical pipelines.