
Sunday, May 11, 2025

JSON Size Analysis


I recently hit a failure in a product caused by a huge JSON state that was saved to a file. I was trying to understand which part of the JSON was so big, and due to its complexity and multiple hierarchies this was very difficult. Hence I decided to write Go code that analyzes the JSON and prints a report of the sizes at different paths. I'm adding the code here so you can use it for similar issues.



package main

// Note: kitio and kitmap used below are the author's helper packages
// (file and sorted-map utilities); their import paths are omitted here.
import (
	"encoding/json"
	"fmt"
	"reflect"
	"strings"
)

// sizes accumulates the estimated byte count per JSON path.
var sizes = make(map[string]int)

func main() {
	analyzeJsonSize()

	printReport("", 0)
	printReport("", 0.01)
	printReport("/limit/to/path", 0)
	printReport("/limit/to/path", 0.01)
}

func analyzeJsonSize() {
	bytes := kitio.ReadFileWrapper("data.json")
	var data interface{}
	err := json.Unmarshal(bytes, &data)
	if err != nil {
		panic(err)
	}
	recursiveAnalyze(data, []string{""})
}

func IsNilInterface(val any) bool {
	if val == nil {
		return true
	}

	v := reflect.ValueOf(val)
	switch v.Kind() {
	case reflect.Chan, reflect.Func, reflect.Map, reflect.Pointer,
		reflect.UnsafePointer, reflect.Interface, reflect.Slice:
		return v.IsNil()
	default:
		return false
	}
}

func recursiveAnalyze(data interface{}, paths []string) {
	if IsNilInterface(data) {
		return
	}
	lastPath := paths[len(paths)-1]
	switch value := data.(type) {
	case []interface{}:
		for i := range value {
			// Copy paths before appending: a plain append(paths, ...) may
			// reuse the backing array and corrupt sibling paths between
			// iterations and recursive calls.
			nextPaths := append(append([]string{}, paths...), lastPath+"[]")
			recursiveAnalyze(value[i], nextPaths)
		}
	case map[string]interface{}:
		for key, child := range value {
			addSizeForPaths(paths, len(key))
			nextPaths := append(append([]string{}, paths...), lastPath+"/"+key)
			recursiveAnalyze(child, nextPaths)
		}
	case float64, int, int64:
		addSizeForPaths(paths, 8)
	case bool:
		addSizeForPaths(paths, 4)
	case string:
		addSizeForPaths(paths, len(value))
	case nil:
	default:
		panic(fmt.Errorf("unsupported data type %v", data))
	}
}

func addSizeForPaths(
	paths []string,
	size int,
) {
	for _, path := range paths {
		sizes[path] += size
	}
}

func printReport(
	rootElement string,
	minFraction float32,
) {
	total := float32(sizes[rootElement])
	var lines []string
	for _, key := range kitmap.MapGetSortedKeys(sizes, true) {
		if strings.HasPrefix(key, rootElement) {
			value := sizes[key]
			part := float32(value) / total
			if part >= minFraction {
				line := fmt.Sprintf("%10.6f = %15d %v", part, value, key)
				lines = append(lines, line)
			}
		}
	}

	safeRootName := rootElement
	safeRootName = strings.ReplaceAll(safeRootName, "/", "_")
	safeRootName = strings.ReplaceAll(safeRootName, ".", "_")
	resultPath := fmt.Sprintf("result_%v_%v.txt", safeRootName, minFraction)
	resultData := strings.Join(lines, "\n")
	kitio.WriteFileWrapper(resultPath, []byte(resultData))
}


An example of the output is shown below. Here we analyze the entire JSON, filtered to display only items that account for more than 1% of the memory usage.


  1.000000 =        55706185
  1.000000 =        55706163 /item1
  1.000000 =        55706163 /item1[]
  0.079118 =         4407368 /item1[]/AllowedItemsStat
  0.079109 =         4406858 /item1[]/AllowedItemsStat/ProbesStat
  0.112621 =         6273686 /item1[]/IdenticalItemsStat
  0.112612 =         6273176 /item1[]/IdenticalItemsStat/ProbesStat
  0.010019 =          558099 /item1[]/LearningCycles
  0.043961 =         2448910 /item1[]/ItemArrayStat
  0.043952 =         2448400 /item1[]/ItemArrayStat/ProbesStat
  0.222970 =        12420785 /item1[]/ItemCardinalityStat
  0.222960 =        12420275 /item1[]/ItemCardinalityStat/ProbesStat
  0.049291 =         2745825 /item1[]/ItemMandatoryStat
  0.049282 =         2745315 /item1[]/ItemMandatoryStat/ProbesStat
  0.082091 =         4572969 /item1[]/ItemTypeStats
  0.082082 =         4572459 /item1[]/ItemTypeStats/ProbesStat
  0.389507 =        21697971 /item1[]/ItemValuesStat
  0.389498 =        21697461 /item1[]/ItemValuesStat/ProbesStat



Sunday, May 4, 2025

Run GPU based docker on AWS EC2



In this post we will review the steps to run a GPU based docker container on AWS EC2.


A. Launch EC2 Instance

The first step is to launch a new EC2 instance; however, there are some issues to pay attention to.

First, we should select a suitable AMI that includes the drivers that enable GPU usage. The best match I found is Ubuntu with the AMI: Deep Learning Base OSS Nvidia GPU AMI.



I wanted the cheapest instance type that includes a GPU, and selected g4dn.xlarge.





One more thing is to configure larger storage than the default 75G, since most LLM models and Python libraries require a lot of disk space.



We will need a way to connect to the instance, so we should configure a security group that allows our IPs to connect to the EC2 instance over SSH, and allocate a public IPv4 address to the instance.

B. Run The Docker Container

Once the EC2 instance is up, connect to it using SSH and install docker support for the GPU.

distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit
sudo systemctl restart docker
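
Before running our own image, it is worth verifying that containers can actually see the GPU. NVIDIA's container toolkit documentation suggests running nvidia-smi in a plain container as a sample workload:

```shell
# Should print the same GPU table as running nvidia-smi directly on the host.
# If this fails, the nvidia-container-toolkit installation above is incomplete.
sudo docker run --rm --gpus all ubuntu nvidia-smi
```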

Now we can build our docker image and run it with a flag that enables it to use the GPU.
We would probably want to expose the relevant ports as well.

docker run --rm --name models -it -p 0.0.0.0:9090:9090 --gpus all  my-image:latest

That's all: our GPU-based container is up and serving requests.