I recently had a failure in a product caused by a huge JSON state that was saved to a file. I tried to work out which part of the JSON was so big, but the complexity and the many levels of nesting made that very difficult. So I decided to write Go code that analyzes the JSON and prints a report of the sizes found under the different paths. I'm sharing the code here so you can use it for similar issues.
// sizes accumulates the approximate size attributed to each report path.
// Keys are paths such as "", "/item1" or "/item1[]/name".
var sizes = map[string]int{}
// main analyzes the JSON file once and then writes one report for every
// combination of root path and minimum fraction listed below.
func main() {
	analyzeJsonSize()

	roots := []string{"", "/limit/to/path"}
	fractions := []float32{0, 0.01}
	for _, root := range roots {
		for _, fraction := range fractions {
			printReport(root, fraction)
		}
	}
}
// analyzeJsonSize loads data.json, decodes it into a generic value and walks
// it with recursiveAnalyze, starting from the root path "".
// It panics when the content cannot be decoded as JSON.
func analyzeJsonSize() {
	// NOTE(review): kitio.ReadFileWrapper presumably returns the file content
	// as []byte and deals with read errors itself - confirm.
	raw := kitio.ReadFileWrapper("data.json")

	var root interface{}
	if err := json.Unmarshal(raw, &root); err != nil {
		panic(err)
	}

	recursiveAnalyze(root, []string{""})
}
// IsNilInterface reports whether val is nil: either an untyped nil interface
// or an interface wrapping a nil value of a nillable kind (channel, function,
// interface, map, pointer, slice or unsafe pointer). Values of any other kind
// are never considered nil.
func IsNilInterface(val any) bool {
	if val == nil {
		return true
	}

	rv := reflect.ValueOf(val)
	switch rv.Kind() {
	case reflect.Chan,
		reflect.Func,
		reflect.Interface,
		reflect.Map,
		reflect.Pointer,
		reflect.Slice,
		reflect.UnsafePointer:
		// IsNil is only consulted for the kinds listed here.
		return rv.IsNil()
	}
	return false
}
// recursiveAnalyze walks a decoded JSON value depth-first and records an
// approximate size for it through addSizeForPaths.
//
// paths is the chain of report paths from the root down to data; its last
// entry is the path of data itself. An empty chain is treated as the root
// path "". Object members extend the chain with "/<key>" and array elements
// with "[]", so all elements of one array share a single path.
//
// Size accounting:
//   - numbers count as 8 and booleans as 4;
//   - strings count as their length in bytes;
//   - an object key counts as its length in bytes, attributed to the
//     containing object and its ancestors but not to the member's own path;
//   - nil values contribute nothing.
//
// It panics on any value type not listed above.
func recursiveAnalyze(data any, paths []string) {
	if IsNilInterface(data) {
		return
	}
	if len(paths) == 0 {
		// Avoid an index-out-of-range panic below when no chain is supplied.
		paths = []string{""}
	}
	current := paths[len(paths)-1]

	// data is known to be non-nil here, so no nil case is needed.
	switch v := data.(type) {
	case []any:
		// The element path does not depend on the index, so build it once.
		// Sharing the appended chain is safe: the walk is sequential and
		// callees only ever append to it.
		elemPaths := append(paths, current+"[]")
		for _, elem := range v {
			recursiveAnalyze(elem, elemPaths)
		}
	case map[string]any:
		for key, value := range v {
			addSizeForPaths(paths, len(key))
			recursiveAnalyze(value, append(paths, current+"/"+key))
		}
	case float64, int, int64:
		addSizeForPaths(paths, 8)
	case bool:
		addSizeForPaths(paths, 4)
	case string:
		addSizeForPaths(paths, len(v))
	default:
		panic(fmt.Errorf("non supported data type %v", data))
	}
}
// addSizeForPaths adds size to the running total of every path in paths.
func addSizeForPaths(paths []string, size int) {
	for i := 0; i < len(paths); i++ {
		sizes[paths[i]] += size
	}
}
// printReport writes a size report for rootElement and the paths below it.
//
// Every reported path gets one line holding its fraction of the root's total
// size, its absolute size and the path. Paths whose fraction is below
// minFraction are left out. When the root has no recorded size, all fractions
// are reported as 0 instead of NaN/Inf.
//
// The report is written to "result_<root>_<minFraction>.txt", where <root> is
// rootElement with every "/" and "." replaced by "_".
func printReport(
	rootElement string,
	minFraction float32,
) {
	total := float32(sizes[rootElement])

	var lines []string
	// NOTE(review): kitmap.MapGetSortedKeys presumably returns the map keys
	// in sorted order - confirm the meaning of the boolean argument.
	for _, key := range kitmap.MapGetSortedKeys(sizes, true) {
		if !isUnderRoot(key, rootElement) {
			continue
		}
		value := sizes[key]
		var part float32
		if total > 0 {
			part = float32(value) / total
		}
		if part < minFraction {
			continue
		}
		lines = append(lines, fmt.Sprintf("%10.6f = %15d %v", part, value, key))
	}

	safeRootName := strings.NewReplacer("/", "_", ".", "_").Replace(rootElement)
	resultPath := fmt.Sprintf("result_%v_%v.txt", safeRootName, minFraction)
	resultData := strings.Join(lines, "\n")
	kitio.WriteFileWrapper(resultPath, []byte(resultData))
}

// isUnderRoot reports whether key is root itself or a path below it. A plain
// prefix test is not enough: root "/a/b" must not match the sibling "/a/bc".
func isUnderRoot(key, root string) bool {
	if !strings.HasPrefix(key, root) {
		return false
	}
	rest := key[len(root):]
	return rest == "" || strings.HasPrefix(rest, "/") || strings.HasPrefix(rest, "[]")
}
An example of the output is shown below. Here we analyze the entire JSON, but the report is filtered to display only items that account for at least 1% of the total size.
1.000000 = 55706185
1.000000 = 55706163 /item1
1.000000 = 55706163 /item1[]
0.079118 = 4407368 /item1[]/AllowedItemsStat
0.079109 = 4406858 /item1[]/AllowedItemsStat/ProbesStat
0.112621 = 6273686 /item1[]/IdenticalItemsStat
0.112612 = 6273176 /item1[]/IdenticalItemsStat/ProbesStat
0.010019 = 558099 /item1[]/LearningCycles
0.043961 = 2448910 /item1[]/ItemArrayStat
0.043952 = 2448400 /item1[]/ItemArrayStat/ProbesStat
0.222970 = 12420785 /item1[]/ItemCardinalityStat
0.222960 = 12420275 /item1[]/ItemCardinalityStat/ProbesStat
0.049291 = 2745825 /item1[]/ItemMandatoryStat
0.049282 = 2745315 /item1[]/ItemMandatoryStat/ProbesStat
0.082091 = 4572969 /item1[]/ItemTypeStats
0.082082 = 4572459 /item1[]/ItemTypeStats/ProbesStat
0.389507 = 21697971 /item1[]/ItemValuesStat
0.389498 = 21697461 /item1[]/ItemValuesStat/ProbesStat