Search for a value in a nested dictionary python

Search for a value in a nested dictionary python

Here's a simple recursive version:

def getpath(nested_dict, value, prepath=()):
    """Return the key path to the first occurrence of *value*.

    Walks *nested_dict* depth-first in iteration order. A value is treated
    as a nested mapping when it exposes an ``items`` attribute.

    Args:
        nested_dict: Mapping to search; values may themselves be mappings.
        value: Value to look for, compared with ``==``.
        prepath: Tuple of keys already traversed; it is prepended to the
            returned path.

    Returns:
        A tuple of keys leading from the outermost mapping to *value*, or
        ``None`` when *value* is not present.
    """
    for k, v in nested_dict.items():
        path = prepath + (k,)
        if v == value:  # found value
            return path
        elif hasattr(v, "items"):  # v is a dict
            p = getpath(v, value, path)  # recursive call
            if p is not None:
                return p
    # Nothing matched at this level or below.
    return None

Example:

print(getpath(dictionary, "image/svg+xml"))
# -> ('dict1', 'part2', '.svg')

This is an iterative traversal of your nested dicts that additionally keeps track of all the keys leading up to a particular point. Therefore as soon as you find the correct value inside your dicts, you also already have the keys needed to get to that value.

The code below will run as-is if you put it in a .py file. The find_mime_type(...) function returns the sequence of keys that will get you from the original dictionary to the value you want. The demo() function shows how to use it.

# Sample data: two levels of grouping keys, then file extension -> MIME type.
d = {
    "dict1": {
        "part1": {
            ".wbxml": "application/vnd.wap.wbxml",
            ".rl": "application/resource-lists+xml",
        },
        "part2": {
            ".wsdl": "application/wsdl+xml",
            ".rs": "application/rls-services+xml",
            ".xop": "application/xop+xml",
            ".svg": "image/svg+xml",
        },
    },
    "dict2": {
        "part1": {
            ".dotx": "application/vnd.openxmlformats-..",
            ".zaz": "application/vnd.zzazz.deck+xml",
            ".xer": "application/patch-ops-error+xml",
        },
    },
}


def demo():
    """Look up a MIME type in the module-level ``d`` and print what was found.

    Prints the key chain returned by ``find_mime_type`` and then follows that
    chain through ``d`` to confirm it leads back to the MIME type. If the MIME
    type is absent, prints a message and exits the interpreter.
    """
    mime_type = "image/svg+xml"
    try:
        key_chain = find_mime_type(d, mime_type)
    except KeyError:
        print("Could not find this mime type: {0}".format(mime_type))
        # Equivalent to the bare exit() call, but does not depend on the
        # helper that the site module injects for interactive use.
        raise SystemExit
    print("Found {0} mime type here: {1}".format(mime_type, key_chain))
    # Walk the returned keys one level at a time to verify the path.
    nested = d
    for key in key_chain:
        nested = nested[key]
    print("Confirmation lookup: {0}".format(nested))


def find_mime_type(d, mime_type):
    """Return the list of keys leading from *d* to *mime_type*.

    Traverses the nested mappings iteratively. Pending nodes are kept in a
    list that is popped from the end, so the most recently added entry is
    examined first. Anything without an ``items()`` method is treated as a
    leaf.

    Args:
        d: Nested mapping to search.
        mime_type: Value to look for, compared with ``==``.

    Returns:
        A list of keys such that indexing *d* with each key in turn yields
        *mime_type*. The list is empty when *d* itself equals *mime_type*.

    Raises:
        KeyError: If *mime_type* does not occur anywhere in *d*; the missing
            value is attached as the exception argument.
    """
    # Each entry pairs the key chain walked so far with the value found there.
    pending = [([], d)]
    while pending:
        this_key_chain, this_v = pending.pop()
        # finish search if found the mime type
        if this_v == mime_type:
            return this_key_chain
        # not found. keep searching
        # queue dicts for checking / ignore anything that's not a dict
        try:
            items = this_v.items()
        except AttributeError:
            continue  # this was not a nested dict. ignore it
        for k, v in items:
            pending.append((this_key_chain + [k], v))
    # if we haven't returned by this point, we've exhausted all the contents
    raise KeyError(mime_type)


# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    demo()

Output:

Found image/svg+xml mime type here: ['dict1', 'part2', '.svg']

Confirmation lookup: image/svg+xml

Search for a value in a nested dictionary python

Here is a solution that works for a complex data structure of nested lists and dicts:

import pprint

def search(d, search_pattern, prev_datapoint_path=""):
    """Return accessor-style paths to everything in *d* matching a pattern.

    Recursively walks nested dicts and lists. A path is recorded when
    *search_pattern* is a substring of ``str()`` of a dict key, a list index,
    or a leaf value. Paths are built by appending ``[key]`` / ``[index]`` to
    *prev_datapoint_path*, e.g. ``d['dict1']['part2']['.svg']``.

    Args:
        d: The data structure (dict, list, or leaf value) to search.
        search_pattern: Substring to look for.
        prev_datapoint_path: Path prefix for *d* itself, typically the name
            of the variable holding the structure.

    Returns:
        A list of path strings in traversal order. A key whose own text and
        whose leaf value both match is reported twice. Empty paths are
        dropped, so a matching bare leaf with no prefix yields nothing.
    """
    output = []
    if isinstance(d, dict):
        for dkey, value in d.items():
            # repr() keeps string keys quoted and also handles non-string keys.
            child_path = prev_datapoint_path + "[{0!r}]".format(dkey)
            if search_pattern in str(dkey):
                output.append(child_path)
            output.extend(search(value, search_pattern, child_path))
    elif isinstance(d, list):
        for index, value in enumerate(d):
            child_path = prev_datapoint_path + "[{0}]".format(index)
            if search_pattern in str(index):
                # Record the path to the element, not the bare index number.
                output.append(child_path)
            output.extend(search(value, search_pattern, child_path))
    elif search_pattern in str(d):
        output.append(prev_datapoint_path)
    # Drop empty paths, matching the original filter(None, ...) step.
    return [path for path in output if path]


if __name__ == "__main__":
    # Nested dicts only: grouping keys, then file extension -> MIME type.
    d = {
        "dict1": {
            "part1": {
                ".wbxml": "application/vnd.wap.wbxml",
                ".rl": "application/resource-lists+xml",
            },
            "part2": {
                ".wsdl": "application/wsdl+xml",
                ".rs": "application/rls-services+xml",
                ".xop": "application/xop+xml",
                ".svg": "image/svg+xml",
            },
        },
        "dict2": {
            "part1": {
                ".dotx": "application/vnd.openxmlformats-..",
                ".zaz": "application/vnd.zzazz.deck+xml",
                ".xer": "application/patch-ops-error+xml",
            },
        },
    }

    # Mixed dicts and lists, to exercise both branches of search().
    d2 = {
        "items": {
            "item": [
                {
                    "id": "0001",
                    "type": "donut",
                    "name": "Cake",
                    "ppu": 0.55,
                    "batters": {
                        "batter": [
                            {"id": "1001", "type": "Regular"},
                            {"id": "1002", "type": "Chocolate"},
                            {"id": "1003", "type": "Blueberry"},
                            {"id": "1004", "type": "Devil's Food"},
                        ],
                    },
                    "topping": [
                        {"id": "5001", "type": "None"},
                        {"id": "5002", "type": "Glazed"},
                        {"id": "5005", "type": "Sugar"},
                        {"id": "5007", "type": "Powdered Sugar"},
                        {"id": "5006", "type": "Chocolate with Sprinkles"},
                        {"id": "5003", "type": "Chocolate"},
                        {"id": "5004", "type": "Maple"},
                    ],
                },
                # Placeholder for further items elided from the example.
                ...,
            ],
        },
    }

    pprint.pprint(search(d, "svg+xml", "d"))
    # >> ["d['dict1']['part2']['.svg']"]

    pprint.pprint(search(d2, "500", "d2"))
    # >> ["d2['items']['item'][0]['topping'][0]['id']",
    #     "d2['items']['item'][0]['topping'][1]['id']",
    #     "d2['items']['item'][0]['topping'][2]['id']",
    #     "d2['items']['item'][0]['topping'][3]['id']",
    #     "d2['items']['item'][0]['topping'][4]['id']",
    #     "d2['items']['item'][0]['topping'][5]['id']",
    #     "d2['items']['item'][0]['topping'][6]['id']"]

Leave a Reply

Your email address will not be published. Required fields are marked *